diff options
Diffstat (limited to 'fs')
111 files changed, 4604 insertions, 2326 deletions
diff --git a/fs/Config.in b/fs/Config.in index fe279dc94..a9f922d8a 100644 --- a/fs/Config.in +++ b/fs/Config.in @@ -5,6 +5,27 @@ mainmenu_option next_comment comment 'Filesystems' bool 'Quota support' CONFIG_QUOTA +bool 'Preload dcache entries in readdir() [ALPHA, currently dangerous!]' CONFIG_DCACHE_PRELOAD +bool 'Include support for omirr online mirror' CONFIG_OMIRR +bool 'Translate filename suffixes' CONFIG_TRANS_NAMES +if [ "$CONFIG_TRANS_NAMES" = "y" ]; then + bool ' Restrict translation to specific gid' CONFIG_TRANS_RESTRICT + if [ "$CONFIG_TRANS_RESTRICT" = "y" ]; then + int ' Enter gid to compile in' CONFIG_TRANS_GID 4 + fi + bool ' Translate nodename' CONFIG_TR_NODENAME + bool ' Translate compiled-in kernelname' CONFIG_TR_KERNNAME + if [ "$CONFIG_TR_KERNNAME" = "y" ]; then + string ' Enter kernelname string to compile in' CONFIG_KERNNAME banana + fi + bool ' Translate compiled-in kerneltype' CONFIG_TR_KERNTYPE + if [ "$CONFIG_TR_KERNTYPE" = "y" ]; then + string ' Enter kerneltype string to compile in' CONFIG_KERNTYPE default + fi + bool ' Translate machine type' CONFIG_TR_MACHINE + bool ' Translate sysname' CONFIG_TR_SYSNAME +fi + tristate 'Minix fs support' CONFIG_MINIX_FS tristate 'Second extended fs support' CONFIG_EXT2_FS diff --git a/fs/Makefile b/fs/Makefile index 471a9de5c..fb9da7124 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,10 +10,10 @@ L_TARGET := filesystems.a L_OBJS = $(join $(SUB_DIRS),$(SUB_DIRS:%=/%.o)) O_TARGET := fs.o -O_OBJS = open.o read_write.o inode.o devices.o file_table.o buffer.o \ +O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ - dcache.o $(BINFMTS) + inode.o dcache.o attr.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = minix ext2 fat msdos vfat proc isofs nfs umsdos \ @@ -25,6 +25,10 @@ else O_OBJS += noquot.o endif +ifeq ($(CONFIG_TRANS_NAMES),y) +O_OBJS += nametrans.o +endif + ifeq 
($(CONFIG_MINIX_FS),y) SUB_DIRS += minix else @@ -211,6 +215,14 @@ else endif +ifeq ($(CONFIG_BINFMT_MISC),y) +BINFMTS += binfmt_misc.o +else + ifeq ($(CONFIG_BINFMT_MISC),m) + M_OBJS += binfmt_misc.o + endif +endif + # binfmt_script is always there BINFMTS += binfmt_script.o diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 8ae71e5bd..5baea0b7d 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -55,7 +55,6 @@ struct inode_operations affs_dir_inode_operations = { NULL, /* mknod */ affs_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/affs/file.c b/fs/affs/file.c index 0fffbf41e..46b10bcb1 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -71,7 +71,6 @@ struct inode_operations affs_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ affs_bmap, /* bmap */ @@ -105,7 +104,6 @@ struct inode_operations affs_file_inode_operations_ofs = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 654a8ca61..2805f1ccf 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -899,7 +899,7 @@ affs_new_inode(const struct inode *dir) return NULL; } - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_nlink = 1; inode->i_dev = sb->s_dev; inode->i_uid = current->fsuid; @@ -1031,9 +1031,9 @@ addentry_done: } static struct file_system_type affs_fs_type = { - affs_read_super, "affs", - 1, + FS_REQUIRES_DEV, + affs_read_super, NULL }; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 6a9b02bac..5ea649425 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -343,7 +343,7 @@ affs_rmdir(struct inode *dir, const char *name, int len) retval = -ENOTEMPTY; goto rmdir_done; } - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 
1) { retval = -EBUSY; goto rmdir_done; } @@ -512,7 +512,7 @@ subdir(struct inode *new_inode, struct inode *old_inode) int ino; int result; - new_inode->i_count++; + atomic_inc(&new_inode->i_count); result = 0; for (;;) { if (new_inode == old_inode) { @@ -535,8 +535,7 @@ subdir(struct inode *new_inode, struct inode *old_inode) int affs_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir) + struct inode *new_dir, const char *new_name, int new_len) { struct inode *old_inode; struct inode *new_inode; @@ -570,8 +569,6 @@ start_up: old_inode = __iget(old_dir->i_sb,old_ino,0); if (!old_inode) goto end_rename; - if (must_be_dir && !S_ISDIR(old_inode->i_mode)) - goto end_rename; new_bh = affs_find_entry(new_dir,new_name,new_len,&new_ino); if (new_bh) { new_inode = __iget(new_dir->i_sb,new_ino,0); @@ -595,7 +592,7 @@ start_up: if (!empty_dir(new_bh,AFFS_I2HSIZE(new_inode))) goto end_rename; retval = -EBUSY; - if (new_inode->i_count > 1) + if (atomic_read(&new_inode->i_count) > 1) goto end_rename; } if (S_ISDIR(old_inode->i_mode)) { diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index de93eac5c..24d1ed118 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -20,7 +20,6 @@ #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) static int affs_readlink(struct inode *, char *, int); -static int affs_follow_link(struct inode *, struct inode *, int, int, struct inode **); struct inode_operations affs_symlink_inode_operations = { NULL, /* no file-operations */ @@ -34,92 +33,12 @@ struct inode_operations affs_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ affs_readlink, /* readlink */ - affs_follow_link, /* follow_link */ NULL, /* bmap */ NULL, /* truncate */ NULL /* permission */ }; static int -affs_follow_link(struct inode *dir, struct inode *inode, int flag, int mode, - struct inode **res_inode) -{ - struct buffer_head *bh; - struct slink_front *lf; - char *buffer; - int error; - int i, j; - char c; - char lc; - - pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino); - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput(inode); - iput(dir); - return -ELOOP; - } - if (!(buffer = kmalloc(1024,GFP_KERNEL))) { - iput(inode); - iput(dir); - return -ENOSPC; - } - bh = affs_bread(inode->i_dev,inode->i_ino,AFFS_I2BSIZE(inode)); - i = 0; - j = 0; - if (!bh) { - affs_error(inode->i_sb,"follow_link","Cannot read block %lu\n",inode->i_ino); - kfree(buffer); - iput(inode); - iput(dir); - return -EIO; - } - lf = (struct slink_front *)bh->b_data; - lc = 0; - if (strchr(lf->symname,':')) { /* Handle assign or volume name */ - while (i < 1023 && (c = inode->i_sb->u.affs_sb.s_prefix[i])) - buffer[i++] = c; - while (i < 1023 && lf->symname[j] != ':') - buffer[i++] = lf->symname[j++]; - if (i < 1023) - buffer[i++] = '/'; - j++; - lc = '/'; - } - while (i < 1023 && (c = lf->symname[j])) { - if (c == '/' && lc == '/' && i < 1020) { /* parent dir */ - buffer[i++] = '.'; - buffer[i++] = '.'; - } - buffer[i++] = c; - lc = c; - j++; - } - buffer[i] = '\0'; - affs_brelse(bh); - 
iput(inode); - current->link_count++; - error = open_namei(buffer,flag,mode,res_inode,dir); - current->link_count--; - kfree(buffer); - return error; -} - -static int affs_readlink(struct inode *inode, char *buffer, int buflen) { struct buffer_head *bh; @@ -130,10 +49,6 @@ affs_readlink(struct inode *inode, char *buffer, int buflen) pr_debug("AFFS: readlink(ino=%lu,buflen=%d)\n",inode->i_ino,buflen); - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } bh = affs_bread(inode->i_dev,inode->i_ino,AFFS_I2BSIZE(inode)); i = 0; j = 0; diff --git a/fs/attr.c b/fs/attr.c new file mode 100644 index 000000000..be824dd4a --- /dev/null +++ b/fs/attr.c @@ -0,0 +1,99 @@ +/* + * linux/fs/attr.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * changes by Thomas Schoebel-Theuer + */ + +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <asm/system.h> + +/* Taken over from the old code... */ + +/* POSIX UID/GID verification for setting inode attributes. */ +int inode_change_ok(struct inode *inode, struct iattr *attr) +{ + /* If force is set do it anyway. */ + if (attr->ia_valid & ATTR_FORCE) + return 0; + + /* Make sure a caller can chown. */ + if ((attr->ia_valid & ATTR_UID) && + (current->fsuid != inode->i_uid || + attr->ia_uid != inode->i_uid) && !fsuser()) + return -EPERM; + + /* Make sure caller can chgrp. */ + if ((attr->ia_valid & ATTR_GID) && + (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid) && + !fsuser()) + return -EPERM; + + /* Make sure a caller can chmod. */ + if (attr->ia_valid & ATTR_MODE) { + if ((current->fsuid != inode->i_uid) && !fsuser()) + return -EPERM; + /* Also check the setgid bit! */ + if (!fsuser() && !in_group_p((attr->ia_valid & ATTR_GID) ? attr->ia_gid : + inode->i_gid)) + attr->ia_mode &= ~S_ISGID; + } + + /* Check for setting the inode time. 
*/ + if ((attr->ia_valid & ATTR_ATIME_SET) && + ((current->fsuid != inode->i_uid) && !fsuser())) + return -EPERM; + if ((attr->ia_valid & ATTR_MTIME_SET) && + ((current->fsuid != inode->i_uid) && !fsuser())) + return -EPERM; + return 0; +} + +void inode_setattr(struct inode * inode, struct iattr * attr) +{ + if(attr->ia_valid & + (ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_MTIME|ATTR_CTIME|ATTR_MODE)) { + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + if (attr->ia_valid & ATTR_SIZE) + inode->i_size = attr->ia_size; + if (attr->ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; + if (attr->ia_valid & ATTR_MODE) { + inode->i_mode = attr->ia_mode; + if (!fsuser() && !in_group_p(inode->i_gid)) + inode->i_mode &= ~S_ISGID; + } + inode->i_dirt = 1; + } +} + +int notify_change(struct inode * inode, struct iattr * attr) +{ + int error; + time_t now = CURRENT_TIME; + + attr->ia_ctime = now; + if ((attr->ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) == ATTR_ATIME) + attr->ia_atime = now; + if ((attr->ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) == ATTR_MTIME) + attr->ia_mtime = now; + attr->ia_valid &= ~(ATTR_CTIME); + if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->notify_change) + return inode->i_sb->s_op->notify_change(inode, attr); + error = inode_change_ok(inode, attr); + if(!error) + inode_setattr(inode, attr); + return error; +} + diff --git a/fs/autofs/dir.c b/fs/autofs/dir.c index 461688e9f..0f529c900 100644 --- a/fs/autofs/dir.c +++ b/fs/autofs/dir.c @@ -80,7 +80,6 @@ struct inode_operations autofs_dir_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* read_page */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/autofs/init.c b/fs/autofs/init.c index 
1b3f6f165..4dbb76c85 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -20,7 +20,10 @@ #endif static struct file_system_type autofs_fs_type = { - autofs_read_super, "autofs", 0, NULL + "autofs", + FS_NO_DCACHE, + autofs_read_super, + NULL }; #ifdef MODULE diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 69e62f823..a615ede29 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -48,7 +48,6 @@ struct inode_operations autofs_root_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index 46c333103..d6ac82ed4 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -14,39 +14,6 @@ #include <linux/sched.h> #include "autofs_i.h" -static int autofs_follow_link(struct inode *dir, struct inode *inode, - int flag, int mode, struct inode **res_inode) -{ - int error; - char *link; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput(dir); - iput(inode); - return -ELOOP; - } - link = ((struct autofs_symlink *)inode->u.generic_ip)->data; - current->link_count++; - error = open_namei(link,flag,mode,res_inode,dir); - current->link_count--; - iput(inode); - return error; -} - static int autofs_readlink(struct inode *inode, char *buffer, int buflen) { struct autofs_symlink *sl; @@ -76,7 +43,6 @@ struct inode_operations autofs_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ autofs_readlink, /* readlink */ - autofs_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index d9ef6d6ac..394f41eb1 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -214,6 +214,7 @@ do_aout_core_dump(long signr, 
struct pt_regs * regs) /* Finally dump the task struct. Not be used by gdb, but could be useful */ set_fs(KERNEL_DS); DUMP_WRITE(current,sizeof(*current)); + inode->i_status |= ST_MODIFIED; close_coredump: if (file.f_op->release) file.f_op->release(inode,&file); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f12d89ef3..ff987e0e8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -717,7 +717,7 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) #ifndef VM_STACK_FLAGS current->executable = bprm->inode; - bprm->inode->i_count++; + atomic_inc(&bprm->inode->i_count); #endif #ifdef LOW_ELF_STACK current->start_stack = bprm->p = elf_stack - 4; @@ -923,6 +923,7 @@ static int load_elf_library(int fd) */ static int dump_write(struct file *file, const void *addr, int nr) { + file->f_inode->i_status |= ST_MODIFIED; return file->f_op->write(file->f_inode, file, addr, nr) == nr; } diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c new file mode 100644 index 000000000..28dced394 --- /dev/null +++ b/fs/binfmt_misc.c @@ -0,0 +1,505 @@ +/* + * binfmt_misc.c + * + * Copyright (C) 1997 Richard Günther + * + * binfmt_misc detects binaries via a magic or filename extension and invokes + * a specified wrapper. This should obsolete binfmt_java, binfmt_em86 and + * binfmt_mz. + * + * 25.4.97 first version + * [...] + * 19.5.97 cleanup + */ + +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/malloc.h> +#include <linux/binfmts.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <asm/uaccess.h> +#include <asm/spinlock.h> + + +#define VERBOSE_STATUS /* undef this to save 400 bytes kernel memory */ + +#ifndef MIN +#define MIN(x,y) (((x)<(y))?(x):(y)) +#endif + +struct binfmt_entry { + struct binfmt_entry *next; + int id; + int flags; /* type, status, etc. 
*/ + int offset; /* offset of magic */ + int size; /* size of magic/mask */ + char *magic; /* magic or filename extension */ + char *mask; /* mask, NULL for exact match */ + char *interpreter; /* filename of interpreter */ + char *proc_name; + struct proc_dir_entry *proc_dir; +}; + +#define ENTRY_ENABLED 1 /* the old binfmt_entry.enabled */ +#define ENTRY_MAGIC 8 /* not filename detection */ +#define ENTRY_STRIP_EXT 32 /* strip of last filename extension */ + +static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs); +static void entry_proc_cleanup(struct binfmt_entry *e); +static int entry_proc_setup(struct binfmt_entry *e); + +static struct linux_binfmt misc_format = { +#ifndef MODULE + NULL, 0, load_misc_binary, NULL, NULL +#else + NULL, &__this_module, load_misc_binary, NULL, NULL +#endif +}; + +static struct proc_dir_entry *bm_dir = NULL; + +static struct binfmt_entry *entries = NULL; +static int free_id = 1; +static int enabled = 1; +static rwlock_t entries_lock = RW_LOCK_UNLOCKED; + + +/* + * Unregister one entry + */ +static void clear_entry(int id) +{ + struct binfmt_entry **ep, *e; + + write_lock(&entries_lock); + ep = &entries; + while (*ep && ((*ep)->id != id)) + ep = &((*ep)->next); + if ((e = *ep)) { + *ep = e->next; + entry_proc_cleanup(e); + kfree(e); + MOD_DEC_USE_COUNT; + } + write_unlock(&entries_lock); +} + +/* + * Clear all registered binary formats + */ +static void clear_entries(void) +{ + struct binfmt_entry *e; + + write_lock(&entries_lock); + while ((e = entries)) { + entries = entries->next; + entry_proc_cleanup(e); + kfree(e); + MOD_DEC_USE_COUNT; + } + write_unlock(&entries_lock); +} + +/* + * Find entry through id - caller has to do locking + */ +static struct binfmt_entry *get_entry(int id) +{ + struct binfmt_entry *e = entries; + + while (e && (e->id != id)) + e = e->next; + return e; +} + + +/* + * Check if we support the binfmt + * if we do, return the binfmt_entry, else NULL + * locking is done in 
load_misc_binary + */ +static struct binfmt_entry *check_file(struct linux_binprm *bprm) +{ + struct binfmt_entry *e = entries; + char *p = strrchr(bprm->filename, '.'); + int j; + + while (e) { + if (e->flags & ENTRY_ENABLED) { + if (!(e->flags & ENTRY_MAGIC)) { + if (p && !strcmp(e->magic, p + 1)) + return e; + } else { + j = 0; + while ((j < e->size) && + !((bprm->buf[e->offset + j] ^ e->magic[j]) + & (e->mask ? e->mask[j] : 0xff))) + j++; + if (j == e->size) + return e; + } + } + e = e->next; + }; + return NULL; +} + +/* + * the loader itself + */ +static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct binfmt_entry *fmt; + char iname[128]; + char *iname_addr = iname, *p; + int retval, fmt_flags = 0; + + MOD_INC_USE_COUNT; + if (!enabled) { + retval = -ENOEXEC; + goto _ret; + } + + /* to keep locking time low, we copy the interpreter string */ + read_lock(&entries_lock); + if ((fmt = check_file(bprm))) { + strncpy(iname, fmt->interpreter, 127); + iname[127] = '\0'; + fmt_flags = fmt->flags; + } + read_unlock(&entries_lock); + if (!fmt) { + retval = -ENOEXEC; + goto _ret; + } + + iput(bprm->inode); + bprm->dont_iput = 1; + + /* Build args for interpreter */ + if ((fmt_flags & ENTRY_STRIP_EXT) && + (p = strrchr(bprm->filename, '.'))) { + *p = '\0'; + remove_arg_zero(bprm); + bprm->p = copy_strings(1, &bprm->filename, bprm->page, bprm->p, 2); + bprm->argc++; + } + bprm->p = copy_strings(1, &iname_addr, bprm->page, bprm->p, 2); + bprm->argc++; + if (!bprm->p) { + retval = -E2BIG; + goto _ret; + } + bprm->filename = iname; /* for binfmt_script */ + + if ((retval = open_namei(iname, 0, 0, &bprm->inode, NULL))) + goto _ret; + bprm->dont_iput = 0; + + if ((retval = prepare_binprm(bprm)) >= 0) + retval = search_binary_handler(bprm, regs); +_ret: + MOD_DEC_USE_COUNT; + return retval; +} + + + +/* + * /proc handling routines + */ + +/* + * parses and copies one argument enclosed in del from *sp to *dp, + * recognising the \x special. 
+ * returns pointer to the copied argument or NULL in case of an + * error (and sets err) or null argument length. + */ +static char *copyarg(char **dp, const char **sp, int *count, + char del, int special, int *err) +{ + char c, *res = *dp; + + while (!*err && ((c = *((*sp)++)), (*count)--) && (c != del)) { + switch (c) { + case '\\': + if (special && (**sp == 'x')) { + if (!isxdigit(c = toupper(*(++*sp)))) + *err = -EINVAL; + **dp = (c - (isdigit(c) ? '0' : 'A' - 10)) * 16; + if (!isxdigit(c = toupper(*(++*sp)))) + *err = -EINVAL; + *((*dp)++) += c - (isdigit(c) ? '0' : 'A' - 10); + ++*sp; + *count -= 3; + break; + } + default: + *((*dp)++) = c; + } + } + if (*err || (c != del) || (res == *dp)) + res = NULL; + else if (!special) + *((*dp)++) = '\0'; + return res; +} + +/* + * This registers a new binary format, it recognises the syntax + * ':name:type:offset:magic:mask:interpreter:' + * where the ':' is the IFS, that can be chosen with the first char + */ +static int proc_write_register(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + const char *sp; + char del, *dp; + struct binfmt_entry *e; + int memsize, cnt = count - 1, err = 0; + + MOD_INC_USE_COUNT; + /* some sanity checks */ + if ((count < 11) || (count > 256)) { + err = -EINVAL; + goto _err; + } + + memsize = sizeof(struct binfmt_entry) + count; + if (!(e = (struct binfmt_entry *) kmalloc(memsize, GFP_USER))) { + err = -ENOMEM; + goto _err; + } + + sp = buffer + 1; + del = buffer[0]; + dp = (char *)e + sizeof(struct binfmt_entry); + + e->proc_name = copyarg(&dp, &sp, &cnt, del, 0, &err); + + /* we can use bit 3 and 5 of type for ext/magic and ext-strip + flag due to the nice encoding of E, M, e and m */ + if ((*sp & 0x92) || (sp[1] != del)) + err = -EINVAL; + else + e->flags = (*sp++ & (ENTRY_MAGIC | ENTRY_STRIP_EXT)) + | ENTRY_ENABLED; + cnt -= 2; sp++; + + e->offset = 0; + while (cnt-- && isdigit(*sp)) + e->offset = e->offset * 10 + *sp++ - '0'; + if (*sp++ != del) + err = 
-EINVAL; + + e->magic = copyarg(&dp, &sp, &cnt, del, (e->flags & ENTRY_MAGIC), &err); + e->size = dp - e->magic; + e->mask = copyarg(&dp, &sp, &cnt, del, 1, &err); + if (e->mask && ((dp - e->mask) != e->size)) + err = -EINVAL; + e->interpreter = copyarg(&dp, &sp, &cnt, del, 0, &err); + e->id = free_id++; + + /* more sanity checks */ + if (err || !(!cnt || (!(--cnt) && (*sp == '\n'))) || + (e->size < 1) || ((e->size + e->offset) > 127) || + !(e->proc_name) || !(e->interpreter) || + entry_proc_setup(e)) { + kfree(e); + err = -EINVAL; + goto _err; + } + + write_lock(&entries_lock); + e->next = entries; + entries = e; + write_unlock(&entries_lock); + + return count; +_err: + MOD_DEC_USE_COUNT; + return err; +} + +/* + * Get status of entry/binfmt_misc + * FIXME? should an entry be marked disabled if binfmt_misc is disabled though + * entry is enabled? + */ +static int proc_read_status(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct binfmt_entry *e; + char *dp; + int elen, i; + + MOD_INC_USE_COUNT; +#ifndef VERBOSE_STATUS + if (data) { + read_lock(&entries_lock); + if (!(e = get_entry((int) data))) + i = 0; + else + i = e->flags & ENTRY_ENABLED; + read_unlock(&entries_lock); + } else { + i = enabled; + } + sprintf(page, "%s\n", (i ? "enabled" : "disabled")); +#else + if (!data) + sprintf(page, "%s\n", (enabled ? "enabled" : "disabled")); + else { + read_lock(&entries_lock); + if (!(e = get_entry((int) data))) { + *page = '\0'; + goto _out; + } + sprintf(page, "%s\ninterpreter %s\n", + (e->flags & ENTRY_ENABLED ? 
"enabled" : "disabled"), + e->interpreter); + dp = page + strlen(page); + if (!(e->flags & ENTRY_MAGIC)) { + sprintf(dp, "extension .%s\n", e->magic); + dp = page + strlen(page); + } else { + sprintf(dp, "offset %i\nmagic ", e->offset); + dp = page + strlen(page); + for (i = 0; i < e->size; i++) { + sprintf(dp, "%02x", 0xff & (int) (e->magic[i])); + dp += 2; + } + if (e->mask) { + sprintf(dp, "\nmask "); + dp += 6; + for (i = 0; i < e->size; i++) { + sprintf(dp, "%02x", 0xff & (int) (e->mask[i])); + dp += 2; + } + } + *dp++ = '\n'; + *dp = '\0'; + } + if (e->flags & ENTRY_STRIP_EXT) + sprintf(dp, "extension stripped\n"); +_out: + read_unlock(&entries_lock); + } +#endif + + elen = strlen(page) - off; + if (elen < 0) + elen = 0; + *eof = (elen <= count) ? 1 : 0; + *start = page + off; + + MOD_DEC_USE_COUNT; + return elen; +} + +/* + * Set status of entry/binfmt_misc: + * '1' enables, '0' disables and '-1' clears entry/binfmt_misc + */ +static int proc_write_status(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct binfmt_entry *e; + int res = count; + + MOD_INC_USE_COUNT; + if (((buffer[0] == '1') || (buffer[0] == '0')) && + ((count == 1) || ((count == 2) && (buffer[1] == '\n')))) { + if (data) { + read_lock(&entries_lock); + if ((e = get_entry((int) data))) + e->flags = (e->flags & -2) | (int) (buffer[0] - '0'); + read_unlock(&entries_lock); + } else { + enabled = buffer[0] - '0'; + } + } else if ((buffer[0] == '-') && (buffer[1] == '1') && + ((count == 2) || ((count == 3) && (buffer[2] == '\n')))) { + if (data) + clear_entry((int) data); + else + clear_entries(); + } else { + res = -EINVAL; + } + MOD_DEC_USE_COUNT; + return res; +} + +/* + * Remove the /proc-dir entries of one binfmt + */ +static void entry_proc_cleanup(struct binfmt_entry *e) +{ + remove_proc_entry(e->proc_name, bm_dir); +} + +/* + * Create the /proc-dir entry for binfmt + */ +static int entry_proc_setup(struct binfmt_entry *e) +{ + if (!(e->proc_dir = 
create_proc_entry(e->proc_name, + S_IFREG | S_IRUGO | S_IWUSR, bm_dir))) + return -ENOMEM; + + e->proc_dir->data = (void *) (e->id); + e->proc_dir->read_proc = proc_read_status; + e->proc_dir->write_proc = proc_write_status; + + return 0; +} + + +__initfunc(int init_misc_binfmt(void)) +{ + struct proc_dir_entry *status = NULL, *reg; + + if (!(bm_dir = create_proc_entry("sys/fs/binfmt_misc", S_IFDIR, + NULL)) || + !(status = create_proc_entry("status", S_IFREG | S_IRUGO | S_IWUSR, + bm_dir)) || + !(reg = create_proc_entry("register", S_IFREG | S_IWUSR, + bm_dir))) { + if (status) + remove_proc_entry("status", bm_dir); + if (bm_dir) + remove_proc_entry("sys/fs/binfmt_misc", NULL); + return -ENOMEM; + } + status->read_proc = proc_read_status; + status->write_proc = proc_write_status; + + reg->write_proc = proc_write_register; + + return register_binfmt(&misc_format); +} + +#ifdef MODULE +EXPORT_NO_SYMBOLS; +int init_module(void) +{ + return init_misc_binfmt(); +} + +void cleanup_module(void) +{ + unregister_binfmt(&misc_format); + remove_proc_entry("register", bm_dir); + remove_proc_entry("status", bm_dir); + remove_proc_entry("sys/fs/binfmt_misc", NULL); +} +#endif +#undef VERBOSE_STATUS diff --git a/fs/buffer.c b/fs/buffer.c index b8bd754c1..bd06972f3 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -36,6 +36,7 @@ #include <linux/smp_lock.h> #include <linux/vmalloc.h> #include <linux/blkdev.h> +#include <linux/sysrq.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -81,8 +82,6 @@ int buffermem = 0; * remove any of the parameters, make sure to update kernel/sysctl.c. 
*/ -static void wakeup_bdflush(int); - #define N_PARAM 9 /* The dummy values in this structure are left in there for compatibility @@ -113,6 +112,8 @@ union bdflush_param{ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 100, 100, 1, 1}; int bdflush_max[N_PARAM] = {100,5000, 2000, 2000,100, 60000, 60000, 2047, 5}; +void wakeup_bdflush(int); + /* * Rewrote the wait-routines to use the "new" wait-queue functionality, * and getting rid of the cli-sti pairs. The wait-queue routines still @@ -1109,10 +1110,10 @@ static inline void after_unlock_page (struct page * page) { if (test_and_clear_bit(PG_decr_after, &page->flags)) atomic_dec(&nr_async_pages); + if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) + swap_after_unlock_page(page->pg_swap_entry); if (test_and_clear_bit(PG_free_after, &page->flags)) __free_page(page); - if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) - swap_after_unlock_page(page->swap_unlock_entry); } /* @@ -1533,7 +1534,7 @@ struct wait_queue * bdflush_wait = NULL; struct wait_queue * bdflush_done = NULL; struct task_struct *bdflush_tsk = 0; -static void wakeup_bdflush(int wait) +void wakeup_bdflush(int wait) { if (current == bdflush_tsk) return; @@ -1707,7 +1708,9 @@ int bdflush(void * unused) #ifdef DEBUG printk("bdflush() activated..."); #endif - + + CHECK_EMERGENCY_SYNC + ncount = 0; #ifdef DEBUG for(nlist = 0; nlist < NR_LIST; nlist++) diff --git a/fs/dcache.c b/fs/dcache.c index f6ab04693..0472487e0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1,283 +1,1039 @@ /* - * linux/fs/dcache.c + * fs/dcache.c * - * (C) Copyright 1994 Linus Torvalds + * Complete reimplementation + * (C) 1997 Thomas Schoebel-Theuer */ -/* Speeded up searches a bit and threaded the mess. -DaveM */ +/* The new dcache is exclusively called from the VFS, not from + * the specific fs'es any more. Despite having the same name as in the + * old code, it has less to do with it. 
+ * + * It serves many purposes: + * + * 1) Any inode that has been retrieved with lookup() and is in use + * (i_count>0), has access to its full absolute path name, by going + * to inode->i_dentry and then recursively following the entry->d_parent + * chain. Use d_path() as predefined method for that. + * You may find out the corresponding inode belonging to + * a dentry by calling d_inode(). This can be used as an easy way for + * determining .. and its absolute pathname, an old UNIX problem that + * deserved a solution for a long time. + * Note that hardlinked inodes may have multiple dentries assigned to + * (via the d_next chain), reflecting multiple alias pathnames. + * + * 2) If not disabled by filesystem types specifying FS_NO_DCACHE, + * the dentries of unused (aged) inodes are retained for speeding up + * lookup()s, by allowing hashed inquiry starting from the dentry of + * the parent directory. + * + * 3) It can remember so-called "negative entries", that is dentries for + * pathnames that are known to *not* exist, so unnecessary repeated + * lookup()s for non-existent names can be saved. + * + * 4) It provides a means for keeping deleted files (inode->i_nlink==0) + * accessible in the so-called *basket*. Inodes in the basket have been + * removed with unlink() while being in use (i_count>0), so they would + * normally use up space on the disk and be accessible through their + * filedescriptor, but would not be accessible for lookup() any more. + * The basket simply keeps such files in the dcache (for potential + * dcache lookup) until they are either eventually removed completely, + * or transferred to the second-level basket, the so-called *ibasket*. 
+ * The ibasket is implemented in the new inode code, on request of + * filesystem types that have the flag FS_IBASKET set, and proliferates + * the unlinked files when i_count has gone to zero, at least as long + * as there is space on the disk and enough inodes remain available + * and no umount() has started. + * + * 5) Preliminary dentries can be added by readdir(). While normal dentries + * directly point to the inode via u.d_inode only the inode number is + * known from readdir(), but not more. They can be converted to + * normal dentries by using d_inode(). + */ /* - * The directory cache is a "two-level" cache, each level doing LRU on - * its entries. Adding new entries puts them at the end of the LRU - * queue on the first-level cache, while the second-level cache is - * fed by any cache hits. + * Notes on the allocation strategy: * - * The idea is that new additions (from readdir(), for example) will not - * flush the cache of entries that have really been used. - * - * There is a global hash-table over both caches that hashes the entries - * based on the directory inode number and device as well as on a - * string-hash computed over the name. + * The dcache is a full slave cache of the inodes. Whenever an inode + * is cleared, all the dentries associated with it will recursively + * disappear. dentries have no own reference counting; this has to + * be obeyed for SMP. + * If directories could go out of inode cache while + * successors are alive, this would interrupt the d_parent chain of + * the live successors. To prevent this without using zombies, all + * directories are thus prevented from __iput() as long as successors + * are alive. 
*/ -#include <linux/fs.h> +#include <linux/config.h> #include <linux/string.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/dalloc.h> +#include <linux/dlists.h> -#include <asm/unaligned.h> -#include <asm/spinlock.h> +/* this should be removed after the beta phase */ +/* #define DEBUG */ +/*#undef DEBUG*/ +/* #define DEBUG_DDIR_COUNT */ -spinlock_t dcache_lock = SPIN_LOCK_UNLOCKED; +#define D_HASHSIZE 64 -/* - * Don't bother caching long names.. They just take up space in the cache, and - * for a name cache you just want to cache the "normal" names anyway which tend - * to be short. +/* local flags for d_flag */ +#define D_DIR 32 +#define D_HASHED 64 +#define D_ZOMBIE 128 +#define D_PRELIMINARY 256 +#define D_INC_DDIR 512 + +/* local flags for d_del() */ +#define D_RECURSIVE 4 +#define D_NO_FREE 8 + +/* adjust these constants if you know a probability distribution ... */ +#define D_SMALL 16 +#define D_MEDIUM 64 +#define D_LARGE 256 +#define D_HUGE D_MAXLEN + +#define BASE_DHEADER(x) (struct dheader*)((unsigned long)(x) & ~(PAGE_SIZE-1)) +#define BYTE_ADD(x,n) (void*)((char*)(x) + (n)) +#define BYTE_SUB(x,n) (void*)((char*)(x) - (n)) + +/* This is for global allocation of dentries. Remove this when + * converting to SLAB. */ -#define DCACHE_NAME_LEN 15 -#define DCACHE_SIZE 1024 -#define DCACHE_HASH_QUEUES 256 /* keep this a pow2 */ +struct dheader { + struct dentry * emptylist; + short free, maxfree; + struct dheader * next; + struct dheader * prev; +}; -/* - * The dir_cache_entry must be in this order: we do ugly things with the pointers +struct anchors { + struct dheader * free; /* each contains at least 1 empty dentry */ + struct dheader * full; /* all the used up ones */ + struct dheader * dir_free; + struct dheader * dir_full; +}; + +/* This is only used for directory dentries. Think of it as an extension + * of the dentry. + * It is defined as separate struct, so it uses up space only + * where necessary. 
*/ -struct dir_cache_entry { - struct dir_cache_entry *next; - struct dir_cache_entry **pprev; - kdev_t dc_dev; - unsigned long dir; - unsigned long version; - unsigned long ino; - unsigned char name_len; - char name[DCACHE_NAME_LEN]; - struct dir_cache_entry ** lru_head; - struct dir_cache_entry * next_lru, * prev_lru; +struct ddir { + struct dentry * dd_hashtable[D_HASHSIZE]; + struct dentry * dd_neglist; + struct dentry * dd_basketlist; + struct dentry * dd_zombielist; + unsigned short dd_alloced; /* # d_alloc()ed, but not yet d_add()ed */ + unsigned short dd_hashed; /* # of entries in hashtable */ + unsigned short dd_true_hashed; /* # non-preliminaries in hashtable */ + unsigned short dd_negs; /* # of negative entries */ }; -#define dcache_offset(x) ((unsigned long)&((struct dir_cache_entry*)0)->x) -#define dcache_datalen (dcache_offset(lru_head) - dcache_offset(dc_dev)) +DEF_INSERT(header,struct dheader,next,prev) +DEF_REMOVE(header,struct dheader,next,prev) -#define COPYDATA(de, newde) \ -memcpy((void *) &newde->dc_dev, (void *) &de->dc_dev, dcache_datalen) +DEF_INSERT(alias,struct dentry,d_next,d_prev) +DEF_REMOVE(alias,struct dentry,d_next,d_prev) -static struct dir_cache_entry level1_cache[DCACHE_SIZE]; -static struct dir_cache_entry level2_cache[DCACHE_SIZE]; +DEF_INSERT(hash,struct dentry,d_hash_next,d_hash_prev) +DEF_REMOVE(hash,struct dentry,d_hash_next,d_hash_prev) -/* - * The LRU-lists are doubly-linked circular lists, and do not change in size - * so these pointers always have something to point to (after _init) - */ -static struct dir_cache_entry * level1_head; -static struct dir_cache_entry * level2_head; +DEF_INSERT(basket,struct dentry,d_basket_next,d_basket_prev) +DEF_REMOVE(basket,struct dentry,d_basket_next,d_basket_prev) -/* The hash queues are layed out in a slightly different manner. 
*/ -static struct dir_cache_entry *hash_table[DCACHE_HASH_QUEUES]; +static struct anchors anchors[4]; -#define hash_fn(dev,dir,namehash) \ - ((HASHDEV(dev) ^ (dir) ^ (namehash)) & (DCACHE_HASH_QUEUES - 1)) +struct dentry * the_root = NULL; -/* - * Stupid name"hash" algorithm. Write something better if you want to, - * but I doubt it matters that much. +unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end) +{ + memset(anchors, 0, sizeof(anchors)); + return mem_start; +} + +#ifdef DEBUG +/* throw this away after the beta phase */ +/*************************************************************************/ +extern void xcheck(char * txt, struct inode * p); + +static int x_alloc = 0; +static int x_freed = 0; +static int x_free = 0; + +static void * tst[20000]; +static int cnt = 0; + +static void ins(void* ptr) +{ + extern int inodes_stat; + tst[cnt++] = ptr; + if(cnt % 1000 == 0) + printk("------%d allocated: %d: %d %d %d\n", inodes_stat, cnt, + x_alloc, x_freed, x_free); + if(cnt>=20000) panic("stop"); +} + +#if 0 +static inline int search(void* ptr) +{ + int i; + for(i = cnt-1; i>=0; i--) + if(tst[i] == ptr) + return i; + return -1; +} + +#define TST(n,x) if(search(x)<0) printk("%s bad ptr %p line %d\n", n, x, __LINE__) +#else +#define TST(n,x) /*nothing*/ +#endif + +void LOG(char * txt, struct dentry * entry) +{ + static int count = 0; + if(entry) { + TST(txt,entry); + } + if(count) { + count--; + printk("%s: entry=%p\n", txt, entry); + } +} + +#ifdef DEBUG_DDIR_COUNT +static struct ddir * d_dir(struct dentry * entry); +void recursive_test(struct dentry * entry) +{ + int i; + struct ddir * ddir = d_dir(entry); + int sons = 0; + + if(ddir->dd_zombielist) + sons++; + for(i=0; i < D_HASHSIZE; i++) { + struct dentry ** base = &ddir->dd_hashtable[i]; + struct dentry * tmp = *base; + if(tmp) do { + TST("__clear",tmp); + if(!(tmp->d_flag & D_HASHED)) { + printk("VFS: dcache entry not hashed!\n"); + printpath(*base); printk("\n"); + printpath(tmp); + 
} + if(!(tmp->d_flag & D_PRELIMINARY)) + sons++; + if(tmp->d_flag & D_DIR) + recursive_test(tmp); + tmp = tmp->d_hash_next; + } while(tmp && tmp != *base); + } + if(!sons && !(entry->d_flag & D_PRELIMINARY) && entry->u.d_inode) { + struct inode * inode = entry->u.d_inode; + if(!atomic_read(&inode->i_count)) { + if(!(inode->i_status & 1/*ST_AGED*/)) { + printpath(entry); + printk(" is not aged!\n"); + } + if(inode->i_ddir_count) { + printpath(entry); + printk(" has ddir_count blockage!\n"); + } + } + } +} +#else +#define recursive_test(e) /*nothing*/ +#endif +#else +#define TST(n,x) /*nothing*/ +#define LOG(n,x) /*nothing*/ +#define xcheck(t,i) /*nothing*/ +#define recursive_test(e) /*nothing*/ +/*****************************************************************************/ +#endif + +void printpath(struct dentry * entry) +{ + if(!IS_ROOT(entry)) + printpath(entry->d_parent); + printk("/%s", entry->d_name); +} + +static inline long has_sons(struct ddir * ddir) +{ + return ((ddir->dd_alloced | ddir->dd_hashed) || + ddir->dd_neglist || + ddir->dd_basketlist || + ddir->dd_zombielist); +} + +static inline int has_true_sons(struct ddir * ddir) +{ + return (ddir->dd_alloced | ddir->dd_true_hashed); +} + +/* Only hold the i_ddir_count pseudo refcount when neccessary (i.e. when + * they have true_sons), to prevent keeping too much dir inodes in use. 
*/ -static unsigned long namehash(const char * name, int len) +static inline void inc_ddir(struct dentry * entry, struct inode * inode) { - unsigned long hash = 0; + if(!(entry->d_flag & D_INC_DDIR)) { + entry->d_flag |= D_INC_DDIR; +#ifdef DEBUG + if(inode->i_ddir_count) { + printpath(entry); + printk(" ddir_count=%d\n", inode->i_ddir_count); + } +#endif + inode->i_ddir_count++; + _get_inode(inode); + } +} - while ((len -= sizeof(unsigned long)) > 0) { - hash += get_unaligned((unsigned long *)name); - name += sizeof(unsigned long); +static inline blocking void dec_ddir(struct dentry * entry, struct inode * inode) +{ + if(entry->d_flag & D_INC_DDIR) { + entry->d_flag &= ~D_INC_DDIR; + inode->i_ddir_count--; + if(!inode->i_ddir_count) + __iput(inode); } - return hash + - (get_unaligned((unsigned long *)name) & - ~(~0UL << ((len + sizeof(unsigned long)) << 3))); } -static inline struct dir_cache_entry **get_hlist(struct inode *dir, - const char *name, int len) +/* Do not inline this many times. 
*/ +static void d_panic(void) { - return hash_table + hash_fn(dir->i_dev, dir->i_ino, namehash(name, len)); + panic("VFS: dcache directory corruption"); } -static inline void remove_lru(struct dir_cache_entry * de) +static inline struct ddir * d_dir(struct dentry * entry) { - struct dir_cache_entry * next = de->next_lru; - struct dir_cache_entry * prev = de->prev_lru; + struct ddir * res = BYTE_SUB(entry, sizeof(struct ddir)); - next->prev_lru = prev; - prev->next_lru = next; + if(!(entry->d_flag & D_DIR)) + d_panic(); +#ifdef DEBUG + if(!entry) + panic("entry NULL!"); + if(BASE_DHEADER(res) != BASE_DHEADER(entry)) + printk("Scheisse!!!\n"); +#endif + return res; } -static inline void add_lru(struct dir_cache_entry * de, struct dir_cache_entry *head) +static /*inline*/ struct dheader * dinit(int isdir, int size) { - struct dir_cache_entry * prev = head->prev_lru; + struct dheader * res = (struct dheader*)__get_free_page(GFP_KERNEL); + int restlen = PAGE_SIZE - sizeof(struct dheader); + struct dentry * ptr = BYTE_ADD(res, sizeof(struct dheader)); - de->next_lru = head; - de->prev_lru = prev; - prev->next_lru = de; - head->prev_lru = de; + if(!res) + return NULL; + memset(res, 0, sizeof(struct dheader)); + if(isdir) { + ptr = BYTE_ADD(ptr, sizeof(struct ddir)); + size += sizeof(struct ddir); + } + if(BASE_DHEADER(ptr) != res) + panic("Bad kernel page alignment"); + size += sizeof(struct dentry) - D_MAXLEN; + res->emptylist = NULL; + res->free = 0; + while(restlen >= size) { +#ifdef DEBUG + ins(ptr); + if(BASE_DHEADER(ptr) != res) + panic("Wrong dinit!"); +#endif + ptr->d_next = res->emptylist; + res->emptylist = ptr; + ptr = BYTE_ADD(ptr, size); + res->free++; + restlen -= size; + } + res->maxfree = res->free; + return res; } -static inline void update_lru(struct dir_cache_entry * de) +static /*inline*/ struct dentry * __dalloc(struct anchors * anchor, + struct dentry * parent, int isdir, + int len, int size) { - if (de == *de->lru_head) - *de->lru_head = 
de->next_lru; - else { - remove_lru(de); - add_lru(de,*de->lru_head); + struct dheader ** free = isdir ? &anchor->dir_free : &anchor->free; + struct dheader ** full = isdir ? &anchor->dir_full : &anchor->full; + struct dheader * base = *free; + struct dentry * res; + + if(!base) { + base = dinit(isdir, size); + if(!base) + return NULL; + insert_header(free, base); + } + base->free--; + res = base->emptylist; + if(!(base->emptylist = res->d_next)) { + remove_header(free, base); + insert_header(full, base); + } + memset(res, 0, sizeof(struct dentry) - D_MAXLEN); + if(isdir) { + res->d_flag = D_DIR; + memset(d_dir(res), 0, sizeof(struct ddir)); } + res->d_len = len; + res->d_parent = parent; + if(parent) { + struct ddir * pdir = d_dir(parent); +#ifdef DEBUG + if(pdir->dd_alloced > 1 && !IS_ROOT(parent)) { + printpath(parent); + printk(" dd_alloced=%d\n", pdir->dd_alloced); + } +#endif + pdir->dd_alloced++; + } +#ifdef DEBUG + x_alloc++; +#endif + return res; } -/* - * Hash queue manipulation. Look out for the casts.. - * - * What casts? 
8-) -DaveM - */ -static inline void remove_hash(struct dir_cache_entry * de) +struct dentry * d_alloc(struct dentry * parent, int len, int isdir) { - if(de->pprev) { - if(de->next) - de->next->pprev = de->pprev; - *de->pprev = de->next; - de->pprev = NULL; + int i, size; + +#ifdef DEBUG + if(the_root) + recursive_test(the_root); + LOG("d_alloc", parent); +#endif + if(len >= D_MEDIUM) { + if(len >= D_LARGE) { + i = 3; + size = D_HUGE; + } else { + i = 2; + size = D_LARGE; + } + } else if(len >= D_SMALL) { + i = 1; + size = D_MEDIUM; + } else { + i = 0; + size = D_SMALL; } + return __dalloc(&anchors[i], parent, isdir, len, size); } -static inline void add_hash(struct dir_cache_entry * de, struct dir_cache_entry ** hash) +extern blocking struct dentry * d_alloc_root(struct inode * root_inode) { - if((de->next = *hash) != NULL) - (*hash)->pprev = &de->next; - *hash = de; - de->pprev = hash; + struct dentry * res = the_root; + + if(res) { + d_del(res, D_NO_CLEAR_INODE); /* invalidate everything beyond */ + } else { + struct ddir * ddir; + + the_root = res = d_alloc(NULL, 0, 1); + LOG("d_alloc_root", res); + res->d_parent = res; + res->d_name[0]='\0'; + ddir = d_dir(res); + ddir->dd_alloced = 999; /* protect from deletion */ + } + insert_alias(&root_inode->i_dentry, res); + root_inode->i_dent_count++; + root_inode->i_ddir_count++; + res->u.d_inode = root_inode; + return res; } -/* - * Find a directory cache entry given all the necessary info. 
- */ -static inline struct dir_cache_entry * find_entry(struct inode * dir, const char * name, unsigned char len, struct dir_cache_entry ** hash) +static inline unsigned long d_hash(char first, char last) +{ + return ((unsigned long)first ^ ((unsigned long)last << 4)) & (D_HASHSIZE-1); +} + +static inline struct dentry ** d_base_entry(struct ddir * pdir, struct dentry * entry) +{ + return &pdir->dd_hashtable[d_hash(entry->d_name[0], + entry->d_name[entry->d_len-1])]; +} + +static inline struct dentry ** d_base_qstr(struct ddir * pdir, + struct qstr * s1, + struct qstr * s2) { - struct dir_cache_entry *de; + unsigned long hash; - de = *hash; - goto inside; - for (;;) { - de = de->next; -inside: - if (!de) - break; - if((de->name_len == (unsigned char) len) && - (de->dc_dev == dir->i_dev) && - (de->dir == dir->i_ino) && - (de->version == dir->i_version) && - (!memcmp(de->name, name, len))) - break; + if(s2 && s2->len) { + hash = d_hash(s1->name[0], s2->name[s2->len-1]); + } else { + hash = d_hash(s1->name[0], s1->name[s1->len-1]); } - return de; + return &pdir->dd_hashtable[hash]; } -/* - * Move a successfully used entry to level2. If already at level2, - * move it to the end of the LRU queue.. 
+ +static /*inline*/ blocking void _d_remove_from_parent(struct dentry * entry, + struct ddir * pdir, + struct inode * inode, + int flags) +{ + if(entry->d_flag & D_HASHED) { + struct dentry ** base = d_base_entry(pdir, entry); + + remove_hash(base, entry); + entry->d_flag &= ~D_HASHED; + pdir->dd_hashed--; + if(!(entry->d_flag & D_PRELIMINARY)) { + pdir->dd_true_hashed--; + if(!inode) { +#ifdef DEBUG + if(!entry->d_next || !entry->d_prev) { + printpath(entry); + printk(" flags=%x d_flag=%x negs=%d " + "hashed=%d\n", flags, entry->d_flag, + pdir->dd_negs, pdir->dd_hashed); + } +#endif + remove_alias(&pdir->dd_neglist, entry); + pdir->dd_negs--; + } + } + } else if(!(entry->d_flag & D_ZOMBIE)) { +#ifdef DEBUG + if(!pdir->dd_alloced) printk("dd_alloced is 0!\n"); +#endif + pdir->dd_alloced--; + } + if(entry->d_flag & D_BASKET) { + remove_basket(&pdir->dd_basketlist, entry); + entry->d_flag &= ~D_BASKET; + } +} + +/* Theoretically, zombies should never or extremely seldom appear, + * so this code is nearly superfluous. + * A way to get zombies is while using inodes (i_count>0), unlink() + * them as well as rmdir() the parent dir => the parent dir becomes a zombie. + * Zombies are *not* in the hashtable, because somebody could re-creat() + * that filename in it's parent dir again. + * Besides coding errors during beta phase, when forcing an umount() + * (e.g. at shutdown time), inodes could be in use such that the parent + * dir is cleared, resulting also in zombies. 
*/ -static inline void move_to_level2(struct dir_cache_entry * old_de, struct dir_cache_entry ** hash) +static /*inline*/ void _d_handle_zombie(struct dentry * entry, + struct ddir * ddir, + struct ddir * pdir) { - struct dir_cache_entry * de; + if(entry->d_flag & D_DIR) { + if(entry->d_flag & D_ZOMBIE) { + if(!has_sons(ddir)) { + entry->d_flag &= ~D_ZOMBIE; + remove_hash(&pdir->dd_zombielist, entry); + if(!pdir->dd_zombielist && + (entry->d_parent->d_flag & D_ZOMBIE)) { + d_del(entry->d_parent, D_NORMAL); + } + } + } else if(has_sons(ddir)) { + entry->d_flag |= D_ZOMBIE; + insert_hash(&pdir->dd_zombielist, entry); - if (old_de->lru_head == &level2_head) { - update_lru(old_de); - return; - } - de = level2_head; - level2_head = de->next_lru; - remove_hash(de); - COPYDATA(old_de, de); - add_hash(de, hash); + /* This condition is no longer a bug, with the removal + * of recursive_clear() this happens naturally during + * an unmount attempt of a filesystem which is busy. + */ +#if 0 + /* Not sure when this message should show up... */ + if(!IS_ROOT(entry)) { + printk("VFS: clearing dcache directory " + "with successors\n"); +#ifdef DEBUG + printpath(entry); + printk(" d_flag=%x alloced=%d negs=%d hashed=%d " + "basket=%p zombies=%p\n", + entry->d_flag, ddir->dd_alloced, + ddir->dd_negs, ddir->dd_hashed, + ddir->dd_basketlist, ddir->dd_zombielist); +#endif + } +#endif + } + } } -int dcache_lookup(struct inode * dir, const char * name, int len, unsigned long * ino) +static /*inline*/ blocking void _d_del(struct dentry * entry, + struct anchors * anchor, + int flags) { - int ret = 0; + struct dheader ** free; + struct dheader ** full; + struct dheader * base = BASE_DHEADER(entry); + struct ddir * ddir = NULL; + struct ddir * pdir; + struct inode * inode = entry->d_flag & D_PRELIMINARY ? 
NULL : entry->u.d_inode; - if(len <= DCACHE_NAME_LEN) { - struct dir_cache_entry **hash = get_hlist(dir, name, len); - struct dir_cache_entry *de; +#ifdef DEBUG + if(inode) + xcheck("_d_del", inode); +#endif + if(!entry->d_parent) { + printk("VFS: dcache parent is NULL\n"); + return; + } + if(entry->d_flag & D_DIR) { + free = &anchor->dir_free; + full = &anchor->dir_full; + } else { + free = &anchor->free; + full = &anchor->full; + } + pdir = d_dir(entry->d_parent); + if(!IS_ROOT(entry)) + _d_remove_from_parent(entry, pdir, inode, flags); + + /* This may block, be careful! _d_remove_from_parent() is + * thus called before. + */ + if(entry->d_flag & D_DIR) + ddir = d_dir(entry); + if(IS_ROOT(entry)) + return; + + if(flags & D_NO_FREE) { + /* Make it re-d_add()able */ + pdir->dd_alloced++; + entry->d_flag &= D_DIR; + } else + _d_handle_zombie(entry, ddir, pdir); - spin_lock(&dcache_lock); - de = find_entry(dir, name, (unsigned char) len, hash); - if(de) { - *ino = de->ino; - move_to_level2(de, hash); - ret = 1; + /* This dec_ddir() must occur after zombie handling. */ + if(!has_true_sons(pdir)) + dec_ddir(entry->d_parent, entry->d_parent->u.d_inode); + + entry->u.d_inode = NULL; + if(inode) { + remove_alias(&inode->i_dentry, entry); + inode->i_dent_count--; + if (entry->d_flag & D_DIR) + dec_ddir(entry, inode); + + if(!(flags & D_NO_CLEAR_INODE) && + !(atomic_read(&inode->i_count) + + inode->i_ddir_count + + inode->i_dent_count)) { +#ifdef DEBUG + printk("#"); +#endif + /* This may block also. 
*/ + _clear_inode(inode, 0, 0); + } + } + if(!(flags & D_NO_FREE) && !(entry->d_flag & D_ZOMBIE)) { + base->free++; + if(base->free == base->maxfree) { +#ifndef DEBUG + remove_header(free, base); + free_page((unsigned long)base); + goto done; +#endif + } + entry->d_next = base->emptylist; + base->emptylist = entry; + if(!entry->d_next) { + remove_header(full, base); + insert_header(free, base); } - spin_unlock(&dcache_lock); +#ifdef DEBUG + x_freed++; +#endif } - return ret; +#ifndef DEBUG +done: +#else + x_free++; +#endif } -void dcache_add(struct inode * dir, const char * name, int len, unsigned long ino) +blocking void d_del(struct dentry * entry, int flags) { - if (len <= DCACHE_NAME_LEN) { - struct dir_cache_entry **hash = get_hlist(dir, name, len); - struct dir_cache_entry *de; + int i; - spin_lock(&dcache_lock); - de = find_entry(dir, name, (unsigned char) len, hash); - if (de) { - de->ino = ino; - update_lru(de); + if(!entry) + return; + LOG("d_clear", entry); + if(entry->d_len >= D_MEDIUM) { + if(entry->d_len >= D_LARGE) { + i = 3; } else { - de = level1_head; - level1_head = de->next_lru; - remove_hash(de); - de->dc_dev = dir->i_dev; - de->dir = dir->i_ino; - de->version = dir->i_version; - de->ino = ino; - de->name_len = len; - memcpy(de->name, name, len); - add_hash(de, hash); + i = 2; + } + } else if(entry->d_len >= D_SMALL) { + i = 1; + } else { + i = 0; + } + _d_del(entry, &anchors[i], flags); +} + +static inline struct dentry * __dlookup(struct dentry ** base, + struct qstr * name, + struct qstr * appendix) +{ + struct dentry * tmp = *base; + + if(tmp && name->len) { + int totallen = name->len; + + if(appendix) + totallen += appendix->len; + do { + if(tmp->d_len == totallen && + !(tmp->d_flag & D_DUPLICATE) && + !strncmp(tmp->d_name, name->name, name->len) && + (!appendix || !strncmp(tmp->d_name+name->len, + appendix->name, appendix->len))) + return tmp; + tmp = tmp->d_hash_next; + } while(tmp != *base); + } + return NULL; +} + +struct dentry * 
d_lookup(struct inode * dir, + struct qstr * name, + struct qstr * appendix) +{ + if(dir->i_dentry) { + struct ddir * ddir = d_dir(dir->i_dentry); + struct dentry ** base = d_base_qstr(ddir, name, appendix); + + return __dlookup(base, name, appendix); + } + return NULL; +} + +static /*inline*/ blocking void _d_insert_to_parent(struct dentry * entry, + struct ddir * pdir, + struct inode * inode, + struct qstr * ininame, + int flags) +{ + struct dentry ** base; + struct dentry * parent = entry->d_parent; + +#ifdef DEBUG + if(!pdir->dd_alloced) + printk("dd_alloced is 0!\n"); +#endif + base = d_base_qstr(pdir, ininame, NULL); + if(!(flags & (D_NOCHECKDUP|D_DUPLICATE)) && + __dlookup(base, ininame, NULL)) { + d_del(entry, D_NO_CLEAR_INODE); + return; + } + if(entry->d_flag & D_HASHED) { + printk("VFS: dcache entry is already hashed\n"); + return; + } + if(!(flags & D_PRELIMINARY)) + pdir->dd_true_hashed++; + pdir->dd_hashed++; + insert_hash(base, entry); + entry->d_flag |= D_HASHED; + pdir->dd_alloced--; + if(flags & D_BASKET) + insert_basket(&pdir->dd_basketlist, entry); + +#ifdef DEBUG + if(inode && inode->i_dentry && (entry->d_flag & D_DIR)) { + struct dentry * tmp = inode->i_dentry; + printk("Auweia inode=%p entry=%p (%p %p %s)\n", + inode, entry, parent->u.d_inode, parent, parent->d_name); + printk("entry path="); printpath(entry); printk("\n"); + do { + TST("auweia",tmp); + printk("alias path="); printpath(tmp); printk("\n"); + tmp = tmp->d_next; + } while(tmp != inode->i_dentry); + printk("\n"); + } +#endif + if(has_true_sons(pdir)) + inc_ddir(parent, parent->u.d_inode); + if(!inode && !(flags & D_PRELIMINARY)) { + insert_alias(&pdir->dd_neglist, entry); + pdir->dd_negs++; + + /* Don't allow the negative list to grow too much ... 
*/ + while(pdir->dd_negs > (pdir->dd_true_hashed >> 1) + 5) + d_del(pdir->dd_neglist->d_prev, D_REMOVE); + } +} + +blocking void d_add(struct dentry * entry, struct inode * inode, + struct qstr * ininame, int flags) +{ + struct dentry * parent = entry->d_parent; + struct qstr dummy; + struct ddir * pdir; + +#ifdef DEBUG + if(inode) + xcheck("d_add", inode); + if(IS_ROOT(entry)) { + printk("VFS: d_add for root dentry "); + printpath(entry); + printk(" -> "); + if(ininame) + printk("%s", ininame->name); + printk("\n"); + return; + } + if(!parent) + panic("d_add with parent==NULL"); + LOG("d_add", entry); +#endif + if(ininame) { + if(ininame->len != entry->d_len) { + printk("VFS: d_add with wrong string length"); + entry->d_len = ininame->len; /* kludge */ + } + memcpy(entry->d_name, ininame->name, ininame->len); + entry->d_name[ininame->len] = '\0'; + } else { + dummy.name = entry->d_name; + dummy.len = entry->d_len; + ininame = &dummy; + } + if(entry->d_flag & D_HASHED) + printk("VFS: d_add of already added dcache entry\n"); + + pdir = d_dir(parent); + _d_insert_to_parent(entry, pdir, inode, ininame, flags); + entry->d_flag |= flags; + if(inode && !(flags & D_PRELIMINARY)) { + if(entry->d_flag & D_DIR) { + if(inode->i_dentry) { + printk("VFS: creating dcache directory alias\n"); + return; + } } - spin_unlock(&dcache_lock); + insert_alias(&inode->i_dentry, entry); + inode->i_dent_count++; } + entry->u.d_inode = inode; } -unsigned long name_cache_init(unsigned long mem_start, unsigned long mem_end) +blocking struct dentry * d_entry(struct dentry * parent, + struct qstr * name, + struct inode * inode) { - int i; - struct dir_cache_entry * p; + struct ddir * pdir = d_dir(parent); + struct dentry ** base = d_base_qstr(pdir, name, NULL); + struct dentry * found = __dlookup(base, name, NULL); - /* - * Init level1 LRU lists.. 
- */ - p = level1_cache; - do { - p[1].prev_lru = p; - p[0].next_lru = p+1; - p[0].lru_head = &level1_head; - } while (++p < level1_cache + DCACHE_SIZE-1); - level1_cache[0].prev_lru = p; - p[0].next_lru = &level1_cache[0]; - p[0].lru_head = &level1_head; - level1_head = level1_cache; - - /* - * Init level2 LRU lists.. - */ - p = level2_cache; - do { - p[1].prev_lru = p; - p[0].next_lru = p+1; - p[0].lru_head = &level2_head; - } while (++p < level2_cache + DCACHE_SIZE-1); - level2_cache[0].prev_lru = p; - p[0].next_lru = &level2_cache[0]; - p[0].lru_head = &level2_head; - level2_head = level2_cache; - - /* - * Empty hash queues.. - */ - for (i = 0 ; i < DCACHE_HASH_QUEUES ; i++) - hash_table[i] = NULL; + if(!found) { + int isdir = (inode && S_ISDIR(inode->i_mode)); - return mem_start; + found = d_alloc(parent, name->len, isdir); + if(found) { + d_add(found, inode, name, + isdir ? (D_DIR|D_NOCHECKDUP) : D_NOCHECKDUP); + } else + printk("VFS: problem with d_alloc\n"); + } + return found; +} + +blocking void d_entry_preliminary(struct dentry * parent, + struct qstr * name, + unsigned long ino) +{ + struct ddir * pdir = d_dir(parent); + struct dentry ** base = d_base_qstr(pdir, name, NULL); + struct dentry * found = __dlookup(base, name, NULL); + + if(!found && ino) { + struct dentry * new = d_alloc(parent, name->len, 0); + + if(new) { + d_add(new, NULL, name, D_PRELIMINARY|D_NOCHECKDUP); + new->u.d_ino = ino; + } else + printk("VFS: problem with d_alloc\n"); + } +} + +blocking void d_move(struct dentry * entry, struct inode * newdir, + struct qstr * newname, struct qstr * newapp) +{ + struct ddir tmp; + struct dentry * new; + struct inode * inode; + int len; + int flags; + + if(!entry) + return; + inode = entry->u.d_inode; + flags = entry->d_flag; + if((flags & D_PRELIMINARY) || !inode) { + if(!(flags & D_PRELIMINARY)) + printk("VFS: trying to move negative dcache entry\n"); + d_del(entry, D_NO_CLEAR_INODE); + return; + } +#if 0 +printk("d_move %p '%s' -> '%s%s' 
dent_count=%d\n", inode, entry->d_name, + newname->name, newapp ? newapp->name : "", inode->i_dent_count); +#endif + if(flags & D_ZOMBIE) { + printk("VFS: moving zombie entry\n"); + } + if(flags & D_DIR) { + struct ddir * ddir = d_dir(entry); + + memcpy(&tmp, ddir, sizeof(struct ddir)); + + /* Simulate empty dir for d_del(). */ + memset(ddir, 0, sizeof(struct ddir)); + } + len = newname->len; + if(newapp) { + len += newapp->len; + flags |= D_BASKET; + } else + flags &= ~D_BASKET; + new = d_alloc(newdir->i_dentry, len, flags & D_DIR); + memcpy(new->d_name, newname->name, newname->len); + if(newapp) + memcpy(new->d_name+newname->len, newapp->name, newapp->len); + new->d_name[len] = '\0'; + d_del(entry, D_NO_CLEAR_INODE); + d_add(new, inode, NULL, flags & (D_DIR|D_BASKET)); + if(flags & D_DIR) { + struct ddir * ddir = d_dir(new); + + memcpy(ddir, &tmp, sizeof(struct ddir)); + } +} + +int d_path(struct dentry * entry, struct inode * chroot, char * buf) +{ + if(IS_ROOT(entry) || (chroot && entry->u.d_inode == chroot && + !(entry->d_flag & D_PRELIMINARY))) { + *buf = '/'; + return 1; + } else { + int len = d_path(entry->d_parent, chroot, buf); + + buf += len; + if(len > 1) { + *buf++ = '/'; + len++; + } + memcpy(buf, entry->d_name, entry->d_len); + return len + entry->d_len; + } +} + +struct dentry * d_basket(struct dentry * dir_entry) +{ + if(dir_entry && (dir_entry->d_flag & D_DIR)) { + struct ddir * ddir = d_dir(dir_entry); + + return ddir->dd_basketlist; + } else + return NULL; +} + +int d_isbasket(struct dentry * entry) +{ + return entry->d_flag & D_BASKET; +} + +blocking struct inode * d_inode(struct dentry ** changing_entry) +{ + struct dentry * entry = *changing_entry; + struct inode * inode; + +#ifdef CONFIG_DCACHE_PRELOAD + if(entry->d_flag & D_PRELIMINARY) { + struct qstr name = { entry->d_name, entry->d_len }; + struct ddir * pdir = d_dir(entry->d_parent); + struct dentry ** base = d_base_qstr(pdir, &name, NULL); + struct dentry * found; + unsigned long ino; 
+ struct inode * dir = entry->d_parent->u.d_inode; + TST("d_inode",entry); + ino = entry->u.d_ino; + if(!dir) + d_panic(); + + /* Prevent concurrent d_lookup()s or d_inode()s before + * giving up vfs_lock. This just removes from the parent, + * but does not deallocate it. + */ + + /* !!!!!!! Aiee, here is an unresolved race if somebody + * unlink()s the inode during the iget(). The problem is + * that we need to synchronize externally. Proposed solution: + * put a rw_lock (read-mode) on the parent dir for each + * iget(), lookup() and so on, and a write-mode lock for + * everything that changes the dir (e.g. unlink()), and do + * this consistently everywhere in the generic VFS (not in + * the concrete filesystems). This should kill similar + * races everywhere, with a single clean concept. + * Later, the synchronization stuff can be cleaned out + * of the concrete fs'es. + */ + d_del(entry, D_NO_CLEAR_INODE|D_NO_FREE); + vfs_unlock(); + + /* This circumvents the normal lookup() of pathnames. + * Therefore, preliminary entries must not be used + * (see FS_NO_DCACHE and FS_NO_PRELIM) if the fs does not + * permit fetching *valid* inodes with plain iget(). + */ + inode = __iget(dir->i_sb, ino, 0); + vfs_lock(); + if(!inode) { + printk("VFS: preliminary dcache entry was invalid\n"); + *changing_entry = NULL; + return NULL; + } + xcheck("d_inode iget()", inode); + if((found = __dlookup(base, &name, NULL))) { + d_del(entry, D_NO_CLEAR_INODE); + *changing_entry = found; + } else if(S_ISDIR(inode->i_mode)) { + struct dentry * new = d_alloc(entry->d_parent, entry->d_len, 1); + if(new) + d_add(new, inode, &name, D_DIR); + *changing_entry = new; + + /* Finally deallocate old entry. */ + d_del(entry, D_NO_CLEAR_INODE); + } else { + /* Re-insert to the parent, but now as normal dentry. 
*/ + d_add(entry, inode, NULL, 0); + } + return inode; + } +#endif + inode = entry->u.d_inode; + if(inode) { +#ifdef DEBUG + xcheck("d_inode", inode); +#endif + iinc_zero(inode); + } + return inode; } diff --git a/fs/devices.c b/fs/devices.c index 6ea9880ba..d3b1d6846 100644 --- a/fs/devices.c +++ b/fs/devices.c @@ -273,7 +273,6 @@ struct inode_operations blkdev_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -326,7 +325,6 @@ struct inode_operations chrdev_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/dquot.c b/fs/dquot.c index dda3f642a..59d2112d9 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -237,9 +237,12 @@ static void write_dquot(struct dquot *dquot) filp->f_pos = dqoff(dquot->dq_id); fs = get_fs(); set_fs(KERNEL_DS); + if (filp->f_op->write(filp->f_inode, filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk)) == sizeof(struct dqblk)) dquot->dq_flags &= ~DQ_MOD; + /* inode->i_status |= ST_MODIFIED is willingly *not* done here */ + up(&dquot->dq_mnt->mnt_sem); set_fs(fs); unlock_dquot(dquot); @@ -1035,7 +1038,8 @@ asmlinkage int sys_quotactl(int cmd, const char *special, int id, caddr_t addr) if (special == (char *)NULL && (cmds == Q_SYNC || cmds == Q_GETSTATS)) dev = 0; else { - if (namei(special, &ino)) + int error = namei(NAM_FOLLOW_LINK, special, &ino); + if(error) goto out; dev = ino->i_rdev; ret = -ENOTBLK; @@ -68,6 +68,10 @@ static struct linux_binfmt *formats = (struct linux_binfmt *) NULL; __initfunc(void binfmt_setup(void)) { +#ifdef CONFIG_BINFMT_MISC + init_misc_binfmt(); +#endif + #ifdef CONFIG_BINFMT_ELF init_elf_binfmt(); #endif @@ -158,7 +162,7 @@ int open_inode(struct inode * inode, int mode) } } current->files->fd[fd] = f; - inode->i_count++; + atomic_inc(&inode->i_count); } return fd; } diff --git 
a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 171de1cf5..4d2b561ee 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -291,6 +291,7 @@ int ext2_new_block (const struct inode * inode, unsigned long goal, printk ("ext2_new_block: nonexistent device"); return 0; } +retry: lock_super (sb); es = sb->u.ext2_sb.s_es; if (le32_to_cpu(es->s_free_blocks_count) <= le32_to_cpu(es->s_r_blocks_count) && @@ -298,6 +299,8 @@ int ext2_new_block (const struct inode * inode, unsigned long goal, (sb->u.ext2_sb.s_resgid == 0 || !in_group_p (sb->u.ext2_sb.s_resgid)))) { unlock_super (sb); + if(sb->s_ibasket && free_ibasket(sb)) + goto retry; return 0; } @@ -389,6 +392,8 @@ repeat: } if (k >= sb->u.ext2_sb.s_groups_count) { unlock_super (sb); + if(sb->s_ibasket && free_ibasket(sb)) + goto retry; return 0; } bitmap_nr = load_block_bitmap (sb, i); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index fce6fc4c8..d9b1957e3 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -65,7 +65,6 @@ struct inode_operations ext2_dir_inode_operations = { ext2_mknod, /* mknod */ ext2_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -194,12 +193,13 @@ revalidate: * currently swapped out. So, use a * version stamp to detect whether or * not the directory has been modified - * during the copy operation. */ - unsigned long version; - dcache_add(inode, de->name, le16_to_cpu(de->name_len), - le32_to_cpu(de->inode)); - version = inode->i_version; - error = filldir(dirent, de->name, le16_to_cpu(de->name_len), filp->f_pos, le32_to_cpu(de->inode)); + * during the copy operation. 
+ */ + unsigned long version = inode->i_version; + + error = filldir(dirent, de->name, + le16_to_cpu(de->name_len), + filp->f_pos, le32_to_cpu(de->inode)); if (error) break; if (version != inode->i_version) diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 274dc31fd..1627f5cee 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -72,7 +72,6 @@ struct inode_operations ext2_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ ext2_bmap, /* bmap */ diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 16751329e..a486679f9 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -171,9 +171,9 @@ void ext2_free_inode (struct inode * inode) printk ("ext2_free_inode: inode has no device\n"); return; } - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { printk ("ext2_free_inode: inode has count=%d\n", - inode->i_count); + atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { @@ -404,7 +404,7 @@ repeat: sb->s_dirt = 1; inode->i_mode = mode; inode->i_sb = sb; - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_nlink = 1; inode->i_dev = sb->s_dev; inode->i_uid = current->fsuid; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 39716678a..421393581 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -172,27 +172,12 @@ int ext2_lookup (struct inode * dir, const char * name, int len, iput (dir); return -ENAMETOOLONG; } - if (dcache_lookup(dir, name, len, &ino)) { - if (!ino) { - iput(dir); - return -ENOENT; - } - if (!(*result = iget (dir->i_sb, ino))) { - iput (dir); - return -EACCES; - } - iput (dir); - return 0; - } ino = dir->i_version; if (!(bh = ext2_find_entry (dir, name, len, &de))) { - if (ino == dir->i_version) - dcache_add(dir, name, len, 0); iput (dir); return -ENOENT; } ino = le32_to_cpu(de->inode); - dcache_add(dir, name, len, ino); brelse (bh); if (!(*result = iget (dir->i_sb, ino))) { iput (dir); @@ -391,7 
+376,6 @@ int ext2_create (struct inode * dir,const char * name, int len, int mode, } de->inode = cpu_to_le32(inode->i_ino); dir->i_version = ++event; - dcache_add(dir, de->name, le16_to_cpu(de->name_len), le32_to_cpu(de->inode)); mark_buffer_dirty(bh, 1); if (IS_SYNC(dir)) { ll_rw_block (WRITE, 1, &bh); @@ -460,7 +444,6 @@ int ext2_mknod (struct inode * dir, const char * name, int len, int mode, } de->inode = cpu_to_le32(inode->i_ino); dir->i_version = ++event; - dcache_add(dir, de->name, le16_to_cpu(de->name_len), le32_to_cpu(de->inode)); mark_buffer_dirty(bh, 1); if (IS_SYNC(dir)) { ll_rw_block (WRITE, 1, &bh); @@ -538,7 +521,6 @@ int ext2_mkdir (struct inode * dir, const char * name, int len, int mode) } de->inode = cpu_to_le32(inode->i_ino); dir->i_version = ++event; - dcache_add(dir, de->name, le16_to_cpu(de->name_len), le32_to_cpu(de->inode)); mark_buffer_dirty(bh, 1); if (IS_SYNC(dir)) { ll_rw_block (WRITE, 1, &bh); @@ -662,7 +644,7 @@ repeat: else if (le32_to_cpu(de->inode) != inode->i_ino) retval = -ENOENT; else { - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { /* * Are we deleting the last instance of a busy directory? * Better clean up if so. 
@@ -836,7 +818,6 @@ int ext2_symlink (struct inode * dir, const char * name, int len, } de->inode = cpu_to_le32(inode->i_ino); dir->i_version = ++event; - dcache_add(dir, de->name, le16_to_cpu(de->name_len), le32_to_cpu(de->inode)); mark_buffer_dirty(bh, 1); if (IS_SYNC(dir)) { ll_rw_block (WRITE, 1, &bh); @@ -885,7 +866,6 @@ int ext2_link (struct inode * oldinode, struct inode * dir, } de->inode = cpu_to_le32(oldinode->i_ino); dir->i_version = ++event; - dcache_add(dir, de->name, le16_to_cpu(de->name_len), le32_to_cpu(de->inode)); mark_buffer_dirty(bh, 1); if (IS_SYNC(dir)) { ll_rw_block (WRITE, 1, &bh); @@ -905,7 +885,7 @@ static int subdir (struct inode * new_inode, struct inode * old_inode) int ino; int result; - new_inode->i_count++; + atomic_inc(&new_inode->i_count); result = 0; for (;;) { if (new_inode == old_inode) { @@ -945,8 +925,7 @@ static int subdir (struct inode * new_inode, struct inode * old_inode) */ static int do_ext2_rename (struct inode * old_dir, const char * old_name, int old_len, struct inode * new_dir, - const char * new_name, int new_len, - int must_be_dir) + const char * new_name, int new_len) { struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; @@ -981,8 +960,6 @@ start_up: old_inode = __iget (old_dir->i_sb, le32_to_cpu(old_de->inode), 0); /* don't cross mnt-points */ if (!old_inode) goto end_rename; - if (must_be_dir && !S_ISDIR(old_inode->i_mode)) - goto end_rename; retval = -EPERM; if ((old_dir->i_mode & S_ISVTX) && current->fsuid != old_inode->i_uid && @@ -1016,7 +993,7 @@ start_up: if (!empty_dir (new_inode)) goto end_rename; retval = -EBUSY; - if (new_inode->i_count > 1) + if (atomic_read(&new_inode->i_count) > 1) goto end_rename; } retval = -EPERM; @@ -1059,7 +1036,6 @@ start_up: * ok, that's it */ new_de->inode = le32_to_cpu(old_inode->i_ino); - dcache_add(new_dir, new_de->name, le16_to_cpu(new_de->name_len), le32_to_cpu(new_de->inode)); retval = ext2_delete_entry (old_de, old_bh); if (retval 
== -ENOENT) goto try_again; @@ -1075,7 +1051,6 @@ start_up: old_dir->i_dirt = 1; if (dir_bh) { PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); - dcache_add(old_inode, "..", 2, new_dir->i_ino); mark_buffer_dirty(dir_bh, 1); old_dir->i_nlink--; old_dir->i_dirt = 1; @@ -1123,8 +1098,7 @@ end_rename: * on the same file system */ int ext2_rename (struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, - int must_be_dir) + struct inode * new_dir, const char * new_name, int new_len) { int result; @@ -1132,7 +1106,7 @@ int ext2_rename (struct inode * old_dir, const char * old_name, int old_len, sleep_on (&old_dir->i_sb->u.ext2_sb.s_rename_wait); old_dir->i_sb->u.ext2_sb.s_rename_lock = 1; result = do_ext2_rename (old_dir, old_name, old_len, new_dir, - new_name, new_len, must_be_dir); + new_name, new_len); old_dir->i_sb->u.ext2_sb.s_rename_lock = 0; wake_up (&old_dir->i_sb->u.ext2_sb.s_rename_wait); return result; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5885e3067..635a45692 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -319,6 +319,13 @@ static void ext2_setup_super (struct super_block * sb, ext2_check_inodes_bitmap (sb); } } +#if 0 /* ibasket's still have unresolved bugs... -DaveM */ + + /* [T. Schoebel-Theuer] This limit should be maintained on disk. + * This is just provisionary. 
+ */ + sb->s_ibasket_max = 100; +#endif } static int ext2_check_descriptors (struct super_block * sb) @@ -728,7 +735,10 @@ int ext2_remount (struct super_block * sb, int * flags, char * data) } static struct file_system_type ext2_fs_type = { - ext2_read_super, "ext2", 1, NULL + "ext2", + FS_REQUIRES_DEV /* | FS_IBASKET */, /* ibaskets have unresolved bugs */ + ext2_read_super, + NULL }; __initfunc(int init_ext2_fs(void)) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 31f8276b0..4d5a5cada 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -25,8 +25,6 @@ #include <linux/stat.h> static int ext2_readlink (struct inode *, char *, int); -static int ext2_follow_link (struct inode *, struct inode *, int, int, - struct inode **); /* * symlinks can't do much... @@ -43,7 +41,6 @@ struct inode_operations ext2_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ ext2_readlink, /* readlink */ - ext2_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -52,70 +49,20 @@ struct inode_operations ext2_symlink_inode_operations = { NULL /* smap */ }; -static int ext2_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - int error; - struct buffer_head * bh = NULL; - char * link; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput (dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput (dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput (dir); - iput (inode); - return -ELOOP; - } - if (inode->i_blocks) { - if (!(bh = ext2_bread (inode, 0, 0, &error))) { - iput (dir); - iput (inode); - return -EIO; - } - link = bh->b_data; - } else - link = (char *) inode->u.ext2_i.i_data; - if (DO_UPDATE_ATIME(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } - current->link_count++; - error = open_namei (link, flag, mode, res_inode, dir); - 
current->link_count--; - iput (inode); - if (bh) - brelse (bh); - return error; -} - static int ext2_readlink (struct inode * inode, char * buffer, int buflen) { struct buffer_head * bh = NULL; char * link; int i, err; - if (!S_ISLNK(inode->i_mode)) { - iput (inode); - return -EINVAL; - } if (buflen > inode->i_sb->s_blocksize - 1) buflen = inode->i_sb->s_blocksize - 1; if (inode->i_blocks) { bh = ext2_bread (inode, 0, 0, &err); if (!bh) { iput (inode); + if(err < 0) /* indicate type of error */ + return err; return 0; } link = bh->b_data; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 45b31836b..c39661904 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -260,10 +260,8 @@ int fat_readdirx( ino = fat_parent_ino(inode,0); if (shortnames || !is_long) { - dcache_add(inode, bufname, i+dotoffset, ino); - if (both) { + if (both) bufname[i+dotoffset] = '\0'; - } spos = oldpos; if (is_long) { spos = filp->f_pos - sizeof(struct msdos_dir_entry); @@ -276,7 +274,6 @@ int fat_readdirx( } } if (is_long && longnames) { - dcache_add(inode, longname, long_len, ino); if (both) { memcpy(&longname[long_len+1], bufname, i+dotoffset); long_len += i+dotoffset; diff --git a/fs/fat/file.c b/fs/fat/file.c index 6dec1ba42..82787075a 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -52,7 +52,6 @@ struct inode_operations fat_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ fat_bmap, /* bmap */ @@ -100,7 +99,6 @@ struct inode_operations fat_file_inode_operations_1024 = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/fat/mmap.c b/fs/fat/mmap.c index 7896a4cfe..6a3515eef 100644 --- a/fs/fat/mmap.c +++ b/fs/fat/mmap.c @@ -105,7 +105,7 @@ int fat_mmap(struct inode * inode, struct file * file, struct vm_area_struct * v } vma->vm_inode = inode; - inode->i_count++; + 
atomic_inc(&inode->i_count); vma->vm_ops = &fat_file_mmap; return 0; } @@ -143,7 +143,6 @@ struct inode_operations fifo_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/filesystems.c b/fs/filesystems.c index 74016aa67..004ee0aff 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -8,6 +8,7 @@ #include <linux/config.h> #include <linux/fs.h> +#include <linux/nametrans.h> #include <linux/minix_fs.h> #include <linux/ext2_fs.h> @@ -44,6 +45,10 @@ __initfunc(static void do_sys_setup(void)) binfmt_setup(); +#ifdef CONFIG_TRANS_NAMES + init_nametrans(); +#endif + #ifdef CONFIG_EXT2_FS init_ext2_fs(); #endif diff --git a/fs/hpfs/hpfs_fs.c b/fs/hpfs/hpfs_fs.c index 5bc73819c..878a3f069 100644 --- a/fs/hpfs/hpfs_fs.c +++ b/fs/hpfs/hpfs_fs.c @@ -175,7 +175,6 @@ static const struct inode_operations hpfs_file_iops = NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ (int (*)(struct inode *, int)) @@ -219,7 +218,6 @@ static const struct inode_operations hpfs_dir_iops = NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -1746,7 +1744,10 @@ static void brelse4(struct quad_buffer_head *qbh) } static struct file_system_type hpfs_fs_type = { - hpfs_read_super, "hpfs", 1, NULL + "hpfs", + FS_REQUIRES_DEV, + hpfs_read_super, + NULL }; __initfunc(int init_hpfs_fs(void)) diff --git a/fs/inode.c b/fs/inode.c index 307b76063..7215e1204 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,657 +1,708 @@ /* - * linux/fs/inode.c: Keeping track of inodes. + * fs/inode.c * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 1997 David S. 
Miller + * Complete reimplementation + * (C) 1997 Thomas Schoebel-Theuer */ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/slab.h> +/* Everything here is intended to be MP-safe. However, other parts + * of the kernel are not yet MP-safe, in particular the inode->i_count++ + * that are spread over everywhere. These should be replaced by + * iinc() as soon as possible. Since I have no MP machine, I could + * not test it. + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> #include <linux/string.h> +#include <linux/mm.h> +#include <linux/kernel.h> +#include <linux/dlists.h> +#include <linux/dalloc.h> +#include <linux/omirr.h> + +/* #define DEBUG */ + +#define HASH_SIZE 1024 /* must be a power of 2 */ +#define NR_LEVELS 4 + +#define ST_AGED 1 +#define ST_HASHED 2 +#define ST_EMPTY 4 +#define ST_TO_READ 8 +#define ST_TO_WRITE 16 +#define ST_TO_PUT 32 +#define ST_TO_DROP 64 +#define ST_IO (ST_TO_READ|ST_TO_WRITE|ST_TO_PUT|ST_TO_DROP) +#define ST_WAITING 128 +#define ST_FREEING 256 +#define ST_IBASKET 512 + +/* The idea is to keep empty inodes in a separate list, so no search + * is required as long as empty inodes exist. + * All reusable inodes occurring in the hash table with i_count==0 + * are also registered in the ringlist aged_i[level], but in LRU order. + * Used inodes with i_count>0 are kept solely in the hashtable and in + * all_i, but in no other list. + * The level is used for multilevel aging to avoid thrashing; each + * time i_count decreases to 0, the inode is inserted into the next level + * ringlist. Cache reusage is simply by taking the _last_ element from the + * lowest-level ringlist that contains inodes. + * In contrast to the old code, there isn't any O(n) search overhead now + * in iget/iput (if you make HASH_SIZE large enough). 
+ */ +static struct inode * hashtable[HASH_SIZE];/* linked with i_hash_{next,prev} */ +static struct inode * all_i = NULL; /* linked with i_{next,prev} */ +static struct inode * empty_i = NULL; /* linked with i_{next,prev} */ +static struct inode * aged_i[NR_LEVELS+1]; /* linked with i_lru_{next,prev} */ +static int aged_reused[NR_LEVELS+1]; /* # removals from aged_i[level] */ +static int age_table[NR_LEVELS+1] = { /* You may tune this. */ + 1, 4, 10, 100, 1000 +}; /* after which # of uses to increase to the next level */ + +/* This is for kernel/sysctl.c */ + +/* Just aligning plain ints and arrays thereof doesn't work reliably.. */ +struct { + int nr_inodes; + int nr_free_inodes; + int aged_count[NR_LEVELS+1]; /* # in each level */ +} inodes_stat; -int nr_inodes = 0, nr_free_inodes = 0; int max_inodes = NR_INODE; +unsigned long last_inode = 0; -#define INODE_HASHSZ 1024 - -static struct inode *inode_hash[INODE_HASHSZ]; - -/* All the details of hashing and lookup. */ -#define hashfn(dev, i) ((HASHDEV(dev) + ((i) ^ ((i) >> 10))) & (INODE_HASHSZ - 1)) - -__inline__ void insert_inode_hash(struct inode *inode) +void inode_init(void) { - struct inode **htable = &inode_hash[hashfn(inode->i_dev, inode->i_ino)]; - if((inode->i_hash_next = *htable) != NULL) - (*htable)->i_hash_pprev = &inode->i_hash_next; - *htable = inode; - inode->i_hash_pprev = htable; + memset(hashtable, 0, sizeof(hashtable)); + memset(aged_i, 0, sizeof(aged_i)); + memset(aged_reused, 0, sizeof(aged_reused)); + memset(&inodes_stat, 0, sizeof(inodes_stat)); } -#define hash_inode(inode) insert_inode_hash(inode) +/* Intended for short locks of the above global data structures. + * Could be replaced with spinlocks completely, since there is + * no blocking during manipulation of the static data; however the + * lock in invalidate_inodes() may last relatively long. 
+ */ +#ifdef __SMP__ +struct semaphore vfs_sem = MUTEX; +#endif -static inline void unhash_inode(struct inode *inode) -{ - if(inode->i_hash_pprev) { - if(inode->i_hash_next) - inode->i_hash_next->i_hash_pprev = inode->i_hash_pprev; - *(inode->i_hash_pprev) = inode->i_hash_next; - inode->i_hash_pprev = NULL; - } -} +DEF_INSERT(all,struct inode,i_next,i_prev) +DEF_REMOVE(all,struct inode,i_next,i_prev) + +DEF_INSERT(lru,struct inode,i_lru_next,i_lru_prev) +DEF_REMOVE(lru,struct inode,i_lru_next,i_lru_prev) -static inline struct inode *find_inode(unsigned int hashent, - kdev_t dev, unsigned long ino) -{ - struct inode *inode; +DEF_INSERT(hash,struct inode,i_hash_next,i_hash_prev) +DEF_REMOVE(hash,struct inode,i_hash_next,i_hash_prev) - for(inode = inode_hash[hashent]; inode; inode = inode->i_hash_next) - if(inode->i_dev == dev && inode->i_ino == ino) - break; - return inode; -} +DEF_INSERT(ibasket,struct inode,i_basket_next,i_basket_prev) +DEF_REMOVE(ibasket,struct inode,i_basket_next,i_basket_prev) -/* Free list queue and management. 
*/ -static struct free_inode_queue { - struct inode *head; - struct inode **last; -} free_inodes = { NULL, &free_inodes.head }; - -static inline void put_inode_head(struct inode *inode) -{ - if((inode->i_next = free_inodes.head) != NULL) - free_inodes.head->i_pprev = &inode->i_next; - else - free_inodes.last = &inode->i_next; - free_inodes.head = inode; - inode->i_pprev = &free_inodes.head; - nr_free_inodes++; -} +#ifdef DEBUG +extern void printpath(struct dentry * entry); +struct inode * xtst[15000]; +int xcnt = 0; -static inline void put_inode_last(struct inode *inode) +void xcheck(char * txt, struct inode * p) { - inode->i_next = NULL; - inode->i_pprev = free_inodes.last; - *free_inodes.last = inode; - free_inodes.last = &inode->i_next; - nr_free_inodes++; + int i; + for(i=xcnt-1; i>=0; i--) + if(xtst[i] == p) + return; + printk("Bogus inode %p in %s\n", p, txt); } +#else +#define xcheck(t,p) /*nothing*/ +#endif -static inline void remove_free_inode(struct inode *inode) +static inline struct inode * grow_inodes(void) { - if(inode->i_pprev) { - if(inode->i_next) - inode->i_next->i_pprev = inode->i_pprev; - else - free_inodes.last = inode->i_pprev; - *inode->i_pprev = inode->i_next; - inode->i_pprev = NULL; - nr_free_inodes--; + struct inode * res; + struct inode * inode = res = (struct inode*)__get_free_page(GFP_KERNEL); + int size = PAGE_SIZE; + if(!inode) + return NULL; + + size -= sizeof(struct inode); + inode++; + inodes_stat.nr_inodes++; +#ifdef DEBUG +xtst[xcnt++]=res; +#endif + while(size >= sizeof(struct inode)) { +#ifdef DEBUG +xtst[xcnt++]=inode; +#endif + inodes_stat.nr_inodes++; + inodes_stat.nr_free_inodes++; + insert_all(&empty_i, inode); + inode->i_status = ST_EMPTY; + inode++; + size -= sizeof(struct inode); } + return res; } -/* This is the in-use queue, if i_count > 0 (as far as we can tell) - * the sucker is here. 
- */ -static struct inode *inuse_list = NULL; - -static inline void put_inuse(struct inode *inode) +static inline int hash(dev_t i_dev, unsigned long i_ino) { - if((inode->i_next = inuse_list) != NULL) - inuse_list->i_pprev = &inode->i_next; - inuse_list = inode; - inode->i_pprev = &inuse_list; + return ((int)i_ino ^ ((int)i_dev << 6)) & (HASH_SIZE-1); } -static inline void remove_inuse(struct inode *inode) +static inline blocking void wait_io(struct inode * inode, unsigned short flags) { - if(inode->i_pprev) { - if(inode->i_next) - inode->i_next->i_pprev = inode->i_pprev; - *inode->i_pprev = inode->i_next; - inode->i_pprev = NULL; + while(inode->i_status & flags) { + struct wait_queue wait = {current, NULL}; + inode->i_status |= ST_WAITING; + vfs_unlock(); + add_wait_queue(&inode->i_wait, &wait); + sleep_on(&inode->i_wait); + remove_wait_queue(&inode->i_wait, &wait); + vfs_lock(); } } -/* Locking and unlocking inodes, plus waiting for locks to clear. */ -static void __wait_on_inode(struct inode *); - -static inline void wait_on_inode(struct inode *inode) -{ - if(inode->i_lock) - __wait_on_inode(inode); -} - -static inline void lock_inode(struct inode *inode) +static inline blocking void set_io(struct inode * inode, + unsigned short waitflags, + unsigned short setflags) { - if(inode->i_lock) - __wait_on_inode(inode); - inode->i_lock = 1; + wait_io(inode, waitflags); + inode->i_status |= setflags; + vfs_unlock(); } -static inline void unlock_inode(struct inode *inode) +static inline blocking int release_io(struct inode * inode, unsigned short flags) { - inode->i_lock = 0; - wake_up(&inode->i_wait); -} - -static void __wait_on_inode(struct inode * inode) -{ - struct wait_queue wait = { current, NULL }; - - add_wait_queue(&inode->i_wait, &wait); -repeat: - current->state = TASK_UNINTERRUPTIBLE; - if (inode->i_lock) { - schedule(); - goto repeat; + int res = 0; + vfs_lock(); + inode->i_status &= ~flags; + if(inode->i_status & ST_WAITING) { + inode->i_status &= 
~ST_WAITING; + vfs_unlock(); + wake_up(&inode->i_wait); + res = 1; + } + return res; +} + +static inline blocking void _io(void (*op)(struct inode*), struct inode * inode, + unsigned short waitflags, unsigned short setflags) +{ + /* Do nothing if the same op is already in progress. */ + if(op && !(inode->i_status & setflags)) { + set_io(inode, waitflags, setflags); + op(inode); + if(release_io(inode, setflags)) { + /* Somebody grabbed my inode from under me. */ +#ifdef DEBUG + printk("_io grab!\n"); +#endif + vfs_lock(); + } } - remove_wait_queue(&inode->i_wait, &wait); - current->state = TASK_RUNNING; } -/* Clear an inode of all it's identity, this is exported to the world. */ -void clear_inode(struct inode *inode) +blocking int _free_ibasket(struct super_block * sb) { - struct wait_queue *wait; - - /* So we don't disappear. */ - inode->i_count++; - - truncate_inode_pages(inode, 0); - wait_on_inode(inode); - if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) - inode->i_sb->dq_op->drop(inode); - - if(--inode->i_count > 0) - remove_inuse(inode); - else - remove_free_inode(inode); - unhash_inode(inode); - wait = inode->i_wait; - memset(inode, 0, sizeof(*inode)); barrier(); - inode->i_wait = wait; - put_inode_head(inode); /* Pages zapped, put at the front. */ + if(sb->s_ibasket) { + struct inode * delinquish = sb->s_ibasket->i_basket_prev; +#if 0 +printpath(delinquish->i_dentry); +printk(" delinquish\n"); +#endif + _clear_inode(delinquish, 0, 1); + return 1; + } + return 0; } -/* These check the validity of a mount/umount type operation, we essentially - * check if there are any inodes hanging around which prevent this operation - * from occurring. We also clear out clean inodes referencing this device. 
- */ -int fs_may_mount(kdev_t dev) +static /*inline*/ void _put_ibasket(struct inode * inode) { - struct inode *inode; - int pass = 0; - - inode = free_inodes.head; -repeat: - while(inode) { - struct inode *next = inode->i_next; - if(inode->i_dev != dev) - goto next; - if(inode->i_count || inode->i_dirt || inode->i_lock) - return 0; - clear_inode(inode); - next: - inode = next; - } - if(pass == 0) { - inode = inuse_list; - pass = 1; - goto repeat; + struct super_block * sb = inode->i_sb; + if(!(inode->i_status & ST_IBASKET)) { + inode->i_status |= ST_IBASKET; + insert_ibasket(&sb->s_ibasket, inode); + sb->s_ibasket_count++; + if(sb->s_ibasket_count > sb->s_ibasket_max) + (void)_free_ibasket(sb); } - return 1; /* Tis' cool bro. */ } -int fs_may_umount(kdev_t dev, struct inode *iroot) -{ - struct inode *inode; - int pass = 0; - - inode = free_inodes.head; -repeat: - for(; inode; inode = inode->i_next) { - if(inode->i_dev != dev || !inode->i_count) - continue; - if(inode == iroot && - (inode->i_count == (inode->i_mount == inode ? 2 : 1))) - continue; - return 0; +blocking void _clear_inode(struct inode * inode, int external, int verbose) +{ +xcheck("_clear_inode",inode); + if(inode->i_status & ST_IBASKET) { + struct super_block * sb = inode->i_sb; + remove_ibasket(&sb->s_ibasket, inode); + sb->s_ibasket_count--; + inode->i_status &= ~ST_IBASKET; +#if 0 +printpath(inode->i_dentry); +printk(" put_inode\n"); +#endif + _io(sb->s_op->put_inode, inode, ST_TO_PUT|ST_TO_WRITE, ST_TO_PUT); + if(inode->i_status & ST_EMPTY) + return; } - if(pass == 0) { - inode = inuse_list; - pass = 1; - goto repeat; + if(inode->i_status & ST_HASHED) + remove_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode); + if(inode->i_status & ST_AGED) { + /* "cannot happen" when called from an fs because at least + * the caller must use it. Can happen when called from + * invalidate_inodes(). 
*/ + if(verbose) + printk("VFS: clearing aged inode\n"); + if(atomic_read(&inode->i_count)) + printk("VFS: aged inode is in use\n"); + remove_lru(&aged_i[inode->i_level], inode); + inodes_stat.aged_count[inode->i_level]--; } - return 1; /* Tis' cool bro. */ -} - -/* This belongs in file_table.c, not here... */ -int fs_may_remount_ro(kdev_t dev) -{ - struct file * file; - - /* Check that no files are currently opened for writing. */ - for (file = inuse_filps; file; file = file->f_next) { - if (!file->f_inode || file->f_inode->i_dev != dev) - continue; - if (S_ISREG(file->f_inode->i_mode) && (file->f_mode & 2)) - return 0; + if(!external && inode->i_status & ST_IO) { + printk("VFS: clearing inode during IO operation\n"); } - return 1; /* Tis' cool bro. */ -} - -/* Reading/writing inodes. */ -static void write_inode(struct inode *inode) -{ - if(inode->i_dirt) { - wait_on_inode(inode); - if(inode->i_dirt) { - if(inode->i_sb && - inode->i_sb->s_op && - inode->i_sb->s_op->write_inode) { - inode->i_lock = 1; - inode->i_sb->s_op->write_inode(inode); - unlock_inode(inode); - } else { - inode->i_dirt = 0; + if(!(inode->i_status & ST_EMPTY)) { + remove_all(&all_i, inode); + inode->i_status = ST_EMPTY; + while(inode->i_dentry) { + d_del(inode->i_dentry, D_NO_CLEAR_INODE); + } + if(inode->i_pages) { + vfs_unlock(); /* may block, can that be revised? */ + truncate_inode_pages(inode, 0); + vfs_lock(); + } + insert_all(&empty_i, inode); + inodes_stat.nr_free_inodes++; + } else if(external) + printk("VFS: empty inode is unnecessarily cleared multiple " + "times by an fs\n"); + else + printk("VFS: clearing empty inode\n"); + inode->i_status = ST_EMPTY; + /* The inode is not really cleared any more here, but only once + * when taken from empty_i. This saves instructions and processor + * cache pollution. 
+ */ +} + +void insert_inode_hash(struct inode * inode) +{ +xcheck("insert_inode_hash",inode); + vfs_lock(); + if(!(inode->i_status & ST_HASHED)) { + insert_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode); + inode->i_status |= ST_HASHED; + } else + printk("VFS: trying to hash an inode again\n"); + vfs_unlock(); +} + +blocking struct inode * _get_empty_inode(void) +{ + struct inode * inode; + int retry = 0; + +retry: + inode = empty_i; + if(inode) { + remove_all(&empty_i, inode); + inodes_stat.nr_free_inodes--; + } else if(inodes_stat.nr_inodes < max_inodes || retry > 2) { + inode = grow_inodes(); + } + if(!inode) { + int level; + int usable = 0; + for(level = 0; level <= NR_LEVELS; level++) + if(aged_i[level]) { + inode = aged_i[level]->i_lru_prev; + /* Here is the picking strategy, tune this */ + if(aged_reused[level] < (usable++ ? + inodes_stat.aged_count[level] : + 2)) + break; + aged_reused[level] = 0; } + if(inode) { + if(!(inode->i_status & ST_AGED)) + printk("VFS: inode aging inconsistency\n"); + if(atomic_read(&inode->i_count) + inode->i_ddir_count) + printk("VFS: i_count of aged inode is not zero\n"); + if(inode->i_dirt) + printk("VFS: Hey, somebody made my aged inode dirty\n"); + _clear_inode(inode, 0, 0); + goto retry; } } -} - -static inline void read_inode(struct inode *inode) -{ - if(inode->i_sb && - inode->i_sb->s_op && - inode->i_sb->s_op->read_inode) { - lock_inode(inode); - inode->i_sb->s_op->read_inode(inode); - unlock_inode(inode); + if(!inode) { + vfs_unlock(); + schedule(); + if(retry > 10) + panic("VFS: cannot repair inode shortage"); + if(retry > 2) + printk("VFS: no free inodes\n"); + retry++; + vfs_lock(); + goto retry; } +xcheck("get_empty_inode",inode); + memset(inode, 0, sizeof(struct inode)); + atomic_set(&inode->i_count, 1); + inode->i_nlink = 1; + sema_init(&inode->i_sem, 1); + inode->i_ino = ++last_inode; + inode->i_version = ++event; + insert_all(&all_i, inode); + return inode; } -int inode_change_ok(struct inode *inode, 
struct iattr *attr) +static inline blocking struct inode * _get_empty_inode_hashed(dev_t i_dev, + unsigned long i_ino) { - if(!(attr->ia_valid & ATTR_FORCE)) { - unsigned short fsuid = current->fsuid; - uid_t iuid = inode->i_uid; - int not_fsuser = !fsuser(); - - if(((attr->ia_valid & ATTR_UID) && - ((fsuid != iuid) || - (attr->ia_uid != iuid)) && not_fsuser) || - - ((attr->ia_valid & ATTR_GID) && - (!in_group_p(attr->ia_gid) && - (attr->ia_gid != inode->i_gid)) && not_fsuser) || - - ((attr->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)) && - (fsuid != iuid) && not_fsuser)) - return -EPERM; - - if(attr->ia_valid & ATTR_MODE) { - gid_t grp; - if(fsuid != iuid && not_fsuser) - return -EPERM; - grp = attr->ia_valid & ATTR_GID ? attr->ia_gid : inode->i_gid; - if(not_fsuser && !in_group_p(grp)) - attr->ia_mode &= ~S_ISGID; + struct inode ** base = &hashtable[hash(i_dev, i_ino)]; + struct inode * inode = *base; + if(inode) do { + if(inode->i_ino == i_ino && inode->i_dev == i_dev) { + atomic_inc(&inode->i_count); + printk("VFS: inode %lx is already in use\n", i_ino); + return inode; } - } - return 0; -} - -void inode_setattr(struct inode *inode, struct iattr *attr) -{ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - if (attr->ia_valid & ATTR_SIZE) - inode->i_size = attr->ia_size; - if (attr->ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; - if (attr->ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; - if (attr->ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; - if (attr->ia_valid & ATTR_MODE) { - inode->i_mode = attr->ia_mode; - if (!fsuser() && !in_group_p(inode->i_gid)) - inode->i_mode &= ~S_ISGID; - } - if (attr->ia_valid & ATTR_ATTR_FLAG) - inode->i_attr_flags = attr->ia_attr_flags; - inode->i_dirt = 1; + inode = inode->i_hash_next; + } while(inode != *base); + inode = _get_empty_inode(); + inode->i_dev = i_dev; + inode->i_ino = i_ino; + insert_hash(base, inode); 
+ inode->i_status |= ST_HASHED; + return inode; } -int notify_change(struct inode *inode, struct iattr *attr) +blocking struct inode * get_empty_inode_hashed(dev_t i_dev, unsigned long i_ino) { - attr->ia_ctime = CURRENT_TIME; - if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { - if (!(attr->ia_valid & ATTR_ATIME_SET)) - attr->ia_atime = attr->ia_ctime; - if (!(attr->ia_valid & ATTR_MTIME_SET)) - attr->ia_mtime = attr->ia_ctime; - } - - if (inode->i_sb && - inode->i_sb->s_op && - inode->i_sb->s_op->notify_change) - return inode->i_sb->s_op->notify_change(inode, attr); - - if(inode_change_ok(inode, attr) != 0) - return -EPERM; + struct inode * inode; - inode_setattr(inode, attr); - return 0; -} - -int bmap(struct inode *inode, int block) -{ - if(inode->i_op && inode->i_op->bmap) - return inode->i_op->bmap(inode, block); - return 0; + vfs_lock(); + inode = _get_empty_inode_hashed(i_dev, i_ino); + vfs_unlock(); + return inode; } -void invalidate_inodes(kdev_t dev) +void _get_inode(struct inode * inode) { - struct inode *inode; - int pass = 0; - - inode = free_inodes.head; -repeat: - while(inode) { - struct inode *next = inode->i_next; - if(inode->i_dev != dev) - goto next; - clear_inode(inode); - next: - inode = next; + if(inode->i_status & ST_IBASKET) { + inode->i_status &= ~ST_IBASKET; + remove_ibasket(&inode->i_sb->s_ibasket, inode); + inode->i_sb->s_ibasket_count--; } - if(pass == 0) { - inode = inuse_list; - pass = 1; - goto repeat; - } -} - -void sync_inodes(kdev_t dev) -{ - struct inode *inode; - int pass = 0; - - inode = free_inodes.head; -repeat: - while(inode) { - struct inode *next = inode->i_next; - if(dev && inode->i_dev != dev) - goto next; - wait_on_inode(inode); - write_inode(inode); - next: - inode = next; + if(inode->i_status & ST_AGED) { + inode->i_status &= ~ST_AGED; + remove_lru(&aged_i[inode->i_level], inode); + inodes_stat.aged_count[inode->i_level]--; + aged_reused[inode->i_level]++; + if(S_ISDIR(inode->i_mode)) + /* make dirs less thrashable 
*/ + inode->i_level = NR_LEVELS-1; + else if(inode->i_nlink > 1) + /* keep hardlinks totally separate */ + inode->i_level = NR_LEVELS; + else if(++inode->i_reuse_count >= age_table[inode->i_level] + && inode->i_level < NR_LEVELS-1) + inode->i_level++; + if(atomic_read(&inode->i_count) != 1) + printk("VFS: inode count was not zero\n"); + } else if(inode->i_status & ST_EMPTY) + printk("VFS: invalid reuse of empty inode\n"); +} + +blocking struct inode * __iget(struct super_block * sb, + unsigned long i_ino, + int crossmntp) +{ + struct inode ** base; + struct inode * inode; + dev_t i_dev; + + if(!sb) + panic("VFS: iget with sb == NULL"); + i_dev = sb->s_dev; + if(!i_dev) + panic("VFS: sb->s_dev is NULL\n"); + base = &hashtable[hash(i_dev, i_ino)]; + vfs_lock(); + inode = *base; + if(inode) do { + if(inode->i_ino == i_ino && inode->i_dev == i_dev) { + atomic_inc(&inode->i_count); + _get_inode(inode); + + /* Allow concurrent writes/puts. This is in particular + * useful e.g. when syncing large chunks. + * I hope the i_dirt flag is everywhere set as soon + * as _any_ modification is made and _before_ + * giving up control, so no harm should occur if data + * is modified during writes, because it will be + * rewritten again (does a short inconsistency on the + * disk harm?) 
+ */ + wait_io(inode, ST_TO_READ); + vfs_unlock(); + goto done; + } + inode = inode->i_hash_next; + } while(inode != *base); + inode = _get_empty_inode_hashed(i_dev, i_ino); + inode->i_sb = sb; + inode->i_flags = sb->s_flags; + if(sb->s_op && sb->s_op->read_inode) { + set_io(inode, 0, ST_TO_READ); /* do not wait at all */ + sb->s_op->read_inode(inode); + if(release_io(inode, ST_TO_READ)) + goto done; } - if(pass == 0) { - inode = inuse_list; - pass = 1; - goto repeat; + vfs_unlock(); +done: + while(crossmntp && inode->i_mount) { + struct inode * tmp = inode->i_mount; + iinc(tmp); + iput(inode); + inode = tmp; } +xcheck("_iget",inode); + return inode; } -static struct wait_queue *inode_wait, *update_wait; - -void iput(struct inode *inode) +blocking void __iput(struct inode * inode) { - if(!inode) - return; - wait_on_inode(inode); - if(!inode->i_count) { - printk("VFS: Freeing free inode, tell DaveM\n"); + struct super_block * sb; +xcheck("_iput",inode); + if(atomic_read(&inode->i_count) + inode->i_ddir_count < 0) + printk("VFS: i_count is negative\n"); + if((atomic_read(&inode->i_count) + inode->i_ddir_count) || + (inode->i_status & ST_FREEING)) { return; } - if(inode->i_pipe) - wake_up_interruptible(&PIPE_WAIT(*inode)); -we_slept: - if(inode->i_count > 1) { - inode->i_count--; - } else { - wake_up(&inode_wait); - if(inode->i_pipe) { - free_page((unsigned long)PIPE_BASE(*inode)); - PIPE_BASE(*inode) = NULL; - } - if(inode->i_sb && - inode->i_sb->s_op && - inode->i_sb->s_op->put_inode) { - inode->i_sb->s_op->put_inode(inode); - if(!inode->i_nlink) - return; - } - if(inode->i_dirt) { - write_inode(inode); - wait_on_inode(inode); - goto we_slept; + inode->i_status |= ST_FREEING; +#ifdef CONFIG_OMIRR + if(inode->i_status & ST_MODIFIED) { + inode->i_status &= ~ST_MODIFIED; + omirr_printall(inode, " W %ld ", CURRENT_TIME); + } +#endif + if(inode->i_pipe) { + free_page((unsigned long)PIPE_BASE(*inode)); + PIPE_BASE(*inode)= NULL; + } + if((sb = inode->i_sb)) { + 
if(sb->s_type && (sb->s_type->fs_flags & FS_NO_DCACHE)) { + while(inode->i_dentry) + d_del(inode->i_dentry, D_NO_CLEAR_INODE); + if(atomic_read(&inode->i_count) + inode->i_ddir_count) + goto done; } - if(IS_WRITABLE(inode) && - inode->i_sb && - inode->i_sb->dq_op) { - inode->i_lock = 1; - inode->i_sb->dq_op->drop(inode); - unlock_inode(inode); - goto we_slept; + if(sb->s_op) { + if(inode->i_nlink <= 0 && inode->i_dent_count && + !(inode->i_status & (ST_EMPTY|ST_IBASKET)) && + (sb->s_type->fs_flags & FS_IBASKET)) { + _put_ibasket(inode); + goto done; + } + if(!inode->i_dent_count || + (sb->s_type->fs_flags & FS_NO_DCACHE)) { + _io(sb->s_op->put_inode, inode, + ST_TO_PUT|ST_TO_WRITE, ST_TO_PUT); + if(atomic_read(&inode->i_count) + inode->i_ddir_count) + goto done; + if(inode->i_nlink <= 0) { + if(!(inode->i_status & ST_EMPTY)) { + _clear_inode(inode, 0, 1); + } + goto done; + } + } + if(inode->i_dirt) { + inode->i_dirt = 0; + _io(sb->s_op->write_inode, inode, + ST_TO_PUT|ST_TO_WRITE, ST_TO_WRITE); + if(atomic_read(&inode->i_count) + inode->i_ddir_count) + goto done; + } } - /* There is a serious race leading to here, watch out. */ - if(--inode->i_count == 0) { - remove_inuse(inode); - put_inode_last(inode); /* Place at end of LRU free queue */ + if(IS_WRITABLE(inode) && sb->dq_op) { + /* can operate in parallel to other ops ? 
*/ + _io(sb->dq_op->drop, inode, 0, ST_TO_DROP); + if(atomic_read(&inode->i_count) + inode->i_ddir_count) + goto done; } } + if(inode->i_mmap) + printk("VFS: inode has mappings\n"); + if(inode->i_status & ST_AGED) { + printk("VFS: reaging inode\n"); +#if defined(DEBUG) +printpath(inode->i_dentry); +printk("\n"); +#endif + goto done; + } + if(!(inode->i_status & (ST_HASHED|ST_EMPTY))) { + _clear_inode(inode, 0, 1); + goto done; + } + if(inode->i_status & ST_EMPTY) { + printk("VFS: aging an empty inode\n"); + goto done; + } + insert_lru(&aged_i[inode->i_level], inode); + inodes_stat.aged_count[inode->i_level]++; + inode->i_status |= ST_AGED; +done: + inode->i_status &= ~ST_FREEING; } -static kmem_cache_t *inode_cachep; - -static void grow_inodes(void) +blocking void _iput(struct inode * inode) { - int i = 16; - - while(i--) { - struct inode *inode; - - inode = kmem_cache_alloc(inode_cachep, SLAB_KERNEL); - if(!inode) - return; - memset(inode, 0, sizeof(*inode)); - put_inode_head(inode); - nr_inodes++; - } + vfs_lock(); + __iput(inode); + vfs_unlock(); } -/* We have to be really careful, it's really easy to run yourself into - * inefficient sequences of events. The first problem is that when you - * steal a non-referenced inode you run the risk of zaping a considerable - * number of page cache entries, which might get refernced once again. - * But if you are growing the inode set to quickly, you suck up ram - * and cause other problems. - * - * We approach the problem in the following way, we take two things into - * consideration. Firstly we take a look at how much we have "committed" - * to this inode already (i_nrpages), this accounts for the cost of getting - * those pages back if someone should reference that inode soon. We also - * attempt to factor in i_blocks, which says "how much of a problem could - * this potentially be". It still needs some tuning though. -DaveM - */ -#define BLOCK_FACTOR_SHIFT 5 /* It is not factored in as much. 
*/ -static struct inode *find_best_candidate_weighted(struct inode *inode) +blocking void sync_inodes(kdev_t dev) { - struct inode *best = NULL; + struct inode * inode; + vfs_lock(); + inode = all_i; + if(inode) do { +xcheck("sync_inodes",inode); + if(inode->i_dirt && (inode->i_dev == dev || !dev)) { + if(inode->i_sb && inode->i_sb->s_op && + !(inode->i_status & ST_FREEING)) { + inode->i_dirt = 0; + _io(inode->i_sb->s_op->write_inode, inode, + ST_IO, ST_TO_WRITE); + } + } + inode = inode->i_next; + } while(inode != all_i); + vfs_unlock(); +} + +blocking int _check_inodes(kdev_t dev, int complain) +{ + struct inode * inode; + int bad = 0; + + vfs_lock(); +startover: + inode = all_i; + if(inode) do { + struct inode * next; +xcheck("_check_inodes",inode); + next = inode->i_next; + if(inode->i_dev == dev) { + if(inode->i_dirt || atomic_read(&inode->i_count)) { + bad++; + } else { + _clear_inode(inode, 0, 0); - if(inode) { - unsigned long bestscore = 1000; - int limit = nr_free_inodes >> 2; - do { - if(!(inode->i_lock | inode->i_dirt)) { - int myscore = inode->i_nrpages; - - myscore += (inode->i_blocks >> BLOCK_FACTOR_SHIFT); - if(myscore < bestscore) { - bestscore = myscore; - best = inode; - } + /* _clear_inode() may recursively clear other + * inodes, probably also the next one. + */ + if(next->i_status & ST_EMPTY) + goto startover; } - inode = inode->i_next; - } while(inode && --limit); - } - return best; + } + inode = next; + } while(inode != all_i); + vfs_unlock(); + if(complain && bad) + printk("VFS: %d inode(s) busy on removed device `%s'\n", + bad, kdevname(dev)); + return (bad == 0); } -static inline struct inode *find_best_free(struct inode *inode) +/*inline*/ void invalidate_inodes(kdev_t dev) { - if(inode) { - int limit = nr_free_inodes >> 5; - do { - if(!inode->i_nrpages) - return inode; - inode = inode->i_next; - } while(inode && --limit); - } - return NULL; + /* Requires two passes, because of the new dcache holding + * directories with i_count > 1. 
+ */ + (void)_check_inodes(dev, 0); + (void)_check_inodes(dev, 1); } -struct inode *get_empty_inode(void) +/*inline*/ int fs_may_mount(kdev_t dev) { - static int ino = 0; - struct inode *inode; - -repeat: - inode = find_best_free(free_inodes.head); - if(!inode) - goto pressure; -got_it: - inode->i_count++; - truncate_inode_pages(inode, 0); - wait_on_inode(inode); - if(IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) - inode->i_sb->dq_op->drop(inode); - unhash_inode(inode); - remove_free_inode(inode); - - memset(inode, 0, sizeof(*inode)); - inode->i_count = 1; - inode->i_nlink = 1; - inode->i_version = ++event; - sema_init(&inode->i_sem, 1); - inode->i_ino = ++ino; - inode->i_dev = 0; - put_inuse(inode); - return inode; -pressure: - if(nr_inodes < max_inodes) { - grow_inodes(); - goto repeat; - } - inode = find_best_candidate_weighted(free_inodes.head); - if(!inode) { - printk("VFS: No free inodes, contact DaveM\n"); - sleep_on(&inode_wait); - goto repeat; - } - if(inode->i_lock) { - wait_on_inode(inode); - goto repeat; - } else if(inode->i_dirt) { - write_inode(inode); - goto repeat; - } - goto got_it; + return _check_inodes(dev, 0); } -struct inode *get_pipe_inode(void) +int fs_may_remount_ro(kdev_t dev) { - extern struct inode_operations pipe_inode_operations; - struct inode *inode = get_empty_inode(); + (void)dev; + return 1; /* not checked any more */ +} - if(inode) { - unsigned long page = __get_free_page(GFP_USER); - if(!page) { - iput(inode); - inode = NULL; - } else { - PIPE_BASE(*inode) = (char *) page; - inode->i_op = &pipe_inode_operations; - inode->i_count = 2; - PIPE_WAIT(*inode) = NULL; - PIPE_START(*inode) = PIPE_LEN(*inode) = 0; - PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; - PIPE_LOCK(*inode) = 0; - inode->i_pipe = 1; - inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime 
= CURRENT_TIME; - inode->i_blksize = PAGE_SIZE; - } - } - return inode; +int fs_may_umount(kdev_t dev, struct inode * mount_root) +{ + struct inode * inode; + vfs_lock(); + inode = all_i; + if(inode) do { +xcheck("fs_may_umount",inode); + if(inode->i_dev == dev && atomic_read(&inode->i_count)) + if(inode != mount_root || atomic_read(&inode->i_count) > + (inode->i_mount == inode ? 2 : 1)) { + vfs_unlock(); + return 0; + } + inode = inode->i_next; + } while(inode != all_i); + vfs_unlock(); + return 1; } -static int inode_updating[INODE_HASHSZ]; +extern struct inode_operations pipe_inode_operations; -struct inode *__iget(struct super_block *sb, int nr, int crossmntp) +blocking struct inode * get_pipe_inode(void) { - unsigned int hashent = hashfn(sb->s_dev, nr); - struct inode *inode, *empty = NULL; - -we_slept: - if((inode = find_inode(hashent, sb->s_dev, nr)) == NULL) { - if(empty == NULL) { - inode_updating[hashent]++; - empty = get_empty_inode(); - if(!--inode_updating[hashent]) - wake_up(&update_wait); - goto we_slept; - } - inode = empty; - inode->i_sb = sb; - inode->i_dev = sb->s_dev; - inode->i_ino = nr; - inode->i_flags = sb->s_flags; - hash_inode(inode); - read_inode(inode); - } else { - if(!inode->i_count++) { - remove_free_inode(inode); - put_inuse(inode); - } - wait_on_inode(inode); - if(crossmntp && inode->i_mount) { - struct inode *mp = inode->i_mount; - mp->i_count++; - iput(inode); - wait_on_inode(inode = mp); - } - if(empty) - iput(empty); + struct inode * inode = get_empty_inode(); + + PIPE_BASE(*inode) = (char*)__get_free_page(GFP_USER); + if(!(PIPE_BASE(*inode))) { + iput(inode); + return NULL; + } + inode->i_blksize = PAGE_SIZE; + inode->i_pipe = 1; + inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; + atomic_inc(&inode->i_count); + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &pipe_inode_operations; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + 
+ /* I hope this does not introduce security problems. + * Please check and give me response. + */ + { + char dummyname[32]; + struct qstr dummy = { dummyname, 0 }; + struct dentry * new; + sprintf(dummyname, ".anonymous-pipe-%06lud", inode->i_ino); + dummy.len = strlen(dummyname); + vfs_lock(); + new = d_alloc(the_root, dummy.len, 0); + if(new) + d_add(new, inode, &dummy, D_BASKET); + vfs_unlock(); } - while(inode_updating[hashent]) - sleep_on(&update_wait); return inode; } -void inode_init(void) +int bmap(struct inode * inode, int block) { - int i; - - inode_cachep = kmem_cache_create("inode", sizeof(struct inode), - 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if(!inode_cachep) - panic("Cannot create inode SLAB cache\n"); - - for(i = 0; i < INODE_HASHSZ; i++) - inode_hash[i] = NULL; + if (inode->i_op && inode->i_op->bmap) + return inode->i_op->bmap(inode, block); + return 0; } diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 30d0bf4c4..e22c3ca3b 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -54,7 +54,6 @@ struct inode_operations isofs_dir_inode_operations = NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ isofs_bmap, /* bmap */ @@ -226,7 +225,6 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, /* rrflag == 1 means that we have a new name (kmalloced) */ if (rrflag == 1) { rrflag = filldir(dirent, name, len, filp->f_pos, inode_number); - dcache_add(inode, name, len, inode_number); kfree(name); /* this was allocated in get_r_r_filename.. 
*/ if (rrflag < 0) break; @@ -239,7 +237,6 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, len = isofs_name_translate(name, len, tmpname); if (filldir(dirent, tmpname, len, filp->f_pos, inode_number) < 0) break; - dcache_add(inode, tmpname, len, inode_number); filp->f_pos += de_len; continue; } @@ -247,7 +244,6 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, if (filldir(dirent, name, len, filp->f_pos, inode_number) < 0) break; - dcache_add(inode, name, len, inode_number); filp->f_pos += de_len; continue; } diff --git a/fs/isofs/file.c b/fs/isofs/file.c index d14a558a0..2742283f7 100644 --- a/fs/isofs/file.c +++ b/fs/isofs/file.c @@ -47,7 +47,6 @@ struct inode_operations isofs_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ isofs_bmap, /* bmap */ diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 708198a00..d081a4cdd 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -912,7 +912,10 @@ void leak_check_brelse(struct buffer_head * bh){ #endif static struct file_system_type iso9660_fs_type = { - isofs_read_super, "iso9660", 1, NULL + "iso9660", + FS_REQUIRES_DEV, + isofs_read_super, + NULL }; __initfunc(int init_iso9660_fs(void)) diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 06ccfde5c..155f4ae43 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -206,6 +206,7 @@ int isofs_lookup(struct inode * dir,const char * name, int len, { unsigned long ino, ino_back; struct buffer_head * bh; + char *lcname; #ifdef DEBUG printk("lookup: %x %d\n",dir->i_ino, len); @@ -219,38 +220,29 @@ int isofs_lookup(struct inode * dir,const char * name, int len, return -ENOENT; } - ino = 0; + /* If mounted with check=relaxed (and most likely norock), + * then first convert this name to lower case. 
+ */ + if (dir->i_sb->u.isofs_sb.s_name_check == 'r' && + (lcname = kmalloc(len, GFP_KERNEL)) != NULL) { + int i; + char c; - if (dcache_lookup(dir, name, len, &ino)) ino_back = dir->i_ino; - - if (!ino) { - char *lcname; - - /* If mounted with check=relaxed (and most likely norock), - then first convert this name to lower case. */ - if (dir->i_sb->u.isofs_sb.s_name_check == 'r' - && (lcname = kmalloc(len, GFP_KERNEL)) != NULL) { - int i; - char c; - - for (i=0; i<len; i++) { - c = name[i]; - if (c >= 'A' && c <= 'Z') c |= 0x20; - lcname[i] = c; - } - bh = isofs_find_entry(dir,lcname,len, &ino, &ino_back); - kfree(lcname); - } else - bh = isofs_find_entry(dir,name,len, &ino, &ino_back); - - if (!bh) { - iput(dir); - return -ENOENT; + for (i=0; i<len; i++) { + c = name[i]; + if (c >= 'A' && c <= 'Z') c |= 0x20; + lcname[i] = c; } - if (ino_back == dir->i_ino) - dcache_add(dir, name, len, ino); - brelse(bh); + bh = isofs_find_entry(dir,lcname,len, &ino, &ino_back); + kfree(lcname); + } else + bh = isofs_find_entry(dir,name,len, &ino, &ino_back); + + if (!bh) { + iput(dir); + return -ENOENT; } + brelse(bh); if (!(*result = iget(dir->i_sb,ino))) { iput(dir); @@ -258,14 +250,12 @@ int isofs_lookup(struct inode * dir,const char * name, int len, } /* We need this backlink for the ".." entry unless the name that we - are looking up traversed a mount point (in which case the inode - may not even be on an iso9660 filesystem, and writing to - u.isofs_i would only cause memory corruption). - */ - - if (ino_back && !(*result)->i_pipe && (*result)->i_sb == dir->i_sb) { - (*result)->u.isofs_i.i_backlink = ino_back; - } + * are looking up traversed a mount point (in which case the inode + * may not even be on an iso9660 filesystem, and writing to + * u.isofs_i would only cause memory corruption). 
+ */ + if (ino_back && !(*result)->i_pipe && (*result)->i_sb == dir->i_sb) + (*result)->u.isofs_i.i_backlink = ino_back; iput(dir); return 0; diff --git a/fs/isofs/symlink.c b/fs/isofs/symlink.c index 87e544324..f49bc3ee3 100644 --- a/fs/isofs/symlink.c +++ b/fs/isofs/symlink.c @@ -19,7 +19,6 @@ #include <asm/uaccess.h> static int isofs_readlink(struct inode *, char *, int); -static int isofs_follow_link(struct inode *, struct inode *, int, int, struct inode **); /* * symlinks can't do much... @@ -36,7 +35,6 @@ struct inode_operations isofs_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ isofs_readlink, /* readlink */ - isofs_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -44,51 +42,11 @@ struct inode_operations isofs_symlink_inode_operations = { NULL /* permission */ }; -static int isofs_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - int error; - char * pnt; - - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - *res_inode = NULL; - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if ((current->link_count > 5) || - !(pnt = get_rock_ridge_symlink(inode))) { - iput(dir); - iput(inode); - *res_inode = NULL; - return -ELOOP; - } - iput(inode); - current->link_count++; - error = open_namei(pnt,flag,mode,res_inode,dir); - current->link_count--; - kfree(pnt); - return error; -} - static int isofs_readlink(struct inode * inode, char * buffer, int buflen) { char * pnt; int i; - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } - if (buflen > 1023) buflen = 1023; pnt = get_rock_ridge_symlink(inode); diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 2acbe5c17..81ac9b047 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -191,8 +191,8 @@ void minix_free_inode(struct inode * inode) printk("free_inode: inode has no device\n"); 
return; } - if (inode->i_count != 1) { - printk("free_inode: inode has count=%d\n",inode->i_count); + if (atomic_read(&inode->i_count) != 1) { + printk("free_inode: inode has count=%d\n",atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { @@ -251,7 +251,7 @@ struct inode * minix_new_inode(const struct inode * dir) iput(inode); return NULL; } - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_nlink = 1; inode->i_dev = sb->s_dev; inode->i_uid = current->fsuid; diff --git a/fs/minix/dir.c b/fs/minix/dir.c index ec5113c4a..439005f4e 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -50,7 +50,6 @@ struct inode_operations minix_dir_inode_operations = { minix_mknod, /* mknod */ minix_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/minix/file.c b/fs/minix/file.c index 23aa70268..86cbca2b2 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -58,7 +58,6 @@ struct inode_operations minix_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ minix_bmap, /* bmap */ diff --git a/fs/minix/inode.c b/fs/minix/inode.c index faf5ce8a4..cbd735ef1 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -944,7 +944,10 @@ int minix_sync_inode(struct inode * inode) } static struct file_system_type minix_fs_type = { - minix_read_super, "minix", 1, NULL + "minix", + FS_REQUIRES_DEV, + minix_read_super, + NULL }; __initfunc(int init_minix_fs(void)) diff --git a/fs/minix/namei.c b/fs/minix/namei.c index c55d77fbc..b6041ad92 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -462,7 +462,7 @@ int minix_rmdir(struct inode * dir, const char * name, int len) retval = -ENOENT; goto end_rmdir; } - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { retval = -EBUSY; goto end_rmdir; } @@ -639,7 +639,7 @@ static int subdir(struct inode * new_inode, 
struct inode * old_inode) int ino; int result; - new_inode->i_count++; + atomic_inc(&new_inode->i_count); result = 0; for (;;) { if (new_inode == old_inode) { @@ -672,7 +672,7 @@ static int subdir(struct inode * new_inode, struct inode * old_inode) * higher-level routines. */ static int do_minix_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, int must_be_dir) + struct inode * new_dir, const char * new_name, int new_len) { struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; @@ -700,8 +700,6 @@ start_up: old_inode = __iget(old_dir->i_sb, old_de->inode,0); /* don't cross mnt-points */ if (!old_inode) goto end_rename; - if (must_be_dir && !S_ISDIR(old_inode->i_mode)) - goto end_rename; retval = -EPERM; if ((old_dir->i_mode & S_ISVTX) && current->fsuid != old_inode->i_uid && @@ -730,7 +728,7 @@ start_up: if (!empty_dir(new_inode)) goto end_rename; retval = -EBUSY; - if (new_inode->i_count > 1) + if (atomic_read(&new_inode->i_count) > 1) goto end_rename; } retval = -EPERM; @@ -818,8 +816,7 @@ end_rename: * as they are on different partitions. 
*/ int minix_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, - int must_be_dir) + struct inode * new_dir, const char * new_name, int new_len) { static struct wait_queue * wait = NULL; static int lock = 0; @@ -829,7 +826,7 @@ int minix_rename(struct inode * old_dir, const char * old_name, int old_len, sleep_on(&wait); lock = 1; result = do_minix_rename(old_dir, old_name, old_len, - new_dir, new_name, new_len, must_be_dir); + new_dir, new_name, new_len); lock = 0; wake_up(&wait); return result; diff --git a/fs/minix/symlink.c b/fs/minix/symlink.c index 9a340ec9b..92539cded 100644 --- a/fs/minix/symlink.c +++ b/fs/minix/symlink.c @@ -15,7 +15,6 @@ #include <asm/uaccess.h> static int minix_readlink(struct inode *, char *, int); -static int minix_follow_link(struct inode *, struct inode *, int, int, struct inode **); /* * symlinks can't do much... @@ -32,7 +31,6 @@ struct inode_operations minix_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ minix_readlink, /* readlink */ - minix_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -40,54 +38,12 @@ struct inode_operations minix_symlink_inode_operations = { NULL /* permission */ }; -static int minix_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - int error; - struct buffer_head * bh; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput(inode); - iput(dir); - return -ELOOP; - } - if (!(bh = minix_bread(inode, 0, 0))) { - iput(inode); - iput(dir); - return -EIO; - } - iput(inode); - current->link_count++; - error = open_namei(bh->b_data,flag,mode,res_inode,dir); - current->link_count--; - brelse(bh); - return error; -} - 
static int minix_readlink(struct inode * inode, char * buffer, int buflen) { struct buffer_head * bh; int i; char c; - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } if (buflen > 1023) buflen = 1023; bh = minix_bread(inode, 0, 0); diff --git a/fs/msdos/msdosfs_syms.c b/fs/msdos/msdosfs_syms.c index 9e2c26bd6..c8b1e8092 100644 --- a/fs/msdos/msdosfs_syms.c +++ b/fs/msdos/msdosfs_syms.c @@ -31,7 +31,10 @@ EXPORT_SYMBOL(msdos_put_super); struct file_system_type msdos_fs_type = { - msdos_read_super, "msdos", 1, NULL + "msdos", + FS_REQUIRES_DEV, + msdos_read_super, + NULL }; __initfunc(int init_msdos_fs(void)) diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 1c76bdc41..bcf6782d0 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -219,14 +219,6 @@ int msdos_lookup(struct inode *dir,const char *name,int len, if (!(*result = iget(dir->i_sb,ino))) return -EACCES; return 0; } -#if 0 - if (dcache_lookup(dir, name, len, (unsigned long *) &ino)) { - iput(dir); - if (!(*result = iget(dir->i_sb, ino))) - return -EACCES; - return 0; - } -#endif PRINTK (("msdos_lookup 3\n")); if ((res = msdos_find(dir,name,len,&bh,&de,&ino)) < 0) { iput(dir); @@ -304,7 +296,6 @@ static int msdos_create_entry(struct inode *dir, const char *name,int len, (*result)->i_mtime = (*result)->i_atime = (*result)->i_ctime = CURRENT_TIME; (*result)->i_dirt = 1; - dcache_add(dir, name, len, ino); return 0; } @@ -378,7 +369,7 @@ static int msdos_empty(struct inode *dir) struct buffer_head *bh; struct msdos_dir_entry *de; - if (dir->i_count > 1) + if (atomic_read(&dir->i_count) > 1) return -EBUSY; if (MSDOS_I(dir)->i_start) { /* may be zero in mkdir */ pos = 0; @@ -596,7 +587,6 @@ static int rename_same_dir(struct inode *old_dir,char *old_name,int old_len, new_inode->i_dirt = 1; new_de->name[0] = DELETED_FLAG; fat_mark_buffer_dirty(sb, new_bh, 1); - dcache_add(new_dir, new_name, new_len, new_ino); iput(new_inode); fat_brelse(sb, new_bh); } @@ -721,10 +711,9 @@ static int 
rename_diff_dir(struct inode *old_dir,char *old_name,int old_len, MSDOS_I(new_inode)->i_depend = free_inode; MSDOS_I(free_inode)->i_old = new_inode; /* Two references now exist to free_inode so increase count */ - free_inode->i_count++; + atomic_inc(&free_inode->i_count); /* free_inode is put after putting new_inode and old_inode */ iput(new_inode); - dcache_add(new_dir, new_name, new_len, new_ino); fat_brelse(sb, new_bh); } if (S_ISDIR(old_inode->i_mode)) { @@ -755,8 +744,7 @@ rename_done: /***** Rename, a wrapper for rename_same_dir & rename_diff_dir */ int msdos_rename(struct inode *old_dir,const char *old_name,int old_len, - struct inode *new_dir,const char *new_name,int new_len, - int must_be_dir) + struct inode *new_dir,const char *new_name,int new_len) { struct super_block *sb = old_dir->i_sb; char old_msdos_name[MSDOS_NAME],new_msdos_name[MSDOS_NAME]; @@ -805,7 +793,6 @@ struct inode_operations msdos_dir_inode_operations = { NULL, /* mknod */ msdos_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ fat_bmap, /* bmap */ diff --git a/fs/namei.c b/fs/namei.c index 35ebbd4f4..198179b98 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -8,6 +8,11 @@ * Some corrections by tytso. */ +/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname + * lookup logic. + */ + +#include <linux/config.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -15,18 +20,114 @@ #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/mm.h> +#include <linux/dalloc.h> +#include <linux/nametrans.h> +#include <linux/proc_fs.h> +#include <linux/omirr.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> #include <asm/unaligned.h> +#include <asm/semaphore.h> #include <asm/namei.h> +/* This can be removed after the beta phase. 
*/ +#define CACHE_SUPERVISE /* debug the correctness of dcache entries */ +#undef DEBUG /* some other debugging */ + + +/* local flags for __namei() */ +#define NAM_SEMLOCK 8 /* set a semlock on the last dir */ +#define NAM_TRANSCREATE 16 /* last component may be created, try "=CREATE#" suffix*/ +#define NAM_NO_TRAILSLASH 32 /* disallow trailing slashes by returning EISDIR */ #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) +/* [Feb-1997 T. Schoebel-Theuer] + * Fundamental changes in the pathname lookup mechanisms (namei) + * were necessary because of omirr. The reason is that omirr needs + * to know the _real_ pathname, not the user-supplied one, in case + * of symlinks (and also when transname replacements occur). + * + * The new code replaces the old recursive symlink resolution with + * an iterative one (in case of non-nested symlink chains). It does + * this by looking up the symlink name from the particular filesystem, + * and then follows this name as if it were a user-supplied one. This + * is done solely in the VFS level, such that <fs>_follow_link() is not + * used any more and could be removed in future. As a side effect, + * dir_namei(), _namei() and follow_link() are now replaced with a single + * function __namei() that can handle all the special cases of the former + * code. + * + * With the new dcache, the pathname is stored at each inode, at least as + * long as the refcount of the inode is positive. As a side effect, the + * size of the dcache depends on the inode cache and thus is dynamic. + */ -/* - * In order to reduce some races, while at the same time doing additional +/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: + * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL + * and the name already exists in form of a symlink, try to create the new + * name indicated by the symlink. 
The old code always complained that the + * name already exists, due to not following the symlink even if its target + * is non-existant. The new semantics affects also mknod() and link() when + * the name is a symlink pointing to a non-existant name. + * + * I don't know which semantics is the right one, since I have no access + * to standards. But I found by trial that HP-UX 9.0 has the full "new" + * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the + * "old" one. Personally, I think the new semantics is much more logical. + * Note that "ln old new" where "new" is a symlink pointing to a non-existing + * file does succeed in both HP-UX and SunOs, but not in Solaris + * and in the old Linux semantics. + */ + +static char * quicklist = NULL; +static int quickcount = 0; +struct semaphore quicklock = MUTEX; + +/* Tuning: increase locality by reusing same pages again... + * if quicklist becomes too long on low memory machines, either a limit + * should be added or after a number of cycles some pages should + * be released again ... + */ +static inline char * get_page(void) +{ + char * res; + down(&quicklock); + res = quicklist; + if(res) { +#ifdef DEBUG + char * tmp = res; + int i; + for(i=0; i<quickcount; i++) + tmp = *(char**)tmp; + if(tmp) + printk("bad quicklist %x\n", (int)tmp); +#endif + quicklist = *(char**)res; + quickcount--; + } + else + res = (char*)__get_free_page(GFP_KERNEL); + up(&quicklock); + return res; +} + +inline void putname(char * name) +{ + if(name) { + down(&quicklock); + *(char**)name = quicklist; + quicklist = name; + quickcount++; + up(&quicklock); + } + /* if a quicklist limit is necessary to introduce, call + * free_page((unsigned long) name); + */ +} + +/* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. 
* @@ -53,44 +154,22 @@ static inline int do_getname(const char *filename, char *page) return retval; } -/* - * This is a single page for faster getname. - * If the page is available when entering getname, use it. - * If the page is not available, call __get_free_page instead. - * This works even though do_getname can block (think about it). - * -- Michael Chastain, based on idea of Linus Torvalds, 1 Dec 1996. - */ -static unsigned long name_page_cache = 0; - int getname(const char * filename, char **result) { - unsigned long page; + char *tmp; int retval; - page = name_page_cache; - name_page_cache = 0; - if (!page) { - page = __get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - } - - retval = do_getname(filename, (char *) page); + tmp = get_page(); + if(!tmp) + return -ENOMEM; + retval = do_getname(filename, tmp); if (retval < 0) - putname( (char *) page ); + putname(tmp); else - *result = (char *) page; + *result = tmp; return retval; } -void putname(char * name) -{ - if (name_page_cache == 0) - name_page_cache = (unsigned long) name; - else - free_page((unsigned long) name); -} - /* * permission() * @@ -143,155 +222,416 @@ void put_write_access(struct inode * inode) inode->i_writecount--; } -/* - * lookup() looks up one part of a pathname, using the fs-dependent - * routines (currently minix_lookup) for it. It also checks for - * fathers (pseudo-roots, mount-points) +static /*inline */ int concat(struct qstr * name, struct qstr * appendix, char * buf) +{ + int totallen = name->len; + if(name->len > MAX_TRANS_FILELEN || + appendix->len > MAX_TRANS_SUFFIX) { + return -ENAMETOOLONG; + } + memcpy(buf, name->name, name->len); + memcpy(buf + name->len, appendix->name, appendix->len); + totallen += appendix->len; + buf[totallen] = '\0'; + return totallen; +} + +/* Internal lookup() using the new generic dcache. + * buf must only be supplied if appendix!=NULL. 
*/ -int lookup(struct inode * dir,const char * name, int len, - struct inode ** result) +static int cached_lookup(struct inode * dir, struct qstr * name, + struct qstr * appendix, char * buf, + struct qstr * res_name, struct dentry ** res_entry, + struct inode ** result) { - struct super_block * sb; - int perm; + struct qstr tmp = { name->name, name->len }; + int error; + struct dentry * cached; *result = NULL; - if (!dir) - return -ENOENT; -/* check permissions before traversing mount-points */ - perm = permission(dir,MAY_EXEC); - if (len==2 && get_unaligned((u16 *) name) == 0x2e2e) { - if (dir == current->fs->root) { - *result = dir; - return 0; - } else if ((sb = dir->i_sb) && (dir == sb->s_mounted)) { - iput(dir); - dir = sb->s_covered; - if (!dir) - return -ENOENT; - dir->i_count++; + if(name->len >= D_MAXLEN) + return -ENAMETOOLONG; + vfs_lock(); + cached = d_lookup(dir, name, appendix); + if(cached) { + struct inode *inode = NULL; + + if(cached->u.d_inode && (inode = d_inode(&cached))) { + error = 0; + if(appendix && res_name) { + tmp.len = error = concat(name, appendix, buf); + tmp.name = buf; + if(error > 0) + error = 0; + } + } else { + error = -ENOENT; } - } - if (!dir->i_op || !dir->i_op->lookup) { - iput(dir); - return -ENOTDIR; - } - if (perm != 0) { - iput(dir); - return perm; - } - if (!len) { - *result = dir; - return 0; - } - return dir->i_op->lookup(dir, name, len, result); -} + vfs_unlock(); + if(res_entry) + *res_entry = cached; -int follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - if (!dir || !inode) { - iput(dir); - iput(inode); - *res_inode = NULL; - return -ENOENT; - } - if (!inode->i_op || !inode->i_op->follow_link) { - iput(dir); - *res_inode = inode; - return 0; + /* Since we are bypassing the iget() mechanism, we have to + * fabricate the act of crossing any mount points. 
+ */ + if(!error && inode && inode->i_mount) { + do { + struct inode *mnti = inode->i_mount; + iinc(mnti); + iput(inode); + inode = mnti; + } while(inode->i_mount); + } + *result = inode; + goto done; + } else + vfs_unlock(); + + if(appendix) { + tmp.len = error = concat(name, appendix, buf); + tmp.name = buf; + if(error < 0) + goto done; + } + atomic_inc(&dir->i_count); + error = dir->i_op->lookup(dir, tmp.name, tmp.len, result); + if(dir->i_dentry && tmp.len && + (!error || (error == -ENOENT && (!dir->i_sb || !dir->i_sb->s_type || + !(dir->i_sb->s_type->fs_flags & FS_NO_DCACHE))))) { + struct dentry * res; + vfs_lock(); + res = d_entry(dir->i_dentry, &tmp, error ? NULL : *result); + vfs_unlock(); + if(res_entry) + *res_entry = res; + } +done: + if(res_name) { + if(error) { + res_name->name = name->name; + res_name->len = name->len; + } else { + res_name->name = tmp.name; + res_name->len = tmp.len; + } } - return inode->i_op->follow_link(dir,inode,flag,mode,res_inode); + return error; } -/* - * dir_namei() - * - * dir_namei() returns the inode of the directory of the - * specified name, and the name within that directory. +#ifdef CONFIG_TRANS_NAMES +/* If a normal filename is seen, try to determine whether a + * "#keyword=context#" file exists and return the new filename. + * If the name is to be created (create_mode), check whether a + * "#keyword=CREATE" name exists and optionally return the corresponding + * context name even if it didn't exist before. 
*/ -static int dir_namei(const char *pathname, int *namelen, const char **name, - struct inode * base, struct inode **res_inode) +static int check_suffixes(struct inode * dir, struct qstr * name, + int create_mode, char * buf, + struct qstr * res_name, struct dentry ** res_entry, + struct inode ** result) { - unsigned char c; - const char * thisname; - int len,error; - struct inode * inode; + struct translations * trans; + char * env; + struct qstr * suffixes; + int i; + int error = -ENOENT; - *res_inode = NULL; - if (!base) { - base = current->fs->pwd; - base->i_count++; - } - if ((c = *pathname) == '/') { - iput(base); - base = current->fs->root; - pathname++; - base->i_count++; - } - while (1) { - thisname = pathname; - for(len=0;(c = *(pathname++))&&(c != '/');len++) - /* nothing */ ; - if (!c) + if(!buf) + panic("buf==NULL"); + env = env_transl(); +#ifdef CONFIG_TRANS_RESTRICT + if(!env && dir->i_gid != CONFIG_TRANS_GID) { + return error; + } +#endif + trans = get_translations(env); + suffixes = create_mode ? trans->c_name : trans->name; + for(i = 0; i < trans->count; i++) { + error = cached_lookup(dir, name, &suffixes[i], + buf, res_name, res_entry, result); + if(!error) { + if(res_name && create_mode) { + /* buf == res_name->name, but is writable */ + memcpy(buf + name->len, + trans->name[i].name, + trans->name[i].len); + res_name->len = name->len + trans->name[i].len; + buf[res_name->len] = '\0'; + } break; - base->i_count++; - error = lookup(base, thisname, len, &inode); - if (error) { - iput(base); - return error; } - error = follow_link(base,inode,0,0,&base); - if (error) - return error; } - if (!base->i_op || !base->i_op->lookup) { - iput(base); - return -ENOTDIR; + if(env) + free_page((unsigned long)trans); + return error; +} + +#endif + +/* Any operations involving reserved names at the VFS level should go here. 
*/ +static /*inline*/ int reserved_lookup(struct inode * dir, struct qstr * name, + int create_mode, char * buf, + struct inode ** result) +{ + int error = -ENOENT; + if(name->name[0] == '.') { + if(name->len == 1) { + *result = dir; + error = 0; + } else if (name->len==2 && name->name[1] == '.') { + if (dir == current->fs->root) { + *result = dir; + error = 0; + } + else if(dir->i_dentry) { + error = 0; + *result = dir->i_dentry->d_parent->u.d_inode; + if(!*result) { + printk("dcache parent directory is lost"); + error = -ESTALE; /* random error */ + } + } + } + if(!error) + atomic_inc(&(*result)->i_count); } - *name = thisname; - *namelen = len; - *res_inode = base; - return 0; + return error; } -int _namei(const char * pathname, struct inode * base, - int follow_links, struct inode ** res_inode) +/* In difference to the former version, lookup() no longer eats the dir. */ +static /*inline*/ int lookup(struct inode * dir, struct qstr * name, int create_mode, + char * buf, struct qstr * res_name, + struct dentry ** res_entry, struct inode ** result) { - const char *basename; - int namelen,error; - struct inode * inode; + int perm; - translate_namei(pathname, base, follow_links, res_inode); - *res_inode = NULL; - error = dir_namei(pathname, &namelen, &basename, base, &base); - if (error) - return error; - base->i_count++; /* lookup uses up base */ - error = lookup(base, basename, namelen, &inode); - if (error) { - iput(base); - return error; + *result = NULL; + perm = -ENOENT; + if (!dir) + goto done; + + /* Check permissions before traversing mount-points. 
*/ + perm = permission(dir,MAY_EXEC); + if (perm) + goto done; + perm = reserved_lookup(dir, name, create_mode, buf, result); + if(!perm) { + if(res_name) { + res_name->name = name->name; + res_name->len = name->len; + } + goto done; + } + perm = -ENOTDIR; + if (!dir->i_op || !dir->i_op->lookup) + goto done; +#ifdef CONFIG_TRANS_NAMES /* try suffixes */ + perm = check_suffixes(dir, name, 0, buf, res_name, res_entry, result); + if(perm) /* try original name */ +#endif + perm = cached_lookup(dir, name, NULL, buf, res_name, res_entry, result); +#ifdef CONFIG_TRANS_NAMES + if(perm == -ENOENT && create_mode) { /* try the =CREATE# suffix */ + struct inode * dummy; + if(!check_suffixes(dir, name, 1, buf, res_name, NULL, &dummy)) { + iput(dummy); + } } - if (follow_links) { - error = follow_link(base, inode, 0, 0, &inode); - if (error) - return error; - } else - iput(base); - *res_inode = inode; - return 0; +#endif +done: + return perm; } -int lnamei(const char *pathname, struct inode **res_inode) +/* [8-Feb-97 T. Schoebel-Theuer] follow_link() modified for generic operation + * on the VFS layer: first call <fs>_readlink() and then open_namei(). + * All <fs>_follow_link() are not used any more and may be eliminated + * (by Linus; I refrained in order to not break other patches). + * Single exeption is procfs, where proc_follow_link() is used + * internally (and perhaps should be rewritten). + * Note: [partly obsolete] I removed parameters flag and mode, since now + * __namei() is called instead of open_namei(). In the old semantics, + * the _last_ instance of open_namei() did the real create() if O_CREAT was + * set and the name existed already in form of a symlink. This has been + * simplified now, and also the semantics when combined with O_EXCL has changed. + **************************************************************************** + * [13-Feb-97] Complete rewrite -> functionality of reading symlinks factored + * out into _read_link(). 
The above notes remain valid in principle. + */ +static /*inline*/ int _read_link(struct inode * inode, char ** linkname, int loopcount) { + unsigned long old_fs; int error; - char * tmp; - error = getname(pathname, &tmp); - if (!error) { - error = _namei(tmp, NULL, 0, res_inode); - putname(tmp); + error = -ENOSYS; + if (!inode->i_op || !inode->i_op->readlink) + goto done; + error = -ELOOP; + if (current->link_count + loopcount > 10) + goto done; + error = -ENOMEM; + if(!*linkname && !(*linkname = get_page())) + goto done; + if (DO_UPDATE_ATIME(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } + atomic_inc(&inode->i_count); + old_fs = get_fs(); + set_fs(KERNEL_DS); + error = inode->i_op->readlink(inode, *linkname, PAGE_SIZE); + set_fs(old_fs); + if(!error) { + error = -ENOENT; /* ? or other error code ? */ + } else if(error > 0) { + (*linkname)[error] = '\0'; + error = 0; + } +done: + iput(inode); + return error; +} + +/* [13-Feb-97 T. Schoebel-Theuer] complete rewrite: + * merged dir_name(), _namei() and follow_link() into one new routine + * that obeys all the special cases hidden in the old routines in a + * (hopefully) systematic way: + * parameter retrieve_mode is bitwise or'ed of the ST_* flags. + * if res_inode is a NULL pointer, dont try to retrieve the last component + * at all. Parameters with prefix last_ are used only if res_inode is + * non-NULL and refer to the last component of the path only. 
+ */ +int __namei(int retrieve_mode, const char * name, struct inode * base, + char * buf, struct inode ** res_dir, struct inode ** res_inode, + struct qstr * last_name, struct dentry ** last_entry, + int * last_error) +{ + char c; + struct qstr this; + char * linkname = NULL; + char * oldlinkname = NULL; + int trail_flag = 0; + int loopcount = 0; + int error; +#ifdef DEBUG + if(last_name) { + last_name->name = "(Uninitialized)"; + last_name->len = 15; + } +#endif +again: + error = -ENOENT; + this.name = name; + if (this.name[0] == '/') { + if(base) + iput(base); + if (__prefix_namei(retrieve_mode, this.name, base, buf, + res_dir, res_inode, + last_name, last_entry, last_error) == 0) + return 0; + base = current->fs->root; + atomic_inc(&base->i_count); + this.name++; + } else if (!base) { + base = current->fs->pwd; + atomic_inc(&base->i_count); } + for(;;) { + struct inode * inode; + const char * tmp = this.name; + int len; + + for(len = 0; (c = *tmp++) && (c != '/'); len++) ; + this.len = len; + if(!c) + break; + while((c = *tmp) == '/') /* remove embedded/trailing slashes */ + tmp++; + if(!c) { + trail_flag = 1; + if(retrieve_mode & NAM_NO_TRAILSLASH) { + error = -EISDIR; + goto alldone; + } + break; + } +#if 0 + if(atomic_read(&base->i_count) == 0) + printk("vor lookup this=%s tmp=%s\n", this.name, tmp); +#endif + error = lookup(base, &this, 0, buf, NULL, NULL, &inode); +#if 0 + if(atomic_read(&base->i_count) == 0) + printk("nach lookup this=%s tmp=%s\n", this.name, tmp); +#endif + if (error) + goto alldone; + if(S_ISLNK(inode->i_mode)) { + error = _read_link(inode, &linkname, loopcount); + if(error) + goto alldone; + current->link_count++; + error = __namei((retrieve_mode & + ~(NAM_SEMLOCK|NAM_TRANSCREATE|NAM_NO_TRAILSLASH)) + | NAM_FOLLOW_LINK, + linkname, base, buf, + &base, &inode, NULL, NULL, NULL); + current->link_count--; + if(error) + goto alldone; + } +#if 0 + if(atomic_read(&base->i_count) == 0) + printk("this=%s tmp=%s\n", this.name, tmp); +#endif + 
this.name = tmp; + iput(base); + base = inode; + } + if(res_inode) { + if(retrieve_mode & NAM_SEMLOCK) + down(&base->i_sem); + error = lookup(base, &this, retrieve_mode & NAM_TRANSCREATE, + buf, last_name, last_entry, res_inode); + if(!error && S_ISLNK((*res_inode)->i_mode) && + ((retrieve_mode & NAM_FOLLOW_LINK) || + (trail_flag && (retrieve_mode & NAM_FOLLOW_TRAILSLASH)))) { + char * tmp; + + error = _read_link(*res_inode, &linkname, loopcount); + if(error) + goto lastdone; + if(retrieve_mode & NAM_SEMLOCK) + up(&base->i_sem); + /* exchange pages */ + name = tmp = linkname; + linkname = oldlinkname; oldlinkname = tmp; + loopcount++; + goto again; /* Tail recursion elimination "by hand", + * uses less dynamic memory. + */ + + /* Note that trail_flag is not reset, so it + * does not matter in a symlink chain where a + * trailing slash indicates a directory endpoint. + */ + } + if(!error && trail_flag && !S_ISDIR((*res_inode)->i_mode)) { + iput(*res_inode); + error = -ENOTDIR; + } + lastdone: + if(last_error) { + *last_error = error; + error = 0; + } + } +alldone: + if(!error && res_dir) + *res_dir = base; + else + iput(base); + putname(linkname); + putname(oldlinkname); return error; } @@ -302,14 +642,20 @@ int lnamei(const char *pathname, struct inode **res_inode) * Open, link etc use their own routines, but this is enough for things * like 'chmod' etc. */ -int namei(const char *pathname, struct inode **res_inode) + +/* [Feb 1997 T.Schoebel-Theuer] lnamei() completely removed; can be + * simulated when calling with retrieve_mode==NAM_FOLLOW_TRAILSLASH. 
+ */ +int namei(int retrieve_mode, const char *pathname, struct inode **res_inode) { int error; char * tmp; error = getname(pathname, &tmp); if (!error) { - error = _namei(tmp, NULL, 1, res_inode); + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + error = __namei(retrieve_mode, tmp, NULL, + buf, NULL, res_inode, NULL, NULL, NULL); putname(tmp); } return error; @@ -328,40 +674,30 @@ int namei(const char *pathname, struct inode **res_inode) * which is a lot more logical, and also allows the "no perm" needed * for symlinks (where the permissions are checked later). */ -int -open_namei(const char * pathname, int flag, int mode, - struct inode ** res_inode, struct inode * base) +int open_namei(const char * pathname, int flag, int mode, + struct inode ** res_inode, struct inode * base) { - const char * basename; - int namelen,error; - struct inode * dir, *inode; + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr last; + int error; + int lasterror; + struct inode * dir, * inode; + int namei_mode; - translate_open_namei(pathname, flag, mode, res_inode, base); mode &= S_IALLUGO & ~current->fs->umask; mode |= S_IFREG; - error = dir_namei(pathname, &namelen, &basename, base, &dir); + + namei_mode = NAM_FOLLOW_LINK; + if(flag & O_CREAT) + namei_mode |= NAM_SEMLOCK|NAM_TRANSCREATE|NAM_NO_TRAILSLASH; + error = __namei(namei_mode, pathname, base, buf, + &dir, &inode, &last, NULL, &lasterror); if (error) - return error; - if (!namelen) { /* special case: '/usr/' etc */ - if (flag & 2) { - iput(dir); - return -EISDIR; - } - /* thanks to Paul Pluzhnikov for noticing this was missing.. 
*/ - if ((error = permission(dir,ACC_MODE(flag))) != 0) { - iput(dir); - return error; - } - *res_inode=dir; - return 0; - } - dir->i_count++; /* lookup eats the dir */ + goto exit; + error = lasterror; if (flag & O_CREAT) { - down(&dir->i_sem); - error = lookup(dir, basename, namelen, &inode); if (!error) { if (flag & O_EXCL) { - iput(inode); error = -EEXIST; } } else if (IS_RDONLY(dir)) @@ -371,31 +707,31 @@ open_namei(const char * pathname, int flag, int mode, else if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) ; /* error is already set! */ else { - dir->i_count++; /* create eats the dir */ + d_del(d_lookup(dir, &last, NULL), D_REMOVE); + atomic_inc(&dir->i_count); /* create eats the dir */ if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); - error = dir->i_op->create(dir, basename, namelen, mode, res_inode); + error = dir->i_op->create(dir, last.name, last.len, + mode, res_inode); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(dir->i_dentry, NULL, &last, + " c %ld %d ", CURRENT_TIME, mode); +#endif up(&dir->i_sem); - iput(dir); - return error; + goto exit_dir; } up(&dir->i_sem); - } else - error = lookup(dir, basename, namelen, &inode); - if (error) { - iput(dir); - return error; } - error = follow_link(dir,inode,flag,mode,&inode); if (error) - return error; + goto exit_inode; + if (S_ISDIR(inode->i_mode) && (flag & 2)) { - iput(inode); - return -EISDIR; + error = -EISDIR; + goto exit_inode; } if ((error = permission(inode,ACC_MODE(flag))) != 0) { - iput(inode); - return error; + goto exit_inode; } if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { /* @@ -410,86 +746,102 @@ open_namei(const char * pathname, int flag, int mode, } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { if (IS_NODEV(inode)) { - iput(inode); - return -EACCES; + error = -EACCES; + goto exit_inode; } flag &= ~O_TRUNC; } else { if (IS_RDONLY(inode) && (flag & 2)) { - iput(inode); - return -EROFS; + error = -EROFS; + goto exit_inode; } } /* 
- * An append-only file must be opened in append mode for writing + * An append-only file must be opened in append mode for writing. */ if (IS_APPEND(inode) && ((flag & FMODE_WRITE) && !(flag & O_APPEND))) { - iput(inode); - return -EPERM; + error = -EPERM; + goto exit_inode; } if (flag & O_TRUNC) { - if ((error = get_write_access(inode))) { - iput(inode); - return error; - } + if ((error = get_write_access(inode))) + goto exit_inode; /* - * Refuse to truncate files with mandatory locks held on them + * Refuse to truncate files with mandatory locks held on them. */ error = locks_verify_locked(inode); - if (error) { - iput(inode); - return error; - } + if (error) + goto exit_inode; if (inode->i_sb && inode->i_sb->dq_op) inode->i_sb->dq_op->initialize(inode, -1); error = do_truncate(inode, 0); put_write_access(inode); - if (error) { - iput(inode); - return error; - } } else if (flag & FMODE_WRITE) if (inode->i_sb && inode->i_sb->dq_op) inode->i_sb->dq_op->initialize(inode, -1); - *res_inode = inode; - return 0; +exit_inode: + if(error) { + if(!lasterror) + iput(inode); + } else + *res_inode = inode; +exit_dir: + iput(dir); +exit: + return error; } int do_mknod(const char * filename, int mode, dev_t dev) { - const char * basename; - int namelen, error; + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr last; + int error, lasterror; struct inode * dir; + struct inode * inode; mode &= ~current->fs->umask; - error = dir_namei(filename, &namelen, &basename, NULL, &dir); + error = __namei(NAM_FOLLOW_LINK|NAM_TRANSCREATE|NAM_NO_TRAILSLASH, + filename, NULL, buf, + &dir, &inode, &last, NULL, &lasterror); if (error) - return error; - if (!namelen) { - iput(dir); - return -ENOENT; + goto exit; + if(!lasterror) { + error = -EEXIST; + goto exit_inode; } - if (IS_RDONLY(dir)) { - iput(dir); - return -EROFS; + if (!last.len) { + error = -ENOENT; + goto exit_inode; } - if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(dir); - return error; + if 
(IS_RDONLY(dir)) { + error = -EROFS; + goto exit_inode; } + if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) + goto exit_inode; if (!dir->i_op || !dir->i_op->mknod) { - iput(dir); - return -EPERM; + error = -ENOSYS; /* instead of EPERM, what does Posix say? */ + goto exit_inode; } - dir->i_count++; + atomic_inc(&dir->i_count); if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); down(&dir->i_sem); - error = dir->i_op->mknod(dir,basename,namelen,mode,dev); + d_del(d_lookup(dir, &last, NULL), D_REMOVE); + error = dir->i_op->mknod(dir, last.name, last.len, mode, dev); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(dir->i_dentry, NULL, &last, " n %ld %d %d ", + CURRENT_TIME, mode, dev); +#endif up(&dir->i_sem); +exit_inode: + if(!lasterror) + iput(inode); iput(dir); +exit: return error; } @@ -522,75 +874,59 @@ out: return error; } -/* - * Some operations need to remove trailing slashes for POSIX.1 - * conformance. For rename we also need to change the behaviour - * depending on whether we had a trailing slash or not.. (we - * cannot rename normal files with trailing slashes, only dirs) - * - * "dummy" is used to make sure we don't do "/" -> "". +/* [Feb-97 T. Schoebel-Theuer] remove_trailing_slashes() is now obsolete, + * its functionality is handled by observing trailing slashes in __namei(). 
*/ -static int remove_trailing_slashes(char * name) +static inline int do_mkdir(const char * pathname, int mode) { - int result; - char dummy[1]; - char *remove = dummy+1; - - for (;;) { - char c = *name; - name++; - if (!c) - break; - if (c != '/') { - remove = NULL; - continue; - } - if (remove) - continue; - remove = name; - } - - result = 0; - if (remove) { - remove[-1] = 0; - result = 1; - } - - return result; -} - -static int do_mkdir(const char * pathname, int mode) -{ - const char * basename; - int namelen, error; + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr last; + int error, lasterror; struct inode * dir; + struct inode * inode; - error = dir_namei(pathname, &namelen, &basename, NULL, &dir); + mode &= 0777 & ~current->fs->umask; + + error = __namei(NAM_FOLLOW_LINK|NAM_TRANSCREATE, pathname, NULL, buf, + &dir, &inode, &last, NULL, &lasterror); if (error) - return error; - if (!namelen) { - iput(dir); - return -ENOENT; + goto exit; + if(!lasterror) { + error = -EEXIST; + goto exit_inode; } - if (IS_RDONLY(dir)) { - iput(dir); - return -EROFS; + if (!last.len) { + error = -ENOENT; + goto exit_inode; } - if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(dir); - return error; + if (IS_RDONLY(dir)) { + error = -EROFS; + goto exit_inode; } + if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) + goto exit_inode; if (!dir->i_op || !dir->i_op->mkdir) { - iput(dir); - return -EPERM; + error = -ENOSYS; /* instead of EPERM, what does Posix say? 
*/ + goto exit_inode; } - dir->i_count++; + atomic_inc(&dir->i_count); if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); down(&dir->i_sem); - error = dir->i_op->mkdir(dir, basename, namelen, mode & 01777 & ~current->fs->umask); + d_del(d_lookup(dir, &last, NULL), D_REMOVE); + mode &= 01777 & ~current->fs->umask; + error = dir->i_op->mkdir(dir, last.name, last.len, mode); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(dir->i_dentry, NULL, &last, " d %ld %d ", + CURRENT_TIME, mode); +#endif up(&dir->i_sem); +exit_inode: + if(!lasterror) + iput(inode); iput(dir); +exit: return error; } @@ -602,7 +938,6 @@ asmlinkage int sys_mkdir(const char * pathname, int mode) lock_kernel(); error = getname(pathname,&tmp); if (!error) { - remove_trailing_slashes(tmp); error = do_mkdir(tmp,mode); putname(tmp); } @@ -610,43 +945,125 @@ asmlinkage int sys_mkdir(const char * pathname, int mode) return error; } -static int do_rmdir(const char * name) +#if 0 /* We need a "deletefs", someone please write it. -DaveM */ +/* Perhaps this could be moved out into a new file. 
*/ +static void basket_name(struct inode * dir, struct dentry * entry) +{ + char prefix[32]; + struct qstr prename = { prefix, 14 }; + struct qstr entname = { entry->d_name, entry->d_len }; + struct inode * inode; + struct dentry * old = entry; /* dummy */ + int i; + if(!entry || !(inode = d_inode(&entry))) + return; +#if 0 + if(atomic_read(&inode->i_count) > 2) { + extern void printpath(struct dentry *entry); + + printk("Caution: in use "); + if(inode->i_dentry) + printpath(inode->i_dentry); + printk(" i_nlink=%d i_count=%d i_ddir_count=%d i_dent_count=%d\n", + inode->i_nlink, atomic_read(&inode->i_count), + inode->i_ddir_count, inode->i_dent_count); + } +#endif + vfs_lock(); + for(i = 1; old; i++) { + sprintf(prefix, ".deleted-%04d.", i); + old = d_lookup(dir, &prename, &entname); + } + d_move(entry, dir, &prename, &entname); + vfs_unlock(); + iput(inode); +} +#endif + +static inline int do_rmdir(const char * name) { - const char * basename; - int namelen, error; + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr last; + struct dentry * lastent = NULL; + int error; struct inode * dir; + struct inode * inode; - error = dir_namei(name, &namelen, &basename, NULL, &dir); + /* [T.Schoebel-Theuer] I'm not sure which flags to use here. + * Try the following on different platforms: + * [0] rm -rf test test2 + * [1] ln -s test2 test + * [2] mkdir test || mkdir test2 + * [3] rmdir test && mkdir test2 + * [4] rmdir test/ + * Now the rusults: + * cmd | HP-UX | SunOS | Solaris | Old Linux | New Linux | + * ---------------------------------------------------------------- + * [2] | (OK) | EEXIST | EEXIST | EEXIST | (OK) + * [3] | ENOTDIR | ENOTDIR | ENOTDIR | ENOTDIR | ENOTDIR + * [4] | (OK) | EINVAL | ENOTDIR | ENOTDIR | (OK) + * So I implemented the HP-UX semantics. If this is not right + * for Posix compliancy, change the flags accordingly. If Posix + * let the question open, I'd suggest to stay at the new semantics. 
+ * I'd even make case [3] work by adding 2 to the flags parameter + * if Posix tolerates that. + */ + error = __namei(NAM_FOLLOW_TRAILSLASH, name, NULL, buf, + &dir, &inode, &last, &lastent, NULL); if (error) - return error; - if (!namelen) { - iput(dir); - return -ENOENT; - } + goto exit; if (IS_RDONLY(dir)) { - iput(dir); - return -EROFS; - } - if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(dir); - return error; + error = -EROFS; + goto exit_dir; } + if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) + goto exit_dir; /* - * A subdirectory cannot be removed from an append-only directory + * A subdirectory cannot be removed from an append-only directory. */ if (IS_APPEND(dir)) { - iput(dir); - return -EPERM; + error = -EPERM; + goto exit_dir; } if (!dir->i_op || !dir->i_op->rmdir) { - iput(dir); - return -EPERM; + error = -ENOSYS; /* was EPERM */ + goto exit_dir; + } + /* Disallow removals of mountpoints. */ + if(inode->i_mount) { + error = -EBUSY; + goto exit_dir; } if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); - down(&dir->i_sem); - error = dir->i_op->rmdir(dir,basename,namelen); - up(&dir->i_sem); + + down(&dir->i_sem); +#if 0 + if(lastent && d_isbasket(lastent)) { + d_del(lastent, D_REMOVE); + error = 0; + goto exit_lock; + } +#endif + atomic_inc(&dir->i_count); + error = dir->i_op->rmdir(dir, last.name, last.len); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(lastent, NULL, NULL, " r %ld ", CURRENT_TIME); +#endif +#if 0 + if(!error && lastent) + basket_name(dir, lastent); +exit_lock: +#else + if(!error && lastent) + d_del(lastent, D_REMOVE); +#endif + up(&dir->i_sem); +exit_dir: + iput(inode); + iput(dir); +exit: return error; } @@ -658,7 +1075,6 @@ asmlinkage int sys_rmdir(const char * pathname) lock_kernel(); error = getname(pathname,&tmp); if (!error) { - remove_trailing_slashes(tmp); error = do_rmdir(tmp); putname(tmp); } @@ -666,43 +1082,93 @@ asmlinkage int sys_rmdir(const char * pathname) return 
error; } -static int do_unlink(const char * name) +static inline int do_unlink(const char * name) { - const char * basename; - int namelen, error; + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr last; + struct dentry * lastent = NULL; + int error; struct inode * dir; + struct inode * inode; - error = dir_namei(name, &namelen, &basename, NULL, &dir); + /* HP-UX shows a strange behaviour: + * touch y; ln -s y x; rm x/ + * this succeeds and removes the file y, not the symlink x! + * Solaris and old Linux remove the symlink instead, and + * old SunOS complains ENOTDIR. + * I chose the SunOS behaviour (by not using NAM_FOLLOW_TRAILSLASH), + * but I'm not shure whether I should. + * The current code generally prohibits using trailing slashes with + * non-directories if the name already exists, but not if + * it is to be newly created. + * Perhaps this should be further strengthened (by introducing + * an additional flag bit indicating whether trailing slashes are + * allowed) to get it as consistant as possible, but I don't know + * what Posix says. + */ + error = __namei(NAM_NO_TRAILSLASH, name, NULL, buf, + &dir, &inode, &last, &lastent, NULL); if (error) - return error; - if (!namelen) { - iput(dir); - return -EPERM; - } + goto exit; if (IS_RDONLY(dir)) { - iput(dir); - return -EROFS; - } - if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(dir); - return error; + error = -EROFS; + goto exit_dir; } + if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) + goto exit_dir; /* - * A file cannot be removed from an append-only directory + * A file cannot be removed from an append-only directory. 
*/ if (IS_APPEND(dir)) { - iput(dir); - return -EPERM; + error = -EPERM; + goto exit_dir; } if (!dir->i_op || !dir->i_op->unlink) { - iput(dir); - return -EPERM; + error = -ENOSYS; /* was EPERM */ + goto exit_dir; } if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); - down(&dir->i_sem); - error = dir->i_op->unlink(dir,basename,namelen); - up(&dir->i_sem); + + down(&dir->i_sem); +#if 0 + if(atomic_read(&inode->i_count) > 1) { + extern void printpath(struct dentry *entry); + + printk("Fire "); + if(lastent) + printpath(lastent); + printk(" i_nlink=%d i_count=%d i_ddir_count=%d i_dent_count=%d\n", + inode->i_nlink, atomic_read(&inode->i_count), + inode->i_ddir_count, inode->i_dent_count); + } +#endif +#if 0 + if(lastent && d_isbasket(lastent)) { + d_del(lastent, D_REMOVE); + error = 0; + goto exit_lock; + } +#endif + atomic_inc(&dir->i_count); + error = dir->i_op->unlink(dir, last.name, last.len); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(lastent, NULL, NULL, " u %ld ", CURRENT_TIME); +#endif +#if 0 + if(!error && lastent) + basket_name(dir, lastent); +exit_lock: +#else + if(!error && lastent) + d_del(lastent, D_REMOVE); +#endif + up(&dir->i_sem); +exit_dir: + iput(inode); + iput(dir); +exit: return error; } @@ -721,38 +1187,65 @@ asmlinkage int sys_unlink(const char * pathname) return error; } -static int do_symlink(const char * oldname, const char * newname) +static inline int do_symlink(const char * oldname, const char * newname) { + char buf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr last; + int error, lasterror; struct inode * dir; - const char * basename; - int namelen, error; + struct inode * inode; - error = dir_namei(newname, &namelen, &basename, NULL, &dir); + /* The following works on HP-UX and Solaris, by producing + * a symlink chain: + * rm -rf ? 
; mkdir z ; ln -s z y ; ln -s y x/ + * Under old SunOS, the following occurs: + * ln: x/: No such file or directory + * Under old Linux, very strange things occur: + * ln: cannot create symbolic link `x//y' to `y': No such file or directory + * This is very probably a bug, but may be caused by the ln program + * when checking for a directory target. + * + * I'm not shure whether to add NAM_NO_TRAILSLASH to inhibit trailing + * slashes in the target generally. + */ + error = __namei(NAM_TRANSCREATE, newname, NULL, buf, + &dir, &inode, &last, NULL, &lasterror); if (error) - return error; - if (!namelen) { - iput(dir); - return -ENOENT; + goto exit; + if(!lasterror) { + iput(inode); + error = -EEXIST; + goto exit_dir; } - if (IS_RDONLY(dir)) { - iput(dir); - return -EROFS; + if (!last.len) { + error = -ENOENT; + goto exit_dir; } - if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(dir); - return error; + if (IS_RDONLY(dir)) { + error = -EROFS; + goto exit_dir; } + if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) + goto exit_dir; if (!dir->i_op || !dir->i_op->symlink) { - iput(dir); - return -EPERM; + error = -ENOSYS; /* was EPERM */ + goto exit_dir; } - dir->i_count++; + atomic_inc(&dir->i_count); if (dir->i_sb && dir->i_sb->dq_op) dir->i_sb->dq_op->initialize(dir, -1); down(&dir->i_sem); - error = dir->i_op->symlink(dir,basename,namelen,oldname); + d_del(d_lookup(dir, &last, NULL), D_REMOVE); + error = dir->i_op->symlink(dir, last.name, last.len, oldname); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(dir->i_dentry, NULL, &last, + " s %ld %s\0", CURRENT_TIME, oldname); +#endif up(&dir->i_sem); +exit_dir: iput(dir); +exit: return error; } @@ -775,149 +1268,198 @@ asmlinkage int sys_symlink(const char * oldname, const char * newname) return error; } -static int do_link(struct inode * oldinode, const char * newname) +static inline int do_link(const char * oldname, const char * newname) { - struct inode * dir; - const char * basename; - int 
namelen, error; + char oldbuf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + char newbuf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr oldlast; + struct qstr newlast; + struct dentry * oldent = NULL; + struct inode * oldinode; + struct inode * newinode; + struct inode * newdir; + int error, lasterror; - error = dir_namei(newname, &namelen, &basename, NULL, &dir); - if (error) { - iput(oldinode); - return error; - } - if (!namelen) { - iput(oldinode); - iput(dir); - return -EPERM; - } - if (IS_RDONLY(dir)) { - iput(oldinode); - iput(dir); - return -EROFS; - } - if (dir->i_dev != oldinode->i_dev) { - iput(dir); - iput(oldinode); - return -EXDEV; - } - if ((error = permission(dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(dir); - iput(oldinode); - return error; - } + error = __namei(NAM_FOLLOW_LINK|NAM_NO_TRAILSLASH, + oldname, NULL, oldbuf, + NULL, &oldinode, &oldlast, &oldent, NULL); + if (error) + goto exit; + + error = __namei(NAM_FOLLOW_LINK|NAM_TRANSCREATE, newname, NULL, newbuf, + &newdir, &newinode, &newlast, NULL, &lasterror); + if (error) + goto old_exit; + if(!lasterror) { + iput(newinode); + error = -EEXIST; + goto new_exit; + } + if (!newlast.len) { + error = -EPERM; + goto new_exit; + } + if (IS_RDONLY(newdir)) { + error = -EROFS; + goto new_exit; + } + if (newdir->i_dev != oldinode->i_dev) { + error = -EXDEV; + goto new_exit; + } + if ((error = permission(newdir,MAY_WRITE | MAY_EXEC)) != 0) + goto new_exit; /* - * A link to an append-only or immutable file cannot be created + * A link to an append-only or immutable file cannot be created. 
*/ if (IS_APPEND(oldinode) || IS_IMMUTABLE(oldinode)) { - iput(dir); - iput(oldinode); - return -EPERM; - } - if (!dir->i_op || !dir->i_op->link) { - iput(dir); - iput(oldinode); - return -EPERM; - } - dir->i_count++; - if (dir->i_sb && dir->i_sb->dq_op) - dir->i_sb->dq_op->initialize(dir, -1); - down(&dir->i_sem); - error = dir->i_op->link(oldinode, dir, basename, namelen); - up(&dir->i_sem); - iput(dir); + error = -EPERM; + goto new_exit; + } + if (!newdir->i_op || !newdir->i_op->link) { + error = -ENOSYS; /* was EPERM */ + goto new_exit; + } + atomic_inc(&oldinode->i_count); + atomic_inc(&newdir->i_count); + if (newdir->i_sb && newdir->i_sb->dq_op) + newdir->i_sb->dq_op->initialize(newdir, -1); + down(&newdir->i_sem); + d_del(d_lookup(newdir, &newlast, NULL), D_REMOVE); + error = newdir->i_op->link(oldinode, newdir, newlast.name, newlast.len); +#ifdef CONFIG_OMIRR + if(!error) + omirr_print(oldent, newdir->i_dentry, &newlast, + " l %ld ", CURRENT_TIME); +#endif + up(&newdir->i_sem); +new_exit: + iput(newdir); +old_exit: + iput(oldinode); +exit: return error; } asmlinkage int sys_link(const char * oldname, const char * newname) { int error; - char * to; - struct inode * oldinode; + char * from, * to; lock_kernel(); - error = lnamei(oldname, &oldinode); - if (error) - goto out; - error = getname(newname,&to); - if (error) { - iput(oldinode); - goto out; + error = getname(oldname,&from); + if (!error) { + error = getname(newname,&to); + if (!error) { + error = do_link(from,to); + putname(to); + } + putname(from); } - error = do_link(oldinode,to); - putname(to); -out: unlock_kernel(); return error; } -static int do_rename(const char * oldname, const char * newname, int must_be_dir) +static inline int do_rename(const char * oldname, const char * newname) { - struct inode * old_dir, * new_dir; - const char * old_base, * new_base; - int old_len, new_len, error; + char oldbuf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr oldlast; + char 
newbuf[MAX_TRANS_FILELEN+MAX_TRANS_SUFFIX+2]; + struct qstr newlast; + struct dentry * oldent = NULL; + struct inode * olddir, * newdir; + struct inode * oldinode, * newinode; + int error, newlasterror; - error = dir_namei(oldname, &old_len, &old_base, NULL, &old_dir); + error = __namei(NAM_FOLLOW_TRAILSLASH, oldname, NULL, oldbuf, + &olddir, &oldinode, &oldlast, &oldent, NULL); if (error) - return error; - if ((error = permission(old_dir,MAY_WRITE | MAY_EXEC)) != 0) { - iput(old_dir); - return error; - } - if (!old_len || (old_base[0] == '.' && - (old_len == 1 || (old_base[1] == '.' && - old_len == 2)))) { - iput(old_dir); - return -EPERM; - } - error = dir_namei(newname, &new_len, &new_base, NULL, &new_dir); - if (error) { - iput(old_dir); - return error; - } - if ((error = permission(new_dir,MAY_WRITE | MAY_EXEC)) != 0){ - iput(old_dir); - iput(new_dir); - return error; + goto exit; + if ((error = permission(olddir,MAY_WRITE | MAY_EXEC)) != 0) + goto old_exit; + if (!oldlast.len || (oldlast.name[0] == '.' && + (oldlast.len == 1 || (oldlast.name[1] == '.' && + oldlast.len == 2)))) { + error = -EPERM; + goto old_exit; + } + /* Disallow moves of mountpoints. */ + if(oldinode->i_mount) { + error = -EBUSY; + goto old_exit; } - if (!new_len || (new_base[0] == '.' && - (new_len == 1 || (new_base[1] == '.' && - new_len == 2)))) { - iput(old_dir); - iput(new_dir); - return -EPERM; + + error = __namei(NAM_FOLLOW_LINK|NAM_TRANSCREATE, newname, NULL, newbuf, + &newdir, &newinode, &newlast, NULL, &newlasterror); + if (error) + goto old_exit; + if ((error = permission(newdir,MAY_WRITE | MAY_EXEC)) != 0) + goto new_exit; + if (!newlast.len || (newlast.name[0] == '.' && + (newlast.len == 1 || (newlast.name[1] == '.' 
&& + newlast.len == 2)))) { + error = -EPERM; + goto new_exit; + } + if (newdir->i_dev != olddir->i_dev) { + error = -EXDEV; + goto new_exit; + } + if (IS_RDONLY(newdir) || IS_RDONLY(olddir)) { + error = -EROFS; + goto new_exit; } - if (new_dir->i_dev != old_dir->i_dev) { - iput(old_dir); - iput(new_dir); - return -EXDEV; + /* + * A file cannot be removed from an append-only directory. + */ + if (IS_APPEND(olddir)) { + error = -EPERM; + goto new_exit; } - if (IS_RDONLY(new_dir) || IS_RDONLY(old_dir)) { - iput(old_dir); - iput(new_dir); - return -EROFS; + if (!olddir->i_op || !olddir->i_op->rename) { + error = -ENOSYS; /* was EPERM */ + goto new_exit; } - /* - * A file cannot be removed from an append-only directory +#ifdef CONFIG_TRANS_NAMES + /* if oldname has been translated, but newname not (and + * has not already a suffix), take over the suffix from oldname. */ - if (IS_APPEND(old_dir)) { - iput(old_dir); - iput(new_dir); - return -EPERM; - } - if (!old_dir->i_op || !old_dir->i_op->rename) { - iput(old_dir); - iput(new_dir); - return -EPERM; - } - new_dir->i_count++; - if (new_dir->i_sb && new_dir->i_sb->dq_op) - new_dir->i_sb->dq_op->initialize(new_dir, -1); - down(&new_dir->i_sem); - error = old_dir->i_op->rename(old_dir, old_base, old_len, - new_dir, new_base, new_len, must_be_dir); - up(&new_dir->i_sem); - iput(new_dir); + if(oldlast.name == oldbuf && newlast.name != newbuf && + newlast.name[newlast.len-1] != '#') { + int i = oldlast.len - 2; + while (i > 0 && oldlast.name[i] != '#') + i--; + memcpy(newbuf, newlast.name, newlast.len); + memcpy(newbuf+newlast.len, oldlast.name+i, oldlast.len - i); + newlast.len += oldlast.len - i; + newlast.name = newbuf; + } +#endif + atomic_inc(&olddir->i_count); + atomic_inc(&newdir->i_count); + if (newdir->i_sb && newdir->i_sb->dq_op) + newdir->i_sb->dq_op->initialize(newdir, -1); + down(&newdir->i_sem); + error = olddir->i_op->rename(olddir, oldlast.name, oldlast.len, + newdir, newlast.name, newlast.len); +#ifdef 
CONFIG_OMIRR + if(!error) + omirr_print(oldent, newdir->i_dentry, &newlast, + " m %ld ", CURRENT_TIME); +#endif + if(!error) { + d_del(d_lookup(newdir, &newlast, NULL), D_REMOVE); + d_move(d_lookup(olddir, &oldlast, NULL), newdir, &newlast, NULL); + } + up(&newdir->i_sem); +new_exit: + if(!newlasterror) + iput(newinode); + iput(newdir); +old_exit: + iput(oldinode); + iput(olddir); +exit: return error; } @@ -931,9 +1473,7 @@ asmlinkage int sys_rename(const char * oldname, const char * newname) if (!error) { error = getname(newname,&to); if (!error) { - error = do_rename(from,to, - remove_trailing_slashes(from) | - remove_trailing_slashes(to)); + error = do_rename(from,to); putname(to); } putname(from); diff --git a/fs/nametrans.c b/fs/nametrans.c new file mode 100644 index 000000000..15c98ed70 --- /dev/null +++ b/fs/nametrans.c @@ -0,0 +1,310 @@ +/* + * $Id: nametrans.c,v 1.2 1997/06/04 23:45:44 davem Exp $ + * + * linux/fs/nametrans.c - context-dependend filename suffixes. + * Copyright (C) 1997, Thomas Schoebel-Theuer, + * <schoebel@informatik.uni-stuttgart.de>. + * + * translates names of the form "filename#host=myhost#" to "filename" + * as if both names were hardlinked to the same file. + * benefit: diskless clients can mount the / filesystem of the + * server if /etc/fstab (and other config files) are organized using + * context suffixes. 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/utsname.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm/uaccess.h> +#include <linux/nametrans.h> + +char nametrans_txt[MAX_DEFAULT_TRANSLEN] = ""; +static struct translations * global_trans = NULL; +static int default_trans = 1; +static const char version[] = "revision: 2.3 <schoebel@informatik.uni-stuttgart.de>"; +int translations_dirty = 1; +static char * transl_names[] = { +#ifdef CONFIG_TR_NODENAME + "host=", system_utsname.nodename, +#endif +#ifdef CONFIG_TR_KERNNAME + "kname=", CONFIG_KERNNAME, +#endif +#ifdef CONFIG_TR_KERNTYPE + "ktype=", CONFIG_KERNTYPE, +#endif +#ifdef CONFIG_TR_MACHINE + "machine=", system_utsname.machine, +#endif +#ifdef CONFIG_TR_SYSNAME + "system=", system_utsname.sysname, +#endif + 0, 0 +}; + +/* Convert and do syntax checking. */ +static void convert(char * txt, struct translations * res) +{ + char * tmp = txt; + char * space = (char*)res + sizeof(struct translations); + + res->count = 0; + while(*tmp) { + struct qstr * name = &res->name[res->count]; + struct qstr * c_name = &res->c_name[res->count]; + int len; + char * p = tmp; + + if(*p++ != '#') + goto next; + while(*p && *p != '=' && *p != ':') + p++; + if(*p != '=') + goto next; + p++; + len = (unsigned long)p - (unsigned long)tmp; + c_name->name = space; + memcpy(space, tmp, len); + memcpy(space + len, "CREATE#", 8); + c_name->len = len + 7; + if(c_name->len >= MAX_TRANS_SUFFIX) + goto next; + while(*p && *p != '#' && *p != ':') + p++; + if(*p != '#') + goto next; + p++; + if(*p != ':' && *p) + goto next; + space += len + 8; + name->len = len = (unsigned long)p - (unsigned long)tmp; + if(len >= MAX_TRANS_SUFFIX) + goto next; + name->name = space; + memcpy(space, tmp, len); + space[len] = '\0'; + space += len + 1; + res->count++; + if(res->count >= MAX_TRANSLATIONS || + (unsigned long)space - (unsigned long)res >= PAGE_SIZE-2*MAX_TRANS_SUFFIX) + return; + 
next: + while(*p && *p++ != ':') ; + tmp = p; + } +} + +static inline void trans_to_string(struct translations * trans, char * buf, int maxlen) +{ + int i; + + for(i = 0; i < trans->count; i++) { + int len = trans->name[i].len; + if(len < maxlen) { + memcpy(buf, trans->name[i].name, len); + buf += len; + maxlen -= len; + *buf++ = ':'; + maxlen--; + } + } + buf--; + *buf = '\0'; +} + +static inline void default_nametrans(char * buf) +{ + char * res = buf; + char ** entry; + char * ptr; + + for (entry = transl_names; *entry; entry++) { + *res++ = '#'; + for(ptr = *entry; *ptr; ptr++) + *res++ = *ptr; + entry++; + for(ptr = *entry; *ptr; ptr++) + *res++ = *ptr; + *res++ = '#'; + *res++ = ':'; + } + res--; + *res = '\0'; +} + +void nametrans_setup(char * line) +{ + if(line) { + default_trans = (!line[0]); + if(!global_trans) { + /* This can happen at boot time, and there is no chance + * to allocate memory at this early stage. + */ + strncpy(nametrans_txt, line, MAX_DEFAULT_TRANSLEN); + } else { + if(default_trans) { + default_nametrans(nametrans_txt); + line = nametrans_txt; + } + convert(line, global_trans); + + /* Show what really was recognized after parsing... */ + trans_to_string(global_trans, nametrans_txt, MAX_DEFAULT_TRANSLEN); + } + } +} + +/* If the _first_ environment variable is "NAMETRANS", return + * a pointer to the list of appendices. + * You can set the first environment variable using + * 'env - NAMETRANS=... "`env`" command ...' + */ +char* env_transl(void) +{ + char* env; + int i; + + if(current && current->mm && (env = (char*)current->mm->env_start) + && get_ds() != get_fs() + && current->mm->env_end>=current->mm->env_start+10 + && !verify_area(VERIFY_READ,env,10)) { + for(i=0; i<10; i++) { + char c; + + get_user(c, env++); + if(c != "NAMETRANS="[i]) + return 0; + } + return env; + } + return 0; +} + +/* If name has the correct suffix "#keyword=correct_context#", + * return position of the suffix, else 0. 
+ */ +char *testname(int restricted, char* name) +{ + char * ptr = name; + char * cut; + char * env; + struct translations * trans; + int i, len; + char c, tmp; + + env = env_transl(); +#ifdef CONFIG_TRANS_RESTRICT + if(!env && restricted) + goto done; +#else + (void)restricted; /* inhibit parameter usage warning */ +#endif + if(get_user(c, ptr)) + goto done; + while(c && c != '#') { + ptr++; + __get_user(c, ptr); + } + if(!c) + goto done; + cut = ptr++; + if(get_user(c, ptr)) + goto done; + while (c && c != '#') { + ptr++; + get_user(c, ptr); + } + if(!c) + goto done; + get_user(tmp, ptr); + if(tmp) + goto done; + trans = get_translations(env); + len = (unsigned long)ptr - (unsigned long)cut; + for(i = 0; i < trans->count; i++) + if(trans->name[i].len == len) { + const char * p1 = cut; + const char * p2 = trans->name[i].name; + get_user(c, p1); + while(c && c == *p2++) { + p1++; + get_user(c, p1); + } + if(!c) + return cut; + } +done: + return NULL; +} + +static inline void check_dirty(void) +{ + if(translations_dirty && default_trans) { + nametrans_setup(""); + translations_dirty = 0; + } +} + +struct translations * get_translations(char * env) +{ + struct translations * res; + + if(env) { + char * env_txt = (char*)__get_free_page(GFP_KERNEL); + + strncpy_from_user(env_txt, env, PAGE_SIZE); + res = (struct translations *)__get_free_page(GFP_KERNEL); + convert(env_txt, res); + free_page((unsigned long)env_txt); + } else { + check_dirty(); + res = global_trans; + } + return res; +} + +int nametrans_dostring(ctl_table * table, int write, struct file * filp, + void * buffer, size_t * lenp) +{ + int res; + check_dirty(); + res = proc_dostring(table, write, filp, buffer, lenp); + if(!res && write) + nametrans_setup(nametrans_txt); + + return res; +} + +int nametrans_string(ctl_table * table, int * name, int nlen, + void * oldval, size_t * oldlenp, + void * newval, size_t newlen, void ** context) +{ + int res; + check_dirty(); + res = sysctl_string(table, name, nlen, 
oldval, oldlenp, newval, newlen, context); + if(!res && newval && newlen) + nametrans_setup(nametrans_txt); + + return res; +} + +void init_nametrans(void) +{ + if(!global_trans) + global_trans = (struct translations*)__get_free_page(GFP_KERNEL); + if(!global_trans) { + printk("NAMETRANS: No free memory\n"); + return; + } + nametrans_setup(nametrans_txt); + + /* Notify user for the default/supplied translations. + * Extremely useful for finding translation problems. + */ + printk("Nametrans %s\nNametrans %s: %s\n", version, + default_trans ? "default translations" : "external parameter", + nametrans_txt); +} diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e62e26e47..5eb73dbd0 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -67,8 +67,7 @@ static int static int ncp_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir); + struct inode *new_dir, const char *new_name, int new_len); static inline void str_upper(char *name) { @@ -129,7 +128,6 @@ struct inode_operations ncp_dir_inode_operations = NULL, /* mknod */ ncp_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* bmap */ NULL, /* truncate */ NULL, /* permission */ @@ -965,8 +963,7 @@ static int ncp_unlink(struct inode *dir, const char *name, int len) } static int ncp_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir) + struct inode *new_dir, const char *new_name, int new_len) { int res; char _old_name[old_len + 1]; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 9bdc793cc..3cb50fbbd 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -232,7 +232,6 @@ struct inode_operations ncp_file_inode_operations = NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* bmap */ NULL /* truncate */ }; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 4cf65f8a9..72ca3e6dd 
100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -406,9 +406,11 @@ int ncp_malloced; int ncp_current_malloced; #endif -static struct file_system_type ncp_fs_type = -{ - ncp_read_super, "ncpfs", 0, NULL +static struct file_system_type ncp_fs_type = { + "ncpfs", + FS_NO_DCACHE, + ncp_read_super, + NULL }; __initfunc(int init_ncp_fs(void)) diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 52ff3c76a..8e814d153 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -133,7 +133,7 @@ int ncp_mmap(struct inode *inode, struct file *file, struct vm_area_struct *vma) inode->i_dirt = 1; } vma->vm_inode = inode; - inode->i_count++; + atomic_inc(&inode->i_count); vma->vm_ops = &ncp_file_mmap; return 0; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a11b9fb6a..71835c255 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -51,7 +51,7 @@ static int nfs_symlink(struct inode *, const char *, int, const char *); static int nfs_link(struct inode *, struct inode *, const char *, int); static int nfs_mknod(struct inode *, const char *, int, int, int); static int nfs_rename(struct inode *, const char *, int, - struct inode *, const char *, int, int); + struct inode *, const char *, int); static struct file_operations nfs_dir_operations = { NULL, /* lseek - default */ @@ -78,7 +78,6 @@ struct inode_operations nfs_dir_inode_operations = { nfs_mknod, /* mknod */ nfs_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -351,7 +350,7 @@ static struct nfs_lookup_cache_entry { char filename[NFS_MAXNAMLEN + 1]; struct nfs_fh fhandle; struct nfs_fattr fattr; - int expiration_date; + unsigned long expiration_date; } nfs_lookup_cache[NFS_LOOKUP_CACHE_SIZE]; static struct nfs_lookup_cache_entry *nfs_lookup_cache_index(struct inode *dir, @@ -492,7 +491,7 @@ static int nfs_lookup(struct inode *dir, const char *__name, int len, } memcpy(name,__name,len); name[len] = '\0'; - if (len == 1 && name[0] == '.') { /* cheat for 
"." */ + if (len == 0 || (len == 1 && name[0] == '.')) { /* cheat for "" and "." */ *result = dir; return 0; } @@ -649,11 +648,11 @@ static int nfs_sillyrename(struct inode *dir, const char *name, int len) char silly[16]; int slen, ret; - dir->i_count++; + atomic_inc(&dir->i_count); if (nfs_lookup(dir, name, len, &inode) < 0) return -EIO; /* arbitrary */ - if (inode->i_count == 1) { + if (atomic_read(&inode->i_count) == 1) { iput(inode); return -EIO; } @@ -679,7 +678,7 @@ static int nfs_sillyrename(struct inode *dir, const char *name, int len) nfs_lookup_cache_remove(dir, NULL, name); nfs_lookup_cache_remove(dir, NULL, silly); NFS_RENAMED_DIR(inode) = dir; - dir->i_count++; + atomic_inc(&dir->i_count); } nfs_invalidate_dircache(dir); iput(inode); @@ -823,8 +822,7 @@ static int nfs_link(struct inode *oldinode, struct inode *dir, * file in old_dir will go away when the last process iput()s the inode. */ static int nfs_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir) + struct inode *new_dir, const char *new_name, int new_len) { int error; @@ -850,10 +848,6 @@ static int nfs_rename(struct inode *old_dir, const char *old_name, int old_len, return -ENAMETOOLONG; } - /* We don't do rename() with trailing slashes over NFS now. Hmm. 
*/ - if (must_be_dir) - return -EINVAL; - error = nfs_proc_rename(NFS_SERVER(old_dir), NFS_FH(old_dir), old_name, NFS_FH(new_dir), new_name); @@ -879,7 +873,8 @@ void nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) int was_empty; dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d)\n", - inode->i_dev, inode->i_ino, inode->i_count); + inode->i_dev, inode->i_ino, + atomic_read(&inode->i_count)); if (!inode || !fattr) { printk("nfs_refresh_inode: inode or fattr is NULL\n"); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ca42719bd..56540bbdc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -69,7 +69,6 @@ struct inode_operations nfs_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ nfs_readpage, /* readpage */ nfs_writepage, /* writepage */ NULL, /* bmap */ @@ -143,7 +142,7 @@ nfs_file_write(struct inode *inode, struct file *file, int result; dfprintk(VFS, "nfs: write(%x/%ld (%d), %lu@%lu)\n", - inode->i_dev, inode->i_ino, inode->i_count, + inode->i_dev, inode->i_ino, atomic_read(&inode->i_count), count, (unsigned long) file->f_pos); if (!inode) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f883270a..5ab9600e9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -316,7 +316,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fhandle, nfs_refresh_inode(inode, fattr); } dprintk("NFS: fhget(%x/%ld ct=%d)\n", - inode->i_dev, inode->i_ino, inode->i_count); + inode->i_dev, inode->i_ino, + atomic_read(&inode->i_count)); return inode; } @@ -433,7 +434,10 @@ done: * File system information */ static struct file_system_type nfs_fs_type = { - nfs_read_super, "nfs", 0, NULL + "nfs", + FS_NO_DCACHE, + nfs_read_super, + NULL }; /* diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 66070efd7..add3309f3 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -1,5 +1,5 @@ /* - * $Id: nfsroot.c,v 1.36 1997/05/27 15:57:47 mj Exp $ + * $Id: nfsroot.c,v 1.37 1997/06/04 08:28:10 davem Exp $ * * Copyright 
(C) 1995, 1996 Gero Kuhlmann <gero@gkminix.han.de> * @@ -78,6 +78,7 @@ #include <asm/param.h> #include <linux/utsname.h> +#include <linux/nametrans.h> #include <linux/in.h> #include <linux/if.h> #include <linux/inet.h> @@ -832,6 +833,9 @@ __initfunc(static void root_do_bootp_ext(u8 *ext)) root_bootp_string(nfs_path, ext+1, *ext, NFS_MAXPATHLEN); break; } +#ifdef CONFIG_TRANS_NAMES + translations_dirty = 1; +#endif } @@ -1254,6 +1258,9 @@ __initfunc(static void root_nfs_addrs(char *addrs)) system_utsname.domainname[0] = '\0'; user_dev_name[0] = '\0'; bootp_flag = rarp_flag = 1; +#ifdef CONFIG_TRANS_NAMES + translations_dirty = 1; +#endif /* The following is just a shortcut for automatic IP configuration */ if (!strcmp(addrs, "bootp")) { @@ -1299,6 +1306,9 @@ __initfunc(static void root_nfs_addrs(char *addrs)) } strncpy(system_utsname.nodename, ip, __NEW_UTS_LEN); system_utsname.nodename[__NEW_UTS_LEN] = '\0'; +#ifdef CONFIG_TRANS_NAMES + translations_dirty = 1; +#endif break; case 5: strncpy(user_dev_name, ip, IFNAMSIZ); @@ -1332,6 +1342,9 @@ __initfunc(static int root_nfs_setup(void)) if (!system_utsname.nodename[0]) { strncpy(system_utsname.nodename, in_ntoa(myaddr), __NEW_UTS_LEN); system_utsname.nodename[__NEW_UTS_LEN] = '\0'; +#ifdef CONFIG_TRANS_NAMES + translations_dirty = 1; +#endif } /* Set the correct netmask */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 714101bb7..58dcd95d0 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -177,8 +177,8 @@ nfs_proc_remove(struct nfs_server *server, struct nfs_fh *dir, const char *name) int nfs_proc_rename(struct nfs_server *server, - struct nfs_fh *old_dir, const char *old_name, - struct nfs_fh *new_dir, const char *new_name) + struct nfs_fh *old_dir, const char *old_name, + struct nfs_fh *new_dir, const char *new_name) { struct nfs_renameargs arg = { old_dir, old_name, new_dir, new_name }; int status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cf7c5ece7..2c3b59036 100644 --- a/fs/nfs/read.c +++ 
b/fs/nfs/read.c @@ -188,7 +188,7 @@ nfs_readpage_async(struct inode *inode, struct page *page) nfs_readpage_result, req); if (result >= 0) { - inode->i_count++; + atomic_inc(&inode->i_count); atomic_inc(&page->count); return 0; } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 7ea2d6f99..a22f96239 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -19,8 +19,6 @@ #include <asm/uaccess.h> static int nfs_readlink(struct inode *, char *, int); -static int nfs_follow_link(struct inode *, struct inode *, int, int, - struct inode **); /* * symlinks can't do much... @@ -37,7 +35,6 @@ struct inode_operations nfs_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ nfs_readlink, /* readlink */ - nfs_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -45,55 +42,6 @@ struct inode_operations nfs_symlink_inode_operations = { NULL /* permission */ }; -static int nfs_follow_link(struct inode *dir, struct inode *inode, - int flag, int mode, struct inode **res_inode) -{ - int error; - unsigned int len; - char *res, *res2; - void *mem; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput(inode); - iput(dir); - return -ELOOP; - } - error = nfs_proc_readlink(NFS_SERVER(inode), NFS_FH(inode), &mem, - &res, &len, NFS_MAXPATHLEN); - if (error) { - iput(inode); - iput(dir); - kfree(mem); - return error; - } - while ((res2 = (char *) kmalloc(NFS_MAXPATHLEN + 1, GFP_NFS)) == NULL) { - schedule(); - } - memcpy(res2, res, len); - res2[len] = '\0'; - kfree(mem); - iput(inode); - current->link_count++; - error = open_namei(res2, flag, mode, res_inode, dir); - current->link_count--; - kfree_s(res2, NFS_MAXPATHLEN + 1); - return error; -} - static int nfs_readlink(struct inode *inode, char *buffer, int buflen) { int error; @@ 
-103,10 +51,6 @@ static int nfs_readlink(struct inode *inode, char *buffer, int buflen) dfprintk(VFS, "nfs: readlink(%x/%ld)\n", inode->i_dev, inode->i_ino); - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } if (buflen > NFS_MAXPATHLEN) buflen = NFS_MAXPATHLEN; error = nfs_proc_readlink(NFS_SERVER(inode), NFS_FH(inode), &mem, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4e2de9cfc..f27d083e4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -133,7 +133,7 @@ nfs_unlock_page(struct page *page) if (test_and_clear_bit(PG_decr_after, &page->flags)) atomic_dec(&page->count); if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) - swap_after_unlock_page(page->swap_unlock_entry); + swap_after_unlock_page(page->pg_swap_entry); #endif } @@ -338,7 +338,7 @@ create_write_request(struct inode *inode, struct page *page, wreq->wb_page = page; wreq->wb_offset = offset; wreq->wb_bytes = bytes; - inode->i_count++; + atomic_inc(&inode->i_count); atomic_inc(&page->count); append_write_request(&NFS_WRITEBACK(inode), wreq); @@ -788,7 +788,7 @@ nfs_wback_result(struct rpc_task *task) dprintk("NFS: %4d saving write failure code\n", task->tk_pid); append_write_request(&nfs_failed_requests, req); - inode->i_count++; + atomic_inc(&inode->i_count); } clear_bit(PG_uptodate, &page->flags); } else if (!WB_CANCELLED(req)) { diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c83150b5f..a3b29313a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -331,7 +331,7 @@ exp_rootfh(struct svc_client *clp, dev_t dev, ino_t ino, struct knfs_fh *f) if (!(exp = exp_get(clp, dev, ino))) return -EPERM; - exp->ex_inode->i_count++; + atomic_inc(&exp->ex_inode->i_count); fh_compose(&fh, exp, exp->ex_inode); memcpy(f, &fh.fh_handle, sizeof(struct knfs_fh)); fh_put(&fh); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c466321ed..88b69cb40 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -35,8 +35,8 @@ # define copy_to_user memcpy_tofs # define access_ok 
!verify_area #endif -#include <asm/smp.h> -#include <asm/smp_lock.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> extern long sys_call_table[]; @@ -214,8 +214,6 @@ EXPORT_NO_SYMBOLS; MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); #endif -static unsigned long old_syscallvec; - extern int (*do_nfsservctl)(int, void *, void *); /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6327cee48..a68fca997 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -39,9 +39,6 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP -/* Symbol not exported */ -static struct super_block *get_super(dev_t dev); - /* Open mode for nfsd_open */ #define OPEN_READ 0 #define OPEN_WRITE 1 @@ -123,13 +120,13 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, dotdot = (len == 2 && name[0] == '.' && name[1] == '.'); if (dotdot) { if (dirp == current->fs->root) { - dirp->i_count++; + atomic_inc(&dirp->i_count); *resfh = *fhp; return 0; } if (dirp->i_dev == exp->ex_dev && dirp->i_ino == exp->ex_ino) { - dirp->i_count++; + atomic_inc(&dirp->i_count); *resfh = *fhp; return 0; } @@ -147,12 +144,12 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, if (perm != 0) return perm; if (!len) { - dirp->i_count++; + atomic_inc(&dirp->i_count); *resfh = *fhp; return 0; } - dirp->i_count++; /* lookup eats the dirp inode */ + atomic_inc(&dirp->i_count); /* lookup eats the dirp inode */ err = dirp->i_op->lookup(dirp, name, len, &inode); if (err) @@ -165,7 +162,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, if (!dotdot && (sb = inode->i_sb) && (inode == sb->s_mounted)) { iput(inode); inode = sb->s_covered; - inode->i_count++; + atomic_inc(&inode->i_count); } fh_compose(resfh, exp, inode); @@ -294,7 +291,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } } - inode->i_count++; + atomic_inc(&inode->i_count); return 0; } @@ -307,7 +304,7 @@ nfsd_close(struct file *filp) struct inode *inode; inode = filp->f_inode; - if 
(!inode->i_count) + if (!atomic_read(&inode->i_count)) printk(KERN_WARNING "nfsd: inode count == 0!\n"); if (filp->f_op && filp->f_op->release) filp->f_op->release(inode, filp); @@ -536,7 +533,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_lock(fhp); /* lock directory */ dirp = fhp->fh_inode; - dirp->i_count++; /* dirop eats the inode */ + atomic_inc(&dirp->i_count); /* dirop eats the inode */ switch (type) { case S_IFREG: @@ -571,7 +568,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * If the VFS call doesn't return the inode, look it up now. */ if (inode == NULL) { - dirp->i_count++; + atomic_inc(&dirp->i_count); err = dirp->i_op->lookup(dirp, fname, flen, &inode); if (err < 0) return -nfserrno(err); /* Huh?! */ @@ -646,7 +643,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) if (!inode->i_op || !inode->i_op->readlink) return nfserr_io; - inode->i_count++; + atomic_inc(&inode->i_count); oldfs = get_fs(); set_fs(KERNEL_DS); err = inode->i_op->readlink(inode, buf, *lenp); set_fs(oldfs); @@ -683,7 +680,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfserr_perm; fh_lock(fhp); /* lock inode */ - dirp->i_count++; + atomic_inc(&dirp->i_count); err = dirp->i_op->symlink(dirp, fname, flen, path); fh_unlock(fhp); /* unlock inode */ @@ -696,7 +693,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, /* * Okay, now look up the inode of the new symlink. 
*/ - dirp->i_count++; /* lookup eats the dirp inode */ + atomic_inc(&dirp->i_count); /* lookup eats the dirp inode */ err = dirp->i_op->lookup(dirp, fname, flen, &inode); if (err) return nfserrno(-err); @@ -733,7 +730,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, return nfserr_perm; fh_lock(ffhp); /* lock directory inode */ - dirp->i_count++; + atomic_inc(&dirp->i_count); err = dirp->i_op->link(dest, dirp, fname, len); fh_unlock(ffhp); /* unlock inode */ @@ -773,9 +770,9 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, return nfserr_perm; fh_lock(tfhp); /* lock destination directory */ - tdir->i_count++; - fdir->i_count++; - err = fdir->i_op->rename(fdir, fname, flen, tdir, tname, tlen, 0); + atomic_inc(&tdir->i_count); + atomic_inc(&fdir->i_count); + err = fdir->i_op->rename(fdir, fname, flen, tdir, tname, tlen); fh_unlock(tfhp); /* unlock inode */ if (!err && EX_ISSYNC(tfhp->fh_export)) { @@ -808,12 +805,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (type == S_IFDIR) { if (!dirp->i_op || !dirp->i_op->rmdir) return nfserr_notdir; - dirp->i_count++; + atomic_inc(&dirp->i_count); err = dirp->i_op->rmdir(dirp, fname, flen); } else { /* other than S_IFDIR */ if (!dirp->i_op || !dirp->i_op->unlink) return nfserr_perm; - dirp->i_count++; + atomic_inc(&dirp->i_count); err = dirp->i_op->unlink(dirp, fname, flen); } @@ -1041,26 +1038,6 @@ nfsd_parentdev(dev_t* devp) return 1; } -/* Duplicated here from fs/super.c because it's not exported */ -static struct super_block * -get_super(dev_t dev) -{ - struct super_block *s; - - if (!dev) - return NULL; - s = 0 + super_blocks; - while (s < NR_SUPER + super_blocks) - if (s->s_dev == dev) { - wait_on_super(s); - if (s->s_dev == dev) - return s; - s = 0 + super_blocks; - } else - s++; - return NULL; -} - /* * This is a copy from fs/inode.c because it wasn't exported. 
*/ @@ -4,6 +4,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ +#include <linux/config.h> #include <linux/vfs.h> #include <linux/types.h> #include <linux/utime.h> @@ -20,6 +21,7 @@ #include <linux/file.h> #include <linux/smp.h> #include <linux/smp_lock.h> +#include <linux/omirr.h> #include <asm/uaccess.h> #include <asm/bitops.h> @@ -33,7 +35,7 @@ asmlinkage int sys_statfs(const char * path, struct statfs * buf) error = verify_area(VERIFY_WRITE, buf, sizeof(struct statfs)); if (error) goto out; - error = namei(path,&inode); + error = namei(NAM_FOLLOW_LINK, path, &inode); if (error) goto out; error = -ENOSYS; @@ -88,6 +90,7 @@ int do_truncate(struct inode *inode, unsigned long length) vmtruncate(inode, length); if (inode->i_op && inode->i_op->truncate) inode->i_op->truncate(inode); + inode->i_status |= ST_MODIFIED; } up(&inode->i_sem); return error; @@ -99,7 +102,7 @@ asmlinkage int sys_truncate(const char * path, unsigned long length) int error; lock_kernel(); - error = namei(path,&inode); + error = namei(NAM_FOLLOW_LINK, path, &inode); if (error) goto out; @@ -185,33 +188,36 @@ asmlinkage int sys_utime(char * filename, struct utimbuf * times) struct iattr newattrs; lock_kernel(); - error = namei(filename,&inode); + /* Hmm, should I always follow symlinks or not ? 
*/ + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; error = -EROFS; - if (IS_RDONLY(inode)) { - iput(inode); - goto out; - } + if (IS_RDONLY(inode)) + goto iput_and_out; + /* Don't worry, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { error = get_user(newattrs.ia_atime, ×->actime); if (!error) error = get_user(newattrs.ia_mtime, ×->modtime); - if (error) { - iput(inode); - goto out; - } + if (error) + goto iput_and_out; + newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; } else { if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) { - iput(inode); - goto out; - } + (error = permission(inode,MAY_WRITE)) != 0) + goto iput_and_out; } error = notify_change(inode, &newattrs); +#ifdef CONFIG_OMIRR + if(!error) + omirr_printall(inode, " U %ld %ld %ld ", CURRENT_TIME, + newattrs.ia_atime, newattrs.ia_mtime); +#endif +iput_and_out: iput(inode); out: unlock_kernel(); @@ -231,7 +237,7 @@ asmlinkage int sys_utimes(char * filename, struct timeval * utimes) struct iattr newattrs; lock_kernel(); - error = namei(filename,&inode); + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; error = -EROFS; @@ -252,6 +258,11 @@ asmlinkage int sys_utimes(char * filename, struct timeval * utimes) goto iput_and_out; } error = notify_change(inode, &newattrs); +#ifdef CONFIG_OMIRR + if(!error) + omirr_printall(inode, " U %ld %ld %ld ", CURRENT_TIME, + newattrs.ia_atime, newattrs.ia_mtime); +#endif iput_and_out: iput(inode); out: @@ -276,7 +287,7 @@ asmlinkage int sys_access(const char * filename, int mode) old_fsgid = current->fsgid; current->fsuid = current->uid; current->fsgid = current->gid; - res = namei(filename,&inode); + res = namei(NAM_FOLLOW_LINK, filename, &inode); if (!res) { res = permission(inode, mode); iput(inode); @@ -291,24 +302,23 @@ out: asmlinkage int sys_chdir(const char * filename) { struct inode * inode; + struct inode * tmpi; 
int error; lock_kernel(); - error = namei(filename,&inode); + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; error = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) { - iput(inode); - goto out; - } - if ((error = permission(inode,MAY_EXEC)) != 0) { - iput(inode); - goto out; - } - iput(current->fs->pwd); - current->fs->pwd = inode; - error = 0; + if (!S_ISDIR(inode->i_mode)) + goto iput_and_out; + if ((error = permission(inode,MAY_EXEC)) != 0) + goto iput_and_out; + + /* exchange inodes */ + tmpi = current->fs->pwd; current->fs->pwd = inode; inode = tmpi; +iput_and_out: + iput(inode); out: unlock_kernel(); return error; @@ -333,8 +343,7 @@ asmlinkage int sys_fchdir(unsigned int fd) goto out; iput(current->fs->pwd); current->fs->pwd = inode; - inode->i_count++; - error = 0; + atomic_inc(&inode->i_count); out: unlock_kernel(); return error; @@ -343,25 +352,23 @@ out: asmlinkage int sys_chroot(const char * filename) { struct inode * inode; + struct inode * tmpi; int error; lock_kernel(); - error = namei(filename,&inode); + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; error = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) { - iput(inode); - goto out; - } + if (!S_ISDIR(inode->i_mode)) + goto iput_and_out; error = -EPERM; - if (!fsuser()) { - iput(inode); - goto out; - } - iput(current->fs->root); - current->fs->root = inode; + if (!fsuser()) + goto iput_and_out; + tmpi = current->fs->root; current->fs->root = inode; inode = tmpi; error = 0; +iput_and_out: + iput(inode); out: unlock_kernel(); return error; @@ -392,6 +399,10 @@ asmlinkage int sys_fchmod(unsigned int fd, mode_t mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; inode->i_dirt = 1; err = notify_change(inode, &newattrs); +#ifdef CONFIG_OMIRR + if(!err) + omirr_printall(inode, " M %ld %ld ", CURRENT_TIME, newattrs.ia_mode); +#endif out: unlock_kernel(); return err; @@ -404,7 +415,11 @@ asmlinkage int sys_chmod(const char * filename, mode_t mode) struct iattr newattrs; 
lock_kernel(); - error = namei(filename,&inode); + /* I'm not sure whether to use NAM_FOLLOW_TRAILSLASH instead, + * because permissions on symlinks now can never be changed, + * but on the other hand they are never needed. + */ + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; error = -EROFS; @@ -419,6 +434,10 @@ asmlinkage int sys_chmod(const char * filename, mode_t mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; inode->i_dirt = 1; error = notify_change(inode, &newattrs); +#ifdef CONFIG_OMIRR + if(!error) + omirr_printall(inode, " M %ld %ld ", CURRENT_TIME, newattrs.ia_mode); +#endif iput_and_out: iput(inode); out: @@ -481,6 +500,11 @@ asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) inode->i_sb->dq_op->transfer(inode, &newattrs, 1); } else error = notify_change(inode, &newattrs); +#ifdef CONFIG_OMIRR + if(!error) + omirr_printall(inode, " O %d %d ", CURRENT_TIME, + newattrs.ia_uid, newattrs.ia_gid); +#endif out: unlock_kernel(); return error; @@ -493,7 +517,7 @@ asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) struct iattr newattrs; lock_kernel(); - error = lnamei(filename,&inode); + error = namei(NAM_FOLLOW_TRAILSLASH, filename, &inode); if (error) goto out; error = -EROFS; @@ -532,12 +556,17 @@ asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) inode->i_sb->dq_op->initialize(inode, -1); error = -EDQUOT; if (inode->i_sb->dq_op->transfer(inode, &newattrs, 0)) - goto out; + goto iput_and_out; error = notify_change(inode, &newattrs); if (error) inode->i_sb->dq_op->transfer(inode, &newattrs, 1); } else error = notify_change(inode, &newattrs); +#ifdef CONFIG_OMIRR + if(!error) + omirr_printall(inode, " O %d %d ", CURRENT_TIME, + newattrs.ia_uid, newattrs.ia_gid); +#endif iput_and_out: iput(inode); out: @@ -385,7 +385,6 @@ struct inode_operations pipe_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ 
NULL, /* writepage */ NULL, /* bmap */ @@ -442,7 +441,7 @@ int do_pipe(int *fd) close_f12_inode_i: put_unused_fd(i); close_f12_inode: - inode->i_count--; + atomic_dec(&inode->i_count); iput(inode); close_f12: put_filp(f2); diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 75ec3dd85..6f336245d 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,8 +8,11 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := proc.o -O_OBJS := inode.o root.o base.o generic.o mem.o link.o fd.o array.o \ +O_OBJS := inode.o root.o base.o generic.o mem.o link.o arbitrary.o fd.o array.o \ kmsg.o scsi.o proc_tty.o +ifdef CONFIG_OMIRR +O_OBJS := $(O_OBJS) omirr.o +endif OX_OBJS := procfs_syms.o M_OBJS := diff --git a/fs/proc/arbitrary.c b/fs/proc/arbitrary.c new file mode 100644 index 000000000..1e18e594e --- /dev/null +++ b/fs/proc/arbitrary.c @@ -0,0 +1,58 @@ +/* + * $Id: arbitrary.c,v 1.2 1997/06/05 01:27:47 davem Exp $ + * + * linux/fs/proc/arbitrary.c - lookup() for arbitrary inodes. + * Copyright (C) 1997, Thomas Schoebel-Theuer, + * <schoebel@informatik.uni-stuttgart.de>. + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/fs.h> + +/* Format of dev/inode pairs that can be used as file names: + * [<dev_number_in_hex>]:<inode_number_in_decimal> + * (the same format that is already in use in /proc/<pid>/exe, + * /proc/<pid>/cwd and /proc/<pid>/root). + */ +/* Note that readdir does not supply such names, so they must be used + * either "blind" or must be queried another way, for example + * as result of a virtual symlink (see linux/fs/proc/link.c). 
+ */ +int proc_arbitrary_lookup(struct inode * dir, const char * name, + int len, struct inode ** result) +{ + int dev, ino; + char * ptr = (char*)name; + kdev_t kdev; + int i; + int error = -EINVAL; + + if(*ptr++ != '[') + goto done; + dev = simple_strtoul(ptr, &ptr, 16); + if(*ptr++ != ']') + goto done; + if(*ptr++ != ':') + goto done; + ino = simple_strtoul(ptr, &ptr, 0); + if((long)ptr - (long)name != len) + goto done; + + error = -ENOENT; + kdev = to_kdev_t(dev); + if(!kdev) + goto done; + for(i = 0; i < NR_SUPER; i++) + if(super_blocks[i].s_dev == kdev) + break; + if(i < NR_SUPER) { + *result = iget(&super_blocks[i], ino); + if(*result) + error = 0; + } +done: + iput(dir); + return error; +} diff --git a/fs/proc/array.c b/fs/proc/array.c index 516e87813..518ef1b4c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -154,8 +154,6 @@ static long read_profile(struct inode *inode, struct file *file, return read; } - - /* * Writing to /proc/profile resets the counters * @@ -1042,6 +1040,9 @@ extern int get_smp_prof_list(char *); #ifdef CONFIG_ZORRO extern int zorro_get_list(char *); #endif +#if defined (CONFIG_AMIGA) || defined (CONFIG_ATARI) +extern int get_hardware_list(char *); +#endif static long get_root_array(char * page, int type, char **start, off_t offset, unsigned long length) @@ -1126,6 +1127,10 @@ static long get_root_array(char * page, int type, char **start, case PROC_ZORRO: return zorro_get_list(page); #endif +#if defined (CONFIG_AMIGA) || defined (CONFIG_ATARI) + case PROC_HARDWARE: + return get_hardware_list(page); +#endif } return -EBADF; } @@ -1232,7 +1237,6 @@ struct inode_operations proc_array_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -1278,7 +1282,6 @@ struct inode_operations proc_arraylong_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ 
NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 7e9a65e08..b983e73f6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -42,7 +42,6 @@ static struct inode_operations proc_base_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/fd.c b/fs/proc/fd.c index fd262bc9d..884631db8 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -44,7 +44,6 @@ struct inode_operations proc_fd_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 6e80e8298..1424dd1ef 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -51,7 +51,6 @@ struct inode_operations proc_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -74,7 +73,6 @@ struct inode_operations proc_net_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 1cc6a9c83..6ef386ffa 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -70,7 +70,6 @@ struct inode_operations proc_kmsg_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/link.c b/fs/proc/link.c index d5c08eafd..695ed9bba 100644 --- a/fs/proc/link.c +++ b/fs/proc/link.c @@ -14,10 +14,9 @@ #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/dalloc.h> static int proc_readlink(struct inode *, char *, int); -static int proc_follow_link(struct inode *, struct inode *, int, int, - struct inode 
**); /* * PLAN9_SEMANTICS won't work any more: it used an ugly hack that broke @@ -53,7 +52,6 @@ struct inode_operations proc_link_inode_operations = { NULL, /* mknod */ NULL, /* rename */ proc_readlink, /* readlink */ - proc_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -61,7 +59,11 @@ struct inode_operations proc_link_inode_operations = { NULL /* permission */ }; - +/* [Feb-1997 T. Schoebel-Theuer] This is no longer called from the + * VFS, but only from proc_readlink(). All the functionality + * should be moved there (without using temporary inodes any more) + * and then it could be eliminated. + */ static int proc_follow_link(struct inode * dir, struct inode * inode, int flag, int mode, struct inode ** res_inode) { @@ -130,33 +132,35 @@ static int proc_follow_link(struct inode * dir, struct inode * inode, if (!new_inode) return -ENOENT; *res_inode = new_inode; - new_inode->i_count++; + atomic_inc(&new_inode->i_count); return 0; } static int proc_readlink(struct inode * inode, char * buffer, int buflen) { - int i; - unsigned int dev,ino; - char buf[64]; + int error = proc_follow_link(NULL, inode, 0, 0, &inode); - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } - i = proc_follow_link(NULL, inode, 0, 0, &inode); - if (i) - return i; + if (error) + return error; if (!inode) return -EIO; - dev = kdev_t_to_nr(inode->i_dev); - ino = inode->i_ino; + + /* This will return *one* of the alias names (which is not quite + * correct). I have to rethink the problem, so this is only a + * quick hack... + */ + if(inode->i_dentry) { + char * tmp = (char*)__get_free_page(GFP_KERNEL); + int len = d_path(inode->i_dentry, current->fs->root, tmp); + int min = buflen<PAGE_SIZE ? 
buflen : PAGE_SIZE; + if(len <= min) + min = len+1; + copy_to_user(buffer, tmp, min); + free_page((unsigned long)tmp); + error = len; + } else { + error= -ENOENT; + } iput(inode); - i = sprintf(buf,"[%04x]:%u", dev, ino); - if (buflen > i) - buflen = i; - i = 0; - while (i < buflen) - put_user(buf[i++],buffer++); - return i; + return error; } diff --git a/fs/proc/mem.c b/fs/proc/mem.c index 97acb5ee8..a64ead624 100644 --- a/fs/proc/mem.c +++ b/fs/proc/mem.c @@ -328,7 +328,6 @@ struct inode_operations proc_mem_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/net.c b/fs/proc/net.c index 257487569..3bc5c339c 100644 --- a/fs/proc/net.c +++ b/fs/proc/net.c @@ -111,7 +111,6 @@ struct inode_operations proc_net_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/proc/omirr.c b/fs/proc/omirr.c new file mode 100644 index 000000000..0e6377fb2 --- /dev/null +++ b/fs/proc/omirr.c @@ -0,0 +1,297 @@ +/* + * fs/proc/omirr.c - online mirror support + * + * (C) 1997 Thomas Schoebel-Theuer + */ + +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/dalloc.h> +#include <linux/omirr.h> +#include <asm/uaccess.h> + +static int nr_omirr_open = 0; +static int cleared_flag = 0; + +static char * buffer = NULL; +static int read_pos, write_pos; +static int clip_pos, max_pos; +static struct wait_queue * read_wait = NULL; +static struct wait_queue * write_wait = NULL; + +static /*inline*/ int reserve_write_space(int len) +{ + int rest = max_pos - write_pos; + + if(rest < len) { + clip_pos = write_pos; + write_pos = 0; + rest = max_pos; + } + while(read_pos > write_pos && read_pos <= write_pos+len) { + if(!nr_omirr_open) + return 0; + interruptible_sleep_on(&write_wait); + } + return 1; +} + +static 
/*inline*/ void write_space(int len) +{ + write_pos += len; + wake_up_interruptible(&read_wait); +} + +static /*inline*/ int reserve_read_space(int len) +{ + int rest = clip_pos - read_pos; + + if(!rest) { + read_pos = 0; + rest = clip_pos; + clip_pos = max_pos; + } + if(len > rest) + len = rest; + while(read_pos == write_pos) { + interruptible_sleep_on(&read_wait); + } + rest = write_pos - read_pos; + if(rest > 0 && rest < len) + len = rest; + return len; +} + +static /*inline*/ void read_space(int len) +{ + read_pos += len; + if(read_pos >= clip_pos) { + read_pos = 0; + clip_pos = max_pos; + } + wake_up_interruptible(&write_wait); +} + +static /*inline*/ void init_buffer(char * initxt) +{ + int len = initxt ? strlen(initxt) : 0; + + if(!buffer) { + buffer = (char*)__get_free_page(GFP_USER); + max_pos = clip_pos = PAGE_SIZE; + } + read_pos = write_pos = 0; + memcpy(buffer, initxt, len); + write_space(len); +} + +static int omirr_open(struct inode * inode, struct file * file) +{ + if(nr_omirr_open) + return -EAGAIN; + nr_omirr_open++; + if(!buffer) + init_buffer(NULL); + return 0; +} + +static int omirr_release(struct inode * inode, struct file * file) +{ + nr_omirr_open--; + read_space(0); + return 0; +} + +static long omirr_read(struct inode * inode, struct file * file, + char * buf, unsigned long count) +{ + char * tmp; + int len; + int error = 0; + + if(!count) + goto done; + error = -EINVAL; + if(!buf || count < 0) + goto done; + + error = verify_area(VERIFY_WRITE, buf, count); + if(error) + goto done; + + error = -EAGAIN; + if((file->f_flags & O_NONBLOCK) && read_pos == write_pos) + goto done; + + error = len = reserve_read_space(count); + tmp = buffer + read_pos; + while(len) { + put_user(*tmp++, buf++); + len--; + } + read_space(error); +done: + return error; +} + +int compute_name(struct dentry * entry, char * buf) +{ + int len; + + if(IS_ROOT(entry)) { + *buf = '/'; + return 1; + } + len = compute_name(entry->d_parent, buf); + if(len > 1) { + buf[len++] = 
'/'; + } + memcpy(buf+len, entry->d_name, entry->d_len); + return len + entry->d_len; +} + +int _omirr_print(struct dentry * ent1, struct dentry * ent2, + struct qstr * suffix, const char * fmt, + va_list args1, va_list args2) +{ + int count = strlen(fmt) + 10; /* estimate */ + const char * tmp = fmt; + char lenbuf[8]; + int res; + + if(!buffer) + init_buffer(NULL); + while(*tmp) { + while(*tmp && *tmp++ != '%') ; + if(*tmp) { + if(*tmp == 's') { + char * str = va_arg(args1, char*); + count += strlen(str); + } else { + (void)va_arg(args1, int); + count += 8; /* estimate */ + } + } + } + if(ent1) { + struct dentry * dent = ent1; + while(dent && !IS_ROOT(dent)) { + count += dent->d_len + 1; + dent = dent->d_parent; + } + count++; + if(ent2) { + dent = ent2; + while(dent && !IS_ROOT(dent)) { + count += dent->d_len + 1; + dent = dent->d_parent; + } + count++; + } + if(suffix) + count += suffix->len + 1; + } + + if((nr_omirr_open | cleared_flag) && reserve_write_space(count)) { + cleared_flag = 0; + res = vsprintf(buffer+write_pos+4, fmt, args2) + 4; + if(res > count) + printk("omirr: format estimate was wrong\n"); + if(ent1) { + res += compute_name(ent1, buffer+write_pos+res); + if(ent2) { + buffer[write_pos+res++] = '\0'; + res += compute_name(ent2, buffer+write_pos+res); + } + if(suffix) { + buffer[write_pos+res++] = '/'; + memcpy(buffer+write_pos+res, + suffix->name, suffix->len); + res += suffix->len; + } + buffer[write_pos+res++] = '\0'; + buffer[write_pos+res++] = '\n'; + } + sprintf(lenbuf, "%04d", res); + memcpy(buffer+write_pos, lenbuf, 4); + } else { + if(!cleared_flag) { + cleared_flag = 1; + init_buffer("0007 Z\n"); + } + res = 0; + } + write_space(res); + return res; +} + +int omirr_print(struct dentry * ent1, struct dentry * ent2, + struct qstr * suffix, const char * fmt, ...) +{ + va_list args1, args2; + int res; + + /* I don't know whether I could make a simple copy of the va_list, + * so for the safe way... 
+ */ + va_start(args1, fmt); + va_start(args2, fmt); + res = _omirr_print(ent1, ent2, suffix, fmt, args1, args2); + va_end(args2); + va_end(args1); + return res; +} + +int omirr_printall(struct inode * inode, const char * fmt, ...) +{ + int res = 0; + struct dentry * tmp = inode->i_dentry; + + if(tmp) do { + va_list args1, args2; + va_start(args1, fmt); + va_start(args2, fmt); + res += _omirr_print(tmp, NULL, NULL, fmt, args1, args2); + va_end(args2); + va_end(args1); + tmp = tmp->d_next; + } while(tmp != inode->i_dentry); + return res; +} + +static struct file_operations omirr_operations = { + NULL, /* omirr_lseek */ + omirr_read, + NULL, /* omirr_write */ + NULL, /* omirr_readdir */ + NULL, /* omirr_select */ + NULL, /* omirr_ioctl */ + NULL, /* mmap */ + omirr_open, + omirr_release, + NULL, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL /* revalidate */ +}; + +struct inode_operations proc_omirr_inode_operations = { + &omirr_operations, + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL, /* permission */ + NULL /* smap */ +}; diff --git a/fs/proc/openpromfs.c b/fs/proc/openpromfs.c index a9f84b9eb..7d741cfaf 100644 --- a/fs/proc/openpromfs.c +++ b/fs/proc/openpromfs.c @@ -1,4 +1,4 @@ -/* $Id: openpromfs.c,v 1.13 1997/04/03 08:49:25 davem Exp $ +/* $Id: openpromfs.c,v 1.15 1997/06/05 01:28:11 davem Exp $ * openpromfs.c: /proc/openprom handling routines * * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -484,7 +484,6 @@ static struct inode_operations openpromfs_prop_inode_ops = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -517,7 +516,6 @@ static struct inode_operations 
openpromfs_nodenum_inode_ops = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -550,7 +548,6 @@ static struct inode_operations openprom_alias_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -1015,7 +1012,7 @@ void openpromfs_use (struct inode *inode, int inc) static int usec = 0; if (inc) { - if (inode->i_count == 1) + if (atomic_read(&inode->i_count) == 1) usec++; else if (root_fresh && inode->i_ino == PROC_OPENPROM_FIRST) { root_fresh = 0; @@ -1028,10 +1025,10 @@ void openpromfs_use (struct inode *inode, int inc) usec--; } printk ("openpromfs_use: %d %d %d %d\n", - inode->i_ino, inc, usec, inode->i_count); + inode->i_ino, inc, usec, atomic_read(&inode->i_count)); #else if (inc) { - if (inode->i_count == 1) + if (atomic_read(&inode->i_count) == 1) MOD_INC_USE_COUNT; else if (root_fresh && inode->i_ino == PROC_OPENPROM_FIRST) { root_fresh = 0; diff --git a/fs/proc/procfs_syms.c b/fs/proc/procfs_syms.c index 809a26084..71c29dd75 100644 --- a/fs/proc/procfs_syms.c +++ b/fs/proc/procfs_syms.c @@ -37,7 +37,10 @@ EXPORT_SYMBOL(proc_openprom_deregister); #endif static struct file_system_type proc_fs_type = { - proc_read_super, "proc", 0, NULL + "proc", + FS_NO_DCACHE, + proc_read_super, + NULL }; int init_proc_fs(void) diff --git a/fs/proc/root.c b/fs/proc/root.c index 11c27699a..f42557d2c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -64,7 +64,6 @@ struct inode_operations proc_dir_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -105,7 +104,6 @@ static struct inode_operations proc_root_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ 
NULL, /* bmap */ @@ -266,7 +264,6 @@ struct inode_operations proc_openprom_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -348,17 +345,6 @@ int proc_unregister(struct proc_dir_entry * dir, int ino) /* * /proc/self: */ -static int proc_self_followlink(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - iput(dir); - *res_inode = proc_get_inode(inode->i_sb, (current->pid << 16) + PROC_PID_INO, &proc_pid); - iput(inode); - if (!*res_inode) - return -ENOENT; - return 0; -} - static int proc_self_readlink(struct inode * inode, char * buffer, int buflen) { int len; @@ -384,7 +370,6 @@ static struct inode_operations proc_self_inode_operations = { NULL, /* mknod */ NULL, /* rename */ proc_self_readlink, /* readlink */ - proc_self_followlink, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -436,6 +421,13 @@ static struct proc_dir_entry proc_root_cpuinfo = { S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_array_inode_operations }; +#if defined (CONFIG_AMIGA) || defined (CONFIG_ATARI) +static struct proc_dir_entry proc_root_hardware = { + PROC_HARDWARE, 8, "hardware", + S_IFREG | S_IRUGO, 1, 0, 0, + 0, &proc_array_inode_operations +}; +#endif static struct proc_dir_entry proc_root_self = { PROC_SELF, 4, "self", S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO, 1, 0, 0, @@ -539,6 +531,13 @@ static struct proc_dir_entry proc_root_slab = { S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_array_inode_operations }; +#ifdef CONFIG_OMIRR +static struct proc_dir_entry proc_root_omirr = { + PROC_OMIRR, 5, "omirr", + S_IFREG | S_IRUSR, 1, 0, 0, + 0, &proc_omirr_inode_operations +}; +#endif void proc_root_init(void) { @@ -599,7 +598,9 @@ void proc_root_init(void) #endif proc_register(&proc_root, &proc_openprom); #endif - +#if defined (CONFIG_AMIGA) || defined (CONFIG_ATARI) + proc_register(&proc_root, &proc_root_hardware); 
+#endif proc_register(&proc_root, &proc_root_slab); if (prof_shift) { @@ -641,6 +642,16 @@ int proc_lookup(struct inode * dir,const char * name, int len, return -EINVAL; } + /* Either remove this as soon as possible due to security problems, + * or uncomment the root-only usage. + */ + + /* Allow generic inode lookups everywhere. + * No other name in /proc must begin with a '['. + */ + if(/*!current->uid &&*/ name[0] == '[') + return proc_arbitrary_lookup(dir,name,len,result); + /* Special case "." and "..": they aren't on the directory list */ *result = dir; if (!len) @@ -686,7 +697,7 @@ static int proc_root_lookup(struct inode * dir,const char * name, int len, int ino, retval; struct task_struct *p; - dir->i_count++; + atomic_inc(&dir->i_count); if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */ dir->i_nlink = proc_root.nlink; diff --git a/fs/proc/scsi.c b/fs/proc/scsi.c index b1e77398c..fd629a75c 100644 --- a/fs/proc/scsi.c +++ b/fs/proc/scsi.c @@ -69,7 +69,6 @@ struct inode_operations proc_scsi_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/read_write.c b/fs/read_write.c index dd4092301..81b19ac30 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -168,6 +168,7 @@ asmlinkage long sys_write(unsigned int fd, const char * buf, unsigned long count goto out; down(&inode->i_sem); error = write(inode,file,buf,count); + inode->i_status |= ST_MODIFIED; up(&inode->i_sem); out: fput(file, inode); @@ -248,6 +249,10 @@ static long do_readv_writev(int type, struct inode * inode, struct file * file, len = vector->iov_len; vector++; count--; + + /* Any particular reason why we do not grab the inode semaphore + * when doing writes here? 
-DaveM + */ nr = fn(inode, file, base, len); if (nr < 0) { if (retval) @@ -259,6 +264,8 @@ static long do_readv_writev(int type, struct inode * inode, struct file * file, if (nr != len) break; } + if(fn == (IO_fn_t) file->f_op->write) + inode->i_status |= ST_MODIFIED; if (iov != iovstack) kfree(iov); return retval; diff --git a/fs/readdir.c b/fs/readdir.c index aaea5b45f..a86398ac3 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -1,20 +1,35 @@ /* - * linux/fs/readdir.c + * fs/readdir.c * * Copyright (C) 1995 Linus Torvalds */ +#include <linux/config.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/stat.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> +#ifdef CONFIG_TRANS_NAMES +#include <linux/nametrans.h> +#endif +#include <linux/dalloc.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> +/* [T.Schoebel-Theuer] I am assuming that directories never get too large. + * The problem is that getdents() delivers d_offset's that can be used + * for lseek() by the user, so I must encode the status information for + * name translation and dcache baskets in the offset. + * Note that the linux man page getdents(2) does not mention that + * the d_offset is fs-specific and can be used for lseek(). + */ +#define BASKET_BIT (1<<30) /* 31 is already used by affs */ +#define TRANS_BIT (1<<29) + /* * Traditional linux readdir() handling.. 
* @@ -35,6 +50,9 @@ struct old_linux_dirent { struct readdir_callback { struct old_linux_dirent * dirent; + struct file * file; + int translate; + off_t oldoffset; int count; }; @@ -47,11 +65,26 @@ static int fillonedir(void * __buf, const char * name, int namlen, off_t offset, return -EINVAL; buf->count++; dirent = buf->dirent; + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); +#ifdef CONFIG_TRANS_NAMES + if(!buf->translate) { + char * cut; +#ifdef CONFIG_TRANS_RESTRICT + struct inode * inode = buf->file->f_inode; + cut = testname(inode && inode->i_gid != CONFIG_TRANS_GID, dirent->d_name); +#else + cut = testname(1, dirent->d_name); +#endif + if(cut) { + put_user(0, cut); + buf->translate = 1; + } + } +#endif put_user(ino, &dirent->d_ino); put_user(offset, &dirent->d_offset); put_user(namlen, &dirent->d_namlen); - copy_to_user(dirent->d_name, name, namlen); - put_user(0, dirent->d_name + namlen); return 0; } @@ -60,6 +93,7 @@ asmlinkage int old_readdir(unsigned int fd, void * dirent, unsigned int count) int error = -EBADF; struct file * file; struct readdir_callback buf; + off_t oldpos; lock_kernel(); if (fd >= NR_OPEN || !(file = current->files->fd[fd])) @@ -70,11 +104,21 @@ asmlinkage int old_readdir(unsigned int fd, void * dirent, unsigned int count) error = verify_area(VERIFY_WRITE, dirent, sizeof(struct old_linux_dirent)); if (error) goto out; - buf.count = 0; + oldpos = file->f_pos; + buf.file = file; buf.dirent = dirent; + buf.count = 0; + buf.translate = 0; + if(file->f_pos & TRANS_BIT) { + file->f_pos &= ~TRANS_BIT; + buf.translate = 1; + } error = file->f_op->readdir(file->f_inode, file, &buf, fillonedir); if (error < 0) goto out; + if(buf.translate) { + file->f_pos = oldpos | TRANS_BIT; + } error = buf.count; out: unlock_kernel(); @@ -95,8 +139,11 @@ struct linux_dirent { struct getdents_callback { struct linux_dirent * current_dir; struct linux_dirent * previous; + struct file * file; int count; - int error; + int 
error; + int restricted; + int do_preload; }; static int filldir(void * __buf, const char * name, int namlen, off_t offset, ino_t ino) @@ -105,18 +152,51 @@ static int filldir(void * __buf, const char * name, int namlen, off_t offset, in struct getdents_callback * buf = (struct getdents_callback *) __buf; int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 1); - buf->error = -EINVAL; /* only used if we fail.. */ + /* Do not touch buf->error any more if everything is ok! */ if (reclen > buf->count) - return -EINVAL; - dirent = buf->previous; - if (dirent) - put_user(offset, &dirent->d_off); + return (buf->error = -EINVAL); +#ifdef CONFIG_DCACHE_PRELOAD + if(buf->do_preload && (name[0] != '.' || namlen > 2)) { + struct qstr qname = { name, namlen }; + struct inode * dir = buf->file->f_inode; + d_entry_preliminary(dir->i_dentry, &qname, ino); + } +#endif dirent = buf->current_dir; - buf->previous = dirent; - put_user(ino, &dirent->d_ino); - put_user(reclen, &dirent->d_reclen); copy_to_user(dirent->d_name, name, namlen); put_user(0, dirent->d_name + namlen); +#ifdef CONFIG_TRANS_NAMES + { + char * cut; +#ifdef CONFIG_TRANS_RESTRICT + cut = testname(buf->restricted, dirent->d_name); +#else + cut = testname(1, dirent->d_name); +#endif + if(cut) { + int newlen = (int)cut - (int)dirent->d_name; + int newreclen = ROUND_UP(NAME_OFFSET(dirent) + newlen + 1); + /* Either both must fit or none. 
This way we need + * no status information in f_pos */ + if (reclen+newlen > buf->count) + return -EINVAL; + put_user(0, cut); + put_user(ino, &dirent->d_ino); + put_user(newreclen, &dirent->d_reclen); + put_user(offset, &dirent->d_off); + ((char *) dirent) += newreclen; + buf->count -= newreclen; + put_user(offset, &dirent->d_off); + copy_to_user(dirent->d_name, name, namlen); + put_user(0, dirent->d_name + namlen); + } + } +#endif + put_user(ino, &dirent->d_ino); + put_user(reclen, &dirent->d_reclen); + if (buf->previous) + put_user(buf->file->f_pos, &buf->previous->d_off); + buf->previous = dirent; ((char *) dirent) += reclen; buf->current_dir = dirent; buf->count -= reclen; @@ -126,7 +206,6 @@ static int filldir(void * __buf, const char * name, int namlen, off_t offset, in asmlinkage int sys_getdents(unsigned int fd, void * dirent, unsigned int count) { struct file * file; - struct linux_dirent * lastdirent; struct getdents_callback buf; int error = -EBADF; @@ -139,18 +218,72 @@ asmlinkage int sys_getdents(unsigned int fd, void * dirent, unsigned int count) error = verify_area(VERIFY_WRITE, dirent, count); if (error) goto out; + buf.file = file; buf.current_dir = (struct linux_dirent *) dirent; buf.previous = NULL; buf.count = count; buf.error = 0; - error = file->f_op->readdir(file->f_inode, file, &buf, filldir); - if (error < 0) - goto out; - lastdirent = buf.previous; - if (!lastdirent) { + buf.restricted = 0; +#ifdef CONFIG_TRANS_RESTRICT + buf.restricted = file->f_inode && file->f_inode->i_gid != CONFIG_TRANS_GID; +#endif + buf.do_preload = 0; +#ifdef CONFIG_DCACHE_PRELOAD + if(file->f_inode && file->f_inode->i_dentry && + !(file->f_inode->i_sb->s_type->fs_flags & (FS_NO_DCACHE|FS_NO_PRELIM)) && + !(file->f_inode->i_dentry->d_flag & D_PRELOADED)) + buf.do_preload = 1; +#endif + + if(!(file->f_pos & BASKET_BIT)) { + int oldcount; + do { + oldcount = buf.count; + error = file->f_op->readdir(file->f_inode, file, &buf, filldir); + if (error < 0) + goto out; + 
} while(!buf.error && buf.count != oldcount); + } + if(!buf.error) { + int nr = 0; + struct dentry * list = file->f_inode ? + d_basket(file->f_inode->i_dentry) : NULL; + struct dentry * ptr = list; +#ifdef CONFIG_DCACHE_PRELOAD + if(buf.do_preload) { + buf.do_preload = 0; + file->f_inode->i_dentry->d_flag |= D_PRELOADED; + } +#endif + if(ptr) { + if(!(file->f_pos & BASKET_BIT)) + file->f_pos = BASKET_BIT; + do { + struct dentry * next = ptr->d_basket_next; + struct inode * inode; + /* vfs_locks() are missing here */ + inode = d_inode(&ptr); + if(inode) { + nr++; + if(nr > (file->f_pos & ~BASKET_BIT)) { + int err = filldir(&buf, ptr->d_name, + ptr->d_len, + file->f_pos, + inode->i_ino); + if(err) + break; + file->f_pos++; + } + iput(inode); + } + ptr = next; + } while(ptr != list); + } + } + if (!buf.previous) { error = buf.error; } else { - put_user(file->f_pos, &lastdirent->d_off); + put_user(file->f_pos, &buf.previous->d_off); error = count - buf.count; } out: diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index f3f73c66e..0ddda855d 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -417,59 +417,6 @@ out: return mylen; } -static int -romfs_follow_link(struct inode *dir, struct inode *inode, - int flag, int mode, struct inode **res_inode) -{ - int error, len; - char *buf; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - *res_inode = inode; - iput(dir); - return 0; - } - if (current->link_count > 5) { - iput(inode); - iput(dir); - return -ELOOP; - } - - /* Eek. Short enough. */ - len = inode->i_size; - if (!(buf = kmalloc(len+1, GFP_KERNEL))) { - iput(inode); - iput(dir); - /* correct? spin? 
*/ - return -EAGAIN; - } - error = romfs_copyfrom(inode, buf, inode->u.romfs_i.i_dataoffset, len); - if (error != len) { - iput(inode); - iput(dir); - error = -EIO; - } else { - iput(inode); - buf[len] = 0; - current->link_count++; - error = open_namei(buf, flag, mode, res_inode, dir); - current->link_count--; - } - - kfree(buf); - return error; -} - /* Mapping from our types to the kernel */ static struct file_operations romfs_file_operations = { @@ -500,7 +447,6 @@ static struct inode_operations romfs_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ romfs_readpage, /* readpage */ NULL, /* writepage */ NULL, /* bmap -- not really */ @@ -525,7 +471,7 @@ static struct file_operations romfs_dir_operations = { NULL /* revalidate */ }; -/* Merged dir/symlink op table. readdir/lookup/readlink/follow_link +/* Merged dir/symlink op table. readdir/lookup/readlink * will protect from type mismatch. */ @@ -541,7 +487,6 @@ static struct inode_operations romfs_dirlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ romfs_readlink, /* readlink */ - romfs_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -638,7 +583,10 @@ static struct super_operations romfs_ops = { }; static struct file_system_type romfs_fs_type = { - romfs_read_super, "romfs", 1, NULL + "romfs", + (FS_REQUIRES_DEV | FS_NO_DCACHE), /* Can dcache be used? */ + romfs_read_super, + NULL }; __initfunc(int init_romfs_fs(void)) diff --git a/fs/select.c b/fs/select.c index 683865a30..c7ea0e015 100644 --- a/fs/select.c +++ b/fs/select.c @@ -208,17 +208,18 @@ out: * We do a VERIFY_WRITE here even though we are only reading this time: * we'll write to it eventually.. * - * Use "int" accesses to let user-mode fd_set's be int-aligned. + * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. 
*/ -static int __get_fd_set(unsigned long nr, int * fs_pointer, int * fdset) +static int __get_fd_set(unsigned long nr, unsigned long * fs_pointer, unsigned long * fdset) { - /* round up nr to nearest "int" */ - nr = (nr + 8*sizeof(int)-1) / (8*sizeof(int)); + /* round up nr to nearest "unsigned long" */ + nr = (nr + 8*sizeof(unsigned long)-1) / (8*sizeof(unsigned long)); if (fs_pointer) { - int error = verify_area(VERIFY_WRITE,fs_pointer,nr*sizeof(int)); + int error = verify_area(VERIFY_WRITE,fs_pointer, + nr*sizeof(unsigned long)); if (!error) { while (nr) { - get_user(*fdset, fs_pointer); + __get_user(*fdset, fs_pointer); nr--; fs_pointer++; fdset++; @@ -234,13 +235,13 @@ static int __get_fd_set(unsigned long nr, int * fs_pointer, int * fdset) return 0; } -static void __set_fd_set(long nr, int * fs_pointer, int * fdset) +static void __set_fd_set(long nr, unsigned long * fs_pointer, unsigned long * fdset) { if (!fs_pointer) return; while (nr >= 0) { - put_user(*fdset, fs_pointer); - nr -= 8 * sizeof(int); + __put_user(*fdset, fs_pointer); + nr -= 8 * sizeof(unsigned long); fdset++; fs_pointer++; } @@ -261,13 +262,16 @@ static inline void __zero_fd_set(long nr, unsigned long * fdset) * subtract by 1 on the nr of file descriptors. The former is better for * machines with long > int, and the latter allows us to test the bit count * against "zero or positive", which can mostly be just a sign bit test.. + * + * Unfortunately this scheme falls apart on big endian machines where + * sizeof(long) > sizeof(int) (ie. V9 Sparc). 
-DaveM */ #define get_fd_set(nr,fsp,fdp) \ -__get_fd_set(nr, (int *) (fsp), (int *) (fdp)) +__get_fd_set(nr, (unsigned long *) (fsp), (unsigned long *) (fdp)) #define set_fd_set(nr,fsp,fdp) \ -__set_fd_set((nr)-1, (int *) (fsp), (int *) (fdp)) +__set_fd_set((nr)-1, (unsigned long *) (fsp), (unsigned long *) (fdp)) #define zero_fd_set(nr,fdp) \ __zero_fd_set((nr)-1, (unsigned long *) (fdp)) @@ -302,11 +306,11 @@ asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct error = verify_area(VERIFY_WRITE, tvp, sizeof(*tvp)); if (error) goto out; - get_user(timeout, &tvp->tv_usec); + __get_user(timeout, &tvp->tv_usec); timeout = ROUND_UP(timeout,(1000000/HZ)); { unsigned long tmp; - get_user(tmp, &tvp->tv_sec); + __get_user(tmp, &tvp->tv_sec); timeout += tmp * (unsigned long) HZ; } if (timeout) @@ -322,10 +326,10 @@ asmlinkage int sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct if ((long) timeout < 0) timeout = 0; if (tvp && !(current->personality & STICKY_TIMEOUTS)) { - put_user(timeout/HZ, &tvp->tv_sec); + __put_user(timeout/HZ, &tvp->tv_sec); timeout %= HZ; timeout *= (1000000/HZ); - put_user(timeout, &tvp->tv_usec); + __put_user(timeout, &tvp->tv_usec); } if (error < 0) goto out; diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index e23dbb979..bd723b142 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -47,8 +47,7 @@ static int static int smb_rename(struct inode *old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir); + struct inode *new_dir, const char *new_name, int new_len); static struct file_operations smb_dir_operations = { @@ -77,7 +76,6 @@ struct inode_operations smb_dir_inode_operations = NULL, /* mknod */ smb_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -798,8 +796,7 @@ smb_unlink(struct inode *dir, const char *name, int len) static int smb_rename(struct inode 
*old_dir, const char *old_name, int old_len, - struct inode *new_dir, const char *new_name, int new_len, - int must_be_dir) + struct inode *new_dir, const char *new_name, int new_len) { int res; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 93c57e38f..0451ee427 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -236,7 +236,6 @@ struct inode_operations smb_file_inode_operations = NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1943045bb..20738b0d2 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -429,9 +429,11 @@ int smb_current_kmalloced; int smb_current_vmalloced; #endif -static struct file_system_type smb_fs_type = -{ - smb_read_super, "smbfs", 0, NULL +static struct file_system_type smb_fs_type = { + "smbfs", + FS_NO_DCACHE, + smb_read_super, + NULL }; __initfunc(int init_smb_fs(void)) diff --git a/fs/smbfs/mmap.c b/fs/smbfs/mmap.c index 9fd157b2a..472fad6de 100644 --- a/fs/smbfs/mmap.c +++ b/fs/smbfs/mmap.c @@ -120,7 +120,7 @@ smb_mmap(struct inode *inode, struct file *file, struct vm_area_struct *vma) inode->i_dirt = 1; } vma->vm_inode = inode; - inode->i_count++; + atomic_inc(&inode->i_count); vma->vm_ops = &smb_file_mmap; return 0; } @@ -127,7 +127,7 @@ asmlinkage int sys_stat(char * filename, struct __old_kernel_stat * statbuf) int error; lock_kernel(); - error = namei(filename,&inode); + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; if ((error = do_revalidate(inode)) == 0) @@ -145,7 +145,7 @@ asmlinkage int sys_newstat(char * filename, struct stat * statbuf) int error; lock_kernel(); - error = namei(filename,&inode); + error = namei(NAM_FOLLOW_LINK, filename, &inode); if (error) goto out; if ((error = do_revalidate(inode)) == 0) @@ -168,7 +168,7 @@ asmlinkage int sys_lstat(char * filename, struct __old_kernel_stat * statbuf) int error; lock_kernel(); - error = 
lnamei(filename,&inode); + error = namei(NAM_FOLLOW_TRAILSLASH, filename, &inode); if (error) goto out; if ((error = do_revalidate(inode)) == 0) @@ -187,7 +187,7 @@ asmlinkage int sys_newlstat(char * filename, struct stat * statbuf) int error; lock_kernel(); - error = lnamei(filename,&inode); + error = namei(NAM_FOLLOW_TRAILSLASH, filename, &inode); if (error) goto out; if ((error = do_revalidate(inode)) == 0) @@ -249,15 +249,19 @@ asmlinkage int sys_readlink(const char * path, char * buf, int bufsiz) error = verify_area(VERIFY_WRITE,buf,bufsiz); if (error) goto out; - error = lnamei(path,&inode); + error = namei(NAM_FOLLOW_TRAILSLASH, path, &inode); if (error) goto out; error = -EINVAL; - if (!inode->i_op || !inode->i_op->readlink - || (error = do_revalidate(inode)) < 0) { + if (!inode->i_op || !inode->i_op->readlink || + !S_ISLNK(inode->i_mode) || (error = do_revalidate(inode)) < 0) { iput(inode); goto out; } + if (!IS_RDONLY(inode)) { + inode->i_atime = CURRENT_TIME; + inode->i_dirt = 1; + } error = inode->i_op->readlink(inode,buf,bufsiz); out: unlock_kernel(); diff --git a/fs/super.c b/fs/super.c index 6048b1ae7..ec47301aa 100644 --- a/fs/super.c +++ b/fs/super.c @@ -33,6 +33,7 @@ #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/fd.h> +#include <linux/dalloc.h> #include <linux/init.h> #include <asm/system.h> @@ -59,8 +60,8 @@ kdev_t ROOT_DEV; struct super_block super_blocks[NR_SUPER]; static struct file_system_type *file_systems = (struct file_system_type *) NULL; -static struct vfsmount *vfsmntlist = (struct vfsmount *) NULL, - *vfsmnttail = (struct vfsmount *) NULL, +struct vfsmount *vfsmntlist = (struct vfsmount *) NULL; +static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL, *mru_vfsmnt = (struct vfsmount *) NULL; /* @@ -376,7 +377,7 @@ int get_filesystem_list(char * buf) tmp = file_systems; while (tmp && len < PAGE_SIZE - 80) { len += sprintf(buf+len, "%s\t%s\n", - tmp->requires_dev ? 
"" : "nodev", + (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", tmp->name); tmp = tmp->next; } @@ -435,7 +436,7 @@ void sync_supers(kdev_t dev) } } -static struct super_block * get_super(kdev_t dev) +struct super_block * get_super(kdev_t dev) { struct super_block * s; @@ -601,6 +602,10 @@ static int do_umount(kdev_t dev,int unmount_root) if (!sb->s_covered->i_mount) printk("VFS: umount(%s): mounted inode has i_mount=NULL\n", kdevname(dev)); + while(sb->s_ibasket) + free_ibasket(sb); + if(sb->s_mounted->i_dentry) + d_del(sb->s_mounted->i_dentry, D_NO_CLEAR_INODE); /* * Before checking if the filesystem is still busy make sure the kernel * doesn't hold any quotafiles open on that device. If the umount fails @@ -636,15 +641,15 @@ asmlinkage int sys_umount(char * name) { struct inode * inode; kdev_t dev; + struct inode * dummy_inode = NULL; int retval = -EPERM; - struct inode dummy_inode; lock_kernel(); if (!suser()) goto out; - retval = namei(name, &inode); + retval = namei(NAM_FOLLOW_LINK, name, &inode); if (retval) { - retval = lnamei(name, &inode); + retval = namei(NAM_FOLLOW_TRAILSLASH, name, &inode); if (retval) goto out; } @@ -663,9 +668,8 @@ asmlinkage int sys_umount(char * name) } dev = inode->i_sb->s_dev; iput(inode); - memset(&dummy_inode, 0, sizeof(dummy_inode)); - dummy_inode.i_rdev = dev; - inode = &dummy_inode; + inode = dummy_inode = get_empty_inode(); + inode->i_rdev = dev; } retval = -ENXIO; if (MAJOR(dev) >= MAX_BLKDEV) { @@ -680,8 +684,7 @@ asmlinkage int sys_umount(char * name) put_unnamed_dev(dev); } } - if (inode != &dummy_inode) - iput(inode); + iput(inode); if (!retval) fsync_dev(dev); out: @@ -697,22 +700,42 @@ out: * We cannot mount a filesystem if it has active, used, or dirty inodes. * We also have to flush all inode-data for this device, as the new mount * might need new info. + * + * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when + * supplying a leading "!" 
before the dir_name, allowing "stacks" of + * mounted filesystems. The stacking will only influence any pathname lookups + * _after_ the mount, but open filedescriptors or working directories that + * are now covered remain valid. For example, when you overmount /home, any + * process with old cwd /home/joe will continue to use the old versions, + * as long as relative paths are used, but absolute paths like /home/joe/xxx + * will go to the new "top of stack" version. In general, crossing a + * mountpoint will always go to the top of stack element. + * Anyone using this new feature must know what he/she is doing. */ int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data) { - struct inode * dir_i; + struct inode * dir_i = NULL; struct super_block * sb; struct vfsmount *vfsmnt; int error; + int override = 0; + if(dir_name) { + char c; + + get_user(c, dir_name); + override = (c == '!'); + } if (!(flags & MS_RDONLY) && dev && is_read_only(dev)) return -EACCES; /*flags |= MS_RDONLY;*/ - error = namei(dir_name, &dir_i); + if(override) + dir_name++; + error = namei(NAM_FOLLOW_LINK, dir_name, &dir_i); if (error) return error; - if (dir_i->i_count != 1 || dir_i->i_mount) { + if (!override && (atomic_read(&dir_i->i_count) != 1 || dir_i->i_mount)) { iput(dir_i); return -EBUSY; } @@ -720,7 +743,7 @@ int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const cha iput(dir_i); return -ENOTDIR; } - if (!fs_may_mount(dev)) { + if (!fs_may_mount(dev) && !override) { iput(dir_i); return -EBUSY; } @@ -738,6 +761,22 @@ int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const cha vfsmnt->mnt_sb = sb; vfsmnt->mnt_flags = flags; } + { + struct dentry * old = dir_i->i_dentry; + struct dentry * new; + vfs_lock(); + new = d_alloc(old->d_parent, old->d_len, 1); + if(new) { + struct qstr copy = { old->d_name, old->d_len }; + d_add(new, sb->s_mounted, ©, D_DUPLICATE); + vfs_unlock(); + } else { + 
printk("VFS: cannot setup dentry for mount\n"); + iput(dir_i); + return -ENOMEM; + } + vfs_unlock(); + } sb->s_covered = dir_i; dir_i->i_mount = sb->s_mounted; return 0; /* we don't iput(dir_i) - see umount */ @@ -781,7 +820,7 @@ static int do_remount(const char *dir,int flags,char *data) struct inode *dir_i; int retval; - retval = namei(dir, &dir_i); + retval = namei(NAM_FOLLOW_LINK, dir, &dir_i); if (retval) return retval; if (dir_i != dir_i->i_sb->s_mounted) { @@ -872,8 +911,8 @@ asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, goto out; t = fstype->name; fops = NULL; - if (fstype->requires_dev) { - retval = namei(dev_name, &inode); + if ((fstype->fs_flags & FS_REQUIRES_DEV)) { + retval = namei(NAM_FOLLOW_LINK, dev_name, &inode); if (retval) goto out; retval = -ENOTBLK; @@ -943,7 +982,7 @@ __initfunc(static void do_mount_root(void)) struct file_system_type * fs_type; struct super_block * sb; struct vfsmount *vfsmnt; - struct inode * inode, d_inode; + struct inode * inode, * d_inode = NULL; struct file filp; int retval; @@ -963,13 +1002,14 @@ __initfunc(static void do_mount_root(void)) sb->s_flags = root_mountflags & ~MS_RDONLY; if (nfs_root_mount(sb) >= 0) { inode = sb->s_mounted; - inode->i_count += 3 ; + atomic_add(3, &inode->i_count); sb->s_covered = inode; sb->s_rd_only = 0; sb->s_dirt = 0; sb->s_type = fs_type; current->fs->pwd = inode; current->fs->root = inode; + (void)d_alloc_root(inode); ROOT_DEV = sb->s_dev; printk (KERN_NOTICE "VFS: Mounted root (nfs filesystem).\n"); vfsmnt = add_vfsmnt(ROOT_DEV, "/dev/root", "/"); @@ -1000,19 +1040,20 @@ __initfunc(static void do_mount_root(void)) #endif memset(&filp, 0, sizeof(filp)); - memset(&d_inode, 0, sizeof(d_inode)); - d_inode.i_rdev = ROOT_DEV; - filp.f_inode = &d_inode; + d_inode = get_empty_inode(); + d_inode->i_rdev = ROOT_DEV; + filp.f_inode = d_inode; if ( root_mountflags & MS_RDONLY) filp.f_mode = 1; /* read only */ else filp.f_mode = 3; /* read write */ - retval = 
blkdev_open(&d_inode, &filp); + retval = blkdev_open(d_inode, &filp); if (retval == -EROFS) { root_mountflags |= MS_RDONLY; filp.f_mode = 1; - retval = blkdev_open(&d_inode, &filp); + retval = blkdev_open(d_inode, &filp); } + iput(d_inode); if (retval) /* * Allow the user to distinguish between failed open @@ -1021,16 +1062,19 @@ __initfunc(static void do_mount_root(void)) printk("VFS: Cannot open root device %s\n", kdevname(ROOT_DEV)); else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) { - if (!fs_type->requires_dev) + if (!(fs_type->fs_flags & FS_REQUIRES_DEV)) continue; sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1); if (sb) { inode = sb->s_mounted; - inode->i_count += 3 ; /* NOTE! it is logically used 4 times, not 1 */ + + /* NOTE! it is logically used 4 times, not 1 */ + atomic_add(3, &inode->i_count); sb->s_covered = inode; sb->s_flags = root_mountflags; current->fs->pwd = inode; current->fs->root = inode; + (void)d_alloc_root(inode); printk ("VFS: Mounted root (%s filesystem)%s.\n", fs_type->name, (sb->s_flags & MS_RDONLY) ? 
" readonly" : ""); @@ -1077,11 +1121,13 @@ __initfunc(static int do_change_root(kdev_t new_root_dev,const char *put_old)) do_mount_root(); old_fs = get_fs(); set_fs(get_ds()); - error = namei(put_old,&inode); + error = namei(NAM_FOLLOW_LINK, put_old, &inode); if (error) inode = NULL; set_fs(old_fs); - if (!error && (inode->i_count != 1 || inode->i_mount)) error = -EBUSY; - if (!error && !S_ISDIR(inode->i_mode)) error = -ENOTDIR; + if (!error && (atomic_read(&inode->i_count) != 1 || inode->i_mount)) + error = -EBUSY; + if (!error && !S_ISDIR(inode->i_mode)) + error = -ENOTDIR; iput(old_root); /* current->fs->root */ iput(old_pwd); /* current->fs->pwd */ if (error) { diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 3dd0931cf..8b942a5b1 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -57,7 +57,6 @@ struct inode_operations sysv_dir_inode_operations = { sysv_mknod, /* mknod */ sysv_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/sysv/file.c b/fs/sysv/file.c index f3aadb509..da07ef7a8 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -64,7 +64,6 @@ struct inode_operations sysv_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ sysv_bmap, /* bmap */ diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index 85ba640d1..97bc7284f 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c @@ -62,8 +62,9 @@ void sysv_free_inode(struct inode * inode) printk("sysv_free_inode: inode has no device\n"); return; } - if (inode->i_count != 1) { - printk("sysv_free_inode: inode has count=%d\n", inode->i_count); + if (atomic_read(&inode->i_count) != 1) { + printk("sysv_free_inode: inode has count=%d\n", + atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { @@ -149,7 +150,7 @@ struct inode * sysv_new_inode(const struct inode * dir) mark_buffer_dirty(sb->sv_bh1, 1); /* 
super-block has been modified */ if (sb->sv_bh1 != sb->sv_bh2) mark_buffer_dirty(sb->sv_bh2, 1); sb->s_dirt = 1; /* and needs time stamp */ - inode->i_count = 1; + atomic_set(&inode->i_count, 1); inode->i_nlink = 1; inode->i_dev = sb->s_dev; inode->i_uid = current->fsuid; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index ebbf0bb4f..f8c6a1b38 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -975,9 +975,9 @@ int sysv_sync_inode(struct inode * inode) /* Every kernel module contains stuff like this. */ static struct file_system_type sysv_fs_type[3] = { - {sysv_read_super, "xenix", 1, NULL}, - {sysv_read_super, "sysv", 1, NULL}, - {sysv_read_super, "coherent", 1, NULL} + {"xenix", FS_REQUIRES_DEV, sysv_read_super, NULL}, + {"sysv", FS_REQUIRES_DEV, sysv_read_super, NULL}, + {"coherent", FS_REQUIRES_DEV, sysv_read_super, NULL} }; __initfunc(int init_sysv_fs(void)) diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 735d158d4..d1b67ab5f 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -454,7 +454,7 @@ int sysv_rmdir(struct inode * dir, const char * name, int len) retval = -ENOENT; goto end_rmdir; } - if (inode->i_count > 1) { + if (atomic_read(&inode->i_count) > 1) { retval = -EBUSY; goto end_rmdir; } @@ -635,7 +635,7 @@ static int subdir(struct inode * new_inode, struct inode * old_inode) int ino; int result; - new_inode->i_count++; + atomic_inc(&new_inode->i_count); result = 0; for (;;) { if (new_inode == old_inode) { @@ -668,7 +668,7 @@ static int subdir(struct inode * new_inode, struct inode * old_inode) * higher-level routines. 
*/ static int do_sysv_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, int must_be_dir) + struct inode * new_dir, const char * new_name, int new_len) { struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; @@ -694,8 +694,6 @@ start_up: old_inode = __iget(old_dir->i_sb, old_de->inode, 0); /* don't cross mnt-points */ if (!old_inode) goto end_rename; - if (must_be_dir && !S_ISDIR(old_inode->i_mode)) - goto end_rename; retval = -EPERM; if ((old_dir->i_mode & S_ISVTX) && current->fsuid != old_inode->i_uid && @@ -724,7 +722,7 @@ start_up: if (!empty_dir(new_inode)) goto end_rename; retval = -EBUSY; - if (new_inode->i_count > 1) + if (atomic_read(&new_inode->i_count) > 1) goto end_rename; } retval = -EPERM; @@ -810,8 +808,7 @@ end_rename: * as they are on different partitions. */ int sysv_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len, - int must_be_dir) + struct inode * new_dir, const char * new_name, int new_len) { static struct wait_queue * wait = NULL; static int lock = 0; @@ -821,7 +818,7 @@ int sysv_rename(struct inode * old_dir, const char * old_name, int old_len, sleep_on(&wait); lock = 1; result = do_sysv_rename(old_dir, old_name, old_len, - new_dir, new_name, new_len, must_be_dir); + new_dir, new_name, new_len); lock = 0; wake_up(&wait); return result; diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c index 9a33d9fab..4e8a5e349 100644 --- a/fs/sysv/symlink.c +++ b/fs/sysv/symlink.c @@ -21,7 +21,6 @@ #include <asm/uaccess.h> static int sysv_readlink(struct inode *, char *, int); -static int sysv_follow_link(struct inode *, struct inode *, int, int, struct inode **); /* * symlinks can't do much... 
@@ -38,7 +37,6 @@ struct inode_operations sysv_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ sysv_readlink, /* readlink */ - sysv_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -46,44 +44,6 @@ struct inode_operations sysv_symlink_inode_operations = { NULL /* permission */ }; -static int sysv_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - int error; - struct buffer_head * bh; - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput(dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput(dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput(inode); - iput(dir); - return -ELOOP; - } - if (!(bh = sysv_file_bread(inode, 0, 0))) { /* is reading 1 block enough ?? */ - iput(inode); - iput(dir); - return -EIO; - } - iput(inode); - current->link_count++; - error = open_namei(bh->b_data,flag,mode,res_inode,dir); - current->link_count--; - brelse(bh); - return error; -} - static int sysv_readlink(struct inode * inode, char * buffer, int buflen) { struct buffer_head * bh; @@ -91,10 +51,6 @@ static int sysv_readlink(struct inode * inode, char * buffer, int buflen) int i; char c; - if (!S_ISLNK(inode->i_mode)) { - iput(inode); - return -EINVAL; - } if (buflen > inode->i_sb->sv_block_size_1) buflen = inode->i_sb->sv_block_size_1; bh = sysv_file_bread(inode, 0, 0); diff --git a/fs/ufs/ufs_dir.c b/fs/ufs/ufs_dir.c index 26ae02abe..15396589f 100644 --- a/fs/ufs/ufs_dir.c +++ b/fs/ufs/ufs_dir.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_dir.c,v 1.8 1997/01/26 07:14:28 davem Exp $ + * $Id: ufs_dir.c,v 1.10 1997/06/05 01:29:06 davem Exp $ * */ @@ -108,11 +108,8 @@ revalidate: * version stamp to detect whether or * not the directory has been modified * during the copy 
operation. */ - unsigned long version; - dcache_add(inode, de->d_name, - ufs_swab16(de->d_namlen), - ufs_swab32(de->d_ino)); - version = inode->i_version; + unsigned long version = inode->i_version; + if (inode->i_sb->u.ufs_sb.s_flags & UFS_DEBUG) { printk("ufs_readdir: filldir(%s,%u)\n", de->d_name, ufs_swab32(de->d_ino)); @@ -166,7 +163,6 @@ struct inode_operations ufs_dir_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ @@ -174,13 +170,3 @@ struct inode_operations ufs_dir_inode_operations = { NULL, /* permission */ NULL, /* smap */ }; - -/* - * Local Variables: *** - * c-indent-level: 8 *** - * c-continued-statement-offset: 8 *** - * c-brace-offset: -8 *** - * c-argdecl-indent: 0 *** - * c-label-offset: -8 *** - * End: *** - */ diff --git a/fs/ufs/ufs_file.c b/fs/ufs/ufs_file.c index 4b479a65e..74ae1a470 100644 --- a/fs/ufs/ufs_file.c +++ b/fs/ufs/ufs_file.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_file.c,v 1.7 1997/01/26 07:14:28 davem Exp $ + * $Id: ufs_file.c,v 1.8 1997/06/05 01:29:09 davem Exp $ * */ @@ -41,7 +41,6 @@ struct inode_operations ufs_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ ufs_bmap, /* bmap */ diff --git a/fs/ufs/ufs_inode.c b/fs/ufs/ufs_inode.c index 0d89fd6f0..f0fdd5d5f 100644 --- a/fs/ufs/ufs_inode.c +++ b/fs/ufs/ufs_inode.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_inode.c,v 1.7 1996/06/01 14:56:46 ecd Exp $ + * $Id: ufs_inode.c,v 1.8 1997/06/04 08:28:28 davem Exp $ * */ @@ -18,8 +18,9 @@ void ufs_print_inode(struct inode * inode) { printk("ino %lu mode 0%6.6o lk %d uid %d gid %d" " sz %lu blks %lu cnt %u\n", - 
inode->i_ino, inode->i_mode, inode->i_nlink, inode->i_uid, - inode->i_gid, inode->i_size, inode->i_blocks, inode->i_count); + inode->i_ino, inode->i_mode, inode->i_nlink, inode->i_uid, + inode->i_gid, inode->i_size, inode->i_blocks, + atomic_read(&inode->i_count)); printk(" db <0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x" " 0x%x 0x%x 0x%x 0x%x>\n", inode->u.ufs_i.i_data[0], inode->u.ufs_i.i_data[1], diff --git a/fs/ufs/ufs_super.c b/fs/ufs/ufs_super.c index 44d7241ef..342722237 100644 --- a/fs/ufs/ufs_super.c +++ b/fs/ufs/ufs_super.c @@ -8,7 +8,7 @@ * * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * - * $Id: ufs_super.c,v 1.23 1997/04/16 04:53:39 tdyas Exp $ + * $Id: ufs_super.c,v 1.24 1997/06/04 08:28:29 davem Exp $ * */ @@ -49,7 +49,10 @@ static struct super_operations ufs_super_ops = { }; static struct file_system_type ufs_fs_type = { - ufs_read_super, "ufs", 1, NULL + "ufs", + FS_REQUIRES_DEV, + ufs_read_super, + NULL }; __initfunc(int init_ufs_fs(void)) diff --git a/fs/ufs/ufs_symlink.c b/fs/ufs/ufs_symlink.c index 13d2285e6..d98f99ff7 100644 --- a/fs/ufs/ufs_symlink.c +++ b/fs/ufs/ufs_symlink.c @@ -6,7 +6,7 @@ * Laboratory for Computer Science Research Computing Facility * Rutgers, The State University of New Jersey * - * $Id: ufs_symlink.c,v 1.7 1997/01/26 07:14:29 davem Exp $ + * $Id: ufs_symlink.c,v 1.9 1997/06/05 01:29:11 davem Exp $ * */ @@ -30,10 +30,6 @@ ufs_readlink(struct inode * inode, char * buffer, int buflen) inode->i_ino, MAJOR(inode->i_dev), MINOR(inode->i_dev)); } - if (!S_ISLNK(inode->i_mode)) { - iput (inode); - return -EINVAL; - } if (buflen > inode->i_sb->s_blocksize - 1) buflen = inode->i_sb->s_blocksize - 1; if (inode->i_blocks) { @@ -67,73 +63,6 @@ ufs_readlink(struct inode * inode, char * buffer, int buflen) return i; } -/* - * XXX - blatantly stolen from ext2fs - */ -static int -ufs_follow_link(struct inode * dir, struct inode * inode, - int flag, int mode, struct inode ** res_inode) -{ - unsigned long int block; - int error; - struct 
buffer_head * bh; - char * link; - - bh = NULL; - - if (inode->i_sb->u.ufs_sb.s_flags & (UFS_DEBUG|UFS_DEBUG_LINKS)) { - printk("ufs_follow_link: called on ino %lu dev %u/%u\n", - dir->i_ino, MAJOR(dir->i_dev), MINOR(dir->i_dev)); - } - - *res_inode = NULL; - if (!dir) { - dir = current->fs->root; - dir->i_count++; - } - if (!inode) { - iput (dir); - return -ENOENT; - } - if (!S_ISLNK(inode->i_mode)) { - iput (dir); - *res_inode = inode; - return 0; - } - if (current->link_count > 5) { - iput (dir); - iput (inode); - return -ELOOP; - } - if (inode->i_blocks) { - /* read the link from disk */ - /* XXX - error checking */ - block = ufs_bmap(inode, 0); - bh = bread(inode->i_dev, block, BLOCK_SIZE); - if (bh == NULL) { - printk("ufs_follow_link: can't read block 0 for ino %lu on dev %u/%u\n", - inode->i_ino, MAJOR(inode->i_dev), - MINOR(inode->i_dev)); - iput(dir); - iput(inode); - return(-EIO); - } - link = bh->b_data; - } else { - /* fast symlink */ - link = (char *)&(inode->u.ufs_i.i_data[0]); - } - current->link_count++; - error = open_namei (link, flag, mode, res_inode, dir); - current->link_count--; - iput (inode); - if (bh) { - brelse (bh); - } - return(error); -} - - static struct file_operations ufs_symlink_operations = { NULL, /* lseek */ NULL, /* read */ @@ -161,8 +90,7 @@ struct inode_operations ufs_symlink_inode_operations = { NULL, /* rmdir */ NULL, /* mknod */ NULL, /* rename */ - &ufs_readlink, /* readlink */ - &ufs_follow_link, /* follow_link */ + ufs_readlink, /* readlink */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index 8f8a6bbb6..a3f23181c 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -565,7 +565,7 @@ int umsdos_locate_path ( } } }else{ - dir->i_count++; + atomic_inc(&dir->i_count); } if (ret == 0){ while (dir != dir->i_sb->s_mounted){ @@ -628,7 +628,7 @@ static int umsdos_lookup_x ( umsdos_startlookup(dir); if (len == 1 && name[0] == '.'){ *result = dir; - dir->i_count++; + 
atomic_inc(&dir->i_count); ret = 0; }else if (len == 2 && name[0] == '.' && name[1] == '.'){ if (pseudo_root != NULL && dir == pseudo_root->i_sb->s_mounted){ @@ -639,7 +639,7 @@ static int umsdos_lookup_x ( */ ret = 0; *result = pseudo_root; - pseudo_root->i_count++; + atomic_inc(&pseudo_root->i_count); }else{ /* #Specification: locating .. / strategy We use the msdos filesystem to locate the parent directory. @@ -668,7 +668,7 @@ static int umsdos_lookup_x ( and return the inode of the real root. */ *result = dir->i_sb->s_mounted; - (*result)->i_count++; + atomic_inc(&((*result)->i_count)); ret = 0; }else{ struct umsdos_info info; @@ -757,7 +757,7 @@ int umsdos_hlink2inode (struct inode *hlink, struct inode **result) dir = hlink->i_sb->s_mounted; path[hlink->i_size] = '\0'; iput (hlink); - dir->i_count++; + atomic_inc(&dir->i_count); while (1){ char *start = pt; int len; @@ -811,7 +811,6 @@ struct inode_operations umsdos_dir_inode_operations = { UMSDOS_mknod, /* mknod */ UMSDOS_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/umsdos/emd.c b/fs/umsdos/emd.c index 0d4a89298..ec064b2b1 100644 --- a/fs/umsdos/emd.c +++ b/fs/umsdos/emd.c @@ -137,7 +137,8 @@ struct inode *umsdos_emd_dir_lookup(struct inode *dir, int creat) if (dir->u.umsdos_i.i_emd_dir != 0){ ret = iget (dir->i_sb,dir->u.umsdos_i.i_emd_dir); PRINTK (("deja trouve %d %x [%d] " - ,dir->u.umsdos_i.i_emd_dir,ret,ret->i_count)); + ,dir->u.umsdos_i.i_emd_dir,ret, + atomic_read(&ret->i_count))); }else{ umsdos_real_lookup (dir,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN,&ret); PRINTK (("emd_dir_lookup ")); @@ -147,7 +148,7 @@ struct inode *umsdos_emd_dir_lookup(struct inode *dir, int creat) }else if (creat){ int code; PRINTK (("avant create ")); - dir->i_count++; + atomic_inc(&dir->i_count); code = msdos_create (dir,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN ,S_IFREG|0777,&ret); PRINTK (("Creat EMD code %d ret %x ",code,ret)); diff 
--git a/fs/umsdos/file.c b/fs/umsdos/file.c index cda0e4e8d..32d76ac06 100644 --- a/fs/umsdos/file.c +++ b/fs/umsdos/file.c @@ -86,7 +86,6 @@ struct inode_operations umsdos_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ fat_bmap, /* bmap */ @@ -120,7 +119,6 @@ struct inode_operations umsdos_file_inode_operations_no_bmap = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/umsdos/inode.c b/fs/umsdos/inode.c index 8a00fd833..137235731 100644 --- a/fs/umsdos/inode.c +++ b/fs/umsdos/inode.c @@ -73,7 +73,7 @@ int umsdos_real_lookup ( struct inode **result) /* Will hold inode of the file, if successful */ { int ret; - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_lookup (dir,name,len,result); return ret; } @@ -120,7 +120,7 @@ int umsdos_isinit (struct inode *inode) #elif 0 return inode->i_atime != 0; #else - return inode->i_count > 1; + return atomic_read(&inode->i_count) > 1; #endif } /* @@ -224,7 +224,7 @@ void UMSDOS_read_inode(struct inode *inode) { PRINTK (("read inode %x ino = %d ",inode,inode->i_ino)); msdos_read_inode(inode); - PRINTK (("ino = %d %d\n",inode->i_ino,inode->i_count)); + PRINTK (("ino = %d %d\n",inode->i_ino,atomic_read(&inode->i_count))); if (S_ISDIR(inode->i_mode) && (inode->u.umsdos_i.u.dir_info.creating != 0 || inode->u.umsdos_i.u.dir_info.looking != 0 @@ -480,7 +480,7 @@ struct super_block *UMSDOS_read_super( umsdos_setup_dir_inode (pseudo); Printk (("Activating pseudo root /%s\n",UMSDOS_PSDROOT_NAME)); pseudo_root = pseudo; - pseudo->i_count++; + atomic_inc(&pseudo->i_count); pseudo = NULL; } iput (sbin); @@ -497,7 +497,10 @@ struct super_block *UMSDOS_read_super( static struct file_system_type umsdos_fs_type = { - UMSDOS_read_super, "umsdos", 1, NULL + "umsdos", + FS_REQUIRES_DEV, + UMSDOS_read_super, + 
NULL }; __initfunc(int init_umsdos_fs(void)) diff --git a/fs/umsdos/ioctl.c b/fs/umsdos/ioctl.c index a4d4108b8..ba56963ca 100644 --- a/fs/umsdos/ioctl.c +++ b/fs/umsdos/ioctl.c @@ -215,11 +215,11 @@ int UMSDOS_ioctl_dir ( This ioctl allows umssync to rename a mangle file name before syncing it back in the EMD. */ - dir->i_count += 2; + atomic_add(2, &dir->i_count); ret = msdos_rename (dir ,data.dos_dirent.d_name,data.dos_dirent.d_reclen ,dir - ,data.umsdos_dirent.name,data.umsdos_dirent.name_len,0); + ,data.umsdos_dirent.name,data.umsdos_dirent.name_len); }else if (cmd == UMSDOS_UNLINK_EMD){ /* #Specification: ioctl / UMSDOS_UNLINK_EMD The umsdos_dirent field of the struct umsdos_ioctl is used @@ -246,7 +246,7 @@ int UMSDOS_ioctl_dir ( Return 0 if success. */ - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_unlink (dir,data.dos_dirent.d_name ,data.dos_dirent.d_reclen); }else if (cmd == UMSDOS_RMDIR_DOS){ @@ -257,7 +257,7 @@ int UMSDOS_ioctl_dir ( Return 0 if success. */ - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_rmdir (dir,data.dos_dirent.d_name ,data.dos_dirent.d_reclen); }else if (cmd == UMSDOS_STAT_DOS){ diff --git a/fs/umsdos/namei.c b/fs/umsdos/namei.c index c4c9e73ba..b2a1e2e56 100644 --- a/fs/umsdos/namei.c +++ b/fs/umsdos/namei.c @@ -225,13 +225,14 @@ static int umsdos_create_any ( umsdos_lockcreate(dir); ret = umsdos_newentry (dir,&info); if (ret == 0){ - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_create (dir,info.fake.fname,info.fake.len ,S_IFREG|0777,result); if (ret == 0){ struct inode *inode = *result; umsdos_lookup_patch (dir,inode,&info.entry,info.f_pos); - PRINTK (("inode %p[%d] ",inode,inode->i_count)); + PRINTK (("inode %p[%d] ",inode, + atomic_read(&inode->i_count))); PRINTK (("Creation OK: [%d] %s %d pos %d\n",dir->i_ino ,info.fake.fname,current->pid,info.f_pos)); }else{ @@ -351,13 +352,12 @@ chkstk(); PRINTK (("ret %d %d ",ret,new_info.fake.len)); if (ret == 0){ PRINTK (("msdos_rename ")); - 
old_dir->i_count++; - new_dir->i_count++; /* Both inode are needed later */ + atomic_inc(&old_dir->i_count); + atomic_inc(&new_dir->i_count); /* Both inode are needed later */ ret = msdos_rename (old_dir ,old_info.fake.fname,old_info.fake.len ,new_dir - ,new_info.fake.fname,new_info.fake.len - ,0); + ,new_info.fake.fname,new_info.fake.len); chkstk(); PRINTK (("after m_rename ret %d ",ret)); if (ret != 0){ @@ -378,7 +378,7 @@ chkstk(); Not very efficient ... */ struct inode *inode; - new_dir->i_count++; + atomic_inc(&new_dir->i_count); PRINTK (("rename lookup len %d %d -- ",new_len,new_info.entry.flags)); ret = UMSDOS_lookup (new_dir,new_name,new_len ,&inode); @@ -441,7 +441,7 @@ static int umsdos_symlink_x( */ struct inode *inode; int ret; - dir->i_count++; /* We keep the inode in case we need it */ + atomic_inc(&dir->i_count);/* We keep the inode in case we need it */ /* later */ ret = umsdos_create_any (dir,name,len,mode,0,flags,&inode); PRINTK (("umsdos_symlink ret %d ",ret)); @@ -572,7 +572,8 @@ int UMSDOS_link ( struct inode *olddir; ret = umsdos_get_dirowner(oldinode,&olddir); PRINTK (("umsdos_link dir_owner = %d -> %p [%d] " - ,oldinode->u.umsdos_i.i_dir_owner,olddir,olddir->i_count)); + ,oldinode->u.umsdos_i.i_dir_owner,olddir, + atomic_read(&olddir->i_count))); if (ret == 0){ struct umsdos_dirent entry; umsdos_lockcreate2(dir,olddir); @@ -596,8 +597,9 @@ int UMSDOS_link ( struct umsdos_info info; ret = umsdos_newhidden (olddir,&info); if (ret == 0){ - olddir->i_count+=2; - PRINTK (("olddir[%d] ",olddir->i_count)); + atomic_add(2, &olddir->i_count); + PRINTK (("olddir[%d] ", + atomic_read(&olddir->i_count))); ret = umsdos_rename_f (olddir,entry.name ,entry.name_len ,olddir,info.entry.name,info.entry.name_len @@ -607,17 +609,19 @@ int UMSDOS_link ( if (path == NULL){ ret = -ENOMEM; }else{ - PRINTK (("olddir[%d] ",olddir->i_count)); + PRINTK (("olddir[%d] ", + atomic_read(&olddir->i_count))); ret = umsdos_locate_path (oldinode,path); - PRINTK (("olddir[%d] 
",olddir->i_count)); + PRINTK (("olddir[%d] ", + atomic_read(&olddir->i_count))); if (ret == 0){ - olddir->i_count++; + atomic_inc(&olddir->i_count); ret = umsdos_symlink_x (olddir ,entry.name ,entry.name_len,path ,S_IFREG|0777,UMSDOS_HLINK); if (ret == 0){ - dir->i_count++; + atomic_inc(&dir->i_count); ret = umsdos_symlink_x (dir,name,len ,path ,S_IFREG|0777,UMSDOS_HLINK); @@ -634,7 +638,7 @@ int UMSDOS_link ( }else{ ret = umsdos_locate_path (oldinode,path); if (ret == 0){ - dir->i_count++; + atomic_inc(&dir->i_count); ret = umsdos_symlink_x (dir,name,len,path ,S_IFREG|0777,UMSDOS_HLINK); } @@ -703,7 +707,7 @@ int UMSDOS_mkdir( ret = umsdos_newentry (dir,&info); PRINTK (("newentry %d ",ret)); if (ret == 0){ - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_mkdir (dir,info.fake.fname,info.fake.len,mode); if (ret != 0){ umsdos_delentry (dir,&info,1); @@ -869,16 +873,17 @@ int UMSDOS_rmdir( int ret = umsdos_nevercreat(dir,name,len,-EPERM); if (ret == 0){ struct inode *sdir; - dir->i_count++; + atomic_inc(&dir->i_count); ret = UMSDOS_lookup (dir,name,len,&sdir); PRINTK (("rmdir lookup %d ",ret)); if (ret == 0){ int empty; umsdos_lockcreate(dir); - if (sdir->i_count > 1){ + if (atomic_read(&sdir->i_count) > 1){ ret = -EBUSY; }else if ((empty = umsdos_isempty (sdir)) != 0){ - PRINTK (("isempty %d i_count %d ",empty,sdir->i_count)); + PRINTK (("isempty %d i_count %d ",empty, + atomic_read(&sdir->i_count))); /* check sticky bit */ if ( !(dir->i_mode & S_ISVTX) || fsuser() || current->fsuid == sdir->i_uid || @@ -895,7 +900,7 @@ int UMSDOS_rmdir( PRINTK (("isempty ret %d nlink %d ",ret,dir->i_nlink)); if (ret == 0){ struct umsdos_info info; - dir->i_count++; + atomic_inc(&dir->i_count); umsdos_parse (name,len,&info); /* The findentry is there only to complete */ /* the mangling */ @@ -960,7 +965,7 @@ int UMSDOS_unlink ( using the standard lookup function. 
*/ struct inode *inode; - dir->i_count++; + atomic_inc(&dir->i_count); ret = UMSDOS_lookup (dir,name,len,&inode); if (ret == 0){ PRINTK (("unlink nlink = %d ",inode->i_nlink)); @@ -988,7 +993,7 @@ int UMSDOS_unlink ( ret = umsdos_delentry (dir,&info,0); if (ret == 0){ PRINTK (("Avant msdos_unlink %s ",info.fake.fname)); - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_unlink_umsdos (dir,info.fake.fname ,info.fake.len); PRINTK (("msdos_unlink %s %o ret %d ",info.fake.fname @@ -1018,8 +1023,7 @@ int UMSDOS_rename( int old_len, struct inode * new_dir, const char * new_name, - int new_len, - int must_be_dir) + int new_len) { /* #Specification: weakness / rename There is a case where UMSDOS rename has a different behavior @@ -1036,8 +1040,8 @@ int UMSDOS_rename( int ret = umsdos_nevercreat(new_dir,new_name,new_len,-EEXIST); if (ret == 0){ /* umsdos_rename_f eat the inode and we may need those later */ - old_dir->i_count++; - new_dir->i_count++; + atomic_inc(&old_dir->i_count); + atomic_inc(&new_dir->i_count); ret = umsdos_rename_f (old_dir,old_name,old_len,new_dir,new_name ,new_len,0); if (ret == -EEXIST){ @@ -1075,12 +1079,12 @@ int UMSDOS_rename( is a problem at all. */ /* This is not super efficient but should work */ - new_dir->i_count++; + atomic_inc(&new_dir->i_count); ret = UMSDOS_unlink (new_dir,new_name,new_len); chkstk(); PRINTK (("rename unlink ret %d %d -- ",ret,new_len)); if (ret == -EISDIR){ - new_dir->i_count++; + atomic_inc(&new_dir->i_count); ret = UMSDOS_rmdir (new_dir,new_name,new_len); chkstk(); PRINTK (("rename rmdir ret %d -- ",ret)); diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c index a2a5364f6..e3b7678e4 100644 --- a/fs/umsdos/rdir.c +++ b/fs/umsdos/rdir.c @@ -96,7 +96,7 @@ int umsdos_rlookup_x( && dir == dir->i_sb->s_mounted && dir == pseudo_root->i_sb->s_mounted){ *result = pseudo_root; - pseudo_root->i_count++; + atomic_inc(&pseudo_root->i_count); ret = 0; /* #Specification: pseudo root / DOS/.. 
In the real root directory (c:\), the directory .. @@ -165,17 +165,18 @@ static int UMSDOS_rrmdir ( ret = -EPERM; }else{ umsdos_lockcreate (dir); - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_rmdir (dir,name,len); if (ret == -ENOTEMPTY){ struct inode *sdir; - dir->i_count++; + atomic_inc(&dir->i_count); ret = UMSDOS_rlookup (dir,name,len,&sdir); PRINTK (("rrmdir lookup %d ",ret)); if (ret == 0){ int empty; if ((empty = umsdos_isempty (sdir)) != 0){ - PRINTK (("isempty %d i_count %d ",empty,sdir->i_count)); + PRINTK (("isempty %d i_count %d ",empty, + atomic_read(&sdir->i_count))); if (empty == 2){ /* Not a Umsdos directory, so the previous msdos_rmdir @@ -188,7 +189,7 @@ static int UMSDOS_rrmdir ( ,UMSDOS_EMD_NAMELEN); sdir = NULL; if (ret == 0){ - dir->i_count++; + atomic_inc(&dir->i_count); ret = msdos_rmdir (dir,name,len); } } @@ -260,7 +261,6 @@ struct inode_operations umsdos_rdir_inode_operations = { NULL, /* mknod */ msdos_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c index 8b6678ae9..d714c5ab9 100644 --- a/fs/umsdos/symlink.c +++ b/fs/umsdos/symlink.c @@ -42,69 +42,12 @@ static int umsdos_readlink_x ( } return ret; } -/* - Follow a symbolic link chain by calling open_namei recursively - until an inode is found. - - Return 0 if ok, or a negative error code if not. 
-*/ -static int UMSDOS_follow_link( - struct inode * dir, - struct inode * inode, - int flag, - int mode, - struct inode ** res_inode) -{ - int ret = -ELOOP; - *res_inode = NULL; - if (current->link_count < 5) { - char *path = (char*)kmalloc(PATH_MAX,GFP_KERNEL); - if (path == NULL){ - ret = -ENOMEM; - }else{ - if (!dir) { - dir = current->fs[1].root; - dir->i_count++; - } - if (!inode){ - PRINTK (("symlink: inode = NULL\n")); - ret = -ENOENT; - }else if (!S_ISLNK(inode->i_mode)){ - PRINTK (("symlink: Not ISLNK\n")); - *res_inode = inode; - inode = NULL; - ret = 0; - }else{ - ret = umsdos_readlink_x (inode,path - ,umsdos_file_read_kmem,PATH_MAX-1); - if (ret > 0){ - path[ret] = '\0'; - PRINTK (("follow :%s: %d ",path,ret)); - iput(inode); - inode = NULL; - current->link_count++; - ret = open_namei(path,flag,mode,res_inode,dir); - current->link_count--; - dir = NULL; - }else{ - ret = -EIO; - } - } - kfree (path); - } - } - iput(inode); - iput(dir); - PRINTK (("follow_link ret %d\n",ret)); - return ret; -} static int UMSDOS_readlink(struct inode * inode, char * buffer, int buflen) { - int ret = -EINVAL; - if (S_ISLNK(inode->i_mode)) { - ret = umsdos_readlink_x (inode,buffer,fat_file_read,buflen); - } + int ret; + + ret = umsdos_readlink_x (inode,buffer,fat_file_read,buflen); PRINTK (("readlink %d %x bufsiz %d\n",ret,inode->i_mode,buflen)); iput(inode); return ret; @@ -136,7 +79,6 @@ struct inode_operations umsdos_symlink_inode_operations = { NULL, /* mknod */ NULL, /* rename */ UMSDOS_readlink, /* readlink */ - UMSDOS_follow_link, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index e3db004cc..161bc791e 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -937,12 +937,6 @@ int vfat_lookup(struct inode *dir,const char *name,int len, if (!(*result = iget(dir->i_sb,ino))) return -EACCES; return 0; } - if (dcache_lookup(dir, name, len, (unsigned long *) &ino) && ino) { - iput(dir); - if 
(!(*result = iget(dir->i_sb, ino))) - return -EACCES; - return 0; - } PRINTK (("vfat_lookup 3\n")); if ((res = vfat_find(dir,name,len,1,0,0,&sinfo)) < 0) { iput(dir); @@ -1019,7 +1013,6 @@ static int vfat_create_entry(struct inode *dir,const char *name,int len, (*result)->i_dirt = 1; (*result)->i_version = ++event; dir->i_version = event; - dcache_add(dir, name, len, ino); return 0; } @@ -1132,7 +1125,7 @@ static int vfat_empty(struct inode *dir) struct buffer_head *bh; struct msdos_dir_entry *de; - if (dir->i_count > 1) + if (atomic_read(&dir->i_count) > 1) return -EBUSY; if (MSDOS_I(dir)->i_start) { /* may be zero in mkdir */ pos = 0; @@ -1356,7 +1349,7 @@ int vfat_unlink(struct inode *dir,const char *name,int len) int vfat_rename(struct inode *old_dir,const char *old_name,int old_len, - struct inode *new_dir,const char *new_name,int new_len,int must_be_dir) + struct inode *new_dir,const char *new_name,int new_len) { struct super_block *sb = old_dir->i_sb; struct buffer_head *old_bh,*new_bh,*dotdot_bh; @@ -1391,8 +1384,6 @@ int vfat_rename(struct inode *old_dir,const char *old_name,int old_len, if (!(old_inode = iget(old_dir->i_sb,old_ino))) goto rename_done; is_dir = S_ISDIR(old_inode->i_mode); - if (must_be_dir && !is_dir) - goto rename_done; if (is_dir) { if ((old_dir->i_dev != new_dir->i_dev) || (old_ino == new_dir->i_ino)) { @@ -1504,7 +1495,6 @@ int vfat_rename(struct inode *old_dir,const char *old_name,int old_len, PRINTK(("vfat_rename 15b\n")); fat_mark_buffer_dirty(sb, new_bh, 1); - dcache_add(new_dir, new_name, new_len, new_ino); /* XXX: There is some code in the original MSDOS rename that * is not duplicated here and it might cause a problem in @@ -1562,7 +1552,6 @@ struct inode_operations vfat_dir_inode_operations = { NULL, /* mknod */ vfat_rename, /* rename */ NULL, /* readlink */ - NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ fat_bmap, /* bmap */ @@ -1577,7 +1566,10 @@ void vfat_read_inode(struct inode *inode) } static struct 
file_system_type vfat_fs_type = { - vfat_read_super, "vfat", 1, NULL + "vfat", + FS_REQUIRES_DEV, + vfat_read_super, + NULL }; EXPORT_SYMBOL(vfat_create); |