author     Ralf Baechle <ralf@linux-mips.org>    2000-03-27 23:54:12 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-03-27 23:54:12 +0000
commit     d3e71cb08747743fce908122bab08b479eb403a5 (patch)
tree       cbec6948fdbdee9af81cf3ecfb504070d2745d7b /fs
parent     fe7ff1706e323d0e5ed83972960a1ecc1ee538b3 (diff)
Merge with Linux 2.3.99-pre3.
Diffstat (limited to 'fs')
65 files changed, 2310 insertions, 1133 deletions
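Two interface changes recur throughout the hunks below: the `prepare_write` address-space operation gains a leading `struct file *` argument (callers such as `block_symlink()` pass NULL), and the binfmt loaders drop the dentry-based helpers `open_namei()`, `open_dentry()` and `read_exec()` in favour of `open_exec()` and `kernel_read()` from fs/exec.c. The sketch below is not part of the commit; it only illustrates the post-merge calling convention for reading an executable's header, with error handling abbreviated and the function name `example_read_header` invented for illustration.

```c
/*
 * Hedged sketch (not from this commit): how a binary-format loader reads a
 * header after this merge.  open_exec() and kernel_read() are the helpers
 * added/used in fs/exec.c in the diff below; error handling is abbreviated.
 */
#include <linux/fs.h>
#include <linux/file.h>

static int example_read_header(const char *path, char *buf, unsigned long len)
{
	struct file *file;
	int retval;

	file = open_exec(path);		/* replaces open_namei() + permission(MAY_EXEC) */
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* kernel_read() wraps f_op->read() with a kernel-space segment,
	 * replacing the old read_exec(dentry, ...) helper. */
	retval = kernel_read(file, 0, buf, len);

	fput(file);			/* replaces dput() on the old dentry */
	return retval;
}
```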
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 6eb08c857..c9aecc730 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -59,7 +59,7 @@ static int adfs_readpage(struct dentry *dentry, struct page *page) return block_read_full_page(page, adfs_get_block); } -static int adfs_prepare_write(struct page *page, unsigned int from, unsigned int to) +static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to) { return cont_prepare_write(page, from, to, adfs_get_block, &((struct inode *)page->mapping->host)->u.adfs_i.mmu_private); diff --git a/fs/affs/file.c b/fs/affs/file.c index 069964acb..bc0db190f 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -299,7 +299,7 @@ static int affs_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,affs_get_block); } -static int affs_prepare_write(struct page *page, unsigned from, unsigned to) +static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return cont_prepare_write(page,from,to,affs_get_block, &((struct inode*)page->mapping->host)->u.affs_i.mmu_private); diff --git a/fs/bfs/file.c b/fs/bfs/file.c index f5ef5e652..c5ca51cda 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -137,7 +137,7 @@ static int bfs_readpage(struct dentry *dentry, struct page *page) return block_read_full_page(page, bfs_get_block); } -static int bfs_prepare_write(struct page *page, unsigned from, unsigned to) +static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page, from, to, bfs_get_block); } diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 9339775ce..f48a2492d 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -252,7 +252,6 @@ static unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm) static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) { struct exec ex; - struct file * file; int fd; unsigned long error; unsigned long fd_offset; @@ -263,7 +262,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - bprm->dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + bprm->file->f_dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -304,26 +303,32 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->flags &= ~PF_FORKNOEXEC; #ifdef __sparc__ if (N_MAGIC(ex) == NMAGIC) { + loff_t pos = fd_offset; /* Fuck me plenty... 
*/ + /* <AOL></AOL> */ error = do_brk(N_TXTADDR(ex), ex.a_text); - read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex), - ex.a_text, 0); + bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), + ex.a_text, &pos); error = do_brk(N_DATADDR(ex), ex.a_data); - read_exec(bprm->dentry, fd_offset + ex.a_text, (char *) N_DATADDR(ex), - ex.a_data, 0); + bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), + ex.a_data, &pos); goto beyond_if; } #endif if (N_MAGIC(ex) == OMAGIC) { + loff_t pos; #if defined(__alpha__) || defined(__sparc__) + pos = fd_offset; do_brk(N_TXTADDR(ex) & PAGE_MASK, ex.a_text+ex.a_data + PAGE_SIZE - 1); - read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex), - ex.a_text+ex.a_data, 0); + bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), + ex.a_text+ex.a_data, &pos); #else + pos = 32; do_brk(0, ex.a_text+ex.a_data); - read_exec(bprm->dentry, 32, (char *) 0, ex.a_text+ex.a_data, 0); + bprm->file->f_op->read(bprm->file, (char *) 0, + ex.a_text+ex.a_data, &pos); #endif flush_icache_range((unsigned long) 0, (unsigned long) ex.a_text+ex.a_data); @@ -336,49 +341,48 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) error_time2 = jiffies; } - fd = open_dentry(bprm->dentry, O_RDONLY); + fd = get_unused_fd(); if (fd < 0) return fd; - file = fget(fd); + get_file(bprm->file); + fd_install(fd, bprm->file); if ((fd_offset & ~PAGE_MASK) != 0 && (jiffies-error_time) > 5*HZ) { printk(KERN_WARNING "fd_offset is not page aligned. Please convert program: %s\n", - file->f_dentry->d_name.name); + bprm->file->f_dentry->d_name.name); error_time = jiffies; } - if (!file->f_op || !file->f_op->mmap || ((fd_offset & ~PAGE_MASK) != 0)) { - fput(file); + if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { + loff_t pos = fd_offset; sys_close(fd); do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - read_exec(bprm->dentry, fd_offset, - (char *) N_TXTADDR(ex), ex.a_text+ex.a_data, 0); + bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex), + ex.a_text+ex.a_data, &pos); flush_icache_range((unsigned long) N_TXTADDR(ex), (unsigned long) N_TXTADDR(ex) + ex.a_text+ex.a_data); goto beyond_if; } - error = do_mmap(file, N_TXTADDR(ex), ex.a_text, + error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); if (error != N_TXTADDR(ex)) { - fput(file); sys_close(fd); send_sig(SIGKILL, current, 0); return error; } - error = do_mmap(file, N_DATADDR(ex), ex.a_data, + error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); - fput(file); sys_close(fd); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); @@ -420,16 +424,12 @@ static int load_aout_library(struct file *file) unsigned long bss, start_addr, len; unsigned long error; int retval; - loff_t offset = 0; struct exec ex; inode = file->f_dentry->d_inode; retval = -ENOEXEC; - /* N.B. Save current fs? 
*/ - set_fs(KERNEL_DS); - error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset); - set_fs(USER_DS); + error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); if (error != sizeof(ex)) goto out; @@ -450,6 +450,7 @@ static int load_aout_library(struct file *file) if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { static unsigned long error_time; + loff_t pos = N_TXTOFF(ex); if ((jiffies-error_time) > 5*HZ) { @@ -461,8 +462,8 @@ static int load_aout_library(struct file *file) do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - read_exec(file->f_dentry, N_TXTOFF(ex), - (char *)start_addr, ex.a_text + ex.a_data, 0); + file->f_op->read(file, (char *)start_addr, + ex.a_text + ex.a_data, &pos); flush_icache_range((unsigned long) start_addr, (unsigned long) start_addr + ex.a_text + ex.a_data); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 28f82594f..a12183834 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -205,17 +205,15 @@ create_elf_tables(char *p, int argc, int envc, an ELF header */ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, - struct dentry * interpreter_dentry, + struct file * interpreter, unsigned long *interp_load_addr) { - struct file * file; struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; unsigned long load_addr = 0; int load_addr_set = 0; unsigned long last_bss = 0, elf_bss = 0; unsigned long error = ~0UL; - int elf_exec_fileno; int retval, i, size; /* First of all, some simple consistency checks */ @@ -224,8 +222,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, goto out; if (!elf_check_arch(interp_elf_ex->e_machine)) goto out; - if (!interpreter_dentry->d_inode->i_fop || - !interpreter_dentry->d_inode->i_fop->mmap) + if (!interpreter->f_op->mmap) goto out; /* @@ -244,17 +241,10 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, if (!elf_phdata) goto out; - retval = read_exec(interpreter_dentry, interp_elf_ex->e_phoff, - (char *) elf_phdata, size, 1); + retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size); error = retval; if (retval < 0) - goto out_free; - - error = ~0UL; - elf_exec_fileno = open_dentry(interpreter_dentry, O_RDONLY); - if (elf_exec_fileno < 0) - goto out_free; - file = fget(elf_exec_fileno); + goto out_close; eppnt = elf_phdata; for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) { @@ -271,7 +261,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) elf_type |= MAP_FIXED; - map_addr = do_mmap(file, + map_addr = do_mmap(interpreter, load_addr + ELF_PAGESTART(vaddr), eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), elf_prot, @@ -322,19 +312,17 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex, error = ((unsigned long) interp_elf_ex->e_entry) + load_addr; out_close: - fput(file); - sys_close(elf_exec_fileno); -out_free: kfree(elf_phdata); out: return error; } static unsigned long load_aout_interp(struct exec * interp_ex, - struct dentry * interpreter_dentry) + struct file * interpreter) { - unsigned long text_data, offset, elf_entry = ~0UL; + unsigned long text_data, elf_entry = ~0UL; char * addr; + loff_t offset; int retval; current->mm->end_code = interp_ex->a_text; @@ -357,7 +345,10 @@ static unsigned long load_aout_interp(struct exec * interp_ex, } do_brk(0, text_data); - retval = read_exec(interpreter_dentry, offset, addr, text_data, 0); + retval = -ENOEXEC; + if (!interpreter->f_op->read) + goto out; + retval = interpreter->f_op->read(interpreter, addr, 
text_data, &offset); if (retval < 0) goto out; flush_icache_range((unsigned long)addr, @@ -383,8 +374,7 @@ out: static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) { - struct file * file; - struct dentry *interpreter_dentry = NULL; /* to shut gcc up */ + struct file *interpreter = NULL; /* to shut gcc up */ unsigned long load_addr = 0, load_bias; int load_addr_set = 0; char * elf_interpreter = NULL; @@ -430,7 +420,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out; } #endif - if (!bprm->dentry->d_inode->i_fop||!bprm->dentry->d_inode->i_fop->mmap) + if (!bprm->file->f_op||!bprm->file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -443,16 +433,15 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (!elf_phdata) goto out; - retval = read_exec(bprm->dentry, elf_ex.e_phoff, - (char *) elf_phdata, size, 1); + retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size); if (retval < 0) goto out_free_ph; - retval = open_dentry(bprm->dentry, O_RDONLY); + retval = get_unused_fd(); if (retval < 0) goto out_free_ph; - elf_exec_fileno = retval; - file = fget(elf_exec_fileno); + get_file(bprm->file); + fd_install(elf_exec_fileno = retval, bprm->file); elf_ppnt = elf_phdata; elf_bss = 0; @@ -480,9 +469,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (!elf_interpreter) goto out_free_file; - retval = read_exec(bprm->dentry, elf_ppnt->p_offset, + retval = kernel_read(bprm->file, elf_ppnt->p_offset, elf_interpreter, - elf_ppnt->p_filesz, 1); + elf_ppnt->p_filesz); if (retval < 0) goto out_free_interp; /* If the program interpreter is one of these two, @@ -495,32 +484,22 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) #if 0 printk("Using ELF interpreter %s\n", elf_interpreter); #endif - old_fs = get_fs(); /* This could probably be optimized */ - set_fs(get_ds()); #ifdef __sparc__ if (ibcs2_interpreter) { unsigned long old_pers = current->personality; current->personality = PER_SVR4; - lock_kernel(); - interpreter_dentry = open_namei(elf_interpreter); - unlock_kernel(); + interpreter = open_exec(elf_interpreter); current->personality = old_pers; } else #endif { - lock_kernel(); - interpreter_dentry = open_namei(elf_interpreter); - unlock_kernel(); + interpreter = open_exec(elf_interpreter); } - set_fs(old_fs); - retval = PTR_ERR(interpreter_dentry); - if (IS_ERR(interpreter_dentry)) + retval = PTR_ERR(interpreter); + if (IS_ERR(interpreter)) goto out_free_interp; - retval = permission(interpreter_dentry->d_inode, MAY_EXEC); - if (retval < 0) - goto out_free_dentry; - retval = read_exec(interpreter_dentry, 0, bprm->buf, 128, 1); + retval = kernel_read(interpreter, 0, bprm->buf, 128); if (retval < 0) goto out_free_dentry; @@ -629,7 +608,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) elf_flags |= MAP_FIXED; } - error = do_mmap(file, ELF_PAGESTART(load_bias + vaddr), + error = do_mmap(bprm->file, ELF_PAGESTART(load_bias + vaddr), (elf_ppnt->p_filesz + ELF_PAGEOFFSET(elf_ppnt->p_vaddr)), elf_prot, elf_flags, (elf_ppnt->p_offset - @@ -661,7 +640,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) elf_brk = k; } set_fs(old_fs); - fput(file); /* all done with the file */ elf_entry += load_bias; elf_bss += load_bias; @@ -674,14 +652,14 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (elf_interpreter) { if 
(interpreter_type == INTERPRETER_AOUT) elf_entry = load_aout_interp(&interp_ex, - interpreter_dentry); + interpreter); else elf_entry = load_elf_interp(&interp_elf_ex, - interpreter_dentry, + interpreter, &interp_load_addr); lock_kernel(); - dput(interpreter_dentry); + fput(interpreter); unlock_kernel(); kfree(elf_interpreter); @@ -708,7 +686,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) #ifndef VM_STACK_FLAGS lock_kernel(); - current->executable = dget(bprm->dentry); + current->executable = dget(bprm->file->f_dentry); unlock_kernel(); #endif compute_creds(bprm); @@ -779,13 +757,12 @@ out: /* error cleanup */ out_free_dentry: lock_kernel(); - dput(interpreter_dentry); + fput(interpreter); unlock_kernel(); out_free_interp: if (elf_interpreter) kfree(elf_interpreter); out_free_file: - fput(file); sys_close(elf_exec_fileno); out_free_ph: kfree(elf_phdata); @@ -797,25 +774,13 @@ out_free_ph: static int load_elf_library(struct file *file) { - struct dentry * dentry; - struct inode * inode; struct elf_phdr *elf_phdata; unsigned long elf_bss = 0, bss, len, k; int retval, error, i, j; struct elfhdr elf_ex; - loff_t offset = 0; - - error = -EACCES; - dentry = file->f_dentry; - inode = dentry->d_inode; - /* seek to the beginning of the file */ error = -ENOEXEC; - - /* N.B. save current DS?? */ - set_fs(KERNEL_DS); - retval = file->f_op->read(file, (char *) &elf_ex, sizeof(elf_ex), &offset); - set_fs(USER_DS); + retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex)); if (retval != sizeof(elf_ex)) goto out; @@ -824,8 +789,7 @@ static int load_elf_library(struct file *file) /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(elf_ex.e_machine) || - (!inode->i_fop || !inode->i_fop->mmap)) + !elf_check_arch(elf_ex.e_machine) || !file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -840,8 +804,8 @@ static int load_elf_library(struct file *file) goto out; /* N.B. check for error return?? */ - retval = read_exec(dentry, elf_ex.e_phoff, (char *) elf_phdata, - sizeof(struct elf_phdr) * elf_ex.e_phnum, 1); + retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, + sizeof(struct elf_phdr) * elf_ex.e_phnum); error = -ENOEXEC; for (j = 0, i = 0; i<elf_ex.e_phnum; i++) diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 1a1533a10..1b18094eb 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -25,7 +25,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) { char *interp, *i_name, *i_arg; - struct dentry * dentry; + struct file * file; int retval; struct elfhdr elf_ex; @@ -38,16 +38,13 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - (!bprm->dentry->d_inode->i_fop || - !bprm->dentry->d_inode->i_fop->mmap)) { + (!bprm->file->f_op || !bprm->file->f_op->mmap)) { return -ENOEXEC; } bprm->sh_bang++; /* Well, the bang-shell is implicit... */ - lock_kernel(); - dput(bprm->dentry); - unlock_kernel(); - bprm->dentry = NULL; + fput(bprm->file); + bprm->file = NULL; /* Unlike in the script case, we don't have to do any hairy * parsing to find our interpreter... it's hardcoded! @@ -79,16 +76,14 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) /* * OK, now restart the process with the interpreter's inode. 
- * Note that we use open_namei() as the name is now in kernel + * Note that we use open_exec() as the name is now in kernel * space, and we don't need to copy it. */ - lock_kernel(); - dentry = open_namei(interp); - unlock_kernel(); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + file = open_exec(interp); + if (IS_ERR(file)) + return PTR_ERR(file); - bprm->dentry = dentry; + bprm->file = file; retval = prepare_binprm(bprm); if (retval < 0) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9d98d7d70..a03c4723f 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/string.h> #include <linux/ctype.h> +#include <linux/file.h> #include <linux/spinlock.h> #include <asm/uaccess.h> @@ -180,7 +181,7 @@ static struct binfmt_entry *check_file(struct linux_binprm *bprm) static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) { struct binfmt_entry *fmt; - struct dentry * dentry; + struct file * file; char iname[128]; char *iname_addr = iname; int retval; @@ -200,8 +201,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!fmt) goto _ret; - dput(bprm->dentry); - bprm->dentry = NULL; + fput(bprm->file); + bprm->file = NULL; /* Build args for interpreter */ remove_arg_zero(bprm); @@ -213,11 +214,11 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) bprm->argc++; bprm->filename = iname; /* for binfmt_script */ - dentry = open_namei(iname); - retval = PTR_ERR(dentry); - if (IS_ERR(dentry)) + file = open_exec(iname); + retval = PTR_ERR(file); + if (IS_ERR(file)) goto _ret; - bprm->dentry = dentry; + bprm->file = file; retval = prepare_binprm(bprm); if (retval >= 0) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 450f918a4..dc78f8389 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -11,12 +11,13 @@ #include <linux/malloc.h> #include <linux/binfmts.h> #include <linux/init.h> +#include <linux/file.h> #include <linux/smp_lock.h> static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { char *cp, *i_name, *i_arg; - struct dentry * dentry; + struct file *file; char interp[128]; int retval; @@ -28,10 +29,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) */ bprm->sh_bang++; - lock_kernel(); - dput(bprm->dentry); - unlock_kernel(); - bprm->dentry = NULL; + fput(bprm->file); + bprm->file = NULL; bprm->buf[127] = '\0'; if ((cp = strchr(bprm->buf, '\n')) == NULL) @@ -81,13 +80,11 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) /* * OK, now restart the process with the interpreter's dentry. 
*/ - lock_kernel(); - dentry = open_namei(interp); - unlock_kernel(); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + file = open_exec(interp); + if (IS_ERR(file)) + return PTR_ERR(file); - bprm->dentry = dentry; + bprm->file = file; retval = prepare_binprm(bprm); if (retval < 0) return retval; diff --git a/fs/buffer.c b/fs/buffer.c index 617188db0..26580ee0d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2029,7 +2029,7 @@ int block_symlink(struct inode *inode, const char *symname, int len) if (!page) goto fail; - err = mapping->a_ops->prepare_write(page, 0, len-1); + err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); if (err) goto fail_map; kaddr = (char*)page_address(page); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 1e39811e6..bb51b0c05 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -685,7 +685,7 @@ static int coda_venus_readdir(struct file *filp, void *getdent, } /* we use this routine to read the file into our buffer */ - bufsize = read_exec(filp->f_dentry, filp->f_pos, buff, DIR_BUFSIZE, 1); + bufsize = kernel_read(filp, filp->f_pos, buff, DIR_BUFSIZE); if ( bufsize < 0) { printk("coda_venus_readdir: cannot read directory %d.\n", bufsize); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 237c7d9aa..c5ca590d2 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -200,12 +200,6 @@ out: return retval; } -/* Nothing to do.. */ -static void cramfs_put_super(struct super_block *sb) -{ - return; -} - static int cramfs_statfs(struct super_block *sb, struct statfs *buf) { buf->f_type = CRAMFS_MAGIC; @@ -361,7 +355,6 @@ static struct inode_operations cramfs_dir_inode_operations = { }; static struct super_operations cramfs_ops = { - put_super: cramfs_put_super, statfs: cramfs_statfs, }; diff --git a/fs/dcache.c b/fs/dcache.c index dc424305f..d4aef49e7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -27,6 +27,9 @@ #define DCACHE_PARANOIA 1 /* #define DCACHE_DEBUG 1 */ +/* Right now the dcache depends on the kernel lock */ +#define check_lock() if (!kernel_locked()) BUG() + /* For managing the dcache */ extern unsigned long num_physpages, page_cache_size; extern int inodes_stat[]; @@ -104,6 +107,8 @@ void dput(struct dentry *dentry) { int count; + check_lock(); + if (!dentry) return; @@ -158,7 +163,7 @@ out: count, dentry->d_parent->d_name.name, dentry->d_name.name); - *(int *)0 = 0; + BUG(); } /* @@ -168,6 +173,8 @@ out: */ int d_invalidate(struct dentry * dentry) { + check_lock(); + /* * If it's already been dropped, return OK. */ @@ -226,6 +233,7 @@ static inline void prune_one_dentry(struct dentry * dentry) */ void prune_dcache(int count) { + check_lock(); for (;;) { struct dentry *dentry; struct list_head *tmp = dentry_unused.prev; @@ -261,6 +269,8 @@ void shrink_dcache_sb(struct super_block * sb) struct list_head *tmp, *next; struct dentry *dentry; + check_lock(); + /* * Pass one ... move the dentries for the specified * superblock to the most recent end of the unused list. @@ -308,6 +318,8 @@ int is_root_busy(struct dentry *root) struct list_head *next; int count = root->d_count; + check_lock(); + repeat: next = this_parent->d_subdirs.next; resume: @@ -337,6 +349,44 @@ resume: } /* + * Search for at least 1 mount point in the dentry's subdirs. + * We descend to the next level whenever the d_subdirs + * list is non-empty and continue searching. 
+ */ +int have_submounts(struct dentry *parent) +{ + struct dentry *this_parent = parent; + struct list_head *next; + + if (parent->d_mounts != parent) + return 1; +repeat: + next = this_parent->d_subdirs.next; +resume: + while (next != &this_parent->d_subdirs) { + struct list_head *tmp = next; + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + next = tmp->next; + /* Have we found a mount point ? */ + if (dentry->d_mounts != dentry) + return 1; + if (!list_empty(&dentry->d_subdirs)) { + this_parent = dentry; + goto repeat; + } + } + /* + * All done at this level ... ascend and resume the search. + */ + if (this_parent != parent) { + next = this_parent->d_child.next; + this_parent = this_parent->d_parent; + goto resume; + } + return 0; /* No mount points found in tree */ +} + +/* * Search the dentry child list for the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level @@ -349,6 +399,8 @@ static int select_parent(struct dentry * parent) struct list_head *next; int found = 0; + check_lock(); + repeat: next = this_parent->d_subdirs.next; resume: @@ -525,6 +577,8 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) struct list_head *head = d_hash(parent,hash); struct list_head *tmp = head->next; + check_lock(); + for (;;) { struct dentry * dentry = list_entry(tmp, struct dentry, d_hash); if (tmp == head) @@ -564,6 +618,8 @@ int d_validate(struct dentry *dentry, struct dentry *dparent, struct list_head *base, *lhp; int valid = 1; + check_lock(); + if (dentry != dparent) { base = d_hash(dparent, hash); lhp = base; @@ -605,6 +661,10 @@ out: */ void d_delete(struct dentry * dentry) { + check_lock(); + + check_lock(); + /* * Are we the only user? */ @@ -646,6 +706,7 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target) { const unsigned char *old_name, *new_name; + check_lock(); memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); old_name = target->d_name.name; new_name = dentry->d_name.name; @@ -674,6 +735,8 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target) */ void d_move(struct dentry * dentry, struct dentry * target) { + check_lock(); + if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -773,7 +836,11 @@ asmlinkage long sys_getcwd(char *buf, unsigned long size) error = -ENOMEM; if (page) { unsigned long len; - char * cwd = d_path(pwd, page, PAGE_SIZE); + char * cwd; + + lock_kernel(); + cwd = d_path(pwd, page, PAGE_SIZE); + unlock_kernel(); error = -ERANGE; len = PAGE_SIZE + page - cwd; diff --git a/fs/devfs/Makefile b/fs/devfs/Makefile index 2b301b37a..23f190410 100644 --- a/fs/devfs/Makefile +++ b/fs/devfs/Makefile @@ -36,4 +36,4 @@ doc: base.c util.c test: gcc -o /tmp/base.o -D__KERNEL__ -I../../include -Wall \ -Wstrict-prototypes -O2 -fomit-frame-pointer -pipe \ - -fno-strength-reduce -DCPU=686 -DEXPORT_SYMTAB -c base.c + -fno-strength-reduce -DEXPORT_SYMTAB -c base.c @@ -93,56 +93,6 @@ static inline void put_binfmt(struct linux_binfmt * fmt) __MOD_DEC_USE_COUNT(fmt->module); } -/* N.B. 
Error returns must be < 0 */ -int open_dentry(struct dentry * dentry, int mode) -{ - struct inode * inode = dentry->d_inode; - struct file * f; - struct list_head * l = NULL; - int fd, error; - - lock_kernel(); - if (inode->i_sb) - l = &inode->i_sb->s_files; - - error = -EINVAL; - if (!inode->i_fop) - goto out; - fd = get_unused_fd(); - if (fd >= 0) { - error = -ENFILE; - f = get_empty_filp(); - if (!f) - goto out_fd; - f->f_flags = mode; - f->f_mode = (mode+1) & O_ACCMODE; - f->f_dentry = dentry; - f->f_pos = 0; - f->f_reada = 0; - f->f_op = inode->i_fop; - if (f->f_op->open) { - error = f->f_op->open(inode,f); - if (error) - goto out_filp; - } - file_move(f, l); - fd_install(fd, f); - dget(dentry); - } - unlock_kernel(); - return fd; - -out_filp: - if (error > 0) - error = -EIO; - put_filp(f); -out_fd: - put_unused_fd(fd); -out: - unlock_kernel(); - return error; -} - /* * Note that a shared library must be both readable and executable due to * security reasons. @@ -365,44 +315,45 @@ int setup_arg_pages(struct linux_binprm *bprm) return 0; } -/* - * Read in the complete executable. This is used for "-N" files - * that aren't on a block boundary, and for files on filesystems - * without get_block support. - */ -int read_exec(struct dentry *dentry, unsigned long offset, - char * addr, unsigned long count, int to_kmem) +struct file *open_exec(const char *name) { - struct file file; - struct inode * inode = dentry->d_inode; - int result = -ENOEXEC; + struct dentry *dentry; + struct file *file; - if (!inode->i_fop) - goto end_readexec; - if (init_private_file(&file, dentry, 1)) - goto end_readexec; - if (!file.f_op->read) - goto close_readexec; - if (file.f_op->llseek) { - if (file.f_op->llseek(&file,offset,0) != offset) - goto close_readexec; - } else - file.f_pos = offset; - if (to_kmem) { - mm_segment_t old_fs = get_fs(); - set_fs(get_ds()); - result = file.f_op->read(&file, addr, count, &file.f_pos); - set_fs(old_fs); - } else { - result = verify_area(VERIFY_WRITE, addr, count); - if (result) - goto close_readexec; - result = file.f_op->read(&file, addr, count, &file.f_pos); + lock_kernel(); + dentry = lookup_dentry(name, NULL, LOOKUP_FOLLOW); + file = (struct file*) dentry; + if (!IS_ERR(dentry)) { + file = ERR_PTR(-EACCES); + if (dentry->d_inode && S_ISREG(dentry->d_inode->i_mode)) { + int err = permission(dentry->d_inode, MAY_EXEC); + file = ERR_PTR(err); + if (!err) { + file = dentry_open(dentry, O_RDONLY); +out: + unlock_kernel(); + return file; + } + } + dput(dentry); } -close_readexec: - if (file.f_op->release) - file.f_op->release(inode,&file); -end_readexec: + goto out; +} + +int kernel_read(struct file *file, unsigned long offset, + char * addr, unsigned long count) +{ + mm_segment_t old_fs; + loff_t pos = offset; + int result = -ENOSYS; + + if (!file->f_op->read) + goto fail; + old_fs = get_fs(); + set_fs(get_ds()); + result = file->f_op->read(file, addr, count, &pos); + set_fs(old_fs); +fail: return result; } @@ -540,7 +491,7 @@ int flush_old_exec(struct linux_binprm * bprm) flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || - permission(bprm->dentry->d_inode,MAY_READ)) + permission(bprm->file->f_dentry->d_inode,MAY_READ)) current->dumpable = 0; /* An exec changes our domain. 
We are no longer part of the thread @@ -580,7 +531,7 @@ int prepare_binprm(struct linux_binprm *bprm) { int mode; int retval,id_change,cap_raised; - struct inode * inode = bprm->dentry->d_inode; + struct inode * inode = bprm->file->f_dentry->d_inode; mode = inode->i_mode; if (!S_ISREG(mode)) /* must be regular file */ @@ -677,7 +628,7 @@ int prepare_binprm(struct linux_binprm *bprm) } memset(bprm->buf,0,sizeof(bprm->buf)); - return read_exec(bprm->dentry,0,bprm->buf,128,1); + return kernel_read(bprm->file,0,bprm->buf,128); } /* @@ -763,24 +714,20 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { int i; char * dynloader[] = { "/sbin/loader" }; - struct dentry * dentry; + struct file * file; - lock_kernel(); - dput(bprm->dentry); - unlock_kernel(); - bprm->dentry = NULL; + fput(bprm->file); + bprm->file = NULL; bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); for (i = 0 ; i < MAX_ARG_PAGES ; i++) /* clear page-table */ bprm_loader.page[i] = NULL; - lock_kernel(); - dentry = open_namei(dynloader[0]); - unlock_kernel(); - retval = PTR_ERR(dentry); - if (IS_ERR(dentry)) + file = open_exec(dynloader[0]); + retval = PTR_ERR(file); + if (IS_ERR(file)) return retval; - bprm->dentry = dentry; + bprm->file = file; bprm->loader = bprm_loader.p; retval = prepare_binprm(bprm); if (retval<0) @@ -802,12 +749,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) retval = fn(bprm, regs); if (retval >= 0) { put_binfmt(fmt); - if (bprm->dentry) { - lock_kernel(); - dput(bprm->dentry); - unlock_kernel(); - } - bprm->dentry = NULL; + if (bprm->file) + fput(bprm->file); + bprm->file = NULL; current->did_exec = 1; return retval; } @@ -815,7 +759,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) put_binfmt(fmt); if (retval != -ENOEXEC) break; - if (!bprm->dentry) { + if (!bprm->file) { spin_unlock(&binfmt_lock); return retval; } @@ -847,37 +791,31 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs) { struct linux_binprm bprm; - struct dentry * dentry; + struct file *file; int retval; int i; bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); - lock_kernel(); - dentry = open_namei(filename); - unlock_kernel(); + file = open_exec(filename); - retval = PTR_ERR(dentry); - if (IS_ERR(dentry)) + retval = PTR_ERR(file); + if (IS_ERR(file)) return retval; - bprm.dentry = dentry; + bprm.file = file; bprm.filename = filename; bprm.sh_bang = 0; bprm.loader = 0; bprm.exec = 0; if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) { - lock_kernel(); - dput(dentry); - unlock_kernel(); + fput(file); return bprm.argc; } if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) { - lock_kernel(); - dput(dentry); - unlock_kernel(); + fput(file); return bprm.envc; } @@ -905,11 +843,8 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs out: /* Something went wrong, return the inode and free the argument pages*/ - if (bprm.dentry) { - lock_kernel(); - dput(bprm.dentry); - unlock_kernel(); - } + if (bprm.file) + fput(bprm.file); /* Assumes that free_page() can take a NULL argument. */ /* I hope this is ok for all architectures */ diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 904f5cb8f..90ce121ce 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -11,6 +11,7 @@ * David S. 
Miller (davem@caip.rutgers.edu), 1995 */ +#include <linux/config.h> #include <linux/fs.h> #include <linux/locks.h> #include <linux/quotaops.h> @@ -300,21 +301,20 @@ do_more: if (!gdp) goto error_return; - if (test_opt (sb, CHECK_STRICT) && - (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || - in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || - in_range (block, le32_to_cpu(gdp->bg_inode_table), - sb->u.ext2_sb.s_itb_per_group) || - in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), - sb->u.ext2_sb.s_itb_per_group))) - ext2_panic (sb, "ext2_free_blocks", + if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || + in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || + in_range (block, le32_to_cpu(gdp->bg_inode_table), + sb->u.ext2_sb.s_itb_per_group) || + in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), + sb->u.ext2_sb.s_itb_per_group)) + ext2_error (sb, "ext2_free_blocks", "Freeing blocks in system zones - " "Block = %lu, count = %lu", block, count); for (i = 0; i < count; i++) { if (!ext2_clear_bit (bit + i, bh->b_data)) - ext2_warning (sb, "ext2_free_blocks", + ext2_error (sb, "ext2_free_blocks", "bit already cleared for block %lu", block); else { @@ -527,11 +527,11 @@ got_block: tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + le32_to_cpu(es->s_first_data_block); - if (test_opt (sb, CHECK_STRICT) && - (tmp == le32_to_cpu(gdp->bg_block_bitmap) || - tmp == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range (tmp, le32_to_cpu(gdp->bg_inode_table), sb->u.ext2_sb.s_itb_per_group))) - ext2_panic (sb, "ext2_new_block", + if (tmp == le32_to_cpu(gdp->bg_block_bitmap) || + tmp == le32_to_cpu(gdp->bg_inode_bitmap) || + in_range (tmp, le32_to_cpu(gdp->bg_inode_table), + sb->u.ext2_sb.s_itb_per_group)) + ext2_error (sb, "ext2_new_block", "Allocating block in system zone - " "block = %u", tmp); @@ -679,6 +679,7 @@ int ext2_group_sparse(int group) test_root(group, 7)); } +#ifdef CONFIG_EXT2_CHECK /* Called at mount-time, super-block is locked */ void ext2_check_blocks_bitmap (struct super_block * sb) { @@ -753,3 +754,4 @@ void ext2_check_blocks_bitmap (struct super_block * sb) "stored = %lu, counted = %lu", (unsigned long) le32_to_cpu(es->s_free_blocks_count), bitmap_count); } +#endif diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 3a3e4a69c..277562ec7 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -12,6 +12,7 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 */ +#include <linux/config.h> #include <linux/fs.h> #include <linux/locks.h> #include <linux/quotaops.h> @@ -236,7 +237,7 @@ void ext2_free_inode (struct inode * inode) /* Ok, now we can actually update the inode bitmaps.. 
*/ if (!ext2_clear_bit (bit, bh->b_data)) - ext2_warning (sb, "ext2_free_inode", + ext2_error (sb, "ext2_free_inode", "bit already cleared for inode %lu", ino); else { gdp = ext2_get_group_desc (sb, block_group, &bh2); @@ -401,7 +402,7 @@ repeat: EXT2_INODES_PER_GROUP(sb))) < EXT2_INODES_PER_GROUP(sb)) { if (ext2_set_bit (j, bh->b_data)) { - ext2_warning (sb, "ext2_new_inode", + ext2_error (sb, "ext2_new_inode", "bit already set for inode %d", j); goto repeat; } @@ -527,6 +528,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) #endif } +#ifdef CONFIG_EXT2_CHECK /* Called at mount-time, super-block is locked */ void ext2_check_inodes_bitmap (struct super_block * sb) { @@ -565,3 +567,4 @@ void ext2_check_inodes_bitmap (struct super_block * sb) (unsigned long) le32_to_cpu(es->s_free_inodes_count), bitmap_count); } +#endif diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index cfaf5d4d3..dd09b95aa 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -628,7 +628,7 @@ static int ext2_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,ext2_get_block); } -static int ext2_prepare_write(struct page *page, unsigned from, unsigned to) +static int ext2_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page,from,to,ext2_get_block); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 73be71e61..a68289d71 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -16,6 +16,7 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 */ +#include <linux/config.h> #include <linux/module.h> #include <linux/string.h> #include <linux/fs.h> @@ -153,23 +154,14 @@ static int parse_options (char * options, unsigned long * sb_block, set_opt (*mount_options, NO_UID32); } else if (!strcmp (this_char, "check")) { - if (!value || !*value) - set_opt (*mount_options, CHECK_NORMAL); - else if (!strcmp (value, "none")) { - clear_opt (*mount_options, CHECK_NORMAL); - clear_opt (*mount_options, CHECK_STRICT); - } - else if (!strcmp (value, "normal")) - set_opt (*mount_options, CHECK_NORMAL); - else if (!strcmp (value, "strict")) { - set_opt (*mount_options, CHECK_NORMAL); - set_opt (*mount_options, CHECK_STRICT); - } - else { - printk ("EXT2-fs: Invalid check option: %s\n", - value); - return 0; - } + if (!value || !*value || !strcmp (value, "none")) + clear_opt (*mount_options, CHECK); + else +#ifdef CONFIG_EXT2_CHECK + set_opt (*mount_options, CHECK); +#else + printk("EXT2 Check option not supported\n"); +#endif } else if (!strcmp (this_char, "debug")) set_opt (*mount_options, DEBUG); @@ -205,10 +197,6 @@ static int parse_options (char * options, unsigned long * sb_block, set_opt (*mount_options, GRPID); else if (!strcmp (this_char, "minixdf")) set_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nocheck")) { - clear_opt (*mount_options, CHECK_NORMAL); - clear_opt (*mount_options, CHECK_STRICT); - } else if (!strcmp (this_char, "nogrpid") || !strcmp (this_char, "sysvgroups")) clear_opt (*mount_options, GRPID); @@ -305,10 +293,12 @@ static void ext2_setup_super (struct super_block * sb, EXT2_BLOCKS_PER_GROUP(sb), EXT2_INODES_PER_GROUP(sb), sb->u.ext2_sb.s_mount_opt); +#ifdef CONFIG_EXT2_CHECK if (test_opt (sb, CHECK)) { ext2_check_blocks_bitmap (sb); ext2_check_inodes_bitmap (sb); } +#endif } #if 0 /* ibasket's still have unresolved bugs... 
-DaveM */ @@ -398,7 +388,6 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data, } sb->u.ext2_sb.s_mount_opt = 0; - set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL); if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, &sb->u.ext2_sb.s_mount_opt)) { return NULL; @@ -674,7 +663,6 @@ int ext2_remount (struct super_block * sb, int * flags, char * data) /* * Allow the "check" option to be passed as a remount option. */ - new_mount_opt = EXT2_MOUNT_CHECK_NORMAL; if (!parse_options (data, &tmp, &resuid, &resgid, &new_mount_opt)) return -EINVAL; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index cedd3ba2b..a0202c66f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -737,7 +737,7 @@ static int fat_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,fat_get_block); } -static int fat_prepare_write(struct page *page, unsigned from, unsigned to) +static int fat_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return cont_prepare_write(page,from,to,fat_get_block, &MSDOS_I((struct inode*)page->mapping->host)->mmu_private); @@ -12,6 +12,21 @@ #include <linux/mm.h> #include <linux/malloc.h> +static void wait_for_partner(struct inode* inode, unsigned int* cnt) +{ + int cur = *cnt; + while(cur == *cnt) { + pipe_wait(inode); + if(signal_pending(current)) + break; + } +} + +static void wake_up_partner(struct inode* inode) +{ + wake_up_interruptible(PIPE_WAIT(*inode)); +} + static int fifo_open(struct inode *inode, struct file *filp) { int ret; @@ -20,29 +35,12 @@ static int fifo_open(struct inode *inode, struct file *filp) if (down_interruptible(PIPE_SEM(*inode))) goto err_nolock_nocleanup; - if (! inode->i_pipe) { - unsigned long page; - struct pipe_inode_info *info; - - info = kmalloc(sizeof(struct pipe_inode_info),GFP_KERNEL); - + if (!inode->i_pipe) { ret = -ENOMEM; - if (!info) - goto err_nocleanup; - page = __get_free_page(GFP_KERNEL); - if (!page) { - kfree(info); + if(!pipe_new(inode)) goto err_nocleanup; - } - - inode->i_pipe = info; - - init_waitqueue_head(PIPE_WAIT(*inode)); - PIPE_BASE(*inode) = (char *) page; - PIPE_START(*inode) = PIPE_LEN(*inode) = 0; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0; - PIPE_WAITING_WRITERS(*inode) = PIPE_WAITING_READERS(*inode) = 0; } + filp->f_version = 0; switch (filp->f_mode) { case 1: @@ -51,27 +49,23 @@ static int fifo_open(struct inode *inode, struct file *filp) * POSIX.1 says that O_NONBLOCK means return with the FIFO * opened, even when there is no process writing the FIFO. */ - filp->f_op = &connecting_fifo_fops; + filp->f_op = &read_fifo_fops; + PIPE_RCOUNTER(*inode)++; if (PIPE_READERS(*inode)++ == 0) - wake_up_interruptible(PIPE_WAIT(*inode)); - - if (!(filp->f_flags & O_NONBLOCK)) { - while (!PIPE_WRITERS(*inode)) { - if (signal_pending(current)) + wake_up_partner(inode); + + if (!PIPE_WRITERS(*inode)) { + if ((filp->f_flags & O_NONBLOCK)) { + /* suppress POLLHUP until we have + * seen a writer */ + filp->f_version = PIPE_WCOUNTER(*inode); + } else + { + wait_for_partner(inode, &PIPE_WCOUNTER(*inode)); + if(signal_pending(current)) goto err_rd; - up(PIPE_SEM(*inode)); - interruptible_sleep_on(PIPE_WAIT(*inode)); - - /* Note that using down_interruptible here - and similar places below is pointless, - since we have to acquire the lock to clean - up properly. 
*/ - down(PIPE_SEM(*inode)); } } - - if (PIPE_WRITERS(*inode)) - filp->f_op = &read_fifo_fops; break; case 2: @@ -85,15 +79,14 @@ static int fifo_open(struct inode *inode, struct file *filp) goto err; filp->f_op = &write_fifo_fops; + PIPE_WCOUNTER(*inode)++; if (!PIPE_WRITERS(*inode)++) - wake_up_interruptible(PIPE_WAIT(*inode)); + wake_up_partner(inode); - while (!PIPE_READERS(*inode)) { + if (!PIPE_READERS(*inode)) { + wait_for_partner(inode, &PIPE_RCOUNTER(*inode)); if (signal_pending(current)) goto err_wr; - up(PIPE_SEM(*inode)); - interruptible_sleep_on(PIPE_WAIT(*inode)); - down(PIPE_SEM(*inode)); } break; @@ -108,8 +101,10 @@ static int fifo_open(struct inode *inode, struct file *filp) PIPE_READERS(*inode)++; PIPE_WRITERS(*inode)++; + PIPE_RCOUNTER(*inode)++; + PIPE_WCOUNTER(*inode)++; if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1) - wake_up_interruptible(PIPE_WAIT(*inode)); + wake_up_partner(inode); break; default: diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 34e365663..8c0afe0c8 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -225,7 +225,7 @@ static int hfs_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,hfs_get_block); } -static int hfs_prepare_write(struct page *page, unsigned from, unsigned to) +static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return cont_prepare_write(page,from,to,hfs_get_block, &((struct inode*)page->mapping->host)->u.hfs_i.mmu_private); diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 710b9120b..d8063e296 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -94,7 +94,7 @@ static int hpfs_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,hpfs_get_block); } -static int hpfs_prepare_write(struct page *page, unsigned from, unsigned to) +static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return cont_prepare_write(page,from,to,hpfs_get_block, &((struct inode*)page->mapping->host)->u.hpfs_i.mmu_private); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 517456326..20b9bb490 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -296,6 +296,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) struct rpc_clnt *clnt; struct nlm_args *argp = &req->a_args; struct nlm_res *resp = &req->a_res; + struct rpc_message msg; int status; dprintk("lockd: call procedure %s on %s (async)\n", @@ -306,8 +307,11 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) return -ENOLCK; /* bootstrap and kick off the async RPC call */ - status = rpc_do_call(clnt, proc, argp, resp, RPC_TASK_ASYNC, - callback, req); + msg.rpc_proc = proc; + msg.rpc_argp = argp; + msg.rpc_resp =resp; + msg.rpc_cred = NULL; + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); /* If the async call is proceeding, increment host refcount */ if (status >= 0 && (req->a_flags & RPC_TASK_ASYNC)) diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index a2f280bdc..55dee3886 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -163,7 +163,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) *p++ = htonl(argp->proc); /* This is the private part. 
Needed only for SM_MON call */ - if (rqstp->rq_task->tk_proc == SM_MON) { + if (rqstp->rq_task->tk_msg.rpc_proc == SM_MON) { *p++ = argp->addr; *p++ = 0; *p++ = 0; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 4c9fa16a3..a581e328a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -1014,7 +1014,7 @@ static int minix_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,minix_get_block); } -static int minix_prepare_write(struct page *page, unsigned from, unsigned to) +static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page,from,to,minix_get_block); } diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 3171e8adc..3c8aac510 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,7 +9,7 @@ O_TARGET := nfs.o O_OBJS := inode.o file.o read.o write.o dir.o symlink.o proc.o \ - nfs2xdr.o + nfs2xdr.o flushd.o ifdef CONFIG_ROOT_NFS O_OBJS += nfsroot.o mount_clnt.o diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 37b2b682b..3ca240129 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -596,9 +596,12 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) out_valid: return 1; out_bad: - d_drop(dentry); if (!list_empty(&dentry->d_subdirs)) shrink_dcache_parent(dentry); + /* If we have submounts, don't unhash ! */ + if (have_submounts(dentry)) + goto out_valid; + d_drop(dentry); /* Purge readdir caches. */ if (dentry->d_parent->d_inode) { nfs_zap_caches(dentry->d_parent->d_inode); @@ -862,61 +865,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) return error; } - -/* Note: we copy the code from lookup_dentry() here, only: we have to - * omit the directory lock. We are already the owner of the lock when - * we reach here. And "down(&dir->i_sem)" would make us sleep forever - * ('cause WE have the lock) - * - * VERY IMPORTANT: calculate the hash for this dentry!!!!!!!! - * Otherwise the cached lookup DEFINITELY WILL fail. And a new dentry - * is created. Without the DCACHE_NFSFS_RENAMED flag. And with d_count - * == 1. And trouble. - * - * Concerning my choice of the temp name: it is just nice to have - * i_ino part of the temp name, as this offers another check whether - * somebody attempts to remove the "silly renamed" dentry itself. - * Which is something that I consider evil. Your opinion may vary. - * BUT: - * Now that I compute the hash value right, it should be possible to simply - * check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of - * doing the string compare. - * WHICH MEANS: - * This offers the opportunity to shorten the temp name. Currently, I use - * the hex representation of i_ino + an event counter. This sums up to - * as much as 36 characters for a 64 bit machine, and needs 20 chars on - * a 32 bit machine. - * QUINTESSENCE - * The use of i_ino is simply cosmetic. All we need is a unique temp - * file name for the .nfs files. The event counter seemed to be adequate. - * And as we retry in case such a file already exists, we are guaranteed - * to succeed. 
- */ - -static -struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen) -{ - struct qstr sqstr; - struct dentry *sdentry; - struct dentry *res; - - sqstr.name = silly; - sqstr.len = slen; - sqstr.hash = full_name_hash(silly, slen); - sdentry = d_lookup(parent, &sqstr); - if (!sdentry) { - sdentry = d_alloc(parent, &sqstr); - if (sdentry == NULL) - return ERR_PTR(-ENOMEM); - res = nfs_lookup(parent->d_inode, sdentry); - if (res) { - dput(sdentry); - return res; - } - } - return sdentry; -} - static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter = 0; @@ -966,7 +914,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name); dfprintk(VFS, "trying to rename %s to %s\n", dentry->d_name.name, silly); - sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen); + sdentry = lookup_one(silly, dget(dentry->d_parent)); /* * N.B. Better to return EBUSY here ... it could be * dangerous to delete the file while it's in use. diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9a91bb1ab..32d290c73 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -144,10 +144,10 @@ nfs_fsync(struct file *file, struct dentry *dentry) * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int nfs_prepare_write(struct page *page, unsigned offset, unsigned to) +static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) { kmap(page); - return 0; + return nfs_flush_incompatible(file, page); } static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) { diff --git a/fs/nfs/flushd.c b/fs/nfs/flushd.c new file mode 100644 index 000000000..d36c3a9ae --- /dev/null +++ b/fs/nfs/flushd.c @@ -0,0 +1,304 @@ +/* + * linux/fs/nfs/flushd.c + * + * For each NFS mount, there is a separate cache object that contains + * a hash table of all clusters. With this cache, an async RPC task + * (`flushd') is associated, which wakes up occasionally to inspect + * its list of dirty buffers. + * (Note that RPC tasks aren't kernel threads. Take a look at the + * rpciod code to understand what they are). + * + * Inside the cache object, we also maintain a count of the current number + * of dirty pages, which may not exceed a certain threshold. + * (FIXME: This threshold should be configurable). + * + * The code is streamlined for what I think is the prevalent case for + * NFS traffic, which is sequential write access without concurrent + * access by different processes. + * + * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> + * + * Rewritten 6/3/2000 by Trond Myklebust + * Copyright (C) 1999, 2000, Trond Myklebust <trond.myklebust@fys.uio.no> + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/malloc.h> +#include <linux/pagemap.h> +#include <linux/file.h> + +#include <linux/sched.h> + +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> + +#include <linux/spinlock.h> + +#include <linux/nfs.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_fs_sb.h> +#include <linux/nfs_flushd.h> +#include <linux/nfs_mount.h> + +/* + * Various constants + */ +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + +/* + * This is the wait queue all cluster daemons sleep on + */ +static struct rpc_wait_queue flushd_queue = RPC_INIT_WAITQ("nfs_flushd"); + +/* + * Spinlock + */ +spinlock_t nfs_flushd_lock = SPIN_LOCK_UNLOCKED; + +/* + * Local function declarations. 
+ */ +static void nfs_flushd(struct rpc_task *); +static void nfs_flushd_exit(struct rpc_task *); + + +int nfs_reqlist_init(struct nfs_server *server) +{ + struct nfs_reqlist *cache; + struct rpc_task *task; + int status = 0; + + dprintk("NFS: writecache_init\n"); + spin_lock(&nfs_flushd_lock); + cache = server->rw_requests; + + if (cache->task) + goto out_unlock; + + /* Create the RPC task */ + status = -ENOMEM; + task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC); + if (!task) + goto out_unlock; + + task->tk_calldata = server; + + cache->task = task; + + /* Run the task */ + cache->runat = jiffies; + + cache->auth = server->client->cl_auth; + task->tk_action = nfs_flushd; + task->tk_exit = nfs_flushd_exit; + + spin_unlock(&nfs_flushd_lock); + rpc_execute(task); + return 0; + out_unlock: + spin_unlock(&nfs_flushd_lock); + return status; +} + +void nfs_reqlist_exit(struct nfs_server *server) +{ + struct nfs_reqlist *cache; + + cache = server->rw_requests; + if (!cache) + return; + + dprintk("NFS: reqlist_exit (ptr %p rpc %p)\n", cache, cache->task); + while (cache->task || cache->inodes) { + spin_lock(&nfs_flushd_lock); + if (!cache->task) { + spin_unlock(&nfs_flushd_lock); + nfs_reqlist_init(server); + } else { + cache->task->tk_status = -ENOMEM; + rpc_wake_up_task(cache->task); + spin_unlock(&nfs_flushd_lock); + } + interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ); + } +} + +int nfs_reqlist_alloc(struct nfs_server *server) +{ + struct nfs_reqlist *cache; + if (server->rw_requests) + return 0; + + cache = (struct nfs_reqlist *)kmalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + + memset(cache, 0, sizeof(*cache)); + init_waitqueue_head(&cache->request_wait); + server->rw_requests = cache; + + return 0; +} + +void nfs_reqlist_free(struct nfs_server *server) +{ + if (server->rw_requests) { + kfree(server->rw_requests); + server->rw_requests = NULL; + } +} + +void nfs_wake_flushd() +{ + rpc_wake_up_status(&flushd_queue, -ENOMEM); +} + +static void inode_append_flushd(struct inode *inode) +{ + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + struct inode **q; + + spin_lock(&nfs_flushd_lock); + if (NFS_FLAGS(inode) & NFS_INO_FLUSH) + goto out; + inode->u.nfs_i.hash_next = NULL; + + q = &cache->inodes; + while (*q) + q = &(*q)->u.nfs_i.hash_next; + *q = inode; + + /* Note: we increase the inode i_count in order to prevent + * it from disappearing when on the flush list + */ + NFS_FLAGS(inode) |= NFS_INO_FLUSH; + inode->i_count++; + out: + spin_unlock(&nfs_flushd_lock); +} + +void inode_remove_flushd(struct inode *inode) +{ + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + struct inode **q; + + spin_lock(&nfs_flushd_lock); + if (!(NFS_FLAGS(inode) & NFS_INO_FLUSH)) + goto out; + + q = &cache->inodes; + while (*q && *q != inode) + q = &(*q)->u.nfs_i.hash_next; + if (*q) { + *q = inode->u.nfs_i.hash_next; + NFS_FLAGS(inode) &= ~NFS_INO_FLUSH; + iput(inode); + } + out: + spin_unlock(&nfs_flushd_lock); +} + +void inode_schedule_scan(struct inode *inode, unsigned long time) +{ + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + struct rpc_task *task; + unsigned long mintimeout; + + if (time_after(NFS_NEXTSCAN(inode), time)) + NFS_NEXTSCAN(inode) = time; + mintimeout = jiffies + 1 * HZ; + if (time_before(mintimeout, NFS_NEXTSCAN(inode))) + mintimeout = NFS_NEXTSCAN(inode); + inode_append_flushd(inode); + + spin_lock(&nfs_flushd_lock); + task = cache->task; + if (!task) { + spin_unlock(&nfs_flushd_lock); + nfs_reqlist_init(NFS_SERVER(inode)); + } else 
{ + if (time_after(cache->runat, mintimeout)) + rpc_wake_up_task(task); + spin_unlock(&nfs_flushd_lock); + } +} + + +static void +nfs_flushd(struct rpc_task *task) +{ + struct nfs_server *server; + struct nfs_reqlist *cache; + struct inode *inode, *next; + unsigned long delay = jiffies + NFS_WRITEBACK_LOCKDELAY; + int flush = (task->tk_status == -ENOMEM); + + dprintk("NFS: %4d flushd starting\n", task->tk_pid); + server = (struct nfs_server *) task->tk_calldata; + cache = server->rw_requests; + + spin_lock(&nfs_flushd_lock); + next = cache->inodes; + cache->inodes = NULL; + spin_unlock(&nfs_flushd_lock); + + while ((inode = next) != NULL) { + next = next->u.nfs_i.hash_next; + inode->u.nfs_i.hash_next = NULL; + NFS_FLAGS(inode) &= ~NFS_INO_FLUSH; + + if (flush) { + nfs_sync_file(inode, NULL, 0, 0, FLUSH_AGING); + } else if (time_after(jiffies, NFS_NEXTSCAN(inode))) { + NFS_NEXTSCAN(inode) = jiffies + NFS_WRITEBACK_LOCKDELAY; + nfs_flush_timeout(inode, FLUSH_AGING); +#ifdef CONFIG_NFS_V3 + nfs_commit_timeout(inode, FLUSH_AGING); +#endif + } + + if (nfs_have_writebacks(inode)) { + inode_append_flushd(inode); + if (time_after(delay, NFS_NEXTSCAN(inode))) + delay = NFS_NEXTSCAN(inode); + } + iput(inode); + } + + dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid); + if (time_after(jiffies + 1 * HZ, delay)) + delay = 1 * HZ; + else + delay = delay - jiffies; + task->tk_status = 0; + task->tk_action = nfs_flushd; + task->tk_timeout = delay; + cache->runat = jiffies + task->tk_timeout; + + spin_lock(&nfs_flushd_lock); + if (!cache->nr_requests && !cache->inodes) { + cache->task = NULL; + task->tk_action = NULL; + } else + rpc_sleep_on(&flushd_queue, task, NULL, NULL); + spin_unlock(&nfs_flushd_lock); +} + +static void +nfs_flushd_exit(struct rpc_task *task) +{ + struct nfs_server *server; + struct nfs_reqlist *cache; + server = (struct nfs_server *) task->tk_calldata; + cache = server->rw_requests; + + spin_lock(&nfs_flushd_lock); + if (cache->task == task) + cache->task = NULL; + spin_unlock(&nfs_flushd_lock); + wake_up(&cache->request_wait); + rpc_release_task(task); +} + diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 441d62edc..ca7e1b944 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -27,6 +27,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/stats.h> #include <linux/nfs_fs.h> +#include <linux/nfs_flushd.h> #include <linux/lockd/bind.h> #include <linux/smp_lock.h> @@ -74,6 +75,12 @@ nfs_read_inode(struct inode * inode) inode->i_rdev = 0; NFS_FILEID(inode) = 0; NFS_FSID(inode) = 0; + INIT_LIST_HEAD(&inode->u.nfs_i.dirty); + INIT_LIST_HEAD(&inode->u.nfs_i.commit); + INIT_LIST_HEAD(&inode->u.nfs_i.writeback); + inode->u.nfs_i.ndirty = 0; + inode->u.nfs_i.ncommit = 0; + inode->u.nfs_i.npages = 0; NFS_CACHEINV(inode); NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); } @@ -92,8 +99,6 @@ nfs_put_inode(struct inode * inode) static void nfs_delete_inode(struct inode * inode) { - int failed; - dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino); lock_kernel(); @@ -101,29 +106,12 @@ nfs_delete_inode(struct inode * inode) nfs_free_dircache(inode); } else { /* - * Flush out any pending write requests ... + * The following can never actually happen... 
*/ - if (NFS_WRITEBACK(inode) != NULL) { - unsigned long timeout = jiffies + 5*HZ; -#ifdef NFS_DEBUG_VERBOSE -printk("nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); -#endif - nfs_inval(inode); - while (NFS_WRITEBACK(inode) != NULL && - time_before(jiffies, timeout)) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/10); - } - current->state = TASK_RUNNING; - if (NFS_WRITEBACK(inode) != NULL) - printk("NFS: Arghhh, stuck RPC requests!\n"); + if (nfs_have_writebacks(inode)) { + printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); } } - - failed = nfs_check_failed_request(inode); - if (failed) - printk("NFS: inode %ld had %d failed requests\n", - inode->i_ino, failed); unlock_kernel(); clear_inode(inode); @@ -135,9 +123,18 @@ nfs_put_super(struct super_block *sb) struct nfs_server *server = &sb->u.nfs_sb.s_server; struct rpc_clnt *rpc; + /* + * First get rid of the request flushing daemon. + * Relies on rpc_shutdown_client() waiting on all + * client tasks to finish. + */ + nfs_reqlist_exit(server); + if ((rpc = server->client) != NULL) rpc_shutdown_client(rpc); + nfs_reqlist_free(server); + if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ rpciod_down(); /* release rpciod */ @@ -306,6 +303,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) sb->s_root->d_op = &nfs_dentry_operations; sb->s_root->d_fsdata = root_fh; + /* Fire up the writeback cache */ + if (nfs_reqlist_alloc(server) < 0) { + printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n"); + goto failure_kill_reqlist; + } + /* We're airborne */ /* Check whether to start the lockd process */ @@ -314,6 +317,8 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent) return sb; /* Yargs. It didn't work out. */ + failure_kill_reqlist: + nfs_reqlist_exit(server); out_no_root: printk("nfs_read_super: get root inode failed\n"); iput(root_inode); @@ -342,6 +347,7 @@ out_no_xprt: printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); out_free_host: + nfs_reqlist_free(server); kfree(server->hostname); out_unlock: goto out_fail; @@ -440,7 +446,6 @@ nfs_invalidate_inode(struct inode *inode) make_bad_inode(inode); inode->i_mode = save_mode; - nfs_inval(inode); nfs_zap_caches(inode); } @@ -864,7 +869,7 @@ nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) * to look at the size or the mtime the server sends us * too closely, as we're in the middle of modifying them. 
*/ - if (NFS_WRITEBACK(inode)) + if (nfs_have_writebacks(inode)) goto out; if (inode->i_size != fattr->size) { @@ -925,7 +930,7 @@ printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages); static DECLARE_FSTYPE(nfs_fs_type, "nfs", nfs_read_super, 0); extern int nfs_init_fhcache(void); -extern int nfs_init_wreqcache(void); +extern int nfs_init_nfspagecache(void); /* * Initialize NFS @@ -939,7 +944,7 @@ init_nfs_fs(void) if (err) return err; - err = nfs_init_wreqcache(); + err = nfs_init_nfspagecache(); if (err) return err; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index a7e53e6db..5ad2aaa67 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -63,6 +63,7 @@ static int nfs_stat_to_errno(int stat); #define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz #define NFS_readlinkres_sz 1 #define NFS_readres_sz 1+NFS_fattr_sz+1 +#define NFS_writeres_sz NFS_attrstat_sz #define NFS_stat_sz 1 #define NFS_readdirres_sz 1 #define NFS_statfsres_sz 1+NFS_info_sz @@ -273,6 +274,7 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) static int nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) { + unsigned int nr; u32 count = args->count; p = xdr_encode_fhandle(p, args->fh); @@ -282,28 +284,35 @@ nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) *p++ = htonl(count); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - req->rq_svec[1].iov_base = (void *) args->buffer; - req->rq_svec[1].iov_len = count; - req->rq_slen += count; - req->rq_snr = 2; + /* Get the number of buffers in the send iovec */ + nr = args->nriov; + + if (nr+2 > MAX_IOVEC) { + printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs " + "(nr %d max %d)\n", nr, MAX_IOVEC); + return -EINVAL; + } + + /* Copy the iovec */ + memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec)); #ifdef NFS_PAD_WRITES /* * Some old servers require that the message length * be a multiple of 4, so we pad it here if needed. 
*/ - count = ((count + 3) & ~3) - count; - if (count) { -#if 0 -printk("nfs_writeargs: padding write, len=%d, slen=%d, pad=%d\n", -req->rq_svec[1].iov_len, req->rq_slen, count); -#endif - req->rq_svec[2].iov_base = (void *) "\0\0\0"; - req->rq_svec[2].iov_len = count; - req->rq_slen += count; - req->rq_snr = 3; + if (count & 3) { + struct iovec *iov = req->rq_svec + nr + 1; + int pad = 4 - (count & 3); + + iov->iov_base = (void *) "\0\0\0"; + iov->iov_len = pad; + count += pad; + nr++; } #endif + req->rq_slen += count; + req->rq_snr += nr; return 0; } @@ -593,6 +602,16 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy) } /* + * Decode WRITE reply + */ +static int +nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) +{ + res->verf->committed = NFS_FILE_SYNC; + return nfs_xdr_attrstat(req, p, res->fattr); +} + +/* * Decode STATFS reply */ static int @@ -678,7 +697,7 @@ static struct rpc_procinfo nfs_procedures[18] = { PROC(readlink, readlinkargs, readlinkres), PROC(read, readargs, readres), PROC(writecache, enc_void, dec_void), - PROC(write, writeargs, attrstat), + PROC(write, writeargs, writeres), PROC(create, createargs, diropres), PROC(remove, diropargs, stat), PROC(rename, renameargs, stat), diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index d9a423f16..a592608be 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -145,6 +145,8 @@ static struct nfs_bool_opts { { "nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO }, { "ac", ~NFS_MOUNT_NOAC, 0 }, { "noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC }, + { "lock", ~NFS_MOUNT_NONLM, 0 }, + { "nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM }, { NULL, 0, 0 } }; @@ -320,7 +322,7 @@ int __init root_nfs_init(void) * Parse NFS server and directory information passed on the kernel * command line. */ -void __init nfs_root_setup(char *line) +int __init nfs_root_setup(char *line) { ROOT_DEV = MKDEV(UNNAMED_MAJOR, 255); if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { @@ -333,6 +335,7 @@ void __init nfs_root_setup(char *line) sprintf(nfs_root_name, NFS_ROOT, line); } root_nfs_parse_addr(nfs_root_name); + return 1; } __setup("nfsroot=", nfs_root_setup); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index bb55ce6d6..3823c3118 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -111,11 +111,15 @@ nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap, unsigned long offset, unsigned int count, const void *buffer, struct nfs_fattr *fattr) { - struct nfs_writeargs arg = { fhandle, offset, count, buffer }; + struct nfs_writeargs arg = { fhandle, offset, count, 1, 1, + {{(void *) buffer, count}, {0,0}, {0,0}, {0,0}, + {0,0}, {0,0}, {0,0}, {0,0}}}; + struct nfs_writeverf verf; + struct nfs_writeres res = {fattr, &verf, count}; int status; dprintk("NFS call write %d @ %ld\n", count, offset); - status = rpc_call(server->client, NFSPROC_WRITE, &arg, fattr, + status = rpc_call(server->client, NFSPROC_WRITE, &arg, &res, swap? (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0); dprintk("NFS reply read: %d\n", status); return status < 0? 
status : count; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 717d12bbb..aa17780e5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -171,6 +171,7 @@ static inline int nfs_readpage_async(struct dentry *dentry, struct inode *inode, struct page *page) { + struct rpc_message msg; unsigned long address; struct nfs_rreq *req; int result = -1, flags; @@ -195,8 +196,13 @@ nfs_readpage_async(struct dentry *dentry, struct inode *inode, /* Start the async call */ dprintk("NFS: executing async READ request.\n"); - result = rpc_do_call(NFS_CLIENT(inode), NFSPROC_READ, - &req->ra_args, &req->ra_res, flags, + + msg.rpc_proc = NFSPROC_READ; + msg.rpc_argp = &req->ra_args; + msg.rpc_resp = &req->ra_res; + msg.rpc_cred = NULL; + + result = rpc_call_async(NFS_CLIENT(inode), &msg, flags, nfs_readpage_result, req); if (result < 0) goto out_free; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5f847bec8..af023a121 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de> */ +#include <linux/config.h> #include <linux/types.h> #include <linux/malloc.h> #include <linux/swap.h> @@ -54,33 +55,126 @@ #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> +#include <linux/nfs_flushd.h> #include <asm/uaccess.h> #include <linux/smp_lock.h> #define NFS_PARANOIA 1 #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static void nfs_wback_begin(struct rpc_task *task); -static void nfs_wback_result(struct rpc_task *task); -static void nfs_cancel_request(struct nfs_wreq *req); +/* + * Spinlock + */ +spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED; +static unsigned int nfs_nr_requests = 0; /* - * Cache parameters + * Local structures + * + * Valid flags for a dirty buffer */ -#define NFS_WRITEBACK_DELAY (10 * HZ) -#define NFS_WRITEBACK_MAX 64 +#define PG_BUSY 0x0001 /* - * Limit number of delayed writes + * This is the struct where the WRITE/COMMIT arguments go. 
*/ -static int nr_write_requests = 0; -static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain"); +struct nfs_write_data { + struct rpc_task task; + struct file *file; + struct rpc_cred *cred; + struct nfs_writeargs args; /* argument struct */ + struct nfs_writeres res; /* result struct */ + struct nfs_fattr fattr; + struct nfs_writeverf verf; + struct list_head pages; /* Coalesced requests we wish to flush */ +}; + +struct nfs_page { + struct list_head wb_hash, /* Inode */ + wb_list, + *wb_list_head; + struct file *wb_file; + struct rpc_cred *wb_cred; + struct page *wb_page; /* page to write out */ + wait_queue_head_t wb_wait; /* wait queue */ + unsigned long wb_timeout; /* when to write/commit */ + unsigned int wb_offset, /* Offset of write */ + wb_bytes, /* Length of request */ + wb_count, /* reference count */ + wb_flags; + struct nfs_writeverf wb_verf; /* Commit cookie */ +}; + +#define NFS_WBACK_BUSY(req) ((req)->wb_flags & PG_BUSY) + +/* + * Local function declarations + */ +static void nfs_writeback_done(struct rpc_task *); +#ifdef CONFIG_NFS_V3 +static void nfs_commit_done(struct rpc_task *); +#endif /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) #endif +static kmem_cache_t *nfs_page_cachep = NULL; +static kmem_cache_t *nfs_wdata_cachep = NULL; + +static __inline__ struct nfs_page *nfs_page_alloc(void) +{ + struct nfs_page *p; + p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->wb_hash); + INIT_LIST_HEAD(&p->wb_list); + init_waitqueue_head(&p->wb_wait); + } + return p; +} + +static __inline__ void nfs_page_free(struct nfs_page *p) +{ + kmem_cache_free(nfs_page_cachep, p); +} + +static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) +{ + struct nfs_write_data *p; + p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + } + return p; +} + +static __inline__ void nfs_writedata_free(struct nfs_write_data *p) +{ + kmem_cache_free(nfs_wdata_cachep, p); +} + +static void nfs_writedata_release(struct rpc_task *task) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; + rpc_release_task(task); + nfs_writedata_free(wdata); +} + +/* + * This function will be used to simulate weak cache consistency + * under NFSv2 when the NFSv3 attribute patch is included. + * For the moment, we just call nfs_refresh_inode(). + */ +static __inline__ int +nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr) +{ + return nfs_refresh_inode(inode, fattr); +} + /* * Write a page synchronously. * Offset is the data offset within the page. @@ -161,278 +255,770 @@ io_error: } /* - * Append a writeback request to a list + * Write a page to the server. This was supposed to be used for + * NFS swapping only. + * FIXME: Using this for mmap is pointless, breaks asynchronous + * writebacks, and is extremely slow. */ -static inline void -append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq) +int +nfs_writepage(struct dentry * dentry, struct page *page) { - dprintk("NFS: append_write_request(%p, %p)\n", q, wreq); - rpc_append_list(q, wreq); + struct inode *inode = dentry->d_inode; + unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; + unsigned offset = PAGE_CACHE_SIZE; + int err; + + /* easy case */ + if (page->index < end_index) + goto do_it; + /* things got complicated... */ + offset = inode->i_size & (PAGE_CACHE_SIZE-1); + /* OK, are we completely out? 
*/ + if (page->index >= end_index+1 || !offset) + return -EIO; +do_it: + err = nfs_writepage_sync(dentry, inode, page, 0, offset); + if ( err == offset) return 0; + return err; +} + +/* + * Check whether the file range we want to write to is locked by + * us. + */ +static int +region_locked(struct inode *inode, struct nfs_page *req) +{ + struct file_lock *fl; + unsigned long rqstart, rqend; + + /* Don't optimize writes if we don't use NLM */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + return 0; + + rqstart = page_offset(req->wb_page) + req->wb_offset; + rqend = rqstart + req->wb_bytes; + for (fl = inode->i_flock; fl; fl = fl->fl_next) { + if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX) + && fl->fl_type == F_WRLCK + && fl->fl_start <= rqstart && rqend <= fl->fl_end) { + return 1; + } + } + + return 0; +} + +static inline struct nfs_page * +nfs_inode_wb_entry(struct list_head *head) +{ + return list_entry(head, struct nfs_page, wb_hash); } /* - * Remove a writeback request from a list + * Insert a write request into an inode */ static inline void -remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq) +nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { - dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq); - rpc_remove_list(q, wreq); + if (!list_empty(&req->wb_hash)) + return; + if (!NFS_WBACK_BUSY(req)) + printk(KERN_ERR "NFS: unlocked request attempted hashed!\n"); + inode->u.nfs_i.npages++; + list_add(&req->wb_hash, &inode->u.nfs_i.writeback); + req->wb_count++; } /* - * Find a non-busy write request for a given page to - * try to combine with. + * Insert a write request into an inode */ -static inline struct nfs_wreq * -find_write_request(struct inode *inode, struct page *page) +static inline void +nfs_inode_remove_request(struct nfs_page *req) { - pid_t pid = current->pid; - struct nfs_wreq *head, *req; + struct inode *inode; + spin_lock(&nfs_wreq_lock); + if (list_empty(&req->wb_hash)) { + spin_unlock(&nfs_wreq_lock); + return; + } + if (!NFS_WBACK_BUSY(req)) + printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n"); + inode = req->wb_file->f_dentry->d_inode; + list_del(&req->wb_hash); + INIT_LIST_HEAD(&req->wb_hash); + inode->u.nfs_i.npages--; + if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n"); + if (!nfs_have_writebacks(inode)) + inode_remove_flushd(inode); + spin_unlock(&nfs_wreq_lock); + nfs_release_request(req); +} - dprintk("NFS: find_write_request(%x/%ld, %p)\n", - inode->i_dev, inode->i_ino, page); - if (!(req = head = NFS_WRITEBACK(inode))) - return NULL; - do { - /* - * We can't combine with canceled requests or - * requests that have already been started.. - */ - if (req->wb_flags & (NFS_WRITE_CANCELLED | NFS_WRITE_INPROGRESS)) +/* + * Find a request + */ +static inline struct nfs_page * +_nfs_find_request(struct inode *inode, struct page *page) +{ + struct list_head *head, *next; + + head = &inode->u.nfs_i.writeback; + next = head->next; + while (next != head) { + struct nfs_page *req = nfs_inode_wb_entry(next); + next = next->next; + if (page_index(req->wb_page) != page_index(page)) continue; + req->wb_count++; + return req; + } + return NULL; +} - if (req->wb_page == page && req->wb_pid == pid) - return req; +struct nfs_page * +nfs_find_request(struct inode *inode, struct page *page) +{ + struct nfs_page *req; - /* - * Ehh, don't keep too many tasks queued.. 
- */ - rpc_wake_up_task(&req->wb_task); + spin_lock(&nfs_wreq_lock); + req = _nfs_find_request(inode, page); + spin_unlock(&nfs_wreq_lock); + return req; +} - } while ((req = WB_NEXT(req)) != head); - return NULL; +static inline struct nfs_page * +nfs_list_entry(struct list_head *head) +{ + return list_entry(head, struct nfs_page, wb_list); } /* - * Find and release all failed requests for this inode. + * Insert a write request into a sorted list */ -int -nfs_check_failed_request(struct inode * inode) +static inline void +nfs_list_add_request(struct nfs_page *req, struct list_head *head) { - /* FIXME! */ - return 0; + struct list_head *prev; + + if (!list_empty(&req->wb_list)) { + printk(KERN_ERR "NFS: Add to list failed!\n"); + return; + } + if (list_empty(&req->wb_hash)) { + printk(KERN_ERR "NFS: Unhashed request attempted added to a list!\n"); + return; + } + if (!NFS_WBACK_BUSY(req)) + printk(KERN_ERR "NFS: unlocked request attempted added to list!\n"); + prev = head->prev; + while (prev != head) { + struct nfs_page *p = nfs_list_entry(prev); + if (page_index(p->wb_page) < page_index(req->wb_page)) + break; + prev = prev->prev; + } + list_add(&req->wb_list, prev); + req->wb_list_head = head; } /* - * Try to merge adjacent write requests. This works only for requests - * issued by the same user. + * Insert a write request into an inode */ -static inline int -update_write_request(struct nfs_wreq *req, unsigned int first, - unsigned int bytes) +static inline void +nfs_list_remove_request(struct nfs_page *req) { - unsigned int rqfirst = req->wb_offset, - rqlast = rqfirst + req->wb_bytes, - last = first + bytes; + if (list_empty(&req->wb_list)) + return; + if (!NFS_WBACK_BUSY(req)) + printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n"); + list_del(&req->wb_list); + INIT_LIST_HEAD(&req->wb_list); + req->wb_list_head = NULL; +} - dprintk("nfs: trying to update write request %p\n", req); +/* + * Add a request to the inode's dirty list. + */ +static inline void +nfs_mark_request_dirty(struct nfs_page *req) +{ + struct inode *inode = req->wb_file->f_dentry->d_inode; - /* not contiguous? */ - if (rqlast < first || last < rqfirst) - return 0; + spin_lock(&nfs_wreq_lock); + if (list_empty(&req->wb_list)) { + nfs_list_add_request(req, &inode->u.nfs_i.dirty); + inode->u.nfs_i.ndirty++; + } + spin_unlock(&nfs_wreq_lock); + /* + * NB: the call to inode_schedule_scan() must lie outside the + * spinlock since it can run flushd(). + */ + inode_schedule_scan(inode, req->wb_timeout); +} - if (first < rqfirst) - rqfirst = first; - if (rqlast < last) - rqlast = last; +/* + * Check if a request is dirty + */ +static inline int +nfs_dirty_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_file->f_dentry->d_inode; + return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty; +} - req->wb_offset = rqfirst; - req->wb_bytes = rqlast - rqfirst; - req->wb_count++; +#ifdef CONFIG_NFS_V3 +/* + * Add a request to the inode's commit list. + */ +static inline void +nfs_mark_request_commit(struct nfs_page *req) +{ + struct inode *inode = req->wb_file->f_dentry->d_inode; - return 1; + spin_lock(&nfs_wreq_lock); + if (list_empty(&req->wb_list)) { + nfs_list_add_request(req, &inode->u.nfs_i.commit); + inode->u.nfs_i.ncommit++; + } + spin_unlock(&nfs_wreq_lock); + /* + * NB: the call to inode_schedule_scan() must lie outside the + * spinlock since it can run flushd(). 
+ */ + inode_schedule_scan(inode, req->wb_timeout); } +#endif -static kmem_cache_t *nfs_wreq_cachep; - -int nfs_init_wreqcache(void) +/* + * Lock the page of an asynchronous request + */ +static inline int +nfs_lock_request(struct nfs_page *req) { - nfs_wreq_cachep = kmem_cache_create("nfs_wreq", - sizeof(struct nfs_wreq), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (nfs_wreq_cachep == NULL) - return -ENOMEM; - return 0; + if (NFS_WBACK_BUSY(req)) + return 0; + req->wb_count++; + req->wb_flags |= PG_BUSY; + return 1; } static inline void -free_write_request(struct nfs_wreq * req) +nfs_unlock_request(struct nfs_page *req) { - if (!--req->wb_count) - kmem_cache_free(nfs_wreq_cachep, req); + if (!NFS_WBACK_BUSY(req)) { + printk(KERN_ERR "NFS: Invalid unlock attempted\n"); + return; + } + req->wb_flags &= ~PG_BUSY; + wake_up(&req->wb_wait); + nfs_release_request(req); } /* - * Create and initialize a writeback request + * Create a write request. + * Page must be locked by the caller. This makes sure we never create + * two different requests for the same page, and avoids possible deadlock + * when we reach the hard limit on the number of dirty pages. */ -static inline struct nfs_wreq * -create_write_request(struct file * file, struct page *page, unsigned int offset, unsigned int bytes) +static struct nfs_page * +nfs_create_request(struct inode *inode, struct file *file, struct page *page, + unsigned int offset, unsigned int count) { - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - struct nfs_wreq *wreq; - struct rpc_task *task; + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + struct nfs_page *req = NULL; + long timeout; - dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (page->index << PAGE_CACHE_SHIFT) + offset, bytes); - - /* FIXME: Enforce hard limit on number of concurrent writes? */ - wreq = kmem_cache_alloc(nfs_wreq_cachep, SLAB_KERNEL); - if (!wreq) - goto out_fail; - memset(wreq, 0, sizeof(*wreq)); + /* Deal with hard/soft limits. + */ + do { + /* If we're over the soft limit, flush out old requests */ + if (nfs_nr_requests >= MAX_REQUEST_SOFT) + nfs_wb_file(inode, file); + + /* If we're still over the soft limit, wake up some requests */ + if (nfs_nr_requests >= MAX_REQUEST_SOFT) { + dprintk("NFS: hit soft limit (%d requests)\n", + nfs_nr_requests); + if (!cache->task) + nfs_reqlist_init(NFS_SERVER(inode)); + nfs_wake_flushd(); + } - task = &wreq->wb_task; - rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE); - task->tk_calldata = wreq; - task->tk_action = nfs_wback_begin; + /* If we haven't reached the hard limit yet, + * try to allocate the request struct */ + if (nfs_nr_requests < MAX_REQUEST_HARD) { + req = nfs_page_alloc(); + if (req != NULL) + break; + } - rpcauth_lookupcred(task); /* Obtain user creds */ - if (task->tk_status < 0) - goto out_req; + /* We're over the hard limit. Wait for better times */ + dprintk("NFS: create_request sleeping (total %d pid %d)\n", + nfs_nr_requests, current->pid); + + timeout = 1 * HZ; + if (NFS_SERVER(inode)->flags & NFS_MOUNT_INTR) { + interruptible_sleep_on_timeout(&cache->request_wait, + timeout); + if (signalled()) + break; + } else + sleep_on_timeout(&cache->request_wait, timeout); + + dprintk("NFS: create_request waking up (tot %d pid %d)\n", + nfs_nr_requests, current->pid); + } while (!req); + if (!req) + return NULL; - /* Put the task on inode's writeback request list. 
*/ + /* Initialize the request struct. Initially, we assume a + * long write-back delay. This will be adjusted in + * update_nfs_request below if the region is not locked. */ + req->wb_page = page; + atomic_inc(&page->count); + req->wb_offset = offset; + req->wb_bytes = count; + /* If the region is locked, adjust the timeout */ + if (region_locked(inode, req)) + req->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY; + else + req->wb_timeout = jiffies + NFS_WRITEBACK_DELAY; + req->wb_file = file; + req->wb_cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); get_file(file); - wreq->wb_file = file; - wreq->wb_pid = current->pid; - wreq->wb_page = page; - init_waitqueue_head(&wreq->wb_wait); - wreq->wb_offset = offset; - wreq->wb_bytes = bytes; - wreq->wb_count = 2; /* One for the IO, one for us */ + req->wb_count = 1; - kmap(page); - append_write_request(&NFS_WRITEBACK(inode), wreq); + /* register request's existence */ + cache->nr_requests++; + nfs_nr_requests++; + return req; +} - if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4) - rpc_wake_up_next(&write_queue); - return wreq; +/* + * Release all resources associated with a write request after it + * has been committed to stable storage + * + * Note: Should always be called with the spinlock held! + */ +void +nfs_release_request(struct nfs_page *req) +{ + struct inode *inode = req->wb_file->f_dentry->d_inode; + struct nfs_reqlist *cache = NFS_REQUESTLIST(inode); + struct page *page = req->wb_page; + + spin_lock(&nfs_wreq_lock); + if (--req->wb_count) { + spin_unlock(&nfs_wreq_lock); + return; + } + spin_unlock(&nfs_wreq_lock); -out_req: - rpc_release_task(task); - kmem_cache_free(nfs_wreq_cachep, wreq); -out_fail: - return NULL; + if (!list_empty(&req->wb_list)) { + printk(KERN_ERR "NFS: Request released while still on a list!\n"); + nfs_list_remove_request(req); + } + if (!list_empty(&req->wb_hash)) { + printk(KERN_ERR "NFS: Request released while still hashed!\n"); + nfs_inode_remove_request(req); + } + if (NFS_WBACK_BUSY(req)) + printk(KERN_ERR "NFS: Request released while still locked!\n"); + + rpcauth_releasecred(NFS_CLIENT(inode)->cl_auth, req->wb_cred); + fput(req->wb_file); + page_cache_release(page); + nfs_page_free(req); + /* wake up anyone waiting to allocate a request */ + cache->nr_requests--; + nfs_nr_requests--; + wake_up(&cache->request_wait); } /* - * Schedule a writeback RPC call. - * If the server is congested, don't add to our backlog of queued - * requests but call it synchronously. - * The function returns whether we should wait for the thing or not. + * Wait for a request to complete. * - * FIXME: Here we could walk the inode's lock list to see whether the - * page we're currently writing to has been write-locked by the caller. - * If it is, we could schedule an async write request with a long - * delay in order to avoid writing back the page until the lock is - * released. + * Interruptible by signals only if mounted with intr flag. 
*/ -static inline int -schedule_write_request(struct nfs_wreq *req, int sync) +static int +nfs_wait_on_request(struct nfs_page *req) { - struct rpc_task *task = &req->wb_task; - struct file *file = req->wb_file; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = req->wb_file->f_dentry->d_inode; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + int retval; - if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX) - sync = 1; - - if (sync) { - sigset_t oldmask; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - dprintk("NFS: %4d schedule_write_request (sync)\n", - task->tk_pid); - /* Page is already locked */ - rpc_clnt_sigmask(clnt, &oldmask); - rpc_execute(task); - rpc_clnt_sigunmask(clnt, &oldmask); - } else { - dprintk("NFS: %4d schedule_write_request (async)\n", - task->tk_pid); - task->tk_flags |= RPC_TASK_ASYNC; - task->tk_timeout = NFS_WRITEBACK_DELAY; - rpc_sleep_on(&write_queue, task, NULL, NULL); + if (!NFS_WBACK_BUSY(req)) + return 0; + req->wb_count++; + retval = nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req)); + nfs_release_request(req); + return retval; +} + +/* + * Wait for a request to complete. + * + * Interruptible by signals only if mounted with intr flag. + */ +static int +nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long start, unsigned int count) +{ + struct list_head *p, *head; + unsigned long idx_start, idx_end; + unsigned int pages = 0; + int error; + + idx_start = start >> PAGE_CACHE_SHIFT; + if (count == 0) + idx_end = ~0; + else { + unsigned long idx_count = (count-1) >> PAGE_CACHE_SHIFT; + idx_end = idx_start + idx_count; } + spin_lock(&nfs_wreq_lock); + head = &inode->u.nfs_i.writeback; + p = head->next; + while (p != head) { + unsigned long pg_idx; + struct nfs_page *req = nfs_inode_wb_entry(p); + + p = p->next; + + if (file && req->wb_file != file) + continue; + + pg_idx = page_index(req->wb_page); + if (pg_idx < idx_start || pg_idx > idx_end) + continue; - return sync; + if (!NFS_WBACK_BUSY(req)) + continue; + req->wb_count++; + spin_unlock(&nfs_wreq_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); + if (error < 0) + return error; + spin_lock(&nfs_wreq_lock); + p = head->next; + pages++; + } + spin_unlock(&nfs_wreq_lock); + return pages; } /* - * Wait for request to complete. + * Scan cluster for dirty pages and send as many of them to the + * server as possible. 
*/ static int -wait_on_write_request(struct nfs_wreq *req) +nfs_scan_list_timeout(struct list_head *head, struct list_head *dst, struct inode *inode) { - struct file *file = req->wb_file; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - DECLARE_WAITQUEUE(wait, current); - sigset_t oldmask; - int retval; + struct list_head *p; + struct nfs_page *req; + int pages = 0; + + p = head->next; + while (p != head) { + req = nfs_list_entry(p); + p = p->next; + if (time_after(req->wb_timeout, jiffies)) { + if (time_after(NFS_NEXTSCAN(inode), req->wb_timeout)) + NFS_NEXTSCAN(inode) = req->wb_timeout; + continue; + } + if (!nfs_lock_request(req)) + continue; + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + pages++; + } + return pages; +} + +static int +nfs_scan_dirty_timeout(struct inode *inode, struct list_head *dst) +{ + int pages; + spin_lock(&nfs_wreq_lock); + pages = nfs_scan_list_timeout(&inode->u.nfs_i.dirty, dst, inode); + inode->u.nfs_i.ndirty -= pages; + if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + spin_unlock(&nfs_wreq_lock); + return pages; +} - /* Make sure it's started.. */ - if (!WB_INPROGRESS(req)) - rpc_wake_up_task(&req->wb_task); +#ifdef CONFIG_NFS_V3 +static int +nfs_scan_commit_timeout(struct inode *inode, struct list_head *dst) +{ + int pages; + spin_lock(&nfs_wreq_lock); + pages = nfs_scan_list_timeout(&inode->u.nfs_i.commit, dst, inode); + inode->u.nfs_i.ncommit -= pages; + if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + spin_unlock(&nfs_wreq_lock); + return pages; +} +#endif + +static int +nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long start, unsigned int count) +{ + struct list_head *p; + struct nfs_page *req; + unsigned long idx_start, idx_end; + int pages; + + pages = 0; + idx_start = start >> PAGE_CACHE_SHIFT; + if (count == 0) + idx_end = ~0; + else + idx_end = idx_start + ((count-1) >> PAGE_CACHE_SHIFT); + p = src->next; + while (p != src) { + unsigned long pg_idx; + + req = nfs_list_entry(p); + p = p->next; + + if (file && req->wb_file != file) + continue; + + pg_idx = page_index(req->wb_page); + if (pg_idx < idx_start || pg_idx > idx_end) + continue; + + if (!nfs_lock_request(req)) + continue; + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + pages++; + } + return pages; +} + +static int +nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count) +{ + int pages; + spin_lock(&nfs_wreq_lock); + pages = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, start, count); + inode->u.nfs_i.ndirty -= pages; + if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + spin_unlock(&nfs_wreq_lock); + return pages; +} + +#ifdef CONFIG_NFS_V3 +static int +nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count) +{ + int pages; + spin_lock(&nfs_wreq_lock); + pages = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, start, count); + inode->u.nfs_i.ncommit -= pages; + if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + spin_unlock(&nfs_wreq_lock); + 
return pages; +} +#endif + + +static int +coalesce_requests(struct list_head *src, struct list_head *dst, unsigned int maxpages) +{ + struct nfs_page *req = NULL; + unsigned int pages = 0; + + while (!list_empty(src)) { + struct nfs_page *prev = req; + + req = nfs_list_entry(src->next); + if (prev) { + if (req->wb_file != prev->wb_file) + break; + + if (page_index(req->wb_page) != page_index(prev->wb_page)+1) + break; + + if (req->wb_offset != 0) + break; + } + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + pages++; + if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE) + break; + if (pages >= maxpages) + break; + } + return pages; +} + +/* + * Try to update any existing write request, or create one if there is none. + * In order to match, the request's credentials must match those of + * the calling process. + * + * Note: Should always be called with the Page Lock held! + */ +static struct nfs_page * +nfs_update_request(struct file* file, struct page *page, + unsigned long offset, unsigned int bytes) +{ + struct inode *inode = file->f_dentry->d_inode; + struct nfs_page *req, *new = NULL; + unsigned long rqend, end; + + end = offset + bytes; - rpc_clnt_sigmask(clnt, &oldmask); - add_wait_queue(&req->wb_wait, &wait); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - retval = 0; - if (req->wb_flags & NFS_WRITE_COMPLETE) + /* Loop over all inode entries and see if we find + * A request for the page we wish to update + */ + spin_lock(&nfs_wreq_lock); + req = _nfs_find_request(inode, page); + if (req) { + if (!nfs_lock_request(req)) { + spin_unlock(&nfs_wreq_lock); + nfs_wait_on_request(req); + nfs_release_request(req); + continue; + } + spin_unlock(&nfs_wreq_lock); + if (new) + nfs_release_request(new); break; - retval = -ERESTARTSYS; - if (signalled()) + } + + req = new; + if (req) { + nfs_lock_request(req); + nfs_inode_add_request(inode, req); + spin_unlock(&nfs_wreq_lock); + nfs_mark_request_dirty(req); break; - schedule(); + } + spin_unlock(&nfs_wreq_lock); + + /* Create the request. It's safe to sleep in this call because + * we only get here if the page is locked. + */ + new = nfs_create_request(inode, file, page, offset, bytes); + if (!new) + return ERR_PTR(-ENOMEM); + } + + /* We have a request for our page. + * If the creds don't match, or the + * page addresses don't match, + * tell the caller to wait on the conflicting + * request. + */ + rqend = req->wb_offset + req->wb_bytes; + if (req->wb_file != file + || req->wb_page != page + || !nfs_dirty_request(req) + || offset > rqend || end < req->wb_offset) { + nfs_unlock_request(req); + nfs_release_request(req); + return ERR_PTR(-EBUSY); + } + + /* Okay, the request matches. Update the region */ + if (offset < req->wb_offset) { + req->wb_offset = offset; + req->wb_bytes = rqend - req->wb_offset; } - remove_wait_queue(&req->wb_wait, &wait); - current->state = TASK_RUNNING; - rpc_clnt_sigunmask(clnt, &oldmask); - return retval; + + if (end > rqend) + req->wb_bytes = end - req->wb_offset; + + nfs_unlock_request(req); + + return req; } /* - * Write a page to the server. This will be used for NFS swapping only - * (for now), and we currently do this synchronously only. + * This is the strategy routine for NFS. + * It is called by nfs_updatepage whenever the user wrote up to the end + * of a page. + * + * We always try to submit a set of requests in parallel so that the + * server's write code can gather writes. This is mainly for the benefit + * of NFSv2. 
+ * + * We never submit more requests than we think the remote can handle. + * For UDP sockets, we make sure we don't exceed the congestion window; + * for TCP, we limit the number of requests to 8. + * + * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that + * should be sent out in one go. This is for the benefit of NFSv2 servers + * that perform write gathering. + * + * FIXME: Different servers may have different sweet spots. + * Record the average congestion window in server struct? */ -int -nfs_writepage(struct dentry * dentry, struct page *page) +#define NFS_STRATEGY_PAGES 8 +static void +nfs_strategy(struct file *file) { - struct inode *inode = dentry->d_inode; - unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; - unsigned offset = PAGE_CACHE_SIZE; - int err; + struct inode *inode = file->f_dentry->d_inode; + unsigned int dirty, wpages; + + dirty = inode->u.nfs_i.ndirty; + wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT; +#ifdef CONFIG_NFS_V3 + if (NFS_PROTO(inode)->version == 2) { + if (dirty >= NFS_STRATEGY_PAGES * wpages) + nfs_flush_file(inode, file, 0, 0, 0); + } else { + if (dirty >= wpages) + nfs_flush_file(inode, file, 0, 0, 0); + } +#else + if (dirty >= NFS_STRATEGY_PAGES * wpages) + nfs_flush_file(inode, file, 0, 0, 0); +#endif + /* + * If we're running out of requests, flush out everything + * in order to reduce memory useage... + */ + if (nfs_nr_requests > MAX_REQUEST_SOFT) + nfs_wb_file(inode, file); +} - /* easy case */ - if (page->index < end_index) - goto do_it; - /* things got complicated... */ - offset = inode->i_size & (PAGE_CACHE_SIZE-1); - /* OK, are we completely out? */ - if (page->index >= end_index+1 || !offset) - return -EIO; -do_it: - err = nfs_writepage_sync(dentry, inode, page, 0, offset); - if ( err == offset) return 0; - return err; +int +nfs_flush_incompatible(struct file *file, struct page *page) +{ + struct inode *inode = file->f_dentry->d_inode; + struct nfs_page *req; + int status = 0; + /* + * Look for a request corresponding to this page. If there + * is one, and it belongs to another file, we flush it out + * before we try to copy anything into the page. Do this + * due to the lack of an ACCESS-type call in NFSv2. + * Also do the same if we find a request from an existing + * dropped page. + */ + req = nfs_find_request(inode,page); + if (req) { + if (req->wb_file != file || req->wb_page != page) + status = nfs_wb_page(inode, page); + nfs_release_request(req); + } + return (status < 0) ? status : 0; } /* @@ -446,27 +1032,13 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - struct nfs_wreq *req; + struct nfs_page *req; int synchronous = file->f_flags & O_SYNC; - int retval; + int status = 0; - dprintk("NFS: nfs_updatepage(%s/%s %d@%ld)\n", + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - count, (page->index << PAGE_CACHE_SHIFT) +offset); - - /* - * Try to find a corresponding request on the writeback queue. - * If there is one, we can be sure that this request is not - * yet being processed, because we hold a lock on the page. - * - * If the request was created by us, update it. Otherwise, - * transfer the page lock and flush out the dirty page now. - * After returning, generic_file_write will wait on the - * page and retry the update. 
- */ - req = find_write_request(inode, page); - if (req && req->wb_file == file && update_write_request(req, offset, count)) - goto updated; + count, page_offset(page) +offset); /* * If wsize is smaller than page size, update and write @@ -475,241 +1047,542 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig if (NFS_SERVER(inode)->wsize < PAGE_SIZE) return nfs_writepage_sync(dentry, inode, page, offset, count); - /* Create the write request. */ - req = create_write_request(file, page, offset, count); - if (!req) - return -ENOBUFS; - /* - * Ok, there's another user of this page with the new request.. - * The IO completion will then free the page and the dentry. + * Try to find an NFS request corresponding to this page + * and update it. + * If the existing request cannot be updated, we must flush + * it out now. */ - get_page(page); - - /* Schedule request */ - synchronous = schedule_write_request(req, synchronous); + do { + req = nfs_update_request(file, page, offset, count); + status = (IS_ERR(req)) ? PTR_ERR(req) : 0; + if (status != -EBUSY) + break; + /* Request could not be updated. Flush it out and try again */ + status = nfs_wb_page(inode, page); + } while (status >= 0); + if (status < 0) + goto done; -updated: - if (req->wb_bytes == PAGE_SIZE) + if (req->wb_bytes == PAGE_CACHE_SIZE) SetPageUptodate(page); - retval = 0; + status = 0; if (synchronous) { - int status = wait_on_write_request(req); - if (status) { - nfs_cancel_request(req); - retval = status; - } else { - status = req->wb_status; - if (status < 0) - retval = status; - } + int error; - if (retval < 0) - ClearPageUptodate(page); + error = nfs_sync_file(inode, file, page_offset(page) + offset, count, FLUSH_SYNC|FLUSH_STABLE); + if (error < 0 || (error = file->f_error) < 0) + status = error; + file->f_error = 0; + } else { + /* If we wrote past the end of the page. + * Call the strategy routine so it can send out a bunch + * of requests. + */ + if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) + nfs_strategy(file); } - - free_write_request(req); - return retval; + nfs_release_request(req); +done: + dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", + status, inode->i_size); + if (status < 0) + clear_bit(PG_uptodate, &page->flags); + return status; } /* - * Cancel a write request. We always mark it cancelled, - * but if it's already in progress there's no point in - * calling rpc_exit, and we don't want to overwrite the - * tk_status field. - */ + * Set up the argument/result storage required for the RPC call. + */ static void -nfs_cancel_request(struct nfs_wreq *req) +nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data) { - req->wb_flags |= NFS_WRITE_CANCELLED; - if (!WB_INPROGRESS(req)) { - rpc_exit(&req->wb_task, 0); - rpc_wake_up_task(&req->wb_task); + struct nfs_page *req; + struct iovec *iov; + unsigned int count; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. 
*/ + + iov = data->args.iov; + count = 0; + while (!list_empty(head)) { + struct nfs_page *req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + iov->iov_base = (void *)(kmap(req->wb_page) + req->wb_offset); + iov->iov_len = req->wb_bytes; + count += req->wb_bytes; + iov++; + data->args.nriov++; } + req = nfs_list_entry(data->pages.next); + data->file = req->wb_file; + data->cred = req->wb_cred; + data->args.fh = NFS_FH(req->wb_file->f_dentry); + data->args.offset = page_offset(req->wb_page) + req->wb_offset; + data->args.count = count; + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.verf = &data->verf; } + /* - * Cancel all writeback requests, both pending and in progress. + * Create an RPC task for the given write request and kick it. + * The page must have been locked by the caller. + * + * It may happen that the page we're passed is not marked dirty. + * This is the case if nfs_updatepage detects a conflicting request + * that has been written but not committed. */ -static void -nfs_cancel_dirty(struct inode *inode, pid_t pid) +static int +nfs_flush_one(struct list_head *head, struct file *file, int how) { - struct nfs_wreq *head, *req; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_write_data *data; + struct rpc_task *task; + struct rpc_message msg; + int flags, + async = !(how & FLUSH_SYNC), + stable = (how & FLUSH_STABLE); + sigset_t oldset; + + + data = nfs_writedata_alloc(); + if (!data) + goto out_bad; + task = &data->task; + + /* Set the initial flags for the task. */ + flags = (async) ? RPC_TASK_ASYNC : 0; + + /* Set up the argument struct */ + nfs_write_rpcsetup(head, data); + if (stable) { + if (!inode->u.nfs_i.ncommit) + data->args.stable = NFS_FILE_SYNC; + else + data->args.stable = NFS_DATA_SYNC; + } else + data->args.stable = NFS_UNSTABLE; + + /* Finalize the task. */ + rpc_init_task(task, clnt, nfs_writeback_done, flags); + task->tk_calldata = data; + +#ifdef CONFIG_NFS_V3 + msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? 
NFS3PROC_WRITE : NFSPROC_WRITE; +#else + msg.rpc_proc = NFSPROC_WRITE; +#endif + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + msg.rpc_cred = data->cred; + + dprintk("NFS: %4d initiated write call (req %s/%s count %d nriov %d)\n", + task->tk_pid, + dentry->d_parent->d_name.name, + dentry->d_name.name, + data->args.count, data->args.nriov); + + rpc_clnt_sigmask(clnt, &oldset); + rpc_call_setup(task, &msg, 0); + rpc_execute(task); + rpc_clnt_sigunmask(clnt, &oldset); + return 0; + out_bad: + while (!list_empty(head)) { + struct nfs_page *req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_dirty(req); + nfs_unlock_request(req); + } + return -ENOMEM; +} - req = head = NFS_WRITEBACK(inode); - while (req != NULL) { - if (pid == 0 || req->wb_pid == pid) - nfs_cancel_request(req); - if ((req = WB_NEXT(req)) == head) +static int +nfs_flush_list(struct inode *inode, struct list_head *head, int how) +{ + LIST_HEAD(one_request); + struct nfs_page *req; + int error = 0; + unsigned int pages = 0, + wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT; + + while (!list_empty(head)) { + pages += coalesce_requests(head, &one_request, wpages); + req = nfs_list_entry(one_request.next); + error = nfs_flush_one(&one_request, req->wb_file, how); + if (error < 0) break; } + if (error >= 0) + return pages; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_dirty(req); + nfs_unlock_request(req); + } + return error; } + /* - * If we're waiting on somebody else's request - * we need to increment the counter during the - * wait so that the request doesn't disappear - * from under us during the wait.. + * This function is called when the WRITE call is complete. */ -static int FASTCALL(wait_on_other_req(struct nfs_wreq *)); -static int wait_on_other_req(struct nfs_wreq *req) +static void +nfs_writeback_done(struct rpc_task *task) { - int retval; - req->wb_count++; - retval = wait_on_write_request(req); - free_write_request(req); - return retval; -} + struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_writeargs *argp = &data->args; + struct nfs_writeres *resp = &data->res; + struct dentry *dentry = data->file->f_dentry; + struct inode *inode = dentry->d_inode; + struct nfs_page *req; + + dprintk("NFS: %4d nfs_writeback_done (status %d)\n", + task->tk_pid, task->tk_status); + + /* We can't handle that yet but we check for it nevertheless */ + if (resp->count < argp->count && task->tk_status >= 0) { + static unsigned long complain = 0; + if (time_before(complain, jiffies)) { + printk(KERN_WARNING + "NFS: Server wrote less than requested.\n"); + complain = jiffies + 300 * HZ; + } + /* Can't do anything about it right now except throw + * an error. */ + task->tk_status = -EIO; + } +#ifdef CONFIG_NFS_V3 + if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + /* We tried a write call, but the server did not + * commit data to stable storage even though we + * requested it. + */ + static unsigned long complain = 0; + + if (time_before(complain, jiffies)) { + printk(KERN_NOTICE "NFS: faulty NFSv3 server %s:" + " (committed = %d) != (stable = %d)\n", + NFS_SERVER(inode)->hostname, + resp->verf->committed, argp->stable); + complain = jiffies + 300 * HZ; + } + } +#endif -/* - * This writes back a set of requests according to the condition. - * - * If this ever gets much more convoluted, use a fn pointer for - * the condition.. 
- */ -#define NFS_WB(inode, cond) { int retval = 0 ; \ - do { \ - struct nfs_wreq *req = NFS_WRITEBACK(inode); \ - struct nfs_wreq *head = req; \ - if (!req) break; \ - for (;;) { \ - if (!(req->wb_flags & NFS_WRITE_COMPLETE)) \ - if (cond) break; \ - req = WB_NEXT(req); \ - if (req == head) goto out; \ - } \ - retval = wait_on_other_req(req); \ - } while (!retval); \ -out: return retval; \ -} + /* Update attributes as result of writeback. */ + if (task->tk_status >= 0) + nfs_write_attributes(inode, resp->fattr); -int -nfs_wb_all(struct inode *inode) -{ - NFS_WB(inode, 1); + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + + kunmap(req->wb_page); + + dprintk("NFS: write (%s/%s %d@%Ld)", + req->wb_file->f_dentry->d_parent->d_name.name, + req->wb_file->f_dentry->d_name.name, + req->wb_bytes, + page_offset(req->wb_page) + req->wb_offset); + + if (task->tk_status < 0) { + req->wb_file->f_error = task->tk_status; + nfs_inode_remove_request(req); + dprintk(", error = %d\n", task->tk_status); + goto next; + } + +#ifdef CONFIG_NFS_V3 + if (resp->verf->committed != NFS_UNSTABLE) { + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf)); + req->wb_timeout = jiffies + NFS_COMMIT_DELAY; + nfs_mark_request_commit(req); + dprintk(" marked for commit\n"); +#else + nfs_inode_remove_request(req); +#endif + next: + nfs_unlock_request(req); + } + nfs_writedata_release(task); } + +#ifdef CONFIG_NFS_V3 /* - * Write back all requests on one page - we do this before reading it. + * Set up the argument/result storage required for the RPC call. */ -int -nfs_wb_page(struct inode *inode, struct page *page) +static void +nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) { - NFS_WB(inode, req->wb_page == page); + struct nfs_page *req; + struct dentry *dentry; + struct inode *inode; + unsigned long start, end, len; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. */ + + end = 0; + start = ~0; + req = nfs_list_entry(head->next); + data->file = req->wb_file; + data->cred = req->wb_cred; + dentry = data->file->f_dentry; + inode = dentry->d_inode; + while (!list_empty(head)) { + struct nfs_page *req; + unsigned long rqstart, rqend; + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + rqstart = page_offset(req->wb_page) + req->wb_offset; + rqend = rqstart + req->wb_bytes; + if (rqstart < start) + start = rqstart; + if (rqend > end) + end = rqend; + } + data->args.fh = NFS_FH(dentry); + data->args.offset = start; + len = end - start; + if (end >= inode->i_size || len > (~((u32)0) >> 1)) + len = 0; + data->res.count = data->args.count = (u32)len; + data->res.fattr = &data->fattr; + data->res.verf = &data->verf; } /* - * Write back all pending writes from one file descriptor.. + * Commit dirty pages */ -int -nfs_wb_file(struct inode *inode, struct file *file) -{ - NFS_WB(inode, req->wb_file == file); -} - -void -nfs_inval(struct inode *inode) +static int +nfs_commit_list(struct list_head *head, int how) { - nfs_cancel_dirty(inode,0); + struct rpc_message msg; + struct file *file; + struct rpc_clnt *clnt; + struct nfs_write_data *data; + struct rpc_task *task; + struct nfs_page *req; + int flags, + async = !(how & FLUSH_SYNC); + sigset_t oldset; + + data = nfs_writedata_alloc(); + + if (!data) + goto out_bad; + task = &data->task; + + flags = (async) ? 
RPC_TASK_ASYNC : 0; + + /* Set up the argument struct */ + nfs_commit_rpcsetup(head, data); + req = nfs_list_entry(data->pages.next); + file = req->wb_file; + clnt = NFS_CLIENT(file->f_dentry->d_inode); + + rpc_init_task(task, clnt, nfs_commit_done, flags); + task->tk_calldata = data; + + msg.rpc_proc = NFS3PROC_COMMIT; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + msg.rpc_cred = data->cred; + + dprintk("NFS: %4d initiated commit call\n", task->tk_pid); + rpc_clnt_sigmask(clnt, &oldset); + rpc_call_setup(task, &msg, 0); + rpc_execute(task); + rpc_clnt_sigunmask(clnt, &oldset); + return 0; + out_bad: + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_mark_request_commit(req); + nfs_unlock_request(req); + } + return -ENOMEM; } /* - * The following procedures make up the writeback finite state machinery: - * - * 1. Try to lock the page if not yet locked by us, - * set up the RPC call info, and pass to the call FSM. + * COMMIT call returned */ static void -nfs_wback_begin(struct rpc_task *task) +nfs_commit_done(struct rpc_task *task) { - struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata; - struct page *page = req->wb_page; - struct file *file = req->wb_file; - struct dentry *dentry = file->f_dentry; - - dprintk("NFS: %4d nfs_wback_begin (%s/%s, status=%d flags=%x)\n", - task->tk_pid, dentry->d_parent->d_name.name, - dentry->d_name.name, task->tk_status, req->wb_flags); + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_writeres *resp = &data->res; + struct nfs_page *req; + struct dentry *dentry = data->file->f_dentry; + struct inode *inode = dentry->d_inode; - task->tk_status = 0; + dprintk("NFS: %4d nfs_commit_done (status %d)\n", + task->tk_pid, task->tk_status); + + nfs_refresh_inode(inode, resp->fattr); + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + + dprintk("NFS: commit (%s/%s %d@%ld)", + req->wb_file->f_dentry->d_parent->d_name.name, + req->wb_file->f_dentry->d_name.name, + req->wb_bytes, + page_offset(req->wb_page) + req->wb_offset); + if (task->tk_status < 0) { + req->wb_file->f_error = task->tk_status; + nfs_inode_remove_request(req); + dprintk(", error = %d\n", task->tk_status); + goto next; + } - /* Setup the task struct for a writeback call */ - req->wb_flags |= NFS_WRITE_INPROGRESS; - req->wb_args.fh = NFS_FH(dentry); - req->wb_args.offset = (page->index << PAGE_CACHE_SHIFT) + req->wb_offset; - req->wb_args.count = req->wb_bytes; - req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset); + /* Okay, COMMIT succeeded, apparently. Check the verifier + * returned by the server against all stored verfs. */ + if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + /* We have a match */ + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; + } + /* We have a mismatch. 
Write the page again */ + dprintk(" mismatch\n"); + nfs_mark_request_dirty(req); + next: + nfs_unlock_request(req); + } + nfs_writedata_release(task); +} +#endif - rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0); +int nfs_flush_file(struct inode *inode, struct file *file, unsigned long start, + unsigned int count, int how) +{ + LIST_HEAD(head); + int pages, + error = 0; + + pages = nfs_scan_dirty(inode, &head, file, start, count); + if (pages) + error = nfs_flush_list(inode, &head, how); + if (error < 0) + return error; + return pages; +} - return; +int nfs_flush_timeout(struct inode *inode, int how) +{ + LIST_HEAD(head); + int pages, + error = 0; + + pages = nfs_scan_dirty_timeout(inode, &head); + if (pages) + error = nfs_flush_list(inode, &head, how); + if (error < 0) + return error; + return pages; } -/* - * 2. Collect the result - */ -static void -nfs_wback_result(struct rpc_task *task) +#ifdef CONFIG_NFS_V3 +int nfs_commit_file(struct inode *inode, struct file *file, unsigned long start, + unsigned int count, int how) { - struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata; - struct file *file = req->wb_file; - struct page *page = req->wb_page; - int status = task->tk_status; - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; + LIST_HEAD(head); + int pages, + error = 0; + + pages = nfs_scan_commit(inode, &head, file, start, count); + if (pages) + error = nfs_commit_list(&head, how); + if (error < 0) + return error; + return pages; +} - dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n", - task->tk_pid, dentry->d_parent->d_name.name, - dentry->d_name.name, status, req->wb_flags); - - /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */ - req->wb_flags |= NFS_WRITE_COMPLETE; - req->wb_status = status; - - if (status < 0) { - req->wb_flags |= NFS_WRITE_INVALIDATE; - file->f_error = status; - } else if (!WB_CANCELLED(req)) { - struct nfs_fattr *fattr = &req->wb_fattr; - /* Update attributes as result of writeback. - * Beware: when UDP replies arrive out of order, we - * may end up overwriting a previous, bigger file size. - * - * When the file size shrinks we cancel all pending - * writebacks. - */ - if (fattr->mtime.seconds >= inode->i_mtime) { - if (fattr->size < inode->i_size) - fattr->size = inode->i_size; - - /* possible Solaris 2.5 server bug workaround */ - if (inode->i_ino == fattr->fileid) { - /* - * We expect these values to change, and - * don't want to invalidate the caches. 
- */ - inode->i_size = fattr->size; - inode->i_mtime = fattr->mtime.seconds; - nfs_refresh_inode(inode, fattr); - } - else - printk("nfs_wback_result: inode %ld, got %u?\n", - inode->i_ino, fattr->fileid); - } +int nfs_commit_timeout(struct inode *inode, int how) +{ + LIST_HEAD(head); + int pages, + error = 0; + + pages = nfs_scan_commit_timeout(inode, &head); + if (pages) { + pages += nfs_scan_commit(inode, &head, NULL, 0, 0); + error = nfs_commit_list(&head, how); } + if (error < 0) + return error; + return pages; +} +#endif - rpc_release_task(task); +int nfs_sync_file(struct inode *inode, struct file *file, unsigned long start, + unsigned int count, int how) +{ + int error, + wait; - if (WB_INVALIDATE(req)) - ClearPageUptodate(page); + wait = how & FLUSH_WAIT; + how &= ~FLUSH_WAIT; - kunmap(page); - __free_page(page); - remove_write_request(&NFS_WRITEBACK(inode), req); - nr_write_requests--; - fput(req->wb_file); + if (!inode && file) + inode = file->f_dentry->d_inode; - wake_up(&req->wb_wait); - free_write_request(req); + do { + error = 0; + if (wait) + error = nfs_wait_on_requests(inode, file, start, count); + if (error == 0) + error = nfs_flush_file(inode, file, start, count, how); +#ifdef CONFIG_NFS_V3 + if (error == 0) + error = nfs_commit_file(inode, file, start, count, how); +#endif + } while (error > 0); + return error; +} + +int nfs_init_nfspagecache(void) +{ + nfs_page_cachep = kmem_cache_create("nfs_page", + sizeof(struct nfs_page), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (nfs_page_cachep == NULL) + return -ENOMEM; + + nfs_wdata_cachep = kmem_cache_create("nfs_write_data", + sizeof(struct nfs_write_data), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (nfs_wdata_cachep == NULL) + return -ENOMEM; + + return 0; +} + +void nfs_destroy_nfspagecache(void) +{ + if (kmem_cache_destroy(nfs_page_cachep)) + printk(KERN_INFO "nfs_page: not all structures were freed\n"); + if (kmem_cache_destroy(nfs_wdata_cachep)) + printk(KERN_INFO "nfs_write_data: not all structures were freed\n"); } + diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8d2b610a8..c6ea9074c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -483,7 +483,10 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino, * fh must be initialized before calling fh_compose */ fh_init(&fh, maxsize); - err = fh_compose(&fh, exp, dentry); + if (fh_compose(&fh, exp, dentry)) + err = -EINVAL; + else + err = 0; memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh)); fh_put(&fh); return err; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 6e102db9c..969ff54a9 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -748,6 +748,7 @@ encode_entry(struct readdir_cd *cd, const char *name, if (fh_compose(&fh, exp, dchild) != 0 || !dchild->d_inode) goto noexec; p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry); + *p++ = xdr_one; /* yes, a file handle follows */ p = encode_fh(p, &fh); fh_put(&fh); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f755adc8c..97b46f0c7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -42,8 +42,8 @@ static int nfsctl_export(struct nfsctl_export *data); static int nfsctl_unexport(struct nfsctl_export *data); static int nfsctl_getfh(struct nfsctl_fhparm *, __u8 *); static int nfsctl_getfd(struct nfsctl_fdparm *, __u8 *); -#ifdef notyet static int nfsctl_getfs(struct nfsctl_fsparm *, struct knfsd_fh *); +#ifdef notyet static int nfsctl_ugidupdate(struct nfsctl_ugidmap *data); #endif @@ -112,7 +112,6 @@ nfsctl_ugidupdate(nfs_ugidmap *data) } #endif -#ifdef notyet static inline int 
nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res) { @@ -131,10 +130,9 @@ nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res) else err = exp_rootfh(clp, 0, 0, data->gd_path, res, data->gd_maxlen); exp_unlock(); - + /*HACK*/ res->fh_size = NFS_FHSIZE; /* HACK until lockd handles var-length handles */ return err; } -#endif static inline int nfsctl_getfd(struct nfsctl_fdparm *data, __u8 *res) @@ -206,6 +204,21 @@ nfsctl_getfh(struct nfsctl_fhparm *data, __u8 *res) #define handle_sys_nfsservctl sys_nfsservctl #endif +static struct { + int argsize, respsize; +} sizes[] = { + /* NFSCTL_SVC */ { sizeof(struct nfsctl_svc), 0 }, + /* NFSCTL_ADDCLIENT */ { sizeof(struct nfsctl_client), 0}, + /* NFSCTL_DELCLIENT */ { sizeof(struct nfsctl_client), 0}, + /* NFSCTL_EXPORT */ { sizeof(struct nfsctl_export), 0}, + /* NFSCTL_UNEXPORT */ { sizeof(struct nfsctl_export), 0}, + /* NFSCTL_UGIDUPDATE */ { sizeof(struct nfsctl_uidmap), 0}, + /* NFSCTL_GETFH */ { sizeof(struct nfsctl_fhparm), NFS_FHSIZE}, + /* NFSCTL_GETFD */ { sizeof(struct nfsctl_fdparm), NFS_FHSIZE}, + /* NFSCTL_GETFS */ { sizeof(struct nfsctl_fsparm), sizeof(struct knfsd_fh)}, +}; +#define CMD_MAX (sizeof(sizes)/sizeof(sizes[0])-1) + int asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) { @@ -214,6 +227,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) struct nfsctl_arg * arg = NULL; union nfsctl_res * res = NULL; int err; + int argsize, respsize; MOD_INC_USE_COUNT; lock_kernel (); @@ -223,12 +237,16 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) if (!capable(CAP_SYS_ADMIN)) { goto done; } + err = -EINVAL; + if (cmd<0 || cmd > CMD_MAX) + goto done; err = -EFAULT; - if (!access_ok(VERIFY_READ, argp, sizeof(*argp)) - || (resp && !access_ok(VERIFY_WRITE, resp, sizeof(*resp)))) { + argsize = sizes[cmd].argsize + sizeof(int); /* int for ca_version */ + respsize = sizes[cmd].respsize; /* maximum */ + if (!access_ok(VERIFY_READ, argp, argsize) + || (resp && !access_ok(VERIFY_WRITE, resp, respsize))) { goto done; } - err = -ENOMEM; /* ??? 
*/ if (!(arg = kmalloc(sizeof(*arg), GFP_USER)) || (resp && !(res = kmalloc(sizeof(*res), GFP_USER)))) { @@ -236,7 +254,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) } err = -EINVAL; - copy_from_user(arg, argp, sizeof(*argp)); + copy_from_user(arg, argp, argsize); if (arg->ca_version != NFSCTL_VERSION) { printk(KERN_WARNING "nfsd: incompatible version in syscall.\n"); goto done; @@ -269,16 +287,16 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp) case NFSCTL_GETFD: err = nfsctl_getfd(&arg->ca_getfd, res->cr_getfh); break; -#ifdef notyet case NFSCTL_GETFS: err = nfsctl_getfs(&arg->ca_getfs, &res->cr_getfs); -#endif + respsize = res->cr_getfs.fh_size+sizeof(int); + break; default: err = -EINVAL; } - if (!err && resp) - copy_to_user(resp, res, sizeof(*resp)); + if (!err && resp && respsize) + copy_to_user(resp, res, respsize); done: if (arg) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6f69225cc..5c312b906 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -223,9 +223,10 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, goto done; fh_lock(dirfhp); dchild = lookup_one(argp->name, dget(dirfhp->fh_dentry)); - nfserr = nfserrno(PTR_ERR(dchild)); - if (IS_ERR(dchild)) + if (IS_ERR(dchild)) { + nfserr = nfserrno(PTR_ERR(dchild)); goto out_unlock; + } fh_init(newfhp, NFS_FHSIZE); nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild); if (!nfserr && !dchild->d_inode) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d69bba8d0..fb3b32f8d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -43,6 +43,8 @@ extern struct svc_program nfsd_program; static void nfsd(struct svc_rqst *rqstp); struct timeval nfssvc_boot = { 0, 0 }; static struct svc_serv *nfsd_serv = NULL; +static int nfsd_busy = 0; +static unsigned long nfsd_last_call; struct nfsd_list { struct list_head list; @@ -115,6 +117,24 @@ nfsd_svc(unsigned short port, int nrservs) return error; } +static void inline +update_thread_usage(int busy_threads) +{ + unsigned long prev_call; + unsigned long diff; + int decile; + + prev_call = nfsd_last_call; + nfsd_last_call = jiffies; + decile = busy_threads*10/nfsdstats.th_cnt; + if (decile>0 && decile <= 10) { + diff = nfsd_last_call - prev_call; + nfsdstats.th_usage[decile-1] += diff; + if (decile == 10) + nfsdstats.th_fullcnt++; + } +} + /* * This is the NFS server kernel thread */ @@ -134,6 +154,7 @@ nfsd(struct svc_rqst *rqstp) sprintf(current->comm, "nfsd"); current->fs->umask = 0; + nfsdstats.th_cnt++; /* Let svc_process check client's authentication. */ rqstp->rq_auth = 1; @@ -161,6 +182,8 @@ nfsd(struct svc_rqst *rqstp) ; if (err < 0) break; + update_thread_usage(nfsd_busy); + nfsd_busy++; /* Lock the export hash tables for reading. 
*/ exp_readlock(); @@ -179,6 +202,8 @@ nfsd(struct svc_rqst *rqstp) /* Unlock export hash tables */ exp_unlock(); + update_thread_usage(nfsd_busy); + nfsd_busy--; } if (err != -EINTR) { @@ -202,6 +227,7 @@ nfsd(struct svc_rqst *rqstp) nfsd_racache_shutdown(); /* release read-ahead cache */ } list_del(&me.list); + nfsdstats.th_cnt --; /* Release the thread */ svc_exit_thread(rqstp); diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 69defe790..254242fb6 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -8,6 +8,16 @@ * Format: * rc <hits> <misses> <nocache> * Statistsics for the reply cache + * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> + * statistics for filehandle lookup + * io <bytes-read> <bytes-writtten> + * statistics for IO throughput + * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> + * time (milliseconds) when nfsd thread usage above thresholds + * and number of times that all threads were in use + * ra cache-size <10% <20% <30% ... <100% not-found + * number of times that read-ahead entry was found that deep in + * the cache. * plus generic RPC stats (see net/sunrpc/stats.c) * * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> @@ -33,17 +43,30 @@ nfsd_proc_read(char *buffer, char **start, off_t offset, int count, int *eof, void *data) { int len; + int i; - len = sprintf(buffer, "rc %d %d %d %d %d %d %d %d\n", - nfsdstats.rchits, - nfsdstats.rcmisses, - nfsdstats.rcnocache, - nfsdstats.fh_stale, - nfsdstats.fh_lookup, - nfsdstats.fh_anon, - nfsdstats.fh_nocache_dir, - nfsdstats.fh_nocache_nondir); + len = sprintf(buffer, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", + nfsdstats.rchits, + nfsdstats.rcmisses, + nfsdstats.rcnocache, + nfsdstats.fh_stale, + nfsdstats.fh_lookup, + nfsdstats.fh_anon, + nfsdstats.fh_nocache_dir, + nfsdstats.fh_nocache_nondir, + nfsdstats.io_read, + nfsdstats.io_write); + /* thread usage: */ + len += sprintf(buffer+len, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); + for (i=0; i<10; i++) + len += sprintf(buffer+len, " %u", nfsdstats.th_usage[i]); + /* newline and ra-cache */ + len += sprintf(buffer+len, "\nra %u", nfsdstats.ra_size); + for (i=0; i<11; i++) + len += sprintf(buffer+len, " %u", nfsdstats.ra_depth[i]); + len += sprintf(buffer+len, "\n"); + /* Assume we haven't hit EOF yet. Will be set by svc_proc_read. 
*/ *eof = 0; @@ -53,13 +76,13 @@ nfsd_proc_read(char *buffer, char **start, off_t offset, int count, */ if (len <= offset) { len = svc_proc_read(buffer, start, offset - len, count, - eof, data); + eof, data); return len; } if (len < count) { len += svc_proc_read(buffer + len, start, 0, count - len, - eof, data); + eof, data); } if (offset >= len) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e3be271a2..5cd55fda8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -539,13 +539,16 @@ static inline struct raparms * nfsd_get_raparms(dev_t dev, ino_t ino) { struct raparms *ra, **rap, **frap = NULL; - + int depth = 0; + for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { if (ra->p_ino == ino && ra->p_dev == dev) goto found; + depth++; if (ra->p_count == 0) frap = rap; } + depth = nfsdstats.ra_size*11/10; if (!frap) return NULL; rap = frap; @@ -560,6 +563,7 @@ found: raparm_cache = ra; } ra->p_count++; + nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; return ra; } @@ -598,6 +602,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, oldfs = get_fs(); set_fs(KERNEL_DS); err = file.f_op->read(&file, buf, *count, &file.f_pos); set_fs(oldfs); + nfsdstats.io_read += *count; /* Write back readahead params */ if (ra != NULL) { @@ -691,6 +696,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, #else err = file.f_op->write(&file, buf, cnt, &file.f_pos); #endif + nfsdstats.io_write += cnt; set_fs(oldfs); /* clear setuid/setgid flag after write */ @@ -1559,5 +1565,6 @@ nfsd_racache_init(int cache_size) "nfsd: Could not allocate memory read-ahead cache.\n"); return -ENOMEM; } + nfsdstats.ra_size = cache_size; return 0; } diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index e95a36179..e6abd178e 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -595,7 +595,7 @@ static int ntfs_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,ntfs_get_block); } -static int ntfs_prepare_write(struct page *page, unsigned from, unsigned to) +static int ntfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return cont_prepare_write(page,from,to,ntfs_get_block, &((struct inode*)page->mapping->host)->u.ntfs_i.mmu_private); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 8a5e404b1..bd8aa6b98 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -258,7 +258,7 @@ static void ntfs_load_attributes(ntfs_inode* ino) if( !buf ) return; delta=0; - for(offset=0;datasize;datasize-=len) + for(offset=0;datasize;datasize-=len,offset+=len) { ntfs_io io; io.fn_put=ntfs_put; @@ -268,7 +268,7 @@ static void ntfs_load_attributes(ntfs_inode* ino) if(ntfs_read_attr(ino,vol->at_attribute_list,0,offset,&io)){ ntfs_error("error in load_attributes\n"); } - delta=len; + delta+=len; parse_attributes(ino,buf,&delta); if(delta) /* move remaining bytes to buffer start */ @@ -298,7 +298,6 @@ asmlinkage long sys_access(const char * filename, int mode) if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ return -EINVAL; - lock_kernel(); old_fsuid = current->fsuid; old_fsgid = current->fsgid; old_cap = current->cap_effective; @@ -311,7 +310,8 @@ asmlinkage long sys_access(const char * filename, int mode) cap_clear(current->cap_effective); else current->cap_effective = current->cap_permitted; - + + lock_kernel(); dentry = namei(filename); res = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -321,12 +321,12 @@ asmlinkage long sys_access(const char * filename, int mode) res = -EROFS; dput(dentry); } + unlock_kernel(); current->fsuid = old_fsuid; current->fsgid = old_fsgid; current->cap_effective = old_cap; - unlock_kernel(); return res; } @@ -646,25 +646,35 @@ out: */ struct file *filp_open(const char * filename, int flags, int mode, struct dentry * base) { - struct inode * inode; struct dentry * dentry; - struct file * f; int flag,error; - error = -ENFILE; - f = get_empty_filp(); - if (!f) - goto out; - f->f_flags = flag = flags; - f->f_mode = (flag+1) & O_ACCMODE; - if (f->f_mode) + flag = flags; + if ((flag+1) & O_ACCMODE) flag++; if (flag & O_TRUNC) flag |= 2; + dentry = __open_namei(filename, flag, mode, base); error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto cleanup_file; + if (!IS_ERR(dentry)) + return dentry_open(dentry, flags); + + return ERR_PTR(error); +} + +struct file *dentry_open(struct dentry *dentry, int flags) +{ + struct file * f; + struct inode *inode; + int error; + + error = -ENFILE; + f = get_empty_filp(); + if (!f) + goto cleanup_dentry; + f->f_flags = flags; + f->f_mode = (flags+1) & O_ACCMODE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); @@ -692,12 +702,10 @@ struct file *filp_open(const char * filename, int flags, int mode, struct dentry cleanup_all: if (f->f_mode & FMODE_WRITE) put_write_access(inode); -cleanup_dentry: f->f_dentry = NULL; +cleanup_dentry: dput(dentry); -cleanup_file: put_filp(f); -out: return ERR_PTR(error); } diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 27b4be8cb..92eed7559 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -1,4 +1,4 @@ -/* $Id: inode.c,v 1.9 2000/03/13 21:59:43 davem Exp $ +/* $Id: inode.c,v 1.10 2000/03/24 01:32:51 davem Exp $ * openpromfs.c: /proc/openprom handling routines * * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) @@ -980,10 +980,6 @@ static void openprom_read_inode(struct inode * inode) } } -static void openprom_put_super(struct super_block *sb) -{ -} - static int openprom_statfs(struct super_block *sb, struct statfs *buf) { buf->f_type = OPENPROM_SUPER_MAGIC; @@ -997,7 +993,6 @@ static int openprom_statfs(struct super_block *sb, struct statfs *buf) static struct super_operations openprom_sops = { read_inode: openprom_read_inode, - put_super: openprom_put_super, statfs: openprom_statfs, }; diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index fc9555b77..21330f499 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -26,7 +26,10 @@ #include <linux/major.h> #include <linux/string.h> #include <linux/blk.h> + +#ifdef CONFIG_IDE #include <linux/ide.h> /* IDE xlate */ +#endif /* CONFIG_IDE */ #include <asm/system.h> @@ -347,19 +350,19 @@ int msdos_partition(struct gendisk *hd, kdev_t dev, unsigned char *data; int mask = (1 << hd->minor_shift) - 1; int sector_size = get_hardsect_size(dev) / 512; -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_IDE int tested_for_xlate = 0; read_mbr: -#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ +#endif /* CONFIG_IDE */ if 
(!(bh = bread(dev,0,get_ptable_blocksize(dev)))) { if (warn_no_part) printk(" unable to read partition table\n"); return -1; } data = bh->b_data; -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_IDE check_table: -#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ +#endif /* CONFIG_IDE */ /* Use bforget(), because we may have changed the disk geometry */ if (*(unsigned short *) (0x1fe + data) != cpu_to_le16(MSDOS_LABEL_MAGIC)) { bforget(bh); @@ -367,7 +370,7 @@ check_table: } p = (struct partition *) (0x1be + data); -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_IDE if (!tested_for_xlate++) { /* Do this only once per disk */ /* * Look for various forms of IDE disk geometry translation @@ -423,7 +426,7 @@ check_table: (void) ide_xlate_1024(dev, 2, heads, " [PTBL]"); } } -#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */ +#endif /* CONFIG_IDE */ /* Look for partitions in two passes: First find the primary partitions, and the DOS-type extended partitions. @@ -13,16 +13,6 @@ #include <asm/uaccess.h> /* - * Define this if you want SunOS compatibility wrt braindead - * select behaviour on FIFO's. - */ -#ifdef __sparc__ -#define FIFO_SUNOS_BRAINDAMAGE -#else -#undef FIFO_SUNOS_BRAINDAMAGE -#endif - -/* * We use a start+len construction, which provides full use of the * allocated memory. * -- Florian Coosmann (FGC) @@ -32,7 +22,7 @@ */ /* Drop the inode semaphore and wait for a pipe event, atomically */ -static void pipe_wait(struct inode * inode) +void pipe_wait(struct inode * inode) { DECLARE_WAITQUEUE(wait, current); current->state = TASK_INTERRUPTIBLE; @@ -296,7 +286,7 @@ pipe_poll(struct file *filp, poll_table *wait) mask = POLLIN | POLLRDNORM; if (PIPE_EMPTY(*inode)) mask = POLLOUT | POLLWRNORM; - if (!PIPE_WRITERS(*inode)) + if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode)) mask |= POLLHUP; if (!PIPE_READERS(*inode)) mask |= POLLERR; @@ -304,72 +294,9 @@ pipe_poll(struct file *filp, poll_table *wait) return mask; } -#ifdef FIFO_SUNOS_BRAINDAMAGE -/* - * Argh! Why does SunOS have to have different select() behaviour - * for pipes and FIFOs? Hate, hate, hate! SunOS lacks POLLHUP. - */ -static unsigned int -fifo_poll(struct file *filp, poll_table *wait) -{ - unsigned int mask; - struct inode *inode = filp->f_dentry->d_inode; - - poll_wait(filp, PIPE_WAIT(*inode), wait); - - /* Reading only -- no need for aquiring the semaphore. */ - mask = POLLIN | POLLRDNORM; - if (PIPE_EMPTY(*inode)) - mask = POLLOUT | POLLWRNORM; - if (!PIPE_READERS(*inode)) - mask |= POLLERR; - - return mask; -} -#else - +/* FIXME: most Unices do not set POLLERR for fifos */ #define fifo_poll pipe_poll -#endif /* FIFO_SUNOS_BRAINDAMAGE */ - -/* - * The 'connect_xxx()' functions are needed for named pipes when - * the open() code hasn't guaranteed a connection (O_NONBLOCK), - * and we need to act differently until we do get a writer.. - */ -static ssize_t -connect_read(struct file *filp, char *buf, size_t count, loff_t *ppos) -{ - struct inode *inode = filp->f_dentry->d_inode; - - /* Reading only -- no need for aquiring the semaphore. 
*/ - if (PIPE_EMPTY(*inode) && !PIPE_WRITERS(*inode)) - return 0; - - filp->f_op = &read_fifo_fops; - return pipe_read(filp, buf, count, ppos); -} - -static unsigned int -connect_poll(struct file *filp, poll_table *wait) -{ - struct inode *inode = filp->f_dentry->d_inode; - unsigned int mask = 0; - - poll_wait(filp, PIPE_WAIT(*inode), wait); - - /* Reading only -- no need for aquiring the semaphore. */ - if (!PIPE_EMPTY(*inode)) { - filp->f_op = &read_fifo_fops; - mask = POLLIN | POLLRDNORM; - } else if (PIPE_WRITERS(*inode)) { - filp->f_op = &read_fifo_fops; - mask = POLLOUT | POLLWRNORM; - } - - return mask; -} - static int pipe_release(struct inode *inode, int decr, int decw) { @@ -450,16 +377,6 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) * The file_operations structs are not static because they * are also used in linux/fs/fifo.c to do operations on FIFOs. */ -struct file_operations connecting_fifo_fops = { - llseek: pipe_lseek, - read: connect_read, - write: bad_pipe_w, - poll: connect_poll, - ioctl: pipe_ioctl, - open: pipe_read_open, - release: pipe_read_release, -}; - struct file_operations read_fifo_fops = { llseek: pipe_lseek, read: pipe_read, @@ -520,29 +437,42 @@ struct file_operations rdwr_pipe_fops = { release: pipe_rdwr_release, }; -static struct inode * get_pipe_inode(void) +struct inode* pipe_new(struct inode* inode) { - struct inode *inode = get_empty_inode(); unsigned long page; - if (!inode) - goto fail_inode; - page = __get_free_page(GFP_USER); if (!page) - goto fail_iput; + return NULL; inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL); if (!inode->i_pipe) goto fail_page; - inode->i_fop = &rdwr_pipe_fops; - init_waitqueue_head(PIPE_WAIT(*inode)); - PIPE_BASE(*inode) = (char *) page; + PIPE_BASE(*inode) = (char*) page; PIPE_START(*inode) = PIPE_LEN(*inode) = 0; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0; PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0; + PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1; + + return inode; +fail_page: + free_page(page); + return NULL; +} + +static struct inode * get_pipe_inode(void) +{ + struct inode *inode = get_empty_inode(); + + if (!inode) + goto fail_inode; + + if(!pipe_new(inode)) + goto fail_iput; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + inode->i_fop = &rdwr_pipe_fops; /* * Mark the inode dirty from the very beginning, @@ -558,8 +488,6 @@ static struct inode * get_pipe_inode(void) inode->i_blksize = PAGE_SIZE; return inode; -fail_page: - free_page(page); fail_iput: iput(inode); fail_inode: @@ -606,11 +534,13 @@ int do_pipe(int *fd) f1->f_flags = O_RDONLY; f1->f_op = &read_pipe_fops; f1->f_mode = 1; + f1->f_version = 0; /* write file */ f2->f_flags = O_WRONLY; f2->f_op = &write_pipe_fops; f2->f_mode = 2; + f2->f_version = 0; fd_install(i, f1); fd_install(j, f2); diff --git a/fs/proc/array.c b/fs/proc/array.c index 3e1c58ad7..54e594634 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -402,7 +402,8 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned ++*pages; if (pte_dirty(page)) ++*dirty; - if (pte_pagenr(page) >= max_mapnr) + if ((pte_pagenr(page) >= max_mapnr) || + PageReserved(pte_pagenr(page) + mem_map)) continue; if (page_count(pte_page(page)) > 1) ++*shared; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 9e78119c9..c6511354b 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -418,7 +418,7 @@ static int qnx4_readpage(struct dentry *dentry, struct page *page) { return 
block_read_full_page(page,qnx4_get_block); } -static int qnx4_prepare_write(struct page *page, unsigned from, unsigned to) +static int qnx4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return cont_prepare_write(page,from,to,qnx4_get_block, &((struct inode*)page->mapping->host)->u.qnx4_i.mmu_private); diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 7f5f2dee6..63d5a58ab 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -161,14 +161,6 @@ outnobh: return s; } -/* Nothing to do.. */ - -static void -romfs_put_super(struct super_block *sb) -{ - return; -} - /* That's simple too. */ static int @@ -526,7 +518,6 @@ romfs_read_inode(struct inode *i) static struct super_operations romfs_ops = { read_inode: romfs_read_inode, - put_super: romfs_put_super, statfs: romfs_statfs, }; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 1360ca994..12e2bf295 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -268,7 +268,7 @@ out: * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int smb_prepare_write(struct page *page, unsigned offset, unsigned to) +static int smb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) { kmap(page); return 0; diff --git a/fs/super.c b/fs/super.c index dd34ddc70..302487807 100644 --- a/fs/super.c +++ b/fs/super.c @@ -147,7 +147,6 @@ static int fs_index(const char * __name) err = index; break; } - index++; } spin_unlock(&file_systems_lock); putname(name); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3367c02ef..b6396ff04 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -949,7 +949,7 @@ static int sysv_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,sysv_get_block); } -static int sysv_prepare_write(struct page *page, unsigned from, unsigned to) +static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page,from,to,sysv_get_block); } diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 1aa3aa1c4..2be4e8562 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -85,14 +85,14 @@ found_middle: #define find_first_one_bit(addr, size)\ find_next_one_bit((addr), (size), 0) -static int read_block_bitmap(struct super_block * sb, unsigned int block, - unsigned long bitmap_nr) +static int read_block_bitmap(struct super_block * sb, Uint32 bitmap, + unsigned int block, unsigned long bitmap_nr) { struct buffer_head *bh = NULL; int retval = 0; lb_addr loc; - loc.logicalBlockNum = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap; + loc.logicalBlockNum = bitmap; loc.partitionReferenceNum = UDF_SB_PARTITION(sb); bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block), sb->s_blocksize); @@ -105,7 +105,8 @@ static int read_block_bitmap(struct super_block * sb, unsigned int block, return retval; } -static int __load_block_bitmap(struct super_block * sb, unsigned int block_group) +static int __load_block_bitmap(struct super_block * sb, Uint32 bitmap, + unsigned int block_group) { int i, j, retval = 0; unsigned long block_bitmap_number; @@ -125,7 +126,7 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group if (UDF_SB_BLOCK_BITMAP_NUMBER(sb, block_group) == block_group) return block_group; } - retval = read_block_bitmap(sb, block_group, block_group); + retval = read_block_bitmap(sb, bitmap, block_group, block_group); if (retval < 0) return retval; return block_group; @@ -150,7 +151,7 @@ 
static int __load_block_bitmap(struct super_block * sb, unsigned int block_group UDF_SB_BLOCK_BITMAP(sb, 0) = block_bitmap; if (!block_bitmap) - retval = read_block_bitmap(sb, block_group, 0); + retval = read_block_bitmap(sb, bitmap, block_group, 0); } else { @@ -163,12 +164,12 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group UDF_SB_BLOCK_BITMAP_NUMBER(sb, j) = UDF_SB_BLOCK_BITMAP_NUMBER(sb, j-1); UDF_SB_BLOCK_BITMAP(sb, j) = UDF_SB_BLOCK_BITMAP(sb, j-1); } - retval = read_block_bitmap(sb, block_group, 0); + retval = read_block_bitmap(sb, bitmap, block_group, 0); } return retval; } -static inline int load_block_bitmap(struct super_block *sb, +static inline int load_block_bitmap(struct super_block *sb, Uint32 bitmap, unsigned int block_group) { int slot; @@ -189,7 +190,7 @@ static inline int load_block_bitmap(struct super_block *sb, } else { - slot = __load_block_bitmap(sb, block_group); + slot = __load_block_bitmap(sb, bitmap, block_group); } if (slot < 0) @@ -201,8 +202,8 @@ static inline int load_block_bitmap(struct super_block *sb, return slot; } -void udf_free_blocks(const struct inode * inode, lb_addr bloc, Uint32 offset, - Uint32 count) +static void udf_bitmap_free_blocks(const struct inode * inode, Uint32 bitmap, + lb_addr bloc, Uint32 offset, Uint32 count) { struct buffer_head * bh = NULL; unsigned long block; @@ -220,9 +221,6 @@ void udf_free_blocks(const struct inode * inode, lb_addr bloc, Uint32 offset, return; } - if (UDF_SB_PARTMAPS(sb)[bloc.partitionReferenceNum].s_uspace_bitmap == 0xFFFFFFFF) - return; - lock_super(sb); if (bloc.logicalBlockNum < 0 || (bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum)) @@ -248,7 +246,7 @@ do_more: overflow = bit + count - (sb->s_blocksize << 3); count -= overflow; } - bitmap_nr = load_block_bitmap(sb, block_group); + bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto error_return; @@ -285,8 +283,8 @@ error_return: return; } -int udf_prealloc_blocks(const struct inode * inode, Uint16 partition, - Uint32 first_block, Uint32 block_count) +static int udf_bitmap_prealloc_blocks(const struct inode * inode, Uint32 bitmap, + Uint16 partition, Uint32 first_block, Uint32 block_count) { int alloc_count = 0; int bit, block, block_group, group_start; @@ -312,7 +310,7 @@ repeat: block_group = block >> (sb->s_blocksize_bits + 3); group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc); - bitmap_nr = load_block_bitmap(sb, block_group); + bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto out; bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr); @@ -351,7 +349,8 @@ out: return alloc_count; } -int udf_new_block(const struct inode * inode, Uint16 partition, Uint32 goal, int *err) +static int udf_bitmap_new_block(const struct inode * inode, Uint32 bitmap, + Uint16 partition, Uint32 goal, int *err) { int tmp, newbit, bit=0, block, block_group, group_start; int end_goal, nr_groups, bitmap_nr, i; @@ -379,7 +378,7 @@ repeat: block_group = block >> (sb->s_blocksize_bits + 3); group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc); - bitmap_nr = load_block_bitmap(sb, block_group); + bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto error_return; bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr); @@ -419,7 +418,7 @@ repeat: block_group = 0; group_start = block_group ? 
0 : sizeof(struct SpaceBitmapDesc); - bitmap_nr = load_block_bitmap(sb, block_group); + bitmap_nr = load_block_bitmap(sb, bitmap, block_group); if (bitmap_nr < 0) goto error_return; bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr); @@ -497,3 +496,64 @@ error_return: unlock_super(sb); return 0; } + +inline void udf_free_blocks(const struct inode * inode, lb_addr bloc, + Uint32 offset, Uint32 count) +{ + if (UDF_SB_PARTFLAGS(inode->i_sb, bloc.partitionReferenceNum) & UDF_PART_FLAG_UNALLOC_BITMAP) + { + return udf_bitmap_free_blocks(inode, + UDF_SB_PARTMAPS(inode->i_sb)[bloc.partitionReferenceNum].s_uspace.bitmap, + bloc, offset, count); + } + else if (UDF_SB_PARTFLAGS(inode->i_sb, bloc.partitionReferenceNum) & UDF_PART_FLAG_FREED_BITMAP) + { + return udf_bitmap_free_blocks(inode, + UDF_SB_PARTMAPS(inode->i_sb)[bloc.partitionReferenceNum].s_fspace.bitmap, + bloc, offset, count); + } + else + return; +} + +inline int udf_prealloc_blocks(const struct inode * inode, Uint16 partition, + Uint32 first_block, Uint32 block_count) +{ + if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) + { + return udf_bitmap_prealloc_blocks(inode, + UDF_SB_PARTMAPS(inode->i_sb)[partition].s_uspace.bitmap, + partition, first_block, block_count); + } + else if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_FREED_BITMAP) + { + return udf_bitmap_prealloc_blocks(inode, + UDF_SB_PARTMAPS(inode->i_sb)[partition].s_fspace.bitmap, + partition, first_block, block_count); + } + else + return 0; +} + +inline int udf_new_block(const struct inode * inode, Uint16 partition, + Uint32 goal, int *err) +{ + if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP) + { + return udf_bitmap_new_block(inode, + UDF_SB_PARTMAPS(inode->i_sb)[partition].s_uspace.bitmap, + partition, goal, err); + } + else if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_FREED_BITMAP) + { + return udf_bitmap_new_block(inode, + UDF_SB_PARTMAPS(inode->i_sb)[partition].s_fspace.bitmap, + partition, goal, err); + } + else + { + *err = -EIO; + return 0; + } +} + diff --git a/fs/udf/file.c b/fs/udf/file.c index 73d47ac10..96297521b 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -87,7 +87,7 @@ static int udf_adinicb_writepage(struct dentry *dentry, struct page *page) return 0; } -static int udf_adinicb_prepare_write(struct page *page, unsigned offset, unsigned to) +static int udf_adinicb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) { kmap(page); return 0; @@ -246,7 +246,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, struct FileEntry *fe; fe = (struct FileEntry *)bh->b_data; - eaicb = fe->extendedAttrICB; + eaicb = lela_to_cpu(fe->extendedAttrICB); if (UDF_I_LENEATTR(inode)) ea = fe->extendedAttr; } @@ -255,7 +255,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, struct ExtendedFileEntry *efe; efe = (struct ExtendedFileEntry *)bh->b_data; - eaicb = efe->extendedAttrICB; + eaicb = lela_to_cpu(efe->extendedAttrICB); if (UDF_I_LENEATTR(inode)) ea = efe->extendedAttr; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 264086135..ed1507fa7 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -74,10 +74,13 @@ static int udf_get_block(struct inode *, long, struct buffer_head *, int); */ void udf_put_inode(struct inode * inode) { - lock_kernel(); - udf_discard_prealloc(inode); - write_inode_now(inode); - unlock_kernel(); + if (!(inode->i_sb->s_flags & MS_RDONLY)) + { + lock_kernel(); + udf_discard_prealloc(inode); + 
write_inode_now(inode); + unlock_kernel(); + } } /* @@ -130,7 +133,7 @@ static int udf_readpage(struct dentry *dentry, struct page *page) return block_read_full_page(page, udf_get_block); } -static int udf_prepare_write(struct page *page, unsigned from, unsigned to) +static int udf_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page, from, to, udf_get_block); } @@ -1554,16 +1557,18 @@ int udf_add_aext(struct inode *inode, lb_addr *bloc, int *extoffset, case ICB_FLAG_AD_SHORT: { sad = (short_ad *)sptr; - sad->extLength = EXTENT_NEXT_EXTENT_ALLOCDECS << 30 | - inode->i_sb->s_blocksize; + sad->extLength = cpu_to_le32( + EXTENT_NEXT_EXTENT_ALLOCDECS << 30 | + inode->i_sb->s_blocksize); sad->extPosition = cpu_to_le32(bloc->logicalBlockNum); break; } case ICB_FLAG_AD_LONG: { lad = (long_ad *)sptr; - lad->extLength = EXTENT_NEXT_EXTENT_ALLOCDECS << 30 | - inode->i_sb->s_blocksize; + lad->extLength = cpu_to_le32( + EXTENT_NEXT_EXTENT_ALLOCDECS << 30 | + inode->i_sb->s_blocksize); lad->extLocation = cpu_to_lelb(*bloc); break; } diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 653997033..1403cec52 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -70,8 +70,6 @@ udf_get_last_session(struct super_block *sb) unsigned int udf_get_last_block(struct super_block *sb) { - extern int *blksize_size[]; - kdev_t dev = sb->s_dev; struct block_device *bdev = sb->s_bdev; int ret; unsigned long lblock = 0; @@ -80,28 +78,10 @@ udf_get_last_block(struct super_block *sb) if (ret) /* Hard Disk */ { - unsigned int hbsize = get_hardblocksize(dev); - unsigned int blocksize = sb->s_blocksize; - unsigned int mult = 0; - unsigned int div = 0; - - if (!hbsize) - hbsize = blksize_size[MAJOR(dev)][MINOR(dev)]; - - if (hbsize > blocksize) - mult = hbsize / blocksize; - else if (blocksize > hbsize) - div = blocksize / hbsize; - ret = ioctl_by_bdev(bdev, BLKGETSIZE, (unsigned long) &lblock); if (!ret && lblock != 0x7FFFFFFF) - { - if (mult) - lblock *= mult; - else if (div) - lblock /= div; - } + lblock = ((512 * lblock) / sb->s_blocksize); } if (!ret && lblock) diff --git a/fs/udf/misc.c b/fs/udf/misc.c index bed1e3984..ae998258e 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -90,7 +90,7 @@ udf_add_extendedattr(struct inode * inode, Uint32 size, Uint32 type, struct FileEntry *fe; fe = (struct FileEntry *)(*bh)->b_data; - eaicb = fe->extendedAttrICB; + eaicb = lela_to_cpu(fe->extendedAttrICB); offset = sizeof(struct FileEntry); } else @@ -98,7 +98,7 @@ udf_add_extendedattr(struct inode * inode, Uint32 size, Uint32 type, struct ExtendedFileEntry *efe; efe = (struct ExtendedFileEntry *)(*bh)->b_data; - eaicb = efe->extendedAttrICB; + eaicb = lela_to_cpu(efe->extendedAttrICB); offset = sizeof(struct ExtendedFileEntry); } @@ -206,7 +206,7 @@ udf_get_extendedattr(struct inode * inode, Uint32 type, Uint8 subtype, struct FileEntry *fe; fe = (struct FileEntry *)(*bh)->b_data; - eaicb = fe->extendedAttrICB; + eaicb = lela_to_cpu(fe->extendedAttrICB); if (UDF_I_LENEATTR(inode)) ea = fe->extendedAttr; } @@ -215,7 +215,7 @@ udf_get_extendedattr(struct inode * inode, Uint32 type, Uint8 subtype, struct ExtendedFileEntry *efe; efe = (struct ExtendedFileEntry *)(*bh)->b_data; - eaicb = efe->extendedAttrICB; + eaicb = lela_to_cpu(efe->extendedAttrICB); if (UDF_I_LENEATTR(inode)) ea = efe->extendedAttr; } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index c371b5d52..a44e19043 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -935,7 +935,7 @@ static int 
udf_symlink(struct inode * dir, struct dentry * dentry, const char * inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &page_symlink_inode_operations; - if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_IN_ICB) + if (UDF_I_ALLOCTYPE(inode) != ICB_FLAG_AD_IN_ICB) { struct buffer_head *bh = NULL; lb_addr bloc, eloc; @@ -964,7 +964,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char * bh = udf_tread(inode->i_sb, block, inode->i_sb->s_blocksize); ea = bh->b_data + udf_ext0_offset(inode); - eoffset = inode->i_sb->s_blocksize - (ea - bh->b_data); + eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode); pc = (struct PathComponent *)ea; if (*symname == '/') diff --git a/fs/udf/super.c b/fs/udf/super.c index 81f59e9a3..5f76abbb0 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -100,14 +100,14 @@ static DECLARE_FSTYPE_DEV(udf_fstype, "udf", udf_read_super); /* Superblock operations */ static struct super_operations udf_sb_ops = { - read_inode: udf_read_inode, + read_inode: udf_read_inode, write_inode: udf_write_inode, - put_inode: udf_put_inode, + put_inode: udf_put_inode, delete_inode: udf_delete_inode, - put_super: udf_put_super, + put_super: udf_put_super, write_super: udf_write_super, - statfs: udf_statfs, - remount_fs: udf_remount_fs, + statfs: udf_statfs, + remount_fs: udf_remount_fs, }; struct udf_options @@ -127,7 +127,6 @@ struct udf_options uid_t uid; }; - static int __init init_udf_fs(void) { printk(KERN_NOTICE "udf: registering filesystem\n"); @@ -745,8 +744,9 @@ udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh) { if (udf_CS0toUTF8(&outstr, &instr)) { - udf_debug("volIdent[] = '%s'\n", outstr.u_name); - strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name, outstr.u_len); + strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name, + outstr.u_len > 31 ? 
31 : outstr.u_len); + udf_debug("volIdent[] = '%s'\n", UDF_SB_VOLIDENT(sb)); } } @@ -788,7 +788,6 @@ udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) { UDF_SB_PARTLEN(sb,i) = le32_to_cpu(p->partitionLength); /* blocks */ UDF_SB_PARTROOT(sb,i) = le32_to_cpu(p->partitionStartingLocation) + UDF_SB_SESSION(sb); - UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = 0xFFFFFFFF; if (UDF_SB_PARTTYPE(sb,i) == UDF_SPARABLE_MAP15) udf_fill_spartable(sb, &UDF_SB_TYPESPAR(sb,i), UDF_SB_PARTLEN(sb,i)); @@ -803,17 +802,24 @@ udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) udf_debug("unallocatedSpaceTable (part %d)\n", i); if (phd->unallocatedSpaceBitmap.extLength) { - UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = + UDF_SB_PARTMAPS(sb)[i].s_uspace.bitmap = le32_to_cpu(phd->unallocatedSpaceBitmap.extPosition); + UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_BITMAP; udf_debug("unallocatedSpaceBitmap (part %d) @ %d\n", - i, UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap); + i, UDF_SB_PARTMAPS(sb)[i].s_uspace.bitmap); } if (phd->partitionIntegrityTable.extLength) udf_debug("partitionIntegrityTable (part %d)\n", i); if (phd->freedSpaceTable.extLength) udf_debug("freedSpaceTable (part %d)\n", i); if (phd->freedSpaceBitmap.extLength) - udf_debug("freedSpaceBitmap (part %d\n", i); + { + UDF_SB_PARTMAPS(sb)[i].s_fspace.bitmap = + le32_to_cpu(phd->freedSpaceBitmap.extPosition); + UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_BITMAP; + udf_debug("freedSpaceBitmap (part %d) @ %d\n", + i, UDF_SB_PARTMAPS(sb)[i].s_fspace.bitmap); + } } break; } @@ -1184,7 +1190,6 @@ udf_load_partition(struct super_block *sb, lb_addr *fileset) } UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0); UDF_SB_PARTLEN(sb,i) = UDF_SB_PARTLEN(sb,ino.partitionReferenceNum); - UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = 0xFFFFFFFF; } } } @@ -1520,34 +1525,27 @@ static unsigned int udf_count_free(struct super_block *sb) { struct buffer_head *bh = NULL; - unsigned int accum=0; - int index; - int block=0, newblock; + unsigned int accum = 0; lb_addr loc; - Uint32 bytes; - Uint8 value; - Uint8 * ptr; - Uint16 ident; - - if (UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap == 0xFFFFFFFF) - { - if (UDF_SB_LVIDBH(sb)) - { - if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb)) - accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]); - - if (accum == 0xFFFFFFFF) - accum = 0; + Uint32 bitmap; - return accum; - } - else - return 0; - } + if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP) + bitmap = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.bitmap; + else if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP) + bitmap = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.bitmap; else + bitmap = 0xFFFFFFFF; + + if (bitmap != 0xFFFFFFFF) { struct SpaceBitmapDesc *bm; - loc.logicalBlockNum = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap; + int block = 0, newblock, index; + Uint16 ident; + Uint32 bytes; + Uint8 value; + Uint8 * ptr; + + loc.logicalBlockNum = bitmap; loc.partitionReferenceNum = UDF_SB_PARTITION(sb); bh = udf_read_ptagged(sb, loc, 0, &ident); @@ -1593,6 +1591,18 @@ udf_count_free(struct super_block *sb) } } udf_release_data(bh); - return accum; } + else + { + if (UDF_SB_LVIDBH(sb)) + { + if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb)) + accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]); + + if (accum == 0xFFFFFFFF) + accum = 0; + } + } + + return accum; } diff --git 
a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 6084c5613..6988a7238 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -18,6 +18,11 @@ #define UDF_FLAG_UNDELETE 6 #define UDF_FLAG_UNHIDE 7 #define UDF_FLAG_VARCONV 8 + +#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001 +#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002 +#define UDF_PART_FLAG_FREED_BITMAP 0x0004 +#define UDF_PART_FLAG_FREED_TABLE 0x0008 #define UDF_SB_FREE(X)\ {\ @@ -52,6 +57,7 @@ #define UDF_SB_TYPESPAR(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_sparing ) #define UDF_SB_TYPEVIRT(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_virtual ) #define UDF_SB_PARTFUNC(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_func ) +#define UDF_SB_PARTFLAGS(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_flags ) #define UDF_SB_VOLIDENT(X) ( UDF_SB(X)->s_volident ) #define UDF_SB_NUMPARTS(X) ( UDF_SB(X)->s_partitions ) diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index b92eed7db..7dd00bc19 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -106,7 +106,7 @@ struct ktm struct ustr { Uint8 u_cmpID; - Uint8 u_name[UDF_NAME_LEN-1]; + Uint8 u_name[UDF_NAME_LEN]; Uint8 u_len; Uint8 padding; unsigned long u_hash; @@ -182,6 +182,8 @@ extern void udf_truncate(struct inode *); extern void udf_free_blocks(const struct inode *, lb_addr, Uint32, Uint32); extern int udf_prealloc_blocks(const struct inode *, Uint16, Uint32, Uint32); extern int udf_new_block(const struct inode *, Uint16, Uint32, int *); + +/* fsync.c */ extern int udf_sync_file(struct file *, struct dentry *); /* directory.c */ diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 134b3c755..7cb2d3c1f 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -177,7 +177,8 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i) return 0; } - for (i = 0; (i < ocu_len) && (utf_o->u_len < UDF_NAME_LEN) ;) { + for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;) + { /* Expand OSTA compressed Unicode to Unicode */ c = ocu[i++]; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index af07961e1..8c5c15d55 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -548,7 +548,7 @@ static int ufs_readpage(struct dentry *dentry, struct page *page) { return block_read_full_page(page,ufs_getfrag_block); } -static int ufs_prepare_write(struct page *page, unsigned from, unsigned to) +static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { return block_prepare_write(page,from,to,ufs_getfrag_block); } |
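
The fs/nfsd/nfssvc.c hunk above introduces update_thread_usage(), which charges the time elapsed since the previous request event to a histogram bucket chosen by what fraction of nfsd threads were busy over that interval; the buckets are what the new "th" line in fs/nfsd/stats.c reports. The following standalone C program is a hedged sketch of that bucketing idea only, not part of the patch: the names usage_stats, note_busy_change and now_ms are hypothetical, and it counts milliseconds where the kernel version works in jiffies and accumulates into nfsdstats.th_usage[] and nfsdstats.th_fullcnt.

/*
 * Sketch of the decile bookkeeping used by update_thread_usage() in
 * fs/nfsd/nfssvc.c, rewritten as a self-contained userspace program.
 */
#include <stdio.h>

#define NBUCKETS 10

struct usage_stats {
        unsigned long last_event;       /* time of the previous sample */
        unsigned long usage[NBUCKETS];  /* time binned by busy-thread decile */
        unsigned long full_count;       /* intervals where every thread was busy */
};

/*
 * Charge the time since the last event to the decile matching how many of
 * 'total' threads were busy over that interval.  Call it whenever the busy
 * count is about to change, passing the occupancy that held until now.
 */
static void note_busy_change(struct usage_stats *st, int busy, int total,
                             unsigned long now_ms)
{
        unsigned long prev = st->last_event;
        int decile = busy * 10 / total;         /* 0..10 */

        st->last_event = now_ms;
        if (decile > 0 && decile <= 10) {
                st->usage[decile - 1] += now_ms - prev;
                if (decile == 10)
                        st->full_count++;
        }
}

int main(void)
{
        struct usage_stats st = { 0 };
        int i;

        /* 3 of 8 threads busy from t=0 to t=40ms, then all 8 until t=55ms. */
        note_busy_change(&st, 3, 8, 40);
        note_busy_change(&st, 8, 8, 55);

        for (i = 0; i < NBUCKETS; i++)
                printf("decile %2d: %lu ms\n", i + 1, st.usage[i]);
        printf("all threads busy: %lu interval(s)\n", st.full_count);
        return 0;
}

As in the hunk itself, each sample records the occupancy that was in effect before the change (the patch calls update_thread_usage(nfsd_busy) before nfsd_busy is incremented and again before it is decremented), so every elapsed interval is attributed to the thread load that actually held during it.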