author     Ralf Baechle <ralf@linux-mips.org>    2000-03-27 23:54:12 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-03-27 23:54:12 +0000
commit     d3e71cb08747743fce908122bab08b479eb403a5 (patch)
tree       cbec6948fdbdee9af81cf3ecfb504070d2745d7b /fs
parent     fe7ff1706e323d0e5ed83972960a1ecc1ee538b3 (diff)
Merge with Linux 2.3.99-pre3.
Diffstat (limited to 'fs')
-rw-r--r--  fs/adfs/inode.c            2
-rw-r--r--  fs/affs/file.c             2
-rw-r--r--  fs/bfs/file.c              2
-rw-r--r--  fs/binfmt_aout.c          55
-rw-r--r--  fs/binfmt_elf.c          106
-rw-r--r--  fs/binfmt_em86.c          23
-rw-r--r--  fs/binfmt_misc.c          15
-rw-r--r--  fs/binfmt_script.c        19
-rw-r--r--  fs/buffer.c                2
-rw-r--r--  fs/coda/dir.c              2
-rw-r--r--  fs/cramfs/inode.c          7
-rw-r--r--  fs/dcache.c               71
-rw-r--r--  fs/devfs/Makefile          2
-rw-r--r--  fs/exec.c                183
-rw-r--r--  fs/ext2/balloc.c          30
-rw-r--r--  fs/ext2/ialloc.c           7
-rw-r--r--  fs/ext2/inode.c            2
-rw-r--r--  fs/ext2/super.c           34
-rw-r--r--  fs/fat/inode.c             2
-rw-r--r--  fs/fifo.c                 81
-rw-r--r--  fs/hfs/inode.c             2
-rw-r--r--  fs/hpfs/file.c             2
-rw-r--r--  fs/lockd/clntproc.c        8
-rw-r--r--  fs/lockd/mon.c             2
-rw-r--r--  fs/minix/inode.c           2
-rw-r--r--  fs/nfs/Makefile            2
-rw-r--r--  fs/nfs/dir.c              62
-rw-r--r--  fs/nfs/file.c              4
-rw-r--r--  fs/nfs/flushd.c          304
-rw-r--r--  fs/nfs/inode.c            57
-rw-r--r--  fs/nfs/nfs2xdr.c          49
-rw-r--r--  fs/nfs/nfsroot.c           5
-rw-r--r--  fs/nfs/proc.c              8
-rw-r--r--  fs/nfs/read.c             10
-rw-r--r--  fs/nfs/write.c          1667
-rw-r--r--  fs/nfsd/export.c           5
-rw-r--r--  fs/nfsd/nfs3xdr.c          1
-rw-r--r--  fs/nfsd/nfsctl.c          42
-rw-r--r--  fs/nfsd/nfsproc.c          5
-rw-r--r--  fs/nfsd/nfssvc.c          26
-rw-r--r--  fs/nfsd/stats.c           45
-rw-r--r--  fs/nfsd/vfs.c              9
-rw-r--r--  fs/ntfs/fs.c               2
-rw-r--r--  fs/ntfs/inode.c            4
-rw-r--r--  fs/open.c                 42
-rw-r--r--  fs/openpromfs/inode.c      7
-rw-r--r--  fs/partitions/msdos.c     15
-rw-r--r--  fs/pipe.c                126
-rw-r--r--  fs/proc/array.c            3
-rw-r--r--  fs/qnx4/inode.c            2
-rw-r--r--  fs/romfs/inode.c           9
-rw-r--r--  fs/smbfs/file.c            2
-rw-r--r--  fs/super.c                 1
-rw-r--r--  fs/sysv/inode.c            2
-rw-r--r--  fs/udf/balloc.c          102
-rw-r--r--  fs/udf/file.c              6
-rw-r--r--  fs/udf/inode.c            23
-rw-r--r--  fs/udf/lowlevel.c         22
-rw-r--r--  fs/udf/misc.c              8
-rw-r--r--  fs/udf/namei.c             4
-rw-r--r--  fs/udf/super.c            84
-rw-r--r--  fs/udf/udf_sb.h            6
-rw-r--r--  fs/udf/udfdecl.h           4
-rw-r--r--  fs/udf/unicode.c           3
-rw-r--r--  fs/ufs/inode.c             2
65 files changed, 2310 insertions, 1133 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6eb08c857..c9aecc730 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -59,7 +59,7 @@ static int adfs_readpage(struct dentry *dentry, struct page *page)
return block_read_full_page(page, adfs_get_block);
}
-static int adfs_prepare_write(struct page *page, unsigned int from, unsigned int to)
+static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to)
{
return cont_prepare_write(page, from, to, adfs_get_block,
&((struct inode *)page->mapping->host)->u.adfs_i.mmu_private);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 069964acb..bc0db190f 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -299,7 +299,7 @@ static int affs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,affs_get_block);
}
-static int affs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,affs_get_block,
&((struct inode*)page->mapping->host)->u.affs_i.mmu_private);
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index f5ef5e652..c5ca51cda 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -137,7 +137,7 @@ static int bfs_readpage(struct dentry *dentry, struct page *page)
return block_read_full_page(page, bfs_get_block);
}
-static int bfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page, from, to, bfs_get_block);
}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 9339775ce..f48a2492d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -252,7 +252,6 @@ static unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm)
static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
struct exec ex;
- struct file * file;
int fd;
unsigned long error;
unsigned long fd_offset;
@@ -263,7 +262,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
N_TRSIZE(ex) || N_DRSIZE(ex) ||
- bprm->dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+ bprm->file->f_dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
return -ENOEXEC;
}
@@ -304,26 +303,32 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
current->flags &= ~PF_FORKNOEXEC;
#ifdef __sparc__
if (N_MAGIC(ex) == NMAGIC) {
+ loff_t pos = fd_offset;
/* Fuck me plenty... */
+ /* <AOL></AOL> */
error = do_brk(N_TXTADDR(ex), ex.a_text);
- read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex),
- ex.a_text, 0);
+ bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
+ ex.a_text, &pos);
error = do_brk(N_DATADDR(ex), ex.a_data);
- read_exec(bprm->dentry, fd_offset + ex.a_text, (char *) N_DATADDR(ex),
- ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
+ ex.a_data, &pos);
goto beyond_if;
}
#endif
if (N_MAGIC(ex) == OMAGIC) {
+ loff_t pos;
#if defined(__alpha__) || defined(__sparc__)
+ pos = fd_offset;
do_brk(N_TXTADDR(ex) & PAGE_MASK,
ex.a_text+ex.a_data + PAGE_SIZE - 1);
- read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex),
- ex.a_text+ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
#else
+ pos = 32;
do_brk(0, ex.a_text+ex.a_data);
- read_exec(bprm->dentry, 32, (char *) 0, ex.a_text+ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file, (char *) 0,
+ ex.a_text+ex.a_data, &pos);
#endif
flush_icache_range((unsigned long) 0,
(unsigned long) ex.a_text+ex.a_data);
@@ -336,49 +341,48 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
error_time2 = jiffies;
}
- fd = open_dentry(bprm->dentry, O_RDONLY);
+ fd = get_unused_fd();
if (fd < 0)
return fd;
- file = fget(fd);
+ get_file(bprm->file);
+ fd_install(fd, bprm->file);
if ((fd_offset & ~PAGE_MASK) != 0 &&
(jiffies-error_time) > 5*HZ)
{
printk(KERN_WARNING
"fd_offset is not page aligned. Please convert program: %s\n",
- file->f_dentry->d_name.name);
+ bprm->file->f_dentry->d_name.name);
error_time = jiffies;
}
- if (!file->f_op || !file->f_op->mmap || ((fd_offset & ~PAGE_MASK) != 0)) {
- fput(file);
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
sys_close(fd);
do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- read_exec(bprm->dentry, fd_offset,
- (char *) N_TXTADDR(ex), ex.a_text+ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
flush_icache_range((unsigned long) N_TXTADDR(ex),
(unsigned long) N_TXTADDR(ex) +
ex.a_text+ex.a_data);
goto beyond_if;
}
- error = do_mmap(file, N_TXTADDR(ex), ex.a_text,
+ error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset);
if (error != N_TXTADDR(ex)) {
- fput(file);
sys_close(fd);
send_sig(SIGKILL, current, 0);
return error;
}
- error = do_mmap(file, N_DATADDR(ex), ex.a_data,
+ error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset + ex.a_text);
- fput(file);
sys_close(fd);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
@@ -420,16 +424,12 @@ static int load_aout_library(struct file *file)
unsigned long bss, start_addr, len;
unsigned long error;
int retval;
- loff_t offset = 0;
struct exec ex;
inode = file->f_dentry->d_inode;
retval = -ENOEXEC;
- /* N.B. Save current fs? */
- set_fs(KERNEL_DS);
- error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset);
- set_fs(USER_DS);
+ error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
if (error != sizeof(ex))
goto out;
@@ -450,6 +450,7 @@ static int load_aout_library(struct file *file)
if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
static unsigned long error_time;
+ loff_t pos = N_TXTOFF(ex);
if ((jiffies-error_time) > 5*HZ)
{
@@ -461,8 +462,8 @@ static int load_aout_library(struct file *file)
do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- read_exec(file->f_dentry, N_TXTOFF(ex),
- (char *)start_addr, ex.a_text + ex.a_data, 0);
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
flush_icache_range((unsigned long) start_addr,
(unsigned long) start_addr + ex.a_text + ex.a_data);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 28f82594f..a12183834 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -205,17 +205,15 @@ create_elf_tables(char *p, int argc, int envc,
an ELF header */
static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
- struct dentry * interpreter_dentry,
+ struct file * interpreter,
unsigned long *interp_load_addr)
{
- struct file * file;
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
- int elf_exec_fileno;
int retval, i, size;
/* First of all, some simple consistency checks */
@@ -224,8 +222,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
goto out;
if (!elf_check_arch(interp_elf_ex->e_machine))
goto out;
- if (!interpreter_dentry->d_inode->i_fop ||
- !interpreter_dentry->d_inode->i_fop->mmap)
+ if (!interpreter->f_op->mmap)
goto out;
/*
@@ -244,17 +241,10 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
if (!elf_phdata)
goto out;
- retval = read_exec(interpreter_dentry, interp_elf_ex->e_phoff,
- (char *) elf_phdata, size, 1);
+ retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
error = retval;
if (retval < 0)
- goto out_free;
-
- error = ~0UL;
- elf_exec_fileno = open_dentry(interpreter_dentry, O_RDONLY);
- if (elf_exec_fileno < 0)
- goto out_free;
- file = fget(elf_exec_fileno);
+ goto out_close;
eppnt = elf_phdata;
for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
@@ -271,7 +261,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
- map_addr = do_mmap(file,
+ map_addr = do_mmap(interpreter,
load_addr + ELF_PAGESTART(vaddr),
eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr),
elf_prot,
@@ -322,19 +312,17 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
out_close:
- fput(file);
- sys_close(elf_exec_fileno);
-out_free:
kfree(elf_phdata);
out:
return error;
}
static unsigned long load_aout_interp(struct exec * interp_ex,
- struct dentry * interpreter_dentry)
+ struct file * interpreter)
{
- unsigned long text_data, offset, elf_entry = ~0UL;
+ unsigned long text_data, elf_entry = ~0UL;
char * addr;
+ loff_t offset;
int retval;
current->mm->end_code = interp_ex->a_text;
@@ -357,7 +345,10 @@ static unsigned long load_aout_interp(struct exec * interp_ex,
}
do_brk(0, text_data);
- retval = read_exec(interpreter_dentry, offset, addr, text_data, 0);
+ retval = -ENOEXEC;
+ if (!interpreter->f_op->read)
+ goto out;
+ retval = interpreter->f_op->read(interpreter, addr, text_data, &offset);
if (retval < 0)
goto out;
flush_icache_range((unsigned long)addr,
@@ -383,8 +374,7 @@ out:
static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
- struct file * file;
- struct dentry *interpreter_dentry = NULL; /* to shut gcc up */
+ struct file *interpreter = NULL; /* to shut gcc up */
unsigned long load_addr = 0, load_bias;
int load_addr_set = 0;
char * elf_interpreter = NULL;
@@ -430,7 +420,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto out;
}
#endif
- if (!bprm->dentry->d_inode->i_fop||!bprm->dentry->d_inode->i_fop->mmap)
+ if (!bprm->file->f_op||!bprm->file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -443,16 +433,15 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!elf_phdata)
goto out;
- retval = read_exec(bprm->dentry, elf_ex.e_phoff,
- (char *) elf_phdata, size, 1);
+ retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);
if (retval < 0)
goto out_free_ph;
- retval = open_dentry(bprm->dentry, O_RDONLY);
+ retval = get_unused_fd();
if (retval < 0)
goto out_free_ph;
- elf_exec_fileno = retval;
- file = fget(elf_exec_fileno);
+ get_file(bprm->file);
+ fd_install(elf_exec_fileno = retval, bprm->file);
elf_ppnt = elf_phdata;
elf_bss = 0;
@@ -480,9 +469,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!elf_interpreter)
goto out_free_file;
- retval = read_exec(bprm->dentry, elf_ppnt->p_offset,
+ retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
- elf_ppnt->p_filesz, 1);
+ elf_ppnt->p_filesz);
if (retval < 0)
goto out_free_interp;
/* If the program interpreter is one of these two,
@@ -495,32 +484,22 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
#if 0
printk("Using ELF interpreter %s\n", elf_interpreter);
#endif
- old_fs = get_fs(); /* This could probably be optimized */
- set_fs(get_ds());
#ifdef __sparc__
if (ibcs2_interpreter) {
unsigned long old_pers = current->personality;
current->personality = PER_SVR4;
- lock_kernel();
- interpreter_dentry = open_namei(elf_interpreter);
- unlock_kernel();
+ interpreter = open_exec(elf_interpreter);
current->personality = old_pers;
} else
#endif
{
- lock_kernel();
- interpreter_dentry = open_namei(elf_interpreter);
- unlock_kernel();
+ interpreter = open_exec(elf_interpreter);
}
- set_fs(old_fs);
- retval = PTR_ERR(interpreter_dentry);
- if (IS_ERR(interpreter_dentry))
+ retval = PTR_ERR(interpreter);
+ if (IS_ERR(interpreter))
goto out_free_interp;
- retval = permission(interpreter_dentry->d_inode, MAY_EXEC);
- if (retval < 0)
- goto out_free_dentry;
- retval = read_exec(interpreter_dentry, 0, bprm->buf, 128, 1);
+ retval = kernel_read(interpreter, 0, bprm->buf, 128);
if (retval < 0)
goto out_free_dentry;
@@ -629,7 +608,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
elf_flags |= MAP_FIXED;
}
- error = do_mmap(file, ELF_PAGESTART(load_bias + vaddr),
+ error = do_mmap(bprm->file, ELF_PAGESTART(load_bias + vaddr),
(elf_ppnt->p_filesz +
ELF_PAGEOFFSET(elf_ppnt->p_vaddr)),
elf_prot, elf_flags, (elf_ppnt->p_offset -
@@ -661,7 +640,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
elf_brk = k;
}
set_fs(old_fs);
- fput(file); /* all done with the file */
elf_entry += load_bias;
elf_bss += load_bias;
@@ -674,14 +652,14 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (elf_interpreter) {
if (interpreter_type == INTERPRETER_AOUT)
elf_entry = load_aout_interp(&interp_ex,
- interpreter_dentry);
+ interpreter);
else
elf_entry = load_elf_interp(&interp_elf_ex,
- interpreter_dentry,
+ interpreter,
&interp_load_addr);
lock_kernel();
- dput(interpreter_dentry);
+ fput(interpreter);
unlock_kernel();
kfree(elf_interpreter);
@@ -708,7 +686,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
#ifndef VM_STACK_FLAGS
lock_kernel();
- current->executable = dget(bprm->dentry);
+ current->executable = dget(bprm->file->f_dentry);
unlock_kernel();
#endif
compute_creds(bprm);
@@ -779,13 +757,12 @@ out:
/* error cleanup */
out_free_dentry:
lock_kernel();
- dput(interpreter_dentry);
+ fput(interpreter);
unlock_kernel();
out_free_interp:
if (elf_interpreter)
kfree(elf_interpreter);
out_free_file:
- fput(file);
sys_close(elf_exec_fileno);
out_free_ph:
kfree(elf_phdata);
@@ -797,25 +774,13 @@ out_free_ph:
static int load_elf_library(struct file *file)
{
- struct dentry * dentry;
- struct inode * inode;
struct elf_phdr *elf_phdata;
unsigned long elf_bss = 0, bss, len, k;
int retval, error, i, j;
struct elfhdr elf_ex;
- loff_t offset = 0;
-
- error = -EACCES;
- dentry = file->f_dentry;
- inode = dentry->d_inode;
- /* seek to the beginning of the file */
error = -ENOEXEC;
-
- /* N.B. save current DS?? */
- set_fs(KERNEL_DS);
- retval = file->f_op->read(file, (char *) &elf_ex, sizeof(elf_ex), &offset);
- set_fs(USER_DS);
+ retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
if (retval != sizeof(elf_ex))
goto out;
@@ -824,8 +789,7 @@ static int load_elf_library(struct file *file)
/* First of all, some simple consistency checks */
if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(elf_ex.e_machine) ||
- (!inode->i_fop || !inode->i_fop->mmap))
+ !elf_check_arch(elf_ex.e_machine) || !file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -840,8 +804,8 @@ static int load_elf_library(struct file *file)
goto out;
/* N.B. check for error return?? */
- retval = read_exec(dentry, elf_ex.e_phoff, (char *) elf_phdata,
- sizeof(struct elf_phdr) * elf_ex.e_phnum, 1);
+ retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata,
+ sizeof(struct elf_phdr) * elf_ex.e_phnum);
error = -ENOEXEC;
for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 1a1533a10..1b18094eb 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -25,7 +25,7 @@
static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
{
char *interp, *i_name, *i_arg;
- struct dentry * dentry;
+ struct file * file;
int retval;
struct elfhdr elf_ex;
@@ -38,16 +38,13 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
/* First of all, some simple consistency checks */
if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
(!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
- (!bprm->dentry->d_inode->i_fop ||
- !bprm->dentry->d_inode->i_fop->mmap)) {
+ (!bprm->file->f_op || !bprm->file->f_op->mmap)) {
return -ENOEXEC;
}
bprm->sh_bang++; /* Well, the bang-shell is implicit... */
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
/* Unlike in the script case, we don't have to do any hairy
* parsing to find our interpreter... it's hardcoded!
@@ -79,16 +76,14 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
/*
* OK, now restart the process with the interpreter's inode.
- * Note that we use open_namei() as the name is now in kernel
+ * Note that we use open_exec() as the name is now in kernel
* space, and we don't need to copy it.
*/
- lock_kernel();
- dentry = open_namei(interp);
- unlock_kernel();
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ file = open_exec(interp);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- bprm->dentry = dentry;
+ bprm->file = file;
retval = prepare_binprm(bprm);
if (retval < 0)
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9d98d7d70..a03c4723f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
#include <linux/proc_fs.h>
#include <linux/string.h>
#include <linux/ctype.h>
+#include <linux/file.h>
#include <linux/spinlock.h>
#include <asm/uaccess.h>
@@ -180,7 +181,7 @@ static struct binfmt_entry *check_file(struct linux_binprm *bprm)
static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
struct binfmt_entry *fmt;
- struct dentry * dentry;
+ struct file * file;
char iname[128];
char *iname_addr = iname;
int retval;
@@ -200,8 +201,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (!fmt)
goto _ret;
- dput(bprm->dentry);
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
/* Build args for interpreter */
remove_arg_zero(bprm);
@@ -213,11 +214,11 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
bprm->argc++;
bprm->filename = iname; /* for binfmt_script */
- dentry = open_namei(iname);
- retval = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ file = open_exec(iname);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
goto _ret;
- bprm->dentry = dentry;
+ bprm->file = file;
retval = prepare_binprm(bprm);
if (retval >= 0)
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 450f918a4..dc78f8389 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -11,12 +11,13 @@
#include <linux/malloc.h>
#include <linux/binfmts.h>
#include <linux/init.h>
+#include <linux/file.h>
#include <linux/smp_lock.h>
static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
{
char *cp, *i_name, *i_arg;
- struct dentry * dentry;
+ struct file *file;
char interp[128];
int retval;
@@ -28,10 +29,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
*/
bprm->sh_bang++;
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
bprm->buf[127] = '\0';
if ((cp = strchr(bprm->buf, '\n')) == NULL)
@@ -81,13 +80,11 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
/*
* OK, now restart the process with the interpreter's dentry.
*/
- lock_kernel();
- dentry = open_namei(interp);
- unlock_kernel();
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ file = open_exec(interp);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- bprm->dentry = dentry;
+ bprm->file = file;
retval = prepare_binprm(bprm);
if (retval < 0)
return retval;
diff --git a/fs/buffer.c b/fs/buffer.c
index 617188db0..26580ee0d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2029,7 +2029,7 @@ int block_symlink(struct inode *inode, const char *symname, int len)
if (!page)
goto fail;
- err = mapping->a_ops->prepare_write(page, 0, len-1);
+ err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
if (err)
goto fail_map;
kaddr = (char*)page_address(page);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 1e39811e6..bb51b0c05 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -685,7 +685,7 @@ static int coda_venus_readdir(struct file *filp, void *getdent,
}
/* we use this routine to read the file into our buffer */
- bufsize = read_exec(filp->f_dentry, filp->f_pos, buff, DIR_BUFSIZE, 1);
+ bufsize = kernel_read(filp, filp->f_pos, buff, DIR_BUFSIZE);
if ( bufsize < 0) {
printk("coda_venus_readdir: cannot read directory %d.\n",
bufsize);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 237c7d9aa..c5ca590d2 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -200,12 +200,6 @@ out:
return retval;
}
-/* Nothing to do.. */
-static void cramfs_put_super(struct super_block *sb)
-{
- return;
-}
-
static int cramfs_statfs(struct super_block *sb, struct statfs *buf)
{
buf->f_type = CRAMFS_MAGIC;
@@ -361,7 +355,6 @@ static struct inode_operations cramfs_dir_inode_operations = {
};
static struct super_operations cramfs_ops = {
- put_super: cramfs_put_super,
statfs: cramfs_statfs,
};
diff --git a/fs/dcache.c b/fs/dcache.c
index dc424305f..d4aef49e7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -27,6 +27,9 @@
#define DCACHE_PARANOIA 1
/* #define DCACHE_DEBUG 1 */
+/* Right now the dcache depends on the kernel lock */
+#define check_lock() if (!kernel_locked()) BUG()
+
/* For managing the dcache */
extern unsigned long num_physpages, page_cache_size;
extern int inodes_stat[];
@@ -104,6 +107,8 @@ void dput(struct dentry *dentry)
{
int count;
+ check_lock();
+
if (!dentry)
return;
@@ -158,7 +163,7 @@ out:
count,
dentry->d_parent->d_name.name,
dentry->d_name.name);
- *(int *)0 = 0;
+ BUG();
}
/*
@@ -168,6 +173,8 @@ out:
*/
int d_invalidate(struct dentry * dentry)
{
+ check_lock();
+
/*
* If it's already been dropped, return OK.
*/
@@ -226,6 +233,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
*/
void prune_dcache(int count)
{
+ check_lock();
for (;;) {
struct dentry *dentry;
struct list_head *tmp = dentry_unused.prev;
@@ -261,6 +269,8 @@ void shrink_dcache_sb(struct super_block * sb)
struct list_head *tmp, *next;
struct dentry *dentry;
+ check_lock();
+
/*
* Pass one ... move the dentries for the specified
* superblock to the most recent end of the unused list.
@@ -308,6 +318,8 @@ int is_root_busy(struct dentry *root)
struct list_head *next;
int count = root->d_count;
+ check_lock();
+
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -337,6 +349,44 @@ resume:
}
/*
+ * Search for at least 1 mount point in the dentry's subdirs.
+ * We descend to the next level whenever the d_subdirs
+ * list is non-empty and continue searching.
+ */
+int have_submounts(struct dentry *parent)
+{
+ struct dentry *this_parent = parent;
+ struct list_head *next;
+
+ if (parent->d_mounts != parent)
+ return 1;
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+ /* Have we found a mount point ? */
+ if (dentry->d_mounts != dentry)
+ return 1;
+ if (!list_empty(&dentry->d_subdirs)) {
+ this_parent = dentry;
+ goto repeat;
+ }
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ if (this_parent != parent) {
+ next = this_parent->d_child.next;
+ this_parent = this_parent->d_parent;
+ goto resume;
+ }
+ return 0; /* No mount points found in tree */
+}
+
+/*
* Search the dentry child list for the specified parent,
* and move any unused dentries to the end of the unused
* list for prune_dcache(). We descend to the next level
@@ -349,6 +399,8 @@ static int select_parent(struct dentry * parent)
struct list_head *next;
int found = 0;
+ check_lock();
+
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -525,6 +577,8 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp = head->next;
+ check_lock();
+
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
if (tmp == head)
@@ -564,6 +618,8 @@ int d_validate(struct dentry *dentry, struct dentry *dparent,
struct list_head *base, *lhp;
int valid = 1;
+ check_lock();
+
if (dentry != dparent) {
base = d_hash(dparent, hash);
lhp = base;
@@ -605,6 +661,10 @@ out:
*/
void d_delete(struct dentry * dentry)
{
+ check_lock();
+
+ check_lock();
+
/*
* Are we the only user?
*/
@@ -646,6 +706,7 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target)
{
const unsigned char *old_name, *new_name;
+ check_lock();
memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
old_name = target->d_name.name;
new_name = dentry->d_name.name;
@@ -674,6 +735,8 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target)
*/
void d_move(struct dentry * dentry, struct dentry * target)
{
+ check_lock();
+
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -773,7 +836,11 @@ asmlinkage long sys_getcwd(char *buf, unsigned long size)
error = -ENOMEM;
if (page) {
unsigned long len;
- char * cwd = d_path(pwd, page, PAGE_SIZE);
+ char * cwd;
+
+ lock_kernel();
+ cwd = d_path(pwd, page, PAGE_SIZE);
+ unlock_kernel();
error = -ERANGE;
len = PAGE_SIZE + page - cwd;
diff --git a/fs/devfs/Makefile b/fs/devfs/Makefile
index 2b301b37a..23f190410 100644
--- a/fs/devfs/Makefile
+++ b/fs/devfs/Makefile
@@ -36,4 +36,4 @@ doc: base.c util.c
test:
gcc -o /tmp/base.o -D__KERNEL__ -I../../include -Wall \
-Wstrict-prototypes -O2 -fomit-frame-pointer -pipe \
- -fno-strength-reduce -DCPU=686 -DEXPORT_SYMTAB -c base.c
+ -fno-strength-reduce -DEXPORT_SYMTAB -c base.c
diff --git a/fs/exec.c b/fs/exec.c
index d7d5240be..8a8a10631 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -93,56 +93,6 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
__MOD_DEC_USE_COUNT(fmt->module);
}
-/* N.B. Error returns must be < 0 */
-int open_dentry(struct dentry * dentry, int mode)
-{
- struct inode * inode = dentry->d_inode;
- struct file * f;
- struct list_head * l = NULL;
- int fd, error;
-
- lock_kernel();
- if (inode->i_sb)
- l = &inode->i_sb->s_files;
-
- error = -EINVAL;
- if (!inode->i_fop)
- goto out;
- fd = get_unused_fd();
- if (fd >= 0) {
- error = -ENFILE;
- f = get_empty_filp();
- if (!f)
- goto out_fd;
- f->f_flags = mode;
- f->f_mode = (mode+1) & O_ACCMODE;
- f->f_dentry = dentry;
- f->f_pos = 0;
- f->f_reada = 0;
- f->f_op = inode->i_fop;
- if (f->f_op->open) {
- error = f->f_op->open(inode,f);
- if (error)
- goto out_filp;
- }
- file_move(f, l);
- fd_install(fd, f);
- dget(dentry);
- }
- unlock_kernel();
- return fd;
-
-out_filp:
- if (error > 0)
- error = -EIO;
- put_filp(f);
-out_fd:
- put_unused_fd(fd);
-out:
- unlock_kernel();
- return error;
-}
-
/*
* Note that a shared library must be both readable and executable due to
* security reasons.
@@ -365,44 +315,45 @@ int setup_arg_pages(struct linux_binprm *bprm)
return 0;
}
-/*
- * Read in the complete executable. This is used for "-N" files
- * that aren't on a block boundary, and for files on filesystems
- * without get_block support.
- */
-int read_exec(struct dentry *dentry, unsigned long offset,
- char * addr, unsigned long count, int to_kmem)
+struct file *open_exec(const char *name)
{
- struct file file;
- struct inode * inode = dentry->d_inode;
- int result = -ENOEXEC;
+ struct dentry *dentry;
+ struct file *file;
- if (!inode->i_fop)
- goto end_readexec;
- if (init_private_file(&file, dentry, 1))
- goto end_readexec;
- if (!file.f_op->read)
- goto close_readexec;
- if (file.f_op->llseek) {
- if (file.f_op->llseek(&file,offset,0) != offset)
- goto close_readexec;
- } else
- file.f_pos = offset;
- if (to_kmem) {
- mm_segment_t old_fs = get_fs();
- set_fs(get_ds());
- result = file.f_op->read(&file, addr, count, &file.f_pos);
- set_fs(old_fs);
- } else {
- result = verify_area(VERIFY_WRITE, addr, count);
- if (result)
- goto close_readexec;
- result = file.f_op->read(&file, addr, count, &file.f_pos);
+ lock_kernel();
+ dentry = lookup_dentry(name, NULL, LOOKUP_FOLLOW);
+ file = (struct file*) dentry;
+ if (!IS_ERR(dentry)) {
+ file = ERR_PTR(-EACCES);
+ if (dentry->d_inode && S_ISREG(dentry->d_inode->i_mode)) {
+ int err = permission(dentry->d_inode, MAY_EXEC);
+ file = ERR_PTR(err);
+ if (!err) {
+ file = dentry_open(dentry, O_RDONLY);
+out:
+ unlock_kernel();
+ return file;
+ }
+ }
+ dput(dentry);
}
-close_readexec:
- if (file.f_op->release)
- file.f_op->release(inode,&file);
-end_readexec:
+ goto out;
+}
+
+int kernel_read(struct file *file, unsigned long offset,
+ char * addr, unsigned long count)
+{
+ mm_segment_t old_fs;
+ loff_t pos = offset;
+ int result = -ENOSYS;
+
+ if (!file->f_op->read)
+ goto fail;
+ old_fs = get_fs();
+ set_fs(get_ds());
+ result = file->f_op->read(file, addr, count, &pos);
+ set_fs(old_fs);
+fail:
return result;
}
@@ -540,7 +491,7 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_thread();
if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
- permission(bprm->dentry->d_inode,MAY_READ))
+ permission(bprm->file->f_dentry->d_inode,MAY_READ))
current->dumpable = 0;
/* An exec changes our domain. We are no longer part of the thread
@@ -580,7 +531,7 @@ int prepare_binprm(struct linux_binprm *bprm)
{
int mode;
int retval,id_change,cap_raised;
- struct inode * inode = bprm->dentry->d_inode;
+ struct inode * inode = bprm->file->f_dentry->d_inode;
mode = inode->i_mode;
if (!S_ISREG(mode)) /* must be regular file */
@@ -677,7 +628,7 @@ int prepare_binprm(struct linux_binprm *bprm)
}
memset(bprm->buf,0,sizeof(bprm->buf));
- return read_exec(bprm->dentry,0,bprm->buf,128,1);
+ return kernel_read(bprm->file,0,bprm->buf,128);
}
/*
@@ -763,24 +714,20 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
int i;
char * dynloader[] = { "/sbin/loader" };
- struct dentry * dentry;
+ struct file * file;
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
for (i = 0 ; i < MAX_ARG_PAGES ; i++) /* clear page-table */
bprm_loader.page[i] = NULL;
- lock_kernel();
- dentry = open_namei(dynloader[0]);
- unlock_kernel();
- retval = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ file = open_exec(dynloader[0]);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
return retval;
- bprm->dentry = dentry;
+ bprm->file = file;
bprm->loader = bprm_loader.p;
retval = prepare_binprm(bprm);
if (retval<0)
@@ -802,12 +749,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
retval = fn(bprm, regs);
if (retval >= 0) {
put_binfmt(fmt);
- if (bprm->dentry) {
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- }
- bprm->dentry = NULL;
+ if (bprm->file)
+ fput(bprm->file);
+ bprm->file = NULL;
current->did_exec = 1;
return retval;
}
@@ -815,7 +759,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
put_binfmt(fmt);
if (retval != -ENOEXEC)
break;
- if (!bprm->dentry) {
+ if (!bprm->file) {
spin_unlock(&binfmt_lock);
return retval;
}
@@ -847,37 +791,31 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
struct linux_binprm bprm;
- struct dentry * dentry;
+ struct file *file;
int retval;
int i;
bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));
- lock_kernel();
- dentry = open_namei(filename);
- unlock_kernel();
+ file = open_exec(filename);
- retval = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
return retval;
- bprm.dentry = dentry;
+ bprm.file = file;
bprm.filename = filename;
bprm.sh_bang = 0;
bprm.loader = 0;
bprm.exec = 0;
if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
- lock_kernel();
- dput(dentry);
- unlock_kernel();
+ fput(file);
return bprm.argc;
}
if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
- lock_kernel();
- dput(dentry);
- unlock_kernel();
+ fput(file);
return bprm.envc;
}
@@ -905,11 +843,8 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs
out:
/* Something went wrong, return the inode and free the argument pages*/
- if (bprm.dentry) {
- lock_kernel();
- dput(bprm.dentry);
- unlock_kernel();
- }
+ if (bprm.file)
+ fput(bprm.file);
/* Assumes that free_page() can take a NULL argument. */
/* I hope this is ok for all architectures */
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 904f5cb8f..90ce121ce 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -11,6 +11,7 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#include <linux/config.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/quotaops.h>
@@ -300,21 +301,20 @@ do_more:
if (!gdp)
goto error_return;
- if (test_opt (sb, CHECK_STRICT) &&
- (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
- in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
- in_range (block, le32_to_cpu(gdp->bg_inode_table),
- sb->u.ext2_sb.s_itb_per_group) ||
- in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
- sb->u.ext2_sb.s_itb_per_group)))
- ext2_panic (sb, "ext2_free_blocks",
+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
+ in_range (block, le32_to_cpu(gdp->bg_inode_table),
+ sb->u.ext2_sb.s_itb_per_group) ||
+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
+ sb->u.ext2_sb.s_itb_per_group))
+ ext2_error (sb, "ext2_free_blocks",
"Freeing blocks in system zones - "
"Block = %lu, count = %lu",
block, count);
for (i = 0; i < count; i++) {
if (!ext2_clear_bit (bit + i, bh->b_data))
- ext2_warning (sb, "ext2_free_blocks",
+ ext2_error (sb, "ext2_free_blocks",
"bit already cleared for block %lu",
block);
else {
@@ -527,11 +527,11 @@ got_block:
tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + le32_to_cpu(es->s_first_data_block);
- if (test_opt (sb, CHECK_STRICT) &&
- (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
- tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
- in_range (tmp, le32_to_cpu(gdp->bg_inode_table), sb->u.ext2_sb.s_itb_per_group)))
- ext2_panic (sb, "ext2_new_block",
+ if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
+ tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
+ in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
+ sb->u.ext2_sb.s_itb_per_group))
+ ext2_error (sb, "ext2_new_block",
"Allocating block in system zone - "
"block = %u", tmp);
@@ -679,6 +679,7 @@ int ext2_group_sparse(int group)
test_root(group, 7));
}
+#ifdef CONFIG_EXT2_CHECK
/* Called at mount-time, super-block is locked */
void ext2_check_blocks_bitmap (struct super_block * sb)
{
@@ -753,3 +754,4 @@ void ext2_check_blocks_bitmap (struct super_block * sb)
"stored = %lu, counted = %lu",
(unsigned long) le32_to_cpu(es->s_free_blocks_count), bitmap_count);
}
+#endif
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 3a3e4a69c..277562ec7 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -12,6 +12,7 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#include <linux/config.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/quotaops.h>
@@ -236,7 +237,7 @@ void ext2_free_inode (struct inode * inode)
/* Ok, now we can actually update the inode bitmaps.. */
if (!ext2_clear_bit (bit, bh->b_data))
- ext2_warning (sb, "ext2_free_inode",
+ ext2_error (sb, "ext2_free_inode",
"bit already cleared for inode %lu", ino);
else {
gdp = ext2_get_group_desc (sb, block_group, &bh2);
@@ -401,7 +402,7 @@ repeat:
EXT2_INODES_PER_GROUP(sb))) <
EXT2_INODES_PER_GROUP(sb)) {
if (ext2_set_bit (j, bh->b_data)) {
- ext2_warning (sb, "ext2_new_inode",
+ ext2_error (sb, "ext2_new_inode",
"bit already set for inode %d", j);
goto repeat;
}
@@ -527,6 +528,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
#endif
}
+#ifdef CONFIG_EXT2_CHECK
/* Called at mount-time, super-block is locked */
void ext2_check_inodes_bitmap (struct super_block * sb)
{
@@ -565,3 +567,4 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
(unsigned long) le32_to_cpu(es->s_free_inodes_count),
bitmap_count);
}
+#endif
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index cfaf5d4d3..dd09b95aa 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -628,7 +628,7 @@ static int ext2_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,ext2_get_block);
}
-static int ext2_prepare_write(struct page *page, unsigned from, unsigned to)
+static int ext2_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,ext2_get_block);
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 73be71e61..a68289d71 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -16,6 +16,7 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
@@ -153,23 +154,14 @@ static int parse_options (char * options, unsigned long * sb_block,
set_opt (*mount_options, NO_UID32);
}
else if (!strcmp (this_char, "check")) {
- if (!value || !*value)
- set_opt (*mount_options, CHECK_NORMAL);
- else if (!strcmp (value, "none")) {
- clear_opt (*mount_options, CHECK_NORMAL);
- clear_opt (*mount_options, CHECK_STRICT);
- }
- else if (!strcmp (value, "normal"))
- set_opt (*mount_options, CHECK_NORMAL);
- else if (!strcmp (value, "strict")) {
- set_opt (*mount_options, CHECK_NORMAL);
- set_opt (*mount_options, CHECK_STRICT);
- }
- else {
- printk ("EXT2-fs: Invalid check option: %s\n",
- value);
- return 0;
- }
+ if (!value || !*value || !strcmp (value, "none"))
+ clear_opt (*mount_options, CHECK);
+ else
+#ifdef CONFIG_EXT2_CHECK
+ set_opt (*mount_options, CHECK);
+#else
+ printk("EXT2 Check option not supported\n");
+#endif
}
else if (!strcmp (this_char, "debug"))
set_opt (*mount_options, DEBUG);
@@ -205,10 +197,6 @@ static int parse_options (char * options, unsigned long * sb_block,
set_opt (*mount_options, GRPID);
else if (!strcmp (this_char, "minixdf"))
set_opt (*mount_options, MINIX_DF);
- else if (!strcmp (this_char, "nocheck")) {
- clear_opt (*mount_options, CHECK_NORMAL);
- clear_opt (*mount_options, CHECK_STRICT);
- }
else if (!strcmp (this_char, "nogrpid") ||
!strcmp (this_char, "sysvgroups"))
clear_opt (*mount_options, GRPID);
@@ -305,10 +293,12 @@ static void ext2_setup_super (struct super_block * sb,
EXT2_BLOCKS_PER_GROUP(sb),
EXT2_INODES_PER_GROUP(sb),
sb->u.ext2_sb.s_mount_opt);
+#ifdef CONFIG_EXT2_CHECK
if (test_opt (sb, CHECK)) {
ext2_check_blocks_bitmap (sb);
ext2_check_inodes_bitmap (sb);
}
+#endif
}
#if 0 /* ibasket's still have unresolved bugs... -DaveM */
@@ -398,7 +388,6 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data,
}
sb->u.ext2_sb.s_mount_opt = 0;
- set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL);
if (!parse_options ((char *) data, &sb_block, &resuid, &resgid,
&sb->u.ext2_sb.s_mount_opt)) {
return NULL;
@@ -674,7 +663,6 @@ int ext2_remount (struct super_block * sb, int * flags, char * data)
/*
* Allow the "check" option to be passed as a remount option.
*/
- new_mount_opt = EXT2_MOUNT_CHECK_NORMAL;
if (!parse_options (data, &tmp, &resuid, &resgid,
&new_mount_opt))
return -EINVAL;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cedd3ba2b..a0202c66f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -737,7 +737,7 @@ static int fat_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,fat_get_block);
}
-static int fat_prepare_write(struct page *page, unsigned from, unsigned to)
+static int fat_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,fat_get_block,
&MSDOS_I((struct inode*)page->mapping->host)->mmu_private);
diff --git a/fs/fifo.c b/fs/fifo.c
index fcaf45f9f..25a08e757 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -12,6 +12,21 @@
#include <linux/mm.h>
#include <linux/malloc.h>
+static void wait_for_partner(struct inode* inode, unsigned int* cnt)
+{
+ int cur = *cnt;
+ while(cur == *cnt) {
+ pipe_wait(inode);
+ if(signal_pending(current))
+ break;
+ }
+}
+
+static void wake_up_partner(struct inode* inode)
+{
+ wake_up_interruptible(PIPE_WAIT(*inode));
+}
+
static int fifo_open(struct inode *inode, struct file *filp)
{
int ret;
@@ -20,29 +35,12 @@ static int fifo_open(struct inode *inode, struct file *filp)
if (down_interruptible(PIPE_SEM(*inode)))
goto err_nolock_nocleanup;
- if (! inode->i_pipe) {
- unsigned long page;
- struct pipe_inode_info *info;
-
- info = kmalloc(sizeof(struct pipe_inode_info),GFP_KERNEL);
-
+ if (!inode->i_pipe) {
ret = -ENOMEM;
- if (!info)
- goto err_nocleanup;
- page = __get_free_page(GFP_KERNEL);
- if (!page) {
- kfree(info);
+ if(!pipe_new(inode))
goto err_nocleanup;
- }
-
- inode->i_pipe = info;
-
- init_waitqueue_head(PIPE_WAIT(*inode));
- PIPE_BASE(*inode) = (char *) page;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
- PIPE_WAITING_WRITERS(*inode) = PIPE_WAITING_READERS(*inode) = 0;
}
+ filp->f_version = 0;
switch (filp->f_mode) {
case 1:
@@ -51,27 +49,23 @@ static int fifo_open(struct inode *inode, struct file *filp)
* POSIX.1 says that O_NONBLOCK means return with the FIFO
* opened, even when there is no process writing the FIFO.
*/
- filp->f_op = &connecting_fifo_fops;
+ filp->f_op = &read_fifo_fops;
+ PIPE_RCOUNTER(*inode)++;
if (PIPE_READERS(*inode)++ == 0)
- wake_up_interruptible(PIPE_WAIT(*inode));
-
- if (!(filp->f_flags & O_NONBLOCK)) {
- while (!PIPE_WRITERS(*inode)) {
- if (signal_pending(current))
+ wake_up_partner(inode);
+
+ if (!PIPE_WRITERS(*inode)) {
+ if ((filp->f_flags & O_NONBLOCK)) {
+ /* suppress POLLHUP until we have
+ * seen a writer */
+ filp->f_version = PIPE_WCOUNTER(*inode);
+ } else
+ {
+ wait_for_partner(inode, &PIPE_WCOUNTER(*inode));
+ if(signal_pending(current))
goto err_rd;
- up(PIPE_SEM(*inode));
- interruptible_sleep_on(PIPE_WAIT(*inode));
-
- /* Note that using down_interruptible here
- and similar places below is pointless,
- since we have to acquire the lock to clean
- up properly. */
- down(PIPE_SEM(*inode));
}
}
-
- if (PIPE_WRITERS(*inode))
- filp->f_op = &read_fifo_fops;
break;
case 2:
@@ -85,15 +79,14 @@ static int fifo_open(struct inode *inode, struct file *filp)
goto err;
filp->f_op = &write_fifo_fops;
+ PIPE_WCOUNTER(*inode)++;
if (!PIPE_WRITERS(*inode)++)
- wake_up_interruptible(PIPE_WAIT(*inode));
+ wake_up_partner(inode);
- while (!PIPE_READERS(*inode)) {
+ if (!PIPE_READERS(*inode)) {
+ wait_for_partner(inode, &PIPE_RCOUNTER(*inode));
if (signal_pending(current))
goto err_wr;
- up(PIPE_SEM(*inode));
- interruptible_sleep_on(PIPE_WAIT(*inode));
- down(PIPE_SEM(*inode));
}
break;
@@ -108,8 +101,10 @@ static int fifo_open(struct inode *inode, struct file *filp)
PIPE_READERS(*inode)++;
PIPE_WRITERS(*inode)++;
+ PIPE_RCOUNTER(*inode)++;
+ PIPE_WCOUNTER(*inode)++;
if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1)
- wake_up_interruptible(PIPE_WAIT(*inode));
+ wake_up_partner(inode);
break;
default:
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 34e365663..8c0afe0c8 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -225,7 +225,7 @@ static int hfs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,hfs_get_block);
}
-static int hfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,hfs_get_block,
&((struct inode*)page->mapping->host)->u.hfs_i.mmu_private);
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 710b9120b..d8063e296 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -94,7 +94,7 @@ static int hpfs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,hpfs_get_block);
}
-static int hpfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,hpfs_get_block,
&((struct inode*)page->mapping->host)->u.hpfs_i.mmu_private);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 517456326..20b9bb490 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -296,6 +296,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
struct rpc_clnt *clnt;
struct nlm_args *argp = &req->a_args;
struct nlm_res *resp = &req->a_res;
+ struct rpc_message msg;
int status;
dprintk("lockd: call procedure %s on %s (async)\n",
@@ -306,8 +307,11 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
return -ENOLCK;
/* bootstrap and kick off the async RPC call */
- status = rpc_do_call(clnt, proc, argp, resp, RPC_TASK_ASYNC,
- callback, req);
+ msg.rpc_proc = proc;
+ msg.rpc_argp = argp;
+ msg.rpc_resp =resp;
+ msg.rpc_cred = NULL;
+ status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
/* If the async call is proceeding, increment host refcount */
if (status >= 0 && (req->a_flags & RPC_TASK_ASYNC))
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a2f280bdc..55dee3886 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -163,7 +163,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
*p++ = htonl(argp->proc);
/* This is the private part. Needed only for SM_MON call */
- if (rqstp->rq_task->tk_proc == SM_MON) {
+ if (rqstp->rq_task->tk_msg.rpc_proc == SM_MON) {
*p++ = argp->addr;
*p++ = 0;
*p++ = 0;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 4c9fa16a3..a581e328a 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -1014,7 +1014,7 @@ static int minix_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,minix_get_block);
}
-static int minix_prepare_write(struct page *page, unsigned from, unsigned to)
+static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,minix_get_block);
}
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 3171e8adc..3c8aac510 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -9,7 +9,7 @@
O_TARGET := nfs.o
O_OBJS := inode.o file.o read.o write.o dir.o symlink.o proc.o \
- nfs2xdr.o
+ nfs2xdr.o flushd.o
ifdef CONFIG_ROOT_NFS
O_OBJS += nfsroot.o mount_clnt.o
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 37b2b682b..3ca240129 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -596,9 +596,12 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
out_valid:
return 1;
out_bad:
- d_drop(dentry);
if (!list_empty(&dentry->d_subdirs))
shrink_dcache_parent(dentry);
+ /* If we have submounts, don't unhash ! */
+ if (have_submounts(dentry))
+ goto out_valid;
+ d_drop(dentry);
/* Purge readdir caches. */
if (dentry->d_parent->d_inode) {
nfs_zap_caches(dentry->d_parent->d_inode);
@@ -862,61 +865,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
return error;
}
-
-/* Note: we copy the code from lookup_dentry() here, only: we have to
- * omit the directory lock. We are already the owner of the lock when
- * we reach here. And "down(&dir->i_sem)" would make us sleep forever
- * ('cause WE have the lock)
- *
- * VERY IMPORTANT: calculate the hash for this dentry!!!!!!!!
- * Otherwise the cached lookup DEFINITELY WILL fail. And a new dentry
- * is created. Without the DCACHE_NFSFS_RENAMED flag. And with d_count
- * == 1. And trouble.
- *
- * Concerning my choice of the temp name: it is just nice to have
- * i_ino part of the temp name, as this offers another check whether
- * somebody attempts to remove the "silly renamed" dentry itself.
- * Which is something that I consider evil. Your opinion may vary.
- * BUT:
- * Now that I compute the hash value right, it should be possible to simply
- * check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of
- * doing the string compare.
- * WHICH MEANS:
- * This offers the opportunity to shorten the temp name. Currently, I use
- * the hex representation of i_ino + an event counter. This sums up to
- * as much as 36 characters for a 64 bit machine, and needs 20 chars on
- * a 32 bit machine.
- * QUINTESSENCE
- * The use of i_ino is simply cosmetic. All we need is a unique temp
- * file name for the .nfs files. The event counter seemed to be adequate.
- * And as we retry in case such a file already exists, we are guaranteed
- * to succeed.
- */
-
-static
-struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen)
-{
- struct qstr sqstr;
- struct dentry *sdentry;
- struct dentry *res;
-
- sqstr.name = silly;
- sqstr.len = slen;
- sqstr.hash = full_name_hash(silly, slen);
- sdentry = d_lookup(parent, &sqstr);
- if (!sdentry) {
- sdentry = d_alloc(parent, &sqstr);
- if (sdentry == NULL)
- return ERR_PTR(-ENOMEM);
- res = nfs_lookup(parent->d_inode, sdentry);
- if (res) {
- dput(sdentry);
- return res;
- }
- }
- return sdentry;
-}
-
static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
{
static unsigned int sillycounter = 0;
@@ -966,7 +914,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
dfprintk(VFS, "trying to rename %s to %s\n",
dentry->d_name.name, silly);
- sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen);
+ sdentry = lookup_one(silly, dget(dentry->d_parent));
/*
* N.B. Better to return EBUSY here ... it could be
* dangerous to delete the file while it's in use.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9a91bb1ab..32d290c73 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -144,10 +144,10 @@ nfs_fsync(struct file *file, struct dentry *dentry)
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
-static int nfs_prepare_write(struct page *page, unsigned offset, unsigned to)
+static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
kmap(page);
- return 0;
+ return nfs_flush_incompatible(file, page);
}
static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
diff --git a/fs/nfs/flushd.c b/fs/nfs/flushd.c
new file mode 100644
index 000000000..d36c3a9ae
--- /dev/null
+++ b/fs/nfs/flushd.c
@@ -0,0 +1,304 @@
+/*
+ * linux/fs/nfs/flushd.c
+ *
+ * For each NFS mount, there is a separate cache object that contains
+ * a hash table of all clusters. With this cache, an async RPC task
+ * (`flushd') is associated, which wakes up occasionally to inspect
+ * its list of dirty buffers.
+ * (Note that RPC tasks aren't kernel threads. Take a look at the
+ * rpciod code to understand what they are).
+ *
+ * Inside the cache object, we also maintain a count of the current number
+ * of dirty pages, which may not exceed a certain threshold.
+ * (FIXME: This threshold should be configurable).
+ *
+ * The code is streamlined for what I think is the prevalent case for
+ * NFS traffic, which is sequential write access without concurrent
+ * access by different processes.
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ *
+ * Rewritten 6/3/2000 by Trond Myklebust
+ * Copyright (C) 1999, 2000, Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/malloc.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+
+#include <linux/sched.h>
+
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+
+#include <linux/spinlock.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_flushd.h>
+#include <linux/nfs_mount.h>
+
+/*
+ * Various constants
+ */
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
+/*
+ * This is the wait queue all cluster daemons sleep on
+ */
+static struct rpc_wait_queue flushd_queue = RPC_INIT_WAITQ("nfs_flushd");
+
+/*
+ * Spinlock
+ */
+spinlock_t nfs_flushd_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Local function declarations.
+ */
+static void nfs_flushd(struct rpc_task *);
+static void nfs_flushd_exit(struct rpc_task *);
+
+
+int nfs_reqlist_init(struct nfs_server *server)
+{
+ struct nfs_reqlist *cache;
+ struct rpc_task *task;
+ int status = 0;
+
+ dprintk("NFS: writecache_init\n");
+ spin_lock(&nfs_flushd_lock);
+ cache = server->rw_requests;
+
+ if (cache->task)
+ goto out_unlock;
+
+ /* Create the RPC task */
+ status = -ENOMEM;
+ task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+ if (!task)
+ goto out_unlock;
+
+ task->tk_calldata = server;
+
+ cache->task = task;
+
+ /* Run the task */
+ cache->runat = jiffies;
+
+ cache->auth = server->client->cl_auth;
+ task->tk_action = nfs_flushd;
+ task->tk_exit = nfs_flushd_exit;
+
+ spin_unlock(&nfs_flushd_lock);
+ rpc_execute(task);
+ return 0;
+ out_unlock:
+ spin_unlock(&nfs_flushd_lock);
+ return status;
+}
+
+void nfs_reqlist_exit(struct nfs_server *server)
+{
+ struct nfs_reqlist *cache;
+
+ cache = server->rw_requests;
+ if (!cache)
+ return;
+
+ dprintk("NFS: reqlist_exit (ptr %p rpc %p)\n", cache, cache->task);
+ while (cache->task || cache->inodes) {
+ spin_lock(&nfs_flushd_lock);
+ if (!cache->task) {
+ spin_unlock(&nfs_flushd_lock);
+ nfs_reqlist_init(server);
+ } else {
+ cache->task->tk_status = -ENOMEM;
+ rpc_wake_up_task(cache->task);
+ spin_unlock(&nfs_flushd_lock);
+ }
+ interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ);
+ }
+}
+
+int nfs_reqlist_alloc(struct nfs_server *server)
+{
+ struct nfs_reqlist *cache;
+ if (server->rw_requests)
+ return 0;
+
+ cache = (struct nfs_reqlist *)kmalloc(sizeof(*cache), GFP_KERNEL);
+ if (!cache)
+ return -ENOMEM;
+
+ memset(cache, 0, sizeof(*cache));
+ init_waitqueue_head(&cache->request_wait);
+ server->rw_requests = cache;
+
+ return 0;
+}
+
+void nfs_reqlist_free(struct nfs_server *server)
+{
+ if (server->rw_requests) {
+ kfree(server->rw_requests);
+ server->rw_requests = NULL;
+ }
+}
+
+void nfs_wake_flushd()
+{
+ rpc_wake_up_status(&flushd_queue, -ENOMEM);
+}
+
+static void inode_append_flushd(struct inode *inode)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct inode **q;
+
+ spin_lock(&nfs_flushd_lock);
+ if (NFS_FLAGS(inode) & NFS_INO_FLUSH)
+ goto out;
+ inode->u.nfs_i.hash_next = NULL;
+
+ q = &cache->inodes;
+ while (*q)
+ q = &(*q)->u.nfs_i.hash_next;
+ *q = inode;
+
+ /* Note: we increase the inode i_count in order to prevent
+ * it from disappearing when on the flush list
+ */
+ NFS_FLAGS(inode) |= NFS_INO_FLUSH;
+ inode->i_count++;
+ out:
+ spin_unlock(&nfs_flushd_lock);
+}
+
+void inode_remove_flushd(struct inode *inode)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct inode **q;
+
+ spin_lock(&nfs_flushd_lock);
+ if (!(NFS_FLAGS(inode) & NFS_INO_FLUSH))
+ goto out;
+
+ q = &cache->inodes;
+ while (*q && *q != inode)
+ q = &(*q)->u.nfs_i.hash_next;
+ if (*q) {
+ *q = inode->u.nfs_i.hash_next;
+ NFS_FLAGS(inode) &= ~NFS_INO_FLUSH;
+ iput(inode);
+ }
+ out:
+ spin_unlock(&nfs_flushd_lock);
+}
+
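+/*
+ * Record when this inode next needs scanning, put it on the flush
+ * list, and make sure a flush daemon is running, waking it early
+ * if the new deadline is sooner than its next scheduled run.
+ */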
+void inode_schedule_scan(struct inode *inode, unsigned long time)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct rpc_task *task;
+ unsigned long mintimeout;
+
+ if (time_after(NFS_NEXTSCAN(inode), time))
+ NFS_NEXTSCAN(inode) = time;
+ mintimeout = jiffies + 1 * HZ;
+ if (time_before(mintimeout, NFS_NEXTSCAN(inode)))
+ mintimeout = NFS_NEXTSCAN(inode);
+ inode_append_flushd(inode);
+
+ spin_lock(&nfs_flushd_lock);
+ task = cache->task;
+ if (!task) {
+ spin_unlock(&nfs_flushd_lock);
+ nfs_reqlist_init(NFS_SERVER(inode));
+ } else {
+ if (time_after(cache->runat, mintimeout))
+ rpc_wake_up_task(task);
+ spin_unlock(&nfs_flushd_lock);
+ }
+}
+
+
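+/*
+ * Main loop of the flush daemon: detach the list of inodes awaiting
+ * writeback, flush requests whose scan time has expired (or all of
+ * them when woken because of memory pressure), requeue inodes that
+ * still have dirty pages, and work out how long to sleep before the
+ * next pass.
+ */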
+static void
+nfs_flushd(struct rpc_task *task)
+{
+ struct nfs_server *server;
+ struct nfs_reqlist *cache;
+ struct inode *inode, *next;
+ unsigned long delay = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ int flush = (task->tk_status == -ENOMEM);
+
+ dprintk("NFS: %4d flushd starting\n", task->tk_pid);
+ server = (struct nfs_server *) task->tk_calldata;
+ cache = server->rw_requests;
+
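+	/* Detach the current list of inodes to scan; inodes queued from
+	 * here on go onto a fresh list for the next pass. */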
+ spin_lock(&nfs_flushd_lock);
+ next = cache->inodes;
+ cache->inodes = NULL;
+ spin_unlock(&nfs_flushd_lock);
+
+ while ((inode = next) != NULL) {
+ next = next->u.nfs_i.hash_next;
+ inode->u.nfs_i.hash_next = NULL;
+ NFS_FLAGS(inode) &= ~NFS_INO_FLUSH;
+
+ if (flush) {
+ nfs_sync_file(inode, NULL, 0, 0, FLUSH_AGING);
+ } else if (time_after(jiffies, NFS_NEXTSCAN(inode))) {
+ NFS_NEXTSCAN(inode) = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ nfs_flush_timeout(inode, FLUSH_AGING);
+#ifdef CONFIG_NFS_V3
+ nfs_commit_timeout(inode, FLUSH_AGING);
+#endif
+ }
+
+ if (nfs_have_writebacks(inode)) {
+ inode_append_flushd(inode);
+ if (time_after(delay, NFS_NEXTSCAN(inode)))
+ delay = NFS_NEXTSCAN(inode);
+ }
+ iput(inode);
+ }
+
+ dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid);
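+	/* Sleep for at least one second, otherwise until the earliest scheduled scan. */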
+ if (time_after(jiffies + 1 * HZ, delay))
+ delay = 1 * HZ;
+ else
+ delay = delay - jiffies;
+ task->tk_status = 0;
+ task->tk_action = nfs_flushd;
+ task->tk_timeout = delay;
+ cache->runat = jiffies + task->tk_timeout;
+
+ spin_lock(&nfs_flushd_lock);
+ if (!cache->nr_requests && !cache->inodes) {
+ cache->task = NULL;
+ task->tk_action = NULL;
+ } else
+ rpc_sleep_on(&flushd_queue, task, NULL, NULL);
+ spin_unlock(&nfs_flushd_lock);
+}
+
+static void
+nfs_flushd_exit(struct rpc_task *task)
+{
+ struct nfs_server *server;
+ struct nfs_reqlist *cache;
+ server = (struct nfs_server *) task->tk_calldata;
+ cache = server->rw_requests;
+
+ spin_lock(&nfs_flushd_lock);
+ if (cache->task == task)
+ cache->task = NULL;
+ spin_unlock(&nfs_flushd_lock);
+ wake_up(&cache->request_wait);
+ rpc_release_task(task);
+}
+
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 441d62edc..ca7e1b944 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -27,6 +27,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_flushd.h>
#include <linux/lockd/bind.h>
#include <linux/smp_lock.h>
@@ -74,6 +75,12 @@ nfs_read_inode(struct inode * inode)
inode->i_rdev = 0;
NFS_FILEID(inode) = 0;
NFS_FSID(inode) = 0;
+ INIT_LIST_HEAD(&inode->u.nfs_i.dirty);
+ INIT_LIST_HEAD(&inode->u.nfs_i.commit);
+ INIT_LIST_HEAD(&inode->u.nfs_i.writeback);
+ inode->u.nfs_i.ndirty = 0;
+ inode->u.nfs_i.ncommit = 0;
+ inode->u.nfs_i.npages = 0;
NFS_CACHEINV(inode);
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
}
@@ -92,8 +99,6 @@ nfs_put_inode(struct inode * inode)
static void
nfs_delete_inode(struct inode * inode)
{
- int failed;
-
dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
lock_kernel();
@@ -101,29 +106,12 @@ nfs_delete_inode(struct inode * inode)
nfs_free_dircache(inode);
} else {
/*
- * Flush out any pending write requests ...
+ * The following can never actually happen...
*/
- if (NFS_WRITEBACK(inode) != NULL) {
- unsigned long timeout = jiffies + 5*HZ;
-#ifdef NFS_DEBUG_VERBOSE
-printk("nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
-#endif
- nfs_inval(inode);
- while (NFS_WRITEBACK(inode) != NULL &&
- time_before(jiffies, timeout)) {
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HZ/10);
- }
- current->state = TASK_RUNNING;
- if (NFS_WRITEBACK(inode) != NULL)
- printk("NFS: Arghhh, stuck RPC requests!\n");
+ if (nfs_have_writebacks(inode)) {
+ printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
}
}
-
- failed = nfs_check_failed_request(inode);
- if (failed)
- printk("NFS: inode %ld had %d failed requests\n",
- inode->i_ino, failed);
unlock_kernel();
clear_inode(inode);
@@ -135,9 +123,18 @@ nfs_put_super(struct super_block *sb)
struct nfs_server *server = &sb->u.nfs_sb.s_server;
struct rpc_clnt *rpc;
+ /*
+ * First get rid of the request flushing daemon.
+ * Relies on rpc_shutdown_client() waiting on all
+ * client tasks to finish.
+ */
+ nfs_reqlist_exit(server);
+
if ((rpc = server->client) != NULL)
rpc_shutdown_client(rpc);
+ nfs_reqlist_free(server);
+
if (!(server->flags & NFS_MOUNT_NONLM))
lockd_down(); /* release rpc.lockd */
rpciod_down(); /* release rpciod */
@@ -306,6 +303,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
sb->s_root->d_op = &nfs_dentry_operations;
sb->s_root->d_fsdata = root_fh;
+ /* Fire up the writeback cache */
+ if (nfs_reqlist_alloc(server) < 0) {
+ printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n");
+ goto failure_kill_reqlist;
+ }
+
/* We're airborne */
/* Check whether to start the lockd process */
@@ -314,6 +317,8 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
return sb;
/* Yargs. It didn't work out. */
+ failure_kill_reqlist:
+ nfs_reqlist_exit(server);
out_no_root:
printk("nfs_read_super: get root inode failed\n");
iput(root_inode);
@@ -342,6 +347,7 @@ out_no_xprt:
printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
out_free_host:
+ nfs_reqlist_free(server);
kfree(server->hostname);
out_unlock:
goto out_fail;
@@ -440,7 +446,6 @@ nfs_invalidate_inode(struct inode *inode)
make_bad_inode(inode);
inode->i_mode = save_mode;
- nfs_inval(inode);
nfs_zap_caches(inode);
}
@@ -864,7 +869,7 @@ nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
* to look at the size or the mtime the server sends us
* too closely, as we're in the middle of modifying them.
*/
- if (NFS_WRITEBACK(inode))
+ if (nfs_have_writebacks(inode))
goto out;
if (inode->i_size != fattr->size) {
@@ -925,7 +930,7 @@ printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages);
static DECLARE_FSTYPE(nfs_fs_type, "nfs", nfs_read_super, 0);
extern int nfs_init_fhcache(void);
-extern int nfs_init_wreqcache(void);
+extern int nfs_init_nfspagecache(void);
/*
* Initialize NFS
@@ -939,7 +944,7 @@ init_nfs_fs(void)
if (err)
return err;
- err = nfs_init_wreqcache();
+ err = nfs_init_nfspagecache();
if (err)
return err;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a7e53e6db..5ad2aaa67 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -63,6 +63,7 @@ static int nfs_stat_to_errno(int stat);
#define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz
#define NFS_readlinkres_sz 1
#define NFS_readres_sz 1+NFS_fattr_sz+1
+#define NFS_writeres_sz NFS_attrstat_sz
#define NFS_stat_sz 1
#define NFS_readdirres_sz 1
#define NFS_statfsres_sz 1+NFS_info_sz
@@ -273,6 +274,7 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
static int
nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
{
+ unsigned int nr;
u32 count = args->count;
p = xdr_encode_fhandle(p, args->fh);
@@ -282,28 +284,35 @@ nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
*p++ = htonl(count);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- req->rq_svec[1].iov_base = (void *) args->buffer;
- req->rq_svec[1].iov_len = count;
- req->rq_slen += count;
- req->rq_snr = 2;
+ /* Get the number of buffers in the send iovec */
+ nr = args->nriov;
+
+ if (nr+2 > MAX_IOVEC) {
+ printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs "
+ "(nr %d max %d)\n", nr, MAX_IOVEC);
+ return -EINVAL;
+ }
+
+ /* Copy the iovec */
+ memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec));
#ifdef NFS_PAD_WRITES
/*
* Some old servers require that the message length
* be a multiple of 4, so we pad it here if needed.
*/
- count = ((count + 3) & ~3) - count;
- if (count) {
-#if 0
-printk("nfs_writeargs: padding write, len=%d, slen=%d, pad=%d\n",
-req->rq_svec[1].iov_len, req->rq_slen, count);
-#endif
- req->rq_svec[2].iov_base = (void *) "\0\0\0";
- req->rq_svec[2].iov_len = count;
- req->rq_slen += count;
- req->rq_snr = 3;
+ if (count & 3) {
+ struct iovec *iov = req->rq_svec + nr + 1;
+ int pad = 4 - (count & 3);
+
+ iov->iov_base = (void *) "\0\0\0";
+ iov->iov_len = pad;
+ count += pad;
+ nr++;
}
#endif
+ req->rq_slen += count;
+ req->rq_snr += nr;
return 0;
}
@@ -593,6 +602,16 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
}
/*
+ * Decode WRITE reply
+ */
+static int
+nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+ res->verf->committed = NFS_FILE_SYNC;
+ return nfs_xdr_attrstat(req, p, res->fattr);
+}
+
+/*
* Decode STATFS reply
*/
static int
@@ -678,7 +697,7 @@ static struct rpc_procinfo nfs_procedures[18] = {
PROC(readlink, readlinkargs, readlinkres),
PROC(read, readargs, readres),
PROC(writecache, enc_void, dec_void),
- PROC(write, writeargs, attrstat),
+ PROC(write, writeargs, writeres),
PROC(create, createargs, diropres),
PROC(remove, diropargs, stat),
PROC(rename, renameargs, stat),
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index d9a423f16..a592608be 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -145,6 +145,8 @@ static struct nfs_bool_opts {
{ "nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO },
{ "ac", ~NFS_MOUNT_NOAC, 0 },
{ "noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC },
+ { "lock", ~NFS_MOUNT_NONLM, 0 },
+ { "nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM },
{ NULL, 0, 0 }
};
@@ -320,7 +322,7 @@ int __init root_nfs_init(void)
* Parse NFS server and directory information passed on the kernel
* command line.
*/
-void __init nfs_root_setup(char *line)
+int __init nfs_root_setup(char *line)
{
ROOT_DEV = MKDEV(UNNAMED_MAJOR, 255);
if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
@@ -333,6 +335,7 @@ void __init nfs_root_setup(char *line)
sprintf(nfs_root_name, NFS_ROOT, line);
}
root_nfs_parse_addr(nfs_root_name);
+ return 1;
}
__setup("nfsroot=", nfs_root_setup);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index bb55ce6d6..3823c3118 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -111,11 +111,15 @@ nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap,
unsigned long offset, unsigned int count,
const void *buffer, struct nfs_fattr *fattr)
{
- struct nfs_writeargs arg = { fhandle, offset, count, buffer };
+ struct nfs_writeargs arg = { fhandle, offset, count, 1, 1,
+ {{(void *) buffer, count}, {0,0}, {0,0}, {0,0},
+ {0,0}, {0,0}, {0,0}, {0,0}}};
+ struct nfs_writeverf verf;
+ struct nfs_writeres res = {fattr, &verf, count};
int status;
dprintk("NFS call write %d @ %ld\n", count, offset);
- status = rpc_call(server->client, NFSPROC_WRITE, &arg, fattr,
+ status = rpc_call(server->client, NFSPROC_WRITE, &arg, &res,
swap? (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0);
dprintk("NFS reply read: %d\n", status);
return status < 0? status : count;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 717d12bbb..aa17780e5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,6 +171,7 @@ static inline int
nfs_readpage_async(struct dentry *dentry, struct inode *inode,
struct page *page)
{
+ struct rpc_message msg;
unsigned long address;
struct nfs_rreq *req;
int result = -1, flags;
@@ -195,8 +196,13 @@ nfs_readpage_async(struct dentry *dentry, struct inode *inode,
/* Start the async call */
dprintk("NFS: executing async READ request.\n");
- result = rpc_do_call(NFS_CLIENT(inode), NFSPROC_READ,
- &req->ra_args, &req->ra_res, flags,
+
+ msg.rpc_proc = NFSPROC_READ;
+ msg.rpc_argp = &req->ra_args;
+ msg.rpc_resp = &req->ra_res;
+ msg.rpc_cred = NULL;
+
+ result = rpc_call_async(NFS_CLIENT(inode), &msg, flags,
nfs_readpage_result, req);
if (result < 0)
goto out_free;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5f847bec8..af023a121 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@
* Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/swap.h>
@@ -54,33 +55,126 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_flushd.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#define NFS_PARANOIA 1
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static void nfs_wback_begin(struct rpc_task *task);
-static void nfs_wback_result(struct rpc_task *task);
-static void nfs_cancel_request(struct nfs_wreq *req);
+/*
+ * Spinlock protecting the write request lists and request counters
+ */
+spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int nfs_nr_requests = 0;
/*
- * Cache parameters
+ * Local structures
+ *
+ * Valid flags for a dirty buffer
*/
-#define NFS_WRITEBACK_DELAY (10 * HZ)
-#define NFS_WRITEBACK_MAX 64
+#define PG_BUSY 0x0001
/*
- * Limit number of delayed writes
+ * This is the struct where the WRITE/COMMIT arguments go.
*/
-static int nr_write_requests = 0;
-static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain");
+struct nfs_write_data {
+ struct rpc_task task;
+ struct file *file;
+ struct rpc_cred *cred;
+ struct nfs_writeargs args; /* argument struct */
+ struct nfs_writeres res; /* result struct */
+ struct nfs_fattr fattr;
+ struct nfs_writeverf verf;
+ struct list_head pages; /* Coalesced requests we wish to flush */
+};
+
+struct nfs_page {
+ struct list_head wb_hash, /* Inode */
+ wb_list,
+ *wb_list_head;
+ struct file *wb_file;
+ struct rpc_cred *wb_cred;
+ struct page *wb_page; /* page to write out */
+ wait_queue_head_t wb_wait; /* wait queue */
+ unsigned long wb_timeout; /* when to write/commit */
+ unsigned int wb_offset, /* Offset of write */
+ wb_bytes, /* Length of request */
+ wb_count, /* reference count */
+ wb_flags;
+ struct nfs_writeverf wb_verf; /* Commit cookie */
+};
+
+#define NFS_WBACK_BUSY(req) ((req)->wb_flags & PG_BUSY)
+
+/*
+ * Local function declarations
+ */
+static void nfs_writeback_done(struct rpc_task *);
+#ifdef CONFIG_NFS_V3
+static void nfs_commit_done(struct rpc_task *);
+#endif
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
+static kmem_cache_t *nfs_page_cachep = NULL;
+static kmem_cache_t *nfs_wdata_cachep = NULL;
+
+static __inline__ struct nfs_page *nfs_page_alloc(void)
+{
+ struct nfs_page *p;
+ p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->wb_hash);
+ INIT_LIST_HEAD(&p->wb_list);
+ init_waitqueue_head(&p->wb_wait);
+ }
+ return p;
+}
+
+static __inline__ void nfs_page_free(struct nfs_page *p)
+{
+ kmem_cache_free(nfs_page_cachep, p);
+}
+
+static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
+{
+ struct nfs_write_data *p;
+ p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS);
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ }
+ return p;
+}
+
+static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
+{
+ kmem_cache_free(nfs_wdata_cachep, p);
+}
+
+static void nfs_writedata_release(struct rpc_task *task)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
+ rpc_release_task(task);
+ nfs_writedata_free(wdata);
+}
+
+/*
+ * This function will be used to simulate weak cache consistency
+ * under NFSv2 when the NFSv3 attribute patch is included.
+ * For the moment, we just call nfs_refresh_inode().
+ */
+static __inline__ int
+nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
+{
+ return nfs_refresh_inode(inode, fattr);
+}
+
/*
* Write a page synchronously.
* Offset is the data offset within the page.
@@ -161,278 +255,770 @@ io_error:
}
/*
- * Append a writeback request to a list
+ * Write a page to the server. This was supposed to be used for
+ * NFS swapping only.
+ * FIXME: Using this for mmap is pointless, breaks asynchronous
+ * writebacks, and is extremely slow.
*/
-static inline void
-append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+int
+nfs_writepage(struct dentry * dentry, struct page *page)
{
- dprintk("NFS: append_write_request(%p, %p)\n", q, wreq);
- rpc_append_list(q, wreq);
+ struct inode *inode = dentry->d_inode;
+ unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ unsigned offset = PAGE_CACHE_SIZE;
+ int err;
+
+ /* easy case */
+ if (page->index < end_index)
+ goto do_it;
+ /* things got complicated... */
+ offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+ /* OK, are we completely out? */
+ if (page->index >= end_index+1 || !offset)
+ return -EIO;
+do_it:
+ err = nfs_writepage_sync(dentry, inode, page, 0, offset);
+	if (err == offset)
+		return 0;
+ return err;
+}
+
+/*
+ * Check whether the file range we want to write to is locked by
+ * us.
+ */
+static int
+region_locked(struct inode *inode, struct nfs_page *req)
+{
+ struct file_lock *fl;
+ unsigned long rqstart, rqend;
+
+ /* Don't optimize writes if we don't use NLM */
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+ return 0;
+
+ rqstart = page_offset(req->wb_page) + req->wb_offset;
+ rqend = rqstart + req->wb_bytes;
+ for (fl = inode->i_flock; fl; fl = fl->fl_next) {
+ if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
+ && fl->fl_type == F_WRLCK
+ && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline struct nfs_page *
+nfs_inode_wb_entry(struct list_head *head)
+{
+ return list_entry(head, struct nfs_page, wb_hash);
}
/*
- * Remove a writeback request from a list
+ * Insert a write request into an inode
*/
static inline void
-remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
- dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq);
- rpc_remove_list(q, wreq);
+ if (!list_empty(&req->wb_hash))
+ return;
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to hash an unlocked request!\n");
+ inode->u.nfs_i.npages++;
+ list_add(&req->wb_hash, &inode->u.nfs_i.writeback);
+ req->wb_count++;
}
/*
- * Find a non-busy write request for a given page to
- * try to combine with.
+ * Remove a write request from an inode
*/
-static inline struct nfs_wreq *
-find_write_request(struct inode *inode, struct page *page)
+static inline void
+nfs_inode_remove_request(struct nfs_page *req)
{
- pid_t pid = current->pid;
- struct nfs_wreq *head, *req;
+ struct inode *inode;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_hash)) {
+ spin_unlock(&nfs_wreq_lock);
+ return;
+ }
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to unhash an unlocked request!\n");
+ inode = req->wb_file->f_dentry->d_inode;
+ list_del(&req->wb_hash);
+ INIT_LIST_HEAD(&req->wb_hash);
+ inode->u.nfs_i.npages--;
+ if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
+ if (!nfs_have_writebacks(inode))
+ inode_remove_flushd(inode);
+ spin_unlock(&nfs_wreq_lock);
+ nfs_release_request(req);
+}
- dprintk("NFS: find_write_request(%x/%ld, %p)\n",
- inode->i_dev, inode->i_ino, page);
- if (!(req = head = NFS_WRITEBACK(inode)))
- return NULL;
- do {
- /*
- * We can't combine with canceled requests or
- * requests that have already been started..
- */
- if (req->wb_flags & (NFS_WRITE_CANCELLED | NFS_WRITE_INPROGRESS))
+/*
+ * Find a request
+ */
+static inline struct nfs_page *
+_nfs_find_request(struct inode *inode, struct page *page)
+{
+ struct list_head *head, *next;
+
+ head = &inode->u.nfs_i.writeback;
+ next = head->next;
+ while (next != head) {
+ struct nfs_page *req = nfs_inode_wb_entry(next);
+ next = next->next;
+ if (page_index(req->wb_page) != page_index(page))
continue;
+ req->wb_count++;
+ return req;
+ }
+ return NULL;
+}
- if (req->wb_page == page && req->wb_pid == pid)
- return req;
+struct nfs_page *
+nfs_find_request(struct inode *inode, struct page *page)
+{
+ struct nfs_page *req;
- /*
- * Ehh, don't keep too many tasks queued..
- */
- rpc_wake_up_task(&req->wb_task);
+ spin_lock(&nfs_wreq_lock);
+ req = _nfs_find_request(inode, page);
+ spin_unlock(&nfs_wreq_lock);
+ return req;
+}
- } while ((req = WB_NEXT(req)) != head);
- return NULL;
+static inline struct nfs_page *
+nfs_list_entry(struct list_head *head)
+{
+ return list_entry(head, struct nfs_page, wb_list);
}
/*
- * Find and release all failed requests for this inode.
+ * Insert a write request into a sorted list
*/
-int
-nfs_check_failed_request(struct inode * inode)
+static inline void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
- /* FIXME! */
- return 0;
+ struct list_head *prev;
+
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Add to list failed!\n");
+ return;
+ }
+ if (list_empty(&req->wb_hash)) {
+		printk(KERN_ERR "NFS: attempt to add an unhashed request to a list!\n");
+ return;
+ }
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to add an unlocked request to a list!\n");
+ prev = head->prev;
+ while (prev != head) {
+ struct nfs_page *p = nfs_list_entry(prev);
+ if (page_index(p->wb_page) < page_index(req->wb_page))
+ break;
+ prev = prev->prev;
+ }
+ list_add(&req->wb_list, prev);
+ req->wb_list_head = head;
}
/*
- * Try to merge adjacent write requests. This works only for requests
- * issued by the same user.
+ * Remove a write request from a list
*/
-static inline int
-update_write_request(struct nfs_wreq *req, unsigned int first,
- unsigned int bytes)
+static inline void
+nfs_list_remove_request(struct nfs_page *req)
{
- unsigned int rqfirst = req->wb_offset,
- rqlast = rqfirst + req->wb_bytes,
- last = first + bytes;
+ if (list_empty(&req->wb_list))
+ return;
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to remove an unlocked request from a list!\n");
+ list_del(&req->wb_list);
+ INIT_LIST_HEAD(&req->wb_list);
+ req->wb_list_head = NULL;
+}
- dprintk("nfs: trying to update write request %p\n", req);
+/*
+ * Add a request to the inode's dirty list.
+ */
+static inline void
+nfs_mark_request_dirty(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
- /* not contiguous? */
- if (rqlast < first || last < rqfirst)
- return 0;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_list)) {
+ nfs_list_add_request(req, &inode->u.nfs_i.dirty);
+ inode->u.nfs_i.ndirty++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ /*
+ * NB: the call to inode_schedule_scan() must lie outside the
+ * spinlock since it can run flushd().
+ */
+ inode_schedule_scan(inode, req->wb_timeout);
+}
- if (first < rqfirst)
- rqfirst = first;
- if (rqlast < last)
- rqlast = last;
+/*
+ * Check if a request is dirty
+ */
+static inline int
+nfs_dirty_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
+}
- req->wb_offset = rqfirst;
- req->wb_bytes = rqlast - rqfirst;
- req->wb_count++;
+#ifdef CONFIG_NFS_V3
+/*
+ * Add a request to the inode's commit list.
+ */
+static inline void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
- return 1;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_list)) {
+ nfs_list_add_request(req, &inode->u.nfs_i.commit);
+ inode->u.nfs_i.ncommit++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ /*
+ * NB: the call to inode_schedule_scan() must lie outside the
+ * spinlock since it can run flushd().
+ */
+ inode_schedule_scan(inode, req->wb_timeout);
}
+#endif
-static kmem_cache_t *nfs_wreq_cachep;
-
-int nfs_init_wreqcache(void)
+/*
+ * Lock the page of an asynchronous request
+ */
+static inline int
+nfs_lock_request(struct nfs_page *req)
{
- nfs_wreq_cachep = kmem_cache_create("nfs_wreq",
- sizeof(struct nfs_wreq),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (nfs_wreq_cachep == NULL)
- return -ENOMEM;
- return 0;
+ if (NFS_WBACK_BUSY(req))
+ return 0;
+ req->wb_count++;
+ req->wb_flags |= PG_BUSY;
+ return 1;
}
static inline void
-free_write_request(struct nfs_wreq * req)
+nfs_unlock_request(struct nfs_page *req)
{
- if (!--req->wb_count)
- kmem_cache_free(nfs_wreq_cachep, req);
+ if (!NFS_WBACK_BUSY(req)) {
+ printk(KERN_ERR "NFS: Invalid unlock attempted\n");
+ return;
+ }
+ req->wb_flags &= ~PG_BUSY;
+ wake_up(&req->wb_wait);
+ nfs_release_request(req);
}
/*
- * Create and initialize a writeback request
+ * Create a write request.
+ * Page must be locked by the caller. This makes sure we never create
+ * two different requests for the same page, and avoids possible deadlock
+ * when we reach the hard limit on the number of dirty pages.
*/
-static inline struct nfs_wreq *
-create_write_request(struct file * file, struct page *page, unsigned int offset, unsigned int bytes)
+static struct nfs_page *
+nfs_create_request(struct inode *inode, struct file *file, struct page *page,
+ unsigned int offset, unsigned int count)
{
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- struct nfs_wreq *wreq;
- struct rpc_task *task;
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct nfs_page *req = NULL;
+ long timeout;
- dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (page->index << PAGE_CACHE_SHIFT) + offset, bytes);
-
- /* FIXME: Enforce hard limit on number of concurrent writes? */
- wreq = kmem_cache_alloc(nfs_wreq_cachep, SLAB_KERNEL);
- if (!wreq)
- goto out_fail;
- memset(wreq, 0, sizeof(*wreq));
+ /* Deal with hard/soft limits.
+ */
+ do {
+ /* If we're over the soft limit, flush out old requests */
+ if (nfs_nr_requests >= MAX_REQUEST_SOFT)
+ nfs_wb_file(inode, file);
+
+ /* If we're still over the soft limit, wake up some requests */
+ if (nfs_nr_requests >= MAX_REQUEST_SOFT) {
+ dprintk("NFS: hit soft limit (%d requests)\n",
+ nfs_nr_requests);
+ if (!cache->task)
+ nfs_reqlist_init(NFS_SERVER(inode));
+ nfs_wake_flushd();
+ }
- task = &wreq->wb_task;
- rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE);
- task->tk_calldata = wreq;
- task->tk_action = nfs_wback_begin;
+ /* If we haven't reached the hard limit yet,
+ * try to allocate the request struct */
+ if (nfs_nr_requests < MAX_REQUEST_HARD) {
+ req = nfs_page_alloc();
+ if (req != NULL)
+ break;
+ }
- rpcauth_lookupcred(task); /* Obtain user creds */
- if (task->tk_status < 0)
- goto out_req;
+ /* We're over the hard limit. Wait for better times */
+ dprintk("NFS: create_request sleeping (total %d pid %d)\n",
+ nfs_nr_requests, current->pid);
+
+ timeout = 1 * HZ;
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_INTR) {
+ interruptible_sleep_on_timeout(&cache->request_wait,
+ timeout);
+ if (signalled())
+ break;
+ } else
+ sleep_on_timeout(&cache->request_wait, timeout);
+
+ dprintk("NFS: create_request waking up (tot %d pid %d)\n",
+ nfs_nr_requests, current->pid);
+ } while (!req);
+ if (!req)
+ return NULL;
- /* Put the task on inode's writeback request list. */
+ /* Initialize the request struct. Initially, we assume a
+	 * long write-back delay. This is shortened just below if the
+	 * region turns out not to be write-locked by us. */
+ req->wb_page = page;
+ atomic_inc(&page->count);
+ req->wb_offset = offset;
+ req->wb_bytes = count;
+ /* If the region is locked, adjust the timeout */
+ if (region_locked(inode, req))
+ req->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ else
+ req->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
+ req->wb_file = file;
+ req->wb_cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
get_file(file);
- wreq->wb_file = file;
- wreq->wb_pid = current->pid;
- wreq->wb_page = page;
- init_waitqueue_head(&wreq->wb_wait);
- wreq->wb_offset = offset;
- wreq->wb_bytes = bytes;
- wreq->wb_count = 2; /* One for the IO, one for us */
+ req->wb_count = 1;
- kmap(page);
- append_write_request(&NFS_WRITEBACK(inode), wreq);
+ /* register request's existence */
+ cache->nr_requests++;
+ nfs_nr_requests++;
+ return req;
+}
- if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4)
- rpc_wake_up_next(&write_queue);
- return wreq;
+/*
+ * Release all resources associated with a write request after it
+ * has been committed to stable storage
+ *
+ * Note: takes nfs_wreq_lock itself, so the caller must NOT hold the spinlock!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct page *page = req->wb_page;
+
+ spin_lock(&nfs_wreq_lock);
+ if (--req->wb_count) {
+ spin_unlock(&nfs_wreq_lock);
+ return;
+ }
+ spin_unlock(&nfs_wreq_lock);
-out_req:
- rpc_release_task(task);
- kmem_cache_free(nfs_wreq_cachep, wreq);
-out_fail:
- return NULL;
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Request released while still on a list!\n");
+ nfs_list_remove_request(req);
+ }
+ if (!list_empty(&req->wb_hash)) {
+ printk(KERN_ERR "NFS: Request released while still hashed!\n");
+ nfs_inode_remove_request(req);
+ }
+ if (NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: Request released while still locked!\n");
+
+ rpcauth_releasecred(NFS_CLIENT(inode)->cl_auth, req->wb_cred);
+ fput(req->wb_file);
+ page_cache_release(page);
+ nfs_page_free(req);
+ /* wake up anyone waiting to allocate a request */
+ cache->nr_requests--;
+ nfs_nr_requests--;
+ wake_up(&cache->request_wait);
}
/*
- * Schedule a writeback RPC call.
- * If the server is congested, don't add to our backlog of queued
- * requests but call it synchronously.
- * The function returns whether we should wait for the thing or not.
+ * Wait for a request to complete.
*
- * FIXME: Here we could walk the inode's lock list to see whether the
- * page we're currently writing to has been write-locked by the caller.
- * If it is, we could schedule an async write request with a long
- * delay in order to avoid writing back the page until the lock is
- * released.
+ * Interruptible by signals only if mounted with intr flag.
*/
-static inline int
-schedule_write_request(struct nfs_wreq *req, int sync)
+static int
+nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_task *task = &req->wb_task;
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ int retval;
- if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX)
- sync = 1;
-
- if (sync) {
- sigset_t oldmask;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- dprintk("NFS: %4d schedule_write_request (sync)\n",
- task->tk_pid);
- /* Page is already locked */
- rpc_clnt_sigmask(clnt, &oldmask);
- rpc_execute(task);
- rpc_clnt_sigunmask(clnt, &oldmask);
- } else {
- dprintk("NFS: %4d schedule_write_request (async)\n",
- task->tk_pid);
- task->tk_flags |= RPC_TASK_ASYNC;
- task->tk_timeout = NFS_WRITEBACK_DELAY;
- rpc_sleep_on(&write_queue, task, NULL, NULL);
+ if (!NFS_WBACK_BUSY(req))
+ return 0;
+ req->wb_count++;
+ retval = nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req));
+ nfs_release_request(req);
+ return retval;
+}
+
+/*
+ * Wait for all outstanding requests in the given file/byte range to complete.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ */
+static int
+nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long start, unsigned int count)
+{
+ struct list_head *p, *head;
+ unsigned long idx_start, idx_end;
+ unsigned int pages = 0;
+ int error;
+
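+	/* Convert the byte range into page indices; a count of 0 means
+	 * everything from 'start' onwards. */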
+ idx_start = start >> PAGE_CACHE_SHIFT;
+ if (count == 0)
+ idx_end = ~0;
+ else {
+ unsigned long idx_count = (count-1) >> PAGE_CACHE_SHIFT;
+ idx_end = idx_start + idx_count;
}
+ spin_lock(&nfs_wreq_lock);
+ head = &inode->u.nfs_i.writeback;
+ p = head->next;
+ while (p != head) {
+ unsigned long pg_idx;
+ struct nfs_page *req = nfs_inode_wb_entry(p);
+
+ p = p->next;
+
+ if (file && req->wb_file != file)
+ continue;
+
+ pg_idx = page_index(req->wb_page);
+ if (pg_idx < idx_start || pg_idx > idx_end)
+ continue;
- return sync;
+ if (!NFS_WBACK_BUSY(req))
+ continue;
+ req->wb_count++;
+ spin_unlock(&nfs_wreq_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (error < 0)
+ return error;
+ spin_lock(&nfs_wreq_lock);
+ p = head->next;
+ pages++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
}
/*
- * Wait for request to complete.
+ * Scan a request list for requests whose timeout has expired, and
+ * move them onto the destination list.
*/
static int
-wait_on_write_request(struct nfs_wreq *req)
+nfs_scan_list_timeout(struct list_head *head, struct list_head *dst, struct inode *inode)
{
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- DECLARE_WAITQUEUE(wait, current);
- sigset_t oldmask;
- int retval;
+ struct list_head *p;
+ struct nfs_page *req;
+ int pages = 0;
+
+ p = head->next;
+ while (p != head) {
+ req = nfs_list_entry(p);
+ p = p->next;
+ if (time_after(req->wb_timeout, jiffies)) {
+ if (time_after(NFS_NEXTSCAN(inode), req->wb_timeout))
+ NFS_NEXTSCAN(inode) = req->wb_timeout;
+ continue;
+ }
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ }
+ return pages;
+}
+
+static int
+nfs_scan_dirty_timeout(struct inode *inode, struct list_head *dst)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list_timeout(&inode->u.nfs_i.dirty, dst, inode);
+ inode->u.nfs_i.ndirty -= pages;
+ if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
- /* Make sure it's started.. */
- if (!WB_INPROGRESS(req))
- rpc_wake_up_task(&req->wb_task);
+#ifdef CONFIG_NFS_V3
+static int
+nfs_scan_commit_timeout(struct inode *inode, struct list_head *dst)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list_timeout(&inode->u.nfs_i.commit, dst, inode);
+ inode->u.nfs_i.ncommit -= pages;
+ if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+#endif
+
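+/*
+ * Move requests matching the given file and byte range from the src
+ * list onto dst, locking each request as it is transferred.
+ */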
+static int
+nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ struct list_head *p;
+ struct nfs_page *req;
+ unsigned long idx_start, idx_end;
+ int pages;
+
+ pages = 0;
+ idx_start = start >> PAGE_CACHE_SHIFT;
+ if (count == 0)
+ idx_end = ~0;
+ else
+ idx_end = idx_start + ((count-1) >> PAGE_CACHE_SHIFT);
+ p = src->next;
+ while (p != src) {
+ unsigned long pg_idx;
+
+ req = nfs_list_entry(p);
+ p = p->next;
+
+ if (file && req->wb_file != file)
+ continue;
+
+ pg_idx = page_index(req->wb_page);
+ if (pg_idx < idx_start || pg_idx > idx_end)
+ continue;
+
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ }
+ return pages;
+}
+
+static int
+nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, start, count);
+ inode->u.nfs_i.ndirty -= pages;
+ if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+
+#ifdef CONFIG_NFS_V3
+static int
+nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, start, count);
+ inode->u.nfs_i.ncommit -= pages;
+ if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+#endif
+
+
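+/*
+ * Collect a run of requests for consecutive pages of the same file
+ * into dst so that they can be sent in a single WRITE call. The run
+ * stops at maxpages requests, at a change of file, at a gap in the
+ * page sequence, or at a partially-filled page.
+ */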
+static int
+coalesce_requests(struct list_head *src, struct list_head *dst, unsigned int maxpages)
+{
+ struct nfs_page *req = NULL;
+ unsigned int pages = 0;
+
+ while (!list_empty(src)) {
+ struct nfs_page *prev = req;
+
+ req = nfs_list_entry(src->next);
+ if (prev) {
+ if (req->wb_file != prev->wb_file)
+ break;
+
+ if (page_index(req->wb_page) != page_index(prev->wb_page)+1)
+ break;
+
+ if (req->wb_offset != 0)
+ break;
+ }
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
+ break;
+ if (pages >= maxpages)
+ break;
+ }
+ return pages;
+}
+
+/*
+ * Try to update any existing write request, or create one if there is none.
+ * In order to match, the request's credentials must match those of
+ * the calling process.
+ *
+ * Note: Should always be called with the Page Lock held!
+ */
+static struct nfs_page *
+nfs_update_request(struct file* file, struct page *page,
+ unsigned long offset, unsigned int bytes)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct nfs_page *req, *new = NULL;
+ unsigned long rqend, end;
+
+ end = offset + bytes;
- rpc_clnt_sigmask(clnt, &oldmask);
- add_wait_queue(&req->wb_wait, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- retval = 0;
- if (req->wb_flags & NFS_WRITE_COMPLETE)
+ /* Loop over all inode entries and see if we find
+ * A request for the page we wish to update
+ */
+ spin_lock(&nfs_wreq_lock);
+ req = _nfs_find_request(inode, page);
+ if (req) {
+ if (!nfs_lock_request(req)) {
+ spin_unlock(&nfs_wreq_lock);
+ nfs_wait_on_request(req);
+ nfs_release_request(req);
+ continue;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ if (new)
+ nfs_release_request(new);
break;
- retval = -ERESTARTSYS;
- if (signalled())
+ }
+
+ req = new;
+ if (req) {
+ nfs_lock_request(req);
+ nfs_inode_add_request(inode, req);
+ spin_unlock(&nfs_wreq_lock);
+ nfs_mark_request_dirty(req);
break;
- schedule();
+ }
+ spin_unlock(&nfs_wreq_lock);
+
+ /* Create the request. It's safe to sleep in this call because
+ * we only get here if the page is locked.
+ */
+ new = nfs_create_request(inode, file, page, offset, bytes);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* We have a request for our page.
+	 * If the request belongs to a different file or page, is no
+	 * longer dirty, or its byte range does not overlap with ours,
+	 * tell the caller to flush out the conflicting
+	 * request.
+ */
+ rqend = req->wb_offset + req->wb_bytes;
+ if (req->wb_file != file
+ || req->wb_page != page
+ || !nfs_dirty_request(req)
+ || offset > rqend || end < req->wb_offset) {
+ nfs_unlock_request(req);
+ nfs_release_request(req);
+ return ERR_PTR(-EBUSY);
+ }
+
+ /* Okay, the request matches. Update the region */
+ if (offset < req->wb_offset) {
+ req->wb_offset = offset;
+ req->wb_bytes = rqend - req->wb_offset;
}
- remove_wait_queue(&req->wb_wait, &wait);
- current->state = TASK_RUNNING;
- rpc_clnt_sigunmask(clnt, &oldmask);
- return retval;
+
+ if (end > rqend)
+ req->wb_bytes = end - req->wb_offset;
+
+ nfs_unlock_request(req);
+
+ return req;
}
/*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
+ * This is the strategy routine for NFS.
+ * It is called by nfs_updatepage whenever the user wrote up to the end
+ * of a page.
+ *
+ * We always try to submit a set of requests in parallel so that the
+ * server's write code can gather writes. This is mainly for the benefit
+ * of NFSv2.
+ *
+ * We never submit more requests than we think the remote can handle.
+ * For UDP sockets, we make sure we don't exceed the congestion window;
+ * for TCP, we limit the number of requests to 8.
+ *
+ * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
+ * should be sent out in one go. This is for the benefit of NFSv2 servers
+ * that perform write gathering.
+ *
+ * FIXME: Different servers may have different sweet spots.
+ * Record the average congestion window in server struct?
*/
-int
-nfs_writepage(struct dentry * dentry, struct page *page)
+#define NFS_STRATEGY_PAGES 8
+static void
+nfs_strategy(struct file *file)
{
- struct inode *inode = dentry->d_inode;
- unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
- unsigned offset = PAGE_CACHE_SIZE;
- int err;
+ struct inode *inode = file->f_dentry->d_inode;
+ unsigned int dirty, wpages;
+
+ dirty = inode->u.nfs_i.ndirty;
+ wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT;
+#ifdef CONFIG_NFS_V3
+ if (NFS_PROTO(inode)->version == 2) {
+ if (dirty >= NFS_STRATEGY_PAGES * wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+ } else {
+ if (dirty >= wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+ }
+#else
+ if (dirty >= NFS_STRATEGY_PAGES * wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+#endif
+ /*
+ * If we're running out of requests, flush out everything
+	 * in order to reduce memory usage...
+ */
+ if (nfs_nr_requests > MAX_REQUEST_SOFT)
+ nfs_wb_file(inode, file);
+}
- /* easy case */
- if (page->index < end_index)
- goto do_it;
- /* things got complicated... */
- offset = inode->i_size & (PAGE_CACHE_SIZE-1);
- /* OK, are we completely out? */
- if (page->index >= end_index+1 || !offset)
- return -EIO;
-do_it:
- err = nfs_writepage_sync(dentry, inode, page, 0, offset);
- if ( err == offset) return 0;
- return err;
+int
+nfs_flush_incompatible(struct file *file, struct page *page)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct nfs_page *req;
+ int status = 0;
+ /*
+ * Look for a request corresponding to this page. If there
+ * is one, and it belongs to another file, we flush it out
+ * before we try to copy anything into the page. Do this
+ * due to the lack of an ACCESS-type call in NFSv2.
+ * Also do the same if we find a request from an existing
+ * dropped page.
+ */
+ req = nfs_find_request(inode,page);
+ if (req) {
+ if (req->wb_file != file || req->wb_page != page)
+ status = nfs_wb_page(inode, page);
+ nfs_release_request(req);
+ }
+ return (status < 0) ? status : 0;
}
/*
@@ -446,27 +1032,13 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
{
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
- struct nfs_wreq *req;
+ struct nfs_page *req;
int synchronous = file->f_flags & O_SYNC;
- int retval;
+ int status = 0;
- dprintk("NFS: nfs_updatepage(%s/%s %d@%ld)\n",
+ dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- count, (page->index << PAGE_CACHE_SHIFT) +offset);
-
- /*
- * Try to find a corresponding request on the writeback queue.
- * If there is one, we can be sure that this request is not
- * yet being processed, because we hold a lock on the page.
- *
- * If the request was created by us, update it. Otherwise,
- * transfer the page lock and flush out the dirty page now.
- * After returning, generic_file_write will wait on the
- * page and retry the update.
- */
- req = find_write_request(inode, page);
- if (req && req->wb_file == file && update_write_request(req, offset, count))
- goto updated;
+ count, page_offset(page) +offset);
/*
* If wsize is smaller than page size, update and write
@@ -475,241 +1047,542 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
if (NFS_SERVER(inode)->wsize < PAGE_SIZE)
return nfs_writepage_sync(dentry, inode, page, offset, count);
- /* Create the write request. */
- req = create_write_request(file, page, offset, count);
- if (!req)
- return -ENOBUFS;
-
/*
- * Ok, there's another user of this page with the new request..
- * The IO completion will then free the page and the dentry.
+ * Try to find an NFS request corresponding to this page
+ * and update it.
+ * If the existing request cannot be updated, we must flush
+ * it out now.
*/
- get_page(page);
-
- /* Schedule request */
- synchronous = schedule_write_request(req, synchronous);
+ do {
+ req = nfs_update_request(file, page, offset, count);
+ status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+ if (status != -EBUSY)
+ break;
+ /* Request could not be updated. Flush it out and try again */
+ status = nfs_wb_page(inode, page);
+ } while (status >= 0);
+ if (status < 0)
+ goto done;
-updated:
- if (req->wb_bytes == PAGE_SIZE)
+ if (req->wb_bytes == PAGE_CACHE_SIZE)
SetPageUptodate(page);
- retval = 0;
+ status = 0;
if (synchronous) {
- int status = wait_on_write_request(req);
- if (status) {
- nfs_cancel_request(req);
- retval = status;
- } else {
- status = req->wb_status;
- if (status < 0)
- retval = status;
- }
+ int error;
- if (retval < 0)
- ClearPageUptodate(page);
+ error = nfs_sync_file(inode, file, page_offset(page) + offset, count, FLUSH_SYNC|FLUSH_STABLE);
+ if (error < 0 || (error = file->f_error) < 0)
+ status = error;
+ file->f_error = 0;
+ } else {
+		/* If we wrote up to the end of the page, call the
+		 * strategy routine so it can send out a bunch
+		 * of requests.
+ */
+ if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE)
+ nfs_strategy(file);
}
-
- free_write_request(req);
- return retval;
+ nfs_release_request(req);
+done:
+ dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
+ status, inode->i_size);
+ if (status < 0)
+ clear_bit(PG_uptodate, &page->flags);
+ return status;
}
/*
- * Cancel a write request. We always mark it cancelled,
- * but if it's already in progress there's no point in
- * calling rpc_exit, and we don't want to overwrite the
- * tk_status field.
- */
+ * Set up the argument/result storage required for the RPC call.
+ */
static void
-nfs_cancel_request(struct nfs_wreq *req)
+nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
- req->wb_flags |= NFS_WRITE_CANCELLED;
- if (!WB_INPROGRESS(req)) {
- rpc_exit(&req->wb_task, 0);
- rpc_wake_up_task(&req->wb_task);
+ struct nfs_page *req;
+ struct iovec *iov;
+ unsigned int count;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ iov = data->args.iov;
+ count = 0;
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ iov->iov_base = (void *)(kmap(req->wb_page) + req->wb_offset);
+ iov->iov_len = req->wb_bytes;
+ count += req->wb_bytes;
+ iov++;
+ data->args.nriov++;
}
+ req = nfs_list_entry(data->pages.next);
+ data->file = req->wb_file;
+ data->cred = req->wb_cred;
+ data->args.fh = NFS_FH(req->wb_file->f_dentry);
+ data->args.offset = page_offset(req->wb_page) + req->wb_offset;
+ data->args.count = count;
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.verf = &data->verf;
}
+
/*
- * Cancel all writeback requests, both pending and in progress.
+ * Create an RPC task for the given write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
*/
-static void
-nfs_cancel_dirty(struct inode *inode, pid_t pid)
+static int
+nfs_flush_one(struct list_head *head, struct file *file, int how)
{
- struct nfs_wreq *head, *req;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct rpc_message msg;
+ int flags,
+ async = !(how & FLUSH_SYNC),
+ stable = (how & FLUSH_STABLE);
+ sigset_t oldset;
+
+
+ data = nfs_writedata_alloc();
+ if (!data)
+ goto out_bad;
+ task = &data->task;
+
+ /* Set the initial flags for the task. */
+ flags = (async) ? RPC_TASK_ASYNC : 0;
+
+ /* Set up the argument struct */
+ nfs_write_rpcsetup(head, data);
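+	/* Pick the stability level: stable writes use FILE_SYNC when
+	 * nothing is awaiting commit and DATA_SYNC otherwise; other
+	 * writes go out UNSTABLE and are committed later (NFSv3). */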
+ if (stable) {
+ if (!inode->u.nfs_i.ncommit)
+ data->args.stable = NFS_FILE_SYNC;
+ else
+ data->args.stable = NFS_DATA_SYNC;
+ } else
+ data->args.stable = NFS_UNSTABLE;
+
+ /* Finalize the task. */
+ rpc_init_task(task, clnt, nfs_writeback_done, flags);
+ task->tk_calldata = data;
+
+#ifdef CONFIG_NFS_V3
+ msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
+#else
+ msg.rpc_proc = NFSPROC_WRITE;
+#endif
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ msg.rpc_cred = data->cred;
+
+ dprintk("NFS: %4d initiated write call (req %s/%s count %d nriov %d)\n",
+ task->tk_pid,
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
+ data->args.count, data->args.nriov);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ rpc_call_setup(task, &msg, 0);
+ rpc_execute(task);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
+}
- req = head = NFS_WRITEBACK(inode);
- while (req != NULL) {
- if (pid == 0 || req->wb_pid == pid)
- nfs_cancel_request(req);
- if ((req = WB_NEXT(req)) == head)
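+/*
+ * Coalesce the list into runs of up to wsize bytes and send one WRITE
+ * call per run; on failure, put the remaining requests back on the
+ * dirty list.
+ */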
+static int
+nfs_flush_list(struct inode *inode, struct list_head *head, int how)
+{
+ LIST_HEAD(one_request);
+ struct nfs_page *req;
+ int error = 0;
+ unsigned int pages = 0,
+ wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT;
+
+ while (!list_empty(head)) {
+ pages += coalesce_requests(head, &one_request, wpages);
+ req = nfs_list_entry(one_request.next);
+ error = nfs_flush_one(&one_request, req->wb_file, how);
+ if (error < 0)
break;
}
+ if (error >= 0)
+ return pages;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return error;
}
+
/*
- * If we're waiting on somebody else's request
- * we need to increment the counter during the
- * wait so that the request doesn't disappear
- * from under us during the wait..
+ * This function is called when the WRITE call is complete.
*/
-static int FASTCALL(wait_on_other_req(struct nfs_wreq *));
-static int wait_on_other_req(struct nfs_wreq *req)
+static void
+nfs_writeback_done(struct rpc_task *task)
{
- int retval;
- req->wb_count++;
- retval = wait_on_write_request(req);
- free_write_request(req);
- return retval;
-}
+ struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+ struct nfs_writeargs *argp = &data->args;
+ struct nfs_writeres *resp = &data->res;
+ struct dentry *dentry = data->file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct nfs_page *req;
+
+ dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+	/* We can't handle a short write yet, but we check for it nevertheless */
+ if (resp->count < argp->count && task->tk_status >= 0) {
+ static unsigned long complain = 0;
+ if (time_before(complain, jiffies)) {
+ printk(KERN_WARNING
+ "NFS: Server wrote less than requested.\n");
+ complain = jiffies + 300 * HZ;
+ }
+ /* Can't do anything about it right now except throw
+ * an error. */
+ task->tk_status = -EIO;
+ }
+#ifdef CONFIG_NFS_V3
+ if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ /* We tried a write call, but the server did not
+ * commit data to stable storage even though we
+ * requested it.
+ */
+ static unsigned long complain = 0;
+
+ if (time_before(complain, jiffies)) {
+ printk(KERN_NOTICE "NFS: faulty NFSv3 server %s:"
+ " (committed = %d) != (stable = %d)\n",
+ NFS_SERVER(inode)->hostname,
+ resp->verf->committed, argp->stable);
+ complain = jiffies + 300 * HZ;
+ }
+ }
+#endif
-/*
- * This writes back a set of requests according to the condition.
- *
- * If this ever gets much more convoluted, use a fn pointer for
- * the condition..
- */
-#define NFS_WB(inode, cond) { int retval = 0 ; \
- do { \
- struct nfs_wreq *req = NFS_WRITEBACK(inode); \
- struct nfs_wreq *head = req; \
- if (!req) break; \
- for (;;) { \
- if (!(req->wb_flags & NFS_WRITE_COMPLETE)) \
- if (cond) break; \
- req = WB_NEXT(req); \
- if (req == head) goto out; \
- } \
- retval = wait_on_other_req(req); \
- } while (!retval); \
-out: return retval; \
-}
+ /* Update attributes as result of writeback. */
+ if (task->tk_status >= 0)
+ nfs_write_attributes(inode, resp->fattr);
-int
-nfs_wb_all(struct inode *inode)
-{
- NFS_WB(inode, 1);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ kunmap(req->wb_page);
+
+ dprintk("NFS: write (%s/%s %d@%Ld)",
+ req->wb_file->f_dentry->d_parent->d_name.name,
+ req->wb_file->f_dentry->d_name.name,
+ req->wb_bytes,
+ page_offset(req->wb_page) + req->wb_offset);
+
+ if (task->tk_status < 0) {
+ req->wb_file->f_error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
+
+#ifdef CONFIG_NFS_V3
+ if (resp->verf->committed != NFS_UNSTABLE) {
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
+ req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
+ nfs_mark_request_commit(req);
+ dprintk(" marked for commit\n");
+#else
+ nfs_inode_remove_request(req);
+#endif
+ next:
+ nfs_unlock_request(req);
+ }
+ nfs_writedata_release(task);
}
+
+#ifdef CONFIG_NFS_V3
/*
- * Write back all requests on one page - we do this before reading it.
+ * Set up the argument/result storage required for the RPC call.
*/
-int
-nfs_wb_page(struct inode *inode, struct page *page)
+static void
+nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
- NFS_WB(inode, req->wb_page == page);
+ struct nfs_page *req;
+ struct dentry *dentry;
+ struct inode *inode;
+ unsigned long start, end, len;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ end = 0;
+ start = ~0;
+ req = nfs_list_entry(head->next);
+ data->file = req->wb_file;
+ data->cred = req->wb_cred;
+ dentry = data->file->f_dentry;
+ inode = dentry->d_inode;
+ while (!list_empty(head)) {
+ struct nfs_page *req;
+ unsigned long rqstart, rqend;
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ rqstart = page_offset(req->wb_page) + req->wb_offset;
+ rqend = rqstart + req->wb_bytes;
+ if (rqstart < start)
+ start = rqstart;
+ if (rqend > end)
+ end = rqend;
+ }
+ data->args.fh = NFS_FH(dentry);
+ data->args.offset = start;
+ len = end - start;
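+	/* If the range reaches the end of the file, or its length cannot
+	 * be represented, send a count of 0 (commit through EOF). */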
+ if (end >= inode->i_size || len > (~((u32)0) >> 1))
+ len = 0;
+ data->res.count = data->args.count = (u32)len;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
}
/*
- * Write back all pending writes from one file descriptor..
+ * Commit dirty pages
*/
-int
-nfs_wb_file(struct inode *inode, struct file *file)
-{
- NFS_WB(inode, req->wb_file == file);
-}
-
-void
-nfs_inval(struct inode *inode)
+static int
+nfs_commit_list(struct list_head *head, int how)
{
- nfs_cancel_dirty(inode,0);
+ struct rpc_message msg;
+ struct file *file;
+ struct rpc_clnt *clnt;
+ struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct nfs_page *req;
+ int flags,
+ async = !(how & FLUSH_SYNC);
+ sigset_t oldset;
+
+ data = nfs_writedata_alloc();
+
+ if (!data)
+ goto out_bad;
+ task = &data->task;
+
+ flags = (async) ? RPC_TASK_ASYNC : 0;
+
+ /* Set up the argument struct */
+ nfs_commit_rpcsetup(head, data);
+ req = nfs_list_entry(data->pages.next);
+ file = req->wb_file;
+ clnt = NFS_CLIENT(file->f_dentry->d_inode);
+
+ rpc_init_task(task, clnt, nfs_commit_done, flags);
+ task->tk_calldata = data;
+
+ msg.rpc_proc = NFS3PROC_COMMIT;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ msg.rpc_cred = data->cred;
+
+ dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+ rpc_clnt_sigmask(clnt, &oldset);
+ rpc_call_setup(task, &msg, 0);
+ rpc_execute(task);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_commit(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
}
/*
- * The following procedures make up the writeback finite state machinery:
- *
- * 1. Try to lock the page if not yet locked by us,
- * set up the RPC call info, and pass to the call FSM.
+ * COMMIT call returned
*/
static void
-nfs_wback_begin(struct rpc_task *task)
+nfs_commit_done(struct rpc_task *task)
{
- struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
- struct page *page = req->wb_page;
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
-
- dprintk("NFS: %4d nfs_wback_begin (%s/%s, status=%d flags=%x)\n",
- task->tk_pid, dentry->d_parent->d_name.name,
- dentry->d_name.name, task->tk_status, req->wb_flags);
+ struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
+ struct nfs_writeres *resp = &data->res;
+ struct nfs_page *req;
+ struct dentry *dentry = data->file->f_dentry;
+ struct inode *inode = dentry->d_inode;
- task->tk_status = 0;
+ dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+ nfs_refresh_inode(inode, resp->fattr);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ dprintk("NFS: commit (%s/%s %d@%ld)",
+ req->wb_file->f_dentry->d_parent->d_name.name,
+ req->wb_file->f_dentry->d_name.name,
+ req->wb_bytes,
+ page_offset(req->wb_page) + req->wb_offset);
+ if (task->tk_status < 0) {
+ req->wb_file->f_error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
- /* Setup the task struct for a writeback call */
- req->wb_flags |= NFS_WRITE_INPROGRESS;
- req->wb_args.fh = NFS_FH(dentry);
- req->wb_args.offset = (page->index << PAGE_CACHE_SHIFT) + req->wb_offset;
- req->wb_args.count = req->wb_bytes;
- req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset);
+ /* Okay, COMMIT succeeded, apparently. Check the verifier
+ * returned by the server against all stored verfs. */
+ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ /* We have a match */
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ /* We have a mismatch. Write the page again */
+ dprintk(" mismatch\n");
+ nfs_mark_request_dirty(req);
+ next:
+ nfs_unlock_request(req);
+ }
+ nfs_writedata_release(task);
+}
+#endif
- rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0);
+int nfs_flush_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_dirty(inode, &head, file, start, count);
+ if (pages)
+ error = nfs_flush_list(inode, &head, how);
+ if (error < 0)
+ return error;
+ return pages;
+}
- return;
+int nfs_flush_timeout(struct inode *inode, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_dirty_timeout(inode, &head);
+ if (pages)
+ error = nfs_flush_list(inode, &head, how);
+ if (error < 0)
+ return error;
+ return pages;
}
-/*
- * 2. Collect the result
- */
-static void
-nfs_wback_result(struct rpc_task *task)
+#ifdef CONFIG_NFS_V3
+int nfs_commit_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
{
- struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
- struct file *file = req->wb_file;
- struct page *page = req->wb_page;
- int status = task->tk_status;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_commit(inode, &head, file, start, count);
+ if (pages)
+ error = nfs_commit_list(&head, how);
+ if (error < 0)
+ return error;
+ return pages;
+}
- dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n",
- task->tk_pid, dentry->d_parent->d_name.name,
- dentry->d_name.name, status, req->wb_flags);
-
- /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */
- req->wb_flags |= NFS_WRITE_COMPLETE;
- req->wb_status = status;
-
- if (status < 0) {
- req->wb_flags |= NFS_WRITE_INVALIDATE;
- file->f_error = status;
- } else if (!WB_CANCELLED(req)) {
- struct nfs_fattr *fattr = &req->wb_fattr;
- /* Update attributes as result of writeback.
- * Beware: when UDP replies arrive out of order, we
- * may end up overwriting a previous, bigger file size.
- *
- * When the file size shrinks we cancel all pending
- * writebacks.
- */
- if (fattr->mtime.seconds >= inode->i_mtime) {
- if (fattr->size < inode->i_size)
- fattr->size = inode->i_size;
-
- /* possible Solaris 2.5 server bug workaround */
- if (inode->i_ino == fattr->fileid) {
- /*
- * We expect these values to change, and
- * don't want to invalidate the caches.
- */
- inode->i_size = fattr->size;
- inode->i_mtime = fattr->mtime.seconds;
- nfs_refresh_inode(inode, fattr);
- }
- else
- printk("nfs_wback_result: inode %ld, got %u?\n",
- inode->i_ino, fattr->fileid);
- }
+int nfs_commit_timeout(struct inode *inode, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_commit_timeout(inode, &head);
+ if (pages) {
+ pages += nfs_scan_commit(inode, &head, NULL, 0, 0);
+ error = nfs_commit_list(&head, how);
}
+ if (error < 0)
+ return error;
+ return pages;
+}
+#endif
- rpc_release_task(task);
+int nfs_sync_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
+{
+ int error,
+ wait;
- if (WB_INVALIDATE(req))
- ClearPageUptodate(page);
+ wait = how & FLUSH_WAIT;
+ how &= ~FLUSH_WAIT;
- kunmap(page);
- __free_page(page);
- remove_write_request(&NFS_WRITEBACK(inode), req);
- nr_write_requests--;
- fput(req->wb_file);
+ if (!inode && file)
+ inode = file->f_dentry->d_inode;
- wake_up(&req->wb_wait);
- free_write_request(req);
+ do {
+ error = 0;
+ if (wait)
+ error = nfs_wait_on_requests(inode, file, start, count);
+ if (error == 0)
+ error = nfs_flush_file(inode, file, start, count, how);
+#ifdef CONFIG_NFS_V3
+ if (error == 0)
+ error = nfs_commit_file(inode, file, start, count, how);
+#endif
+ } while (error > 0);
+ return error;
+}
+
+int nfs_init_nfspagecache(void)
+{
+ nfs_page_cachep = kmem_cache_create("nfs_page",
+ sizeof(struct nfs_page),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (nfs_page_cachep == NULL)
+ return -ENOMEM;
+
+ nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
+ sizeof(struct nfs_write_data),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (nfs_wdata_cachep == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void nfs_destroy_nfspagecache(void)
+{
+ if (kmem_cache_destroy(nfs_page_cachep))
+ printk(KERN_INFO "nfs_page: not all structures were freed\n");
+ if (kmem_cache_destroy(nfs_wdata_cachep))
+ printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
}
+
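
The rewritten write path above follows the NFSv3 unstable-write scheme: pages are flushed without forcing the server to commit them to stable storage, each request remembers the write verifier it got back, and nfs_commit_done() compares those stored verifiers against the one returned by COMMIT, re-dirtying any page whose verifier no longer matches (a mismatch means the server rebooted and may have lost the unstable data). Below is a minimal, self-contained sketch of just that verifier check; the struct names and sizes are illustrative stand-ins, not the kernel's types.

#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins for the kernel's verifier and request types. */
struct verf { unsigned char verifier[8]; };
struct pending_write { struct verf wb_verf; int dirty; };

/* Decide what to do with one request after COMMIT returned 'server_verf'. */
static void settle_request(struct pending_write *req, const struct verf *server_verf)
{
        if (memcmp(req->wb_verf.verifier, server_verf->verifier,
                   sizeof(server_verf->verifier)) == 0)
                req->dirty = 0;         /* verifier matches: data is stable, drop it */
        else
                req->dirty = 1;         /* mismatch: server may have lost it, write again */
}

int main(void)
{
        struct verf boot1 = {{1,2,3,4,5,6,7,8}}, boot2 = {{9,9,9,9,9,9,9,9}};
        struct pending_write req = { boot1, 1 };

        settle_request(&req, &boot1);
        printf("same verifier    -> dirty=%d\n", req.dirty);   /* 0 */
        settle_request(&req, &boot2);
        printf("changed verifier -> dirty=%d\n", req.dirty);   /* 1 */
        return 0;
}
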
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8d2b610a8..c6ea9074c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -483,7 +483,10 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino,
* fh must be initialized before calling fh_compose
*/
fh_init(&fh, maxsize);
- err = fh_compose(&fh, exp, dentry);
+ if (fh_compose(&fh, exp, dentry))
+ err = -EINVAL;
+ else
+ err = 0;
memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh));
fh_put(&fh);
return err;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 6e102db9c..969ff54a9 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -748,6 +748,7 @@ encode_entry(struct readdir_cd *cd, const char *name,
if (fh_compose(&fh, exp, dchild) != 0 || !dchild->d_inode)
goto noexec;
p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry);
+ *p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, &fh);
fh_put(&fh);
}
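
The single added line in encode_entry() writes the boolean discriminant that XDR requires in front of an optional field, so a READDIRPLUS client sees "a file handle follows" before the handle bytes rather than misreading the handle itself as the flag. A hedged userspace sketch of how such an optional opaque is laid out on the wire; encode_opt_opaque() is an illustrative helper, not the kernel's encoder.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* XDR optional opaque: a 32-bit 1/0 flag, then length plus padded bytes if present.
 * Returns the advanced write pointer, in the style of the encode_* helpers. */
static uint32_t *encode_opt_opaque(uint32_t *p, const void *data, uint32_t len)
{
        if (data) {
                *p++ = htonl(1);                /* "value follows" */
                *p++ = htonl(len);
                memcpy(p, data, len);
                p += (len + 3) >> 2;            /* XDR pads to 4-byte multiples */
        } else {
                *p++ = htonl(0);                /* "no value" */
        }
        return p;
}

int main(void)
{
        uint32_t buf[16];
        unsigned char fh[6] = { 1, 2, 3, 4, 5, 6 };
        uint32_t *end = encode_opt_opaque(buf, fh, sizeof(fh));
        printf("encoded %ld words\n", (long)(end - buf));      /* 1 flag + 1 len + 2 data */
        return 0;
}
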
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f755adc8c..97b46f0c7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -42,8 +42,8 @@ static int nfsctl_export(struct nfsctl_export *data);
static int nfsctl_unexport(struct nfsctl_export *data);
static int nfsctl_getfh(struct nfsctl_fhparm *, __u8 *);
static int nfsctl_getfd(struct nfsctl_fdparm *, __u8 *);
-#ifdef notyet
static int nfsctl_getfs(struct nfsctl_fsparm *, struct knfsd_fh *);
+#ifdef notyet
static int nfsctl_ugidupdate(struct nfsctl_ugidmap *data);
#endif
@@ -112,7 +112,6 @@ nfsctl_ugidupdate(nfs_ugidmap *data)
}
#endif
-#ifdef notyet
static inline int
nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res)
{
@@ -131,10 +130,9 @@ nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res)
else
err = exp_rootfh(clp, 0, 0, data->gd_path, res, data->gd_maxlen);
exp_unlock();
-
+ /*HACK*/ res->fh_size = NFS_FHSIZE; /* HACK until lockd handles var-length handles */
return err;
}
-#endif
static inline int
nfsctl_getfd(struct nfsctl_fdparm *data, __u8 *res)
@@ -206,6 +204,21 @@ nfsctl_getfh(struct nfsctl_fhparm *data, __u8 *res)
#define handle_sys_nfsservctl sys_nfsservctl
#endif
+static struct {
+ int argsize, respsize;
+} sizes[] = {
+ /* NFSCTL_SVC */ { sizeof(struct nfsctl_svc), 0 },
+ /* NFSCTL_ADDCLIENT */ { sizeof(struct nfsctl_client), 0},
+ /* NFSCTL_DELCLIENT */ { sizeof(struct nfsctl_client), 0},
+ /* NFSCTL_EXPORT */ { sizeof(struct nfsctl_export), 0},
+ /* NFSCTL_UNEXPORT */ { sizeof(struct nfsctl_export), 0},
+ /* NFSCTL_UGIDUPDATE */ { sizeof(struct nfsctl_uidmap), 0},
+ /* NFSCTL_GETFH */ { sizeof(struct nfsctl_fhparm), NFS_FHSIZE},
+ /* NFSCTL_GETFD */ { sizeof(struct nfsctl_fdparm), NFS_FHSIZE},
+ /* NFSCTL_GETFS */ { sizeof(struct nfsctl_fsparm), sizeof(struct knfsd_fh)},
+};
+#define CMD_MAX (sizeof(sizes)/sizeof(sizes[0])-1)
+
int
asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
{
@@ -214,6 +227,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
struct nfsctl_arg * arg = NULL;
union nfsctl_res * res = NULL;
int err;
+ int argsize, respsize;
MOD_INC_USE_COUNT;
lock_kernel ();
@@ -223,12 +237,16 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
if (!capable(CAP_SYS_ADMIN)) {
goto done;
}
+ err = -EINVAL;
+ if (cmd<0 || cmd > CMD_MAX)
+ goto done;
err = -EFAULT;
- if (!access_ok(VERIFY_READ, argp, sizeof(*argp))
- || (resp && !access_ok(VERIFY_WRITE, resp, sizeof(*resp)))) {
+ argsize = sizes[cmd].argsize + sizeof(int); /* int for ca_version */
+ respsize = sizes[cmd].respsize; /* maximum */
+ if (!access_ok(VERIFY_READ, argp, argsize)
+ || (resp && !access_ok(VERIFY_WRITE, resp, respsize))) {
goto done;
}
-
err = -ENOMEM; /* ??? */
if (!(arg = kmalloc(sizeof(*arg), GFP_USER)) ||
(resp && !(res = kmalloc(sizeof(*res), GFP_USER)))) {
@@ -236,7 +254,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
}
err = -EINVAL;
- copy_from_user(arg, argp, sizeof(*argp));
+ copy_from_user(arg, argp, argsize);
if (arg->ca_version != NFSCTL_VERSION) {
printk(KERN_WARNING "nfsd: incompatible version in syscall.\n");
goto done;
@@ -269,16 +287,16 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
case NFSCTL_GETFD:
err = nfsctl_getfd(&arg->ca_getfd, res->cr_getfh);
break;
-#ifdef notyet
case NFSCTL_GETFS:
err = nfsctl_getfs(&arg->ca_getfs, &res->cr_getfs);
-#endif
+ respsize = res->cr_getfs.fh_size+sizeof(int);
+ break;
default:
err = -EINVAL;
}
- if (!err && resp)
- copy_to_user(resp, res, sizeof(*resp));
+ if (!err && resp && respsize)
+ copy_to_user(resp, res, respsize);
done:
if (arg)
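
The rewritten nfsservctl entry point stops copying a full union nfsctl_arg to and from user space; instead it validates the command number against a per-command table of argument/result sizes and bounds access_ok(), copy_from_user() and copy_to_user() by those sizes. A minimal sketch of the same bounded-copy pattern, with an illustrative command list and sizes rather than the nfsctl ones:

#include <stdio.h>
#include <string.h>
#include <errno.h>

struct cmd_size { size_t argsize, respsize; };

/* One entry per command, indexed by the command number. */
static const struct cmd_size sizes[] = {
        /* CMD_A */ { 16,  0 },
        /* CMD_B */ { 32,  8 },
        /* CMD_C */ { 64, 64 },
};
#define CMD_MAX ((int)(sizeof(sizes)/sizeof(sizes[0]) - 1))

/* Copy only as many bytes as this command actually defines. */
static int handle(int cmd, const void *argp, size_t arg_avail, unsigned char *arg_buf)
{
        if (cmd < 0 || cmd > CMD_MAX)
                return -EINVAL;
        if (arg_avail < sizes[cmd].argsize)     /* stands in for access_ok() */
                return -EFAULT;
        memcpy(arg_buf, argp, sizes[cmd].argsize);
        return 0;
}

int main(void)
{
        unsigned char user_arg[64] = {0}, karg[64];
        printf("cmd 1: %d\n", handle(1, user_arg, sizeof(user_arg), karg));  /* 0 */
        printf("cmd 9: %d\n", handle(9, user_arg, sizeof(user_arg), karg));  /* -EINVAL */
        return 0;
}
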
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6f69225cc..5c312b906 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -223,9 +223,10 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
goto done;
fh_lock(dirfhp);
dchild = lookup_one(argp->name, dget(dirfhp->fh_dentry));
- nfserr = nfserrno(PTR_ERR(dchild));
- if (IS_ERR(dchild))
+ if (IS_ERR(dchild)) {
+ nfserr = nfserrno(PTR_ERR(dchild));
goto out_unlock;
+ }
fh_init(newfhp, NFS_FHSIZE);
nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild);
if (!nfserr && !dchild->d_inode)
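
The nfsd_proc_create() hunk only reorders error handling: PTR_ERR() is meaningful only when IS_ERR() says the pointer actually encodes an error, so converting unconditionally would overwrite nfserr with a bogus value derived from a perfectly valid dentry. The idiom, sketched with simplified stand-ins modelled on the <linux/err.h> helpers (the kernel's exact bounds differ):

#include <stdio.h>

/* Simplified versions of the ERR_PTR/IS_ERR/PTR_ERR idiom. */
#define MAX_ERR 4095
static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERR;
}

int main(void)
{
        void *ok  = &(int){42};
        void *bad = ERR_PTR(-2 /* ENOENT */);

        /* Convert to an error code only after IS_ERR() confirms the pointer encodes one. */
        if (IS_ERR(bad))
                printf("lookup failed: %ld\n", PTR_ERR(bad));
        if (!IS_ERR(ok))
                printf("lookup succeeded\n");
        return 0;
}
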
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d69bba8d0..fb3b32f8d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -43,6 +43,8 @@ extern struct svc_program nfsd_program;
static void nfsd(struct svc_rqst *rqstp);
struct timeval nfssvc_boot = { 0, 0 };
static struct svc_serv *nfsd_serv = NULL;
+static int nfsd_busy = 0;
+static unsigned long nfsd_last_call;
struct nfsd_list {
struct list_head list;
@@ -115,6 +117,24 @@ nfsd_svc(unsigned short port, int nrservs)
return error;
}
+static void inline
+update_thread_usage(int busy_threads)
+{
+ unsigned long prev_call;
+ unsigned long diff;
+ int decile;
+
+ prev_call = nfsd_last_call;
+ nfsd_last_call = jiffies;
+ decile = busy_threads*10/nfsdstats.th_cnt;
+ if (decile>0 && decile <= 10) {
+ diff = nfsd_last_call - prev_call;
+ nfsdstats.th_usage[decile-1] += diff;
+ if (decile == 10)
+ nfsdstats.th_fullcnt++;
+ }
+}
+
/*
* This is the NFS server kernel thread
*/
@@ -134,6 +154,7 @@ nfsd(struct svc_rqst *rqstp)
sprintf(current->comm, "nfsd");
current->fs->umask = 0;
+ nfsdstats.th_cnt++;
/* Let svc_process check client's authentication. */
rqstp->rq_auth = 1;
@@ -161,6 +182,8 @@ nfsd(struct svc_rqst *rqstp)
;
if (err < 0)
break;
+ update_thread_usage(nfsd_busy);
+ nfsd_busy++;
/* Lock the export hash tables for reading. */
exp_readlock();
@@ -179,6 +202,8 @@ nfsd(struct svc_rqst *rqstp)
/* Unlock export hash tables */
exp_unlock();
+ update_thread_usage(nfsd_busy);
+ nfsd_busy--;
}
if (err != -EINTR) {
@@ -202,6 +227,7 @@ nfsd(struct svc_rqst *rqstp)
nfsd_racache_shutdown(); /* release read-ahead cache */
}
list_del(&me.list);
+ nfsdstats.th_cnt --;
/* Release the thread */
svc_exit_thread(rqstp);
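
update_thread_usage() keeps a crude load histogram: on every request it charges the jiffies elapsed since the previous request to the bucket for the current busy-thread decile, and counts the occasions on which every thread was busy. For example, with 8 threads and 4 busy the decile is 4*10/8 = 5, so th_usage[4] accumulates the elapsed time; with all 8 busy the decile is 10 and th_fullcnt is bumped. A self-contained sketch of the same accounting, with jiffies replaced by a plain counter and illustrative names:

#include <stdio.h>

static unsigned long usage[10];         /* time spent in each busy-thread decile */
static unsigned long fullcnt;           /* times every thread was busy */
static unsigned long last_call;

/* Charge the time since the previous call to the current busy-thread decile. */
static void update_thread_usage(int busy, int total, unsigned long now)
{
        unsigned long prev = last_call;
        int decile = busy * 10 / total;

        last_call = now;
        if (decile > 0 && decile <= 10) {
                usage[decile - 1] += now - prev;
                if (decile == 10)
                        fullcnt++;
        }
}

int main(void)
{
        update_thread_usage(4, 8, 100);   /* decile 5: bucket 4 gets 100 - 0 */
        update_thread_usage(8, 8, 250);   /* decile 10: bucket 9 gets 150, fullcnt++ */
        printf("bucket4=%lu bucket9=%lu full=%lu\n", usage[4], usage[9], fullcnt);
        return 0;
}
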
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 69defe790..254242fb6 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,6 +8,16 @@
* Format:
* rc <hits> <misses> <nocache>
 * Statistics for the reply cache
+ * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
+ * statistics for filehandle lookup
+ * io <bytes-read> <bytes-written>
+ * statistics for IO throughput
+ * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
+ * time (milliseconds) during which nfsd thread usage was above each threshold,
+ * and the number of times that all threads were in use
+ * ra cache-size <10% <20% <30% ... <100% not-found
+ * number of times that a read-ahead entry was found that deep in
+ * the cache.
* plus generic RPC stats (see net/sunrpc/stats.c)
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -33,17 +43,30 @@ nfsd_proc_read(char *buffer, char **start, off_t offset, int count,
int *eof, void *data)
{
int len;
+ int i;
- len = sprintf(buffer, "rc %d %d %d %d %d %d %d %d\n",
- nfsdstats.rchits,
- nfsdstats.rcmisses,
- nfsdstats.rcnocache,
- nfsdstats.fh_stale,
- nfsdstats.fh_lookup,
- nfsdstats.fh_anon,
- nfsdstats.fh_nocache_dir,
- nfsdstats.fh_nocache_nondir);
+ len = sprintf(buffer, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
+ nfsdstats.rchits,
+ nfsdstats.rcmisses,
+ nfsdstats.rcnocache,
+ nfsdstats.fh_stale,
+ nfsdstats.fh_lookup,
+ nfsdstats.fh_anon,
+ nfsdstats.fh_nocache_dir,
+ nfsdstats.fh_nocache_nondir,
+ nfsdstats.io_read,
+ nfsdstats.io_write);
+ /* thread usage: */
+ len += sprintf(buffer+len, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
+ for (i=0; i<10; i++)
+ len += sprintf(buffer+len, " %u", nfsdstats.th_usage[i]);
+ /* newline and ra-cache */
+ len += sprintf(buffer+len, "\nra %u", nfsdstats.ra_size);
+ for (i=0; i<11; i++)
+ len += sprintf(buffer+len, " %u", nfsdstats.ra_depth[i]);
+ len += sprintf(buffer+len, "\n");
+
/* Assume we haven't hit EOF yet. Will be set by svc_proc_read. */
*eof = 0;
@@ -53,13 +76,13 @@ nfsd_proc_read(char *buffer, char **start, off_t offset, int count,
*/
if (len <= offset) {
len = svc_proc_read(buffer, start, offset - len, count,
- eof, data);
+ eof, data);
return len;
}
if (len < count) {
len += svc_proc_read(buffer + len, start, 0, count - len,
- eof, data);
+ eof, data);
}
if (offset >= len) {
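
With these changes the nfsd statistics file (exported through the sunrpc proc tree, e.g. /proc/net/rpc/nfsd) grows the fh, io, th and ra lines documented in the comment block at the top of this file. An illustrative reading of such output with made-up numbers, parsed with nothing more than sscanf; the sample text is fabricated for the example, only the field order follows the format comment:

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Made-up sample; field order follows the stats.c format comment. */
        const char *sample =
                "rc 0 1410 27392\n"
                "fh 3 28788 0 14 2\n"
                "io 104857600 5242880\n"
                "th 8 12 400 300 250 200 150 120 90 60 30 10\n";

        unsigned int rd, wr, threads, full;
        const char *io = strstr(sample, "\nio ");
        const char *th = strstr(sample, "\nth ");

        if (io && sscanf(io, "\nio %u %u", &rd, &wr) == 2)
                printf("read %u bytes, wrote %u bytes\n", rd, wr);
        if (th && sscanf(th, "\nth %u %u", &threads, &full) == 2)
                printf("%u threads, all busy %u times\n", threads, full);
        return 0;
}
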
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e3be271a2..5cd55fda8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -539,13 +539,16 @@ static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
{
struct raparms *ra, **rap, **frap = NULL;
-
+ int depth = 0;
+
for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
if (ra->p_ino == ino && ra->p_dev == dev)
goto found;
+ depth++;
if (ra->p_count == 0)
frap = rap;
}
+ depth = nfsdstats.ra_size*11/10;
if (!frap)
return NULL;
rap = frap;
@@ -560,6 +563,7 @@ found:
raparm_cache = ra;
}
ra->p_count++;
+ nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
return ra;
}
@@ -598,6 +602,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
oldfs = get_fs(); set_fs(KERNEL_DS);
err = file.f_op->read(&file, buf, *count, &file.f_pos);
set_fs(oldfs);
+ nfsdstats.io_read += *count;
/* Write back readahead params */
if (ra != NULL) {
@@ -691,6 +696,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
#else
err = file.f_op->write(&file, buf, cnt, &file.f_pos);
#endif
+ nfsdstats.io_write += cnt;
set_fs(oldfs);
/* clear setuid/setgid flag after write */
@@ -1559,5 +1565,6 @@ nfsd_racache_init(int cache_size)
"nfsd: Could not allocate memory read-ahead cache.\n");
return -ENOMEM;
}
+ nfsdstats.ra_size = cache_size;
return 0;
}
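
nfsd_get_raparms() now records how deep in the read-ahead parameter list each hit was found: the depth is scaled into one of eleven buckets via depth*10/ra_size, and a miss is first given an artificial depth of ra_size*11/10 so that it lands in the final "not found" bucket. With cache_size = 16, a hit at depth 5 maps to bucket 5*10/16 = 3, and a miss maps to (16*11/10)*10/16 = 10, the last slot. A tiny sketch of that bucketing:

#include <stdio.h>

/* Map a lookup depth into one of 11 histogram buckets (0-10), as vfs.c does. */
static int ra_bucket(int depth, int ra_size)
{
        return depth * 10 / ra_size;
}

int main(void)
{
        int ra_size = 16;                       /* illustrative cache size */
        int miss_depth = ra_size * 11 / 10;     /* artificial depth used for a miss */

        printf("hit at depth 5  -> bucket %d\n", ra_bucket(5, ra_size));          /* 3 */
        printf("hit at depth 15 -> bucket %d\n", ra_bucket(15, ra_size));         /* 9 */
        printf("miss            -> bucket %d\n", ra_bucket(miss_depth, ra_size)); /* 10 */
        return 0;
}
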
diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c
index e95a36179..e6abd178e 100644
--- a/fs/ntfs/fs.c
+++ b/fs/ntfs/fs.c
@@ -595,7 +595,7 @@ static int ntfs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,ntfs_get_block);
}
-static int ntfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int ntfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,ntfs_get_block,
&((struct inode*)page->mapping->host)->u.ntfs_i.mmu_private);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 8a5e404b1..bd8aa6b98 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -258,7 +258,7 @@ static void ntfs_load_attributes(ntfs_inode* ino)
if( !buf )
return;
delta=0;
- for(offset=0;datasize;datasize-=len)
+ for(offset=0;datasize;datasize-=len,offset+=len)
{
ntfs_io io;
io.fn_put=ntfs_put;
@@ -268,7 +268,7 @@ static void ntfs_load_attributes(ntfs_inode* ino)
if(ntfs_read_attr(ino,vol->at_attribute_list,0,offset,&io)){
ntfs_error("error in load_attributes\n");
}
- delta=len;
+ delta+=len;
parse_attributes(ino,buf,&delta);
if(delta)
/* move remaining bytes to buffer start */
diff --git a/fs/open.c b/fs/open.c
index 9f4d50a79..44202fe4e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -298,7 +298,6 @@ asmlinkage long sys_access(const char * filename, int mode)
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
- lock_kernel();
old_fsuid = current->fsuid;
old_fsgid = current->fsgid;
old_cap = current->cap_effective;
@@ -311,7 +310,8 @@ asmlinkage long sys_access(const char * filename, int mode)
cap_clear(current->cap_effective);
else
current->cap_effective = current->cap_permitted;
-
+
+ lock_kernel();
dentry = namei(filename);
res = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
@@ -321,12 +321,12 @@ asmlinkage long sys_access(const char * filename, int mode)
res = -EROFS;
dput(dentry);
}
+ unlock_kernel();
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
current->cap_effective = old_cap;
- unlock_kernel();
return res;
}
@@ -646,25 +646,35 @@ out:
*/
struct file *filp_open(const char * filename, int flags, int mode, struct dentry * base)
{
- struct inode * inode;
struct dentry * dentry;
- struct file * f;
int flag,error;
- error = -ENFILE;
- f = get_empty_filp();
- if (!f)
- goto out;
- f->f_flags = flag = flags;
- f->f_mode = (flag+1) & O_ACCMODE;
- if (f->f_mode)
+ flag = flags;
+ if ((flag+1) & O_ACCMODE)
flag++;
if (flag & O_TRUNC)
flag |= 2;
+
dentry = __open_namei(filename, flag, mode, base);
error = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto cleanup_file;
+ if (!IS_ERR(dentry))
+ return dentry_open(dentry, flags);
+
+ return ERR_PTR(error);
+}
+
+struct file *dentry_open(struct dentry *dentry, int flags)
+{
+ struct file * f;
+ struct inode *inode;
+ int error;
+
+ error = -ENFILE;
+ f = get_empty_filp();
+ if (!f)
+ goto cleanup_dentry;
+ f->f_flags = flags;
+ f->f_mode = (flags+1) & O_ACCMODE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
@@ -692,12 +702,10 @@ struct file *filp_open(const char * filename, int flags, int mode, struct dentry
cleanup_all:
if (f->f_mode & FMODE_WRITE)
put_write_access(inode);
-cleanup_dentry:
f->f_dentry = NULL;
+cleanup_dentry:
dput(dentry);
-cleanup_file:
put_filp(f);
-out:
return ERR_PTR(error);
}
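
filp_open() is split in two: the path lookup stays in filp_open(), while allocating the struct file, taking write access and calling the filesystem's open() move into the new dentry_open(), so code that already holds a dentry can open it directly. A hedged userspace model of the resulting call shape; the structs and helpers below are illustrative stand-ins with the error handling trimmed, not the kernel's definitions:

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins: a "dentry" names an object, a "file" is one open instance of it. */
struct dentry { const char *name; };
struct file   { struct dentry *dentry; int flags; };

static struct dentry *lookup(const char *path)                /* plays __open_namei() */
{
        struct dentry *d = malloc(sizeof(*d));
        if (d)
                d->name = path;
        return d;
}

static struct file *dentry_open(struct dentry *d, int flags)  /* plays dentry_open() */
{
        struct file *f = malloc(sizeof(*f));
        if (!f) {
                free(d);
                return NULL;
        }
        f->dentry = d;
        f->flags = flags;
        return f;
}

static struct file *filp_open(const char *path, int flags)    /* lookup, then open */
{
        struct dentry *d = lookup(path);
        return d ? dentry_open(d, flags) : NULL;
}

int main(void)
{
        struct file *f = filp_open("/tmp/example", 0);
        if (f)
                printf("opened %s\n", f->dentry->name);
        return 0;
}
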
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 27b4be8cb..92eed7559 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1,4 +1,4 @@
-/* $Id: inode.c,v 1.9 2000/03/13 21:59:43 davem Exp $
+/* $Id: inode.c,v 1.10 2000/03/24 01:32:51 davem Exp $
* openpromfs.c: /proc/openprom handling routines
*
* Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com)
@@ -980,10 +980,6 @@ static void openprom_read_inode(struct inode * inode)
}
}
-static void openprom_put_super(struct super_block *sb)
-{
-}
-
static int openprom_statfs(struct super_block *sb, struct statfs *buf)
{
buf->f_type = OPENPROM_SUPER_MAGIC;
@@ -997,7 +993,6 @@ static int openprom_statfs(struct super_block *sb, struct statfs *buf)
static struct super_operations openprom_sops = {
read_inode: openprom_read_inode,
- put_super: openprom_put_super,
statfs: openprom_statfs,
};
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index fc9555b77..21330f499 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -26,7 +26,10 @@
#include <linux/major.h>
#include <linux/string.h>
#include <linux/blk.h>
+
+#ifdef CONFIG_IDE
#include <linux/ide.h> /* IDE xlate */
+#endif /* CONFIG_IDE */
#include <asm/system.h>
@@ -347,19 +350,19 @@ int msdos_partition(struct gendisk *hd, kdev_t dev,
unsigned char *data;
int mask = (1 << hd->minor_shift) - 1;
int sector_size = get_hardsect_size(dev) / 512;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_IDE
int tested_for_xlate = 0;
read_mbr:
-#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */
+#endif /* CONFIG_IDE */
if (!(bh = bread(dev,0,get_ptable_blocksize(dev)))) {
if (warn_no_part) printk(" unable to read partition table\n");
return -1;
}
data = bh->b_data;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_IDE
check_table:
-#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */
+#endif /* CONFIG_IDE */
/* Use bforget(), because we may have changed the disk geometry */
if (*(unsigned short *) (0x1fe + data) != cpu_to_le16(MSDOS_LABEL_MAGIC)) {
bforget(bh);
@@ -367,7 +370,7 @@ check_table:
}
p = (struct partition *) (0x1be + data);
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_IDE
if (!tested_for_xlate++) { /* Do this only once per disk */
/*
* Look for various forms of IDE disk geometry translation
@@ -423,7 +426,7 @@ check_table:
(void) ide_xlate_1024(dev, 2, heads, " [PTBL]");
}
}
-#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */
+#endif /* CONFIG_IDE */
/* Look for partitions in two passes:
First find the primary partitions, and the DOS-type extended partitions.
diff --git a/fs/pipe.c b/fs/pipe.c
index 020416013..f25f5e514 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -13,16 +13,6 @@
#include <asm/uaccess.h>
/*
- * Define this if you want SunOS compatibility wrt braindead
- * select behaviour on FIFO's.
- */
-#ifdef __sparc__
-#define FIFO_SUNOS_BRAINDAMAGE
-#else
-#undef FIFO_SUNOS_BRAINDAMAGE
-#endif
-
-/*
* We use a start+len construction, which provides full use of the
* allocated memory.
* -- Florian Coosmann (FGC)
@@ -32,7 +22,7 @@
*/
/* Drop the inode semaphore and wait for a pipe event, atomically */
-static void pipe_wait(struct inode * inode)
+void pipe_wait(struct inode * inode)
{
DECLARE_WAITQUEUE(wait, current);
current->state = TASK_INTERRUPTIBLE;
@@ -296,7 +286,7 @@ pipe_poll(struct file *filp, poll_table *wait)
mask = POLLIN | POLLRDNORM;
if (PIPE_EMPTY(*inode))
mask = POLLOUT | POLLWRNORM;
- if (!PIPE_WRITERS(*inode))
+ if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
mask |= POLLHUP;
if (!PIPE_READERS(*inode))
mask |= POLLERR;
@@ -304,72 +294,9 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
-#ifdef FIFO_SUNOS_BRAINDAMAGE
-/*
- * Argh! Why does SunOS have to have different select() behaviour
- * for pipes and FIFOs? Hate, hate, hate! SunOS lacks POLLHUP.
- */
-static unsigned int
-fifo_poll(struct file *filp, poll_table *wait)
-{
- unsigned int mask;
- struct inode *inode = filp->f_dentry->d_inode;
-
- poll_wait(filp, PIPE_WAIT(*inode), wait);
-
- /* Reading only -- no need for aquiring the semaphore. */
- mask = POLLIN | POLLRDNORM;
- if (PIPE_EMPTY(*inode))
- mask = POLLOUT | POLLWRNORM;
- if (!PIPE_READERS(*inode))
- mask |= POLLERR;
-
- return mask;
-}
-#else
-
+/* FIXME: most Unices do not set POLLERR for fifos */
#define fifo_poll pipe_poll
-#endif /* FIFO_SUNOS_BRAINDAMAGE */
-
-/*
- * The 'connect_xxx()' functions are needed for named pipes when
- * the open() code hasn't guaranteed a connection (O_NONBLOCK),
- * and we need to act differently until we do get a writer..
- */
-static ssize_t
-connect_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
-{
- struct inode *inode = filp->f_dentry->d_inode;
-
- /* Reading only -- no need for aquiring the semaphore. */
- if (PIPE_EMPTY(*inode) && !PIPE_WRITERS(*inode))
- return 0;
-
- filp->f_op = &read_fifo_fops;
- return pipe_read(filp, buf, count, ppos);
-}
-
-static unsigned int
-connect_poll(struct file *filp, poll_table *wait)
-{
- struct inode *inode = filp->f_dentry->d_inode;
- unsigned int mask = 0;
-
- poll_wait(filp, PIPE_WAIT(*inode), wait);
-
- /* Reading only -- no need for aquiring the semaphore. */
- if (!PIPE_EMPTY(*inode)) {
- filp->f_op = &read_fifo_fops;
- mask = POLLIN | POLLRDNORM;
- } else if (PIPE_WRITERS(*inode)) {
- filp->f_op = &read_fifo_fops;
- mask = POLLOUT | POLLWRNORM;
- }
-
- return mask;
-}
-
static int
pipe_release(struct inode *inode, int decr, int decw)
{
@@ -450,16 +377,6 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
* The file_operations structs are not static because they
* are also used in linux/fs/fifo.c to do operations on FIFOs.
*/
-struct file_operations connecting_fifo_fops = {
- llseek: pipe_lseek,
- read: connect_read,
- write: bad_pipe_w,
- poll: connect_poll,
- ioctl: pipe_ioctl,
- open: pipe_read_open,
- release: pipe_read_release,
-};
-
struct file_operations read_fifo_fops = {
llseek: pipe_lseek,
read: pipe_read,
@@ -520,29 +437,42 @@ struct file_operations rdwr_pipe_fops = {
release: pipe_rdwr_release,
};
-static struct inode * get_pipe_inode(void)
+struct inode* pipe_new(struct inode* inode)
{
- struct inode *inode = get_empty_inode();
unsigned long page;
- if (!inode)
- goto fail_inode;
-
page = __get_free_page(GFP_USER);
if (!page)
- goto fail_iput;
+ return NULL;
inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (!inode->i_pipe)
goto fail_page;
- inode->i_fop = &rdwr_pipe_fops;
-
init_waitqueue_head(PIPE_WAIT(*inode));
- PIPE_BASE(*inode) = (char *) page;
+ PIPE_BASE(*inode) = (char*) page;
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
+ PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
+
+ return inode;
+fail_page:
+ free_page(page);
+ return NULL;
+}
+
+static struct inode * get_pipe_inode(void)
+{
+ struct inode *inode = get_empty_inode();
+
+ if (!inode)
+ goto fail_inode;
+
+ if(!pipe_new(inode))
+ goto fail_iput;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ inode->i_fop = &rdwr_pipe_fops;
/*
* Mark the inode dirty from the very beginning,
@@ -558,8 +488,6 @@ static struct inode * get_pipe_inode(void)
inode->i_blksize = PAGE_SIZE;
return inode;
-fail_page:
- free_page(page);
fail_iput:
iput(inode);
fail_inode:
@@ -606,11 +534,13 @@ int do_pipe(int *fd)
f1->f_flags = O_RDONLY;
f1->f_op = &read_pipe_fops;
f1->f_mode = 1;
+ f1->f_version = 0;
/* write file */
f2->f_flags = O_WRONLY;
f2->f_op = &write_pipe_fops;
f2->f_mode = 2;
+ f2->f_version = 0;
fd_install(i, f1);
fd_install(j, f2);
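
Two related changes in pipe.c: pipe allocation is factored into pipe_new() so FIFOs can reuse it, and pipe_poll() reports POLLHUP only when there are no writers and the writer counter differs from the reader's f_version, i.e. a writer has existed and gone away since this reader sampled the counter (do_pipe() zeroes f_version for anonymous pipes; the FIFO open path, not shown in this hunk, presumably records the current writer counter there). A small model of that condition, with illustrative types:

#include <stdio.h>

struct pipe_state { int writers; unsigned int wcounter; };

/* Report hangup only if no writer is present and one has come and gone
 * since this reader sampled the counter (its f_version). */
static int poll_hup(const struct pipe_state *p, unsigned int f_version)
{
        return p->writers == 0 && f_version != p->wcounter;
}

int main(void)
{
        struct pipe_state p = { 0, 1 };
        unsigned int reader_version = 1;        /* sampled at open, no writer seen yet */

        printf("fresh fifo, no writer yet: HUP=%d\n", poll_hup(&p, reader_version)); /* 0 */
        p.writers = 1; p.wcounter = 2;          /* a writer opens: counter advances */
        p.writers = 0;                          /* ...and closes again */
        printf("writer came and went:      HUP=%d\n", poll_hup(&p, reader_version)); /* 1 */
        return 0;
}
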
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3e1c58ad7..54e594634 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -402,7 +402,8 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
++*pages;
if (pte_dirty(page))
++*dirty;
- if (pte_pagenr(page) >= max_mapnr)
+ if ((pte_pagenr(page) >= max_mapnr) ||
+ PageReserved(pte_pagenr(page) + mem_map))
continue;
if (page_count(pte_page(page)) > 1)
++*shared;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 9e78119c9..c6511354b 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -418,7 +418,7 @@ static int qnx4_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,qnx4_get_block);
}
-static int qnx4_prepare_write(struct page *page, unsigned from, unsigned to)
+static int qnx4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,qnx4_get_block,
&((struct inode*)page->mapping->host)->u.qnx4_i.mmu_private);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 7f5f2dee6..63d5a58ab 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -161,14 +161,6 @@ outnobh:
return s;
}
-/* Nothing to do.. */
-
-static void
-romfs_put_super(struct super_block *sb)
-{
- return;
-}
-
/* That's simple too. */
static int
@@ -526,7 +518,6 @@ romfs_read_inode(struct inode *i)
static struct super_operations romfs_ops = {
read_inode: romfs_read_inode,
- put_super: romfs_put_super,
statfs: romfs_statfs,
};
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 1360ca994..12e2bf295 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -268,7 +268,7 @@ out:
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
-static int smb_prepare_write(struct page *page, unsigned offset, unsigned to)
+static int smb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
kmap(page);
return 0;
diff --git a/fs/super.c b/fs/super.c
index dd34ddc70..302487807 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -147,7 +147,6 @@ static int fs_index(const char * __name)
err = index;
break;
}
- index++;
}
spin_unlock(&file_systems_lock);
putname(name);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3367c02ef..b6396ff04 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -949,7 +949,7 @@ static int sysv_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,sysv_get_block);
}
-static int sysv_prepare_write(struct page *page, unsigned from, unsigned to)
+static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,sysv_get_block);
}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1aa3aa1c4..2be4e8562 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -85,14 +85,14 @@ found_middle:
#define find_first_one_bit(addr, size)\
find_next_one_bit((addr), (size), 0)
-static int read_block_bitmap(struct super_block * sb, unsigned int block,
- unsigned long bitmap_nr)
+static int read_block_bitmap(struct super_block * sb, Uint32 bitmap,
+ unsigned int block, unsigned long bitmap_nr)
{
struct buffer_head *bh = NULL;
int retval = 0;
lb_addr loc;
- loc.logicalBlockNum = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap;
+ loc.logicalBlockNum = bitmap;
loc.partitionReferenceNum = UDF_SB_PARTITION(sb);
bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block), sb->s_blocksize);
@@ -105,7 +105,8 @@ static int read_block_bitmap(struct super_block * sb, unsigned int block,
return retval;
}
-static int __load_block_bitmap(struct super_block * sb, unsigned int block_group)
+static int __load_block_bitmap(struct super_block * sb, Uint32 bitmap,
+ unsigned int block_group)
{
int i, j, retval = 0;
unsigned long block_bitmap_number;
@@ -125,7 +126,7 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group
if (UDF_SB_BLOCK_BITMAP_NUMBER(sb, block_group) == block_group)
return block_group;
}
- retval = read_block_bitmap(sb, block_group, block_group);
+ retval = read_block_bitmap(sb, bitmap, block_group, block_group);
if (retval < 0)
return retval;
return block_group;
@@ -150,7 +151,7 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group
UDF_SB_BLOCK_BITMAP(sb, 0) = block_bitmap;
if (!block_bitmap)
- retval = read_block_bitmap(sb, block_group, 0);
+ retval = read_block_bitmap(sb, bitmap, block_group, 0);
}
else
{
@@ -163,12 +164,12 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group
UDF_SB_BLOCK_BITMAP_NUMBER(sb, j) = UDF_SB_BLOCK_BITMAP_NUMBER(sb, j-1);
UDF_SB_BLOCK_BITMAP(sb, j) = UDF_SB_BLOCK_BITMAP(sb, j-1);
}
- retval = read_block_bitmap(sb, block_group, 0);
+ retval = read_block_bitmap(sb, bitmap, block_group, 0);
}
return retval;
}
-static inline int load_block_bitmap(struct super_block *sb,
+static inline int load_block_bitmap(struct super_block *sb, Uint32 bitmap,
unsigned int block_group)
{
int slot;
@@ -189,7 +190,7 @@ static inline int load_block_bitmap(struct super_block *sb,
}
else
{
- slot = __load_block_bitmap(sb, block_group);
+ slot = __load_block_bitmap(sb, bitmap, block_group);
}
if (slot < 0)
@@ -201,8 +202,8 @@ static inline int load_block_bitmap(struct super_block *sb,
return slot;
}
-void udf_free_blocks(const struct inode * inode, lb_addr bloc, Uint32 offset,
- Uint32 count)
+static void udf_bitmap_free_blocks(const struct inode * inode, Uint32 bitmap,
+ lb_addr bloc, Uint32 offset, Uint32 count)
{
struct buffer_head * bh = NULL;
unsigned long block;
@@ -220,9 +221,6 @@ void udf_free_blocks(const struct inode * inode, lb_addr bloc, Uint32 offset,
return;
}
- if (UDF_SB_PARTMAPS(sb)[bloc.partitionReferenceNum].s_uspace_bitmap == 0xFFFFFFFF)
- return;
-
lock_super(sb);
if (bloc.logicalBlockNum < 0 ||
(bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum))
@@ -248,7 +246,7 @@ do_more:
overflow = bit + count - (sb->s_blocksize << 3);
count -= overflow;
}
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto error_return;
@@ -285,8 +283,8 @@ error_return:
return;
}
-int udf_prealloc_blocks(const struct inode * inode, Uint16 partition,
- Uint32 first_block, Uint32 block_count)
+static int udf_bitmap_prealloc_blocks(const struct inode * inode, Uint32 bitmap,
+ Uint16 partition, Uint32 first_block, Uint32 block_count)
{
int alloc_count = 0;
int bit, block, block_group, group_start;
@@ -312,7 +310,7 @@ repeat:
block_group = block >> (sb->s_blocksize_bits + 3);
group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc);
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto out;
bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr);
@@ -351,7 +349,8 @@ out:
return alloc_count;
}
-int udf_new_block(const struct inode * inode, Uint16 partition, Uint32 goal, int *err)
+static int udf_bitmap_new_block(const struct inode * inode, Uint32 bitmap,
+ Uint16 partition, Uint32 goal, int *err)
{
int tmp, newbit, bit=0, block, block_group, group_start;
int end_goal, nr_groups, bitmap_nr, i;
@@ -379,7 +378,7 @@ repeat:
block_group = block >> (sb->s_blocksize_bits + 3);
group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc);
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto error_return;
bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr);
@@ -419,7 +418,7 @@ repeat:
block_group = 0;
group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc);
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto error_return;
bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr);
@@ -497,3 +496,64 @@ error_return:
unlock_super(sb);
return 0;
}
+
+inline void udf_free_blocks(const struct inode * inode, lb_addr bloc,
+ Uint32 offset, Uint32 count)
+{
+ if (UDF_SB_PARTFLAGS(inode->i_sb, bloc.partitionReferenceNum) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ {
+ return udf_bitmap_free_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[bloc.partitionReferenceNum].s_uspace.bitmap,
+ bloc, offset, count);
+ }
+ else if (UDF_SB_PARTFLAGS(inode->i_sb, bloc.partitionReferenceNum) & UDF_PART_FLAG_FREED_BITMAP)
+ {
+ return udf_bitmap_free_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[bloc.partitionReferenceNum].s_fspace.bitmap,
+ bloc, offset, count);
+ }
+ else
+ return;
+}
+
+inline int udf_prealloc_blocks(const struct inode * inode, Uint16 partition,
+ Uint32 first_block, Uint32 block_count)
+{
+ if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ {
+ return udf_bitmap_prealloc_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_uspace.bitmap,
+ partition, first_block, block_count);
+ }
+ else if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_FREED_BITMAP)
+ {
+ return udf_bitmap_prealloc_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_fspace.bitmap,
+ partition, first_block, block_count);
+ }
+ else
+ return 0;
+}
+
+inline int udf_new_block(const struct inode * inode, Uint16 partition,
+ Uint32 goal, int *err)
+{
+ if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ {
+ return udf_bitmap_new_block(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_uspace.bitmap,
+ partition, goal, err);
+ }
+ else if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_FREED_BITMAP)
+ {
+ return udf_bitmap_new_block(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_fspace.bitmap,
+ partition, goal, err);
+ }
+ else
+ {
+ *err = -EIO;
+ return 0;
+ }
+}
+
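
udf_free_blocks(), udf_prealloc_blocks() and udf_new_block() become thin dispatchers: instead of the old 0xFFFFFFFF sentinel in s_uspace_bitmap, they consult the partition flags to decide whether the unallocated-space or the freed-space bitmap applies and pass that bitmap's location down to the former implementations. The pattern, reduced to a sketch with illustrative flag and field names:

#include <stdio.h>

#define PART_FLAG_UNALLOC_BITMAP 0x0001
#define PART_FLAG_FREED_BITMAP   0x0004

struct partition { unsigned int flags; unsigned int uspace_bitmap, fspace_bitmap; };

/* Pick which space bitmap governs allocation for this partition, if any. */
static int pick_bitmap(const struct partition *p, unsigned int *bitmap)
{
        if (p->flags & PART_FLAG_UNALLOC_BITMAP)
                *bitmap = p->uspace_bitmap;
        else if (p->flags & PART_FLAG_FREED_BITMAP)
                *bitmap = p->fspace_bitmap;
        else
                return -1;              /* no bitmap: caller reports an error or does nothing */
        return 0;
}

int main(void)
{
        struct partition p = { PART_FLAG_FREED_BITMAP, 100, 200 };
        unsigned int bm;
        if (pick_bitmap(&p, &bm) == 0)
                printf("using bitmap at block %u\n", bm);       /* 200 */
        return 0;
}
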
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 73d47ac10..96297521b 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -87,7 +87,7 @@ static int udf_adinicb_writepage(struct dentry *dentry, struct page *page)
return 0;
}
-static int udf_adinicb_prepare_write(struct page *page, unsigned offset, unsigned to)
+static int udf_adinicb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
kmap(page);
return 0;
@@ -246,7 +246,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
struct FileEntry *fe;
fe = (struct FileEntry *)bh->b_data;
- eaicb = fe->extendedAttrICB;
+ eaicb = lela_to_cpu(fe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = fe->extendedAttr;
}
@@ -255,7 +255,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
struct ExtendedFileEntry *efe;
efe = (struct ExtendedFileEntry *)bh->b_data;
- eaicb = efe->extendedAttrICB;
+ eaicb = lela_to_cpu(efe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = efe->extendedAttr;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 264086135..ed1507fa7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -74,10 +74,13 @@ static int udf_get_block(struct inode *, long, struct buffer_head *, int);
*/
void udf_put_inode(struct inode * inode)
{
- lock_kernel();
- udf_discard_prealloc(inode);
- write_inode_now(inode);
- unlock_kernel();
+ if (!(inode->i_sb->s_flags & MS_RDONLY))
+ {
+ lock_kernel();
+ udf_discard_prealloc(inode);
+ write_inode_now(inode);
+ unlock_kernel();
+ }
}
/*
@@ -130,7 +133,7 @@ static int udf_readpage(struct dentry *dentry, struct page *page)
return block_read_full_page(page, udf_get_block);
}
-static int udf_prepare_write(struct page *page, unsigned from, unsigned to)
+static int udf_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page, from, to, udf_get_block);
}
@@ -1554,16 +1557,18 @@ int udf_add_aext(struct inode *inode, lb_addr *bloc, int *extoffset,
case ICB_FLAG_AD_SHORT:
{
sad = (short_ad *)sptr;
- sad->extLength = EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
- inode->i_sb->s_blocksize;
+ sad->extLength = cpu_to_le32(
+ EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
+ inode->i_sb->s_blocksize);
sad->extPosition = cpu_to_le32(bloc->logicalBlockNum);
break;
}
case ICB_FLAG_AD_LONG:
{
lad = (long_ad *)sptr;
- lad->extLength = EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
- inode->i_sb->s_blocksize;
+ lad->extLength = cpu_to_le32(
+ EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
+ inode->i_sb->s_blocksize);
lad->extLocation = cpu_to_lelb(*bloc);
break;
}
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 653997033..1403cec52 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -70,8 +70,6 @@ udf_get_last_session(struct super_block *sb)
unsigned int
udf_get_last_block(struct super_block *sb)
{
- extern int *blksize_size[];
- kdev_t dev = sb->s_dev;
struct block_device *bdev = sb->s_bdev;
int ret;
unsigned long lblock = 0;
@@ -80,28 +78,10 @@ udf_get_last_block(struct super_block *sb)
if (ret) /* Hard Disk */
{
- unsigned int hbsize = get_hardblocksize(dev);
- unsigned int blocksize = sb->s_blocksize;
- unsigned int mult = 0;
- unsigned int div = 0;
-
- if (!hbsize)
- hbsize = blksize_size[MAJOR(dev)][MINOR(dev)];
-
- if (hbsize > blocksize)
- mult = hbsize / blocksize;
- else if (blocksize > hbsize)
- div = blocksize / hbsize;
-
ret = ioctl_by_bdev(bdev, BLKGETSIZE, (unsigned long) &lblock);
if (!ret && lblock != 0x7FFFFFFF)
- {
- if (mult)
- lblock *= mult;
- else if (div)
- lblock /= div;
- }
+ lblock = ((512 * lblock) / sb->s_blocksize);
}
if (!ret && lblock)
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index bed1e3984..ae998258e 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -90,7 +90,7 @@ udf_add_extendedattr(struct inode * inode, Uint32 size, Uint32 type,
struct FileEntry *fe;
fe = (struct FileEntry *)(*bh)->b_data;
- eaicb = fe->extendedAttrICB;
+ eaicb = lela_to_cpu(fe->extendedAttrICB);
offset = sizeof(struct FileEntry);
}
else
@@ -98,7 +98,7 @@ udf_add_extendedattr(struct inode * inode, Uint32 size, Uint32 type,
struct ExtendedFileEntry *efe;
efe = (struct ExtendedFileEntry *)(*bh)->b_data;
- eaicb = efe->extendedAttrICB;
+ eaicb = lela_to_cpu(efe->extendedAttrICB);
offset = sizeof(struct ExtendedFileEntry);
}
@@ -206,7 +206,7 @@ udf_get_extendedattr(struct inode * inode, Uint32 type, Uint8 subtype,
struct FileEntry *fe;
fe = (struct FileEntry *)(*bh)->b_data;
- eaicb = fe->extendedAttrICB;
+ eaicb = lela_to_cpu(fe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = fe->extendedAttr;
}
@@ -215,7 +215,7 @@ udf_get_extendedattr(struct inode * inode, Uint32 type, Uint8 subtype,
struct ExtendedFileEntry *efe;
efe = (struct ExtendedFileEntry *)(*bh)->b_data;
- eaicb = efe->extendedAttrICB;
+ eaicb = lela_to_cpu(efe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = efe->extendedAttr;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c371b5d52..a44e19043 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -935,7 +935,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
inode->i_data.a_ops = &udf_symlink_aops;
inode->i_op = &page_symlink_inode_operations;
- if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_IN_ICB)
+ if (UDF_I_ALLOCTYPE(inode) != ICB_FLAG_AD_IN_ICB)
{
struct buffer_head *bh = NULL;
lb_addr bloc, eloc;
@@ -964,7 +964,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
bh = udf_tread(inode->i_sb, block, inode->i_sb->s_blocksize);
ea = bh->b_data + udf_ext0_offset(inode);
- eoffset = inode->i_sb->s_blocksize - (ea - bh->b_data);
+ eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode);
pc = (struct PathComponent *)ea;
if (*symname == '/')
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 81f59e9a3..5f76abbb0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -100,14 +100,14 @@ static DECLARE_FSTYPE_DEV(udf_fstype, "udf", udf_read_super);
/* Superblock operations */
static struct super_operations udf_sb_ops = {
- read_inode: udf_read_inode,
+ read_inode: udf_read_inode,
write_inode: udf_write_inode,
- put_inode: udf_put_inode,
+ put_inode: udf_put_inode,
delete_inode: udf_delete_inode,
- put_super: udf_put_super,
+ put_super: udf_put_super,
write_super: udf_write_super,
- statfs: udf_statfs,
- remount_fs: udf_remount_fs,
+ statfs: udf_statfs,
+ remount_fs: udf_remount_fs,
};
struct udf_options
@@ -127,7 +127,6 @@ struct udf_options
uid_t uid;
};
-
static int __init init_udf_fs(void)
{
printk(KERN_NOTICE "udf: registering filesystem\n");
@@ -745,8 +744,9 @@ udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh)
{
if (udf_CS0toUTF8(&outstr, &instr))
{
- udf_debug("volIdent[] = '%s'\n", outstr.u_name);
- strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name, outstr.u_len);
+ strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name,
+ outstr.u_len > 31 ? 31 : outstr.u_len);
+ udf_debug("volIdent[] = '%s'\n", UDF_SB_VOLIDENT(sb));
}
}
@@ -788,7 +788,6 @@ udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
{
UDF_SB_PARTLEN(sb,i) = le32_to_cpu(p->partitionLength); /* blocks */
UDF_SB_PARTROOT(sb,i) = le32_to_cpu(p->partitionStartingLocation) + UDF_SB_SESSION(sb);
- UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = 0xFFFFFFFF;
if (UDF_SB_PARTTYPE(sb,i) == UDF_SPARABLE_MAP15)
udf_fill_spartable(sb, &UDF_SB_TYPESPAR(sb,i), UDF_SB_PARTLEN(sb,i));
@@ -803,17 +802,24 @@ udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
udf_debug("unallocatedSpaceTable (part %d)\n", i);
if (phd->unallocatedSpaceBitmap.extLength)
{
- UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap =
+ UDF_SB_PARTMAPS(sb)[i].s_uspace.bitmap =
le32_to_cpu(phd->unallocatedSpaceBitmap.extPosition);
+ UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_BITMAP;
udf_debug("unallocatedSpaceBitmap (part %d) @ %d\n",
- i, UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap);
+ i, UDF_SB_PARTMAPS(sb)[i].s_uspace.bitmap);
}
if (phd->partitionIntegrityTable.extLength)
udf_debug("partitionIntegrityTable (part %d)\n", i);
if (phd->freedSpaceTable.extLength)
udf_debug("freedSpaceTable (part %d)\n", i);
if (phd->freedSpaceBitmap.extLength)
- udf_debug("freedSpaceBitmap (part %d\n", i);
+ {
+ UDF_SB_PARTMAPS(sb)[i].s_fspace.bitmap =
+ le32_to_cpu(phd->freedSpaceBitmap.extPosition);
+ UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_BITMAP;
+ udf_debug("freedSpaceBitmap (part %d) @ %d\n",
+ i, UDF_SB_PARTMAPS(sb)[i].s_fspace.bitmap);
+ }
}
break;
}
@@ -1184,7 +1190,6 @@ udf_load_partition(struct super_block *sb, lb_addr *fileset)
}
UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0);
UDF_SB_PARTLEN(sb,i) = UDF_SB_PARTLEN(sb,ino.partitionReferenceNum);
- UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = 0xFFFFFFFF;
}
}
}
@@ -1520,34 +1525,27 @@ static unsigned int
udf_count_free(struct super_block *sb)
{
struct buffer_head *bh = NULL;
- unsigned int accum=0;
- int index;
- int block=0, newblock;
+ unsigned int accum = 0;
lb_addr loc;
- Uint32 bytes;
- Uint8 value;
- Uint8 * ptr;
- Uint16 ident;
-
- if (UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap == 0xFFFFFFFF)
- {
- if (UDF_SB_LVIDBH(sb))
- {
- if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb))
- accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]);
-
- if (accum == 0xFFFFFFFF)
- accum = 0;
+ Uint32 bitmap;
- return accum;
- }
- else
- return 0;
- }
+ if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ bitmap = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.bitmap;
+ else if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP)
+ bitmap = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.bitmap;
else
+ bitmap = 0xFFFFFFFF;
+
+ if (bitmap != 0xFFFFFFFF)
{
struct SpaceBitmapDesc *bm;
- loc.logicalBlockNum = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap;
+ int block = 0, newblock, index;
+ Uint16 ident;
+ Uint32 bytes;
+ Uint8 value;
+ Uint8 * ptr;
+
+ loc.logicalBlockNum = bitmap;
loc.partitionReferenceNum = UDF_SB_PARTITION(sb);
bh = udf_read_ptagged(sb, loc, 0, &ident);
@@ -1593,6 +1591,18 @@ udf_count_free(struct super_block *sb)
}
}
udf_release_data(bh);
- return accum;
}
+ else
+ {
+ if (UDF_SB_LVIDBH(sb))
+ {
+ if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb))
+ accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]);
+
+ if (accum == 0xFFFFFFFF)
+ accum = 0;
+ }
+ }
+
+ return accum;
}
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 6084c5613..6988a7238 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -18,6 +18,11 @@
#define UDF_FLAG_UNDELETE 6
#define UDF_FLAG_UNHIDE 7
#define UDF_FLAG_VARCONV 8
+
+#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001
+#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002
+#define UDF_PART_FLAG_FREED_BITMAP 0x0004
+#define UDF_PART_FLAG_FREED_TABLE 0x0008
#define UDF_SB_FREE(X)\
{\
@@ -52,6 +57,7 @@
#define UDF_SB_TYPESPAR(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_sparing )
#define UDF_SB_TYPEVIRT(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_virtual )
#define UDF_SB_PARTFUNC(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_func )
+#define UDF_SB_PARTFLAGS(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_flags )
#define UDF_SB_VOLIDENT(X) ( UDF_SB(X)->s_volident )
#define UDF_SB_NUMPARTS(X) ( UDF_SB(X)->s_partitions )
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index b92eed7db..7dd00bc19 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -106,7 +106,7 @@ struct ktm
struct ustr
{
Uint8 u_cmpID;
- Uint8 u_name[UDF_NAME_LEN-1];
+ Uint8 u_name[UDF_NAME_LEN];
Uint8 u_len;
Uint8 padding;
unsigned long u_hash;
@@ -182,6 +182,8 @@ extern void udf_truncate(struct inode *);
extern void udf_free_blocks(const struct inode *, lb_addr, Uint32, Uint32);
extern int udf_prealloc_blocks(const struct inode *, Uint16, Uint32, Uint32);
extern int udf_new_block(const struct inode *, Uint16, Uint32, int *);
+
+/* fsync.c */
extern int udf_sync_file(struct file *, struct dentry *);
/* directory.c */
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 134b3c755..7cb2d3c1f 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -177,7 +177,8 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
return 0;
}
- for (i = 0; (i < ocu_len) && (utf_o->u_len < UDF_NAME_LEN) ;) {
+ for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
+ {
/* Expand OSTA compressed Unicode to Unicode */
c = ocu[i++];
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index af07961e1..8c5c15d55 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -548,7 +548,7 @@ static int ufs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,ufs_getfrag_block);
}
-static int ufs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,ufs_getfrag_block);
}