author     Ralf Baechle <ralf@linux-mips.org>    2000-03-27 23:54:12 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    2000-03-27 23:54:12 +0000
commit     d3e71cb08747743fce908122bab08b479eb403a5 (patch)
tree       cbec6948fdbdee9af81cf3ecfb504070d2745d7b /fs
parent     fe7ff1706e323d0e5ed83972960a1ecc1ee538b3 (diff)
Merge with Linux 2.3.99-pre3.
Diffstat (limited to 'fs')
-rw-r--r--  fs/adfs/inode.c            2
-rw-r--r--  fs/affs/file.c             2
-rw-r--r--  fs/bfs/file.c              2
-rw-r--r--  fs/binfmt_aout.c          55
-rw-r--r--  fs/binfmt_elf.c          106
-rw-r--r--  fs/binfmt_em86.c          23
-rw-r--r--  fs/binfmt_misc.c          15
-rw-r--r--  fs/binfmt_script.c        19
-rw-r--r--  fs/buffer.c                2
-rw-r--r--  fs/coda/dir.c              2
-rw-r--r--  fs/cramfs/inode.c          7
-rw-r--r--  fs/dcache.c               71
-rw-r--r--  fs/devfs/Makefile          2
-rw-r--r--  fs/exec.c                183
-rw-r--r--  fs/ext2/balloc.c          30
-rw-r--r--  fs/ext2/ialloc.c           7
-rw-r--r--  fs/ext2/inode.c            2
-rw-r--r--  fs/ext2/super.c           34
-rw-r--r--  fs/fat/inode.c             2
-rw-r--r--  fs/fifo.c                 81
-rw-r--r--  fs/hfs/inode.c             2
-rw-r--r--  fs/hpfs/file.c             2
-rw-r--r--  fs/lockd/clntproc.c        8
-rw-r--r--  fs/lockd/mon.c             2
-rw-r--r--  fs/minix/inode.c           2
-rw-r--r--  fs/nfs/Makefile            2
-rw-r--r--  fs/nfs/dir.c              62
-rw-r--r--  fs/nfs/file.c              4
-rw-r--r--  fs/nfs/flushd.c          304
-rw-r--r--  fs/nfs/inode.c            57
-rw-r--r--  fs/nfs/nfs2xdr.c          49
-rw-r--r--  fs/nfs/nfsroot.c           5
-rw-r--r--  fs/nfs/proc.c              8
-rw-r--r--  fs/nfs/read.c             10
-rw-r--r--  fs/nfs/write.c          1667
-rw-r--r--  fs/nfsd/export.c           5
-rw-r--r--  fs/nfsd/nfs3xdr.c          1
-rw-r--r--  fs/nfsd/nfsctl.c          42
-rw-r--r--  fs/nfsd/nfsproc.c          5
-rw-r--r--  fs/nfsd/nfssvc.c          26
-rw-r--r--  fs/nfsd/stats.c           45
-rw-r--r--  fs/nfsd/vfs.c              9
-rw-r--r--  fs/ntfs/fs.c               2
-rw-r--r--  fs/ntfs/inode.c            4
-rw-r--r--  fs/open.c                 42
-rw-r--r--  fs/openpromfs/inode.c      7
-rw-r--r--  fs/partitions/msdos.c     15
-rw-r--r--  fs/pipe.c                126
-rw-r--r--  fs/proc/array.c            3
-rw-r--r--  fs/qnx4/inode.c            2
-rw-r--r--  fs/romfs/inode.c           9
-rw-r--r--  fs/smbfs/file.c            2
-rw-r--r--  fs/super.c                 1
-rw-r--r--  fs/sysv/inode.c            2
-rw-r--r--  fs/udf/balloc.c          102
-rw-r--r--  fs/udf/file.c              6
-rw-r--r--  fs/udf/inode.c            23
-rw-r--r--  fs/udf/lowlevel.c         22
-rw-r--r--  fs/udf/misc.c              8
-rw-r--r--  fs/udf/namei.c             4
-rw-r--r--  fs/udf/super.c            84
-rw-r--r--  fs/udf/udf_sb.h            6
-rw-r--r--  fs/udf/udfdecl.h           4
-rw-r--r--  fs/udf/unicode.c           3
-rw-r--r--  fs/ufs/inode.c             2
65 files changed, 2310 insertions, 1133 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6eb08c857..c9aecc730 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -59,7 +59,7 @@ static int adfs_readpage(struct dentry *dentry, struct page *page)
return block_read_full_page(page, adfs_get_block);
}
-static int adfs_prepare_write(struct page *page, unsigned int from, unsigned int to)
+static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to)
{
return cont_prepare_write(page, from, to, adfs_get_block,
&((struct inode *)page->mapping->host)->u.adfs_i.mmu_private);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 069964acb..bc0db190f 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -299,7 +299,7 @@ static int affs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,affs_get_block);
}
-static int affs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,affs_get_block,
&((struct inode*)page->mapping->host)->u.affs_i.mmu_private);
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index f5ef5e652..c5ca51cda 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -137,7 +137,7 @@ static int bfs_readpage(struct dentry *dentry, struct page *page)
return block_read_full_page(page, bfs_get_block);
}
-static int bfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page, from, to, bfs_get_block);
}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 9339775ce..f48a2492d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -252,7 +252,6 @@ static unsigned long * create_aout_tables(char * p, struct linux_binprm * bprm)
static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
struct exec ex;
- struct file * file;
int fd;
unsigned long error;
unsigned long fd_offset;
@@ -263,7 +262,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
N_TRSIZE(ex) || N_DRSIZE(ex) ||
- bprm->dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+ bprm->file->f_dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
return -ENOEXEC;
}
@@ -304,26 +303,32 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
current->flags &= ~PF_FORKNOEXEC;
#ifdef __sparc__
if (N_MAGIC(ex) == NMAGIC) {
+ loff_t pos = fd_offset;
/* Fuck me plenty... */
+ /* <AOL></AOL> */
error = do_brk(N_TXTADDR(ex), ex.a_text);
- read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex),
- ex.a_text, 0);
+ bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
+ ex.a_text, &pos);
error = do_brk(N_DATADDR(ex), ex.a_data);
- read_exec(bprm->dentry, fd_offset + ex.a_text, (char *) N_DATADDR(ex),
- ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex),
+ ex.a_data, &pos);
goto beyond_if;
}
#endif
if (N_MAGIC(ex) == OMAGIC) {
+ loff_t pos;
#if defined(__alpha__) || defined(__sparc__)
+ pos = fd_offset;
do_brk(N_TXTADDR(ex) & PAGE_MASK,
ex.a_text+ex.a_data + PAGE_SIZE - 1);
- read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex),
- ex.a_text+ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
#else
+ pos = 32;
do_brk(0, ex.a_text+ex.a_data);
- read_exec(bprm->dentry, 32, (char *) 0, ex.a_text+ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file, (char *) 0,
+ ex.a_text+ex.a_data, &pos);
#endif
flush_icache_range((unsigned long) 0,
(unsigned long) ex.a_text+ex.a_data);
@@ -336,49 +341,48 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
error_time2 = jiffies;
}
- fd = open_dentry(bprm->dentry, O_RDONLY);
+ fd = get_unused_fd();
if (fd < 0)
return fd;
- file = fget(fd);
+ get_file(bprm->file);
+ fd_install(fd, bprm->file);
if ((fd_offset & ~PAGE_MASK) != 0 &&
(jiffies-error_time) > 5*HZ)
{
printk(KERN_WARNING
"fd_offset is not page aligned. Please convert program: %s\n",
- file->f_dentry->d_name.name);
+ bprm->file->f_dentry->d_name.name);
error_time = jiffies;
}
- if (!file->f_op || !file->f_op->mmap || ((fd_offset & ~PAGE_MASK) != 0)) {
- fput(file);
+ if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
+ loff_t pos = fd_offset;
sys_close(fd);
do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- read_exec(bprm->dentry, fd_offset,
- (char *) N_TXTADDR(ex), ex.a_text+ex.a_data, 0);
+ bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
+ ex.a_text+ex.a_data, &pos);
flush_icache_range((unsigned long) N_TXTADDR(ex),
(unsigned long) N_TXTADDR(ex) +
ex.a_text+ex.a_data);
goto beyond_if;
}
- error = do_mmap(file, N_TXTADDR(ex), ex.a_text,
+ error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset);
if (error != N_TXTADDR(ex)) {
- fput(file);
sys_close(fd);
send_sig(SIGKILL, current, 0);
return error;
}
- error = do_mmap(file, N_DATADDR(ex), ex.a_data,
+ error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset + ex.a_text);
- fput(file);
sys_close(fd);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
@@ -420,16 +424,12 @@ static int load_aout_library(struct file *file)
unsigned long bss, start_addr, len;
unsigned long error;
int retval;
- loff_t offset = 0;
struct exec ex;
inode = file->f_dentry->d_inode;
retval = -ENOEXEC;
- /* N.B. Save current fs? */
- set_fs(KERNEL_DS);
- error = file->f_op->read(file, (char *) &ex, sizeof(ex), &offset);
- set_fs(USER_DS);
+ error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
if (error != sizeof(ex))
goto out;
@@ -450,6 +450,7 @@ static int load_aout_library(struct file *file)
if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
static unsigned long error_time;
+ loff_t pos = N_TXTOFF(ex);
if ((jiffies-error_time) > 5*HZ)
{
@@ -461,8 +462,8 @@ static int load_aout_library(struct file *file)
do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- read_exec(file->f_dentry, N_TXTOFF(ex),
- (char *)start_addr, ex.a_text + ex.a_data, 0);
+ file->f_op->read(file, (char *)start_addr,
+ ex.a_text + ex.a_data, &pos);
flush_icache_range((unsigned long) start_addr,
(unsigned long) start_addr + ex.a_text + ex.a_data);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 28f82594f..a12183834 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -205,17 +205,15 @@ create_elf_tables(char *p, int argc, int envc,
an ELF header */
static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
- struct dentry * interpreter_dentry,
+ struct file * interpreter,
unsigned long *interp_load_addr)
{
- struct file * file;
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
unsigned long error = ~0UL;
- int elf_exec_fileno;
int retval, i, size;
/* First of all, some simple consistency checks */
@@ -224,8 +222,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
goto out;
if (!elf_check_arch(interp_elf_ex->e_machine))
goto out;
- if (!interpreter_dentry->d_inode->i_fop ||
- !interpreter_dentry->d_inode->i_fop->mmap)
+ if (!interpreter->f_op->mmap)
goto out;
/*
@@ -244,17 +241,10 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
if (!elf_phdata)
goto out;
- retval = read_exec(interpreter_dentry, interp_elf_ex->e_phoff,
- (char *) elf_phdata, size, 1);
+ retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
error = retval;
if (retval < 0)
- goto out_free;
-
- error = ~0UL;
- elf_exec_fileno = open_dentry(interpreter_dentry, O_RDONLY);
- if (elf_exec_fileno < 0)
- goto out_free;
- file = fget(elf_exec_fileno);
+ goto out_close;
eppnt = elf_phdata;
for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
@@ -271,7 +261,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED;
- map_addr = do_mmap(file,
+ map_addr = do_mmap(interpreter,
load_addr + ELF_PAGESTART(vaddr),
eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr),
elf_prot,
@@ -322,19 +312,17 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
out_close:
- fput(file);
- sys_close(elf_exec_fileno);
-out_free:
kfree(elf_phdata);
out:
return error;
}
static unsigned long load_aout_interp(struct exec * interp_ex,
- struct dentry * interpreter_dentry)
+ struct file * interpreter)
{
- unsigned long text_data, offset, elf_entry = ~0UL;
+ unsigned long text_data, elf_entry = ~0UL;
char * addr;
+ loff_t offset;
int retval;
current->mm->end_code = interp_ex->a_text;
@@ -357,7 +345,10 @@ static unsigned long load_aout_interp(struct exec * interp_ex,
}
do_brk(0, text_data);
- retval = read_exec(interpreter_dentry, offset, addr, text_data, 0);
+ retval = -ENOEXEC;
+ if (!interpreter->f_op->read)
+ goto out;
+ retval = interpreter->f_op->read(interpreter, addr, text_data, &offset);
if (retval < 0)
goto out;
flush_icache_range((unsigned long)addr,
@@ -383,8 +374,7 @@ out:
static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
- struct file * file;
- struct dentry *interpreter_dentry = NULL; /* to shut gcc up */
+ struct file *interpreter = NULL; /* to shut gcc up */
unsigned long load_addr = 0, load_bias;
int load_addr_set = 0;
char * elf_interpreter = NULL;
@@ -430,7 +420,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto out;
}
#endif
- if (!bprm->dentry->d_inode->i_fop||!bprm->dentry->d_inode->i_fop->mmap)
+ if (!bprm->file->f_op||!bprm->file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -443,16 +433,15 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!elf_phdata)
goto out;
- retval = read_exec(bprm->dentry, elf_ex.e_phoff,
- (char *) elf_phdata, size, 1);
+ retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size);
if (retval < 0)
goto out_free_ph;
- retval = open_dentry(bprm->dentry, O_RDONLY);
+ retval = get_unused_fd();
if (retval < 0)
goto out_free_ph;
- elf_exec_fileno = retval;
- file = fget(elf_exec_fileno);
+ get_file(bprm->file);
+ fd_install(elf_exec_fileno = retval, bprm->file);
elf_ppnt = elf_phdata;
elf_bss = 0;
@@ -480,9 +469,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!elf_interpreter)
goto out_free_file;
- retval = read_exec(bprm->dentry, elf_ppnt->p_offset,
+ retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
- elf_ppnt->p_filesz, 1);
+ elf_ppnt->p_filesz);
if (retval < 0)
goto out_free_interp;
/* If the program interpreter is one of these two,
@@ -495,32 +484,22 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
#if 0
printk("Using ELF interpreter %s\n", elf_interpreter);
#endif
- old_fs = get_fs(); /* This could probably be optimized */
- set_fs(get_ds());
#ifdef __sparc__
if (ibcs2_interpreter) {
unsigned long old_pers = current->personality;
current->personality = PER_SVR4;
- lock_kernel();
- interpreter_dentry = open_namei(elf_interpreter);
- unlock_kernel();
+ interpreter = open_exec(elf_interpreter);
current->personality = old_pers;
} else
#endif
{
- lock_kernel();
- interpreter_dentry = open_namei(elf_interpreter);
- unlock_kernel();
+ interpreter = open_exec(elf_interpreter);
}
- set_fs(old_fs);
- retval = PTR_ERR(interpreter_dentry);
- if (IS_ERR(interpreter_dentry))
+ retval = PTR_ERR(interpreter);
+ if (IS_ERR(interpreter))
goto out_free_interp;
- retval = permission(interpreter_dentry->d_inode, MAY_EXEC);
- if (retval < 0)
- goto out_free_dentry;
- retval = read_exec(interpreter_dentry, 0, bprm->buf, 128, 1);
+ retval = kernel_read(interpreter, 0, bprm->buf, 128);
if (retval < 0)
goto out_free_dentry;
@@ -629,7 +608,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
elf_flags |= MAP_FIXED;
}
- error = do_mmap(file, ELF_PAGESTART(load_bias + vaddr),
+ error = do_mmap(bprm->file, ELF_PAGESTART(load_bias + vaddr),
(elf_ppnt->p_filesz +
ELF_PAGEOFFSET(elf_ppnt->p_vaddr)),
elf_prot, elf_flags, (elf_ppnt->p_offset -
@@ -661,7 +640,6 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
elf_brk = k;
}
set_fs(old_fs);
- fput(file); /* all done with the file */
elf_entry += load_bias;
elf_bss += load_bias;
@@ -674,14 +652,14 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (elf_interpreter) {
if (interpreter_type == INTERPRETER_AOUT)
elf_entry = load_aout_interp(&interp_ex,
- interpreter_dentry);
+ interpreter);
else
elf_entry = load_elf_interp(&interp_elf_ex,
- interpreter_dentry,
+ interpreter,
&interp_load_addr);
lock_kernel();
- dput(interpreter_dentry);
+ fput(interpreter);
unlock_kernel();
kfree(elf_interpreter);
@@ -708,7 +686,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
#ifndef VM_STACK_FLAGS
lock_kernel();
- current->executable = dget(bprm->dentry);
+ current->executable = dget(bprm->file->f_dentry);
unlock_kernel();
#endif
compute_creds(bprm);
@@ -779,13 +757,12 @@ out:
/* error cleanup */
out_free_dentry:
lock_kernel();
- dput(interpreter_dentry);
+ fput(interpreter);
unlock_kernel();
out_free_interp:
if (elf_interpreter)
kfree(elf_interpreter);
out_free_file:
- fput(file);
sys_close(elf_exec_fileno);
out_free_ph:
kfree(elf_phdata);
@@ -797,25 +774,13 @@ out_free_ph:
static int load_elf_library(struct file *file)
{
- struct dentry * dentry;
- struct inode * inode;
struct elf_phdr *elf_phdata;
unsigned long elf_bss = 0, bss, len, k;
int retval, error, i, j;
struct elfhdr elf_ex;
- loff_t offset = 0;
-
- error = -EACCES;
- dentry = file->f_dentry;
- inode = dentry->d_inode;
- /* seek to the beginning of the file */
error = -ENOEXEC;
-
- /* N.B. save current DS?? */
- set_fs(KERNEL_DS);
- retval = file->f_op->read(file, (char *) &elf_ex, sizeof(elf_ex), &offset);
- set_fs(USER_DS);
+ retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
if (retval != sizeof(elf_ex))
goto out;
@@ -824,8 +789,7 @@ static int load_elf_library(struct file *file)
/* First of all, some simple consistency checks */
if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
- !elf_check_arch(elf_ex.e_machine) ||
- (!inode->i_fop || !inode->i_fop->mmap))
+ !elf_check_arch(elf_ex.e_machine) || !file->f_op->mmap)
goto out;
/* Now read in all of the header information */
@@ -840,8 +804,8 @@ static int load_elf_library(struct file *file)
goto out;
/* N.B. check for error return?? */
- retval = read_exec(dentry, elf_ex.e_phoff, (char *) elf_phdata,
- sizeof(struct elf_phdr) * elf_ex.e_phnum, 1);
+ retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata,
+ sizeof(struct elf_phdr) * elf_ex.e_phnum);
error = -ENOEXEC;
for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 1a1533a10..1b18094eb 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -25,7 +25,7 @@
static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
{
char *interp, *i_name, *i_arg;
- struct dentry * dentry;
+ struct file * file;
int retval;
struct elfhdr elf_ex;
@@ -38,16 +38,13 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
/* First of all, some simple consistency checks */
if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
(!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
- (!bprm->dentry->d_inode->i_fop ||
- !bprm->dentry->d_inode->i_fop->mmap)) {
+ (!bprm->file->f_op || !bprm->file->f_op->mmap)) {
return -ENOEXEC;
}
bprm->sh_bang++; /* Well, the bang-shell is implicit... */
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
/* Unlike in the script case, we don't have to do any hairy
* parsing to find our interpreter... it's hardcoded!
@@ -79,16 +76,14 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
/*
* OK, now restart the process with the interpreter's inode.
- * Note that we use open_namei() as the name is now in kernel
+ * Note that we use open_exec() as the name is now in kernel
* space, and we don't need to copy it.
*/
- lock_kernel();
- dentry = open_namei(interp);
- unlock_kernel();
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ file = open_exec(interp);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- bprm->dentry = dentry;
+ bprm->file = file;
retval = prepare_binprm(bprm);
if (retval < 0)
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9d98d7d70..a03c4723f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
#include <linux/proc_fs.h>
#include <linux/string.h>
#include <linux/ctype.h>
+#include <linux/file.h>
#include <linux/spinlock.h>
#include <asm/uaccess.h>
@@ -180,7 +181,7 @@ static struct binfmt_entry *check_file(struct linux_binprm *bprm)
static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
struct binfmt_entry *fmt;
- struct dentry * dentry;
+ struct file * file;
char iname[128];
char *iname_addr = iname;
int retval;
@@ -200,8 +201,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (!fmt)
goto _ret;
- dput(bprm->dentry);
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
/* Build args for interpreter */
remove_arg_zero(bprm);
@@ -213,11 +214,11 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
bprm->argc++;
bprm->filename = iname; /* for binfmt_script */
- dentry = open_namei(iname);
- retval = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ file = open_exec(iname);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
goto _ret;
- bprm->dentry = dentry;
+ bprm->file = file;
retval = prepare_binprm(bprm);
if (retval >= 0)
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 450f918a4..dc78f8389 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -11,12 +11,13 @@
#include <linux/malloc.h>
#include <linux/binfmts.h>
#include <linux/init.h>
+#include <linux/file.h>
#include <linux/smp_lock.h>
static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
{
char *cp, *i_name, *i_arg;
- struct dentry * dentry;
+ struct file *file;
char interp[128];
int retval;
@@ -28,10 +29,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
*/
bprm->sh_bang++;
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
bprm->buf[127] = '\0';
if ((cp = strchr(bprm->buf, '\n')) == NULL)
@@ -81,13 +80,11 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
/*
* OK, now restart the process with the interpreter's dentry.
*/
- lock_kernel();
- dentry = open_namei(interp);
- unlock_kernel();
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ file = open_exec(interp);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
- bprm->dentry = dentry;
+ bprm->file = file;
retval = prepare_binprm(bprm);
if (retval < 0)
return retval;
diff --git a/fs/buffer.c b/fs/buffer.c
index 617188db0..26580ee0d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2029,7 +2029,7 @@ int block_symlink(struct inode *inode, const char *symname, int len)
if (!page)
goto fail;
- err = mapping->a_ops->prepare_write(page, 0, len-1);
+ err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
if (err)
goto fail_map;
kaddr = (char*)page_address(page);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 1e39811e6..bb51b0c05 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -685,7 +685,7 @@ static int coda_venus_readdir(struct file *filp, void *getdent,
}
/* we use this routine to read the file into our buffer */
- bufsize = read_exec(filp->f_dentry, filp->f_pos, buff, DIR_BUFSIZE, 1);
+ bufsize = kernel_read(filp, filp->f_pos, buff, DIR_BUFSIZE);
if ( bufsize < 0) {
printk("coda_venus_readdir: cannot read directory %d.\n",
bufsize);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 237c7d9aa..c5ca590d2 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -200,12 +200,6 @@ out:
return retval;
}
-/* Nothing to do.. */
-static void cramfs_put_super(struct super_block *sb)
-{
- return;
-}
-
static int cramfs_statfs(struct super_block *sb, struct statfs *buf)
{
buf->f_type = CRAMFS_MAGIC;
@@ -361,7 +355,6 @@ static struct inode_operations cramfs_dir_inode_operations = {
};
static struct super_operations cramfs_ops = {
- put_super: cramfs_put_super,
statfs: cramfs_statfs,
};
diff --git a/fs/dcache.c b/fs/dcache.c
index dc424305f..d4aef49e7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -27,6 +27,9 @@
#define DCACHE_PARANOIA 1
/* #define DCACHE_DEBUG 1 */
+/* Right now the dcache depends on the kernel lock */
+#define check_lock() if (!kernel_locked()) BUG()
+
/* For managing the dcache */
extern unsigned long num_physpages, page_cache_size;
extern int inodes_stat[];
@@ -104,6 +107,8 @@ void dput(struct dentry *dentry)
{
int count;
+ check_lock();
+
if (!dentry)
return;
@@ -158,7 +163,7 @@ out:
count,
dentry->d_parent->d_name.name,
dentry->d_name.name);
- *(int *)0 = 0;
+ BUG();
}
/*
@@ -168,6 +173,8 @@ out:
*/
int d_invalidate(struct dentry * dentry)
{
+ check_lock();
+
/*
* If it's already been dropped, return OK.
*/
@@ -226,6 +233,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
*/
void prune_dcache(int count)
{
+ check_lock();
for (;;) {
struct dentry *dentry;
struct list_head *tmp = dentry_unused.prev;
@@ -261,6 +269,8 @@ void shrink_dcache_sb(struct super_block * sb)
struct list_head *tmp, *next;
struct dentry *dentry;
+ check_lock();
+
/*
* Pass one ... move the dentries for the specified
* superblock to the most recent end of the unused list.
@@ -308,6 +318,8 @@ int is_root_busy(struct dentry *root)
struct list_head *next;
int count = root->d_count;
+ check_lock();
+
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -337,6 +349,44 @@ resume:
}
/*
+ * Search for at least 1 mount point in the dentry's subdirs.
+ * We descend to the next level whenever the d_subdirs
+ * list is non-empty and continue searching.
+ */
+int have_submounts(struct dentry *parent)
+{
+ struct dentry *this_parent = parent;
+ struct list_head *next;
+
+ if (parent->d_mounts != parent)
+ return 1;
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+ /* Have we found a mount point ? */
+ if (dentry->d_mounts != dentry)
+ return 1;
+ if (!list_empty(&dentry->d_subdirs)) {
+ this_parent = dentry;
+ goto repeat;
+ }
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ if (this_parent != parent) {
+ next = this_parent->d_child.next;
+ this_parent = this_parent->d_parent;
+ goto resume;
+ }
+ return 0; /* No mount points found in tree */
+}
+
+/*
* Search the dentry child list for the specified parent,
* and move any unused dentries to the end of the unused
* list for prune_dcache(). We descend to the next level
@@ -349,6 +399,8 @@ static int select_parent(struct dentry * parent)
struct list_head *next;
int found = 0;
+ check_lock();
+
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -525,6 +577,8 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp = head->next;
+ check_lock();
+
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
if (tmp == head)
@@ -564,6 +618,8 @@ int d_validate(struct dentry *dentry, struct dentry *dparent,
struct list_head *base, *lhp;
int valid = 1;
+ check_lock();
+
if (dentry != dparent) {
base = d_hash(dparent, hash);
lhp = base;
@@ -605,6 +661,10 @@ out:
*/
void d_delete(struct dentry * dentry)
{
+ check_lock();
+
+ check_lock();
+
/*
* Are we the only user?
*/
@@ -646,6 +706,7 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target)
{
const unsigned char *old_name, *new_name;
+ check_lock();
memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
old_name = target->d_name.name;
new_name = dentry->d_name.name;
@@ -674,6 +735,8 @@ static inline void switch_names(struct dentry * dentry, struct dentry * target)
*/
void d_move(struct dentry * dentry, struct dentry * target)
{
+ check_lock();
+
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -773,7 +836,11 @@ asmlinkage long sys_getcwd(char *buf, unsigned long size)
error = -ENOMEM;
if (page) {
unsigned long len;
- char * cwd = d_path(pwd, page, PAGE_SIZE);
+ char * cwd;
+
+ lock_kernel();
+ cwd = d_path(pwd, page, PAGE_SIZE);
+ unlock_kernel();
error = -ERANGE;
len = PAGE_SIZE + page - cwd;
diff --git a/fs/devfs/Makefile b/fs/devfs/Makefile
index 2b301b37a..23f190410 100644
--- a/fs/devfs/Makefile
+++ b/fs/devfs/Makefile
@@ -36,4 +36,4 @@ doc: base.c util.c
test:
gcc -o /tmp/base.o -D__KERNEL__ -I../../include -Wall \
-Wstrict-prototypes -O2 -fomit-frame-pointer -pipe \
- -fno-strength-reduce -DCPU=686 -DEXPORT_SYMTAB -c base.c
+ -fno-strength-reduce -DEXPORT_SYMTAB -c base.c
diff --git a/fs/exec.c b/fs/exec.c
index d7d5240be..8a8a10631 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -93,56 +93,6 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
__MOD_DEC_USE_COUNT(fmt->module);
}
-/* N.B. Error returns must be < 0 */
-int open_dentry(struct dentry * dentry, int mode)
-{
- struct inode * inode = dentry->d_inode;
- struct file * f;
- struct list_head * l = NULL;
- int fd, error;
-
- lock_kernel();
- if (inode->i_sb)
- l = &inode->i_sb->s_files;
-
- error = -EINVAL;
- if (!inode->i_fop)
- goto out;
- fd = get_unused_fd();
- if (fd >= 0) {
- error = -ENFILE;
- f = get_empty_filp();
- if (!f)
- goto out_fd;
- f->f_flags = mode;
- f->f_mode = (mode+1) & O_ACCMODE;
- f->f_dentry = dentry;
- f->f_pos = 0;
- f->f_reada = 0;
- f->f_op = inode->i_fop;
- if (f->f_op->open) {
- error = f->f_op->open(inode,f);
- if (error)
- goto out_filp;
- }
- file_move(f, l);
- fd_install(fd, f);
- dget(dentry);
- }
- unlock_kernel();
- return fd;
-
-out_filp:
- if (error > 0)
- error = -EIO;
- put_filp(f);
-out_fd:
- put_unused_fd(fd);
-out:
- unlock_kernel();
- return error;
-}
-
/*
* Note that a shared library must be both readable and executable due to
* security reasons.
@@ -365,44 +315,45 @@ int setup_arg_pages(struct linux_binprm *bprm)
return 0;
}
-/*
- * Read in the complete executable. This is used for "-N" files
- * that aren't on a block boundary, and for files on filesystems
- * without get_block support.
- */
-int read_exec(struct dentry *dentry, unsigned long offset,
- char * addr, unsigned long count, int to_kmem)
+struct file *open_exec(const char *name)
{
- struct file file;
- struct inode * inode = dentry->d_inode;
- int result = -ENOEXEC;
+ struct dentry *dentry;
+ struct file *file;
- if (!inode->i_fop)
- goto end_readexec;
- if (init_private_file(&file, dentry, 1))
- goto end_readexec;
- if (!file.f_op->read)
- goto close_readexec;
- if (file.f_op->llseek) {
- if (file.f_op->llseek(&file,offset,0) != offset)
- goto close_readexec;
- } else
- file.f_pos = offset;
- if (to_kmem) {
- mm_segment_t old_fs = get_fs();
- set_fs(get_ds());
- result = file.f_op->read(&file, addr, count, &file.f_pos);
- set_fs(old_fs);
- } else {
- result = verify_area(VERIFY_WRITE, addr, count);
- if (result)
- goto close_readexec;
- result = file.f_op->read(&file, addr, count, &file.f_pos);
+ lock_kernel();
+ dentry = lookup_dentry(name, NULL, LOOKUP_FOLLOW);
+ file = (struct file*) dentry;
+ if (!IS_ERR(dentry)) {
+ file = ERR_PTR(-EACCES);
+ if (dentry->d_inode && S_ISREG(dentry->d_inode->i_mode)) {
+ int err = permission(dentry->d_inode, MAY_EXEC);
+ file = ERR_PTR(err);
+ if (!err) {
+ file = dentry_open(dentry, O_RDONLY);
+out:
+ unlock_kernel();
+ return file;
+ }
+ }
+ dput(dentry);
}
-close_readexec:
- if (file.f_op->release)
- file.f_op->release(inode,&file);
-end_readexec:
+ goto out;
+}
+
+int kernel_read(struct file *file, unsigned long offset,
+ char * addr, unsigned long count)
+{
+ mm_segment_t old_fs;
+ loff_t pos = offset;
+ int result = -ENOSYS;
+
+ if (!file->f_op->read)
+ goto fail;
+ old_fs = get_fs();
+ set_fs(get_ds());
+ result = file->f_op->read(file, addr, count, &pos);
+ set_fs(old_fs);
+fail:
return result;
}
@@ -540,7 +491,7 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_thread();
if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
- permission(bprm->dentry->d_inode,MAY_READ))
+ permission(bprm->file->f_dentry->d_inode,MAY_READ))
current->dumpable = 0;
/* An exec changes our domain. We are no longer part of the thread
@@ -580,7 +531,7 @@ int prepare_binprm(struct linux_binprm *bprm)
{
int mode;
int retval,id_change,cap_raised;
- struct inode * inode = bprm->dentry->d_inode;
+ struct inode * inode = bprm->file->f_dentry->d_inode;
mode = inode->i_mode;
if (!S_ISREG(mode)) /* must be regular file */
@@ -677,7 +628,7 @@ int prepare_binprm(struct linux_binprm *bprm)
}
memset(bprm->buf,0,sizeof(bprm->buf));
- return read_exec(bprm->dentry,0,bprm->buf,128,1);
+ return kernel_read(bprm->file,0,bprm->buf,128);
}
/*
@@ -763,24 +714,20 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
int i;
char * dynloader[] = { "/sbin/loader" };
- struct dentry * dentry;
+ struct file * file;
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- bprm->dentry = NULL;
+ fput(bprm->file);
+ bprm->file = NULL;
bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
for (i = 0 ; i < MAX_ARG_PAGES ; i++) /* clear page-table */
bprm_loader.page[i] = NULL;
- lock_kernel();
- dentry = open_namei(dynloader[0]);
- unlock_kernel();
- retval = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ file = open_exec(dynloader[0]);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
return retval;
- bprm->dentry = dentry;
+ bprm->file = file;
bprm->loader = bprm_loader.p;
retval = prepare_binprm(bprm);
if (retval<0)
@@ -802,12 +749,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
retval = fn(bprm, regs);
if (retval >= 0) {
put_binfmt(fmt);
- if (bprm->dentry) {
- lock_kernel();
- dput(bprm->dentry);
- unlock_kernel();
- }
- bprm->dentry = NULL;
+ if (bprm->file)
+ fput(bprm->file);
+ bprm->file = NULL;
current->did_exec = 1;
return retval;
}
@@ -815,7 +759,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
put_binfmt(fmt);
if (retval != -ENOEXEC)
break;
- if (!bprm->dentry) {
+ if (!bprm->file) {
spin_unlock(&binfmt_lock);
return retval;
}
@@ -847,37 +791,31 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
struct linux_binprm bprm;
- struct dentry * dentry;
+ struct file *file;
int retval;
int i;
bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));
- lock_kernel();
- dentry = open_namei(filename);
- unlock_kernel();
+ file = open_exec(filename);
- retval = PTR_ERR(dentry);
- if (IS_ERR(dentry))
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
return retval;
- bprm.dentry = dentry;
+ bprm.file = file;
bprm.filename = filename;
bprm.sh_bang = 0;
bprm.loader = 0;
bprm.exec = 0;
if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
- lock_kernel();
- dput(dentry);
- unlock_kernel();
+ fput(file);
return bprm.argc;
}
if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
- lock_kernel();
- dput(dentry);
- unlock_kernel();
+ fput(file);
return bprm.envc;
}
@@ -905,11 +843,8 @@ int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs
out:
/* Something went wrong, return the inode and free the argument pages*/
- if (bprm.dentry) {
- lock_kernel();
- dput(bprm.dentry);
- unlock_kernel();
- }
+ if (bprm.file)
+ fput(bprm.file);
/* Assumes that free_page() can take a NULL argument. */
/* I hope this is ok for all architectures */
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 904f5cb8f..90ce121ce 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -11,6 +11,7 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#include <linux/config.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/quotaops.h>
@@ -300,21 +301,20 @@ do_more:
if (!gdp)
goto error_return;
- if (test_opt (sb, CHECK_STRICT) &&
- (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
- in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
- in_range (block, le32_to_cpu(gdp->bg_inode_table),
- sb->u.ext2_sb.s_itb_per_group) ||
- in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
- sb->u.ext2_sb.s_itb_per_group)))
- ext2_panic (sb, "ext2_free_blocks",
+ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
+ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
+ in_range (block, le32_to_cpu(gdp->bg_inode_table),
+ sb->u.ext2_sb.s_itb_per_group) ||
+ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
+ sb->u.ext2_sb.s_itb_per_group))
+ ext2_error (sb, "ext2_free_blocks",
"Freeing blocks in system zones - "
"Block = %lu, count = %lu",
block, count);
for (i = 0; i < count; i++) {
if (!ext2_clear_bit (bit + i, bh->b_data))
- ext2_warning (sb, "ext2_free_blocks",
+ ext2_error (sb, "ext2_free_blocks",
"bit already cleared for block %lu",
block);
else {
@@ -527,11 +527,11 @@ got_block:
tmp = j + i * EXT2_BLOCKS_PER_GROUP(sb) + le32_to_cpu(es->s_first_data_block);
- if (test_opt (sb, CHECK_STRICT) &&
- (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
- tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
- in_range (tmp, le32_to_cpu(gdp->bg_inode_table), sb->u.ext2_sb.s_itb_per_group)))
- ext2_panic (sb, "ext2_new_block",
+ if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
+ tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
+ in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
+ sb->u.ext2_sb.s_itb_per_group))
+ ext2_error (sb, "ext2_new_block",
"Allocating block in system zone - "
"block = %u", tmp);
@@ -679,6 +679,7 @@ int ext2_group_sparse(int group)
test_root(group, 7));
}
+#ifdef CONFIG_EXT2_CHECK
/* Called at mount-time, super-block is locked */
void ext2_check_blocks_bitmap (struct super_block * sb)
{
@@ -753,3 +754,4 @@ void ext2_check_blocks_bitmap (struct super_block * sb)
"stored = %lu, counted = %lu",
(unsigned long) le32_to_cpu(es->s_free_blocks_count), bitmap_count);
}
+#endif
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 3a3e4a69c..277562ec7 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -12,6 +12,7 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#include <linux/config.h>
#include <linux/fs.h>
#include <linux/locks.h>
#include <linux/quotaops.h>
@@ -236,7 +237,7 @@ void ext2_free_inode (struct inode * inode)
/* Ok, now we can actually update the inode bitmaps.. */
if (!ext2_clear_bit (bit, bh->b_data))
- ext2_warning (sb, "ext2_free_inode",
+ ext2_error (sb, "ext2_free_inode",
"bit already cleared for inode %lu", ino);
else {
gdp = ext2_get_group_desc (sb, block_group, &bh2);
@@ -401,7 +402,7 @@ repeat:
EXT2_INODES_PER_GROUP(sb))) <
EXT2_INODES_PER_GROUP(sb)) {
if (ext2_set_bit (j, bh->b_data)) {
- ext2_warning (sb, "ext2_new_inode",
+ ext2_error (sb, "ext2_new_inode",
"bit already set for inode %d", j);
goto repeat;
}
@@ -527,6 +528,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
#endif
}
+#ifdef CONFIG_EXT2_CHECK
/* Called at mount-time, super-block is locked */
void ext2_check_inodes_bitmap (struct super_block * sb)
{
@@ -565,3 +567,4 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
(unsigned long) le32_to_cpu(es->s_free_inodes_count),
bitmap_count);
}
+#endif
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index cfaf5d4d3..dd09b95aa 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -628,7 +628,7 @@ static int ext2_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,ext2_get_block);
}
-static int ext2_prepare_write(struct page *page, unsigned from, unsigned to)
+static int ext2_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,ext2_get_block);
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 73be71e61..a68289d71 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -16,6 +16,7 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
@@ -153,23 +154,14 @@ static int parse_options (char * options, unsigned long * sb_block,
set_opt (*mount_options, NO_UID32);
}
else if (!strcmp (this_char, "check")) {
- if (!value || !*value)
- set_opt (*mount_options, CHECK_NORMAL);
- else if (!strcmp (value, "none")) {
- clear_opt (*mount_options, CHECK_NORMAL);
- clear_opt (*mount_options, CHECK_STRICT);
- }
- else if (!strcmp (value, "normal"))
- set_opt (*mount_options, CHECK_NORMAL);
- else if (!strcmp (value, "strict")) {
- set_opt (*mount_options, CHECK_NORMAL);
- set_opt (*mount_options, CHECK_STRICT);
- }
- else {
- printk ("EXT2-fs: Invalid check option: %s\n",
- value);
- return 0;
- }
+ if (!value || !*value || !strcmp (value, "none"))
+ clear_opt (*mount_options, CHECK);
+ else
+#ifdef CONFIG_EXT2_CHECK
+ set_opt (*mount_options, CHECK);
+#else
+ printk("EXT2 Check option not supported\n");
+#endif
}
else if (!strcmp (this_char, "debug"))
set_opt (*mount_options, DEBUG);
@@ -205,10 +197,6 @@ static int parse_options (char * options, unsigned long * sb_block,
set_opt (*mount_options, GRPID);
else if (!strcmp (this_char, "minixdf"))
set_opt (*mount_options, MINIX_DF);
- else if (!strcmp (this_char, "nocheck")) {
- clear_opt (*mount_options, CHECK_NORMAL);
- clear_opt (*mount_options, CHECK_STRICT);
- }
else if (!strcmp (this_char, "nogrpid") ||
!strcmp (this_char, "sysvgroups"))
clear_opt (*mount_options, GRPID);
@@ -305,10 +293,12 @@ static void ext2_setup_super (struct super_block * sb,
EXT2_BLOCKS_PER_GROUP(sb),
EXT2_INODES_PER_GROUP(sb),
sb->u.ext2_sb.s_mount_opt);
+#ifdef CONFIG_EXT2_CHECK
if (test_opt (sb, CHECK)) {
ext2_check_blocks_bitmap (sb);
ext2_check_inodes_bitmap (sb);
}
+#endif
}
#if 0 /* ibasket's still have unresolved bugs... -DaveM */
@@ -398,7 +388,6 @@ struct super_block * ext2_read_super (struct super_block * sb, void * data,
}
sb->u.ext2_sb.s_mount_opt = 0;
- set_opt (sb->u.ext2_sb.s_mount_opt, CHECK_NORMAL);
if (!parse_options ((char *) data, &sb_block, &resuid, &resgid,
&sb->u.ext2_sb.s_mount_opt)) {
return NULL;
@@ -674,7 +663,6 @@ int ext2_remount (struct super_block * sb, int * flags, char * data)
/*
* Allow the "check" option to be passed as a remount option.
*/
- new_mount_opt = EXT2_MOUNT_CHECK_NORMAL;
if (!parse_options (data, &tmp, &resuid, &resgid,
&new_mount_opt))
return -EINVAL;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cedd3ba2b..a0202c66f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -737,7 +737,7 @@ static int fat_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,fat_get_block);
}
-static int fat_prepare_write(struct page *page, unsigned from, unsigned to)
+static int fat_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,fat_get_block,
&MSDOS_I((struct inode*)page->mapping->host)->mmu_private);
diff --git a/fs/fifo.c b/fs/fifo.c
index fcaf45f9f..25a08e757 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -12,6 +12,21 @@
#include <linux/mm.h>
#include <linux/malloc.h>
+static void wait_for_partner(struct inode* inode, unsigned int* cnt)
+{
+ int cur = *cnt;
+ while(cur == *cnt) {
+ pipe_wait(inode);
+ if(signal_pending(current))
+ break;
+ }
+}
+
+static void wake_up_partner(struct inode* inode)
+{
+ wake_up_interruptible(PIPE_WAIT(*inode));
+}
+
static int fifo_open(struct inode *inode, struct file *filp)
{
int ret;
@@ -20,29 +35,12 @@ static int fifo_open(struct inode *inode, struct file *filp)
if (down_interruptible(PIPE_SEM(*inode)))
goto err_nolock_nocleanup;
- if (! inode->i_pipe) {
- unsigned long page;
- struct pipe_inode_info *info;
-
- info = kmalloc(sizeof(struct pipe_inode_info),GFP_KERNEL);
-
+ if (!inode->i_pipe) {
ret = -ENOMEM;
- if (!info)
- goto err_nocleanup;
- page = __get_free_page(GFP_KERNEL);
- if (!page) {
- kfree(info);
+ if(!pipe_new(inode))
goto err_nocleanup;
- }
-
- inode->i_pipe = info;
-
- init_waitqueue_head(PIPE_WAIT(*inode));
- PIPE_BASE(*inode) = (char *) page;
- PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
- PIPE_WAITING_WRITERS(*inode) = PIPE_WAITING_READERS(*inode) = 0;
}
+ filp->f_version = 0;
switch (filp->f_mode) {
case 1:
@@ -51,27 +49,23 @@ static int fifo_open(struct inode *inode, struct file *filp)
* POSIX.1 says that O_NONBLOCK means return with the FIFO
* opened, even when there is no process writing the FIFO.
*/
- filp->f_op = &connecting_fifo_fops;
+ filp->f_op = &read_fifo_fops;
+ PIPE_RCOUNTER(*inode)++;
if (PIPE_READERS(*inode)++ == 0)
- wake_up_interruptible(PIPE_WAIT(*inode));
-
- if (!(filp->f_flags & O_NONBLOCK)) {
- while (!PIPE_WRITERS(*inode)) {
- if (signal_pending(current))
+ wake_up_partner(inode);
+
+ if (!PIPE_WRITERS(*inode)) {
+ if ((filp->f_flags & O_NONBLOCK)) {
+ /* suppress POLLHUP until we have
+ * seen a writer */
+ filp->f_version = PIPE_WCOUNTER(*inode);
+ } else
+ {
+ wait_for_partner(inode, &PIPE_WCOUNTER(*inode));
+ if(signal_pending(current))
goto err_rd;
- up(PIPE_SEM(*inode));
- interruptible_sleep_on(PIPE_WAIT(*inode));
-
- /* Note that using down_interruptible here
- and similar places below is pointless,
- since we have to acquire the lock to clean
- up properly. */
- down(PIPE_SEM(*inode));
}
}
-
- if (PIPE_WRITERS(*inode))
- filp->f_op = &read_fifo_fops;
break;
case 2:
@@ -85,15 +79,14 @@ static int fifo_open(struct inode *inode, struct file *filp)
goto err;
filp->f_op = &write_fifo_fops;
+ PIPE_WCOUNTER(*inode)++;
if (!PIPE_WRITERS(*inode)++)
- wake_up_interruptible(PIPE_WAIT(*inode));
+ wake_up_partner(inode);
- while (!PIPE_READERS(*inode)) {
+ if (!PIPE_READERS(*inode)) {
+ wait_for_partner(inode, &PIPE_RCOUNTER(*inode));
if (signal_pending(current))
goto err_wr;
- up(PIPE_SEM(*inode));
- interruptible_sleep_on(PIPE_WAIT(*inode));
- down(PIPE_SEM(*inode));
}
break;
@@ -108,8 +101,10 @@ static int fifo_open(struct inode *inode, struct file *filp)
PIPE_READERS(*inode)++;
PIPE_WRITERS(*inode)++;
+ PIPE_RCOUNTER(*inode)++;
+ PIPE_WCOUNTER(*inode)++;
if (PIPE_READERS(*inode) == 1 || PIPE_WRITERS(*inode) == 1)
- wake_up_interruptible(PIPE_WAIT(*inode));
+ wake_up_partner(inode);
break;
default:
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 34e365663..8c0afe0c8 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -225,7 +225,7 @@ static int hfs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,hfs_get_block);
}
-static int hfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,hfs_get_block,
&((struct inode*)page->mapping->host)->u.hfs_i.mmu_private);
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 710b9120b..d8063e296 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -94,7 +94,7 @@ static int hpfs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,hpfs_get_block);
}
-static int hpfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,hpfs_get_block,
&((struct inode*)page->mapping->host)->u.hpfs_i.mmu_private);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 517456326..20b9bb490 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -296,6 +296,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
struct rpc_clnt *clnt;
struct nlm_args *argp = &req->a_args;
struct nlm_res *resp = &req->a_res;
+ struct rpc_message msg;
int status;
dprintk("lockd: call procedure %s on %s (async)\n",
@@ -306,8 +307,11 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
return -ENOLCK;
/* bootstrap and kick off the async RPC call */
- status = rpc_do_call(clnt, proc, argp, resp, RPC_TASK_ASYNC,
- callback, req);
+ msg.rpc_proc = proc;
+ msg.rpc_argp = argp;
+ msg.rpc_resp =resp;
+ msg.rpc_cred = NULL;
+ status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
/* If the async call is proceeding, increment host refcount */
if (status >= 0 && (req->a_flags & RPC_TASK_ASYNC))
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index a2f280bdc..55dee3886 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -163,7 +163,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
*p++ = htonl(argp->proc);
/* This is the private part. Needed only for SM_MON call */
- if (rqstp->rq_task->tk_proc == SM_MON) {
+ if (rqstp->rq_task->tk_msg.rpc_proc == SM_MON) {
*p++ = argp->addr;
*p++ = 0;
*p++ = 0;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 4c9fa16a3..a581e328a 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -1014,7 +1014,7 @@ static int minix_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,minix_get_block);
}
-static int minix_prepare_write(struct page *page, unsigned from, unsigned to)
+static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,minix_get_block);
}
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 3171e8adc..3c8aac510 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -9,7 +9,7 @@
O_TARGET := nfs.o
O_OBJS := inode.o file.o read.o write.o dir.o symlink.o proc.o \
- nfs2xdr.o
+ nfs2xdr.o flushd.o
ifdef CONFIG_ROOT_NFS
O_OBJS += nfsroot.o mount_clnt.o
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 37b2b682b..3ca240129 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -596,9 +596,12 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
out_valid:
return 1;
out_bad:
- d_drop(dentry);
if (!list_empty(&dentry->d_subdirs))
shrink_dcache_parent(dentry);
+ /* If we have submounts, don't unhash ! */
+ if (have_submounts(dentry))
+ goto out_valid;
+ d_drop(dentry);
/* Purge readdir caches. */
if (dentry->d_parent->d_inode) {
nfs_zap_caches(dentry->d_parent->d_inode);
@@ -862,61 +865,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
return error;
}
-
-/* Note: we copy the code from lookup_dentry() here, only: we have to
- * omit the directory lock. We are already the owner of the lock when
- * we reach here. And "down(&dir->i_sem)" would make us sleep forever
- * ('cause WE have the lock)
- *
- * VERY IMPORTANT: calculate the hash for this dentry!!!!!!!!
- * Otherwise the cached lookup DEFINITELY WILL fail. And a new dentry
- * is created. Without the DCACHE_NFSFS_RENAMED flag. And with d_count
- * == 1. And trouble.
- *
- * Concerning my choice of the temp name: it is just nice to have
- * i_ino part of the temp name, as this offers another check whether
- * somebody attempts to remove the "silly renamed" dentry itself.
- * Which is something that I consider evil. Your opinion may vary.
- * BUT:
- * Now that I compute the hash value right, it should be possible to simply
- * check for the DCACHE_NFSFS_RENAMED flag in dentry->d_flag instead of
- * doing the string compare.
- * WHICH MEANS:
- * This offers the opportunity to shorten the temp name. Currently, I use
- * the hex representation of i_ino + an event counter. This sums up to
- * as much as 36 characters for a 64 bit machine, and needs 20 chars on
- * a 32 bit machine.
- * QUINTESSENCE
- * The use of i_ino is simply cosmetic. All we need is a unique temp
- * file name for the .nfs files. The event counter seemed to be adequate.
- * And as we retry in case such a file already exists, we are guaranteed
- * to succeed.
- */
-
-static
-struct dentry *nfs_silly_lookup(struct dentry *parent, char *silly, int slen)
-{
- struct qstr sqstr;
- struct dentry *sdentry;
- struct dentry *res;
-
- sqstr.name = silly;
- sqstr.len = slen;
- sqstr.hash = full_name_hash(silly, slen);
- sdentry = d_lookup(parent, &sqstr);
- if (!sdentry) {
- sdentry = d_alloc(parent, &sqstr);
- if (sdentry == NULL)
- return ERR_PTR(-ENOMEM);
- res = nfs_lookup(parent->d_inode, sdentry);
- if (res) {
- dput(sdentry);
- return res;
- }
- }
- return sdentry;
-}
-
static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
{
static unsigned int sillycounter = 0;
@@ -966,7 +914,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
dfprintk(VFS, "trying to rename %s to %s\n",
dentry->d_name.name, silly);
- sdentry = nfs_silly_lookup(dentry->d_parent, silly, slen);
+ sdentry = lookup_one(silly, dget(dentry->d_parent));
/*
* N.B. Better to return EBUSY here ... it could be
* dangerous to delete the file while it's in use.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9a91bb1ab..32d290c73 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -144,10 +144,10 @@ nfs_fsync(struct file *file, struct dentry *dentry)
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
-static int nfs_prepare_write(struct page *page, unsigned offset, unsigned to)
+static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
kmap(page);
- return 0;
+ return nfs_flush_incompatible(file, page);
}
static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
diff --git a/fs/nfs/flushd.c b/fs/nfs/flushd.c
new file mode 100644
index 000000000..d36c3a9ae
--- /dev/null
+++ b/fs/nfs/flushd.c
@@ -0,0 +1,304 @@
+/*
+ * linux/fs/nfs/flushd.c
+ *
+ * For each NFS mount, there is a separate cache object that contains
+ * a hash table of all clusters. With this cache, an async RPC task
+ * (`flushd') is associated, which wakes up occasionally to inspect
+ * its list of dirty buffers.
+ * (Note that RPC tasks aren't kernel threads. Take a look at the
+ * rpciod code to understand what they are).
+ *
+ * Inside the cache object, we also maintain a count of the current number
+ * of dirty pages, which may not exceed a certain threshold.
+ * (FIXME: This threshold should be configurable).
+ *
+ * The code is streamlined for what I think is the prevalent case for
+ * NFS traffic, which is sequential write access without concurrent
+ * access by different processes.
+ *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
+ *
+ * Rewritten 6/3/2000 by Trond Myklebust
+ * Copyright (C) 1999, 2000, Trond Myklebust <trond.myklebust@fys.uio.no>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/malloc.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+
+#include <linux/sched.h>
+
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+
+#include <linux/spinlock.h>
+
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_flushd.h>
+#include <linux/nfs_mount.h>
+
+/*
+ * Various constants
+ */
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
+/*
+ * This is the wait queue all cluster daemons sleep on
+ */
+static struct rpc_wait_queue flushd_queue = RPC_INIT_WAITQ("nfs_flushd");
+
+/*
+ * Spinlock
+ */
+spinlock_t nfs_flushd_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Local function declarations.
+ */
+static void nfs_flushd(struct rpc_task *);
+static void nfs_flushd_exit(struct rpc_task *);
+
+
+int nfs_reqlist_init(struct nfs_server *server)
+{
+ struct nfs_reqlist *cache;
+ struct rpc_task *task;
+ int status = 0;
+
+ dprintk("NFS: writecache_init\n");
+ spin_lock(&nfs_flushd_lock);
+ cache = server->rw_requests;
+
+ if (cache->task)
+ goto out_unlock;
+
+ /* Create the RPC task */
+ status = -ENOMEM;
+ task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC);
+ if (!task)
+ goto out_unlock;
+
+ task->tk_calldata = server;
+
+ cache->task = task;
+
+ /* Run the task */
+ cache->runat = jiffies;
+
+ cache->auth = server->client->cl_auth;
+ task->tk_action = nfs_flushd;
+ task->tk_exit = nfs_flushd_exit;
+
+ spin_unlock(&nfs_flushd_lock);
+ rpc_execute(task);
+ return 0;
+ out_unlock:
+ spin_unlock(&nfs_flushd_lock);
+ return status;
+}
+
+void nfs_reqlist_exit(struct nfs_server *server)
+{
+ struct nfs_reqlist *cache;
+
+ cache = server->rw_requests;
+ if (!cache)
+ return;
+
+ dprintk("NFS: reqlist_exit (ptr %p rpc %p)\n", cache, cache->task);
+ while (cache->task || cache->inodes) {
+ spin_lock(&nfs_flushd_lock);
+ if (!cache->task) {
+ spin_unlock(&nfs_flushd_lock);
+ nfs_reqlist_init(server);
+ } else {
+ cache->task->tk_status = -ENOMEM;
+ rpc_wake_up_task(cache->task);
+ spin_unlock(&nfs_flushd_lock);
+ }
+ interruptible_sleep_on_timeout(&cache->request_wait, 1 * HZ);
+ }
+}
+
+int nfs_reqlist_alloc(struct nfs_server *server)
+{
+ struct nfs_reqlist *cache;
+ if (server->rw_requests)
+ return 0;
+
+ cache = (struct nfs_reqlist *)kmalloc(sizeof(*cache), GFP_KERNEL);
+ if (!cache)
+ return -ENOMEM;
+
+ memset(cache, 0, sizeof(*cache));
+ init_waitqueue_head(&cache->request_wait);
+ server->rw_requests = cache;
+
+ return 0;
+}
+
+void nfs_reqlist_free(struct nfs_server *server)
+{
+ if (server->rw_requests) {
+ kfree(server->rw_requests);
+ server->rw_requests = NULL;
+ }
+}
+
+void nfs_wake_flushd()
+{
+ rpc_wake_up_status(&flushd_queue, -ENOMEM);
+}
+
+static void inode_append_flushd(struct inode *inode)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct inode **q;
+
+ spin_lock(&nfs_flushd_lock);
+ if (NFS_FLAGS(inode) & NFS_INO_FLUSH)
+ goto out;
+ inode->u.nfs_i.hash_next = NULL;
+
+ q = &cache->inodes;
+ while (*q)
+ q = &(*q)->u.nfs_i.hash_next;
+ *q = inode;
+
+ /* Note: we increase the inode i_count in order to prevent
+ * it from disappearing when on the flush list
+ */
+ NFS_FLAGS(inode) |= NFS_INO_FLUSH;
+ inode->i_count++;
+ out:
+ spin_unlock(&nfs_flushd_lock);
+}
+
+void inode_remove_flushd(struct inode *inode)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct inode **q;
+
+ spin_lock(&nfs_flushd_lock);
+ if (!(NFS_FLAGS(inode) & NFS_INO_FLUSH))
+ goto out;
+
+ q = &cache->inodes;
+ while (*q && *q != inode)
+ q = &(*q)->u.nfs_i.hash_next;
+ if (*q) {
+ *q = inode->u.nfs_i.hash_next;
+ NFS_FLAGS(inode) &= ~NFS_INO_FLUSH;
+ iput(inode);
+ }
+ out:
+ spin_unlock(&nfs_flushd_lock);
+}
+
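+/*
+ * Record when this inode next needs scanning, put it on the flush
+ * list, and make sure a flush daemon is running, waking it early
+ * if the new deadline is sooner than its next scheduled run.
+ */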
+void inode_schedule_scan(struct inode *inode, unsigned long time)
+{
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct rpc_task *task;
+ unsigned long mintimeout;
+
+ if (time_after(NFS_NEXTSCAN(inode), time))
+ NFS_NEXTSCAN(inode) = time;
+ mintimeout = jiffies + 1 * HZ;
+ if (time_before(mintimeout, NFS_NEXTSCAN(inode)))
+ mintimeout = NFS_NEXTSCAN(inode);
+ inode_append_flushd(inode);
+
+ spin_lock(&nfs_flushd_lock);
+ task = cache->task;
+ if (!task) {
+ spin_unlock(&nfs_flushd_lock);
+ nfs_reqlist_init(NFS_SERVER(inode));
+ } else {
+ if (time_after(cache->runat, mintimeout))
+ rpc_wake_up_task(task);
+ spin_unlock(&nfs_flushd_lock);
+ }
+}
+
+
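+/*
+ * Main loop of the flush daemon: detach the list of inodes awaiting
+ * writeback, flush requests whose scan time has expired (or all of
+ * them when woken because of memory pressure), requeue inodes that
+ * still have dirty pages, and work out how long to sleep before the
+ * next pass.
+ */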
+static void
+nfs_flushd(struct rpc_task *task)
+{
+ struct nfs_server *server;
+ struct nfs_reqlist *cache;
+ struct inode *inode, *next;
+ unsigned long delay = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ int flush = (task->tk_status == -ENOMEM);
+
+ dprintk("NFS: %4d flushd starting\n", task->tk_pid);
+ server = (struct nfs_server *) task->tk_calldata;
+ cache = server->rw_requests;
+
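+	/* Detach the current list of inodes to scan; inodes queued from
+	 * here on go onto a fresh list for the next pass. */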
+ spin_lock(&nfs_flushd_lock);
+ next = cache->inodes;
+ cache->inodes = NULL;
+ spin_unlock(&nfs_flushd_lock);
+
+ while ((inode = next) != NULL) {
+ next = next->u.nfs_i.hash_next;
+ inode->u.nfs_i.hash_next = NULL;
+ NFS_FLAGS(inode) &= ~NFS_INO_FLUSH;
+
+ if (flush) {
+ nfs_sync_file(inode, NULL, 0, 0, FLUSH_AGING);
+ } else if (time_after(jiffies, NFS_NEXTSCAN(inode))) {
+ NFS_NEXTSCAN(inode) = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ nfs_flush_timeout(inode, FLUSH_AGING);
+#ifdef CONFIG_NFS_V3
+ nfs_commit_timeout(inode, FLUSH_AGING);
+#endif
+ }
+
+ if (nfs_have_writebacks(inode)) {
+ inode_append_flushd(inode);
+ if (time_after(delay, NFS_NEXTSCAN(inode)))
+ delay = NFS_NEXTSCAN(inode);
+ }
+ iput(inode);
+ }
+
+ dprintk("NFS: %4d flushd back to sleep\n", task->tk_pid);
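+	/* Sleep for at least one second, otherwise until the earliest scheduled scan. */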
+ if (time_after(jiffies + 1 * HZ, delay))
+ delay = 1 * HZ;
+ else
+ delay = delay - jiffies;
+ task->tk_status = 0;
+ task->tk_action = nfs_flushd;
+ task->tk_timeout = delay;
+ cache->runat = jiffies + task->tk_timeout;
+
+ spin_lock(&nfs_flushd_lock);
+ if (!cache->nr_requests && !cache->inodes) {
+ cache->task = NULL;
+ task->tk_action = NULL;
+ } else
+ rpc_sleep_on(&flushd_queue, task, NULL, NULL);
+ spin_unlock(&nfs_flushd_lock);
+}
+
+static void
+nfs_flushd_exit(struct rpc_task *task)
+{
+ struct nfs_server *server;
+ struct nfs_reqlist *cache;
+ server = (struct nfs_server *) task->tk_calldata;
+ cache = server->rw_requests;
+
+ spin_lock(&nfs_flushd_lock);
+ if (cache->task == task)
+ cache->task = NULL;
+ spin_unlock(&nfs_flushd_lock);
+ wake_up(&cache->request_wait);
+ rpc_release_task(task);
+}
+
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 441d62edc..ca7e1b944 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -27,6 +27,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_flushd.h>
#include <linux/lockd/bind.h>
#include <linux/smp_lock.h>
@@ -74,6 +75,12 @@ nfs_read_inode(struct inode * inode)
inode->i_rdev = 0;
NFS_FILEID(inode) = 0;
NFS_FSID(inode) = 0;
+ INIT_LIST_HEAD(&inode->u.nfs_i.dirty);
+ INIT_LIST_HEAD(&inode->u.nfs_i.commit);
+ INIT_LIST_HEAD(&inode->u.nfs_i.writeback);
+ inode->u.nfs_i.ndirty = 0;
+ inode->u.nfs_i.ncommit = 0;
+ inode->u.nfs_i.npages = 0;
NFS_CACHEINV(inode);
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
}
@@ -92,8 +99,6 @@ nfs_put_inode(struct inode * inode)
static void
nfs_delete_inode(struct inode * inode)
{
- int failed;
-
dprintk("NFS: delete_inode(%x/%ld)\n", inode->i_dev, inode->i_ino);
lock_kernel();
@@ -101,29 +106,12 @@ nfs_delete_inode(struct inode * inode)
nfs_free_dircache(inode);
} else {
/*
- * Flush out any pending write requests ...
+ * The following can never actually happen...
*/
- if (NFS_WRITEBACK(inode) != NULL) {
- unsigned long timeout = jiffies + 5*HZ;
-#ifdef NFS_DEBUG_VERBOSE
-printk("nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
-#endif
- nfs_inval(inode);
- while (NFS_WRITEBACK(inode) != NULL &&
- time_before(jiffies, timeout)) {
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HZ/10);
- }
- current->state = TASK_RUNNING;
- if (NFS_WRITEBACK(inode) != NULL)
- printk("NFS: Arghhh, stuck RPC requests!\n");
+ if (nfs_have_writebacks(inode)) {
+ printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino);
}
}
-
- failed = nfs_check_failed_request(inode);
- if (failed)
- printk("NFS: inode %ld had %d failed requests\n",
- inode->i_ino, failed);
unlock_kernel();
clear_inode(inode);
@@ -135,9 +123,18 @@ nfs_put_super(struct super_block *sb)
struct nfs_server *server = &sb->u.nfs_sb.s_server;
struct rpc_clnt *rpc;
+ /*
+ * First get rid of the request flushing daemon.
+ * Relies on rpc_shutdown_client() waiting on all
+ * client tasks to finish.
+ */
+ nfs_reqlist_exit(server);
+
if ((rpc = server->client) != NULL)
rpc_shutdown_client(rpc);
+ nfs_reqlist_free(server);
+
if (!(server->flags & NFS_MOUNT_NONLM))
lockd_down(); /* release rpc.lockd */
rpciod_down(); /* release rpciod */
@@ -306,6 +303,12 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
sb->s_root->d_op = &nfs_dentry_operations;
sb->s_root->d_fsdata = root_fh;
+ /* Fire up the writeback cache */
+ if (nfs_reqlist_alloc(server) < 0) {
+ printk(KERN_NOTICE "NFS: cannot initialize writeback cache.\n");
+ goto failure_kill_reqlist;
+ }
+
/* We're airborne */
/* Check whether to start the lockd process */
@@ -314,6 +317,8 @@ nfs_read_super(struct super_block *sb, void *raw_data, int silent)
return sb;
/* Yargs. It didn't work out. */
+ failure_kill_reqlist:
+ nfs_reqlist_exit(server);
out_no_root:
printk("nfs_read_super: get root inode failed\n");
iput(root_inode);
@@ -342,6 +347,7 @@ out_no_xprt:
printk(KERN_WARNING "NFS: cannot create RPC transport.\n");
out_free_host:
+ nfs_reqlist_free(server);
kfree(server->hostname);
out_unlock:
goto out_fail;
@@ -440,7 +446,6 @@ nfs_invalidate_inode(struct inode *inode)
make_bad_inode(inode);
inode->i_mode = save_mode;
- nfs_inval(inode);
nfs_zap_caches(inode);
}
@@ -864,7 +869,7 @@ nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
* to look at the size or the mtime the server sends us
* too closely, as we're in the middle of modifying them.
*/
- if (NFS_WRITEBACK(inode))
+ if (nfs_have_writebacks(inode))
goto out;
if (inode->i_size != fattr->size) {
@@ -925,7 +930,7 @@ printk("nfs_refresh_inode: invalidating %ld pages\n", inode->i_nrpages);
static DECLARE_FSTYPE(nfs_fs_type, "nfs", nfs_read_super, 0);
extern int nfs_init_fhcache(void);
-extern int nfs_init_wreqcache(void);
+extern int nfs_init_nfspagecache(void);
/*
* Initialize NFS
@@ -939,7 +944,7 @@ init_nfs_fs(void)
if (err)
return err;
- err = nfs_init_wreqcache();
+ err = nfs_init_nfspagecache();
if (err)
return err;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a7e53e6db..5ad2aaa67 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -63,6 +63,7 @@ static int nfs_stat_to_errno(int stat);
#define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz
#define NFS_readlinkres_sz 1
#define NFS_readres_sz 1+NFS_fattr_sz+1
+#define NFS_writeres_sz NFS_attrstat_sz
#define NFS_stat_sz 1
#define NFS_readdirres_sz 1
#define NFS_statfsres_sz 1+NFS_info_sz
@@ -273,6 +274,7 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
static int
nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
{
+ unsigned int nr;
u32 count = args->count;
p = xdr_encode_fhandle(p, args->fh);
@@ -282,28 +284,35 @@ nfs_xdr_writeargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args)
*p++ = htonl(count);
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
- req->rq_svec[1].iov_base = (void *) args->buffer;
- req->rq_svec[1].iov_len = count;
- req->rq_slen += count;
- req->rq_snr = 2;
+ /* Get the number of buffers in the send iovec */
+ nr = args->nriov;
+
+ if (nr+2 > MAX_IOVEC) {
+ printk(KERN_ERR "NFS: Bad number of iov's in xdr_writeargs "
+ "(nr %d max %d)\n", nr, MAX_IOVEC);
+ return -EINVAL;
+ }
+
+ /* Copy the iovec */
+ memcpy(req->rq_svec + 1, args->iov, nr * sizeof(struct iovec));
#ifdef NFS_PAD_WRITES
/*
* Some old servers require that the message length
* be a multiple of 4, so we pad it here if needed.
*/
- count = ((count + 3) & ~3) - count;
- if (count) {
-#if 0
-printk("nfs_writeargs: padding write, len=%d, slen=%d, pad=%d\n",
-req->rq_svec[1].iov_len, req->rq_slen, count);
-#endif
- req->rq_svec[2].iov_base = (void *) "\0\0\0";
- req->rq_svec[2].iov_len = count;
- req->rq_slen += count;
- req->rq_snr = 3;
+ if (count & 3) {
+ struct iovec *iov = req->rq_svec + nr + 1;
+ int pad = 4 - (count & 3);
+
+ iov->iov_base = (void *) "\0\0\0";
+ iov->iov_len = pad;
+ count += pad;
+ nr++;
}
#endif
+ req->rq_slen += count;
+ req->rq_snr += nr;
return 0;
}
@@ -593,6 +602,16 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, void *dummy)
}
/*
+ * Decode WRITE reply
+ */
+static int
+nfs_xdr_writeres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res)
+{
+ res->verf->committed = NFS_FILE_SYNC;
+ return nfs_xdr_attrstat(req, p, res->fattr);
+}
+
+/*
* Decode STATFS reply
*/
static int
@@ -678,7 +697,7 @@ static struct rpc_procinfo nfs_procedures[18] = {
PROC(readlink, readlinkargs, readlinkres),
PROC(read, readargs, readres),
PROC(writecache, enc_void, dec_void),
- PROC(write, writeargs, attrstat),
+ PROC(write, writeargs, writeres),
PROC(create, createargs, diropres),
PROC(remove, diropargs, stat),
PROC(rename, renameargs, stat),
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index d9a423f16..a592608be 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -145,6 +145,8 @@ static struct nfs_bool_opts {
{ "nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO },
{ "ac", ~NFS_MOUNT_NOAC, 0 },
{ "noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC },
+ { "lock", ~NFS_MOUNT_NONLM, 0 },
+ { "nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM },
{ NULL, 0, 0 }
};
@@ -320,7 +322,7 @@ int __init root_nfs_init(void)
* Parse NFS server and directory information passed on the kernel
* command line.
*/
-void __init nfs_root_setup(char *line)
+int __init nfs_root_setup(char *line)
{
ROOT_DEV = MKDEV(UNNAMED_MAJOR, 255);
if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
@@ -333,6 +335,7 @@ void __init nfs_root_setup(char *line)
sprintf(nfs_root_name, NFS_ROOT, line);
}
root_nfs_parse_addr(nfs_root_name);
+ return 1;
}
__setup("nfsroot=", nfs_root_setup);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index bb55ce6d6..3823c3118 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -111,11 +111,15 @@ nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap,
unsigned long offset, unsigned int count,
const void *buffer, struct nfs_fattr *fattr)
{
- struct nfs_writeargs arg = { fhandle, offset, count, buffer };
+ struct nfs_writeargs arg = { fhandle, offset, count, 1, 1,
+ {{(void *) buffer, count}, {0,0}, {0,0}, {0,0},
+ {0,0}, {0,0}, {0,0}, {0,0}}};
+ struct nfs_writeverf verf;
+ struct nfs_writeres res = {fattr, &verf, count};
int status;
dprintk("NFS call write %d @ %ld\n", count, offset);
- status = rpc_call(server->client, NFSPROC_WRITE, &arg, fattr,
+ status = rpc_call(server->client, NFSPROC_WRITE, &arg, &res,
swap? (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0);
dprintk("NFS reply read: %d\n", status);
return status < 0? status : count;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 717d12bbb..aa17780e5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,6 +171,7 @@ static inline int
nfs_readpage_async(struct dentry *dentry, struct inode *inode,
struct page *page)
{
+ struct rpc_message msg;
unsigned long address;
struct nfs_rreq *req;
int result = -1, flags;
@@ -195,8 +196,13 @@ nfs_readpage_async(struct dentry *dentry, struct inode *inode,
/* Start the async call */
dprintk("NFS: executing async READ request.\n");
- result = rpc_do_call(NFS_CLIENT(inode), NFSPROC_READ,
- &req->ra_args, &req->ra_res, flags,
+
+ msg.rpc_proc = NFSPROC_READ;
+ msg.rpc_argp = &req->ra_args;
+ msg.rpc_resp = &req->ra_res;
+ msg.rpc_cred = NULL;
+
+ result = rpc_call_async(NFS_CLIENT(inode), &msg, flags,
nfs_readpage_result, req);
if (result < 0)
goto out_free;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5f847bec8..af023a121 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@
* Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/swap.h>
@@ -54,33 +55,126 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
+#include <linux/nfs_flushd.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#define NFS_PARANOIA 1
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static void nfs_wback_begin(struct rpc_task *task);
-static void nfs_wback_result(struct rpc_task *task);
-static void nfs_cancel_request(struct nfs_wreq *req);
+/*
+ * Spinlock protecting the write request lists and request counters
+ */
+spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int nfs_nr_requests = 0;
/*
- * Cache parameters
+ * Local structures
+ *
+ * Valid flags for a dirty buffer
*/
-#define NFS_WRITEBACK_DELAY (10 * HZ)
-#define NFS_WRITEBACK_MAX 64
+#define PG_BUSY 0x0001
/*
- * Limit number of delayed writes
+ * This is the struct where the WRITE/COMMIT arguments go.
*/
-static int nr_write_requests = 0;
-static struct rpc_wait_queue write_queue = RPC_INIT_WAITQ("write_chain");
+struct nfs_write_data {
+ struct rpc_task task;
+ struct file *file;
+ struct rpc_cred *cred;
+ struct nfs_writeargs args; /* argument struct */
+ struct nfs_writeres res; /* result struct */
+ struct nfs_fattr fattr;
+ struct nfs_writeverf verf;
+ struct list_head pages; /* Coalesced requests we wish to flush */
+};
+
+struct nfs_page {
+ struct list_head wb_hash, /* Inode */
+ wb_list,
+ *wb_list_head;
+ struct file *wb_file;
+ struct rpc_cred *wb_cred;
+ struct page *wb_page; /* page to write out */
+ wait_queue_head_t wb_wait; /* wait queue */
+ unsigned long wb_timeout; /* when to write/commit */
+ unsigned int wb_offset, /* Offset of write */
+ wb_bytes, /* Length of request */
+ wb_count, /* reference count */
+ wb_flags;
+ struct nfs_writeverf wb_verf; /* Commit cookie */
+};
+
+#define NFS_WBACK_BUSY(req) ((req)->wb_flags & PG_BUSY)
+
+/*
+ * Local function declarations
+ */
+static void nfs_writeback_done(struct rpc_task *);
+#ifdef CONFIG_NFS_V3
+static void nfs_commit_done(struct rpc_task *);
+#endif
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
+static kmem_cache_t *nfs_page_cachep = NULL;
+static kmem_cache_t *nfs_wdata_cachep = NULL;
+
+static __inline__ struct nfs_page *nfs_page_alloc(void)
+{
+ struct nfs_page *p;
+ p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->wb_hash);
+ INIT_LIST_HEAD(&p->wb_list);
+ init_waitqueue_head(&p->wb_wait);
+ }
+ return p;
+}
+
+static __inline__ void nfs_page_free(struct nfs_page *p)
+{
+ kmem_cache_free(nfs_page_cachep, p);
+}
+
+static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
+{
+ struct nfs_write_data *p;
+ p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NFS);
+ if (p) {
+ memset(p, 0, sizeof(*p));
+ INIT_LIST_HEAD(&p->pages);
+ }
+ return p;
+}
+
+static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
+{
+ kmem_cache_free(nfs_wdata_cachep, p);
+}
+
+static void nfs_writedata_release(struct rpc_task *task)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
+ rpc_release_task(task);
+ nfs_writedata_free(wdata);
+}
+
+/*
+ * This function will be used to simulate weak cache consistency
+ * under NFSv2 when the NFSv3 attribute patch is included.
+ * For the moment, we just call nfs_refresh_inode().
+ */
+static __inline__ int
+nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
+{
+ return nfs_refresh_inode(inode, fattr);
+}
+
/*
* Write a page synchronously.
* Offset is the data offset within the page.
@@ -161,278 +255,770 @@ io_error:
}
/*
- * Append a writeback request to a list
+ * Write a page to the server. This was supposed to be used for
+ * NFS swapping only.
+ * FIXME: Using this for mmap is pointless, breaks asynchronous
+ * writebacks, and is extremely slow.
*/
-static inline void
-append_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+int
+nfs_writepage(struct dentry * dentry, struct page *page)
{
- dprintk("NFS: append_write_request(%p, %p)\n", q, wreq);
- rpc_append_list(q, wreq);
+ struct inode *inode = dentry->d_inode;
+ unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ unsigned offset = PAGE_CACHE_SIZE;
+ int err;
+
+ /* easy case */
+ if (page->index < end_index)
+ goto do_it;
+ /* things got complicated... */
+ offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+ /* OK, are we completely out? */
+ if (page->index >= end_index+1 || !offset)
+ return -EIO;
+do_it:
+ err = nfs_writepage_sync(dentry, inode, page, 0, offset);
+	if (err == offset)
+		return 0;
+ return err;
+}
+
+/*
+ * Check whether the file range we want to write to is locked by
+ * us.
+ */
+static int
+region_locked(struct inode *inode, struct nfs_page *req)
+{
+ struct file_lock *fl;
+ unsigned long rqstart, rqend;
+
+ /* Don't optimize writes if we don't use NLM */
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+ return 0;
+
+ rqstart = page_offset(req->wb_page) + req->wb_offset;
+ rqend = rqstart + req->wb_bytes;
+ for (fl = inode->i_flock; fl; fl = fl->fl_next) {
+ if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
+ && fl->fl_type == F_WRLCK
+ && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline struct nfs_page *
+nfs_inode_wb_entry(struct list_head *head)
+{
+ return list_entry(head, struct nfs_page, wb_hash);
}
/*
- * Remove a writeback request from a list
+ * Insert a write request into an inode
*/
static inline void
-remove_write_request(struct nfs_wreq **q, struct nfs_wreq *wreq)
+nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
- dprintk("NFS: remove_write_request(%p, %p)\n", q, wreq);
- rpc_remove_list(q, wreq);
+ if (!list_empty(&req->wb_hash))
+ return;
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to hash an unlocked request!\n");
+ inode->u.nfs_i.npages++;
+ list_add(&req->wb_hash, &inode->u.nfs_i.writeback);
+ req->wb_count++;
}
/*
- * Find a non-busy write request for a given page to
- * try to combine with.
+ * Remove a write request from an inode
*/
-static inline struct nfs_wreq *
-find_write_request(struct inode *inode, struct page *page)
+static inline void
+nfs_inode_remove_request(struct nfs_page *req)
{
- pid_t pid = current->pid;
- struct nfs_wreq *head, *req;
+ struct inode *inode;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_hash)) {
+ spin_unlock(&nfs_wreq_lock);
+ return;
+ }
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to unhash an unlocked request!\n");
+ inode = req->wb_file->f_dentry->d_inode;
+ list_del(&req->wb_hash);
+ INIT_LIST_HEAD(&req->wb_hash);
+ inode->u.nfs_i.npages--;
+ if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
+ if (!nfs_have_writebacks(inode))
+ inode_remove_flushd(inode);
+ spin_unlock(&nfs_wreq_lock);
+ nfs_release_request(req);
+}
- dprintk("NFS: find_write_request(%x/%ld, %p)\n",
- inode->i_dev, inode->i_ino, page);
- if (!(req = head = NFS_WRITEBACK(inode)))
- return NULL;
- do {
- /*
- * We can't combine with canceled requests or
- * requests that have already been started..
- */
- if (req->wb_flags & (NFS_WRITE_CANCELLED | NFS_WRITE_INPROGRESS))
+/*
+ * Find a request
+ */
+static inline struct nfs_page *
+_nfs_find_request(struct inode *inode, struct page *page)
+{
+ struct list_head *head, *next;
+
+ head = &inode->u.nfs_i.writeback;
+ next = head->next;
+ while (next != head) {
+ struct nfs_page *req = nfs_inode_wb_entry(next);
+ next = next->next;
+ if (page_index(req->wb_page) != page_index(page))
continue;
+ req->wb_count++;
+ return req;
+ }
+ return NULL;
+}
- if (req->wb_page == page && req->wb_pid == pid)
- return req;
+struct nfs_page *
+nfs_find_request(struct inode *inode, struct page *page)
+{
+ struct nfs_page *req;
- /*
- * Ehh, don't keep too many tasks queued..
- */
- rpc_wake_up_task(&req->wb_task);
+ spin_lock(&nfs_wreq_lock);
+ req = _nfs_find_request(inode, page);
+ spin_unlock(&nfs_wreq_lock);
+ return req;
+}
- } while ((req = WB_NEXT(req)) != head);
- return NULL;
+static inline struct nfs_page *
+nfs_list_entry(struct list_head *head)
+{
+ return list_entry(head, struct nfs_page, wb_list);
}
/*
- * Find and release all failed requests for this inode.
+ * Insert a write request into a sorted list
*/
-int
-nfs_check_failed_request(struct inode * inode)
+static inline void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{
- /* FIXME! */
- return 0;
+ struct list_head *prev;
+
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Add to list failed!\n");
+ return;
+ }
+ if (list_empty(&req->wb_hash)) {
+		printk(KERN_ERR "NFS: attempt to add an unhashed request to a list!\n");
+ return;
+ }
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to add an unlocked request to a list!\n");
+ prev = head->prev;
+ while (prev != head) {
+ struct nfs_page *p = nfs_list_entry(prev);
+ if (page_index(p->wb_page) < page_index(req->wb_page))
+ break;
+ prev = prev->prev;
+ }
+ list_add(&req->wb_list, prev);
+ req->wb_list_head = head;
}
/*
- * Try to merge adjacent write requests. This works only for requests
- * issued by the same user.
+ * Remove a write request from a list
*/
-static inline int
-update_write_request(struct nfs_wreq *req, unsigned int first,
- unsigned int bytes)
+static inline void
+nfs_list_remove_request(struct nfs_page *req)
{
- unsigned int rqfirst = req->wb_offset,
- rqlast = rqfirst + req->wb_bytes,
- last = first + bytes;
+ if (list_empty(&req->wb_list))
+ return;
+ if (!NFS_WBACK_BUSY(req))
+		printk(KERN_ERR "NFS: attempt to remove an unlocked request from a list!\n");
+ list_del(&req->wb_list);
+ INIT_LIST_HEAD(&req->wb_list);
+ req->wb_list_head = NULL;
+}
- dprintk("nfs: trying to update write request %p\n", req);
+/*
+ * Add a request to the inode's dirty list.
+ */
+static inline void
+nfs_mark_request_dirty(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
- /* not contiguous? */
- if (rqlast < first || last < rqfirst)
- return 0;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_list)) {
+ nfs_list_add_request(req, &inode->u.nfs_i.dirty);
+ inode->u.nfs_i.ndirty++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ /*
+ * NB: the call to inode_schedule_scan() must lie outside the
+ * spinlock since it can run flushd().
+ */
+ inode_schedule_scan(inode, req->wb_timeout);
+}
- if (first < rqfirst)
- rqfirst = first;
- if (rqlast < last)
- rqlast = last;
+/*
+ * Check if a request is dirty
+ */
+static inline int
+nfs_dirty_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
+}
- req->wb_offset = rqfirst;
- req->wb_bytes = rqlast - rqfirst;
- req->wb_count++;
+#ifdef CONFIG_NFS_V3
+/*
+ * Add a request to the inode's commit list.
+ */
+static inline void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
- return 1;
+ spin_lock(&nfs_wreq_lock);
+ if (list_empty(&req->wb_list)) {
+ nfs_list_add_request(req, &inode->u.nfs_i.commit);
+ inode->u.nfs_i.ncommit++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ /*
+ * NB: the call to inode_schedule_scan() must lie outside the
+ * spinlock since it can run flushd().
+ */
+ inode_schedule_scan(inode, req->wb_timeout);
}
+#endif
-static kmem_cache_t *nfs_wreq_cachep;
-
-int nfs_init_wreqcache(void)
+/*
+ * Lock the page of an asynchronous request
+ */
+static inline int
+nfs_lock_request(struct nfs_page *req)
{
- nfs_wreq_cachep = kmem_cache_create("nfs_wreq",
- sizeof(struct nfs_wreq),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (nfs_wreq_cachep == NULL)
- return -ENOMEM;
- return 0;
+ if (NFS_WBACK_BUSY(req))
+ return 0;
+ req->wb_count++;
+ req->wb_flags |= PG_BUSY;
+ return 1;
}
static inline void
-free_write_request(struct nfs_wreq * req)
+nfs_unlock_request(struct nfs_page *req)
{
- if (!--req->wb_count)
- kmem_cache_free(nfs_wreq_cachep, req);
+ if (!NFS_WBACK_BUSY(req)) {
+ printk(KERN_ERR "NFS: Invalid unlock attempted\n");
+ return;
+ }
+ req->wb_flags &= ~PG_BUSY;
+ wake_up(&req->wb_wait);
+ nfs_release_request(req);
}
/*
- * Create and initialize a writeback request
+ * Create a write request.
+ * Page must be locked by the caller. This makes sure we never create
+ * two different requests for the same page, and avoids possible deadlock
+ * when we reach the hard limit on the number of dirty pages.
*/
-static inline struct nfs_wreq *
-create_write_request(struct file * file, struct page *page, unsigned int offset, unsigned int bytes)
+static struct nfs_page *
+nfs_create_request(struct inode *inode, struct file *file, struct page *page,
+ unsigned int offset, unsigned int count)
{
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- struct nfs_wreq *wreq;
- struct rpc_task *task;
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct nfs_page *req = NULL;
+ long timeout;
- dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n",
- dentry->d_parent->d_name.name, dentry->d_name.name,
- (page->index << PAGE_CACHE_SHIFT) + offset, bytes);
-
- /* FIXME: Enforce hard limit on number of concurrent writes? */
- wreq = kmem_cache_alloc(nfs_wreq_cachep, SLAB_KERNEL);
- if (!wreq)
- goto out_fail;
- memset(wreq, 0, sizeof(*wreq));
+ /* Deal with hard/soft limits.
+ */
+ do {
+ /* If we're over the soft limit, flush out old requests */
+ if (nfs_nr_requests >= MAX_REQUEST_SOFT)
+ nfs_wb_file(inode, file);
+
+ /* If we're still over the soft limit, wake up some requests */
+ if (nfs_nr_requests >= MAX_REQUEST_SOFT) {
+ dprintk("NFS: hit soft limit (%d requests)\n",
+ nfs_nr_requests);
+ if (!cache->task)
+ nfs_reqlist_init(NFS_SERVER(inode));
+ nfs_wake_flushd();
+ }
- task = &wreq->wb_task;
- rpc_init_task(task, clnt, nfs_wback_result, RPC_TASK_NFSWRITE);
- task->tk_calldata = wreq;
- task->tk_action = nfs_wback_begin;
+ /* If we haven't reached the hard limit yet,
+ * try to allocate the request struct */
+ if (nfs_nr_requests < MAX_REQUEST_HARD) {
+ req = nfs_page_alloc();
+ if (req != NULL)
+ break;
+ }
- rpcauth_lookupcred(task); /* Obtain user creds */
- if (task->tk_status < 0)
- goto out_req;
+ /* We're over the hard limit. Wait for better times */
+ dprintk("NFS: create_request sleeping (total %d pid %d)\n",
+ nfs_nr_requests, current->pid);
+
+ timeout = 1 * HZ;
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_INTR) {
+ interruptible_sleep_on_timeout(&cache->request_wait,
+ timeout);
+ if (signalled())
+ break;
+ } else
+ sleep_on_timeout(&cache->request_wait, timeout);
+
+ dprintk("NFS: create_request waking up (tot %d pid %d)\n",
+ nfs_nr_requests, current->pid);
+ } while (!req);
+ if (!req)
+ return NULL;
- /* Put the task on inode's writeback request list. */
+ /* Initialize the request struct. Initially, we assume a
+	 * long write-back delay. This is shortened just below if the
+	 * region turns out not to be write-locked by us. */
+ req->wb_page = page;
+ atomic_inc(&page->count);
+ req->wb_offset = offset;
+ req->wb_bytes = count;
+ /* If the region is locked, adjust the timeout */
+ if (region_locked(inode, req))
+ req->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
+ else
+ req->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
+ req->wb_file = file;
+ req->wb_cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
get_file(file);
- wreq->wb_file = file;
- wreq->wb_pid = current->pid;
- wreq->wb_page = page;
- init_waitqueue_head(&wreq->wb_wait);
- wreq->wb_offset = offset;
- wreq->wb_bytes = bytes;
- wreq->wb_count = 2; /* One for the IO, one for us */
+ req->wb_count = 1;
- kmap(page);
- append_write_request(&NFS_WRITEBACK(inode), wreq);
+ /* register request's existence */
+ cache->nr_requests++;
+ nfs_nr_requests++;
+ return req;
+}
- if (nr_write_requests++ > NFS_WRITEBACK_MAX*3/4)
- rpc_wake_up_next(&write_queue);
- return wreq;
+/*
+ * Release all resources associated with a write request after it
+ * has been committed to stable storage
+ *
+ * Note: takes nfs_wreq_lock itself, so the caller must NOT hold the spinlock!
+ */
+void
+nfs_release_request(struct nfs_page *req)
+{
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ struct nfs_reqlist *cache = NFS_REQUESTLIST(inode);
+ struct page *page = req->wb_page;
+
+ spin_lock(&nfs_wreq_lock);
+ if (--req->wb_count) {
+ spin_unlock(&nfs_wreq_lock);
+ return;
+ }
+ spin_unlock(&nfs_wreq_lock);
-out_req:
- rpc_release_task(task);
- kmem_cache_free(nfs_wreq_cachep, wreq);
-out_fail:
- return NULL;
+ if (!list_empty(&req->wb_list)) {
+ printk(KERN_ERR "NFS: Request released while still on a list!\n");
+ nfs_list_remove_request(req);
+ }
+ if (!list_empty(&req->wb_hash)) {
+ printk(KERN_ERR "NFS: Request released while still hashed!\n");
+ nfs_inode_remove_request(req);
+ }
+ if (NFS_WBACK_BUSY(req))
+ printk(KERN_ERR "NFS: Request released while still locked!\n");
+
+ rpcauth_releasecred(NFS_CLIENT(inode)->cl_auth, req->wb_cred);
+ fput(req->wb_file);
+ page_cache_release(page);
+ nfs_page_free(req);
+ /* wake up anyone waiting to allocate a request */
+ cache->nr_requests--;
+ nfs_nr_requests--;
+ wake_up(&cache->request_wait);
}
/*
- * Schedule a writeback RPC call.
- * If the server is congested, don't add to our backlog of queued
- * requests but call it synchronously.
- * The function returns whether we should wait for the thing or not.
+ * Wait for a request to complete.
*
- * FIXME: Here we could walk the inode's lock list to see whether the
- * page we're currently writing to has been write-locked by the caller.
- * If it is, we could schedule an async write request with a long
- * delay in order to avoid writing back the page until the lock is
- * released.
+ * Interruptible by signals only if mounted with intr flag.
*/
-static inline int
-schedule_write_request(struct nfs_wreq *req, int sync)
+static int
+nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_task *task = &req->wb_task;
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = req->wb_file->f_dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ int retval;
- if (NFS_CONGESTED(inode) || nr_write_requests >= NFS_WRITEBACK_MAX)
- sync = 1;
-
- if (sync) {
- sigset_t oldmask;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- dprintk("NFS: %4d schedule_write_request (sync)\n",
- task->tk_pid);
- /* Page is already locked */
- rpc_clnt_sigmask(clnt, &oldmask);
- rpc_execute(task);
- rpc_clnt_sigunmask(clnt, &oldmask);
- } else {
- dprintk("NFS: %4d schedule_write_request (async)\n",
- task->tk_pid);
- task->tk_flags |= RPC_TASK_ASYNC;
- task->tk_timeout = NFS_WRITEBACK_DELAY;
- rpc_sleep_on(&write_queue, task, NULL, NULL);
+ if (!NFS_WBACK_BUSY(req))
+ return 0;
+ req->wb_count++;
+ retval = nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req));
+ nfs_release_request(req);
+ return retval;
+}
+
+/*
+ * Wait for all outstanding requests in the given file/byte range to complete.
+ *
+ * Interruptible by signals only if mounted with intr flag.
+ */
+static int
+nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long start, unsigned int count)
+{
+ struct list_head *p, *head;
+ unsigned long idx_start, idx_end;
+ unsigned int pages = 0;
+ int error;
+
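+	/* Convert the byte range into page indices; a count of 0 means
+	 * everything from 'start' onwards. */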
+ idx_start = start >> PAGE_CACHE_SHIFT;
+ if (count == 0)
+ idx_end = ~0;
+ else {
+ unsigned long idx_count = (count-1) >> PAGE_CACHE_SHIFT;
+ idx_end = idx_start + idx_count;
}
+ spin_lock(&nfs_wreq_lock);
+ head = &inode->u.nfs_i.writeback;
+ p = head->next;
+ while (p != head) {
+ unsigned long pg_idx;
+ struct nfs_page *req = nfs_inode_wb_entry(p);
+
+ p = p->next;
+
+ if (file && req->wb_file != file)
+ continue;
+
+ pg_idx = page_index(req->wb_page);
+ if (pg_idx < idx_start || pg_idx > idx_end)
+ continue;
- return sync;
+ if (!NFS_WBACK_BUSY(req))
+ continue;
+ req->wb_count++;
+ spin_unlock(&nfs_wreq_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (error < 0)
+ return error;
+ spin_lock(&nfs_wreq_lock);
+ p = head->next;
+ pages++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
}
/*
- * Wait for request to complete.
+ * Scan a request list for requests whose timeout has expired, and
+ * move them onto the destination list.
*/
static int
-wait_on_write_request(struct nfs_wreq *req)
+nfs_scan_list_timeout(struct list_head *head, struct list_head *dst, struct inode *inode)
{
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
- DECLARE_WAITQUEUE(wait, current);
- sigset_t oldmask;
- int retval;
+ struct list_head *p;
+ struct nfs_page *req;
+ int pages = 0;
+
+ p = head->next;
+ while (p != head) {
+ req = nfs_list_entry(p);
+ p = p->next;
+ if (time_after(req->wb_timeout, jiffies)) {
+ if (time_after(NFS_NEXTSCAN(inode), req->wb_timeout))
+ NFS_NEXTSCAN(inode) = req->wb_timeout;
+ continue;
+ }
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ }
+ return pages;
+}
+
+static int
+nfs_scan_dirty_timeout(struct inode *inode, struct list_head *dst)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list_timeout(&inode->u.nfs_i.dirty, dst, inode);
+ inode->u.nfs_i.ndirty -= pages;
+ if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
- /* Make sure it's started.. */
- if (!WB_INPROGRESS(req))
- rpc_wake_up_task(&req->wb_task);
+#ifdef CONFIG_NFS_V3
+static int
+nfs_scan_commit_timeout(struct inode *inode, struct list_head *dst)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list_timeout(&inode->u.nfs_i.commit, dst, inode);
+ inode->u.nfs_i.ncommit -= pages;
+ if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+#endif
+
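+/*
+ * Move requests matching the given file and byte range from the src
+ * list onto dst, locking each request as it is transferred.
+ */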
+static int
+nfs_scan_list(struct list_head *src, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ struct list_head *p;
+ struct nfs_page *req;
+ unsigned long idx_start, idx_end;
+ int pages;
+
+ pages = 0;
+ idx_start = start >> PAGE_CACHE_SHIFT;
+ if (count == 0)
+ idx_end = ~0;
+ else
+ idx_end = idx_start + ((count-1) >> PAGE_CACHE_SHIFT);
+ p = src->next;
+ while (p != src) {
+ unsigned long pg_idx;
+
+ req = nfs_list_entry(p);
+ p = p->next;
+
+ if (file && req->wb_file != file)
+ continue;
+
+ pg_idx = page_index(req->wb_page);
+ if (pg_idx < idx_start || pg_idx > idx_end)
+ continue;
+
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ }
+ return pages;
+}
+
+static int
+nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, start, count);
+ inode->u.nfs_i.ndirty -= pages;
+ if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+
+#ifdef CONFIG_NFS_V3
+static int
+nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long start, unsigned int count)
+{
+ int pages;
+ spin_lock(&nfs_wreq_lock);
+ pages = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, start, count);
+ inode->u.nfs_i.ncommit -= pages;
+ if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ spin_unlock(&nfs_wreq_lock);
+ return pages;
+}
+#endif
+
+
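+/*
+ * Collect a run of requests for consecutive pages of the same file
+ * into dst so that they can be sent in a single WRITE call. The run
+ * stops at maxpages requests, at a change of file, at a gap in the
+ * page sequence, or at a partially-filled page.
+ */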
+static int
+coalesce_requests(struct list_head *src, struct list_head *dst, unsigned int maxpages)
+{
+ struct nfs_page *req = NULL;
+ unsigned int pages = 0;
+
+ while (!list_empty(src)) {
+ struct nfs_page *prev = req;
+
+ req = nfs_list_entry(src->next);
+ if (prev) {
+ if (req->wb_file != prev->wb_file)
+ break;
+
+ if (page_index(req->wb_page) != page_index(prev->wb_page)+1)
+ break;
+
+ if (req->wb_offset != 0)
+ break;
+ }
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, dst);
+ pages++;
+ if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
+ break;
+ if (pages >= maxpages)
+ break;
+ }
+ return pages;
+}
+
+/*
+ * Try to update any existing write request, or create one if there is none.
+ * In order to match, the request's credentials must match those of
+ * the calling process.
+ *
+ * Note: Should always be called with the Page Lock held!
+ */
+static struct nfs_page *
+nfs_update_request(struct file* file, struct page *page,
+ unsigned long offset, unsigned int bytes)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct nfs_page *req, *new = NULL;
+ unsigned long rqend, end;
+
+ end = offset + bytes;
- rpc_clnt_sigmask(clnt, &oldmask);
- add_wait_queue(&req->wb_wait, &wait);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- retval = 0;
- if (req->wb_flags & NFS_WRITE_COMPLETE)
+ /* Loop over all inode entries and see if we find
+ * A request for the page we wish to update
+ */
+ spin_lock(&nfs_wreq_lock);
+ req = _nfs_find_request(inode, page);
+ if (req) {
+ if (!nfs_lock_request(req)) {
+ spin_unlock(&nfs_wreq_lock);
+ nfs_wait_on_request(req);
+ nfs_release_request(req);
+ continue;
+ }
+ spin_unlock(&nfs_wreq_lock);
+ if (new)
+ nfs_release_request(new);
break;
- retval = -ERESTARTSYS;
- if (signalled())
+ }
+
+ req = new;
+ if (req) {
+ nfs_lock_request(req);
+ nfs_inode_add_request(inode, req);
+ spin_unlock(&nfs_wreq_lock);
+ nfs_mark_request_dirty(req);
break;
- schedule();
+ }
+ spin_unlock(&nfs_wreq_lock);
+
+ /* Create the request. It's safe to sleep in this call because
+ * we only get here if the page is locked.
+ */
+ new = nfs_create_request(inode, file, page, offset, bytes);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* We have a request for our page.
+	 * If the request belongs to a different file or page, is no
+	 * longer dirty, or its byte range does not overlap with ours,
+	 * tell the caller to flush out the conflicting
+	 * request.
+ */
+ rqend = req->wb_offset + req->wb_bytes;
+ if (req->wb_file != file
+ || req->wb_page != page
+ || !nfs_dirty_request(req)
+ || offset > rqend || end < req->wb_offset) {
+ nfs_unlock_request(req);
+ nfs_release_request(req);
+ return ERR_PTR(-EBUSY);
+ }
+
+ /* Okay, the request matches. Update the region */
+ if (offset < req->wb_offset) {
+ req->wb_offset = offset;
+ req->wb_bytes = rqend - req->wb_offset;
}
- remove_wait_queue(&req->wb_wait, &wait);
- current->state = TASK_RUNNING;
- rpc_clnt_sigunmask(clnt, &oldmask);
- return retval;
+
+ if (end > rqend)
+ req->wb_bytes = end - req->wb_offset;
+
+ nfs_unlock_request(req);
+
+ return req;
}
/*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
+ * This is the strategy routine for NFS.
+ * It is called by nfs_updatepage whenever the user wrote up to the end
+ * of a page.
+ *
+ * We always try to submit a set of requests in parallel so that the
+ * server's write code can gather writes. This is mainly for the benefit
+ * of NFSv2.
+ *
+ * We never submit more requests than we think the remote can handle.
+ * For UDP sockets, we make sure we don't exceed the congestion window;
+ * for TCP, we limit the number of requests to 8.
+ *
+ * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
+ * should be sent out in one go. This is for the benefit of NFSv2 servers
+ * that perform write gathering.
+ *
+ * FIXME: Different servers may have different sweet spots.
+ * Record the average congestion window in server struct?
*/
-int
-nfs_writepage(struct dentry * dentry, struct page *page)
+#define NFS_STRATEGY_PAGES 8
+static void
+nfs_strategy(struct file *file)
{
- struct inode *inode = dentry->d_inode;
- unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
- unsigned offset = PAGE_CACHE_SIZE;
- int err;
+ struct inode *inode = file->f_dentry->d_inode;
+ unsigned int dirty, wpages;
+
+ dirty = inode->u.nfs_i.ndirty;
+ wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT;
+#ifdef CONFIG_NFS_V3
+ if (NFS_PROTO(inode)->version == 2) {
+ if (dirty >= NFS_STRATEGY_PAGES * wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+ } else {
+ if (dirty >= wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+ }
+#else
+ if (dirty >= NFS_STRATEGY_PAGES * wpages)
+ nfs_flush_file(inode, file, 0, 0, 0);
+#endif
+ /*
+ * If we're running out of requests, flush out everything
+	 * in order to reduce memory usage...
+ */
+ if (nfs_nr_requests > MAX_REQUEST_SOFT)
+ nfs_wb_file(inode, file);
+}
- /* easy case */
- if (page->index < end_index)
- goto do_it;
- /* things got complicated... */
- offset = inode->i_size & (PAGE_CACHE_SIZE-1);
- /* OK, are we completely out? */
- if (page->index >= end_index+1 || !offset)
- return -EIO;
-do_it:
- err = nfs_writepage_sync(dentry, inode, page, 0, offset);
- if ( err == offset) return 0;
- return err;
+int
+nfs_flush_incompatible(struct file *file, struct page *page)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct nfs_page *req;
+ int status = 0;
+ /*
+ * Look for a request corresponding to this page. If there
+ * is one, and it belongs to another file, we flush it out
+ * before we try to copy anything into the page. Do this
+ * due to the lack of an ACCESS-type call in NFSv2.
+ * Also do the same if we find a request from an existing
+ * dropped page.
+ */
+ req = nfs_find_request(inode,page);
+ if (req) {
+ if (req->wb_file != file || req->wb_page != page)
+ status = nfs_wb_page(inode, page);
+ nfs_release_request(req);
+ }
+ return (status < 0) ? status : 0;
}
/*
@@ -446,27 +1032,13 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
{
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
- struct nfs_wreq *req;
+ struct nfs_page *req;
int synchronous = file->f_flags & O_SYNC;
- int retval;
+ int status = 0;
- dprintk("NFS: nfs_updatepage(%s/%s %d@%ld)\n",
+ dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- count, (page->index << PAGE_CACHE_SHIFT) +offset);
-
- /*
- * Try to find a corresponding request on the writeback queue.
- * If there is one, we can be sure that this request is not
- * yet being processed, because we hold a lock on the page.
- *
- * If the request was created by us, update it. Otherwise,
- * transfer the page lock and flush out the dirty page now.
- * After returning, generic_file_write will wait on the
- * page and retry the update.
- */
- req = find_write_request(inode, page);
- if (req && req->wb_file == file && update_write_request(req, offset, count))
- goto updated;
+ count, page_offset(page) +offset);
/*
* If wsize is smaller than page size, update and write
@@ -475,241 +1047,542 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
if (NFS_SERVER(inode)->wsize < PAGE_SIZE)
return nfs_writepage_sync(dentry, inode, page, offset, count);
- /* Create the write request. */
- req = create_write_request(file, page, offset, count);
- if (!req)
- return -ENOBUFS;
-
/*
- * Ok, there's another user of this page with the new request..
- * The IO completion will then free the page and the dentry.
+ * Try to find an NFS request corresponding to this page
+ * and update it.
+ * If the existing request cannot be updated, we must flush
+ * it out now.
*/
- get_page(page);
-
- /* Schedule request */
- synchronous = schedule_write_request(req, synchronous);
+ do {
+ req = nfs_update_request(file, page, offset, count);
+ status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+ if (status != -EBUSY)
+ break;
+ /* Request could not be updated. Flush it out and try again */
+ status = nfs_wb_page(inode, page);
+ } while (status >= 0);
+ if (status < 0)
+ goto done;
-updated:
- if (req->wb_bytes == PAGE_SIZE)
+ if (req->wb_bytes == PAGE_CACHE_SIZE)
SetPageUptodate(page);
- retval = 0;
+ status = 0;
if (synchronous) {
- int status = wait_on_write_request(req);
- if (status) {
- nfs_cancel_request(req);
- retval = status;
- } else {
- status = req->wb_status;
- if (status < 0)
- retval = status;
- }
+ int error;
- if (retval < 0)
- ClearPageUptodate(page);
+ error = nfs_sync_file(inode, file, page_offset(page) + offset, count, FLUSH_SYNC|FLUSH_STABLE);
+ if (error < 0 || (error = file->f_error) < 0)
+ status = error;
+ file->f_error = 0;
+ } else {
+		/* If we wrote up to the end of the page, call the
+		 * strategy routine so it can send out a bunch
+		 * of requests.
+ */
+ if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE)
+ nfs_strategy(file);
}
-
- free_write_request(req);
- return retval;
+ nfs_release_request(req);
+done:
+ dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
+ status, inode->i_size);
+ if (status < 0)
+ clear_bit(PG_uptodate, &page->flags);
+ return status;
}
/*
- * Cancel a write request. We always mark it cancelled,
- * but if it's already in progress there's no point in
- * calling rpc_exit, and we don't want to overwrite the
- * tk_status field.
- */
+ * Set up the argument/result storage required for the RPC call.
+ */
static void
-nfs_cancel_request(struct nfs_wreq *req)
+nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
- req->wb_flags |= NFS_WRITE_CANCELLED;
- if (!WB_INPROGRESS(req)) {
- rpc_exit(&req->wb_task, 0);
- rpc_wake_up_task(&req->wb_task);
+ struct nfs_page *req;
+ struct iovec *iov;
+ unsigned int count;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ iov = data->args.iov;
+ count = 0;
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ iov->iov_base = (void *)(kmap(req->wb_page) + req->wb_offset);
+ iov->iov_len = req->wb_bytes;
+ count += req->wb_bytes;
+ iov++;
+ data->args.nriov++;
}
+ req = nfs_list_entry(data->pages.next);
+ data->file = req->wb_file;
+ data->cred = req->wb_cred;
+ data->args.fh = NFS_FH(req->wb_file->f_dentry);
+ data->args.offset = page_offset(req->wb_page) + req->wb_offset;
+ data->args.count = count;
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.verf = &data->verf;
}
+
/*
- * Cancel all writeback requests, both pending and in progress.
+ * Create an RPC task for the given write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
*/
-static void
-nfs_cancel_dirty(struct inode *inode, pid_t pid)
+static int
+nfs_flush_one(struct list_head *head, struct file *file, int how)
{
- struct nfs_wreq *head, *req;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct rpc_message msg;
+ int flags,
+ async = !(how & FLUSH_SYNC),
+ stable = (how & FLUSH_STABLE);
+ sigset_t oldset;
+
+
+ data = nfs_writedata_alloc();
+ if (!data)
+ goto out_bad;
+ task = &data->task;
+
+ /* Set the initial flags for the task. */
+ flags = (async) ? RPC_TASK_ASYNC : 0;
+
+ /* Set up the argument struct */
+ nfs_write_rpcsetup(head, data);
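+	/* Pick the stability level: stable writes use FILE_SYNC when
+	 * nothing is awaiting commit and DATA_SYNC otherwise; other
+	 * writes go out UNSTABLE and are committed later (NFSv3). */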
+ if (stable) {
+ if (!inode->u.nfs_i.ncommit)
+ data->args.stable = NFS_FILE_SYNC;
+ else
+ data->args.stable = NFS_DATA_SYNC;
+ } else
+ data->args.stable = NFS_UNSTABLE;
+
+ /* Finalize the task. */
+ rpc_init_task(task, clnt, nfs_writeback_done, flags);
+ task->tk_calldata = data;
+
+#ifdef CONFIG_NFS_V3
+ msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
+#else
+ msg.rpc_proc = NFSPROC_WRITE;
+#endif
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ msg.rpc_cred = data->cred;
+
+ dprintk("NFS: %4d initiated write call (req %s/%s count %d nriov %d)\n",
+ task->tk_pid,
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name,
+ data->args.count, data->args.nriov);
+
+ rpc_clnt_sigmask(clnt, &oldset);
+ rpc_call_setup(task, &msg, 0);
+ rpc_execute(task);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
+}
- req = head = NFS_WRITEBACK(inode);
- while (req != NULL) {
- if (pid == 0 || req->wb_pid == pid)
- nfs_cancel_request(req);
- if ((req = WB_NEXT(req)) == head)
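+/*
+ * Coalesce the list into runs of up to wsize bytes and send one WRITE
+ * call per run; on failure, put the remaining requests back on the
+ * dirty list.
+ */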
+static int
+nfs_flush_list(struct inode *inode, struct list_head *head, int how)
+{
+ LIST_HEAD(one_request);
+ struct nfs_page *req;
+ int error = 0;
+ unsigned int pages = 0,
+ wpages = NFS_SERVER(inode)->wsize >> PAGE_CACHE_SHIFT;
+
+ while (!list_empty(head)) {
+ pages += coalesce_requests(head, &one_request, wpages);
+ req = nfs_list_entry(one_request.next);
+ error = nfs_flush_one(&one_request, req->wb_file, how);
+ if (error < 0)
break;
}
+ if (error >= 0)
+ return pages;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_dirty(req);
+ nfs_unlock_request(req);
+ }
+ return error;
}
+
/*
- * If we're waiting on somebody else's request
- * we need to increment the counter during the
- * wait so that the request doesn't disappear
- * from under us during the wait..
+ * This function is called when the WRITE call is complete.
*/
-static int FASTCALL(wait_on_other_req(struct nfs_wreq *));
-static int wait_on_other_req(struct nfs_wreq *req)
+static void
+nfs_writeback_done(struct rpc_task *task)
{
- int retval;
- req->wb_count++;
- retval = wait_on_write_request(req);
- free_write_request(req);
- return retval;
-}
+ struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+ struct nfs_writeargs *argp = &data->args;
+ struct nfs_writeres *resp = &data->res;
+ struct dentry *dentry = data->file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct nfs_page *req;
+
+ dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+	/* We can't handle a short write yet, but we check for it nevertheless */
+ if (resp->count < argp->count && task->tk_status >= 0) {
+ static unsigned long complain = 0;
+ if (time_before(complain, jiffies)) {
+ printk(KERN_WARNING
+ "NFS: Server wrote less than requested.\n");
+ complain = jiffies + 300 * HZ;
+ }
+ /* Can't do anything about it right now except throw
+ * an error. */
+ task->tk_status = -EIO;
+ }
+#ifdef CONFIG_NFS_V3
+ if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ /* We tried a write call, but the server did not
+ * commit data to stable storage even though we
+ * requested it.
+ */
+ static unsigned long complain = 0;
+
+ if (time_before(complain, jiffies)) {
+ printk(KERN_NOTICE "NFS: faulty NFSv3 server %s:"
+ " (committed = %d) != (stable = %d)\n",
+ NFS_SERVER(inode)->hostname,
+ resp->verf->committed, argp->stable);
+ complain = jiffies + 300 * HZ;
+ }
+ }
+#endif
-/*
- * This writes back a set of requests according to the condition.
- *
- * If this ever gets much more convoluted, use a fn pointer for
- * the condition..
- */
-#define NFS_WB(inode, cond) { int retval = 0 ; \
- do { \
- struct nfs_wreq *req = NFS_WRITEBACK(inode); \
- struct nfs_wreq *head = req; \
- if (!req) break; \
- for (;;) { \
- if (!(req->wb_flags & NFS_WRITE_COMPLETE)) \
- if (cond) break; \
- req = WB_NEXT(req); \
- if (req == head) goto out; \
- } \
- retval = wait_on_other_req(req); \
- } while (!retval); \
-out: return retval; \
-}
+ /* Update attributes as result of writeback. */
+ if (task->tk_status >= 0)
+ nfs_write_attributes(inode, resp->fattr);
-int
-nfs_wb_all(struct inode *inode)
-{
- NFS_WB(inode, 1);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ kunmap(req->wb_page);
+
+ dprintk("NFS: write (%s/%s %d@%Ld)",
+ req->wb_file->f_dentry->d_parent->d_name.name,
+ req->wb_file->f_dentry->d_name.name,
+ req->wb_bytes,
+ page_offset(req->wb_page) + req->wb_offset);
+
+ if (task->tk_status < 0) {
+ req->wb_file->f_error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
+
+#ifdef CONFIG_NFS_V3
+ if (resp->verf->committed != NFS_UNSTABLE) {
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
+ req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
+ nfs_mark_request_commit(req);
+ dprintk(" marked for commit\n");
+#else
+ nfs_inode_remove_request(req);
+#endif
+ next:
+ nfs_unlock_request(req);
+ }
+ nfs_writedata_release(task);
}
+
+#ifdef CONFIG_NFS_V3
/*
- * Write back all requests on one page - we do this before reading it.
+ * Set up the argument/result storage required for the RPC call.
*/
-int
-nfs_wb_page(struct inode *inode, struct page *page)
+static void
+nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
- NFS_WB(inode, req->wb_page == page);
+ struct nfs_page *req;
+ struct dentry *dentry;
+ struct inode *inode;
+ unsigned long start, end, len;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ end = 0;
+ start = ~0;
+ req = nfs_list_entry(head->next);
+ data->file = req->wb_file;
+ data->cred = req->wb_cred;
+ dentry = data->file->f_dentry;
+ inode = dentry->d_inode;
+ while (!list_empty(head)) {
+ struct nfs_page *req;
+ unsigned long rqstart, rqend;
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &data->pages);
+ rqstart = page_offset(req->wb_page) + req->wb_offset;
+ rqend = rqstart + req->wb_bytes;
+ if (rqstart < start)
+ start = rqstart;
+ if (rqend > end)
+ end = rqend;
+ }
+ data->args.fh = NFS_FH(dentry);
+ data->args.offset = start;
+ len = end - start;
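+	/* If the range reaches the end of the file, or its length cannot
+	 * be represented, send a count of 0 (commit through EOF). */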
+ if (end >= inode->i_size || len > (~((u32)0) >> 1))
+ len = 0;
+ data->res.count = data->args.count = (u32)len;
+ data->res.fattr = &data->fattr;
+ data->res.verf = &data->verf;
}
/*
- * Write back all pending writes from one file descriptor..
+ * Commit dirty pages
*/
-int
-nfs_wb_file(struct inode *inode, struct file *file)
-{
- NFS_WB(inode, req->wb_file == file);
-}
-
-void
-nfs_inval(struct inode *inode)
+static int
+nfs_commit_list(struct list_head *head, int how)
{
- nfs_cancel_dirty(inode,0);
+ struct rpc_message msg;
+ struct file *file;
+ struct rpc_clnt *clnt;
+ struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct nfs_page *req;
+ int flags,
+ async = !(how & FLUSH_SYNC);
+ sigset_t oldset;
+
+ data = nfs_writedata_alloc();
+
+ if (!data)
+ goto out_bad;
+ task = &data->task;
+
+ flags = (async) ? RPC_TASK_ASYNC : 0;
+
+ /* Set up the argument struct */
+ nfs_commit_rpcsetup(head, data);
+ req = nfs_list_entry(data->pages.next);
+ file = req->wb_file;
+ clnt = NFS_CLIENT(file->f_dentry->d_inode);
+
+ rpc_init_task(task, clnt, nfs_commit_done, flags);
+ task->tk_calldata = data;
+
+ msg.rpc_proc = NFS3PROC_COMMIT;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ msg.rpc_cred = data->cred;
+
+ dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
+ rpc_clnt_sigmask(clnt, &oldset);
+ rpc_call_setup(task, &msg, 0);
+ rpc_execute(task);
+ rpc_clnt_sigunmask(clnt, &oldset);
+ return 0;
+ out_bad:
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_mark_request_commit(req);
+ nfs_unlock_request(req);
+ }
+ return -ENOMEM;
}
/*
- * The following procedures make up the writeback finite state machinery:
- *
- * 1. Try to lock the page if not yet locked by us,
- * set up the RPC call info, and pass to the call FSM.
+ * COMMIT call returned
*/
static void
-nfs_wback_begin(struct rpc_task *task)
+nfs_commit_done(struct rpc_task *task)
{
- struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
- struct page *page = req->wb_page;
- struct file *file = req->wb_file;
- struct dentry *dentry = file->f_dentry;
-
- dprintk("NFS: %4d nfs_wback_begin (%s/%s, status=%d flags=%x)\n",
- task->tk_pid, dentry->d_parent->d_name.name,
- dentry->d_name.name, task->tk_status, req->wb_flags);
+ struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
+ struct nfs_writeres *resp = &data->res;
+ struct nfs_page *req;
+ struct dentry *dentry = data->file->f_dentry;
+ struct inode *inode = dentry->d_inode;
- task->tk_status = 0;
+ dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+ task->tk_pid, task->tk_status);
+
+ nfs_refresh_inode(inode, resp->fattr);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+
+ dprintk("NFS: commit (%s/%s %d@%ld)",
+ req->wb_file->f_dentry->d_parent->d_name.name,
+ req->wb_file->f_dentry->d_name.name,
+ req->wb_bytes,
+ page_offset(req->wb_page) + req->wb_offset);
+ if (task->tk_status < 0) {
+ req->wb_file->f_error = task->tk_status;
+ nfs_inode_remove_request(req);
+ dprintk(", error = %d\n", task->tk_status);
+ goto next;
+ }
- /* Setup the task struct for a writeback call */
- req->wb_flags |= NFS_WRITE_INPROGRESS;
- req->wb_args.fh = NFS_FH(dentry);
- req->wb_args.offset = (page->index << PAGE_CACHE_SHIFT) + req->wb_offset;
- req->wb_args.count = req->wb_bytes;
- req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset);
+ /* Okay, COMMIT succeeded, apparently. Check the verifier
+ * returned by the server against all stored verfs. */
+ if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ /* We have a match */
+ nfs_inode_remove_request(req);
+ dprintk(" OK\n");
+ goto next;
+ }
+ /* We have a mismatch. Write the page again */
+ dprintk(" mismatch\n");
+ nfs_mark_request_dirty(req);
+ next:
+ nfs_unlock_request(req);
+ }
+ nfs_writedata_release(task);
+}
+#endif
- rpc_call_setup(task, NFSPROC_WRITE, &req->wb_args, &req->wb_fattr, 0);
+int nfs_flush_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_dirty(inode, &head, file, start, count);
+ if (pages)
+ error = nfs_flush_list(inode, &head, how);
+ if (error < 0)
+ return error;
+ return pages;
+}
- return;
+int nfs_flush_timeout(struct inode *inode, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_dirty_timeout(inode, &head);
+ if (pages)
+ error = nfs_flush_list(inode, &head, how);
+ if (error < 0)
+ return error;
+ return pages;
}
-/*
- * 2. Collect the result
- */
-static void
-nfs_wback_result(struct rpc_task *task)
+#ifdef CONFIG_NFS_V3
+int nfs_commit_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
{
- struct nfs_wreq *req = (struct nfs_wreq *) task->tk_calldata;
- struct file *file = req->wb_file;
- struct page *page = req->wb_page;
- int status = task->tk_status;
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_commit(inode, &head, file, start, count);
+ if (pages)
+ error = nfs_commit_list(&head, how);
+ if (error < 0)
+ return error;
+ return pages;
+}
- dprintk("NFS: %4d nfs_wback_result (%s/%s, status=%d, flags=%x)\n",
- task->tk_pid, dentry->d_parent->d_name.name,
- dentry->d_name.name, status, req->wb_flags);
-
- /* Set the WRITE_COMPLETE flag, but leave WRITE_INPROGRESS set */
- req->wb_flags |= NFS_WRITE_COMPLETE;
- req->wb_status = status;
-
- if (status < 0) {
- req->wb_flags |= NFS_WRITE_INVALIDATE;
- file->f_error = status;
- } else if (!WB_CANCELLED(req)) {
- struct nfs_fattr *fattr = &req->wb_fattr;
- /* Update attributes as result of writeback.
- * Beware: when UDP replies arrive out of order, we
- * may end up overwriting a previous, bigger file size.
- *
- * When the file size shrinks we cancel all pending
- * writebacks.
- */
- if (fattr->mtime.seconds >= inode->i_mtime) {
- if (fattr->size < inode->i_size)
- fattr->size = inode->i_size;
-
- /* possible Solaris 2.5 server bug workaround */
- if (inode->i_ino == fattr->fileid) {
- /*
- * We expect these values to change, and
- * don't want to invalidate the caches.
- */
- inode->i_size = fattr->size;
- inode->i_mtime = fattr->mtime.seconds;
- nfs_refresh_inode(inode, fattr);
- }
- else
- printk("nfs_wback_result: inode %ld, got %u?\n",
- inode->i_ino, fattr->fileid);
- }
+int nfs_commit_timeout(struct inode *inode, int how)
+{
+ LIST_HEAD(head);
+ int pages,
+ error = 0;
+
+ pages = nfs_scan_commit_timeout(inode, &head);
+ if (pages) {
+ pages += nfs_scan_commit(inode, &head, NULL, 0, 0);
+ error = nfs_commit_list(&head, how);
}
+ if (error < 0)
+ return error;
+ return pages;
+}
+#endif
- rpc_release_task(task);
+int nfs_sync_file(struct inode *inode, struct file *file, unsigned long start,
+ unsigned int count, int how)
+{
+ int error,
+ wait;
- if (WB_INVALIDATE(req))
- ClearPageUptodate(page);
+ wait = how & FLUSH_WAIT;
+ how &= ~FLUSH_WAIT;
- kunmap(page);
- __free_page(page);
- remove_write_request(&NFS_WRITEBACK(inode), req);
- nr_write_requests--;
- fput(req->wb_file);
+ if (!inode && file)
+ inode = file->f_dentry->d_inode;
- wake_up(&req->wb_wait);
- free_write_request(req);
+ do {
+ error = 0;
+ if (wait)
+ error = nfs_wait_on_requests(inode, file, start, count);
+ if (error == 0)
+ error = nfs_flush_file(inode, file, start, count, how);
+#ifdef CONFIG_NFS_V3
+ if (error == 0)
+ error = nfs_commit_file(inode, file, start, count, how);
+#endif
+ } while (error > 0);
+ return error;
+}
+
+int nfs_init_nfspagecache(void)
+{
+ nfs_page_cachep = kmem_cache_create("nfs_page",
+ sizeof(struct nfs_page),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (nfs_page_cachep == NULL)
+ return -ENOMEM;
+
+ nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
+ sizeof(struct nfs_write_data),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (nfs_wdata_cachep == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void nfs_destroy_nfspagecache(void)
+{
+ if (kmem_cache_destroy(nfs_page_cachep))
+ printk(KERN_INFO "nfs_page: not all structures were freed\n");
+ if (kmem_cache_destroy(nfs_wdata_cachep))
+ printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
}
+
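
The rewritten write path above follows the NFSv3 unstable-write scheme: pages are flushed without forcing the server to commit them to stable storage, each request remembers the write verifier it got back, and nfs_commit_done() compares those stored verifiers against the one returned by COMMIT, re-dirtying any page whose verifier no longer matches (a mismatch means the server rebooted and may have lost the unstable data). Below is a minimal, self-contained sketch of just that verifier check; the struct names and sizes are illustrative stand-ins, not the kernel's types.

#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins for the kernel's verifier and request types. */
struct verf { unsigned char verifier[8]; };
struct pending_write { struct verf wb_verf; int dirty; };

/* Decide what to do with one request after COMMIT returned 'server_verf'. */
static void settle_request(struct pending_write *req, const struct verf *server_verf)
{
        if (memcmp(req->wb_verf.verifier, server_verf->verifier,
                   sizeof(server_verf->verifier)) == 0)
                req->dirty = 0;         /* verifier matches: data is stable, drop it */
        else
                req->dirty = 1;         /* mismatch: server may have lost it, write again */
}

int main(void)
{
        struct verf boot1 = {{1,2,3,4,5,6,7,8}}, boot2 = {{9,9,9,9,9,9,9,9}};
        struct pending_write req = { boot1, 1 };

        settle_request(&req, &boot1);
        printf("same verifier    -> dirty=%d\n", req.dirty);   /* 0 */
        settle_request(&req, &boot2);
        printf("changed verifier -> dirty=%d\n", req.dirty);   /* 1 */
        return 0;
}
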
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8d2b610a8..c6ea9074c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -483,7 +483,10 @@ exp_rootfh(struct svc_client *clp, kdev_t dev, ino_t ino,
* fh must be initialized before calling fh_compose
*/
fh_init(&fh, maxsize);
- err = fh_compose(&fh, exp, dentry);
+ if (fh_compose(&fh, exp, dentry))
+ err = -EINVAL;
+ else
+ err = 0;
memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh));
fh_put(&fh);
return err;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 6e102db9c..969ff54a9 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -748,6 +748,7 @@ encode_entry(struct readdir_cd *cd, const char *name,
if (fh_compose(&fh, exp, dchild) != 0 || !dchild->d_inode)
goto noexec;
p = encode_post_op_attr(cd->rqstp, p, fh.fh_dentry);
+ *p++ = xdr_one; /* yes, a file handle follows */
p = encode_fh(p, &fh);
fh_put(&fh);
}
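
The single added line in encode_entry() writes the boolean discriminant that XDR requires in front of an optional field, so a READDIRPLUS client sees "a file handle follows" before the handle bytes rather than misreading the handle itself as the flag. A hedged userspace sketch of how such an optional opaque is laid out on the wire; encode_opt_opaque() is an illustrative helper, not the kernel's encoder.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* XDR optional opaque: a 32-bit 1/0 flag, then length plus padded bytes if present.
 * Returns the advanced write pointer, in the style of the encode_* helpers. */
static uint32_t *encode_opt_opaque(uint32_t *p, const void *data, uint32_t len)
{
        if (data) {
                *p++ = htonl(1);                /* "value follows" */
                *p++ = htonl(len);
                memcpy(p, data, len);
                p += (len + 3) >> 2;            /* XDR pads to 4-byte multiples */
        } else {
                *p++ = htonl(0);                /* "no value" */
        }
        return p;
}

int main(void)
{
        uint32_t buf[16];
        unsigned char fh[6] = { 1, 2, 3, 4, 5, 6 };
        uint32_t *end = encode_opt_opaque(buf, fh, sizeof(fh));
        printf("encoded %ld words\n", (long)(end - buf));      /* 1 flag + 1 len + 2 data */
        return 0;
}
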
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f755adc8c..97b46f0c7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -42,8 +42,8 @@ static int nfsctl_export(struct nfsctl_export *data);
static int nfsctl_unexport(struct nfsctl_export *data);
static int nfsctl_getfh(struct nfsctl_fhparm *, __u8 *);
static int nfsctl_getfd(struct nfsctl_fdparm *, __u8 *);
-#ifdef notyet
static int nfsctl_getfs(struct nfsctl_fsparm *, struct knfsd_fh *);
+#ifdef notyet
static int nfsctl_ugidupdate(struct nfsctl_ugidmap *data);
#endif
@@ -112,7 +112,6 @@ nfsctl_ugidupdate(nfs_ugidmap *data)
}
#endif
-#ifdef notyet
static inline int
nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res)
{
@@ -131,10 +130,9 @@ nfsctl_getfs(struct nfsctl_fsparm *data, struct knfsd_fh *res)
else
err = exp_rootfh(clp, 0, 0, data->gd_path, res, data->gd_maxlen);
exp_unlock();
-
+ /*HACK*/ res->fh_size = NFS_FHSIZE; /* HACK until lockd handles var-length handles */
return err;
}
-#endif
static inline int
nfsctl_getfd(struct nfsctl_fdparm *data, __u8 *res)
@@ -206,6 +204,21 @@ nfsctl_getfh(struct nfsctl_fhparm *data, __u8 *res)
#define handle_sys_nfsservctl sys_nfsservctl
#endif
+static struct {
+ int argsize, respsize;
+} sizes[] = {
+ /* NFSCTL_SVC */ { sizeof(struct nfsctl_svc), 0 },
+ /* NFSCTL_ADDCLIENT */ { sizeof(struct nfsctl_client), 0},
+ /* NFSCTL_DELCLIENT */ { sizeof(struct nfsctl_client), 0},
+ /* NFSCTL_EXPORT */ { sizeof(struct nfsctl_export), 0},
+ /* NFSCTL_UNEXPORT */ { sizeof(struct nfsctl_export), 0},
+ /* NFSCTL_UGIDUPDATE */ { sizeof(struct nfsctl_uidmap), 0},
+ /* NFSCTL_GETFH */ { sizeof(struct nfsctl_fhparm), NFS_FHSIZE},
+ /* NFSCTL_GETFD */ { sizeof(struct nfsctl_fdparm), NFS_FHSIZE},
+ /* NFSCTL_GETFS */ { sizeof(struct nfsctl_fsparm), sizeof(struct knfsd_fh)},
+};
+#define CMD_MAX (sizeof(sizes)/sizeof(sizes[0])-1)
+
int
asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
{
@@ -214,6 +227,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
struct nfsctl_arg * arg = NULL;
union nfsctl_res * res = NULL;
int err;
+ int argsize, respsize;
MOD_INC_USE_COUNT;
lock_kernel ();
@@ -223,12 +237,16 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
if (!capable(CAP_SYS_ADMIN)) {
goto done;
}
+ err = -EINVAL;
+ if (cmd<0 || cmd > CMD_MAX)
+ goto done;
err = -EFAULT;
- if (!access_ok(VERIFY_READ, argp, sizeof(*argp))
- || (resp && !access_ok(VERIFY_WRITE, resp, sizeof(*resp)))) {
+ argsize = sizes[cmd].argsize + sizeof(int); /* int for ca_version */
+ respsize = sizes[cmd].respsize; /* maximum */
+ if (!access_ok(VERIFY_READ, argp, argsize)
+ || (resp && !access_ok(VERIFY_WRITE, resp, respsize))) {
goto done;
}
-
err = -ENOMEM; /* ??? */
if (!(arg = kmalloc(sizeof(*arg), GFP_USER)) ||
(resp && !(res = kmalloc(sizeof(*res), GFP_USER)))) {
@@ -236,7 +254,7 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
}
err = -EINVAL;
- copy_from_user(arg, argp, sizeof(*argp));
+ copy_from_user(arg, argp, argsize);
if (arg->ca_version != NFSCTL_VERSION) {
printk(KERN_WARNING "nfsd: incompatible version in syscall.\n");
goto done;
@@ -269,16 +287,16 @@ asmlinkage handle_sys_nfsservctl(int cmd, void *opaque_argp, void *opaque_resp)
case NFSCTL_GETFD:
err = nfsctl_getfd(&arg->ca_getfd, res->cr_getfh);
break;
-#ifdef notyet
case NFSCTL_GETFS:
err = nfsctl_getfs(&arg->ca_getfs, &res->cr_getfs);
-#endif
+ respsize = res->cr_getfs.fh_size+sizeof(int);
+ break;
default:
err = -EINVAL;
}
- if (!err && resp)
- copy_to_user(resp, res, sizeof(*resp));
+ if (!err && resp && respsize)
+ copy_to_user(resp, res, respsize);
done:
if (arg)
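
The rewritten nfsservctl entry point stops copying a full union nfsctl_arg to and from user space; instead it validates the command number against a per-command table of argument/result sizes and bounds access_ok(), copy_from_user() and copy_to_user() by those sizes. A minimal sketch of the same bounded-copy pattern, with an illustrative command list and sizes rather than the nfsctl ones:

#include <stdio.h>
#include <string.h>
#include <errno.h>

struct cmd_size { size_t argsize, respsize; };

/* One entry per command, indexed by the command number. */
static const struct cmd_size sizes[] = {
        /* CMD_A */ { 16,  0 },
        /* CMD_B */ { 32,  8 },
        /* CMD_C */ { 64, 64 },
};
#define CMD_MAX ((int)(sizeof(sizes)/sizeof(sizes[0]) - 1))

/* Copy only as many bytes as this command actually defines. */
static int handle(int cmd, const void *argp, size_t arg_avail, unsigned char *arg_buf)
{
        if (cmd < 0 || cmd > CMD_MAX)
                return -EINVAL;
        if (arg_avail < sizes[cmd].argsize)     /* stands in for access_ok() */
                return -EFAULT;
        memcpy(arg_buf, argp, sizes[cmd].argsize);
        return 0;
}

int main(void)
{
        unsigned char user_arg[64] = {0}, karg[64];
        printf("cmd 1: %d\n", handle(1, user_arg, sizeof(user_arg), karg));  /* 0 */
        printf("cmd 9: %d\n", handle(9, user_arg, sizeof(user_arg), karg));  /* -EINVAL */
        return 0;
}
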
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6f69225cc..5c312b906 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -223,9 +223,10 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
goto done;
fh_lock(dirfhp);
dchild = lookup_one(argp->name, dget(dirfhp->fh_dentry));
- nfserr = nfserrno(PTR_ERR(dchild));
- if (IS_ERR(dchild))
+ if (IS_ERR(dchild)) {
+ nfserr = nfserrno(PTR_ERR(dchild));
goto out_unlock;
+ }
fh_init(newfhp, NFS_FHSIZE);
nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild);
if (!nfserr && !dchild->d_inode)
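
The nfsd_proc_create() hunk only reorders error handling: PTR_ERR() is meaningful only when IS_ERR() says the pointer actually encodes an error, so converting unconditionally would overwrite nfserr with a bogus value derived from a perfectly valid dentry. The idiom, sketched with simplified stand-ins modelled on the <linux/err.h> helpers (the kernel's exact bounds differ):

#include <stdio.h>

/* Simplified versions of the ERR_PTR/IS_ERR/PTR_ERR idiom. */
#define MAX_ERR 4095
static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERR;
}

int main(void)
{
        void *ok  = &(int){42};
        void *bad = ERR_PTR(-2 /* ENOENT */);

        /* Convert to an error code only after IS_ERR() confirms the pointer encodes one. */
        if (IS_ERR(bad))
                printf("lookup failed: %ld\n", PTR_ERR(bad));
        if (!IS_ERR(ok))
                printf("lookup succeeded\n");
        return 0;
}
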
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d69bba8d0..fb3b32f8d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -43,6 +43,8 @@ extern struct svc_program nfsd_program;
static void nfsd(struct svc_rqst *rqstp);
struct timeval nfssvc_boot = { 0, 0 };
static struct svc_serv *nfsd_serv = NULL;
+static int nfsd_busy = 0;
+static unsigned long nfsd_last_call;
struct nfsd_list {
struct list_head list;
@@ -115,6 +117,24 @@ nfsd_svc(unsigned short port, int nrservs)
return error;
}
+static void inline
+update_thread_usage(int busy_threads)
+{
+ unsigned long prev_call;
+ unsigned long diff;
+ int decile;
+
+ prev_call = nfsd_last_call;
+ nfsd_last_call = jiffies;
+ decile = busy_threads*10/nfsdstats.th_cnt;
+ if (decile>0 && decile <= 10) {
+ diff = nfsd_last_call - prev_call;
+ nfsdstats.th_usage[decile-1] += diff;
+ if (decile == 10)
+ nfsdstats.th_fullcnt++;
+ }
+}
+
/*
* This is the NFS server kernel thread
*/
@@ -134,6 +154,7 @@ nfsd(struct svc_rqst *rqstp)
sprintf(current->comm, "nfsd");
current->fs->umask = 0;
+ nfsdstats.th_cnt++;
/* Let svc_process check client's authentication. */
rqstp->rq_auth = 1;
@@ -161,6 +182,8 @@ nfsd(struct svc_rqst *rqstp)
;
if (err < 0)
break;
+ update_thread_usage(nfsd_busy);
+ nfsd_busy++;
/* Lock the export hash tables for reading. */
exp_readlock();
@@ -179,6 +202,8 @@ nfsd(struct svc_rqst *rqstp)
/* Unlock export hash tables */
exp_unlock();
+ update_thread_usage(nfsd_busy);
+ nfsd_busy--;
}
if (err != -EINTR) {
@@ -202,6 +227,7 @@ nfsd(struct svc_rqst *rqstp)
nfsd_racache_shutdown(); /* release read-ahead cache */
}
list_del(&me.list);
+ nfsdstats.th_cnt --;
/* Release the thread */
svc_exit_thread(rqstp);
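
update_thread_usage() keeps a crude load histogram: on every request it charges the jiffies elapsed since the previous request to the bucket for the current busy-thread decile, and counts the occasions on which every thread was busy. For example, with 8 threads and 4 busy the decile is 4*10/8 = 5, so th_usage[4] accumulates the elapsed time; with all 8 busy the decile is 10 and th_fullcnt is bumped. A self-contained sketch of the same accounting, with jiffies replaced by a plain counter and illustrative names:

#include <stdio.h>

static unsigned long usage[10];         /* time spent in each busy-thread decile */
static unsigned long fullcnt;           /* times every thread was busy */
static unsigned long last_call;

/* Charge the time since the previous call to the current busy-thread decile. */
static void update_thread_usage(int busy, int total, unsigned long now)
{
        unsigned long prev = last_call;
        int decile = busy * 10 / total;

        last_call = now;
        if (decile > 0 && decile <= 10) {
                usage[decile - 1] += now - prev;
                if (decile == 10)
                        fullcnt++;
        }
}

int main(void)
{
        update_thread_usage(4, 8, 100);   /* decile 5: bucket 4 gets 100 - 0 */
        update_thread_usage(8, 8, 250);   /* decile 10: bucket 9 gets 150, fullcnt++ */
        printf("bucket4=%lu bucket9=%lu full=%lu\n", usage[4], usage[9], fullcnt);
        return 0;
}
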
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 69defe790..254242fb6 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,6 +8,16 @@
* Format:
* rc <hits> <misses> <nocache>
 * Statistics for the reply cache
+ * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
+ * statistics for filehandle lookup
+ * io <bytes-read> <bytes-written>
+ * statistics for IO throughput
+ * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
+ * time (milliseconds) during which nfsd thread usage was above each threshold,
+ * and the number of times that all threads were in use
+ * ra cache-size <10% <20% <30% ... <100% not-found
+ * number of times that a read-ahead entry was found that deep in
+ * the cache.
* plus generic RPC stats (see net/sunrpc/stats.c)
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -33,17 +43,30 @@ nfsd_proc_read(char *buffer, char **start, off_t offset, int count,
int *eof, void *data)
{
int len;
+ int i;
- len = sprintf(buffer, "rc %d %d %d %d %d %d %d %d\n",
- nfsdstats.rchits,
- nfsdstats.rcmisses,
- nfsdstats.rcnocache,
- nfsdstats.fh_stale,
- nfsdstats.fh_lookup,
- nfsdstats.fh_anon,
- nfsdstats.fh_nocache_dir,
- nfsdstats.fh_nocache_nondir);
+ len = sprintf(buffer, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
+ nfsdstats.rchits,
+ nfsdstats.rcmisses,
+ nfsdstats.rcnocache,
+ nfsdstats.fh_stale,
+ nfsdstats.fh_lookup,
+ nfsdstats.fh_anon,
+ nfsdstats.fh_nocache_dir,
+ nfsdstats.fh_nocache_nondir,
+ nfsdstats.io_read,
+ nfsdstats.io_write);
+ /* thread usage: */
+ len += sprintf(buffer+len, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
+ for (i=0; i<10; i++)
+ len += sprintf(buffer+len, " %u", nfsdstats.th_usage[i]);
+ /* newline and ra-cache */
+ len += sprintf(buffer+len, "\nra %u", nfsdstats.ra_size);
+ for (i=0; i<11; i++)
+ len += sprintf(buffer+len, " %u", nfsdstats.ra_depth[i]);
+ len += sprintf(buffer+len, "\n");
+
/* Assume we haven't hit EOF yet. Will be set by svc_proc_read. */
*eof = 0;
@@ -53,13 +76,13 @@ nfsd_proc_read(char *buffer, char **start, off_t offset, int count,
*/
if (len <= offset) {
len = svc_proc_read(buffer, start, offset - len, count,
- eof, data);
+ eof, data);
return len;
}
if (len < count) {
len += svc_proc_read(buffer + len, start, 0, count - len,
- eof, data);
+ eof, data);
}
if (offset >= len) {
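
With these changes the nfsd statistics file (exported through the sunrpc proc tree, e.g. /proc/net/rpc/nfsd) grows the fh, io, th and ra lines documented in the comment block at the top of this file. An illustrative reading of such output with made-up numbers, parsed with nothing more than sscanf; the sample text is fabricated for the example, only the field order follows the format comment:

#include <stdio.h>
#include <string.h>

int main(void)
{
        /* Made-up sample; field order follows the stats.c format comment. */
        const char *sample =
                "rc 0 1410 27392\n"
                "fh 3 28788 0 14 2\n"
                "io 104857600 5242880\n"
                "th 8 12 400 300 250 200 150 120 90 60 30 10\n";

        unsigned int rd, wr, threads, full;
        const char *io = strstr(sample, "\nio ");
        const char *th = strstr(sample, "\nth ");

        if (io && sscanf(io, "\nio %u %u", &rd, &wr) == 2)
                printf("read %u bytes, wrote %u bytes\n", rd, wr);
        if (th && sscanf(th, "\nth %u %u", &threads, &full) == 2)
                printf("%u threads, all busy %u times\n", threads, full);
        return 0;
}
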
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e3be271a2..5cd55fda8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -539,13 +539,16 @@ static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
{
struct raparms *ra, **rap, **frap = NULL;
-
+ int depth = 0;
+
for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
if (ra->p_ino == ino && ra->p_dev == dev)
goto found;
+ depth++;
if (ra->p_count == 0)
frap = rap;
}
+ depth = nfsdstats.ra_size*11/10;
if (!frap)
return NULL;
rap = frap;
@@ -560,6 +563,7 @@ found:
raparm_cache = ra;
}
ra->p_count++;
+ nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
return ra;
}
@@ -598,6 +602,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
oldfs = get_fs(); set_fs(KERNEL_DS);
err = file.f_op->read(&file, buf, *count, &file.f_pos);
set_fs(oldfs);
+ nfsdstats.io_read += *count;
/* Write back readahead params */
if (ra != NULL) {
@@ -691,6 +696,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
#else
err = file.f_op->write(&file, buf, cnt, &file.f_pos);
#endif
+ nfsdstats.io_write += cnt;
set_fs(oldfs);
/* clear setuid/setgid flag after write */
@@ -1559,5 +1565,6 @@ nfsd_racache_init(int cache_size)
"nfsd: Could not allocate memory read-ahead cache.\n");
return -ENOMEM;
}
+ nfsdstats.ra_size = cache_size;
return 0;
}
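
nfsd_get_raparms() now records how deep in the read-ahead parameter list each hit was found: the depth is scaled into one of eleven buckets via depth*10/ra_size, and a miss is first given an artificial depth of ra_size*11/10 so that it lands in the final "not found" bucket. With cache_size = 16, a hit at depth 5 maps to bucket 5*10/16 = 3, and a miss maps to (16*11/10)*10/16 = 10, the last slot. A tiny sketch of that bucketing:

#include <stdio.h>

/* Map a lookup depth into one of 11 histogram buckets (0-10), as vfs.c does. */
static int ra_bucket(int depth, int ra_size)
{
        return depth * 10 / ra_size;
}

int main(void)
{
        int ra_size = 16;                       /* illustrative cache size */
        int miss_depth = ra_size * 11 / 10;     /* artificial depth used for a miss */

        printf("hit at depth 5  -> bucket %d\n", ra_bucket(5, ra_size));          /* 3 */
        printf("hit at depth 15 -> bucket %d\n", ra_bucket(15, ra_size));         /* 9 */
        printf("miss            -> bucket %d\n", ra_bucket(miss_depth, ra_size)); /* 10 */
        return 0;
}
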
diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c
index e95a36179..e6abd178e 100644
--- a/fs/ntfs/fs.c
+++ b/fs/ntfs/fs.c
@@ -595,7 +595,7 @@ static int ntfs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,ntfs_get_block);
}
-static int ntfs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int ntfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,ntfs_get_block,
&((struct inode*)page->mapping->host)->u.ntfs_i.mmu_private);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 8a5e404b1..bd8aa6b98 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -258,7 +258,7 @@ static void ntfs_load_attributes(ntfs_inode* ino)
if( !buf )
return;
delta=0;
- for(offset=0;datasize;datasize-=len)
+ for(offset=0;datasize;datasize-=len,offset+=len)
{
ntfs_io io;
io.fn_put=ntfs_put;
@@ -268,7 +268,7 @@ static void ntfs_load_attributes(ntfs_inode* ino)
if(ntfs_read_attr(ino,vol->at_attribute_list,0,offset,&io)){
ntfs_error("error in load_attributes\n");
}
- delta=len;
+ delta+=len;
parse_attributes(ino,buf,&delta);
if(delta)
/* move remaining bytes to buffer start */
diff --git a/fs/open.c b/fs/open.c
index 9f4d50a79..44202fe4e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -298,7 +298,6 @@ asmlinkage long sys_access(const char * filename, int mode)
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
- lock_kernel();
old_fsuid = current->fsuid;
old_fsgid = current->fsgid;
old_cap = current->cap_effective;
@@ -311,7 +310,8 @@ asmlinkage long sys_access(const char * filename, int mode)
cap_clear(current->cap_effective);
else
current->cap_effective = current->cap_permitted;
-
+
+ lock_kernel();
dentry = namei(filename);
res = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
@@ -321,12 +321,12 @@ asmlinkage long sys_access(const char * filename, int mode)
res = -EROFS;
dput(dentry);
}
+ unlock_kernel();
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
current->cap_effective = old_cap;
- unlock_kernel();
return res;
}
@@ -646,25 +646,35 @@ out:
*/
struct file *filp_open(const char * filename, int flags, int mode, struct dentry * base)
{
- struct inode * inode;
struct dentry * dentry;
- struct file * f;
int flag,error;
- error = -ENFILE;
- f = get_empty_filp();
- if (!f)
- goto out;
- f->f_flags = flag = flags;
- f->f_mode = (flag+1) & O_ACCMODE;
- if (f->f_mode)
+ flag = flags;
+ if ((flag+1) & O_ACCMODE)
flag++;
if (flag & O_TRUNC)
flag |= 2;
+
dentry = __open_namei(filename, flag, mode, base);
error = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto cleanup_file;
+ if (!IS_ERR(dentry))
+ return dentry_open(dentry, flags);
+
+ return ERR_PTR(error);
+}
+
+struct file *dentry_open(struct dentry *dentry, int flags)
+{
+ struct file * f;
+ struct inode *inode;
+ int error;
+
+ error = -ENFILE;
+ f = get_empty_filp();
+ if (!f)
+ goto cleanup_dentry;
+ f->f_flags = flags;
+ f->f_mode = (flags+1) & O_ACCMODE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = get_write_access(inode);
@@ -692,12 +702,10 @@ struct file *filp_open(const char * filename, int flags, int mode, struct dentry
cleanup_all:
if (f->f_mode & FMODE_WRITE)
put_write_access(inode);
-cleanup_dentry:
f->f_dentry = NULL;
+cleanup_dentry:
dput(dentry);
-cleanup_file:
put_filp(f);
-out:
return ERR_PTR(error);
}
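
filp_open() is split in two: the path lookup stays in filp_open(), while allocating the struct file, taking write access and calling the filesystem's open() move into the new dentry_open(), so code that already holds a dentry can open it directly. A hedged userspace model of the resulting call shape; the structs and helpers below are illustrative stand-ins with the error handling trimmed, not the kernel's definitions:

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins: a "dentry" names an object, a "file" is one open instance of it. */
struct dentry { const char *name; };
struct file   { struct dentry *dentry; int flags; };

static struct dentry *lookup(const char *path)                /* plays __open_namei() */
{
        struct dentry *d = malloc(sizeof(*d));
        if (d)
                d->name = path;
        return d;
}

static struct file *dentry_open(struct dentry *d, int flags)  /* plays dentry_open() */
{
        struct file *f = malloc(sizeof(*f));
        if (!f) {
                free(d);
                return NULL;
        }
        f->dentry = d;
        f->flags = flags;
        return f;
}

static struct file *filp_open(const char *path, int flags)    /* lookup, then open */
{
        struct dentry *d = lookup(path);
        return d ? dentry_open(d, flags) : NULL;
}

int main(void)
{
        struct file *f = filp_open("/tmp/example", 0);
        if (f)
                printf("opened %s\n", f->dentry->name);
        return 0;
}
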
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 27b4be8cb..92eed7559 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -1,4 +1,4 @@
-/* $Id: inode.c,v 1.9 2000/03/13 21:59:43 davem Exp $
+/* $Id: inode.c,v 1.10 2000/03/24 01:32:51 davem Exp $
* openpromfs.c: /proc/openprom handling routines
*
* Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com)
@@ -980,10 +980,6 @@ static void openprom_read_inode(struct inode * inode)
}
}
-static void openprom_put_super(struct super_block *sb)
-{
-}
-
static int openprom_statfs(struct super_block *sb, struct statfs *buf)
{
buf->f_type = OPENPROM_SUPER_MAGIC;
@@ -997,7 +993,6 @@ static int openprom_statfs(struct super_block *sb, struct statfs *buf)
static struct super_operations openprom_sops = {
read_inode: openprom_read_inode,
- put_super: openprom_put_super,
statfs: openprom_statfs,
};
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index fc9555b77..21330f499 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -26,7 +26,10 @@
#include <linux/major.h>
#include <linux/string.h>
#include <linux/blk.h>
+
+#ifdef CONFIG_IDE
#include <linux/ide.h> /* IDE xlate */
+#endif /* CONFIG_IDE */
#include <asm/system.h>
@@ -347,19 +350,19 @@ int msdos_partition(struct gendisk *hd, kdev_t dev,
unsigned char *data;
int mask = (1 << hd->minor_shift) - 1;
int sector_size = get_hardsect_size(dev) / 512;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_IDE
int tested_for_xlate = 0;
read_mbr:
-#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */
+#endif /* CONFIG_IDE */
if (!(bh = bread(dev,0,get_ptable_blocksize(dev)))) {
if (warn_no_part) printk(" unable to read partition table\n");
return -1;
}
data = bh->b_data;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_IDE
check_table:
-#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */
+#endif /* CONFIG_IDE */
/* Use bforget(), because we may have changed the disk geometry */
if (*(unsigned short *) (0x1fe + data) != cpu_to_le16(MSDOS_LABEL_MAGIC)) {
bforget(bh);
@@ -367,7 +370,7 @@ check_table:
}
p = (struct partition *) (0x1be + data);
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_IDE
if (!tested_for_xlate++) { /* Do this only once per disk */
/*
* Look for various forms of IDE disk geometry translation
@@ -423,7 +426,7 @@ check_table:
(void) ide_xlate_1024(dev, 2, heads, " [PTBL]");
}
}
-#endif /* (CONFIG_BLK_DEV_IDE) || (CONFIG_BLK_DEV_IDE_MODULE) */
+#endif /* CONFIG_IDE */
/* Look for partitions in two passes:
First find the primary partitions, and the DOS-type extended partitions.
diff --git a/fs/pipe.c b/fs/pipe.c
index 020416013..f25f5e514 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -13,16 +13,6 @@
#include <asm/uaccess.h>
/*
- * Define this if you want SunOS compatibility wrt braindead
- * select behaviour on FIFO's.
- */
-#ifdef __sparc__
-#define FIFO_SUNOS_BRAINDAMAGE
-#else
-#undef FIFO_SUNOS_BRAINDAMAGE
-#endif
-
-/*
* We use a start+len construction, which provides full use of the
* allocated memory.
* -- Florian Coosmann (FGC)
@@ -32,7 +22,7 @@
*/
/* Drop the inode semaphore and wait for a pipe event, atomically */
-static void pipe_wait(struct inode * inode)
+void pipe_wait(struct inode * inode)
{
DECLARE_WAITQUEUE(wait, current);
current->state = TASK_INTERRUPTIBLE;
@@ -296,7 +286,7 @@ pipe_poll(struct file *filp, poll_table *wait)
mask = POLLIN | POLLRDNORM;
if (PIPE_EMPTY(*inode))
mask = POLLOUT | POLLWRNORM;
- if (!PIPE_WRITERS(*inode))
+ if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
mask |= POLLHUP;
if (!PIPE_READERS(*inode))
mask |= POLLERR;
@@ -304,72 +294,9 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
-#ifdef FIFO_SUNOS_BRAINDAMAGE
-/*
- * Argh! Why does SunOS have to have different select() behaviour
- * for pipes and FIFOs? Hate, hate, hate! SunOS lacks POLLHUP.
- */
-static unsigned int
-fifo_poll(struct file *filp, poll_table *wait)
-{
- unsigned int mask;
- struct inode *inode = filp->f_dentry->d_inode;
-
- poll_wait(filp, PIPE_WAIT(*inode), wait);
-
- /* Reading only -- no need for aquiring the semaphore. */
- mask = POLLIN | POLLRDNORM;
- if (PIPE_EMPTY(*inode))
- mask = POLLOUT | POLLWRNORM;
- if (!PIPE_READERS(*inode))
- mask |= POLLERR;
-
- return mask;
-}
-#else
-
+/* FIXME: most Unices do not set POLLERR for fifos */
#define fifo_poll pipe_poll
-#endif /* FIFO_SUNOS_BRAINDAMAGE */
-
-/*
- * The 'connect_xxx()' functions are needed for named pipes when
- * the open() code hasn't guaranteed a connection (O_NONBLOCK),
- * and we need to act differently until we do get a writer..
- */
-static ssize_t
-connect_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
-{
- struct inode *inode = filp->f_dentry->d_inode;
-
- /* Reading only -- no need for aquiring the semaphore. */
- if (PIPE_EMPTY(*inode) && !PIPE_WRITERS(*inode))
- return 0;
-
- filp->f_op = &read_fifo_fops;
- return pipe_read(filp, buf, count, ppos);
-}
-
-static unsigned int
-connect_poll(struct file *filp, poll_table *wait)
-{
- struct inode *inode = filp->f_dentry->d_inode;
- unsigned int mask = 0;
-
- poll_wait(filp, PIPE_WAIT(*inode), wait);
-
- /* Reading only -- no need for aquiring the semaphore. */
- if (!PIPE_EMPTY(*inode)) {
- filp->f_op = &read_fifo_fops;
- mask = POLLIN | POLLRDNORM;
- } else if (PIPE_WRITERS(*inode)) {
- filp->f_op = &read_fifo_fops;
- mask = POLLOUT | POLLWRNORM;
- }
-
- return mask;
-}
-
static int
pipe_release(struct inode *inode, int decr, int decw)
{
@@ -450,16 +377,6 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
* The file_operations structs are not static because they
* are also used in linux/fs/fifo.c to do operations on FIFOs.
*/
-struct file_operations connecting_fifo_fops = {
- llseek: pipe_lseek,
- read: connect_read,
- write: bad_pipe_w,
- poll: connect_poll,
- ioctl: pipe_ioctl,
- open: pipe_read_open,
- release: pipe_read_release,
-};
-
struct file_operations read_fifo_fops = {
llseek: pipe_lseek,
read: pipe_read,
@@ -520,29 +437,42 @@ struct file_operations rdwr_pipe_fops = {
release: pipe_rdwr_release,
};
-static struct inode * get_pipe_inode(void)
+struct inode* pipe_new(struct inode* inode)
{
- struct inode *inode = get_empty_inode();
unsigned long page;
- if (!inode)
- goto fail_inode;
-
page = __get_free_page(GFP_USER);
if (!page)
- goto fail_iput;
+ return NULL;
inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (!inode->i_pipe)
goto fail_page;
- inode->i_fop = &rdwr_pipe_fops;
-
init_waitqueue_head(PIPE_WAIT(*inode));
- PIPE_BASE(*inode) = (char *) page;
+ PIPE_BASE(*inode) = (char*) page;
PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
- PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
+ PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
+
+ return inode;
+fail_page:
+ free_page(page);
+ return NULL;
+}
+
+static struct inode * get_pipe_inode(void)
+{
+ struct inode *inode = get_empty_inode();
+
+ if (!inode)
+ goto fail_inode;
+
+ if(!pipe_new(inode))
+ goto fail_iput;
+ PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
+ inode->i_fop = &rdwr_pipe_fops;
/*
* Mark the inode dirty from the very beginning,
@@ -558,8 +488,6 @@ static struct inode * get_pipe_inode(void)
inode->i_blksize = PAGE_SIZE;
return inode;
-fail_page:
- free_page(page);
fail_iput:
iput(inode);
fail_inode:
@@ -606,11 +534,13 @@ int do_pipe(int *fd)
f1->f_flags = O_RDONLY;
f1->f_op = &read_pipe_fops;
f1->f_mode = 1;
+ f1->f_version = 0;
/* write file */
f2->f_flags = O_WRONLY;
f2->f_op = &write_pipe_fops;
f2->f_mode = 2;
+ f2->f_version = 0;
fd_install(i, f1);
fd_install(j, f2);
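
Two related changes in pipe.c: pipe allocation is factored into pipe_new() so FIFOs can reuse it, and pipe_poll() reports POLLHUP only when there are no writers and the writer counter differs from the reader's f_version, i.e. a writer has existed and gone away since this reader sampled the counter (do_pipe() zeroes f_version for anonymous pipes; the FIFO open path, not shown in this hunk, presumably records the current writer counter there). A small model of that condition, with illustrative types:

#include <stdio.h>

struct pipe_state { int writers; unsigned int wcounter; };

/* Report hangup only if no writer is present and one has come and gone
 * since this reader sampled the counter (its f_version). */
static int poll_hup(const struct pipe_state *p, unsigned int f_version)
{
        return p->writers == 0 && f_version != p->wcounter;
}

int main(void)
{
        struct pipe_state p = { 0, 1 };
        unsigned int reader_version = 1;        /* sampled at open, no writer seen yet */

        printf("fresh fifo, no writer yet: HUP=%d\n", poll_hup(&p, reader_version)); /* 0 */
        p.writers = 1; p.wcounter = 2;          /* a writer opens: counter advances */
        p.writers = 0;                          /* ...and closes again */
        printf("writer came and went:      HUP=%d\n", poll_hup(&p, reader_version)); /* 1 */
        return 0;
}
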
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3e1c58ad7..54e594634 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -402,7 +402,8 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
++*pages;
if (pte_dirty(page))
++*dirty;
- if (pte_pagenr(page) >= max_mapnr)
+ if ((pte_pagenr(page) >= max_mapnr) ||
+ PageReserved(pte_pagenr(page) + mem_map))
continue;
if (page_count(pte_page(page)) > 1)
++*shared;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 9e78119c9..c6511354b 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -418,7 +418,7 @@ static int qnx4_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,qnx4_get_block);
}
-static int qnx4_prepare_write(struct page *page, unsigned from, unsigned to)
+static int qnx4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return cont_prepare_write(page,from,to,qnx4_get_block,
&((struct inode*)page->mapping->host)->u.qnx4_i.mmu_private);
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 7f5f2dee6..63d5a58ab 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -161,14 +161,6 @@ outnobh:
return s;
}
-/* Nothing to do.. */
-
-static void
-romfs_put_super(struct super_block *sb)
-{
- return;
-}
-
/* That's simple too. */
static int
@@ -526,7 +518,6 @@ romfs_read_inode(struct inode *i)
static struct super_operations romfs_ops = {
read_inode: romfs_read_inode,
- put_super: romfs_put_super,
statfs: romfs_statfs,
};
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 1360ca994..12e2bf295 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -268,7 +268,7 @@ out:
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
-static int smb_prepare_write(struct page *page, unsigned offset, unsigned to)
+static int smb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
kmap(page);
return 0;
diff --git a/fs/super.c b/fs/super.c
index dd34ddc70..302487807 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -147,7 +147,6 @@ static int fs_index(const char * __name)
err = index;
break;
}
- index++;
}
spin_unlock(&file_systems_lock);
putname(name);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3367c02ef..b6396ff04 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -949,7 +949,7 @@ static int sysv_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,sysv_get_block);
}
-static int sysv_prepare_write(struct page *page, unsigned from, unsigned to)
+static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,sysv_get_block);
}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1aa3aa1c4..2be4e8562 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -85,14 +85,14 @@ found_middle:
#define find_first_one_bit(addr, size)\
find_next_one_bit((addr), (size), 0)
-static int read_block_bitmap(struct super_block * sb, unsigned int block,
- unsigned long bitmap_nr)
+static int read_block_bitmap(struct super_block * sb, Uint32 bitmap,
+ unsigned int block, unsigned long bitmap_nr)
{
struct buffer_head *bh = NULL;
int retval = 0;
lb_addr loc;
- loc.logicalBlockNum = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap;
+ loc.logicalBlockNum = bitmap;
loc.partitionReferenceNum = UDF_SB_PARTITION(sb);
bh = udf_tread(sb, udf_get_lb_pblock(sb, loc, block), sb->s_blocksize);
@@ -105,7 +105,8 @@ static int read_block_bitmap(struct super_block * sb, unsigned int block,
return retval;
}
-static int __load_block_bitmap(struct super_block * sb, unsigned int block_group)
+static int __load_block_bitmap(struct super_block * sb, Uint32 bitmap,
+ unsigned int block_group)
{
int i, j, retval = 0;
unsigned long block_bitmap_number;
@@ -125,7 +126,7 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group
if (UDF_SB_BLOCK_BITMAP_NUMBER(sb, block_group) == block_group)
return block_group;
}
- retval = read_block_bitmap(sb, block_group, block_group);
+ retval = read_block_bitmap(sb, bitmap, block_group, block_group);
if (retval < 0)
return retval;
return block_group;
@@ -150,7 +151,7 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group
UDF_SB_BLOCK_BITMAP(sb, 0) = block_bitmap;
if (!block_bitmap)
- retval = read_block_bitmap(sb, block_group, 0);
+ retval = read_block_bitmap(sb, bitmap, block_group, 0);
}
else
{
@@ -163,12 +164,12 @@ static int __load_block_bitmap(struct super_block * sb, unsigned int block_group
UDF_SB_BLOCK_BITMAP_NUMBER(sb, j) = UDF_SB_BLOCK_BITMAP_NUMBER(sb, j-1);
UDF_SB_BLOCK_BITMAP(sb, j) = UDF_SB_BLOCK_BITMAP(sb, j-1);
}
- retval = read_block_bitmap(sb, block_group, 0);
+ retval = read_block_bitmap(sb, bitmap, block_group, 0);
}
return retval;
}
-static inline int load_block_bitmap(struct super_block *sb,
+static inline int load_block_bitmap(struct super_block *sb, Uint32 bitmap,
unsigned int block_group)
{
int slot;
@@ -189,7 +190,7 @@ static inline int load_block_bitmap(struct super_block *sb,
}
else
{
- slot = __load_block_bitmap(sb, block_group);
+ slot = __load_block_bitmap(sb, bitmap, block_group);
}
if (slot < 0)
@@ -201,8 +202,8 @@ static inline int load_block_bitmap(struct super_block *sb,
return slot;
}
-void udf_free_blocks(const struct inode * inode, lb_addr bloc, Uint32 offset,
- Uint32 count)
+static void udf_bitmap_free_blocks(const struct inode * inode, Uint32 bitmap,
+ lb_addr bloc, Uint32 offset, Uint32 count)
{
struct buffer_head * bh = NULL;
unsigned long block;
@@ -220,9 +221,6 @@ void udf_free_blocks(const struct inode * inode, lb_addr bloc, Uint32 offset,
return;
}
- if (UDF_SB_PARTMAPS(sb)[bloc.partitionReferenceNum].s_uspace_bitmap == 0xFFFFFFFF)
- return;
-
lock_super(sb);
if (bloc.logicalBlockNum < 0 ||
(bloc.logicalBlockNum + count) > UDF_SB_PARTLEN(sb, bloc.partitionReferenceNum))
@@ -248,7 +246,7 @@ do_more:
overflow = bit + count - (sb->s_blocksize << 3);
count -= overflow;
}
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto error_return;
@@ -285,8 +283,8 @@ error_return:
return;
}
-int udf_prealloc_blocks(const struct inode * inode, Uint16 partition,
- Uint32 first_block, Uint32 block_count)
+static int udf_bitmap_prealloc_blocks(const struct inode * inode, Uint32 bitmap,
+ Uint16 partition, Uint32 first_block, Uint32 block_count)
{
int alloc_count = 0;
int bit, block, block_group, group_start;
@@ -312,7 +310,7 @@ repeat:
block_group = block >> (sb->s_blocksize_bits + 3);
group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc);
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto out;
bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr);
@@ -351,7 +349,8 @@ out:
return alloc_count;
}
-int udf_new_block(const struct inode * inode, Uint16 partition, Uint32 goal, int *err)
+static int udf_bitmap_new_block(const struct inode * inode, Uint32 bitmap,
+ Uint16 partition, Uint32 goal, int *err)
{
int tmp, newbit, bit=0, block, block_group, group_start;
int end_goal, nr_groups, bitmap_nr, i;
@@ -379,7 +378,7 @@ repeat:
block_group = block >> (sb->s_blocksize_bits + 3);
group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc);
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto error_return;
bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr);
@@ -419,7 +418,7 @@ repeat:
block_group = 0;
group_start = block_group ? 0 : sizeof(struct SpaceBitmapDesc);
- bitmap_nr = load_block_bitmap(sb, block_group);
+ bitmap_nr = load_block_bitmap(sb, bitmap, block_group);
if (bitmap_nr < 0)
goto error_return;
bh = UDF_SB_BLOCK_BITMAP(sb, bitmap_nr);
@@ -497,3 +496,64 @@ error_return:
unlock_super(sb);
return 0;
}
+
+inline void udf_free_blocks(const struct inode * inode, lb_addr bloc,
+ Uint32 offset, Uint32 count)
+{
+ if (UDF_SB_PARTFLAGS(inode->i_sb, bloc.partitionReferenceNum) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ {
+ return udf_bitmap_free_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[bloc.partitionReferenceNum].s_uspace.bitmap,
+ bloc, offset, count);
+ }
+ else if (UDF_SB_PARTFLAGS(inode->i_sb, bloc.partitionReferenceNum) & UDF_PART_FLAG_FREED_BITMAP)
+ {
+ return udf_bitmap_free_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[bloc.partitionReferenceNum].s_fspace.bitmap,
+ bloc, offset, count);
+ }
+ else
+ return;
+}
+
+inline int udf_prealloc_blocks(const struct inode * inode, Uint16 partition,
+ Uint32 first_block, Uint32 block_count)
+{
+ if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ {
+ return udf_bitmap_prealloc_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_uspace.bitmap,
+ partition, first_block, block_count);
+ }
+ else if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_FREED_BITMAP)
+ {
+ return udf_bitmap_prealloc_blocks(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_fspace.bitmap,
+ partition, first_block, block_count);
+ }
+ else
+ return 0;
+}
+
+inline int udf_new_block(const struct inode * inode, Uint16 partition,
+ Uint32 goal, int *err)
+{
+ if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ {
+ return udf_bitmap_new_block(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_uspace.bitmap,
+ partition, goal, err);
+ }
+ else if (UDF_SB_PARTFLAGS(inode->i_sb, partition) & UDF_PART_FLAG_FREED_BITMAP)
+ {
+ return udf_bitmap_new_block(inode,
+ UDF_SB_PARTMAPS(inode->i_sb)[partition].s_fspace.bitmap,
+ partition, goal, err);
+ }
+ else
+ {
+ *err = -EIO;
+ return 0;
+ }
+}
+
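
udf_free_blocks(), udf_prealloc_blocks() and udf_new_block() become thin dispatchers: instead of the old 0xFFFFFFFF sentinel in s_uspace_bitmap, they consult the partition flags to decide whether the unallocated-space or the freed-space bitmap applies and pass that bitmap's location down to the former implementations. The pattern, reduced to a sketch with illustrative flag and field names:

#include <stdio.h>

#define PART_FLAG_UNALLOC_BITMAP 0x0001
#define PART_FLAG_FREED_BITMAP   0x0004

struct partition { unsigned int flags; unsigned int uspace_bitmap, fspace_bitmap; };

/* Pick which space bitmap governs allocation for this partition, if any. */
static int pick_bitmap(const struct partition *p, unsigned int *bitmap)
{
        if (p->flags & PART_FLAG_UNALLOC_BITMAP)
                *bitmap = p->uspace_bitmap;
        else if (p->flags & PART_FLAG_FREED_BITMAP)
                *bitmap = p->fspace_bitmap;
        else
                return -1;              /* no bitmap: caller reports an error or does nothing */
        return 0;
}

int main(void)
{
        struct partition p = { PART_FLAG_FREED_BITMAP, 100, 200 };
        unsigned int bm;
        if (pick_bitmap(&p, &bm) == 0)
                printf("using bitmap at block %u\n", bm);       /* 200 */
        return 0;
}
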
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 73d47ac10..96297521b 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -87,7 +87,7 @@ static int udf_adinicb_writepage(struct dentry *dentry, struct page *page)
return 0;
}
-static int udf_adinicb_prepare_write(struct page *page, unsigned offset, unsigned to)
+static int udf_adinicb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
kmap(page);
return 0;
@@ -246,7 +246,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
struct FileEntry *fe;
fe = (struct FileEntry *)bh->b_data;
- eaicb = fe->extendedAttrICB;
+ eaicb = lela_to_cpu(fe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = fe->extendedAttr;
}
@@ -255,7 +255,7 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
struct ExtendedFileEntry *efe;
efe = (struct ExtendedFileEntry *)bh->b_data;
- eaicb = efe->extendedAttrICB;
+ eaicb = lela_to_cpu(efe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = efe->extendedAttr;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 264086135..ed1507fa7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -74,10 +74,13 @@ static int udf_get_block(struct inode *, long, struct buffer_head *, int);
*/
void udf_put_inode(struct inode * inode)
{
- lock_kernel();
- udf_discard_prealloc(inode);
- write_inode_now(inode);
- unlock_kernel();
+ if (!(inode->i_sb->s_flags & MS_RDONLY))
+ {
+ lock_kernel();
+ udf_discard_prealloc(inode);
+ write_inode_now(inode);
+ unlock_kernel();
+ }
}
/*
@@ -130,7 +133,7 @@ static int udf_readpage(struct dentry *dentry, struct page *page)
return block_read_full_page(page, udf_get_block);
}
-static int udf_prepare_write(struct page *page, unsigned from, unsigned to)
+static int udf_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page, from, to, udf_get_block);
}
@@ -1554,16 +1557,18 @@ int udf_add_aext(struct inode *inode, lb_addr *bloc, int *extoffset,
case ICB_FLAG_AD_SHORT:
{
sad = (short_ad *)sptr;
- sad->extLength = EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
- inode->i_sb->s_blocksize;
+ sad->extLength = cpu_to_le32(
+ EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
+ inode->i_sb->s_blocksize);
sad->extPosition = cpu_to_le32(bloc->logicalBlockNum);
break;
}
case ICB_FLAG_AD_LONG:
{
lad = (long_ad *)sptr;
- lad->extLength = EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
- inode->i_sb->s_blocksize;
+ lad->extLength = cpu_to_le32(
+ EXTENT_NEXT_EXTENT_ALLOCDECS << 30 |
+ inode->i_sb->s_blocksize);
lad->extLocation = cpu_to_lelb(*bloc);
break;
}
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 653997033..1403cec52 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -70,8 +70,6 @@ udf_get_last_session(struct super_block *sb)
unsigned int
udf_get_last_block(struct super_block *sb)
{
- extern int *blksize_size[];
- kdev_t dev = sb->s_dev;
struct block_device *bdev = sb->s_bdev;
int ret;
unsigned long lblock = 0;
@@ -80,28 +78,10 @@ udf_get_last_block(struct super_block *sb)
if (ret) /* Hard Disk */
{
- unsigned int hbsize = get_hardblocksize(dev);
- unsigned int blocksize = sb->s_blocksize;
- unsigned int mult = 0;
- unsigned int div = 0;
-
- if (!hbsize)
- hbsize = blksize_size[MAJOR(dev)][MINOR(dev)];
-
- if (hbsize > blocksize)
- mult = hbsize / blocksize;
- else if (blocksize > hbsize)
- div = blocksize / hbsize;
-
ret = ioctl_by_bdev(bdev, BLKGETSIZE, (unsigned long) &lblock);
if (!ret && lblock != 0x7FFFFFFF)
- {
- if (mult)
- lblock *= mult;
- else if (div)
- lblock /= div;
- }
+ lblock = ((512 * lblock) / sb->s_blocksize);
}
if (!ret && lblock)
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index bed1e3984..ae998258e 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -90,7 +90,7 @@ udf_add_extendedattr(struct inode * inode, Uint32 size, Uint32 type,
struct FileEntry *fe;
fe = (struct FileEntry *)(*bh)->b_data;
- eaicb = fe->extendedAttrICB;
+ eaicb = lela_to_cpu(fe->extendedAttrICB);
offset = sizeof(struct FileEntry);
}
else
@@ -98,7 +98,7 @@ udf_add_extendedattr(struct inode * inode, Uint32 size, Uint32 type,
struct ExtendedFileEntry *efe;
efe = (struct ExtendedFileEntry *)(*bh)->b_data;
- eaicb = efe->extendedAttrICB;
+ eaicb = lela_to_cpu(efe->extendedAttrICB);
offset = sizeof(struct ExtendedFileEntry);
}
@@ -206,7 +206,7 @@ udf_get_extendedattr(struct inode * inode, Uint32 type, Uint8 subtype,
struct FileEntry *fe;
fe = (struct FileEntry *)(*bh)->b_data;
- eaicb = fe->extendedAttrICB;
+ eaicb = lela_to_cpu(fe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = fe->extendedAttr;
}
@@ -215,7 +215,7 @@ udf_get_extendedattr(struct inode * inode, Uint32 type, Uint8 subtype,
struct ExtendedFileEntry *efe;
efe = (struct ExtendedFileEntry *)(*bh)->b_data;
- eaicb = efe->extendedAttrICB;
+ eaicb = lela_to_cpu(efe->extendedAttrICB);
if (UDF_I_LENEATTR(inode))
ea = efe->extendedAttr;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c371b5d52..a44e19043 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -935,7 +935,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
inode->i_data.a_ops = &udf_symlink_aops;
inode->i_op = &page_symlink_inode_operations;
- if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_IN_ICB)
+ if (UDF_I_ALLOCTYPE(inode) != ICB_FLAG_AD_IN_ICB)
{
struct buffer_head *bh = NULL;
lb_addr bloc, eloc;
@@ -964,7 +964,7 @@ static int udf_symlink(struct inode * dir, struct dentry * dentry, const char *
bh = udf_tread(inode->i_sb, block, inode->i_sb->s_blocksize);
ea = bh->b_data + udf_ext0_offset(inode);
- eoffset = inode->i_sb->s_blocksize - (ea - bh->b_data);
+ eoffset = inode->i_sb->s_blocksize - udf_ext0_offset(inode);
pc = (struct PathComponent *)ea;
if (*symname == '/')
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 81f59e9a3..5f76abbb0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -100,14 +100,14 @@ static DECLARE_FSTYPE_DEV(udf_fstype, "udf", udf_read_super);
/* Superblock operations */
static struct super_operations udf_sb_ops = {
- read_inode: udf_read_inode,
+ read_inode: udf_read_inode,
write_inode: udf_write_inode,
- put_inode: udf_put_inode,
+ put_inode: udf_put_inode,
delete_inode: udf_delete_inode,
- put_super: udf_put_super,
+ put_super: udf_put_super,
write_super: udf_write_super,
- statfs: udf_statfs,
- remount_fs: udf_remount_fs,
+ statfs: udf_statfs,
+ remount_fs: udf_remount_fs,
};
struct udf_options
@@ -127,7 +127,6 @@ struct udf_options
uid_t uid;
};
-
static int __init init_udf_fs(void)
{
printk(KERN_NOTICE "udf: registering filesystem\n");
@@ -745,8 +744,9 @@ udf_load_pvoldesc(struct super_block *sb, struct buffer_head *bh)
{
if (udf_CS0toUTF8(&outstr, &instr))
{
- udf_debug("volIdent[] = '%s'\n", outstr.u_name);
- strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name, outstr.u_len);
+ strncpy( UDF_SB_VOLIDENT(sb), outstr.u_name,
+ outstr.u_len > 31 ? 31 : outstr.u_len);
+ udf_debug("volIdent[] = '%s'\n", UDF_SB_VOLIDENT(sb));
}
}
@@ -788,7 +788,6 @@ udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
{
UDF_SB_PARTLEN(sb,i) = le32_to_cpu(p->partitionLength); /* blocks */
UDF_SB_PARTROOT(sb,i) = le32_to_cpu(p->partitionStartingLocation) + UDF_SB_SESSION(sb);
- UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = 0xFFFFFFFF;
if (UDF_SB_PARTTYPE(sb,i) == UDF_SPARABLE_MAP15)
udf_fill_spartable(sb, &UDF_SB_TYPESPAR(sb,i), UDF_SB_PARTLEN(sb,i));
@@ -803,17 +802,24 @@ udf_load_partdesc(struct super_block *sb, struct buffer_head *bh)
udf_debug("unallocatedSpaceTable (part %d)\n", i);
if (phd->unallocatedSpaceBitmap.extLength)
{
- UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap =
+ UDF_SB_PARTMAPS(sb)[i].s_uspace.bitmap =
le32_to_cpu(phd->unallocatedSpaceBitmap.extPosition);
+ UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_BITMAP;
udf_debug("unallocatedSpaceBitmap (part %d) @ %d\n",
- i, UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap);
+ i, UDF_SB_PARTMAPS(sb)[i].s_uspace.bitmap);
}
if (phd->partitionIntegrityTable.extLength)
udf_debug("partitionIntegrityTable (part %d)\n", i);
if (phd->freedSpaceTable.extLength)
udf_debug("freedSpaceTable (part %d)\n", i);
if (phd->freedSpaceBitmap.extLength)
- udf_debug("freedSpaceBitmap (part %d\n", i);
+ {
+ UDF_SB_PARTMAPS(sb)[i].s_fspace.bitmap =
+ le32_to_cpu(phd->freedSpaceBitmap.extPosition);
+ UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_BITMAP;
+ udf_debug("freedSpaceBitmap (part %d) @ %d\n",
+ i, UDF_SB_PARTMAPS(sb)[i].s_fspace.bitmap);
+ }
}
break;
}
@@ -1184,7 +1190,6 @@ udf_load_partition(struct super_block *sb, lb_addr *fileset)
}
UDF_SB_PARTROOT(sb,i) = udf_get_pblock(sb, 0, i, 0);
UDF_SB_PARTLEN(sb,i) = UDF_SB_PARTLEN(sb,ino.partitionReferenceNum);
- UDF_SB_PARTMAPS(sb)[i].s_uspace_bitmap = 0xFFFFFFFF;
}
}
}
@@ -1520,34 +1525,27 @@ static unsigned int
udf_count_free(struct super_block *sb)
{
struct buffer_head *bh = NULL;
- unsigned int accum=0;
- int index;
- int block=0, newblock;
+ unsigned int accum = 0;
lb_addr loc;
- Uint32 bytes;
- Uint8 value;
- Uint8 * ptr;
- Uint16 ident;
-
- if (UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap == 0xFFFFFFFF)
- {
- if (UDF_SB_LVIDBH(sb))
- {
- if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb))
- accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]);
-
- if (accum == 0xFFFFFFFF)
- accum = 0;
+ Uint32 bitmap;
- return accum;
- }
- else
- return 0;
- }
+ if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_UNALLOC_BITMAP)
+ bitmap = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace.bitmap;
+ else if (UDF_SB_PARTFLAGS(sb,UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_FREED_BITMAP)
+ bitmap = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_fspace.bitmap;
else
+ bitmap = 0xFFFFFFFF;
+
+ if (bitmap != 0xFFFFFFFF)
{
struct SpaceBitmapDesc *bm;
- loc.logicalBlockNum = UDF_SB_PARTMAPS(sb)[UDF_SB_PARTITION(sb)].s_uspace_bitmap;
+ int block = 0, newblock, index;
+ Uint16 ident;
+ Uint32 bytes;
+ Uint8 value;
+ Uint8 * ptr;
+
+ loc.logicalBlockNum = bitmap;
loc.partitionReferenceNum = UDF_SB_PARTITION(sb);
bh = udf_read_ptagged(sb, loc, 0, &ident);
@@ -1593,6 +1591,18 @@ udf_count_free(struct super_block *sb)
}
}
udf_release_data(bh);
- return accum;
}
+ else
+ {
+ if (UDF_SB_LVIDBH(sb))
+ {
+ if (le32_to_cpu(UDF_SB_LVID(sb)->numOfPartitions) > UDF_SB_PARTITION(sb))
+ accum = le32_to_cpu(UDF_SB_LVID(sb)->freeSpaceTable[UDF_SB_PARTITION(sb)]);
+
+ if (accum == 0xFFFFFFFF)
+ accum = 0;
+ }
+ }
+
+ return accum;
}
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 6084c5613..6988a7238 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -18,6 +18,11 @@
#define UDF_FLAG_UNDELETE 6
#define UDF_FLAG_UNHIDE 7
#define UDF_FLAG_VARCONV 8
+
+#define UDF_PART_FLAG_UNALLOC_BITMAP 0x0001
+#define UDF_PART_FLAG_UNALLOC_TABLE 0x0002
+#define UDF_PART_FLAG_FREED_BITMAP 0x0004
+#define UDF_PART_FLAG_FREED_TABLE 0x0008
#define UDF_SB_FREE(X)\
{\
@@ -52,6 +57,7 @@
#define UDF_SB_TYPESPAR(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_sparing )
#define UDF_SB_TYPEVIRT(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_type_specific.s_virtual )
#define UDF_SB_PARTFUNC(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_func )
+#define UDF_SB_PARTFLAGS(X,Y) ( UDF_SB_PARTMAPS(X)[(Y)].s_partition_flags )
#define UDF_SB_VOLIDENT(X) ( UDF_SB(X)->s_volident )
#define UDF_SB_NUMPARTS(X) ( UDF_SB(X)->s_partitions )
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index b92eed7db..7dd00bc19 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -106,7 +106,7 @@ struct ktm
struct ustr
{
Uint8 u_cmpID;
- Uint8 u_name[UDF_NAME_LEN-1];
+ Uint8 u_name[UDF_NAME_LEN];
Uint8 u_len;
Uint8 padding;
unsigned long u_hash;
@@ -182,6 +182,8 @@ extern void udf_truncate(struct inode *);
extern void udf_free_blocks(const struct inode *, lb_addr, Uint32, Uint32);
extern int udf_prealloc_blocks(const struct inode *, Uint16, Uint32, Uint32);
extern int udf_new_block(const struct inode *, Uint16, Uint32, int *);
+
+/* fsync.c */
extern int udf_sync_file(struct file *, struct dentry *);
/* directory.c */
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 134b3c755..7cb2d3c1f 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -177,7 +177,8 @@ int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
return 0;
}
- for (i = 0; (i < ocu_len) && (utf_o->u_len < UDF_NAME_LEN) ;) {
+ for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
+ {
/* Expand OSTA compressed Unicode to Unicode */
c = ocu[i++];
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index af07961e1..8c5c15d55 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -548,7 +548,7 @@ static int ufs_readpage(struct dentry *dentry, struct page *page)
{
return block_read_full_page(page,ufs_getfrag_block);
}
-static int ufs_prepare_write(struct page *page, unsigned from, unsigned to)
+static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
return block_prepare_write(page,from,to,ufs_getfrag_block);
}