summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>2000-01-27 01:05:20 +0000
committerRalf Baechle <ralf@linux-mips.org>2000-01-27 01:05:20 +0000
commit546db14ee74118296f425f3b91634fb767d67290 (patch)
tree22b613a3da8d4bf663eec5e155af01b87fdf9094 /fs
parent1e25e41c4f5474e14452094492dbc169b800e4c8 (diff)
Merge with Linux 2.3.23. The new bootmem stuff has broken various
platforms. At this time I've only verified that IP22 support compiles and IP27 actually works.
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_aout.c65
-rw-r--r--fs/buffer.c405
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/exec.c78
-rw-r--r--fs/file.c4
-rw-r--r--fs/hpfs/anode.c2
-rw-r--r--fs/hpfs/dir.c41
-rw-r--r--fs/hpfs/dnode.c4
-rw-r--r--fs/hpfs/file.c139
-rw-r--r--fs/hpfs/hpfs_fn.h6
-rw-r--r--fs/hpfs/inode.c8
-rw-r--r--fs/inode.c19
-rw-r--r--fs/iobuf.c22
-rw-r--r--fs/minix/truncate.c2
-rw-r--r--fs/nfs/dir.c144
-rw-r--r--fs/nfs/inode.c226
-rw-r--r--fs/nfs/nfs2xdr.c36
-rw-r--r--fs/nfs/proc.c8
-rw-r--r--fs/nfs/symlink.c7
-rw-r--r--fs/proc/array.c316
-rw-r--r--fs/proc/mem.c40
-rw-r--r--fs/super.c4
22 files changed, 863 insertions, 720 deletions
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index cc72f4e18..ca5d8e8cb 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -270,7 +270,6 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs
unsigned long fd_offset;
unsigned long rlim;
int retval;
- static unsigned long error_time=0;
ex = *((struct exec *) bprm->buf); /* exec-header */
if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
@@ -282,29 +281,6 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs
fd_offset = N_TXTOFF(ex);
-#ifdef __i386__
- if (N_MAGIC(ex) == ZMAGIC && fd_offset != BLOCK_SIZE) {
- if((jiffies-error_time) >5)
- {
- printk(KERN_NOTICE "N_TXTOFF != BLOCK_SIZE. See a.out.h.\n");
- error_time=jiffies;
- }
- return -ENOEXEC;
- }
-
- if (N_MAGIC(ex) == ZMAGIC && ex.a_text &&
- bprm->dentry->d_inode->i_op &&
- bprm->dentry->d_inode->i_op->get_block &&
- (fd_offset < bprm->dentry->d_inode->i_sb->s_blocksize)) {
- if((jiffies-error_time) >5)
- {
- printk(KERN_NOTICE "N_TXTOFF < BLOCK_SIZE. Please convert binary.\n");
- error_time=jiffies;
- }
- return -ENOEXEC;
- }
-#endif
-
/* Check initial limits. This avoids letting people circumvent
* size limits imposed on them by creating programs with large
* arrays in the data or bss.
@@ -364,26 +340,32 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs
flush_icache_range((unsigned long) 0,
(unsigned long) ex.a_text+ex.a_data);
} else {
+ static unsigned long error_time, error_time2;
if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
- (N_MAGIC(ex) != NMAGIC))
+ (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
+ {
printk(KERN_NOTICE "executable not page aligned\n");
+ error_time2 = jiffies;
+ }
fd = open_dentry(bprm->dentry, O_RDONLY);
if (fd < 0)
return fd;
file = fget(fd);
- if ((fd_offset & ~PAGE_MASK) != 0) {
+ if ((fd_offset & ~PAGE_MASK) != 0 &&
+ (jiffies-error_time) > 5*HZ)
+ {
printk(KERN_WARNING
"fd_offset is not page aligned. Please convert program: %s\n",
- file->f_dentry->d_name.name
- );
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
}
if (!file->f_op || !file->f_op->mmap || ((fd_offset & ~PAGE_MASK) != 0)) {
fput(file);
sys_close(fd);
- do_brk(0, ex.a_text+ex.a_data);
+ do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
read_exec(bprm->dentry, fd_offset,
(char *) N_TXTADDR(ex), ex.a_text+ex.a_data, 0);
flush_icache_range((unsigned long) N_TXTADDR(ex),
@@ -493,12 +475,6 @@ do_load_aout_library(int fd)
goto out_putf;
}
- if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) &&
- (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) {
- printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n");
- goto out_putf;
- }
-
if (N_FLAGS(ex))
goto out_putf;
@@ -508,14 +484,17 @@ do_load_aout_library(int fd)
start_addr = ex.a_entry & 0xfffff000;
if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
- printk(KERN_WARNING
- "N_TXTOFF is not page aligned. Please convert library: %s\n",
- file->f_dentry->d_name.name
- );
-
- do_mmap(NULL, start_addr & PAGE_MASK, ex.a_text + ex.a_data + ex.a_bss,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED| MAP_PRIVATE, 0);
+ static unsigned long error_time;
+
+ if ((jiffies-error_time) > 5*HZ)
+ {
+ printk(KERN_WARNING
+ "N_TXTOFF is not page aligned. Please convert library: %s\n",
+ file->f_dentry->d_name.name);
+ error_time = jiffies;
+ }
+
+ do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
read_exec(file->f_dentry, N_TXTOFF(ex),
(char *)start_addr, ex.a_text + ex.a_data, 0);
diff --git a/fs/buffer.c b/fs/buffer.c
index c43c54a36..39dd880f8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -26,6 +26,8 @@
/* Thread it... -DaveM */
+/* async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> */
+
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/malloc.h>
@@ -76,6 +78,7 @@ static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED;
static struct buffer_head *lru_list[NR_LIST];
static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED;
static int nr_buffers_type[NR_LIST] = {0,};
+static unsigned long size_buffers_type[NR_LIST] = {0,};
static struct buffer_head * unused_list = NULL;
static int nr_unused_buffer_heads = 0;
@@ -93,7 +96,7 @@ static kmem_cache_t *bh_cachep;
static int grow_buffers(int size);
/* This is used by some architectures to estimate available memory. */
-atomic_t buffermem = ATOMIC_INIT(0);
+atomic_t buffermem_pages = ATOMIC_INIT(0);
/* Here is the parameter block for the bdflush process. If you add or
* remove any of the parameters, make sure to update kernel/sysctl.c.
@@ -114,18 +117,18 @@ union bdflush_param {
each time we call refill */
int nref_dirt; /* Dirty buffer threshold for activating bdflush
when trying to refill buffers. */
- int dummy1; /* unused */
+ int interval; /* jiffies delay between kupdate flushes */
int age_buffer; /* Time for normal buffer to age before we flush it */
int age_super; /* Time for superblock to age before we flush it */
int dummy2; /* unused */
int dummy3; /* unused */
} b_un;
unsigned int data[N_PARAM];
-} bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}};
+} bdf_prm = {{40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ, 1884, 2}};
/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1};
-int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,1000, 6000*HZ, 6000*HZ, 2047, 5};
+int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,600*HZ, 6000*HZ, 6000*HZ, 2047, 5};
void wakeup_bdflush(int);
@@ -482,6 +485,7 @@ static void __insert_into_lru_list(struct buffer_head * bh, int blist)
(*bhp)->b_prev_free->b_next_free = bh;
(*bhp)->b_prev_free = bh;
nr_buffers_type[blist]++;
+ size_buffers_type[blist] += bh->b_size;
}
static void __remove_from_lru_list(struct buffer_head * bh, int blist)
@@ -495,6 +499,7 @@ static void __remove_from_lru_list(struct buffer_head * bh, int blist)
lru_list[blist] = NULL;
bh->b_next_free = bh->b_prev_free = NULL;
nr_buffers_type[blist]--;
+ size_buffers_type[blist] -= bh->b_size;
}
}
@@ -813,6 +818,27 @@ out:
return bh;
}
+/* Classify how much buffer memory is dirty and report how urgently it
+ should be flushed. The dirty amount is size_buffers_type[BUF_DIRTY]
+ converted to pages; it is compared with a hard limit of
+ bdf_prm.b_un.nfract percent of (nr_lru_pages + nr_free_pages +
+ nr_free_highpages) and with a soft limit of half the hard limit.
+ The 'dev' argument is not used in this function.
+ Returns:
+ -1 -> no need to flush (dirty <= soft limit)
+ 0 -> async flush (soft limit < dirty <= hard limit)
+ 1 -> sync flush, i.e. wait for I/O completion (dirty > hard limit) */
+static int balance_dirty_state(kdev_t dev)
+{
+ unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit;
+
+ dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; /* running b_size total of the DIRTY lru list, in pages */
+ tot = nr_lru_pages + nr_free_pages + nr_free_highpages;
+ hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100; /* nfract is applied as a percentage of tot */
+ soft_dirty_limit = hard_dirty_limit >> 1; /* half of the hard limit */
+
+ if (dirty > soft_dirty_limit)
+ {
+ if (dirty > hard_dirty_limit)
+ return 1;
+ return 0;
+ }
+ return -1;
+}
+
/*
* if a new dirty buffer is created we need to balance bdflush.
*
@@ -820,23 +846,13 @@ out:
* pressures on different devices - thus the (currently unused)
* 'dev' parameter.
*/
-static int too_many_dirty_buffers;
-
void balance_dirty(kdev_t dev)
{
- int dirty = nr_buffers_type[BUF_DIRTY];
- int ndirty = bdf_prm.b_un.ndirty;
-
- if (dirty > ndirty) {
- if (dirty > 2*ndirty) {
- too_many_dirty_buffers = 1;
- wakeup_bdflush(1);
- return;
- }
- wakeup_bdflush(0);
- }
- too_many_dirty_buffers = 0;
- return;
+ int state = balance_dirty_state(dev); /* -1, 0 or 1; see balance_dirty_state() */
+
+ if (state < 0) /* dirty memory is at or below the soft limit: nothing to do */
+ return;
+ wakeup_bdflush(state); /* 0: wake the flush daemon and return; 1: also sleep on bdflush_done */
}
static inline void __mark_dirty(struct buffer_head *bh, int flag)
@@ -1250,7 +1266,7 @@ int block_flushpage(struct inode *inode, struct page *page, unsigned long offset
*/
if (!offset) {
if (!try_to_free_buffers(page)) {
- atomic_add(PAGE_CACHE_SIZE, &buffermem);
+ atomic_inc(&buffermem_pages);
return 0;
}
}
@@ -1364,6 +1380,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long
unsigned long bbits, blocks, i, len;
struct buffer_head *bh, *head;
char * target_buf;
+ int need_balance_dirty;
target_buf = (char *)page_address(page) + offset;
@@ -1403,6 +1420,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long
i = 0;
bh = head;
partial = 0;
+ need_balance_dirty = 0;
do {
if (!bh)
BUG();
@@ -1473,8 +1491,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long
set_bit(BH_Uptodate, &bh->b_state);
if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
__mark_dirty(bh, 0);
- if (too_many_dirty_buffers)
- balance_dirty(bh->b_dev);
+ need_balance_dirty = 1;
}
if (err) {
@@ -1488,6 +1505,9 @@ skip:
bh = bh->b_this_page;
} while (bh != head);
+ if (need_balance_dirty)
+ balance_dirty(bh->b_dev);
+
/*
* is this a partial write that happened to make all buffers
* uptodate then we can optimize away a bogus readpage() for
@@ -1519,6 +1539,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of
struct buffer_head *bh, *head;
char * target_buf, *target_data;
unsigned long data_offset = offset;
+ int need_balance_dirty;
offset = inode->i_size - page->offset;
if (page->offset>inode->i_size)
@@ -1566,6 +1587,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of
i = 0;
bh = head;
partial = 0;
+ need_balance_dirty = 0;
do {
if (!bh)
BUG();
@@ -1644,8 +1666,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of
set_bit(BH_Uptodate, &bh->b_state);
if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
__mark_dirty(bh, 0);
- if (too_many_dirty_buffers)
- balance_dirty(bh->b_dev);
+ need_balance_dirty = 1;
}
if (err) {
@@ -1659,6 +1680,9 @@ skip:
bh = bh->b_this_page;
} while (bh != head);
+ if (need_balance_dirty)
+ balance_dirty(bh->b_dev);
+
/*
* is this a partial write that happened to make all buffers
* uptodate then we can optimize away a bogus readpage() for
@@ -1809,12 +1833,12 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
dprintk ("iobuf %d %d %d\n", offset, length, size);
for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
- page = iobuf->pagelist[pageind];
map = iobuf->maplist[pageind];
- if (map && PageBIGMEM(map)) {
+ if (map && PageHighMem(map)) {
err = -EIO;
goto error;
}
+ page = page_address(map);
while (length > 0) {
blocknr = b[bufind++];
@@ -2090,7 +2114,7 @@ static int grow_buffers(int size)
page_map = mem_map + MAP_NR(page);
page_map->buffers = bh;
lru_cache_add(page_map);
- atomic_add(PAGE_SIZE, &buffermem);
+ atomic_inc(&buffermem_pages);
return 1;
no_buffer_head:
@@ -2168,12 +2192,53 @@ out:
busy_buffer_page:
/* Uhhuh, start writeback so that we don't end up with all dirty pages */
- too_many_dirty_buffers = 1;
wakeup_bdflush(0);
ret = 0;
goto out;
}
+/* ================== Debugging =================== */
+/*
+ * show_buffers() prints buffer-cache statistics with printk().
+ *
+ * It always prints the total buffer memory in kB, computed from
+ * buffermem_pages. Only when built with __SMP__ does it go on to walk
+ * each lru list and print, per list: the number of buffers, how many
+ * have a non-zero b_count ("used") and the index of the last such
+ * buffer, and how many are locked, protected and dirty. Empty lists
+ * are skipped. If lru_list_lock cannot be taken with spin_trylock()
+ * the function returns after the first line without walking the lists.
+ */
+void show_buffers(void)
+{
+ struct buffer_head * bh;
+ int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
+ int protected = 0;
+ int nlist;
+ static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" }; /* labels indexed by lru list number */
+
+ printk("Buffer memory: %6dkB\n",
+ atomic_read(&buffermem_pages) << (PAGE_SHIFT-10)); /* pages -> kB */
+
+#ifdef __SMP__ /* trylock does nothing on UP and so we could deadlock */
+ if (!spin_trylock(&lru_list_lock))
+ return;
+ for(nlist = 0; nlist < NR_LIST; nlist++) {
+ found = locked = dirty = used = lastused = protected = 0; /* counters are per list */
+ bh = lru_list[nlist];
+ if(!bh) continue;
+
+ do { /* the list is circular: stop when we are back at its head */
+ found++;
+ if (buffer_locked(bh))
+ locked++;
+ if (buffer_protected(bh))
+ protected++;
+ if (buffer_dirty(bh))
+ dirty++;
+ if (atomic_read(&bh->b_count))
+ used++, lastused = found;
+ bh = bh->b_next_free;
+ } while (bh != lru_list[nlist]);
+ printk("%8s: %d buffers, %d used (last=%d), "
+ "%d locked, %d protected, %d dirty\n",
+ buf_types[nlist], found, used, lastused,
+ locked, protected, dirty);
+ }
+ spin_unlock(&lru_list_lock);
+#endif
+}
+
/* ===================== Init ======================= */
/*
@@ -2181,7 +2246,7 @@ busy_buffer_page:
* Use gfp() for the hash table to decrease TLB misses, use
* SLAB cache for buffer heads.
*/
-void __init buffer_init(unsigned long memory_size)
+void __init buffer_init(unsigned long mempages)
{
int order, i;
unsigned int nr_hash;
@@ -2189,9 +2254,11 @@ void __init buffer_init(unsigned long memory_size)
/* The buffer cache hash table is less important these days,
* trim it a bit.
*/
- memory_size >>= 14;
- memory_size *= sizeof(struct buffer_head *);
- for (order = 0; (PAGE_SIZE << order) < memory_size; order++)
+ mempages >>= 14;
+
+ mempages *= sizeof(struct buffer_head *);
+
+ for (order = 0; (1 << order) < mempages; order++)
;
/* try to allocate something until we get it or we're asking
@@ -2246,21 +2313,92 @@ void __init buffer_init(unsigned long memory_size)
* response to dirty buffers. Once this process is activated, we write back
* a limited number of buffers to the disks and then go back to sleep again.
*/
-static DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
static DECLARE_WAIT_QUEUE_HEAD(bdflush_done);
struct task_struct *bdflush_tsk = 0;
-void wakeup_bdflush(int wait)
+void wakeup_bdflush(int block)
{
+ DECLARE_WAITQUEUE(wait, current);
+/* Wake the flush daemon (bdflush_tsk).
+ A call made by the daemon itself returns immediately.
+ block == 0: wake the daemon and return at once.
+ block != 0: additionally sleep, uninterruptibly, on bdflush_done
+ until the daemon issues its wake_up on that queue. */
if (current == bdflush_tsk)
return;
- if (wait)
- run_task_queue(&tq_disk);
- wake_up(&bdflush_wait);
- if (wait)
- sleep_on(&bdflush_done);
+
+ if (!block)
+ {
+ wake_up_process(bdflush_tsk);
+ return;
+ }
+
+ /* kflushd can wake us up before we have had a chance to
+ go to sleep, so we mark ourselves as sleeping and queue
+ ourselves on bdflush_done before waking it; this handles
+ that wakeup event and avoids deadlocking in SMP
+ (we are not holding any lock anymore in these two paths). */
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&bdflush_done, &wait);
+
+ wake_up_process(bdflush_tsk);
+ schedule(); /* sleep until woken through bdflush_done */
+
+ remove_wait_queue(&bdflush_done, &wait);
+ __set_current_state(TASK_RUNNING);
+}
+/* This is the _only_ function that deals with flushing async writes
+ to disk.
+ NOTE: we _only_ need to browse the DIRTY lru list,
+ as all dirty buffers live _only_ in the DIRTY lru list.
+ As we never browse the LOCKED and CLEAN lru lists they are in fact
+ completely useless.
+
+ check_flushtime != 0: stop at the first unlocked dirty buffer whose
+ b_flushtime has not been reached yet.
+ check_flushtime == 0: ignore b_flushtime and stop once more than
+ bdf_prm.b_un.ndirty buffers have been picked in this call.
+
+ Buffers found on the list that are no longer dirty are refiled;
+ locked buffers are skipped. After each write the list lock has been
+ dropped, so the scan restarts from the head of the list. */
+static void flush_dirty_buffers(int check_flushtime)
+{
+ struct buffer_head * bh, *next;
+ int flushed = 0, i; /* flushed persists across restarts of the scan */
+
+ restart:
+ spin_lock(&lru_list_lock);
+ bh = lru_list[BUF_DIRTY];
+ if (!bh)
+ goto out_unlock;
+ for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next)
+ {
+ next = bh->b_next_free; /* fetched before bh may be refiled below */
+
+ if (!buffer_dirty(bh))
+ {
+ __refile_buffer(bh);
+ continue;
+ }
+ if (buffer_locked(bh))
+ continue;
+
+ if (check_flushtime)
+ {
+ /* The dirty lru list is chronologically ordered, so
+ if the current bh is not yet timed out,
+ then all the following bhs
+ will be too young as well. */
+ if (time_before(jiffies, bh->b_flushtime))
+ goto out_unlock;
+ }
+ else
+ {
+ if (++flushed > bdf_prm.b_un.ndirty) /* at most ndirty writes per call */
+ goto out_unlock;
+ }
+
+ /* OK, now we are committed to write it out. */
+ atomic_inc(&bh->b_count); /* b_count stays raised while the lock is dropped for the write */
+ spin_unlock(&lru_list_lock);
+ ll_rw_block(WRITE, 1, &bh);
+ atomic_dec(&bh->b_count);
+
+ if (current->need_resched)
+ schedule();
+ goto restart; /* the lock was released: rescan from the list head */
+ }
+ out_unlock:
+ spin_unlock(&lru_list_lock);
+}
/*
* Here we attempt to write back old buffers. We also try to flush inodes
@@ -2272,47 +2410,13 @@ void wakeup_bdflush(int wait)
static int sync_old_buffers(void)
{
- int nlist;
-
lock_kernel(); /* sync_supers() and sync_inodes() are called with the kernel lock held */
sync_supers(0);
sync_inodes(0);
unlock_kernel();
- for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) {
- struct buffer_head *bh;
- repeat:
- spin_lock(&lru_list_lock);
- bh = lru_list[nlist];
- if(bh) {
- struct buffer_head *next;
- int i;
- for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
- next = bh->b_next_free;
-
- /* If the buffer is not on the proper list,
- * then refile it.
- */
- if ((nlist == BUF_DIRTY &&
- (!buffer_dirty(bh) && !buffer_locked(bh))) ||
- (nlist == BUF_LOCKED && !buffer_locked(bh))) {
- __refile_buffer(bh);
- continue;
- }
-
- if (buffer_locked(bh) || !buffer_dirty(bh))
- continue;
-
- /* OK, now we are committed to write it out. */
- atomic_inc(&bh->b_count);
- spin_unlock(&lru_list_lock);
- ll_rw_block(WRITE, 1, &bh);
- atomic_dec(&bh->b_count);
- goto repeat;
- }
- }
- spin_unlock(&lru_list_lock);
- }
+ flush_dirty_buffers(1); /* 1: write only buffers whose b_flushtime has been reached */
+ /* must really sync all the active I/O requests to disk here */
run_task_queue(&tq_disk);
return 0; /* always 0 */
}
@@ -2328,6 +2432,10 @@ asmlinkage long sys_bdflush(int func, long data)
return -EPERM;
if (func == 1) {
+ /* do_exit directly and let kupdate to do its work alone. */
+ do_exit(0);
+#if 0 /* left here as it's the only example of lazy-mm-stuff used from
+ a syscall that doesn't care about the current mm context. */
int error;
struct mm_struct *user_mm;
@@ -2341,6 +2449,7 @@ asmlinkage long sys_bdflush(int func, long data)
error = sync_old_buffers();
end_lazy_tlb(user_mm);
return error;
+#endif
}
/* Basically func 1 means read param 1, 2 means write param 1, etc */
@@ -2383,85 +2492,103 @@ int bdflush(void * unused)
sprintf(current->comm, "kflushd");
bdflush_tsk = current;
- for (;;) {
- int nlist;
+ /* avoid getting signals */
+ spin_lock_irq(&current->sigmask_lock);
+ flush_signals(current);
+ sigfillset(&current->blocked);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ for (;;) {
CHECK_EMERGENCY_SYNC
- for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) {
- int nr, major, written = 0;
- struct buffer_head *next;
-
- repeat:
- spin_lock(&lru_list_lock);
- next = lru_list[nlist];
- nr = nr_buffers_type[nlist];
- while (nr-- > 0) {
- struct buffer_head *bh = next;
-
- next = next->b_next_free;
-
- /* If the buffer is not on the correct list,
- * then refile it.
- */
- if ((nlist == BUF_DIRTY &&
- (!buffer_dirty(bh) && !buffer_locked(bh))) ||
- (nlist == BUF_LOCKED && !buffer_locked(bh))) {
- __refile_buffer(bh);
- continue;
- }
-
- /* If we aren't in panic mode, don't write out too much
- * at a time. Also, don't write out buffers we don't
- * really have to write out yet..
- */
- if (!too_many_dirty_buffers) {
- if (written > bdf_prm.b_un.ndirty)
- break;
- if (time_before(jiffies, bh->b_flushtime))
- continue;
- }
-
- if (buffer_locked(bh) || !buffer_dirty(bh))
- continue;
-
- major = MAJOR(bh->b_dev);
- written++;
-
- /*
- * For the loop major we can try to do asynchronous writes,
- * but we have to guarantee that we're making some progress..
- */
- atomic_inc(&bh->b_count);
- spin_unlock(&lru_list_lock);
- ll_rw_block(WRITE, 1, &bh);
- atomic_dec(&bh->b_count);
- goto repeat;
- }
- spin_unlock(&lru_list_lock);
- }
- run_task_queue(&tq_disk);
+ flush_dirty_buffers(0);
+
+ /* If wakeup_bdflush will wakeup us
+ after our bdflush_done wakeup, then
+ we must make sure to not sleep
+ in schedule_timeout otherwise
+ wakeup_bdflush may wait for our
+ bdflush_done wakeup that would never arrive
+ (as we would be sleeping) and so it would
+ deadlock in SMP. */
+ __set_current_state(TASK_INTERRUPTIBLE);
wake_up(&bdflush_done);
-
/*
* If there are still a lot of dirty buffers around,
* skip the sleep and flush some more. Otherwise, we
- * sleep for a while and mark us as not being in panic
- * mode..
+ * sleep for a while.
*/
- if (!too_many_dirty_buffers || nr_buffers_type[BUF_DIRTY] < bdf_prm.b_un.ndirty) {
- too_many_dirty_buffers = 0;
- spin_lock_irq(&current->sigmask_lock);
- flush_signals(current);
- spin_unlock_irq(&current->sigmask_lock);
- interruptible_sleep_on_timeout(&bdflush_wait, 5*HZ);
+ if (balance_dirty_state(NODEV) < 0)
+ schedule_timeout(5*HZ);
+ /* Remember to mark us as running otherwise
+ the next schedule will block. */
+ __set_current_state(TASK_RUNNING);
+ }
+}
+
+/*
+ * This is the kernel update daemon. It used to live in userspace,
+ * but since it needs to run safely we want it to be unkillable by mistake.
+ * You don't need to change your userspace configuration since
+ * the userspace `update` will do_exit(0) at the first sys_bdflush().
+ *
+ * Each loop iteration re-reads bdf_prm.b_un.interval. A non-zero
+ * interval makes the thread sleep (interruptibly) for that many
+ * jiffies; a zero interval puts it into TASK_STOPPED. After waking it
+ * checks for a pending SIGSTOP, which sends it back to the stopped
+ * state; otherwise it calls sync_old_buffers(). The loop never exits.
+ */
+int kupdate(void * unused)
+{
+ struct task_struct * tsk = current;
+ int interval;
+
+ tsk->session = 1;
+ tsk->pgrp = 1;
+ strcpy(tsk->comm, "kupdate");
+
+ /* sigstop and sigcont will stop and wakeup kupdate */
+ spin_lock_irq(&tsk->sigmask_lock);
+ sigfillset(&tsk->blocked);
+ siginitsetinv(&current->blocked, sigmask(SIGCONT) | sigmask(SIGSTOP)); /* presumably leaves only SIGCONT and SIGSTOP unblocked - confirm against siginitsetinv() */
+ recalc_sigpending(tsk);
+ spin_unlock_irq(&tsk->sigmask_lock);
+
+ for (;;) {
+ /* update interval */
+ interval = bdf_prm.b_un.interval;
+ if (interval)
+ {
+ tsk->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(interval);
+ }
+ else
+ {
+ stop_kupdate: /* also entered from the SIGSTOP check below */
+ tsk->state = TASK_STOPPED;
+ schedule(); /* wait for SIGCONT */
}
+ /* check for sigstop */
+ if (signal_pending(tsk))
+ {
+ int stopped = 0;
+ spin_lock_irq(&tsk->sigmask_lock);
+ if (sigismember(&tsk->signal, SIGSTOP))
+ {
+ sigdelset(&tsk->signal, SIGSTOP); /* consume the SIGSTOP ourselves */
+ stopped = 1;
+ }
+ recalc_sigpending(tsk);
+ spin_unlock_irq(&tsk->sigmask_lock);
+ if (stopped)
+ goto stop_kupdate;
+ }
+#ifdef DEBUG
+ printk("kupdate() activated...\n");
+#endif
+ sync_old_buffers();
}
}
static int __init bdflush_init(void)
{
kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); /* return value is not checked */
+ kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); /* started with the same flags as bdflush; return value is not checked */
return 0; /* always 0 */
}
diff --git a/fs/dcache.c b/fs/dcache.c
index ef45eba7d..b6f7a7203 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -169,6 +169,11 @@ out:
int d_invalidate(struct dentry * dentry)
{
/*
+ * If it's already been dropped, return OK.
+ */
+ if (list_empty(&dentry->d_hash))
+ return 0;
+ /*
* Check whether to do a partial shrink_dcache
* to get rid of unused child entries.
*/
@@ -415,7 +420,7 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask)
unlock_kernel();
/* FIXME: kmem_cache_shrink here should tell us
the number of pages freed, and it should
- work in a __GFP_DMA/__GFP_BIGMEM behaviour
+ work in a __GFP_DMA/__GFP_HIGHMEM behaviour
to free only the interesting pages in
function of the needs of the current allocation. */
kmem_cache_shrink(dentry_cache);
diff --git a/fs/exec.c b/fs/exec.c
index b3f31fd0a..dea4f0712 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -31,6 +31,8 @@
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -212,20 +214,42 @@ int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
/* XXX: add architecture specific overflow check here. */
pos = bprm->p;
- while (len>0) {
- char *pag;
+ while (len > 0) {
+ char *kaddr;
+ int i, new, err;
+ struct page *page;
int offset, bytes_to_copy;
offset = pos % PAGE_SIZE;
- if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) &&
- !(pag = (char *) bprm->page[pos/PAGE_SIZE] =
- (unsigned long *) get_free_page(GFP_USER)))
- return -ENOMEM;
+ i = pos/PAGE_SIZE;
+ page = bprm->page[i];
+ new = 0;
+ if (!page) {
+ /*
+ * Cannot yet use highmem page because
+ * we cannot sleep with a kmap held.
+ */
+ page = __get_pages(GFP_USER, 0);
+ bprm->page[i] = page;
+ if (!page)
+ return -ENOMEM;
+ new = 1;
+ }
+ kaddr = (char *)kmap(page, KM_WRITE);
+ if (new && offset)
+ memset(kaddr, 0, offset);
bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len)
+ if (bytes_to_copy > len) {
bytes_to_copy = len;
- if (copy_from_user(pag + offset, str, bytes_to_copy))
+ if (new)
+ memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
+ }
+ err = copy_from_user(kaddr + offset, str, bytes_to_copy);
+ flush_page_to_ram(page);
+ kunmap((unsigned long)kaddr, KM_WRITE);
+
+ if (err)
return -EFAULT;
pos += bytes_to_copy;
@@ -276,7 +300,9 @@ int setup_arg_pages(struct linux_binprm *bprm)
mpnt->vm_offset = 0;
mpnt->vm_file = NULL;
mpnt->vm_private_data = (void *) 0;
+ vmlist_modify_lock(current->mm);
insert_vm_struct(current->mm, mpnt);
+ vmlist_modify_unlock(current->mm);
current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
}
@@ -467,6 +493,11 @@ int flush_old_exec(struct linux_binprm * bprm)
permission(bprm->dentry->d_inode,MAY_READ))
current->dumpable = 0;
+ /* An exec changes our domain. We are no longer part of the thread
+ group */
+
+ current->self_exec_id++;
+
flush_signal_handlers(current);
flush_old_files(current->files);
@@ -640,14 +671,22 @@ void remove_arg_zero(struct linux_binprm *bprm)
{
if (bprm->argc) {
unsigned long offset;
- char * page;
+ char * kaddr;
+ struct page *page;
+
offset = bprm->p % PAGE_SIZE;
- page = (char*)bprm->page[bprm->p/PAGE_SIZE];
- while(bprm->p++,*(page+offset++))
- if(offset==PAGE_SIZE){
- offset=0;
- page = (char*)bprm->page[bprm->p/PAGE_SIZE];
- }
+ goto inside;
+
+ while (bprm->p++, *(kaddr+offset++)) {
+ if (offset != PAGE_SIZE)
+ continue;
+ offset = 0;
+ kunmap((unsigned long)kaddr, KM_WRITE);
+inside:
+ page = bprm->page[bprm->p/PAGE_SIZE];
+ kaddr = (char *)kmap(page, KM_WRITE);
+ }
+ kunmap((unsigned long)kaddr, KM_WRITE);
bprm->argc--;
}
}
@@ -676,8 +715,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
bprm->dentry = NULL;
bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
- for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
- bprm_loader.page[i] = 0;
+ for (i = 0 ; i < MAX_ARG_PAGES ; i++) /* clear page-table */
+ bprm_loader.page[i] = NULL;
dentry = open_namei(dynloader[0], 0, 0);
retval = PTR_ERR(dentry);
@@ -793,8 +832,9 @@ out:
/* Assumes that free_page() can take a NULL argument. */
/* I hope this is ok for all architectures */
- for (i=0 ; i<MAX_ARG_PAGES ; i++)
- free_page(bprm.page[i]);
+ for (i = 0 ; i < MAX_ARG_PAGES ; i++)
+ if (bprm.page[i])
+ __free_page(bprm.page[i]);
return retval;
}
diff --git a/fs/file.c b/fs/file.c
index fd33dc8b8..d62fb3ef3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -16,7 +16,7 @@
/*
- * Allocate an fd array, using get_free_page() if possible.
+ * Allocate an fd array, using __get_free_page() if possible.
* Note: the array isn't cleared at allocation time.
*/
struct file ** alloc_fd_array(int num)
@@ -129,7 +129,7 @@ out:
}
/*
- * Allocate an fdset array, using get_free_page() if possible.
+ * Allocate an fdset array, using __get_free_page() if possible.
* Note: the array isn't cleared at allocation time.
*/
fd_set * alloc_fdset(int num)
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 62410ca26..6fb9c1633 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -293,10 +293,10 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
if (!level) return;
if (s->s_hpfs_chk)
if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return;
+ brelse(bh);
hpfs_free_sectors(s, ano, 1);
oano = ano;
ano = anode->up;
- brelse(bh);
if (--level) {
anode = hpfs_map_anode(s, ano, &bh);
btree1 = &anode->btree;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 8af35847d..36e665c32 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -20,7 +20,34 @@ int hpfs_dir_release(struct inode *inode, struct file *filp)
return 0;
}
-int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir)
+/* Seek within an HPFS directory. This is slow, but it's not used often.
+
+ The target is 'off', plus the current f_pos when whence == 1.
+ NOTE(review): any other whence value, including 2, is treated as an
+ absolute offset - confirm that this is intended.
+
+ The positions 0, 1, 11, 12 and 13 are accepted without any check
+ (hpfs_readdir uses 11 for the ".." entry and 12/13 as its
+ end-of-directory states). Any other target is validated by walking
+ the directory with map_pos_dirent(), with the inode locked, until
+ the walk reaches exactly that position.
+
+ Returns the new f_pos on success, or -ESPIPE when the walk fails
+ or hits position 12 before reaching the target. */
+
+loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
+{
+ loff_t new_off = off + (whence == 1 ? filp->f_pos : 0);
+ loff_t pos;
+ struct quad_buffer_head qbh;
+ struct inode *i = filp->f_dentry->d_inode;
+ struct super_block *s = filp->f_dentry->d_sb;
+ /*printk("dir lseek\n");*/
+ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
+ hpfs_lock_inode(i);
+ pos = ((loff_t) hpfs_de_as_down_as_possible(s, i->i_hpfs_dno) << 4) + 1; /* starting position of the walk */
+ while (pos != new_off) {
+ if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); /* map_pos_dirent updates pos through its pointer argument */
+ else goto fail;
+ if (pos == 12) goto fail; /* map_pos_dirent sets 12 on its bail path */
+ }
+ hpfs_unlock_inode(i);
+ ok:
+ return filp->f_pos = new_off;
+ fail:
+ hpfs_unlock_inode(i);
+ /*printk("illegal lseek: %016llx\n", new_off);*/
+ return -ESPIPE;
+}
+
+int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct inode *inode = filp->f_dentry->d_inode;
struct quad_buffer_head qbh;
@@ -54,11 +81,11 @@ int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir)
if (e) return -EFSERROR;
}
lc = inode->i_sb->s_hpfs_lowercase;
- if (filp->f_pos == -2) { /* diff -r requires this (note, that diff -r */
- filp->f_pos = -3; /* also fails on msdos filesystem in 2.0) */
+ if (filp->f_pos == 12) { /* diff -r requires this (note, that diff -r */
+ filp->f_pos = 13; /* also fails on msdos filesystem in 2.0) */
return 0;
}
- if (filp->f_pos == -3) return -ENOENT;
+ if (filp->f_pos == 13) return -ENOENT;
hpfs_lock_inode(inode);
@@ -72,7 +99,7 @@ int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir)
hpfs_unlock_inode(inode);
return -EFSERROR;
}
- if (filp->f_pos == -2) {
+ if (filp->f_pos == 12) {
hpfs_unlock_inode(inode);
return 0;
}
@@ -86,9 +113,9 @@ int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir)
hpfs_unlock_inode(inode);
return 0;
}
- filp->f_pos = -1;
+ filp->f_pos = 11;
}
- if (filp->f_pos == -1) {
+ if (filp->f_pos == 11) {
if (filldir(dirent, "..", 2, filp->f_pos, inode->i_hpfs_parent_dir) < 0) {
hpfs_unlock_inode(inode);
return 0;
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index e4b4bbc91..d1ca8e3e6 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -539,7 +539,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
brelse(bh);
}
i->i_hpfs_dno = down;
- for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, (loff_t) -2);
+ for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, (loff_t) 12);
return;
}
if (!(dnode = hpfs_map_dnode(i->i_sb, up, &qbh))) return;
@@ -876,7 +876,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
hpfs_brelse4(&qbh0);
bail:
- *posp = -2;
+ *posp = 12;
return de;
}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fbb1f2f6c..066ce5c28 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -58,91 +58,96 @@ void hpfs_truncate(struct inode *i)
hpfs_write_inode(i);
}
-int hpfs_getblk_block(struct inode *inode, long block, int create, int *err, int *created)
+int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
{
- int add;
- int sec = 0;
- down(&inode->i_sem);
- if (err) *err = 0;
- if (created) *created = 0;
- if (!inode->i_blocks) {
- hpfs_error(inode->i_sb, "hpfs_get_block: inode %08x has no blocks", inode->i_ino);
- if (err) *err = -EFSERROR;
- up(&inode->i_sem);
+ secno s;
+ if (iblock < inode->i_blocks - 1) {
+ s = hpfs_bmap(inode, iblock);
+ bh_result->b_dev = inode->i_dev;
+ bh_result->b_blocknr = s;
+ bh_result->b_state |= (1UL << BH_Mapped);
return 0;
}
- if (block < ((add = inode->i_blocks - 1))) {
- int bm;
- if (!(bm = hpfs_bmap(inode, block))) {
- hpfs_error(inode->i_sb, "hpfs_get_block: cound not bmap block %08x, inode %08x, size %08x", (int)block, inode->i_ino, (int)inode->i_size);
- *err = -EFSERROR;
- }
- up(&inode->i_sem);
- return bm;
- }
- if (!create) {
- if (err) *err = -EFBIG;
- up(&inode->i_sem);
- return 0;
+ if (!create) return 0;
+ if (iblock > inode->i_blocks - 1) {
+ //hpfs_error(inode->i_sb, "hpfs_get_block beyond file end (requested %08x, inode size %08x", (int)iblock, (int)inode->i_blocks - 1);
+ printk("HPFS: could not write beyond file end. This is known bug.\n");
+ return -EFSERROR;
}
- if (created) *created = 1;
- while (add <= block) {
- if ((sec = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, add)) == -1) {
- if (err) *err = -ENOSPC;
- hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
- return 0;
- } /* FIXME: clear block */
- add++;
+ if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) {
+ hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
+ return -ENOSPC;
}
- inode->i_blocks = add + 1;
- up(&inode->i_sem);
- return sec;
+ inode->i_blocks++;
+ bh_result->b_dev = inode->i_dev;
+ bh_result->b_blocknr = s;
+ bh_result->b_state |= (1UL << BH_Mapped) | (1UL << BH_New);
+ return 0;
}
-/* copied from ext2fs */
-static int hpfs_get_block(struct inode *inode, unsigned long block, struct buffer_head *bh, int update)
+static int hpfs_write_partial_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
{
- if (!bh->b_blocknr) {
- int error, created;
- unsigned long blocknr;
-
- blocknr = hpfs_getblk_block(inode, block, 1, &error, &created);
- if (!blocknr) {
- if (!error)
- error = -ENOSPC;
- return error;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct page *new_page, **hash;
+ unsigned long pgpos;
+ unsigned long page_cache = 0;
+ long status;
+
+ printk("- off: %08x\n", (int)page->offset);
+ pgpos = (inode->i_blocks - 1) * 512 & PAGE_CACHE_MASK;
+ while (pgpos < page->offset) {
+long pgp = pgpos;
+ printk("pgpos: %08x, bl: %d\n", (int)pgpos, (int)inode->i_blocks);
+ hash = page_hash(inode, pgpos);
+repeat_find: new_page = __find_lock_page(inode, pgpos, hash);
+ if (!new_page) {
+ if (!page_cache) {
+ page_cache = page_cache_alloc();
+ if (page_cache)
+ goto repeat_find;
+ status = -ENOMEM;
+ goto out;
+ }
+ new_page = page_cache_entry(page_cache);
+ if (add_to_page_cache_unique(new_page,inode,pgpos,hash))
+ goto repeat_find;
+ page_cache = 0;
}
-
- bh->b_dev = inode->i_dev;
- bh->b_blocknr = blocknr;
-
- if (!update)
- return 0;
-
- if (created) {
- memset(bh->b_data, 0, bh->b_size);
- set_bit(BH_Uptodate, &bh->b_state);
- return 0;
+ printk("A\n");
+ status = block_write_cont_page(file, new_page, PAGE_SIZE, 0, NULL);
+ printk("B\n");
+ UnlockPage(new_page);
+ page_cache_release(new_page);
+ if (status < 0)
+ goto out;
+ pgpos = (inode->i_blocks - 1) * 512 & PAGE_CACHE_MASK;
+ printk("pgpos2: %08x, bl: %d\n", (int)pgpos, (int)inode->i_blocks);
+ if (pgpos == pgp) {
+ status = -1;
+ printk("ERROR\n");
+ goto out;
}
}
-
- if (!update)
- return 0;
-
- lock_kernel();
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- unlock_kernel();
-
- return buffer_uptodate(bh) ? 0 : -EIO;
+ //if ((status = block_write_cont_page(file, page, PAGE_SIZE, 0, NULL)) < 0) goto out;
+ printk("C\n");
+ status = block_write_cont_page(file, page, offset, bytes, buf);
+ printk("D\n");
+out:
+ printk("O\n");
+ if (page_cache)
+ page_cache_free(page_cache);
+ printk("E\n");
+ return status;
}
+
ssize_t hpfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
{
ssize_t retval;
retval = generic_file_write(file, buf, count,
- ppos, block_write_partial_page);
+ ppos, /*hpfs_write_partial_page*/block_write_partial_page);
if (retval > 0) {
struct inode *inode = file->f_dentry->d_inode;
inode->i_mtime = CURRENT_TIME;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 050b63597..9ae4a67da 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -226,6 +226,7 @@ void hpfs_set_dentry_operations(struct dentry *);
int hpfs_dir_read(struct file *, char *, size_t, loff_t *);
int hpfs_dir_release(struct inode *, struct file *);
+loff_t hpfs_dir_lseek(struct file *, loff_t, int);
int hpfs_readdir(struct file *, void *, filldir_t);
struct dentry *hpfs_lookup(struct inode *, struct dentry *);
@@ -258,9 +259,8 @@ int hpfs_open(struct inode *, struct file *);
int hpfs_file_fsync(struct file *, struct dentry *);
secno hpfs_bmap(struct inode *, unsigned);
void hpfs_truncate(struct inode *);
-ssize_t hpfs_file_read(struct file *, char *, size_t, loff_t *);
-ssize_t hpfs_file_write(struct file *, const char *, size_t, loff_t *);
-int hpfs_writepage (struct file *, struct page *);
+int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create);
+ssize_t hpfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos);
/* inode.c */
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index efc776218..d79e55814 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -41,11 +41,9 @@ static const struct inode_operations hpfs_file_iops =
NULL, /* rename */
NULL, /* readlink */
NULL, /* follow_link */
- (int (*)(struct inode *, int))
-#warning Someone needs to code up hpfs_get_block properly... -DaveM
- &hpfs_bmap, /* get_block */
+ &hpfs_get_block, /* get_block */
block_read_full_page, /* readpage */
- hpfs_writepage, /* writepage */
+ block_write_full_page, /* writepage */
block_flushpage, /* flushpage */
hpfs_truncate, /* truncate */
NULL, /* permission */
@@ -55,7 +53,7 @@ static const struct inode_operations hpfs_file_iops =
static const struct file_operations hpfs_dir_ops =
{
- NULL, /* lseek - default */
+ hpfs_dir_lseek, /* lseek */
hpfs_dir_read, /* read */
NULL, /* write - bad */
hpfs_readdir, /* readdir */
diff --git a/fs/inode.c b/fs/inode.c
index 55eddfde8..f03295d5c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -89,6 +89,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
memset(inode, 0, sizeof(*inode));
init_waitqueue_head(&inode->i_wait);
INIT_LIST_HEAD(&inode->i_hash);
+ INIT_LIST_HEAD(&inode->i_pages);
INIT_LIST_HEAD(&inode->i_dentry);
sema_init(&inode->i_sem, 1);
spin_lock_init(&inode->i_shared_lock);
@@ -401,7 +402,7 @@ int shrink_icache_memory(int priority, int gfp_mask)
prune_icache(count);
/* FIXME: kmem_cache_shrink here should tell us
the number of pages freed, and it should
- work in a __GFP_DMA/__GFP_BIGMEM behaviour
+ work in a __GFP_DMA/__GFP_HIGHMEM behaviour
to free only the interesting pages in
function of the needs of the current allocation. */
kmem_cache_shrink(inode_cachep);
@@ -429,7 +430,7 @@ static inline void __iget(struct inode * inode)
* by hand after calling find_inode now! This simplify iunique and won't
* add any additional branch in the common code.
*/
-static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head)
+static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
struct list_head *tmp;
struct inode * inode;
@@ -445,6 +446,8 @@ static struct inode * find_inode(struct super_block * sb, unsigned long ino, str
continue;
if (inode->i_ino != ino)
continue;
+ if (find_actor && !find_actor(inode, ino, opaque))
+ continue;
break;
}
return inode;
@@ -504,7 +507,7 @@ struct inode * get_empty_inode(void)
* We no longer cache the sb_flags in i_flags - see fs.h
* -- rmk@arm.uk.linux.org
*/
-static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head)
+static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
{
struct inode * inode;
@@ -514,7 +517,7 @@ static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, s
spin_lock(&inode_lock);
/* We released the lock, so.. */
- old = find_inode(sb, ino, head);
+ old = find_inode(sb, ino, head, find_actor, opaque);
if (!old)
{
list_add(&inode->i_list, &inode_in_use);
@@ -570,7 +573,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
retry:
if (counter > max_reserved) {
head = inode_hashtable + hash(sb,counter);
- inode = find_inode(sb, res = counter++, head);
+ inode = find_inode(sb, res = counter++, head, NULL, NULL);
if (!inode) {
spin_unlock(&inode_lock);
return res;
@@ -595,13 +598,13 @@ struct inode *igrab(struct inode *inode)
return inode;
}
-struct inode *iget(struct super_block *sb, unsigned long ino)
+struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
{
struct list_head * head = inode_hashtable + hash(sb,ino);
struct inode * inode;
spin_lock(&inode_lock);
- inode = find_inode(sb, ino, head);
+ inode = find_inode(sb, ino, head, find_actor, opaque);
if (inode) {
__iget(inode);
spin_unlock(&inode_lock);
@@ -614,7 +617,7 @@ struct inode *iget(struct super_block *sb, unsigned long ino)
* get_new_inode() will do the right thing, re-trying the search
* in case it had to block at any point.
*/
- return get_new_inode(sb, ino, head);
+ return get_new_inode(sb, ino, head, find_actor, opaque);
}
void insert_inode_hash(struct inode *inode)
diff --git a/fs/iobuf.c b/fs/iobuf.c
index b46a13bfd..eaabf2f7c 100644
--- a/fs/iobuf.c
+++ b/fs/iobuf.c
@@ -50,7 +50,6 @@ int alloc_kiovec(int nr, struct kiobuf **bufp)
init_waitqueue_head(&iobuf->wait_queue);
iobuf->end_io = simple_wakeup_kiobuf;
iobuf->array_len = KIO_STATIC_PAGES;
- iobuf->pagelist = iobuf->page_array;
iobuf->maplist = iobuf->map_array;
*bufp++ = iobuf;
}
@@ -65,50 +64,35 @@ void free_kiovec(int nr, struct kiobuf **bufp)
for (i = 0; i < nr; i++) {
iobuf = bufp[i];
- if (iobuf->array_len > KIO_STATIC_PAGES) {
- kfree (iobuf->pagelist);
+ if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
- }
kmem_cache_free(kiobuf_cachep, bufp[i]);
}
}
int expand_kiobuf(struct kiobuf *iobuf, int wanted)
{
- unsigned long * pagelist;
struct page ** maplist;
if (iobuf->array_len >= wanted)
return 0;
- pagelist = (unsigned long *)
- kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
- if (!pagelist)
- return -ENOMEM;
-
maplist = (struct page **)
kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
- if (!maplist) {
- kfree(pagelist);
+ if (!maplist)
return -ENOMEM;
- }
/* Did it grow while we waited? */
if (iobuf->array_len >= wanted) {
- kfree(pagelist);
kfree(maplist);
return 0;
}
- memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long));
memcpy (maplist, iobuf->maplist, wanted * sizeof(struct page **));
- if (iobuf->array_len > KIO_STATIC_PAGES) {
- kfree (iobuf->pagelist);
+ if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
- }
- iobuf->pagelist = pagelist;
iobuf->maplist = maplist;
iobuf->array_len = wanted;
return 0;
diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c
index f26aa086c..70b01dc20 100644
--- a/fs/minix/truncate.c
+++ b/fs/minix/truncate.c
@@ -33,7 +33,7 @@
*/
#define DATA_BUFFER_USED(bh) \
- (atomic_read(&bh->b_count) || buffer_locked(bh))
+ (atomic_read(&bh->b_count) > 1 || buffer_locked(bh))
/*
* The functions for minix V1 fs truncation.
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6b52b2d54..b7ec225ac 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -308,8 +308,7 @@ static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int
struct nfs_readdirres rd_res;
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
- struct page *page, **hash;
- unsigned long page_cache;
+ struct page *page, **hash, *page_cache;
long offset;
__u32 *cookiep;
@@ -341,14 +340,14 @@ repeat:
goto unlock_out;
}
- page = page_cache_entry(page_cache);
+ page = page_cache;
if (add_to_page_cache_unique(page, inode, offset, hash)) {
page_cache_release(page);
goto repeat;
}
rd_args.fh = NFS_FH(dentry);
- rd_res.buffer = (char *)page_cache;
+ rd_res.buffer = (char *)page_address(page_cache);
rd_res.bufsiz = PAGE_CACHE_SIZE;
rd_res.cookie = *cookiep;
do {
@@ -533,13 +532,15 @@ static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags)
* If mtime is close to present time, we revalidate
* more often.
*/
+#define NFS_REVALIDATE_NEGATIVE (1 * HZ)
static inline int nfs_neg_need_reval(struct dentry *dentry)
{
- unsigned long timeout = 30 * HZ;
- long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime;
+ struct inode *dir = dentry->d_parent->d_inode;
+ unsigned long timeout = NFS_ATTRTIMEO(dir);
+ long diff = CURRENT_TIME - dir->i_mtime;
- if (diff < 5*60)
- timeout = 1 * HZ;
+ if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE)
+ timeout = NFS_REVALIDATE_NEGATIVE;
return time_after(jiffies, dentry->d_time + timeout);
}
@@ -581,12 +582,14 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
goto out_bad;
}
- if (IS_ROOT(dentry))
- goto out_valid;
-
if (!nfs_dentry_force_reval(dentry, flags))
goto out_valid;
+ if (IS_ROOT(dentry)) {
+ __nfs_revalidate_inode(NFS_DSERVER(dentry), dentry);
+ goto out_valid_renew;
+ }
+
/*
* Do a new lookup and check the dentry attributes.
*/
@@ -596,32 +599,29 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags)
goto out_bad;
/* Inode number matches? */
- if (fattr.fileid != inode->i_ino)
+ if (NFS_FSID(inode) != fattr.fsid ||
+ NFS_FILEID(inode) != fattr.fileid)
goto out_bad;
/* Filehandle matches? */
- if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh))) {
- if (!list_empty(&dentry->d_subdirs))
- shrink_dcache_parent(dentry);
- if (dentry->d_count < 2)
- goto out_bad;
- }
+ if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh)))
+ goto out_bad;
/* Ok, remeber that we successfully checked it.. */
- nfs_renew_times(dentry);
nfs_refresh_inode(inode, &fattr);
+ out_valid_renew:
+ nfs_renew_times(dentry);
out_valid:
return 1;
out_bad:
+ d_drop(dentry);
+ if (!list_empty(&dentry->d_subdirs))
+ shrink_dcache_parent(dentry);
/* Purge readdir caches. */
if (dentry->d_parent->d_inode) {
- invalidate_inode_pages(dentry->d_parent->d_inode);
- nfs_flush_dircache(dentry->d_parent->d_inode);
- }
- if (inode && S_ISDIR(inode->i_mode)) {
- invalidate_inode_pages(inode);
- nfs_flush_dircache(inode);
+ nfs_zap_caches(dentry->d_parent->d_inode);
+ NFS_CACHEINV(dentry->d_parent->d_inode);
}
return 0;
}
@@ -649,21 +649,6 @@ static void nfs_dentry_delete(struct dentry *dentry)
dentry->d_name.name, error);
}
-#ifdef NFS_PARANOIA
- /*
- * Sanity check: if the dentry has been unhashed and the
- * inode still has users, we could have problems ...
- */
- if (list_empty(&dentry->d_hash) && dentry->d_inode) {
- struct inode *inode = dentry->d_inode;
- int max_count = (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink);
- if (inode->i_count > max_count) {
-printk("nfs_dentry_delete: %s/%s: ino=%ld, count=%d, nlink=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name,
-inode->i_ino, inode->i_count, inode->i_nlink);
- }
- }
-#endif
}
static kmem_cache_t *nfs_fh_cachep;
@@ -750,14 +735,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry)
error = -EACCES;
inode = nfs_fhget(dentry, &fhandle, &fattr);
if (inode) {
-#ifdef NFS_PARANOIA
-if (inode->i_count > (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink)) {
-printk("nfs_lookup: %s/%s ino=%ld in use, count=%d, nlink=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name,
-inode->i_ino, inode->i_count, inode->i_nlink);
-show_dentry(&inode->i_dentry);
-}
-#endif
no_entry:
d_add(dentry, inode);
nfs_renew_times(dentry);
@@ -779,14 +756,6 @@ static int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
inode = nfs_fhget(dentry, fhandle, fattr);
if (inode) {
-#ifdef NFS_PARANOIA
-if (inode->i_count > (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink)) {
-printk("nfs_instantiate: %s/%s ino=%ld in use, count=%d, nlink=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name,
-inode->i_ino, inode->i_count, inode->i_nlink);
-show_dentry(&inode->i_dentry);
-}
-#endif
d_instantiate(dentry, inode);
nfs_renew_times(dentry);
error = 0;
@@ -803,16 +772,15 @@ show_dentry(&inode->i_dentry);
static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
{
int error;
- struct nfs_sattr sattr;
+ struct iattr attr;
struct nfs_fattr fattr;
struct nfs_fh fhandle;
dfprintk(VFS, "NFS: create(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
- sattr.mode = mode;
- sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
- sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+ attr.ia_mode = mode;
+ attr.ia_valid = ATTR_MODE;
/*
* Invalidate the dir cache before the operation to avoid a race.
@@ -820,7 +788,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
invalidate_inode_pages(dir);
nfs_flush_dircache(dir);
error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
- dentry->d_name.name, &sattr, &fhandle, &fattr);
+ dentry->d_name.name, &attr, &fhandle, &fattr);
if (!error)
error = nfs_instantiate(dentry, &fhandle, &fattr);
if (error)
@@ -834,23 +802,25 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode)
static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
{
int error;
- struct nfs_sattr sattr;
+ struct iattr attr;
struct nfs_fattr fattr;
struct nfs_fh fhandle;
dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
- sattr.mode = mode;
- sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
- if (S_ISCHR(mode) || S_ISBLK(mode))
- sattr.size = rdev; /* get out your barf bag */
- sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+ attr.ia_mode = mode;
+ attr.ia_valid = ATTR_MODE;
+ /* FIXME: move this to a special nfs_proc_mknod() */
+ if (S_ISCHR(mode) || S_ISBLK(mode)) {
+ attr.ia_size = rdev; /* get out your barf bag */
+ attr.ia_valid |= ATTR_SIZE;
+ }
invalidate_inode_pages(dir);
nfs_flush_dircache(dir);
error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
- dentry->d_name.name, &sattr, &fhandle, &fattr);
+ dentry->d_name.name, &attr, &fhandle, &fattr);
if (!error)
error = nfs_instantiate(dentry, &fhandle, &fattr);
if (error)
@@ -864,16 +834,15 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde
static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
int error;
- struct nfs_sattr sattr;
+ struct iattr attr;
struct nfs_fattr fattr;
struct nfs_fh fhandle;
dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
- sattr.mode = mode | S_IFDIR;
- sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
- sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+ attr.ia_valid = ATTR_MODE;
+ attr.ia_mode = mode | S_IFDIR;
/*
* Always drop the dentry, we can't always depend on
@@ -885,7 +854,7 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
invalidate_inode_pages(dir);
nfs_flush_dircache(dir);
error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent),
- dentry->d_name.name, &sattr, &fhandle, &fattr);
+ dentry->d_name.name, &attr, &fhandle, &fattr);
if (!error)
dir->i_nlink++;
return error;
@@ -898,13 +867,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n",
dir->i_dev, dir->i_ino, dentry->d_name.name);
-#ifdef NFS_PARANOIA
-if (dentry->d_inode->i_count > 1)
-printk("nfs_rmdir: %s/%s inode busy?? i_count=%d, i_nlink=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name,
-dentry->d_inode->i_count, dentry->d_inode->i_nlink);
-#endif
-
invalidate_inode_pages(dir);
nfs_flush_dircache(dir);
error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
@@ -1082,12 +1044,6 @@ dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count);
#endif
goto out;
}
-#ifdef NFS_PARANOIA
-if (inode && inode->i_count > inode->i_nlink)
-printk("nfs_safe_remove: %s/%s inode busy?? i_count=%d, i_nlink=%d\n",
-dentry->d_parent->d_name.name, dentry->d_name.name,
-inode->i_count, inode->i_nlink);
-#endif
/*
* Unhash the dentry while we remove the file ...
*/
@@ -1141,7 +1097,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
static int
nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
- struct nfs_sattr sattr;
+ struct iattr attr;
int error;
dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n",
@@ -1160,9 +1116,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
* Fill in the sattr for the call.
* Note: SunOS 4.1.2 crashes if the mode isn't initialized!
*/
- sattr.mode = S_IFLNK | S_IRWXUGO;
- sattr.uid = sattr.gid = sattr.size = (unsigned) -1;
- sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1;
+ attr.ia_valid = ATTR_MODE;
+ attr.ia_mode = S_IFLNK | S_IRWXUGO;
/*
* Drop the dentry in advance to force a new lookup.
@@ -1173,7 +1128,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
invalidate_inode_pages(dir);
nfs_flush_dircache(dir);
error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent),
- dentry->d_name.name, symname, &sattr);
+ dentry->d_name.name, symname, &attr);
if (!error) {
nfs_renew_times(dentry->d_parent);
} else if (error == -EEXIST) {
@@ -1332,13 +1287,6 @@ do_rename:
* To prevent any new references to the target during the rename,
* we unhash the dentry and free the inode in advance.
*/
-#ifdef NFS_PARANOIA
-if (new_inode &&
- new_inode->i_count > (S_ISDIR(new_inode->i_mode) ? 1 : new_inode->i_nlink))
-printk("nfs_rename: %s/%s inode busy?? i_count=%d, i_nlink=%d\n",
-new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
-new_inode->i_count, new_inode->i_nlink);
-#endif
if (!list_empty(&new_dentry->d_hash)) {
d_drop(new_dentry);
rehash = update;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5421cebf9..ab1e51485 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,7 +37,7 @@
#define NFS_PARANOIA 1
static struct inode * __nfs_fhget(struct super_block *, struct nfs_fattr *);
-static void nfs_zap_caches(struct inode *);
+void nfs_zap_caches(struct inode *);
static void nfs_invalidate_inode(struct inode *);
static void nfs_read_inode(struct inode *);
@@ -78,6 +78,8 @@ nfs_read_inode(struct inode * inode)
inode->i_mode = 0;
inode->i_rdev = 0;
inode->i_op = NULL;
+ NFS_FILEID(inode) = 0;
+ NFS_FSID(inode) = 0;
NFS_CACHEINV(inode);
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
}
@@ -415,13 +417,15 @@ restart:
dprintk("nfs_free_dentries: found %s/%s, d_count=%d, hashed=%d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
dentry->d_count, !list_empty(&dentry->d_hash));
+ if (!list_empty(&dentry->d_subdirs))
+ shrink_dcache_parent(dentry);
if (!dentry->d_count) {
dget(dentry);
d_drop(dentry);
dput(dentry);
goto restart;
}
- if (!list_empty(&dentry->d_hash))
+ if (list_empty(&dentry->d_hash))
unhashed++;
}
return unhashed;
@@ -430,7 +434,7 @@ restart:
/*
* Invalidate the local caches
*/
-static void
+void
nfs_zap_caches(struct inode *inode)
{
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
@@ -466,6 +470,8 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr)
* do this once. (We don't allow inodes to change types.)
*/
if (inode->i_mode == 0) {
+ NFS_FILEID(inode) = fattr->fileid;
+ NFS_FSID(inode) = fattr->fsid;
inode->i_mode = fattr->mode;
if (S_ISREG(inode->i_mode))
inode->i_op = &nfs_file_inode_operations;
@@ -487,6 +493,54 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr)
}
/*
+ * In NFSv3 we can have 64bit inode numbers. In order to support
+ * this, and re-exported directories (also seen in NFSv2)
+ * we are forced to allow 2 different inodes to have the same
+ * i_ino.
+ */
+static int
+nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque)
+{
+ struct nfs_fattr *fattr = (struct nfs_fattr *)opaque;
+ if (NFS_FSID(inode) != fattr->fsid)
+ return 0;
+ if (NFS_FILEID(inode) != fattr->fileid)
+ return 0;
+ return 1;
+}
+
+static int
+nfs_inode_is_stale(struct inode *inode, struct nfs_fattr *fattr)
+{
+ int unhashed;
+ int is_stale = 0;
+
+ if (inode->i_mode &&
+ (fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT))
+ is_stale = 1;
+
+ if (is_bad_inode(inode))
+ is_stale = 1;
+
+ /*
+ * Free up cached dentries (done unconditionally, not only if the inode seems stale).
+ */
+ unhashed = nfs_free_dentries(inode);
+
+ /* Assume we're holding one i_count reference ourselves (hence the + 1).
+ *
+ * NB: sockets sometimes have volatile file handles, so
+ * don't invalidate their inodes (or FIFO inodes) even if all
+ * dentries are unhashed.
+ */
+ if (unhashed && inode->i_count == unhashed + 1
+ && !S_ISSOCK(inode->i_mode) && !S_ISFIFO(inode->i_mode))
+ is_stale = 1;
+
+ return is_stale;
+}
+
+/*
* This is our own version of iget that looks up inodes by file handle
* instead of inode number. We use this technique instead of using
* the vfs read_inode function because there is no way to pass the
@@ -545,54 +599,40 @@ nfs_fhget(struct dentry *dentry, struct nfs_fh *fhandle,
static struct inode *
__nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr)
{
- struct inode *inode;
- int max_count, stale_inode, unhashed = 0;
+ struct inode *inode = NULL;
+ unsigned long ino;
-retry:
- inode = iget(sb, fattr->fileid);
- if (!inode)
+ if (!fattr->nlink) {
+ printk("NFS: Buggy server - nlink == 0!\n");
goto out_no_inode;
- /* N.B. This should be impossible ... */
- if (inode->i_ino != fattr->fileid)
- goto out_bad_id;
+ }
- /*
- * Check for busy inodes, and attempt to get rid of any
- * unused local references. If successful, we release the
- * inode and try again.
- *
- * Note that the busy test uses the values in the fattr,
- * as the inode may have become a different object.
- * (We can probably handle modes changes here, too.)
- */
- stale_inode = inode->i_mode &&
- ((fattr->mode ^ inode->i_mode) & S_IFMT);
- stale_inode |= inode->i_count && inode->i_count == unhashed;
- max_count = S_ISDIR(fattr->mode) ? 1 : fattr->nlink;
- if (stale_inode || inode->i_count > max_count + unhashed) {
- dprintk("__nfs_fhget: inode %ld busy, i_count=%d, i_nlink=%d\n",
- inode->i_ino, inode->i_count, inode->i_nlink);
- unhashed = nfs_free_dentries(inode);
- if (stale_inode || inode->i_count > max_count + unhashed) {
- printk("__nfs_fhget: inode %ld still busy, i_count=%d\n",
- inode->i_ino, inode->i_count);
- if (!list_empty(&inode->i_dentry)) {
- struct dentry *dentry;
- dentry = list_entry(inode->i_dentry.next,
- struct dentry, d_alias);
- printk("__nfs_fhget: killing %s/%s filehandle\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
- memset(dentry->d_fsdata, 0,
- sizeof(struct nfs_fh));
- }
- remove_inode_hash(inode);
- nfs_invalidate_inode(inode);
- unhashed = 0;
- }
+ ino = fattr->fileid;
+
+ while((inode = iget4(sb, ino, nfs_find_actor, fattr)) != NULL) {
+
+ /*
+ * Check for busy inodes, and attempt to get rid of any
+ * unused local references. If successful, we release the
+ * inode and try again.
+ *
+ * Note that the busy test uses the values in the fattr,
+ * as the inode may have become a different object.
+ * (We can probably handle modes changes here, too.)
+ */
+ if (!nfs_inode_is_stale(inode,fattr))
+ break;
+
+ dprintk("__nfs_fhget: inode %ld still busy, i_count=%d\n",
+ inode->i_ino, inode->i_count);
+ nfs_zap_caches(inode);
+ remove_inode_hash(inode);
iput(inode);
- goto retry;
}
+
+ if (!inode)
+ goto out_no_inode;
+
nfs_fill_inode(inode, fattr);
dprintk("NFS: __nfs_fhget(%x/%ld ct=%d)\n",
inode->i_dev, inode->i_ino, inode->i_count);
@@ -603,18 +643,14 @@ out:
out_no_inode:
printk("__nfs_fhget: iget failed\n");
goto out;
-out_bad_id:
- printk("__nfs_fhget: unexpected inode from iget\n");
- goto out;
}
int
nfs_notify_change(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
- int error;
- struct nfs_sattr sattr;
struct nfs_fattr fattr;
+ int error;
/*
* Make sure the inode is up-to-date.
@@ -627,54 +663,29 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error);
goto out;
}
- sattr.mode = (u32) -1;
- if (attr->ia_valid & ATTR_MODE)
- sattr.mode = attr->ia_mode;
-
- sattr.uid = (u32) -1;
- if (attr->ia_valid & ATTR_UID)
- sattr.uid = attr->ia_uid;
-
- sattr.gid = (u32) -1;
- if (attr->ia_valid & ATTR_GID)
- sattr.gid = attr->ia_gid;
-
- sattr.size = (u32) -1;
- if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode))
- sattr.size = attr->ia_size;
-
- sattr.mtime.seconds = sattr.mtime.useconds = (u32) -1;
- if (attr->ia_valid & ATTR_MTIME) {
- sattr.mtime.seconds = attr->ia_mtime;
- sattr.mtime.useconds = 0;
- }
-
- sattr.atime.seconds = sattr.atime.useconds = (u32) -1;
- if (attr->ia_valid & ATTR_ATIME) {
- sattr.atime.seconds = attr->ia_atime;
- sattr.atime.useconds = 0;
- }
+ if (!S_ISREG(inode->i_mode))
+ attr->ia_valid &= ~ATTR_SIZE;
error = nfs_wb_all(inode);
if (error)
goto out;
error = nfs_proc_setattr(NFS_DSERVER(dentry), NFS_FH(dentry),
- &sattr, &fattr);
+ &fattr, attr);
if (error)
goto out;
/*
* If we changed the size or mtime, update the inode
* now to avoid invalidating the page cache.
*/
- if (sattr.size != (u32) -1) {
- if (sattr.size != fattr.size)
- printk("nfs_notify_change: sattr=%d, fattr=%d??\n",
- sattr.size, fattr.size);
- inode->i_size = sattr.size;
+ if (attr->ia_valid & ATTR_SIZE) {
+ if (attr->ia_size != fattr.size)
+ printk("nfs_notify_change: attr=%ld, fattr=%d??\n",
+ attr->ia_size, fattr.size);
+ inode->i_size = attr->ia_size;
inode->i_mtime = fattr.mtime.seconds;
}
- if (sattr.mtime.seconds != (u32) -1)
+ if (attr->ia_valid & ATTR_MTIME)
inode->i_mtime = fattr.mtime.seconds;
error = nfs_refresh_inode(inode, &fattr);
out:
@@ -682,6 +693,34 @@ out:
}
/*
+ * Wait for the inode to get unlocked.
+ * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING).
+ */
+int
+nfs_wait_on_inode(struct inode *inode, int flag)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ int intr, error = 0;
+
+ intr = NFS_SERVER(inode)->flags & NFS_MOUNT_INTR;
+ add_wait_queue(&inode->i_wait, &wait);
+ for (;;) {
+ set_task_state(tsk, (intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE));
+ error = 0;
+ if (!(NFS_FLAGS(inode) & flag))
+ break;
+ error = -ERESTARTSYS;
+ if (intr && signalled())
+ break;
+ schedule();
+ }
+ set_task_state(tsk, TASK_RUNNING);
+ remove_wait_queue(&inode->i_wait, &wait);
+ return error;
+}
+
+/*
* Externally visible revalidation function
*/
int
@@ -711,7 +750,7 @@ int nfs_release(struct inode *inode, struct file *filp)
* the cached attributes have to be refreshed.
*/
int
-_nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry)
+__nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
int status = 0;
@@ -720,6 +759,19 @@ _nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry)
dfprintk(PAGECACHE, "NFS: revalidating %s/%s, ino=%ld\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
inode->i_ino);
+
+ if (!inode || is_bad_inode(inode))
+ return -ESTALE;
+
+ while (NFS_REVALIDATING(inode)) {
+ status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
+ if (status < 0)
+ return status;
+ if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode)))
+ return 0;
+ }
+ NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
+
status = nfs_proc_getattr(server, NFS_FH(dentry), &fattr);
if (status) {
int error;
@@ -759,6 +811,8 @@ _nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry)
dfprintk(PAGECACHE, "NFS: %s/%s revalidation complete\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
out:
+ NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
+ wake_up(&inode->i_wait);
return status;
}
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1bc7d3d37..a7e53e6db 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -118,19 +118,35 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
return p;
}
+
+#define SATTR(p, attr, flag, field) \
+ *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0
static inline u32 *
-xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr)
+xdr_encode_sattr(u32 *p, struct iattr *attr)
{
- *p++ = htonl(sattr->mode);
- *p++ = htonl(sattr->uid);
- *p++ = htonl(sattr->gid);
- *p++ = htonl(sattr->size);
- *p++ = htonl(sattr->atime.seconds);
- *p++ = htonl(sattr->atime.useconds);
- *p++ = htonl(sattr->mtime.seconds);
- *p++ = htonl(sattr->mtime.useconds);
- return p;
+ SATTR(p, attr, ATTR_MODE, ia_mode);
+ SATTR(p, attr, ATTR_UID, ia_uid);
+ SATTR(p, attr, ATTR_GID, ia_gid);
+ SATTR(p, attr, ATTR_SIZE, ia_size);
+
+ if (attr->ia_valid & (ATTR_ATIME|ATTR_ATIME_SET)) {
+ *p++ = htonl(attr->ia_atime);
+ *p++ = 0;
+ } else {
+ *p++ = ~(u32) 0;
+ *p++ = ~(u32) 0;
+ }
+
+ if (attr->ia_valid & (ATTR_MTIME|ATTR_MTIME_SET)) {
+ *p++ = htonl(attr->ia_mtime);
+ *p++ = 0;
+ } else {
+ *p++ = ~(u32) 0;
+ *p++ = ~(u32) 0;
+ }
+ return p;
}
+#undef SATTR
/*
* NFS encode functions
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 3b48b326a..bb55ce6d6 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -65,7 +65,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
int
nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle,
- struct nfs_sattr *sattr, struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr, struct iattr *sattr)
{
struct nfs_sattrargs arg = { fhandle, sattr };
int status;
@@ -123,7 +123,7 @@ nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap,
int
nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir,
- const char *name, struct nfs_sattr *sattr,
+ const char *name, struct iattr *sattr,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_createargs arg = { dir, name, sattr };
@@ -178,7 +178,7 @@ nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle,
int
nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir,
const char *name, const char *path,
- struct nfs_sattr *sattr)
+ struct iattr *sattr)
{
struct nfs_symlinkargs arg = { dir, name, path, sattr };
int status;
@@ -191,7 +191,7 @@ nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir,
int
nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir,
- const char *name, struct nfs_sattr *sattr,
+ const char *name, struct iattr *sattr,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct nfs_createargs arg = { dir, name, sattr };
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 6cd892740..6b0d0f05b 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -59,8 +59,7 @@ struct inode_operations nfs_symlink_inode_operations = {
static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode)
{
struct nfs_readlinkargs rl_args;
- struct page *page, **hash;
- unsigned long page_cache;
+ struct page *page, **hash, *page_cache;
page = NULL;
page_cache = page_cache_alloc();
@@ -75,7 +74,7 @@ repeat:
goto unlock_out;
}
- page = page_cache_entry(page_cache);
+ page = page_cache;
if (add_to_page_cache_unique(page, inode, 0, hash)) {
page_cache_release(page);
goto repeat;
@@ -86,7 +85,7 @@ repeat:
* XDR response verification will NULL terminate it.
*/
rl_args.fh = NFS_FH(dentry);
- rl_args.buffer = (const void *)page_cache;
+ rl_args.buffer = (const void *)page_address(page_cache);
if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK,
&rl_args, NULL, 0) < 0)
goto error;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d7f8ad9dd..249abd8cd 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -38,6 +38,7 @@
*
* aeb@cwi.nl : /proc/partitions
*
+ *
* Alan Cox : security fixes.
* <Alan.Cox@linux.org>
*
@@ -45,11 +46,6 @@
*
* Gerhard Wichert : added BIGMEM support
* Siemens AG <Gerhard.Wichert@pdb.siemens.de>
- *
- * Chuck Lever : safe handling of task_struct
- * <cel@monkey.org>
- *
- * Andrea Arcangeli : SMP race/security fixes.
*/
#include <linux/types.h>
@@ -71,7 +67,6 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/signal.h>
-#include <linux/smp_lock.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -365,16 +360,24 @@ static int get_meminfo(char * buffer)
struct sysinfo i;
int len;
+/*
+ * display in kilobytes.
+ */
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+
si_meminfo(&i);
si_swapinfo(&i);
len = sprintf(buffer, " total: used: free: shared: buffers: cached:\n"
- "Mem: %8lu %8lu %8lu %8lu %8lu %8lu\n"
+ "Mem: %8lu %8lu %8lu %8lu %8lu %8u\n"
"Swap: %8lu %8lu %8lu\n",
- i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, (unsigned long) atomic_read(&page_cache_size)*PAGE_SIZE,
- i.totalswap, i.totalswap-i.freeswap, i.freeswap);
+ K(i.totalram), K(i.totalram-i.freeram), K(i.freeram),
+ K(i.sharedram), K(i.bufferram),
+ K(atomic_read(&page_cache_size)), K(i.totalswap),
+ K(i.totalswap-i.freeswap), K(i.freeswap));
/*
- * Tagged format, for easy grepping and expansion. The above will go away
- * eventually, once the tools have been updated.
+ * Tagged format, for easy grepping and expansion.
+ * The above will go away eventually, once the tools
+ * have been updated.
*/
return len + sprintf(buffer+len,
"MemTotal: %8lu kB\n"
@@ -382,19 +385,20 @@ static int get_meminfo(char * buffer)
"MemShared: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8u kB\n"
- "BigTotal: %8lu kB\n"
- "BigFree: %8lu kB\n"
+ "HighTotal: %8lu kB\n"
+ "HighFree: %8lu kB\n"
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n",
- i.totalram >> 10,
- i.freeram >> 10,
- i.sharedram >> 10,
- i.bufferram >> 10,
- atomic_read(&page_cache_size) << (PAGE_SHIFT - 10),
- i.totalbig >> 10,
- i.freebig >> 10,
- i.totalswap >> 10,
- i.freeswap >> 10);
+ K(i.totalram),
+ K(i.freeram),
+ K(i.sharedram),
+ K(i.bufferram),
+ K(atomic_read(&page_cache_size)),
+ K(i.totalhigh),
+ K(i.freehigh),
+ K(i.totalswap),
+ K(i.freeswap));
+#undef K
}
static int get_version(char * buffer)
@@ -412,69 +416,68 @@ static int get_cmdline(char * buffer)
return sprintf(buffer, "%s\n", saved_command_line);
}
-static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr)
+static struct page * get_phys_addr(struct mm_struct * mm, unsigned long ptr)
{
- pgd_t *page_dir;
- pmd_t *page_middle;
+ pgd_t *pgd;
+ pmd_t *pmd;
pte_t pte;
if (ptr >= TASK_SIZE)
return 0;
- page_dir = pgd_offset(mm,ptr);
- if (pgd_none(*page_dir))
+ pgd = pgd_offset(mm,ptr);
+ if (pgd_none(*pgd))
return 0;
- if (pgd_bad(*page_dir)) {
- printk("bad page directory entry %08lx\n", pgd_val(*page_dir));
- pgd_clear(page_dir);
+ if (pgd_bad(*pgd)) {
+ pgd_ERROR(*pgd);
+ pgd_clear(pgd);
return 0;
}
- page_middle = pmd_offset(page_dir,ptr);
- if (pmd_none(*page_middle))
+ pmd = pmd_offset(pgd,ptr);
+ if (pmd_none(*pmd))
return 0;
- if (pmd_bad(*page_middle)) {
- printk("bad page middle entry %08lx\n", pmd_val(*page_middle));
- pmd_clear(page_middle);
+ if (pmd_bad(*pmd)) {
+ pmd_ERROR(*pmd);
+ pmd_clear(pmd);
return 0;
}
- pte = *pte_offset(page_middle,ptr);
+ pte = *pte_offset(pmd,ptr);
if (!pte_present(pte))
return 0;
- return pte_page(pte) + (ptr & ~PAGE_MASK);
+ return pte_page(pte);
}
-#include <linux/bigmem.h>
-
static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer)
{
- unsigned long addr;
+ struct page *page;
+ unsigned long kaddr;
int size = 0, result = 0;
char c;
if (start >= end)
return result;
for (;;) {
- addr = get_phys_addr(mm, start);
- if (!addr)
+ page = get_phys_addr(mm, start);
+ if (!page)
return result;
- addr = kmap(addr, KM_READ);
+ kaddr = kmap(page, KM_READ) + (start & ~PAGE_MASK);
do {
- c = *(char *) addr;
+ c = *(char *) kaddr;
if (!c)
result = size;
if (size < PAGE_SIZE)
buffer[size++] = c;
else {
- kunmap(addr, KM_READ);
+ kunmap(kaddr, KM_READ);
return result;
}
- addr++;
+ kaddr++;
start++;
if (!c && start >= end) {
- kunmap(addr, KM_READ);
+ kunmap(kaddr, KM_READ);
return result;
}
- } while (addr & ~PAGE_MASK);
- kunmap(addr-1, KM_READ);
+ } while (kaddr & ~PAGE_MASK);
+ kunmap(kaddr, KM_READ);
}
return result;
}
@@ -483,9 +486,7 @@ static struct mm_struct *get_mm(int pid)
{
struct task_struct *p;
struct mm_struct *mm = NULL;
-
- /* need kernel lock to avoid the tsk->mm to go away under us */
- lock_kernel();
+
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
if (p)
@@ -493,10 +494,10 @@ static struct mm_struct *get_mm(int pid)
if (mm)
atomic_inc(&mm->mm_users);
read_unlock(&tasklist_lock);
- unlock_kernel();
return mm;
}
+
static int get_env(int pid, char * buffer)
{
struct mm_struct *mm = get_mm(pid);
@@ -859,9 +860,6 @@ static inline char * task_mem(struct mm_struct *mm, char *buffer)
return buffer;
}
-/*
- * These next two assume that the task's sigmask_lock is held by the caller.
- */
static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
sigset_t *catch)
{
@@ -914,115 +912,77 @@ extern inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_effective));
}
-/*
- * This is somewhat safer than it was before. However...
- *
- * Embedded pointers in the task structure may reference data that
- * can be changed or that is no longer valid after the tasklist
- * lock is released, or that isn't even protected by the tasklist
- * lock. Eg. tsk->tty, tsk->sig, and tsk->p_pptr can change after
- * we make our own copy of the task structure. This doesn't matter
- * unless we are trying to use the pointed-to data as an address.
- * So there are still a few safety issues to be addressed here.
- */
+
static int get_status(int pid, char * buffer)
{
char * orig = buffer;
struct task_struct *tsk;
struct mm_struct *mm = NULL;
- /*
- * We lock the whole kernel here because p->files and p->mm are still
- * protected by the global kernel lock.
- */
- lock_kernel();
-
read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
- if (tsk) {
+ if (tsk)
mm = tsk->mm;
- if (mm)
- atomic_inc(&mm->mm_users);
-
- buffer = task_name(tsk, buffer);
- buffer = task_state(tsk, buffer);
-
- spin_lock_irq(&tsk->sigmask_lock);
- buffer = task_sig(tsk, buffer);
- spin_unlock_irq(&tsk->sigmask_lock);
-
- buffer = task_cap(tsk, buffer);
- }
- read_unlock(&tasklist_lock);
-
- unlock_kernel();
-
- /*
- * We can't hold the tasklist_lock and jiggle the mmap_sem --
- * that can result in a deadlock.
- */
- if (mm) {
+ if (mm)
+ atomic_inc(&mm->mm_users);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+ if (!tsk)
+ return 0;
+ buffer = task_name(tsk, buffer);
+ buffer = task_state(tsk, buffer);
+ if (mm)
buffer = task_mem(mm, buffer);
+ buffer = task_sig(tsk, buffer);
+ buffer = task_cap(tsk, buffer);
+ if (mm)
mmput(mm);
- }
-
- /*
- * (buffer - orig) will be zero on an error exit.
- */
return buffer - orig;
}
static int get_stat(int pid, char * buffer)
{
struct task_struct *tsk;
- struct mm_struct *mm;
+ struct mm_struct *mm = NULL;
unsigned long vsize, eip, esp, wchan;
long priority, nice;
- pid_t ppid = 0;
+ int tty_pgrp;
sigset_t sigign, sigcatch;
char state;
- int res = 0;
- unsigned int tty_device;
- int tty_pgrp;
+ int res;
read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
- if (!tsk)
- goto out_unlock;
- /* avoid the task list to go away under us (security) */
- get_page(MAP_NR(tsk) + mem_map);
- ppid = tsk->p_pptr->pid;
- read_unlock(&tasklist_lock);
-
- /* we need the big kernel lock to avoid tsk->mm and tsk->tty
- to change under us */
- lock_kernel();
- mm = tsk->mm;
+ if (tsk)
+ mm = tsk->mm;
if (mm)
atomic_inc(&mm->mm_users);
- tty_device = tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0;
- tty_pgrp = tsk->tty ? tsk->tty->pgrp : -1;
- unlock_kernel();
-
- spin_lock_irq(&tsk->sigmask_lock);
- collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
- spin_unlock_irq(&tsk->sigmask_lock);
-
- eip = KSTK_EIP(tsk);
- esp = KSTK_ESP(tsk);
- wchan = get_wchan(tsk);
-
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+ if (!tsk)
+ return 0;
state = *get_task_state(tsk);
vsize = eip = esp = 0;
- if (mm)
- {
+ if (mm) {
struct vm_area_struct *vma;
down(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next)
+ vma = mm->mmap;
+ while (vma) {
vsize += vma->vm_end - vma->vm_start;
+ vma = vma->vm_next;
+ }
+ eip = KSTK_EIP(tsk);
+ esp = KSTK_ESP(tsk);
up(&mm->mmap_sem);
}
+ wchan = get_wchan(tsk);
+
+ collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
+
+ if (tsk->tty)
+ tty_pgrp = tsk->tty->pgrp;
+ else
+ tty_pgrp = -1;
+
/* scale priority and nice values from timeslices to -20..20 */
/* to make it look like a "normal" Unix priority/nice value */
priority = tsk->counter;
@@ -1036,10 +996,10 @@ static int get_stat(int pid, char * buffer)
pid,
tsk->comm,
state,
- ppid,
+ tsk->p_pptr->pid,
tsk->pgrp,
tsk->session,
- tty_device,
+ tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
tty_pgrp,
tsk->flags,
tsk->min_flt,
@@ -1076,16 +1036,9 @@ static int get_stat(int pid, char * buffer)
tsk->cnswap,
tsk->exit_signal,
tsk->processor);
-
if (mm)
mmput(mm);
- free_task_struct(tsk);
return res;
-
-out_unlock:
- read_unlock(&tasklist_lock);
- unlock_kernel();
- return 0;
}
static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -1097,7 +1050,7 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned
if (pmd_none(*pmd))
return;
if (pmd_bad(*pmd)) {
- printk("statm_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
return;
}
@@ -1135,7 +1088,7 @@ static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned
if (pgd_none(*pgd))
return;
if (pgd_bad(*pgd)) {
- printk("statm_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+ pgd_ERROR(*pgd);
pgd_clear(pgd);
return;
}
@@ -1233,11 +1186,11 @@ static ssize_t read_maps (int pid, struct file * file, char * buf,
size_t count, loff_t *ppos)
{
struct task_struct *p;
- struct mm_struct *mm = NULL;
struct vm_area_struct * map, * next;
char * destptr = buf, * buffer;
loff_t lineno;
ssize_t column, i;
+ int volatile_task;
long retval;
/*
@@ -1249,30 +1202,24 @@ static ssize_t read_maps (int pid, struct file * file, char * buf,
goto out;
retval = -EINVAL;
- lock_kernel();
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
- if (p) {
- mm = p->mm;
- if (mm)
- atomic_inc(&mm->mm_users);
- }
- read_unlock(&tasklist_lock);
- unlock_kernel();
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
if (!p)
goto freepage_out;
- /* nothing to map */
- if (!mm || count == 0)
+ if (!p->mm || count == 0)
goto getlen_out;
+ /* Check whether the mmaps could change if we sleep */
+ volatile_task = (p != current || atomic_read(&p->mm->mm_users) > 1);
+
/* decode f_pos */
lineno = *ppos >> MAPS_LINE_SHIFT;
column = *ppos & (MAPS_LINE_LENGTH-1);
- down(&mm->mmap_sem);
- /* quickly go to line "lineno" */
- for (map = mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
+ /* quickly go to line lineno */
+ for (map = p->mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
continue;
for ( ; map ; map = next ) {
@@ -1343,13 +1290,17 @@ static ssize_t read_maps (int pid, struct file * file, char * buf,
/* done? */
if (count == 0)
break;
+
+ /* By writing to user space, we might have slept.
+ * Stop the loop, to avoid a race condition.
+ */
+ if (volatile_task)
+ break;
}
- up(&mm->mmap_sem);
/* encode f_pos */
*ppos = (lineno << MAPS_LINE_SHIFT) + column;
- mmput(mm);
getlen_out:
retval = destptr - buf;
@@ -1362,31 +1313,28 @@ out:
#ifdef __SMP__
static int get_pidcpu(int pid, char * buffer)
{
- struct task_struct * tsk;
+ struct task_struct * tsk = current ;
int i, len = 0;
- /*
- * Hold the tasklist_lock to guarantee that the task_struct
- * address will remain valid while we examine its contents.
- */
read_lock(&tasklist_lock);
- tsk = find_task_by_pid(pid);
- if (tsk)
- get_page(MAP_NR(tsk) + mem_map);
- read_unlock(&tasklist_lock);
- if (tsk) {
- len = sprintf(buffer,
- "cpu %lu %lu\n",
- HZ_TO_STD(tsk->times.tms_utime),
- HZ_TO_STD(tsk->times.tms_stime));
-
- for (i = 0 ; i < smp_num_cpus; i++)
- len += sprintf(buffer + len, "cpu%d %lu %lu\n",
- i,
- HZ_TO_STD(tsk->per_cpu_utime[cpu_logical_map(i)]),
- HZ_TO_STD(tsk->per_cpu_stime[cpu_logical_map(i)]));
- free_task_struct(tsk);
- }
+ if (pid != tsk->pid)
+ tsk = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+
+ if (tsk == NULL)
+ return 0;
+
+ len = sprintf(buffer,
+ "cpu %lu %lu\n",
+ HZ_TO_STD(tsk->times.tms_utime),
+ HZ_TO_STD(tsk->times.tms_stime));
+
+ for (i = 0 ; i < smp_num_cpus; i++)
+ len += sprintf(buffer + len, "cpu%d %lu %lu\n",
+ i,
+ HZ_TO_STD(tsk->per_cpu_utime[cpu_logical_map(i)]),
+ HZ_TO_STD(tsk->per_cpu_stime[cpu_logical_map(i)]));
+
return len;
}
#endif
@@ -1519,6 +1467,12 @@ static int process_unauthorized(int type, int pid)
int ok = 0;
read_lock(&tasklist_lock);
+
+ /*
+ * Grab the lock, find the task, save the uid and
+ * check it has an mm still (ie its not dead)
+ */
+
p = find_task_by_pid(pid);
if (p) {
euid=p->euid;
@@ -1526,7 +1480,9 @@ static int process_unauthorized(int type, int pid)
if(!cap_issubset(p->cap_permitted, current->cap_permitted))
ok=0;
}
+
read_unlock(&tasklist_lock);
+
if (!p)
return 1;
diff --git a/fs/proc/mem.c b/fs/proc/mem.c
index f9fcb0970..90cd79722 100644
--- a/fs/proc/mem.c
+++ b/fs/proc/mem.c
@@ -10,7 +10,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -79,9 +79,10 @@ static ssize_t mem_read(struct file * file, char * buf,
pgd_t *page_dir;
pmd_t *page_middle;
pte_t pte;
- char * page;
+ struct page * page;
struct task_struct * tsk;
unsigned long addr;
+ unsigned long maddr; /* temporary mapped address */
char *tmp;
ssize_t scount, i;
@@ -102,7 +103,7 @@ static ssize_t mem_read(struct file * file, char * buf,
if (pgd_none(*page_dir))
break;
if (pgd_bad(*page_dir)) {
- printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
+ pgd_ERROR(*page_dir);
pgd_clear(page_dir);
break;
}
@@ -110,20 +111,20 @@ static ssize_t mem_read(struct file * file, char * buf,
if (pmd_none(*page_middle))
break;
if (pmd_bad(*page_middle)) {
- printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
+ pmd_ERROR(*page_middle);
pmd_clear(page_middle);
break;
}
pte = *pte_offset(page_middle,addr);
if (!pte_present(pte))
break;
- page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
+ page = pte_page(pte);
i = PAGE_SIZE-(addr & ~PAGE_MASK);
if (i > scount)
i = scount;
- page = (char *) kmap((unsigned long) page, KM_READ);
- copy_to_user(tmp, page, i);
- kunmap((unsigned long) page, KM_READ);
+ maddr = kmap(page, KM_READ);
+ copy_to_user(tmp, (char *)maddr + (addr & ~PAGE_MASK), i);
+ kunmap(maddr, KM_READ);
addr += i;
tmp += i;
scount -= i;
@@ -141,9 +142,10 @@ static ssize_t mem_write(struct file * file, char * buf,
pgd_t *page_dir;
pmd_t *page_middle;
pte_t pte;
- char * page;
+ struct page * page;
struct task_struct * tsk;
unsigned long addr;
+ unsigned long maddr; /* temporary mapped address */
char *tmp;
long i;
@@ -159,7 +161,7 @@ static ssize_t mem_write(struct file * file, char * buf,
if (pgd_none(*page_dir))
break;
if (pgd_bad(*page_dir)) {
- printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
+ pgd_ERROR(*page_dir);
pgd_clear(page_dir);
break;
}
@@ -167,7 +169,7 @@ static ssize_t mem_write(struct file * file, char * buf,
if (pmd_none(*page_middle))
break;
if (pmd_bad(*page_middle)) {
- printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
+ pmd_ERROR(*page_middle);
pmd_clear(page_middle);
break;
}
@@ -176,13 +178,13 @@ static ssize_t mem_write(struct file * file, char * buf,
break;
if (!pte_write(pte))
break;
- page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
+ page = pte_page(pte);
i = PAGE_SIZE-(addr & ~PAGE_MASK);
if (i > count)
i = count;
- page = (unsigned long) kmap((unsigned long) page, KM_WRITE);
- copy_from_user(page, tmp, i);
- kunmap((unsigned long) page, KM_WRITE);
+ maddr = kmap(page, KM_WRITE);
+ copy_from_user((char *)maddr + (addr & ~PAGE_MASK), tmp, i);
+ kunmap(maddr, KM_WRITE);
addr += i;
tmp += i;
count -= i;
@@ -248,14 +250,14 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
if (pgd_none(*src_dir))
return -EINVAL;
if (pgd_bad(*src_dir)) {
- printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir));
+ pgd_ERROR(*src_dir);
return -EINVAL;
}
src_middle = pmd_offset(src_dir, stmp);
if (pmd_none(*src_middle))
return -EINVAL;
if (pmd_bad(*src_middle)) {
- printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle));
+ pmd_ERROR(*src_middle);
return -EINVAL;
}
src_table = pte_offset(src_middle, stmp);
@@ -301,9 +303,9 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
set_pte(src_table, pte_mkdirty(*src_table));
set_pte(dest_table, *src_table);
- mapnr = MAP_NR(pte_page(*src_table));
+ mapnr = pte_pagenr(*src_table);
if (mapnr < max_mapnr)
- get_page(mem_map + MAP_NR(pte_page(*src_table)));
+ get_page(mem_map + pte_pagenr(*src_table));
stmp += PAGE_SIZE;
dtmp += PAGE_SIZE;
diff --git a/fs/super.c b/fs/super.c
index 693017eee..3b58d13cc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -135,7 +135,7 @@ out:
return lptr;
}
-static void remove_vfsmnt(kdev_t dev)
+void remove_vfsmnt(kdev_t dev)
{
struct vfsmount *lptr, *tofree;
@@ -508,7 +508,7 @@ out:
/*
* Find a super_block with no device assigned.
*/
-static struct super_block *get_empty_super(void)
+struct super_block *get_empty_super(void)
{
struct super_block *s;