diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-01-27 01:05:20 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-01-27 01:05:20 +0000 |
commit | 546db14ee74118296f425f3b91634fb767d67290 (patch) | |
tree | 22b613a3da8d4bf663eec5e155af01b87fdf9094 /fs | |
parent | 1e25e41c4f5474e14452094492dbc169b800e4c8 (diff) |
Merge with Linux 2.3.23. The new bootmem stuff has broken various
platforms. At this time I've only verified that IP22 support compiles
and IP27 actually works.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_aout.c | 65 | ||||
-rw-r--r-- | fs/buffer.c | 405 | ||||
-rw-r--r-- | fs/dcache.c | 7 | ||||
-rw-r--r-- | fs/exec.c | 78 | ||||
-rw-r--r-- | fs/file.c | 4 | ||||
-rw-r--r-- | fs/hpfs/anode.c | 2 | ||||
-rw-r--r-- | fs/hpfs/dir.c | 41 | ||||
-rw-r--r-- | fs/hpfs/dnode.c | 4 | ||||
-rw-r--r-- | fs/hpfs/file.c | 139 | ||||
-rw-r--r-- | fs/hpfs/hpfs_fn.h | 6 | ||||
-rw-r--r-- | fs/hpfs/inode.c | 8 | ||||
-rw-r--r-- | fs/inode.c | 19 | ||||
-rw-r--r-- | fs/iobuf.c | 22 | ||||
-rw-r--r-- | fs/minix/truncate.c | 2 | ||||
-rw-r--r-- | fs/nfs/dir.c | 144 | ||||
-rw-r--r-- | fs/nfs/inode.c | 226 | ||||
-rw-r--r-- | fs/nfs/nfs2xdr.c | 36 | ||||
-rw-r--r-- | fs/nfs/proc.c | 8 | ||||
-rw-r--r-- | fs/nfs/symlink.c | 7 | ||||
-rw-r--r-- | fs/proc/array.c | 316 | ||||
-rw-r--r-- | fs/proc/mem.c | 40 | ||||
-rw-r--r-- | fs/super.c | 4 |
22 files changed, 863 insertions, 720 deletions
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index cc72f4e18..ca5d8e8cb 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -270,7 +270,6 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs unsigned long fd_offset; unsigned long rlim; int retval; - static unsigned long error_time=0; ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && @@ -282,29 +281,6 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs fd_offset = N_TXTOFF(ex); -#ifdef __i386__ - if (N_MAGIC(ex) == ZMAGIC && fd_offset != BLOCK_SIZE) { - if((jiffies-error_time) >5) - { - printk(KERN_NOTICE "N_TXTOFF != BLOCK_SIZE. See a.out.h.\n"); - error_time=jiffies; - } - return -ENOEXEC; - } - - if (N_MAGIC(ex) == ZMAGIC && ex.a_text && - bprm->dentry->d_inode->i_op && - bprm->dentry->d_inode->i_op->get_block && - (fd_offset < bprm->dentry->d_inode->i_sb->s_blocksize)) { - if((jiffies-error_time) >5) - { - printk(KERN_NOTICE "N_TXTOFF < BLOCK_SIZE. Please convert binary.\n"); - error_time=jiffies; - } - return -ENOEXEC; - } -#endif - /* Check initial limits. This avoids letting people circumvent * size limits imposed on them by creating programs with large * arrays in the data or bss. @@ -364,26 +340,32 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs flush_icache_range((unsigned long) 0, (unsigned long) ex.a_text+ex.a_data); } else { + static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC)) + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + { printk(KERN_NOTICE "executable not page aligned\n"); + error_time2 = jiffies; + } fd = open_dentry(bprm->dentry, O_RDONLY); if (fd < 0) return fd; file = fget(fd); - if ((fd_offset & ~PAGE_MASK) != 0) { + if ((fd_offset & ~PAGE_MASK) != 0 && + (jiffies-error_time) > 5*HZ) + { printk(KERN_WARNING "fd_offset is not page aligned. 
Please convert program: %s\n", - file->f_dentry->d_name.name - ); + file->f_dentry->d_name.name); + error_time = jiffies; } if (!file->f_op || !file->f_op->mmap || ((fd_offset & ~PAGE_MASK) != 0)) { fput(file); sys_close(fd); - do_brk(0, ex.a_text+ex.a_data); + do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex), ex.a_text+ex.a_data, 0); flush_icache_range((unsigned long) N_TXTADDR(ex), @@ -493,12 +475,6 @@ do_load_aout_library(int fd) goto out_putf; } - if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && - (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { - printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); - goto out_putf; - } - if (N_FLAGS(ex)) goto out_putf; @@ -508,14 +484,17 @@ do_load_aout_library(int fd) start_addr = ex.a_entry & 0xfffff000; if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { - printk(KERN_WARNING - "N_TXTOFF is not page aligned. Please convert library: %s\n", - file->f_dentry->d_name.name - ); - - do_mmap(NULL, start_addr & PAGE_MASK, ex.a_text + ex.a_data + ex.a_bss, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED| MAP_PRIVATE, 0); + static unsigned long error_time; + + if ((jiffies-error_time) > 5*HZ) + { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert library: %s\n", + file->f_dentry->d_name.name); + error_time = jiffies; + } + + do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); read_exec(file->f_dentry, N_TXTOFF(ex), (char *)start_addr, ex.a_text + ex.a_data, 0); diff --git a/fs/buffer.c b/fs/buffer.c index c43c54a36..39dd880f8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -26,6 +26,8 @@ /* Thread it... 
-DaveM */ +/* async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> */ + #include <linux/sched.h> #include <linux/fs.h> #include <linux/malloc.h> @@ -76,6 +78,7 @@ static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED; static struct buffer_head *lru_list[NR_LIST]; static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED; static int nr_buffers_type[NR_LIST] = {0,}; +static unsigned long size_buffers_type[NR_LIST] = {0,}; static struct buffer_head * unused_list = NULL; static int nr_unused_buffer_heads = 0; @@ -93,7 +96,7 @@ static kmem_cache_t *bh_cachep; static int grow_buffers(int size); /* This is used by some architectures to estimate available memory. */ -atomic_t buffermem = ATOMIC_INIT(0); +atomic_t buffermem_pages = ATOMIC_INIT(0); /* Here is the parameter block for the bdflush process. If you add or * remove any of the parameters, make sure to update kernel/sysctl.c. @@ -114,18 +117,18 @@ union bdflush_param { each time we call refill */ int nref_dirt; /* Dirty buffer threshold for activating bdflush when trying to refill buffers. 
*/ - int dummy1; /* unused */ + int interval; /* jiffies delay between kupdate flushes */ int age_buffer; /* Time for normal buffer to age before we flush it */ int age_super; /* Time for superblock to age before we flush it */ int dummy2; /* unused */ int dummy3; /* unused */ } b_un; unsigned int data[N_PARAM]; -} bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}}; +} bdf_prm = {{40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ, 1884, 2}}; /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1}; -int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,1000, 6000*HZ, 6000*HZ, 2047, 5}; +int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,600*HZ, 6000*HZ, 6000*HZ, 2047, 5}; void wakeup_bdflush(int); @@ -482,6 +485,7 @@ static void __insert_into_lru_list(struct buffer_head * bh, int blist) (*bhp)->b_prev_free->b_next_free = bh; (*bhp)->b_prev_free = bh; nr_buffers_type[blist]++; + size_buffers_type[blist] += bh->b_size; } static void __remove_from_lru_list(struct buffer_head * bh, int blist) @@ -495,6 +499,7 @@ static void __remove_from_lru_list(struct buffer_head * bh, int blist) lru_list[blist] = NULL; bh->b_next_free = bh->b_prev_free = NULL; nr_buffers_type[blist]--; + size_buffers_type[blist] -= bh->b_size; } } @@ -813,6 +818,27 @@ out: return bh; } +/* -1 -> no need to flush + 0 -> async flush + 1 -> sync flush (wait for I/O completation) */ +static int balance_dirty_state(kdev_t dev) +{ + unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit; + + dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; + tot = nr_lru_pages + nr_free_pages + nr_free_highpages; + hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100; + soft_dirty_limit = hard_dirty_limit >> 1; + + if (dirty > soft_dirty_limit) + { + if (dirty > hard_dirty_limit) + return 1; + return 0; + } + return -1; +} + /* * if a new dirty buffer is created we need to balance bdflush. 
* @@ -820,23 +846,13 @@ out: * pressures on different devices - thus the (currently unused) * 'dev' parameter. */ -static int too_many_dirty_buffers; - void balance_dirty(kdev_t dev) { - int dirty = nr_buffers_type[BUF_DIRTY]; - int ndirty = bdf_prm.b_un.ndirty; - - if (dirty > ndirty) { - if (dirty > 2*ndirty) { - too_many_dirty_buffers = 1; - wakeup_bdflush(1); - return; - } - wakeup_bdflush(0); - } - too_many_dirty_buffers = 0; - return; + int state = balance_dirty_state(dev); + + if (state < 0) + return; + wakeup_bdflush(state); } static inline void __mark_dirty(struct buffer_head *bh, int flag) @@ -1250,7 +1266,7 @@ int block_flushpage(struct inode *inode, struct page *page, unsigned long offset */ if (!offset) { if (!try_to_free_buffers(page)) { - atomic_add(PAGE_CACHE_SIZE, &buffermem); + atomic_inc(&buffermem_pages); return 0; } } @@ -1364,6 +1380,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long unsigned long bbits, blocks, i, len; struct buffer_head *bh, *head; char * target_buf; + int need_balance_dirty; target_buf = (char *)page_address(page) + offset; @@ -1403,6 +1420,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long i = 0; bh = head; partial = 0; + need_balance_dirty = 0; do { if (!bh) BUG(); @@ -1473,8 +1491,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long set_bit(BH_Uptodate, &bh->b_state); if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { __mark_dirty(bh, 0); - if (too_many_dirty_buffers) - balance_dirty(bh->b_dev); + need_balance_dirty = 1; } if (err) { @@ -1488,6 +1505,9 @@ skip: bh = bh->b_this_page; } while (bh != head); + if (need_balance_dirty) + balance_dirty(bh->b_dev); + /* * is this a partial write that happened to make all buffers * uptodate then we can optimize away a bogus readpage() for @@ -1519,6 +1539,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of struct buffer_head *bh, *head; char * 
target_buf, *target_data; unsigned long data_offset = offset; + int need_balance_dirty; offset = inode->i_size - page->offset; if (page->offset>inode->i_size) @@ -1566,6 +1587,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of i = 0; bh = head; partial = 0; + need_balance_dirty = 0; do { if (!bh) BUG(); @@ -1644,8 +1666,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of set_bit(BH_Uptodate, &bh->b_state); if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { __mark_dirty(bh, 0); - if (too_many_dirty_buffers) - balance_dirty(bh->b_dev); + need_balance_dirty = 1; } if (err) { @@ -1659,6 +1680,9 @@ skip: bh = bh->b_this_page; } while (bh != head); + if (need_balance_dirty) + balance_dirty(bh->b_dev); + /* * is this a partial write that happened to make all buffers * uptodate then we can optimize away a bogus readpage() for @@ -1809,12 +1833,12 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], dprintk ("iobuf %d %d %d\n", offset, length, size); for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - page = iobuf->pagelist[pageind]; map = iobuf->maplist[pageind]; - if (map && PageBIGMEM(map)) { + if (map && PageHighMem(map)) { err = -EIO; goto error; } + page = page_address(map); while (length > 0) { blocknr = b[bufind++]; @@ -2090,7 +2114,7 @@ static int grow_buffers(int size) page_map = mem_map + MAP_NR(page); page_map->buffers = bh; lru_cache_add(page_map); - atomic_add(PAGE_SIZE, &buffermem); + atomic_inc(&buffermem_pages); return 1; no_buffer_head: @@ -2168,12 +2192,53 @@ out: busy_buffer_page: /* Uhhuh, start writeback so that we don't end up with all dirty pages */ - too_many_dirty_buffers = 1; wakeup_bdflush(0); ret = 0; goto out; } +/* ================== Debugging =================== */ + +void show_buffers(void) +{ + struct buffer_head * bh; + int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; + int protected = 0; + int nlist; + static char *buf_types[NR_LIST] = { "CLEAN", 
"LOCKED", "DIRTY" }; + + printk("Buffer memory: %6dkB\n", + atomic_read(&buffermem_pages) << (PAGE_SHIFT-10)); + +#ifdef __SMP__ /* trylock does nothing on UP and so we could deadlock */ + if (!spin_trylock(&lru_list_lock)) + return; + for(nlist = 0; nlist < NR_LIST; nlist++) { + found = locked = dirty = used = lastused = protected = 0; + bh = lru_list[nlist]; + if(!bh) continue; + + do { + found++; + if (buffer_locked(bh)) + locked++; + if (buffer_protected(bh)) + protected++; + if (buffer_dirty(bh)) + dirty++; + if (atomic_read(&bh->b_count)) + used++, lastused = found; + bh = bh->b_next_free; + } while (bh != lru_list[nlist]); + printk("%8s: %d buffers, %d used (last=%d), " + "%d locked, %d protected, %d dirty\n", + buf_types[nlist], found, used, lastused, + locked, protected, dirty); + } + spin_unlock(&lru_list_lock); +#endif +} + /* ===================== Init ======================= */ /* @@ -2181,7 +2246,7 @@ busy_buffer_page: * Use gfp() for the hash table to decrease TLB misses, use * SLAB cache for buffer heads. */ -void __init buffer_init(unsigned long memory_size) +void __init buffer_init(unsigned long mempages) { int order, i; unsigned int nr_hash; @@ -2189,9 +2254,11 @@ void __init buffer_init(unsigned long memory_size) /* The buffer cache hash table is less important these days, * trim it a bit. */ - memory_size >>= 14; - memory_size *= sizeof(struct buffer_head *); - for (order = 0; (PAGE_SIZE << order) < memory_size; order++) + mempages >>= 14; + + mempages *= sizeof(struct buffer_head *); + + for (order = 0; (1 << order) < mempages; order++) ; /* try to allocate something until we get it or we're asking @@ -2246,21 +2313,92 @@ void __init buffer_init(unsigned long memory_size) * response to dirty buffers. Once this process is activated, we write back * a limited number of buffers to the disks and then go back to sleep again. 
*/ -static DECLARE_WAIT_QUEUE_HEAD(bdflush_wait); static DECLARE_WAIT_QUEUE_HEAD(bdflush_done); struct task_struct *bdflush_tsk = 0; -void wakeup_bdflush(int wait) +void wakeup_bdflush(int block) { + DECLARE_WAITQUEUE(wait, current); + if (current == bdflush_tsk) return; - if (wait) - run_task_queue(&tq_disk); - wake_up(&bdflush_wait); - if (wait) - sleep_on(&bdflush_done); + + if (!block) + { + wake_up_process(bdflush_tsk); + return; + } + + /* kflushd can wakeup us before we have a chance to + go to sleep so we must be smart in handling + this wakeup event from kflushd to avoid deadlocking in SMP + (we are not holding any lock anymore in these two paths). */ + __set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&bdflush_done, &wait); + + wake_up_process(bdflush_tsk); + schedule(); + + remove_wait_queue(&bdflush_done, &wait); + __set_current_state(TASK_RUNNING); } +/* This is the _only_ function that deals with flushing async writes + to disk. + NOTENOTENOTENOTE: we _only_ need to browse the DIRTY lru list + as all dirty buffers lives _only_ in the DIRTY lru list. + As we never browse the LOCKED and CLEAN lru lists they are infact + completly useless. */ +static void flush_dirty_buffers(int check_flushtime) +{ + struct buffer_head * bh, *next; + int flushed = 0, i; + + restart: + spin_lock(&lru_list_lock); + bh = lru_list[BUF_DIRTY]; + if (!bh) + goto out_unlock; + for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) + { + next = bh->b_next_free; + + if (!buffer_dirty(bh)) + { + __refile_buffer(bh); + continue; + } + if (buffer_locked(bh)) + continue; + + if (check_flushtime) + { + /* The dirty lru list is chronogical ordered so + if the current bh is not yet timed out, + then also all the following bhs + will be too young. */ + if (time_before(jiffies, bh->b_flushtime)) + goto out_unlock; + } + else + { + if (++flushed > bdf_prm.b_un.ndirty) + goto out_unlock; + } + + /* OK, now we are committed to write it out. 
*/ + atomic_inc(&bh->b_count); + spin_unlock(&lru_list_lock); + ll_rw_block(WRITE, 1, &bh); + atomic_dec(&bh->b_count); + + if (current->need_resched) + schedule(); + goto restart; + } + out_unlock: + spin_unlock(&lru_list_lock); +} /* * Here we attempt to write back old buffers. We also try to flush inodes @@ -2272,47 +2410,13 @@ void wakeup_bdflush(int wait) static int sync_old_buffers(void) { - int nlist; - lock_kernel(); sync_supers(0); sync_inodes(0); unlock_kernel(); - for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) { - struct buffer_head *bh; - repeat: - spin_lock(&lru_list_lock); - bh = lru_list[nlist]; - if(bh) { - struct buffer_head *next; - int i; - for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) { - next = bh->b_next_free; - - /* If the buffer is not on the proper list, - * then refile it. - */ - if ((nlist == BUF_DIRTY && - (!buffer_dirty(bh) && !buffer_locked(bh))) || - (nlist == BUF_LOCKED && !buffer_locked(bh))) { - __refile_buffer(bh); - continue; - } - - if (buffer_locked(bh) || !buffer_dirty(bh)) - continue; - - /* OK, now we are committed to write it out. */ - atomic_inc(&bh->b_count); - spin_unlock(&lru_list_lock); - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - goto repeat; - } - } - spin_unlock(&lru_list_lock); - } + flush_dirty_buffers(1); + /* must really sync all the active I/O request to disk here */ run_task_queue(&tq_disk); return 0; } @@ -2328,6 +2432,10 @@ asmlinkage long sys_bdflush(int func, long data) return -EPERM; if (func == 1) { + /* do_exit directly and let kupdate to do its work alone. */ + do_exit(0); +#if 0 /* left here as it's the only example of lazy-mm-stuff used from + a syscall that doesn't care about the current mm context. 
*/ int error; struct mm_struct *user_mm; @@ -2341,6 +2449,7 @@ asmlinkage long sys_bdflush(int func, long data) error = sync_old_buffers(); end_lazy_tlb(user_mm); return error; +#endif } /* Basically func 1 means read param 1, 2 means write param 1, etc */ @@ -2383,85 +2492,103 @@ int bdflush(void * unused) sprintf(current->comm, "kflushd"); bdflush_tsk = current; - for (;;) { - int nlist; + /* avoid getting signals */ + spin_lock_irq(&current->sigmask_lock); + flush_signals(current); + sigfillset(&current->blocked); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + for (;;) { CHECK_EMERGENCY_SYNC - for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) { - int nr, major, written = 0; - struct buffer_head *next; - - repeat: - spin_lock(&lru_list_lock); - next = lru_list[nlist]; - nr = nr_buffers_type[nlist]; - while (nr-- > 0) { - struct buffer_head *bh = next; - - next = next->b_next_free; - - /* If the buffer is not on the correct list, - * then refile it. - */ - if ((nlist == BUF_DIRTY && - (!buffer_dirty(bh) && !buffer_locked(bh))) || - (nlist == BUF_LOCKED && !buffer_locked(bh))) { - __refile_buffer(bh); - continue; - } - - /* If we aren't in panic mode, don't write out too much - * at a time. Also, don't write out buffers we don't - * really have to write out yet.. - */ - if (!too_many_dirty_buffers) { - if (written > bdf_prm.b_un.ndirty) - break; - if (time_before(jiffies, bh->b_flushtime)) - continue; - } - - if (buffer_locked(bh) || !buffer_dirty(bh)) - continue; - - major = MAJOR(bh->b_dev); - written++; - - /* - * For the loop major we can try to do asynchronous writes, - * but we have to guarantee that we're making some progress.. 
- */ - atomic_inc(&bh->b_count); - spin_unlock(&lru_list_lock); - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - goto repeat; - } - spin_unlock(&lru_list_lock); - } - run_task_queue(&tq_disk); + flush_dirty_buffers(0); + + /* If wakeup_bdflush will wakeup us + after our bdflush_done wakeup, then + we must make sure to not sleep + in schedule_timeout otherwise + wakeup_bdflush may wait for our + bdflush_done wakeup that would never arrive + (as we would be sleeping) and so it would + deadlock in SMP. */ + __set_current_state(TASK_INTERRUPTIBLE); wake_up(&bdflush_done); - /* * If there are still a lot of dirty buffers around, * skip the sleep and flush some more. Otherwise, we - * sleep for a while and mark us as not being in panic - * mode.. + * sleep for a while. */ - if (!too_many_dirty_buffers || nr_buffers_type[BUF_DIRTY] < bdf_prm.b_un.ndirty) { - too_many_dirty_buffers = 0; - spin_lock_irq(&current->sigmask_lock); - flush_signals(current); - spin_unlock_irq(&current->sigmask_lock); - interruptible_sleep_on_timeout(&bdflush_wait, 5*HZ); + if (balance_dirty_state(NODEV) < 0) + schedule_timeout(5*HZ); + /* Remember to mark us as running otherwise + the next schedule will block. */ + __set_current_state(TASK_RUNNING); + } +} + +/* + * This is the kernel update daemon. It was used to live in userspace + * but since it's need to run safely we want it unkillable by mistake. + * You don't need to change your userspace configuration since + * the userspace `update` will do_exit(0) at the first sys_bdflush(). 
+ */ +int kupdate(void * unused) +{ + struct task_struct * tsk = current; + int interval; + + tsk->session = 1; + tsk->pgrp = 1; + strcpy(tsk->comm, "kupdate"); + + /* sigstop and sigcont will stop and wakeup kupdate */ + spin_lock_irq(&tsk->sigmask_lock); + sigfillset(&tsk->blocked); + siginitsetinv(&current->blocked, sigmask(SIGCONT) | sigmask(SIGSTOP)); + recalc_sigpending(tsk); + spin_unlock_irq(&tsk->sigmask_lock); + + for (;;) { + /* update interval */ + interval = bdf_prm.b_un.interval; + if (interval) + { + tsk->state = TASK_INTERRUPTIBLE; + schedule_timeout(interval); + } + else + { + stop_kupdate: + tsk->state = TASK_STOPPED; + schedule(); /* wait for SIGCONT */ } + /* check for sigstop */ + if (signal_pending(tsk)) + { + int stopped = 0; + spin_lock_irq(&tsk->sigmask_lock); + if (sigismember(&tsk->signal, SIGSTOP)) + { + sigdelset(&tsk->signal, SIGSTOP); + stopped = 1; + } + recalc_sigpending(tsk); + spin_unlock_irq(&tsk->sigmask_lock); + if (stopped) + goto stop_kupdate; + } +#ifdef DEBUG + printk("kupdate() activated...\n"); +#endif + sync_old_buffers(); } } static int __init bdflush_init(void) { kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); return 0; } diff --git a/fs/dcache.c b/fs/dcache.c index ef45eba7d..b6f7a7203 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -169,6 +169,11 @@ out: int d_invalidate(struct dentry * dentry) { /* + * If it's already been dropped, return OK. + */ + if (list_empty(&dentry->d_hash)) + return 0; + /* * Check whether to do a partial shrink_dcache * to get rid of unused child entries. 
*/ @@ -415,7 +420,7 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask) unlock_kernel(); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should - work in a __GFP_DMA/__GFP_BIGMEM behaviour + work in a __GFP_DMA/__GFP_HIGHMEM behaviour to free only the interesting pages in function of the needs of the current allocation. */ kmem_cache_shrink(dentry_cache); @@ -31,6 +31,8 @@ #include <linux/fcntl.h> #include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -212,20 +214,42 @@ int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) /* XXX: add architecture specific overflow check here. */ pos = bprm->p; - while (len>0) { - char *pag; + while (len > 0) { + char *kaddr; + int i, new, err; + struct page *page; int offset, bytes_to_copy; offset = pos % PAGE_SIZE; - if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) && - !(pag = (char *) bprm->page[pos/PAGE_SIZE] = - (unsigned long *) get_free_page(GFP_USER))) - return -ENOMEM; + i = pos/PAGE_SIZE; + page = bprm->page[i]; + new = 0; + if (!page) { + /* + * Cannot yet use highmem page because + * we cannot sleep with a kmap held. 
+ */ + page = __get_pages(GFP_USER, 0); + bprm->page[i] = page; + if (!page) + return -ENOMEM; + new = 1; + } + kaddr = (char *)kmap(page, KM_WRITE); + if (new && offset) + memset(kaddr, 0, offset); bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) + if (bytes_to_copy > len) { bytes_to_copy = len; - if (copy_from_user(pag + offset, str, bytes_to_copy)) + if (new) + memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len); + } + err = copy_from_user(kaddr + offset, str, bytes_to_copy); + flush_page_to_ram(page); + kunmap((unsigned long)kaddr, KM_WRITE); + + if (err) return -EFAULT; pos += bytes_to_copy; @@ -276,7 +300,9 @@ int setup_arg_pages(struct linux_binprm *bprm) mpnt->vm_offset = 0; mpnt->vm_file = NULL; mpnt->vm_private_data = (void *) 0; + vmlist_modify_lock(current->mm); insert_vm_struct(current->mm, mpnt); + vmlist_modify_unlock(current->mm); current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; } @@ -467,6 +493,11 @@ int flush_old_exec(struct linux_binprm * bprm) permission(bprm->dentry->d_inode,MAY_READ)) current->dumpable = 0; + /* An exec changes our domain. 
We are no longer part of the thread + group */ + + current->self_exec_id++; + flush_signal_handlers(current); flush_old_files(current->files); @@ -640,14 +671,22 @@ void remove_arg_zero(struct linux_binprm *bprm) { if (bprm->argc) { unsigned long offset; - char * page; + char * kaddr; + struct page *page; + offset = bprm->p % PAGE_SIZE; - page = (char*)bprm->page[bprm->p/PAGE_SIZE]; - while(bprm->p++,*(page+offset++)) - if(offset==PAGE_SIZE){ - offset=0; - page = (char*)bprm->page[bprm->p/PAGE_SIZE]; - } + goto inside; + + while (bprm->p++, *(kaddr+offset++)) { + if (offset != PAGE_SIZE) + continue; + offset = 0; + kunmap((unsigned long)kaddr, KM_WRITE); +inside: + page = bprm->page[bprm->p/PAGE_SIZE]; + kaddr = (char *)kmap(page, KM_WRITE); + } + kunmap((unsigned long)kaddr, KM_WRITE); bprm->argc--; } } @@ -676,8 +715,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->dentry = NULL; bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */ - bprm_loader.page[i] = 0; + for (i = 0 ; i < MAX_ARG_PAGES ; i++) /* clear page-table */ + bprm_loader.page[i] = NULL; dentry = open_namei(dynloader[0], 0, 0); retval = PTR_ERR(dentry); @@ -793,8 +832,9 @@ out: /* Assumes that free_page() can take a NULL argument. */ /* I hope this is ok for all architectures */ - for (i=0 ; i<MAX_ARG_PAGES ; i++) - free_page(bprm.page[i]); + for (i = 0 ; i < MAX_ARG_PAGES ; i++) + if (bprm.page[i]) + __free_page(bprm.page[i]); return retval; } @@ -16,7 +16,7 @@ /* - * Allocate an fd array, using get_free_page() if possible. + * Allocate an fd array, using __get_free_page() if possible. * Note: the array isn't cleared at allocation time. */ struct file ** alloc_fd_array(int num) @@ -129,7 +129,7 @@ out: } /* - * Allocate an fdset array, using get_free_page() if possible. + * Allocate an fdset array, using __get_free_page() if possible. * Note: the array isn't cleared at allocation time. 
*/ fd_set * alloc_fdset(int num) diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 62410ca26..6fb9c1633 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -293,10 +293,10 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) if (!level) return; if (s->s_hpfs_chk) if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return; + brelse(bh); hpfs_free_sectors(s, ano, 1); oano = ano; ano = anode->up; - brelse(bh); if (--level) { anode = hpfs_map_anode(s, ano, &bh); btree1 = &anode->btree; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 8af35847d..36e665c32 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -20,7 +20,34 @@ int hpfs_dir_release(struct inode *inode, struct file *filp) return 0; } -int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir) +/* This is slow, but it's not used often */ + +loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) +{ + loff_t new_off = off + (whence == 1 ? filp->f_pos : 0); + loff_t pos; + struct quad_buffer_head qbh; + struct inode *i = filp->f_dentry->d_inode; + struct super_block *s = filp->f_dentry->d_sb; + /*printk("dir lseek\n");*/ + if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; + hpfs_lock_inode(i); + pos = ((loff_t) hpfs_de_as_down_as_possible(s, i->i_hpfs_dno) << 4) + 1; + while (pos != new_off) { + if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); + else goto fail; + if (pos == 12) goto fail; + } + hpfs_unlock_inode(i); + ok: + return filp->f_pos = new_off; + fail: + hpfs_unlock_inode(i); + /*printk("illegal lseek: %016llx\n", new_off);*/ + return -ESPIPE; +} + +int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_dentry->d_inode; struct quad_buffer_head qbh; @@ -54,11 +81,11 @@ int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir) if (e) return -EFSERROR; } lc = inode->i_sb->s_hpfs_lowercase; - if (filp->f_pos == -2) { /* diff -r requires this 
(note, that diff -r */ - filp->f_pos = -3; /* also fails on msdos filesystem in 2.0) */ + if (filp->f_pos == 12) { /* diff -r requires this (note, that diff -r */ + filp->f_pos = 13; /* also fails on msdos filesystem in 2.0) */ return 0; } - if (filp->f_pos == -3) return -ENOENT; + if (filp->f_pos == 13) return -ENOENT; hpfs_lock_inode(inode); @@ -72,7 +99,7 @@ int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir) hpfs_unlock_inode(inode); return -EFSERROR; } - if (filp->f_pos == -2) { + if (filp->f_pos == 12) { hpfs_unlock_inode(inode); return 0; } @@ -86,9 +113,9 @@ int hpfs_readdir(struct file *filp, void * dirent, filldir_t filldir) hpfs_unlock_inode(inode); return 0; } - filp->f_pos = -1; + filp->f_pos = 11; } - if (filp->f_pos == -1) { + if (filp->f_pos == 11) { if (filldir(dirent, "..", 2, filp->f_pos, inode->i_hpfs_parent_dir) < 0) { hpfs_unlock_inode(inode); return 0; diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index e4b4bbc91..d1ca8e3e6 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -539,7 +539,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) brelse(bh); } i->i_hpfs_dno = down; - for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, (loff_t) -2); + for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, (loff_t) 12); return; } if (!(dnode = hpfs_map_dnode(i->i_sb, up, &qbh))) return; @@ -876,7 +876,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, hpfs_brelse4(&qbh0); bail: - *posp = -2; + *posp = 12; return de; } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index fbb1f2f6c..066ce5c28 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -58,91 +58,96 @@ void hpfs_truncate(struct inode *i) hpfs_write_inode(i); } -int hpfs_getblk_block(struct inode *inode, long block, int create, int *err, int *created) +int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) { - int add; - int sec = 0; - down(&inode->i_sem); - if (err) *err = 0; - if (created) 
*created = 0; - if (!inode->i_blocks) { - hpfs_error(inode->i_sb, "hpfs_get_block: inode %08x has no blocks", inode->i_ino); - if (err) *err = -EFSERROR; - up(&inode->i_sem); + secno s; + if (iblock < inode->i_blocks - 1) { + s = hpfs_bmap(inode, iblock); + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = s; + bh_result->b_state |= (1UL << BH_Mapped); return 0; } - if (block < ((add = inode->i_blocks - 1))) { - int bm; - if (!(bm = hpfs_bmap(inode, block))) { - hpfs_error(inode->i_sb, "hpfs_get_block: cound not bmap block %08x, inode %08x, size %08x", (int)block, inode->i_ino, (int)inode->i_size); - *err = -EFSERROR; - } - up(&inode->i_sem); - return bm; - } - if (!create) { - if (err) *err = -EFBIG; - up(&inode->i_sem); - return 0; + if (!create) return 0; + if (iblock > inode->i_blocks - 1) { + //hpfs_error(inode->i_sb, "hpfs_get_block beyond file end (requested %08x, inode size %08x", (int)iblock, (int)inode->i_blocks - 1); + printk("HPFS: could not write beyond file end. This is known bug.\n"); + return -EFSERROR; } - if (created) *created = 1; - while (add <= block) { - if ((sec = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, add)) == -1) { - if (err) *err = -ENOSPC; - hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1); - return 0; - } /* FIXME: clear block */ - add++; + if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) { + hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1); + return -ENOSPC; } - inode->i_blocks = add + 1; - up(&inode->i_sem); - return sec; + inode->i_blocks++; + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = s; + bh_result->b_state |= (1UL << BH_Mapped) | (1UL << BH_New); + return 0; } -/* copied from ext2fs */ -static int hpfs_get_block(struct inode *inode, unsigned long block, struct buffer_head *bh, int update) +static int hpfs_write_partial_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, 
const char * buf) { - if (!bh->b_blocknr) { - int error, created; - unsigned long blocknr; - - blocknr = hpfs_getblk_block(inode, block, 1, &error, &created); - if (!blocknr) { - if (!error) - error = -ENOSPC; - return error; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + struct page *new_page, **hash; + unsigned long pgpos; + unsigned long page_cache = 0; + long status; + + printk("- off: %08x\n", (int)page->offset); + pgpos = (inode->i_blocks - 1) * 512 & PAGE_CACHE_MASK; + while (pgpos < page->offset) { +long pgp = pgpos; + printk("pgpos: %08x, bl: %d\n", (int)pgpos, (int)inode->i_blocks); + hash = page_hash(inode, pgpos); +repeat_find: new_page = __find_lock_page(inode, pgpos, hash); + if (!new_page) { + if (!page_cache) { + page_cache = page_cache_alloc(); + if (page_cache) + goto repeat_find; + status = -ENOMEM; + goto out; + } + new_page = page_cache_entry(page_cache); + if (add_to_page_cache_unique(new_page,inode,pgpos,hash)) + goto repeat_find; + page_cache = 0; } - - bh->b_dev = inode->i_dev; - bh->b_blocknr = blocknr; - - if (!update) - return 0; - - if (created) { - memset(bh->b_data, 0, bh->b_size); - set_bit(BH_Uptodate, &bh->b_state); - return 0; + printk("A\n"); + status = block_write_cont_page(file, new_page, PAGE_SIZE, 0, NULL); + printk("B\n"); + UnlockPage(new_page); + page_cache_release(new_page); + if (status < 0) + goto out; + pgpos = (inode->i_blocks - 1) * 512 & PAGE_CACHE_MASK; + printk("pgpos2: %08x, bl: %d\n", (int)pgpos, (int)inode->i_blocks); + if (pgpos == pgp) { + status = -1; + printk("ERROR\n"); + goto out; } } - - if (!update) - return 0; - - lock_kernel(); - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - unlock_kernel(); - - return buffer_uptodate(bh) ? 
0 : -EIO; + //if ((status = block_write_cont_page(file, page, PAGE_SIZE, 0, NULL)) < 0) goto out; + printk("C\n"); + status = block_write_cont_page(file, page, offset, bytes, buf); + printk("D\n"); +out: + printk("O\n"); + if (page_cache) + page_cache_free(page_cache); + printk("E\n"); + return status; } + ssize_t hpfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { ssize_t retval; retval = generic_file_write(file, buf, count, - ppos, block_write_partial_page); + ppos, /*hpfs_write_partial_page*/block_write_partial_page); if (retval > 0) { struct inode *inode = file->f_dentry->d_inode; inode->i_mtime = CURRENT_TIME; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 050b63597..9ae4a67da 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -226,6 +226,7 @@ void hpfs_set_dentry_operations(struct dentry *); int hpfs_dir_read(struct file *, char *, size_t, loff_t *); int hpfs_dir_release(struct inode *, struct file *); +loff_t hpfs_dir_lseek(struct file *, loff_t, int); int hpfs_readdir(struct file *, void *, filldir_t); struct dentry *hpfs_lookup(struct inode *, struct dentry *); @@ -258,9 +259,8 @@ int hpfs_open(struct inode *, struct file *); int hpfs_file_fsync(struct file *, struct dentry *); secno hpfs_bmap(struct inode *, unsigned); void hpfs_truncate(struct inode *); -ssize_t hpfs_file_read(struct file *, char *, size_t, loff_t *); -ssize_t hpfs_file_write(struct file *, const char *, size_t, loff_t *); -int hpfs_writepage (struct file *, struct page *); +int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create); +ssize_t hpfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos); /* inode.c */ diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index efc776218..d79e55814 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -41,11 +41,9 @@ static const struct inode_operations hpfs_file_iops = NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - (int 
(*)(struct inode *, int)) -#warning Someone needs to code up hpfs_get_block properly... -DaveM - &hpfs_bmap, /* get_block */ + &hpfs_get_block, /* get_block */ block_read_full_page, /* readpage */ - hpfs_writepage, /* writepage */ + block_write_full_page, /* writepage */ block_flushpage, /* flushpage */ hpfs_truncate, /* truncate */ NULL, /* permission */ @@ -55,7 +53,7 @@ static const struct inode_operations hpfs_file_iops = static const struct file_operations hpfs_dir_ops = { - NULL, /* lseek - default */ + hpfs_dir_lseek, /* lseek */ hpfs_dir_read, /* read */ NULL, /* write - bad */ hpfs_readdir, /* readdir */ diff --git a/fs/inode.c b/fs/inode.c index 55eddfde8..f03295d5c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -89,6 +89,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) memset(inode, 0, sizeof(*inode)); init_waitqueue_head(&inode->i_wait); INIT_LIST_HEAD(&inode->i_hash); + INIT_LIST_HEAD(&inode->i_pages); INIT_LIST_HEAD(&inode->i_dentry); sema_init(&inode->i_sem, 1); spin_lock_init(&inode->i_shared_lock); @@ -401,7 +402,7 @@ int shrink_icache_memory(int priority, int gfp_mask) prune_icache(count); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should - work in a __GFP_DMA/__GFP_BIGMEM behaviour + work in a __GFP_DMA/__GFP_HIGHMEM behaviour to free only the interesting pages in function of the needs of the current allocation. */ kmem_cache_shrink(inode_cachep); @@ -429,7 +430,7 @@ static inline void __iget(struct inode * inode) * by hand after calling find_inode now! This simplify iunique and won't * add any additional branch in the common code. 
*/ -static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head) +static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) { struct list_head *tmp; struct inode * inode; @@ -445,6 +446,8 @@ static struct inode * find_inode(struct super_block * sb, unsigned long ino, str continue; if (inode->i_ino != ino) continue; + if (find_actor && !find_actor(inode, ino, opaque)) + continue; break; } return inode; @@ -504,7 +507,7 @@ struct inode * get_empty_inode(void) * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org */ -static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head) +static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) { struct inode * inode; @@ -514,7 +517,7 @@ static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, s spin_lock(&inode_lock); /* We released the lock, so.. 
*/ - old = find_inode(sb, ino, head); + old = find_inode(sb, ino, head, find_actor, opaque); if (!old) { list_add(&inode->i_list, &inode_in_use); @@ -570,7 +573,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) retry: if (counter > max_reserved) { head = inode_hashtable + hash(sb,counter); - inode = find_inode(sb, res = counter++, head); + inode = find_inode(sb, res = counter++, head, NULL, NULL); if (!inode) { spin_unlock(&inode_lock); return res; @@ -595,13 +598,13 @@ struct inode *igrab(struct inode *inode) return inode; } -struct inode *iget(struct super_block *sb, unsigned long ino) +struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) { struct list_head * head = inode_hashtable + hash(sb,ino); struct inode * inode; spin_lock(&inode_lock); - inode = find_inode(sb, ino, head); + inode = find_inode(sb, ino, head, find_actor, opaque); if (inode) { __iget(inode); spin_unlock(&inode_lock); @@ -614,7 +617,7 @@ struct inode *iget(struct super_block *sb, unsigned long ino) * get_new_inode() will do the right thing, re-trying the search * in case it had to block at any point. 
*/ - return get_new_inode(sb, ino, head); + return get_new_inode(sb, ino, head, find_actor, opaque); } void insert_inode_hash(struct inode *inode) diff --git a/fs/iobuf.c b/fs/iobuf.c index b46a13bfd..eaabf2f7c 100644 --- a/fs/iobuf.c +++ b/fs/iobuf.c @@ -50,7 +50,6 @@ int alloc_kiovec(int nr, struct kiobuf **bufp) init_waitqueue_head(&iobuf->wait_queue); iobuf->end_io = simple_wakeup_kiobuf; iobuf->array_len = KIO_STATIC_PAGES; - iobuf->pagelist = iobuf->page_array; iobuf->maplist = iobuf->map_array; *bufp++ = iobuf; } @@ -65,50 +64,35 @@ void free_kiovec(int nr, struct kiobuf **bufp) for (i = 0; i < nr; i++) { iobuf = bufp[i]; - if (iobuf->array_len > KIO_STATIC_PAGES) { - kfree (iobuf->pagelist); + if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - } kmem_cache_free(kiobuf_cachep, bufp[i]); } } int expand_kiobuf(struct kiobuf *iobuf, int wanted) { - unsigned long * pagelist; struct page ** maplist; if (iobuf->array_len >= wanted) return 0; - pagelist = (unsigned long *) - kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL); - if (!pagelist) - return -ENOMEM; - maplist = (struct page **) kmalloc(wanted * sizeof(struct page **), GFP_KERNEL); - if (!maplist) { - kfree(pagelist); + if (!maplist) return -ENOMEM; - } /* Did it grow while we waited? 
*/ if (iobuf->array_len >= wanted) { - kfree(pagelist); kfree(maplist); return 0; } - memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long)); memcpy (maplist, iobuf->maplist, wanted * sizeof(struct page **)); - if (iobuf->array_len > KIO_STATIC_PAGES) { - kfree (iobuf->pagelist); + if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - } - iobuf->pagelist = pagelist; iobuf->maplist = maplist; iobuf->array_len = wanted; return 0; diff --git a/fs/minix/truncate.c b/fs/minix/truncate.c index f26aa086c..70b01dc20 100644 --- a/fs/minix/truncate.c +++ b/fs/minix/truncate.c @@ -33,7 +33,7 @@ */ #define DATA_BUFFER_USED(bh) \ - (atomic_read(&bh->b_count) || buffer_locked(bh)) + (atomic_read(&bh->b_count) > 1 || buffer_locked(bh)) /* * The functions for minix V1 fs truncation. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6b52b2d54..b7ec225ac 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -308,8 +308,7 @@ static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int struct nfs_readdirres rd_res; struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - struct page *page, **hash; - unsigned long page_cache; + struct page *page, **hash, *page_cache; long offset; __u32 *cookiep; @@ -341,14 +340,14 @@ repeat: goto unlock_out; } - page = page_cache_entry(page_cache); + page = page_cache; if (add_to_page_cache_unique(page, inode, offset, hash)) { page_cache_release(page); goto repeat; } rd_args.fh = NFS_FH(dentry); - rd_res.buffer = (char *)page_cache; + rd_res.buffer = (char *)page_address(page_cache); rd_res.bufsiz = PAGE_CACHE_SIZE; rd_res.cookie = *cookiep; do { @@ -533,13 +532,15 @@ static inline int nfs_dentry_force_reval(struct dentry *dentry, int flags) * If mtime is close to present time, we revalidate * more often. 
*/ +#define NFS_REVALIDATE_NEGATIVE (1 * HZ) static inline int nfs_neg_need_reval(struct dentry *dentry) { - unsigned long timeout = 30 * HZ; - long diff = CURRENT_TIME - dentry->d_parent->d_inode->i_mtime; + struct inode *dir = dentry->d_parent->d_inode; + unsigned long timeout = NFS_ATTRTIMEO(dir); + long diff = CURRENT_TIME - dir->i_mtime; - if (diff < 5*60) - timeout = 1 * HZ; + if (diff < 5*60 && timeout > NFS_REVALIDATE_NEGATIVE) + timeout = NFS_REVALIDATE_NEGATIVE; return time_after(jiffies, dentry->d_time + timeout); } @@ -581,12 +582,14 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) goto out_bad; } - if (IS_ROOT(dentry)) - goto out_valid; - if (!nfs_dentry_force_reval(dentry, flags)) goto out_valid; + if (IS_ROOT(dentry)) { + __nfs_revalidate_inode(NFS_DSERVER(dentry), dentry); + goto out_valid_renew; + } + /* * Do a new lookup and check the dentry attributes. */ @@ -596,32 +599,29 @@ static int nfs_lookup_revalidate(struct dentry * dentry, int flags) goto out_bad; /* Inode number matches? */ - if (fattr.fileid != inode->i_ino) + if (NFS_FSID(inode) != fattr.fsid || + NFS_FILEID(inode) != fattr.fileid) goto out_bad; /* Filehandle matches? */ - if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh))) { - if (!list_empty(&dentry->d_subdirs)) - shrink_dcache_parent(dentry); - if (dentry->d_count < 2) - goto out_bad; - } + if (memcmp(dentry->d_fsdata, &fhandle, sizeof(struct nfs_fh))) + goto out_bad; /* Ok, remeber that we successfully checked it.. */ - nfs_renew_times(dentry); nfs_refresh_inode(inode, &fattr); + out_valid_renew: + nfs_renew_times(dentry); out_valid: return 1; out_bad: + d_drop(dentry); + if (!list_empty(&dentry->d_subdirs)) + shrink_dcache_parent(dentry); /* Purge readdir caches. 
*/ if (dentry->d_parent->d_inode) { - invalidate_inode_pages(dentry->d_parent->d_inode); - nfs_flush_dircache(dentry->d_parent->d_inode); - } - if (inode && S_ISDIR(inode->i_mode)) { - invalidate_inode_pages(inode); - nfs_flush_dircache(inode); + nfs_zap_caches(dentry->d_parent->d_inode); + NFS_CACHEINV(dentry->d_parent->d_inode); } return 0; } @@ -649,21 +649,6 @@ static void nfs_dentry_delete(struct dentry *dentry) dentry->d_name.name, error); } -#ifdef NFS_PARANOIA - /* - * Sanity check: if the dentry has been unhashed and the - * inode still has users, we could have problems ... - */ - if (list_empty(&dentry->d_hash) && dentry->d_inode) { - struct inode *inode = dentry->d_inode; - int max_count = (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink); - if (inode->i_count > max_count) { -printk("nfs_dentry_delete: %s/%s: ino=%ld, count=%d, nlink=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -inode->i_ino, inode->i_count, inode->i_nlink); - } - } -#endif } static kmem_cache_t *nfs_fh_cachep; @@ -750,14 +735,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry) error = -EACCES; inode = nfs_fhget(dentry, &fhandle, &fattr); if (inode) { -#ifdef NFS_PARANOIA -if (inode->i_count > (S_ISDIR(inode->i_mode) ? 1 : inode->i_nlink)) { -printk("nfs_lookup: %s/%s ino=%ld in use, count=%d, nlink=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -inode->i_ino, inode->i_count, inode->i_nlink); -show_dentry(&inode->i_dentry); -} -#endif no_entry: d_add(dentry, inode); nfs_renew_times(dentry); @@ -779,14 +756,6 @@ static int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, inode = nfs_fhget(dentry, fhandle, fattr); if (inode) { -#ifdef NFS_PARANOIA -if (inode->i_count > (S_ISDIR(inode->i_mode) ? 
1 : inode->i_nlink)) { -printk("nfs_instantiate: %s/%s ino=%ld in use, count=%d, nlink=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -inode->i_ino, inode->i_count, inode->i_nlink); -show_dentry(&inode->i_dentry); -} -#endif d_instantiate(dentry, inode); nfs_renew_times(dentry); error = 0; @@ -803,16 +772,15 @@ show_dentry(&inode->i_dentry); static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) { int error; - struct nfs_sattr sattr; + struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; dfprintk(VFS, "NFS: create(%x/%ld, %s\n", dir->i_dev, dir->i_ino, dentry->d_name.name); - sattr.mode = mode; - sattr.uid = sattr.gid = sattr.size = (unsigned) -1; - sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; /* * Invalidate the dir cache before the operation to avoid a race. @@ -820,7 +788,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) invalidate_inode_pages(dir); nfs_flush_dircache(dir); error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, &sattr, &fhandle, &fattr); + dentry->d_name.name, &attr, &fhandle, &fattr); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); if (error) @@ -834,23 +802,25 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode) static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev) { int error; - struct nfs_sattr sattr; + struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; dfprintk(VFS, "NFS: mknod(%x/%ld, %s\n", dir->i_dev, dir->i_ino, dentry->d_name.name); - sattr.mode = mode; - sattr.uid = sattr.gid = sattr.size = (unsigned) -1; - if (S_ISCHR(mode) || S_ISBLK(mode)) - sattr.size = rdev; /* get out your barf bag */ - sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + /* FIXME: move this to a special nfs_proc_mknod() */ + if (S_ISCHR(mode) || 
S_ISBLK(mode)) { + attr.ia_size = rdev; /* get out your barf bag */ + attr.ia_valid |= ATTR_SIZE; + } invalidate_inode_pages(dir); nfs_flush_dircache(dir); error = nfs_proc_create(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, &sattr, &fhandle, &fattr); + dentry->d_name.name, &attr, &fhandle, &fattr); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); if (error) @@ -864,16 +834,15 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rde static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int error; - struct nfs_sattr sattr; + struct iattr attr; struct nfs_fattr fattr; struct nfs_fh fhandle; dfprintk(VFS, "NFS: mkdir(%x/%ld, %s\n", dir->i_dev, dir->i_ino, dentry->d_name.name); - sattr.mode = mode | S_IFDIR; - sattr.uid = sattr.gid = sattr.size = (unsigned) -1; - sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + attr.ia_valid = ATTR_MODE; + attr.ia_mode = mode | S_IFDIR; /* * Always drop the dentry, we can't always depend on @@ -885,7 +854,7 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) invalidate_inode_pages(dir); nfs_flush_dircache(dir); error = nfs_proc_mkdir(NFS_DSERVER(dentry), NFS_FH(dentry->d_parent), - dentry->d_name.name, &sattr, &fhandle, &fattr); + dentry->d_name.name, &attr, &fhandle, &fattr); if (!error) dir->i_nlink++; return error; @@ -898,13 +867,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dfprintk(VFS, "NFS: rmdir(%x/%ld, %s\n", dir->i_dev, dir->i_ino, dentry->d_name.name); -#ifdef NFS_PARANOIA -if (dentry->d_inode->i_count > 1) -printk("nfs_rmdir: %s/%s inode busy?? 
i_count=%d, i_nlink=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -dentry->d_inode->i_count, dentry->d_inode->i_nlink); -#endif - invalidate_inode_pages(dir); nfs_flush_dircache(dir); error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dentry->d_parent), @@ -1082,12 +1044,6 @@ dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count); #endif goto out; } -#ifdef NFS_PARANOIA -if (inode && inode->i_count > inode->i_nlink) -printk("nfs_safe_remove: %s/%s inode busy?? i_count=%d, i_nlink=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -inode->i_count, inode->i_nlink); -#endif /* * Unhash the dentry while we remove the file ... */ @@ -1141,7 +1097,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct nfs_sattr sattr; + struct iattr attr; int error; dfprintk(VFS, "NFS: symlink(%x/%ld, %s, %s)\n", @@ -1160,9 +1116,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name); * Fill in the sattr for the call. * Note: SunOS 4.1.2 crashes if the mode isn't initialized! */ - sattr.mode = S_IFLNK | S_IRWXUGO; - sattr.uid = sattr.gid = sattr.size = (unsigned) -1; - sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; + attr.ia_valid = ATTR_MODE; + attr.ia_mode = S_IFLNK | S_IRWXUGO; /* * Drop the dentry in advance to force a new lookup. @@ -1173,7 +1128,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name); invalidate_inode_pages(dir); nfs_flush_dircache(dir); error = nfs_proc_symlink(NFS_SERVER(dir), NFS_FH(dentry->d_parent), - dentry->d_name.name, symname, &sattr); + dentry->d_name.name, symname, &attr); if (!error) { nfs_renew_times(dentry->d_parent); } else if (error == -EEXIST) { @@ -1332,13 +1287,6 @@ do_rename: * To prevent any new references to the target during the rename, * we unhash the dentry and free the inode in advance. */ -#ifdef NFS_PARANOIA -if (new_inode && - new_inode->i_count > (S_ISDIR(new_inode->i_mode) ? 
1 : new_inode->i_nlink)) -printk("nfs_rename: %s/%s inode busy?? i_count=%d, i_nlink=%d\n", -new_dentry->d_parent->d_name.name, new_dentry->d_name.name, -new_inode->i_count, new_inode->i_nlink); -#endif if (!list_empty(&new_dentry->d_hash)) { d_drop(new_dentry); rehash = update; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5421cebf9..ab1e51485 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -37,7 +37,7 @@ #define NFS_PARANOIA 1 static struct inode * __nfs_fhget(struct super_block *, struct nfs_fattr *); -static void nfs_zap_caches(struct inode *); +void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); static void nfs_read_inode(struct inode *); @@ -78,6 +78,8 @@ nfs_read_inode(struct inode * inode) inode->i_mode = 0; inode->i_rdev = 0; inode->i_op = NULL; + NFS_FILEID(inode) = 0; + NFS_FSID(inode) = 0; NFS_CACHEINV(inode); NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); } @@ -415,13 +417,15 @@ restart: dprintk("nfs_free_dentries: found %s/%s, d_count=%d, hashed=%d\n", dentry->d_parent->d_name.name, dentry->d_name.name, dentry->d_count, !list_empty(&dentry->d_hash)); + if (!list_empty(&dentry->d_subdirs)) + shrink_dcache_parent(dentry); if (!dentry->d_count) { dget(dentry); d_drop(dentry); dput(dentry); goto restart; } - if (!list_empty(&dentry->d_hash)) + if (list_empty(&dentry->d_hash)) unhashed++; } return unhashed; @@ -430,7 +434,7 @@ restart: /* * Invalidate the local caches */ -static void +void nfs_zap_caches(struct inode *inode) { NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); @@ -466,6 +470,8 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr) * do this once. (We don't allow inodes to change types.) 
*/ if (inode->i_mode == 0) { + NFS_FILEID(inode) = fattr->fileid; + NFS_FSID(inode) = fattr->fsid; inode->i_mode = fattr->mode; if (S_ISREG(inode->i_mode)) inode->i_op = &nfs_file_inode_operations; @@ -487,6 +493,54 @@ nfs_fill_inode(struct inode *inode, struct nfs_fattr *fattr) } /* + * In NFSv3 we can have 64bit inode numbers. In order to support + * this, and re-exported directories (also seen in NFSv2) + * we are forced to allow 2 different inodes to have the same + * i_ino. + */ +static int +nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque) +{ + struct nfs_fattr *fattr = (struct nfs_fattr *)opaque; + if (NFS_FSID(inode) != fattr->fsid) + return 0; + if (NFS_FILEID(inode) != fattr->fileid) + return 0; + return 1; +} + +static int +nfs_inode_is_stale(struct inode *inode, struct nfs_fattr *fattr) +{ + int unhashed; + int is_stale = 0; + + if (inode->i_mode && + (fattr->mode & S_IFMT) != (inode->i_mode & S_IFMT)) + is_stale = 1; + + if (is_bad_inode(inode)) + is_stale = 1; + + /* + * If the inode seems stale, free up cached dentries. + */ + unhashed = nfs_free_dentries(inode); + + /* Assume we're holding an i_count + * + * NB: sockets sometimes have volatile file handles + * don't invalidate their inodes even if all dentries are + * unhashed. + */ + if (unhashed && inode->i_count == unhashed + 1 + && !S_ISSOCK(inode->i_mode) && !S_ISFIFO(inode->i_mode)) + is_stale = 1; + + return is_stale; +} + +/* * This is our own version of iget that looks up inodes by file handle * instead of inode number. 
We use this technique instead of using * the vfs read_inode function because there is no way to pass the @@ -545,54 +599,40 @@ nfs_fhget(struct dentry *dentry, struct nfs_fh *fhandle, static struct inode * __nfs_fhget(struct super_block *sb, struct nfs_fattr *fattr) { - struct inode *inode; - int max_count, stale_inode, unhashed = 0; + struct inode *inode = NULL; + unsigned long ino; -retry: - inode = iget(sb, fattr->fileid); - if (!inode) + if (!fattr->nlink) { + printk("NFS: Buggy server - nlink == 0!\n"); goto out_no_inode; - /* N.B. This should be impossible ... */ - if (inode->i_ino != fattr->fileid) - goto out_bad_id; + } - /* - * Check for busy inodes, and attempt to get rid of any - * unused local references. If successful, we release the - * inode and try again. - * - * Note that the busy test uses the values in the fattr, - * as the inode may have become a different object. - * (We can probably handle modes changes here, too.) - */ - stale_inode = inode->i_mode && - ((fattr->mode ^ inode->i_mode) & S_IFMT); - stale_inode |= inode->i_count && inode->i_count == unhashed; - max_count = S_ISDIR(fattr->mode) ? 
1 : fattr->nlink; - if (stale_inode || inode->i_count > max_count + unhashed) { - dprintk("__nfs_fhget: inode %ld busy, i_count=%d, i_nlink=%d\n", - inode->i_ino, inode->i_count, inode->i_nlink); - unhashed = nfs_free_dentries(inode); - if (stale_inode || inode->i_count > max_count + unhashed) { - printk("__nfs_fhget: inode %ld still busy, i_count=%d\n", - inode->i_ino, inode->i_count); - if (!list_empty(&inode->i_dentry)) { - struct dentry *dentry; - dentry = list_entry(inode->i_dentry.next, - struct dentry, d_alias); - printk("__nfs_fhget: killing %s/%s filehandle\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); - memset(dentry->d_fsdata, 0, - sizeof(struct nfs_fh)); - } - remove_inode_hash(inode); - nfs_invalidate_inode(inode); - unhashed = 0; - } + ino = fattr->fileid; + + while((inode = iget4(sb, ino, nfs_find_actor, fattr)) != NULL) { + + /* + * Check for busy inodes, and attempt to get rid of any + * unused local references. If successful, we release the + * inode and try again. + * + * Note that the busy test uses the values in the fattr, + * as the inode may have become a different object. + * (We can probably handle modes changes here, too.) + */ + if (!nfs_inode_is_stale(inode,fattr)) + break; + + dprintk("__nfs_fhget: inode %ld still busy, i_count=%d\n", + inode->i_ino, inode->i_count); + nfs_zap_caches(inode); + remove_inode_hash(inode); iput(inode); - goto retry; } + + if (!inode) + goto out_no_inode; + nfs_fill_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%ld ct=%d)\n", inode->i_dev, inode->i_ino, inode->i_count); @@ -603,18 +643,14 @@ out: out_no_inode: printk("__nfs_fhget: iget failed\n"); goto out; -out_bad_id: - printk("__nfs_fhget: unexpected inode from iget\n"); - goto out; } int nfs_notify_change(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; - int error; - struct nfs_sattr sattr; struct nfs_fattr fattr; + int error; /* * Make sure the inode is up-to-date. 
@@ -627,54 +663,29 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error); goto out; } - sattr.mode = (u32) -1; - if (attr->ia_valid & ATTR_MODE) - sattr.mode = attr->ia_mode; - - sattr.uid = (u32) -1; - if (attr->ia_valid & ATTR_UID) - sattr.uid = attr->ia_uid; - - sattr.gid = (u32) -1; - if (attr->ia_valid & ATTR_GID) - sattr.gid = attr->ia_gid; - - sattr.size = (u32) -1; - if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) - sattr.size = attr->ia_size; - - sattr.mtime.seconds = sattr.mtime.useconds = (u32) -1; - if (attr->ia_valid & ATTR_MTIME) { - sattr.mtime.seconds = attr->ia_mtime; - sattr.mtime.useconds = 0; - } - - sattr.atime.seconds = sattr.atime.useconds = (u32) -1; - if (attr->ia_valid & ATTR_ATIME) { - sattr.atime.seconds = attr->ia_atime; - sattr.atime.useconds = 0; - } + if (!S_ISREG(inode->i_mode)) + attr->ia_valid &= ~ATTR_SIZE; error = nfs_wb_all(inode); if (error) goto out; error = nfs_proc_setattr(NFS_DSERVER(dentry), NFS_FH(dentry), - &sattr, &fattr); + &fattr, attr); if (error) goto out; /* * If we changed the size or mtime, update the inode * now to avoid invalidating the page cache. */ - if (sattr.size != (u32) -1) { - if (sattr.size != fattr.size) - printk("nfs_notify_change: sattr=%d, fattr=%d??\n", - sattr.size, fattr.size); - inode->i_size = sattr.size; + if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_size != fattr.size) + printk("nfs_notify_change: attr=%ld, fattr=%d??\n", + attr->ia_size, fattr.size); + inode->i_size = attr->ia_size; inode->i_mtime = fattr.mtime.seconds; } - if (sattr.mtime.seconds != (u32) -1) + if (attr->ia_valid & ATTR_MTIME) inode->i_mtime = fattr.mtime.seconds; error = nfs_refresh_inode(inode, &fattr); out: @@ -682,6 +693,34 @@ out: } /* + * Wait for the inode to get unlocked. + * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING). 
+ */ +int +nfs_wait_on_inode(struct inode *inode, int flag) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + int intr, error = 0; + + intr = NFS_SERVER(inode)->flags & NFS_MOUNT_INTR; + add_wait_queue(&inode->i_wait, &wait); + for (;;) { + set_task_state(tsk, (intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE)); + error = 0; + if (!(NFS_FLAGS(inode) & flag)) + break; + error = -ERESTARTSYS; + if (intr && signalled()) + break; + schedule(); + } + set_task_state(tsk, TASK_RUNNING); + remove_wait_queue(&inode->i_wait, &wait); + return error; +} + +/* * Externally visible revalidation function */ int @@ -711,7 +750,7 @@ int nfs_release(struct inode *inode, struct file *filp) * the cached attributes have to be refreshed. */ int -_nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry) +__nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry) { struct inode *inode = dentry->d_inode; int status = 0; @@ -720,6 +759,19 @@ _nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry) dfprintk(PAGECACHE, "NFS: revalidating %s/%s, ino=%ld\n", dentry->d_parent->d_name.name, dentry->d_name.name, inode->i_ino); + + if (!inode || is_bad_inode(inode)) + return -ESTALE; + + while (NFS_REVALIDATING(inode)) { + status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); + if (status < 0) + return status; + if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) + return 0; + } + NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + status = nfs_proc_getattr(server, NFS_FH(dentry), &fattr); if (status) { int error; @@ -759,6 +811,8 @@ _nfs_revalidate_inode(struct nfs_server *server, struct dentry *dentry) dfprintk(PAGECACHE, "NFS: %s/%s revalidation complete\n", dentry->d_parent->d_name.name, dentry->d_name.name); out: + NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; + wake_up(&inode->i_wait); return status; } diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1bc7d3d37..a7e53e6db 100644 --- a/fs/nfs/nfs2xdr.c +++ 
b/fs/nfs/nfs2xdr.c @@ -118,19 +118,35 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) return p; } + +#define SATTR(p, attr, flag, field) \ + *p++ = (attr->ia_valid & flag) ? htonl(attr->field) : ~(u32) 0 static inline u32 * -xdr_encode_sattr(u32 *p, struct nfs_sattr *sattr) +xdr_encode_sattr(u32 *p, struct iattr *attr) { - *p++ = htonl(sattr->mode); - *p++ = htonl(sattr->uid); - *p++ = htonl(sattr->gid); - *p++ = htonl(sattr->size); - *p++ = htonl(sattr->atime.seconds); - *p++ = htonl(sattr->atime.useconds); - *p++ = htonl(sattr->mtime.seconds); - *p++ = htonl(sattr->mtime.useconds); - return p; + SATTR(p, attr, ATTR_MODE, ia_mode); + SATTR(p, attr, ATTR_UID, ia_uid); + SATTR(p, attr, ATTR_GID, ia_gid); + SATTR(p, attr, ATTR_SIZE, ia_size); + + if (attr->ia_valid & (ATTR_ATIME|ATTR_ATIME_SET)) { + *p++ = htonl(attr->ia_atime); + *p++ = 0; + } else { + *p++ = ~(u32) 0; + *p++ = ~(u32) 0; + } + + if (attr->ia_valid & (ATTR_MTIME|ATTR_MTIME_SET)) { + *p++ = htonl(attr->ia_mtime); + *p++ = 0; + } else { + *p++ = ~(u32) 0; + *p++ = ~(u32) 0; + } + return p; } +#undef SATTR /* * NFS encode functions diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 3b48b326a..bb55ce6d6 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -65,7 +65,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, int nfs_proc_setattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_sattr *sattr, struct nfs_fattr *fattr) + struct nfs_fattr *fattr, struct iattr *sattr) { struct nfs_sattrargs arg = { fhandle, sattr }; int status; @@ -123,7 +123,7 @@ nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap, int nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir, - const char *name, struct nfs_sattr *sattr, + const char *name, struct iattr *sattr, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_createargs arg = { dir, name, sattr }; @@ -178,7 +178,7 @@ nfs_proc_link(struct nfs_server *server, struct nfs_fh *fhandle, int 
nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir, const char *name, const char *path, - struct nfs_sattr *sattr) + struct iattr *sattr) { struct nfs_symlinkargs arg = { dir, name, path, sattr }; int status; @@ -191,7 +191,7 @@ nfs_proc_symlink(struct nfs_server *server, struct nfs_fh *dir, int nfs_proc_mkdir(struct nfs_server *server, struct nfs_fh *dir, - const char *name, struct nfs_sattr *sattr, + const char *name, struct iattr *sattr, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_createargs arg = { dir, name, sattr }; diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 6cd892740..6b0d0f05b 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -59,8 +59,7 @@ struct inode_operations nfs_symlink_inode_operations = { static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode) { struct nfs_readlinkargs rl_args; - struct page *page, **hash; - unsigned long page_cache; + struct page *page, **hash, *page_cache; page = NULL; page_cache = page_cache_alloc(); @@ -75,7 +74,7 @@ repeat: goto unlock_out; } - page = page_cache_entry(page_cache); + page = page_cache; if (add_to_page_cache_unique(page, inode, 0, hash)) { page_cache_release(page); goto repeat; @@ -86,7 +85,7 @@ repeat: * XDR response verification will NULL terminate it. */ rl_args.fh = NFS_FH(dentry); - rl_args.buffer = (const void *)page_cache; + rl_args.buffer = (const void *)page_address(page_cache); if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &rl_args, NULL, 0) < 0) goto error; diff --git a/fs/proc/array.c b/fs/proc/array.c index d7f8ad9dd..249abd8cd 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -38,6 +38,7 @@ * * aeb@cwi.nl : /proc/partitions * + * * Alan Cox : security fixes. * <Alan.Cox@linux.org> * @@ -45,11 +46,6 @@ * * Gerhard Wichert : added BIGMEM support * Siemens AG <Gerhard.Wichert@pdb.siemens.de> - * - * Chuck Lever : safe handling of task_struct - * <cel@monkey.org> - * - * Andrea Arcangeli : SMP race/security fixes. 
*/ #include <linux/types.h> @@ -71,7 +67,6 @@ #include <linux/slab.h> #include <linux/smp.h> #include <linux/signal.h> -#include <linux/smp_lock.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -365,16 +360,24 @@ static int get_meminfo(char * buffer) struct sysinfo i; int len; +/* + * display in kilobytes. + */ +#define K(x) ((x) << (PAGE_SHIFT - 10)) + si_meminfo(&i); si_swapinfo(&i); len = sprintf(buffer, " total: used: free: shared: buffers: cached:\n" - "Mem: %8lu %8lu %8lu %8lu %8lu %8lu\n" + "Mem: %8lu %8lu %8lu %8lu %8lu %8u\n" "Swap: %8lu %8lu %8lu\n", - i.totalram, i.totalram-i.freeram, i.freeram, i.sharedram, i.bufferram, (unsigned long) atomic_read(&page_cache_size)*PAGE_SIZE, - i.totalswap, i.totalswap-i.freeswap, i.freeswap); + K(i.totalram), K(i.totalram-i.freeram), K(i.freeram), + K(i.sharedram), K(i.bufferram), + K(atomic_read(&page_cache_size)), K(i.totalswap), + K(i.totalswap-i.freeswap), K(i.freeswap)); /* - * Tagged format, for easy grepping and expansion. The above will go away - * eventually, once the tools have been updated. + * Tagged format, for easy grepping and expansion. + * The above will go away eventually, once the tools + * have been updated. 
*/ return len + sprintf(buffer+len, "MemTotal: %8lu kB\n" @@ -382,19 +385,20 @@ static int get_meminfo(char * buffer) "MemShared: %8lu kB\n" "Buffers: %8lu kB\n" "Cached: %8u kB\n" - "BigTotal: %8lu kB\n" - "BigFree: %8lu kB\n" + "HighTotal: %8lu kB\n" + "HighFree: %8lu kB\n" "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n", - i.totalram >> 10, - i.freeram >> 10, - i.sharedram >> 10, - i.bufferram >> 10, - atomic_read(&page_cache_size) << (PAGE_SHIFT - 10), - i.totalbig >> 10, - i.freebig >> 10, - i.totalswap >> 10, - i.freeswap >> 10); + K(i.totalram), + K(i.freeram), + K(i.sharedram), + K(i.bufferram), + K(atomic_read(&page_cache_size)), + K(i.totalhigh), + K(i.freehigh), + K(i.totalswap), + K(i.freeswap)); +#undef K } static int get_version(char * buffer) @@ -412,69 +416,68 @@ static int get_cmdline(char * buffer) return sprintf(buffer, "%s\n", saved_command_line); } -static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr) +static struct page * get_phys_addr(struct mm_struct * mm, unsigned long ptr) { - pgd_t *page_dir; - pmd_t *page_middle; + pgd_t *pgd; + pmd_t *pmd; pte_t pte; if (ptr >= TASK_SIZE) return 0; - page_dir = pgd_offset(mm,ptr); - if (pgd_none(*page_dir)) + pgd = pgd_offset(mm,ptr); + if (pgd_none(*pgd)) return 0; - if (pgd_bad(*page_dir)) { - printk("bad page directory entry %08lx\n", pgd_val(*page_dir)); - pgd_clear(page_dir); + if (pgd_bad(*pgd)) { + pgd_ERROR(*pgd); + pgd_clear(pgd); return 0; } - page_middle = pmd_offset(page_dir,ptr); - if (pmd_none(*page_middle)) + pmd = pmd_offset(pgd,ptr); + if (pmd_none(*pmd)) return 0; - if (pmd_bad(*page_middle)) { - printk("bad page middle entry %08lx\n", pmd_val(*page_middle)); - pmd_clear(page_middle); + if (pmd_bad(*pmd)) { + pmd_ERROR(*pmd); + pmd_clear(pmd); return 0; } - pte = *pte_offset(page_middle,ptr); + pte = *pte_offset(pmd,ptr); if (!pte_present(pte)) return 0; - return pte_page(pte) + (ptr & ~PAGE_MASK); + return pte_page(pte); } -#include <linux/bigmem.h> - static int 
get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer) { - unsigned long addr; + struct page *page; + unsigned long kaddr; int size = 0, result = 0; char c; if (start >= end) return result; for (;;) { - addr = get_phys_addr(mm, start); - if (!addr) + page = get_phys_addr(mm, start); + if (!page) return result; - addr = kmap(addr, KM_READ); + kaddr = kmap(page, KM_READ) + (start & ~PAGE_MASK); do { - c = *(char *) addr; + c = *(char *) kaddr; if (!c) result = size; if (size < PAGE_SIZE) buffer[size++] = c; else { - kunmap(addr, KM_READ); + kunmap(kaddr, KM_READ); return result; } - addr++; + kaddr++; start++; if (!c && start >= end) { - kunmap(addr, KM_READ); + kunmap(kaddr, KM_READ); return result; } - } while (addr & ~PAGE_MASK); - kunmap(addr-1, KM_READ); + } while (kaddr & ~PAGE_MASK); + kunmap(kaddr, KM_READ); } return result; } @@ -483,9 +486,7 @@ static struct mm_struct *get_mm(int pid) { struct task_struct *p; struct mm_struct *mm = NULL; - - /* need kernel lock to avoid the tsk->mm to go away under us */ - lock_kernel(); + read_lock(&tasklist_lock); p = find_task_by_pid(pid); if (p) @@ -493,10 +494,10 @@ static struct mm_struct *get_mm(int pid) if (mm) atomic_inc(&mm->mm_users); read_unlock(&tasklist_lock); - unlock_kernel(); return mm; } + static int get_env(int pid, char * buffer) { struct mm_struct *mm = get_mm(pid); @@ -859,9 +860,6 @@ static inline char * task_mem(struct mm_struct *mm, char *buffer) return buffer; } -/* - * These next two assume that the task's sigmask_lock is held by the caller. - */ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, sigset_t *catch) { @@ -914,115 +912,77 @@ extern inline char *task_cap(struct task_struct *p, char *buffer) cap_t(p->cap_effective)); } -/* - * This is somewhat safer than it was before. However... 
- * - * Embedded pointers in the task structure may reference data that - * can be changed or that is no longer valid after the tasklist - * lock is released, or that isn't even protected by the tasklist - * lock. Eg. tsk->tty, tsk->sig, and tsk->p_pptr can change after - * we make our own copy of the task structure. This doesn't matter - * unless we are trying to use the pointed-to data as an address. - * So there are still a few safety issues to be addressed here. - */ + static int get_status(int pid, char * buffer) { char * orig = buffer; struct task_struct *tsk; struct mm_struct *mm = NULL; - /* - * We lock the whole kernel here because p->files and p->mm are still - * protected by the global kernel lock. - */ - lock_kernel(); - read_lock(&tasklist_lock); tsk = find_task_by_pid(pid); - if (tsk) { + if (tsk) mm = tsk->mm; - if (mm) - atomic_inc(&mm->mm_users); - - buffer = task_name(tsk, buffer); - buffer = task_state(tsk, buffer); - - spin_lock_irq(&tsk->sigmask_lock); - buffer = task_sig(tsk, buffer); - spin_unlock_irq(&tsk->sigmask_lock); - - buffer = task_cap(tsk, buffer); - } - read_unlock(&tasklist_lock); - - unlock_kernel(); - - /* - * We can't hold the tasklist_lock and jiggle the mmap_sem -- - * that can result in a deadlock. - */ - if (mm) { + if (mm) + atomic_inc(&mm->mm_users); + read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */ + if (!tsk) + return 0; + buffer = task_name(tsk, buffer); + buffer = task_state(tsk, buffer); + if (mm) buffer = task_mem(mm, buffer); + buffer = task_sig(tsk, buffer); + buffer = task_cap(tsk, buffer); + if (mm) mmput(mm); - } - - /* - * (buffer - orig) will be zero on an error exit. 
- */ return buffer - orig; } static int get_stat(int pid, char * buffer) { struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = NULL; unsigned long vsize, eip, esp, wchan; long priority, nice; - pid_t ppid = 0; + int tty_pgrp; sigset_t sigign, sigcatch; char state; - int res = 0; - unsigned int tty_device; - int tty_pgrp; + int res; read_lock(&tasklist_lock); tsk = find_task_by_pid(pid); - if (!tsk) - goto out_unlock; - /* avoid the task list to go away under us (security) */ - get_page(MAP_NR(tsk) + mem_map); - ppid = tsk->p_pptr->pid; - read_unlock(&tasklist_lock); - - /* we need the big kernel lock to avoid tsk->mm and tsk->tty - to change under us */ - lock_kernel(); - mm = tsk->mm; + if (tsk) + mm = tsk->mm; if (mm) atomic_inc(&mm->mm_users); - tty_device = tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0; - tty_pgrp = tsk->tty ? tsk->tty->pgrp : -1; - unlock_kernel(); - - spin_lock_irq(&tsk->sigmask_lock); - collect_sigign_sigcatch(tsk, &sigign, &sigcatch); - spin_unlock_irq(&tsk->sigmask_lock); - - eip = KSTK_EIP(tsk); - esp = KSTK_ESP(tsk); - wchan = get_wchan(tsk); - + read_unlock(&tasklist_lock); /* FIXME!! 
This should be done after the last use */ + if (!tsk) + return 0; state = *get_task_state(tsk); vsize = eip = esp = 0; - if (mm) - { + if (mm) { struct vm_area_struct *vma; down(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) + vma = mm->mmap; + while (vma) { vsize += vma->vm_end - vma->vm_start; + vma = vma->vm_next; + } + eip = KSTK_EIP(tsk); + esp = KSTK_ESP(tsk); up(&mm->mmap_sem); } + wchan = get_wchan(tsk); + + collect_sigign_sigcatch(tsk, &sigign, &sigcatch); + + if (tsk->tty) + tty_pgrp = tsk->tty->pgrp; + else + tty_pgrp = -1; + /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ priority = tsk->counter; @@ -1036,10 +996,10 @@ static int get_stat(int pid, char * buffer) pid, tsk->comm, state, - ppid, + tsk->p_pptr->pid, tsk->pgrp, tsk->session, - tty_device, + tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0, tty_pgrp, tsk->flags, tsk->min_flt, @@ -1076,16 +1036,9 @@ static int get_stat(int pid, char * buffer) tsk->cnswap, tsk->exit_signal, tsk->processor); - if (mm) mmput(mm); - free_task_struct(tsk); return res; - -out_unlock: - read_unlock(&tasklist_lock); - unlock_kernel(); - return 0; } static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, @@ -1097,7 +1050,7 @@ static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned if (pmd_none(*pmd)) return; if (pmd_bad(*pmd)) { - printk("statm_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); return; } @@ -1135,7 +1088,7 @@ static inline void statm_pmd_range(pgd_t * pgd, unsigned long address, unsigned if (pgd_none(*pgd)) return; if (pgd_bad(*pgd)) { - printk("statm_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd)); + pgd_ERROR(*pgd); pgd_clear(pgd); return; } @@ -1233,11 +1186,11 @@ static ssize_t read_maps (int pid, struct file * file, char * buf, size_t count, loff_t *ppos) { struct task_struct *p; - struct mm_struct *mm = NULL; struct 
vm_area_struct * map, * next; char * destptr = buf, * buffer; loff_t lineno; ssize_t column, i; + int volatile_task; long retval; /* @@ -1249,30 +1202,24 @@ static ssize_t read_maps (int pid, struct file * file, char * buf, goto out; retval = -EINVAL; - lock_kernel(); read_lock(&tasklist_lock); p = find_task_by_pid(pid); - if (p) { - mm = p->mm; - if (mm) - atomic_inc(&mm->mm_users); - } - read_unlock(&tasklist_lock); - unlock_kernel(); + read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */ if (!p) goto freepage_out; - /* nothing to map */ - if (!mm || count == 0) + if (!p->mm || count == 0) goto getlen_out; + /* Check whether the mmaps could change if we sleep */ + volatile_task = (p != current || atomic_read(&p->mm->mm_users) > 1); + /* decode f_pos */ lineno = *ppos >> MAPS_LINE_SHIFT; column = *ppos & (MAPS_LINE_LENGTH-1); - down(&mm->mmap_sem); - /* quickly go to line "lineno" */ - for (map = mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++) + /* quickly go to line lineno */ + for (map = p->mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++) continue; for ( ; map ; map = next ) { @@ -1343,13 +1290,17 @@ static ssize_t read_maps (int pid, struct file * file, char * buf, /* done? */ if (count == 0) break; + + /* By writing to user space, we might have slept. + * Stop the loop, to avoid a race condition. + */ + if (volatile_task) + break; } - up(&mm->mmap_sem); /* encode f_pos */ *ppos = (lineno << MAPS_LINE_SHIFT) + column; - mmput(mm); getlen_out: retval = destptr - buf; @@ -1362,31 +1313,28 @@ out: #ifdef __SMP__ static int get_pidcpu(int pid, char * buffer) { - struct task_struct * tsk; + struct task_struct * tsk = current ; int i, len = 0; - /* - * Hold the tasklist_lock to guarantee that the task_struct - * address will remain valid while we examine its contents. 
- */ read_lock(&tasklist_lock); - tsk = find_task_by_pid(pid); - if (tsk) - get_page(MAP_NR(tsk) + mem_map); - read_unlock(&tasklist_lock); - if (tsk) { - len = sprintf(buffer, - "cpu %lu %lu\n", - HZ_TO_STD(tsk->times.tms_utime), - HZ_TO_STD(tsk->times.tms_stime)); - - for (i = 0 ; i < smp_num_cpus; i++) - len += sprintf(buffer + len, "cpu%d %lu %lu\n", - i, - HZ_TO_STD(tsk->per_cpu_utime[cpu_logical_map(i)]), - HZ_TO_STD(tsk->per_cpu_stime[cpu_logical_map(i)])); - free_task_struct(tsk); - } + if (pid != tsk->pid) + tsk = find_task_by_pid(pid); + read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */ + + if (tsk == NULL) + return 0; + + len = sprintf(buffer, + "cpu %lu %lu\n", + HZ_TO_STD(tsk->times.tms_utime), + HZ_TO_STD(tsk->times.tms_stime)); + + for (i = 0 ; i < smp_num_cpus; i++) + len += sprintf(buffer + len, "cpu%d %lu %lu\n", + i, + HZ_TO_STD(tsk->per_cpu_utime[cpu_logical_map(i)]), + HZ_TO_STD(tsk->per_cpu_stime[cpu_logical_map(i)])); + return len; } #endif @@ -1519,6 +1467,12 @@ static int process_unauthorized(int type, int pid) int ok = 0; read_lock(&tasklist_lock); + + /* + * Grab the lock, find the task, save the uid and + * check it has an mm still (ie its not dead) + */ + p = find_task_by_pid(pid); if (p) { euid=p->euid; @@ -1526,7 +1480,9 @@ static int process_unauthorized(int type, int pid) if(!cap_issubset(p->cap_permitted, current->cap_permitted)) ok=0; } + read_unlock(&tasklist_lock); + if (!p) return 1; diff --git a/fs/proc/mem.c b/fs/proc/mem.c index f9fcb0970..90cd79722 100644 --- a/fs/proc/mem.c +++ b/fs/proc/mem.c @@ -10,7 +10,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/proc_fs.h> -#include <linux/bigmem.h> +#include <linux/highmem.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -79,9 +79,10 @@ static ssize_t mem_read(struct file * file, char * buf, pgd_t *page_dir; pmd_t *page_middle; pte_t pte; - char * page; + struct page * page; struct task_struct * tsk; unsigned long addr; + 
unsigned long maddr; /* temporary mapped address */ char *tmp; ssize_t scount, i; @@ -102,7 +103,7 @@ static ssize_t mem_read(struct file * file, char * buf, if (pgd_none(*page_dir)) break; if (pgd_bad(*page_dir)) { - printk("Bad page dir entry %08lx\n", pgd_val(*page_dir)); + pgd_ERROR(*page_dir); pgd_clear(page_dir); break; } @@ -110,20 +111,20 @@ static ssize_t mem_read(struct file * file, char * buf, if (pmd_none(*page_middle)) break; if (pmd_bad(*page_middle)) { - printk("Bad page middle entry %08lx\n", pmd_val(*page_middle)); + pmd_ERROR(*page_middle); pmd_clear(page_middle); break; } pte = *pte_offset(page_middle,addr); if (!pte_present(pte)) break; - page = (char *) pte_page(pte) + (addr & ~PAGE_MASK); + page = pte_page(pte); i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > scount) i = scount; - page = (char *) kmap((unsigned long) page, KM_READ); - copy_to_user(tmp, page, i); - kunmap((unsigned long) page, KM_READ); + maddr = kmap(page, KM_READ); + copy_to_user(tmp, (char *)maddr + (addr & ~PAGE_MASK), i); + kunmap(maddr, KM_READ); addr += i; tmp += i; scount -= i; @@ -141,9 +142,10 @@ static ssize_t mem_write(struct file * file, char * buf, pgd_t *page_dir; pmd_t *page_middle; pte_t pte; - char * page; + struct page * page; struct task_struct * tsk; unsigned long addr; + unsigned long maddr; /* temporary mapped address */ char *tmp; long i; @@ -159,7 +161,7 @@ static ssize_t mem_write(struct file * file, char * buf, if (pgd_none(*page_dir)) break; if (pgd_bad(*page_dir)) { - printk("Bad page dir entry %08lx\n", pgd_val(*page_dir)); + pgd_ERROR(*page_dir); pgd_clear(page_dir); break; } @@ -167,7 +169,7 @@ static ssize_t mem_write(struct file * file, char * buf, if (pmd_none(*page_middle)) break; if (pmd_bad(*page_middle)) { - printk("Bad page middle entry %08lx\n", pmd_val(*page_middle)); + pmd_ERROR(*page_middle); pmd_clear(page_middle); break; } @@ -176,13 +178,13 @@ static ssize_t mem_write(struct file * file, char * buf, break; if (!pte_write(pte)) break; - 
page = (char *) pte_page(pte) + (addr & ~PAGE_MASK); + page = pte_page(pte); i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > count) i = count; - page = (unsigned long) kmap((unsigned long) page, KM_WRITE); - copy_from_user(page, tmp, i); - kunmap((unsigned long) page, KM_WRITE); + maddr = kmap(page, KM_WRITE); + copy_from_user((char *)maddr + (addr & ~PAGE_MASK), tmp, i); + kunmap(maddr, KM_WRITE); addr += i; tmp += i; count -= i; @@ -248,14 +250,14 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma) if (pgd_none(*src_dir)) return -EINVAL; if (pgd_bad(*src_dir)) { - printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir)); + pgd_ERROR(*src_dir); return -EINVAL; } src_middle = pmd_offset(src_dir, stmp); if (pmd_none(*src_middle)) return -EINVAL; if (pmd_bad(*src_middle)) { - printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle)); + pmd_ERROR(*src_middle); return -EINVAL; } src_table = pte_offset(src_middle, stmp); @@ -301,9 +303,9 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma) set_pte(src_table, pte_mkdirty(*src_table)); set_pte(dest_table, *src_table); - mapnr = MAP_NR(pte_page(*src_table)); + mapnr = pte_pagenr(*src_table); if (mapnr < max_mapnr) - get_page(mem_map + MAP_NR(pte_page(*src_table))); + get_page(mem_map + pte_pagenr(*src_table)); stmp += PAGE_SIZE; dtmp += PAGE_SIZE; diff --git a/fs/super.c b/fs/super.c index 693017eee..3b58d13cc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -135,7 +135,7 @@ out: return lptr; } -static void remove_vfsmnt(kdev_t dev) +void remove_vfsmnt(kdev_t dev) { struct vfsmount *lptr, *tofree; @@ -508,7 +508,7 @@ out: /* * Find a super_block with no device assigned. */ -static struct super_block *get_empty_super(void) +struct super_block *get_empty_super(void) { struct super_block *s; |