Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c     |   1 | +
-rw-r--r-- | fs/ext2/inode.c | 533 | ++++++++++++++++++++++++------------------------
2 files changed, 268 insertions, 266 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index ad0a04e68..e6134c203 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -839,6 +839,7 @@ repeat:
 
 out:
         write_unlock(&hash_table_lock);
         spin_unlock(&lru_list_lock);
+        touch_buffer(bh);
         return bh;
 }
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 678eb4d28..5dfe2cf55 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -18,8 +18,6 @@
  *  David S. Miller (davem@caip.rutgers.edu), 1995
  *  64-bit file support on 64-bit platforms by Jakub Jelinek
  *  (jj@sunsite.ms.mff.cuni.cz)
- *
- *  Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
  */
 
 #include <linux/fs.h>
@@ -28,6 +26,8 @@
 #include <linux/sched.h>
 #include <linux/highuid.h>
 
+
+
 static int ext2_update_inode(struct inode * inode, int do_sync);
 
 /*
@@ -64,18 +64,23 @@ no_delete:
         clear_inode(inode);     /* We must guarantee clearing of inode... */
 }
 
+/*
+ * ext2_discard_prealloc and ext2_alloc_block are atomic wrt. the
+ * superblock in the same manner as are ext2_free_blocks and
+ * ext2_new_block.  We just wait on the super rather than locking it
+ * here, since ext2_new_block will do the necessary locking and we
+ * can't block until then.
+ */
 void ext2_discard_prealloc (struct inode * inode)
 {
 #ifdef EXT2_PREALLOCATE
+        unsigned short total;
+
         lock_kernel();
-        /* Writer: ->i_prealloc* */
         if (inode->u.ext2_i.i_prealloc_count) {
-                unsigned short total = inode->u.ext2_i.i_prealloc_count;
-                unsigned long block = inode->u.ext2_i.i_prealloc_block;
+                total = inode->u.ext2_i.i_prealloc_count;
                 inode->u.ext2_i.i_prealloc_count = 0;
-                inode->u.ext2_i.i_prealloc_block = 0;
-                /* Writer: end */
-                ext2_free_blocks (inode, block, total);
+                ext2_free_blocks (inode, inode->u.ext2_i.i_prealloc_block, total);
         }
         unlock_kernel();
 #endif
@@ -88,26 +93,22 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
 #endif
         unsigned long result;
 
+        wait_on_super (inode->i_sb);
 
 #ifdef EXT2_PREALLOCATE
-        /* Writer: ->i_prealloc* */
         if (inode->u.ext2_i.i_prealloc_count &&
             (goal == inode->u.ext2_i.i_prealloc_block ||
              goal + 1 == inode->u.ext2_i.i_prealloc_block))
         {
                 result = inode->u.ext2_i.i_prealloc_block++;
                 inode->u.ext2_i.i_prealloc_count--;
-                /* Writer: end */
-#ifdef EXT2FS_DEBUG
                 ext2_debug ("preallocation hit (%lu/%lu).\n",
                             ++alloc_hits, ++alloc_attempts);
-#endif
+
         } else {
                 ext2_discard_prealloc (inode);
-#ifdef EXT2FS_DEBUG
                 ext2_debug ("preallocation miss (%lu/%lu).\n",
                             alloc_hits, ++alloc_attempts);
-#endif
                 if (S_ISREG(inode->i_mode))
                         result = ext2_new_block (inode, goal,
                                  &inode->u.ext2_i.i_prealloc_count,
@@ -298,307 +299,307 @@ no_block:
         return p;
 }
 
-/**
- *      ext2_find_near - find a place for allocation with sufficient locality
- *      @inode: owner
- *      @ind: descriptor of indirect block.
- *
- *      This function returns the prefered place for block allocation.
- *      It is used when heuristic for sequential allocation fails.
- *      Rules are:
- *        + if there is a block to the left of our position - allocate near it.
- *        + if pointer will live in indirect block - allocate near that block.
- *        + if pointer will live in inode - allocate in the same cylinder group.
- *      Caller must make sure that @ind is valid and will stay that way.
- */
-
-static inline unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
+static struct buffer_head * inode_getblk (struct inode * inode, int nr,
+        int new_block, int * err, int metadata, long *phys, int *new)
 {
-        u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext2_i.i_data;
-        u32 *p;
-
-        /* Try to find previous block */
-        for (p = ind->p - 1; p >= start; p--)
-                if (*p)
-                        return le32_to_cpu(*p);
-
-        /* No such thing, so let's try location of indirect block */
-        if (ind->bh)
-                return ind->bh->b_blocknr;
+        u32 * p;
+        int tmp, goal = 0;
+        struct buffer_head * result;
+        int blocksize = inode->i_sb->s_blocksize;
 
-        /*
-         * It is going to be refered from inode itself? OK, just put it into
-         * the same cylinder group then.
-         */
-        return (inode->u.ext2_i.i_block_group *
-                EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
-               le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
-}
+        p = inode->u.ext2_i.i_data + nr;
+repeat:
+        tmp = le32_to_cpu(*p);
+        if (tmp) {
+                if (metadata) {
+                        result = getblk (inode->i_dev, tmp, blocksize);
+                        if (tmp == le32_to_cpu(*p))
+                                return result;
+                        brelse (result);
+                        goto repeat;
+                } else {
+                        *phys = tmp;
+                        return NULL;
+                }
+        }
 
-/**
- *      ext2_find_goal - find a prefered place for allocation.
- *      @inode: owner
- *      @block: block we want
- *      @chain: chain of indirect blocks
- *      @partial: pointer to the last triple within a chain.
- *
- *      This function returns the prefered place for block allocation.
- */
+        if (inode->u.ext2_i.i_next_alloc_block == new_block)
+                goal = inode->u.ext2_i.i_next_alloc_goal;
 
-static inline unsigned long ext2_find_goal(struct inode *inode,
-                                           long block,
-                                           Indirect chain[4],
-                                           Indirect *partial)
-{
-        unsigned long goal = 0;
+        ext2_debug ("hint = %d,", goal);
 
-        /* Writer: ->i_next_alloc* */
-        if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
-                inode->u.ext2_i.i_next_alloc_block++;
-                inode->u.ext2_i.i_next_alloc_goal++;
-        }
-        /* Writer: end */
-        /* Reader: pointers, ->i_next_alloc* */
-        if (verify_chain(chain, partial)) {
-                /*
-                 * try the heuristic for sequential allocation,
-                 * failing that at least try to get decent locality.
-                 */
-                if (block == inode->u.ext2_i.i_next_alloc_block)
-                        goal = inode->u.ext2_i.i_next_alloc_goal;
+        if (!goal) {
+                for (tmp = nr - 1; tmp >= 0; tmp--) {
+                        if (inode->u.ext2_i.i_data[tmp]) {
+                                goal = le32_to_cpu(inode->u.ext2_i.i_data[tmp]);
+                                break;
+                        }
+                }
                 if (!goal)
-                        goal = ext2_find_near(inode, partial);
+                        goal = (inode->u.ext2_i.i_block_group *
+                                EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
+                               le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
         }
-        /* Reader: end */
-        return goal;
-}
 
-/**
- *      ext2_alloc_branch - allocate and set up a chain of blocks.
- *      @inode: owner
- *      @num: depth of the chain (number of blocks to allocate)
- *      @offsets: offsets (in the blocks) to store the pointers to next.
- *      @branch: place to store the chain in.
- *
- *      This function allocates @num blocks, zeroes out all but the last one,
- *      links them into chain and (if we are synchronous) writes them to disk.
- *      In other words, it prepares a branch that can be spliced onto the
- *      inode. It stores the information about that chain in the branch[], in
- *      the same format as ext2_get_branch() would do. We are calling it after
- *      we had read the existing part of chain and partial points to the last
- *      triple of that (one with zero ->key). Upon the exit we have the same
- *      picture as after the successful ext2_get_block(), excpet that in one
- *      place chain is disconnected - *branch->p is still zero (we did not
- *      set the last link), but branch->key contains the number that should
- *      be placed into *branch->p to fill that gap.
- *
- *      If allocation fails we free all blocks we've allocated (and forget
- *      ther buffer_heads) and return the error value the from failed
- *      ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain
- *      as described above and return 0.
- */
-
-static int ext2_alloc_branch(struct inode *inode,
-                             int num,
-                             unsigned long goal,
-                             int *offsets,
-                             Indirect *branch)
-{
-        int blocksize = inode->i_sb->s_blocksize;
-        int n = 0;
-        int err;
-        int i;
-        int parent = ext2_alloc_block(inode, goal, &err);
-
-        branch[0].key = cpu_to_le32(parent);
-        if (parent) for (n = 1; n < num; n++) {
-                struct buffer_head *bh;
-                /* Allocate the next block */
-                int nr = ext2_alloc_block(inode, parent, &err);
-                if (!nr)
-                        break;
-                branch[n].key = cpu_to_le32(nr);
-                /*
-                 * Get buffer_head for parent block, zero it out and set
-                 * the pointer to new one, then send parent to disk.
-                 */
-                bh = getblk(inode->i_dev, parent, blocksize);
-                if (!buffer_uptodate(bh))
-                        wait_on_buffer(bh);
-                memset(bh->b_data, 0, blocksize);
-                branch[n].bh = bh;
-                branch[n].p = (u32*) bh->b_data + offsets[n];
-                *branch[n].p = branch[n].key;
-                mark_buffer_uptodate(bh, 1);
-                mark_buffer_dirty(bh, 1);
-                if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
-                        ll_rw_block (WRITE, 1, &bh);
-                        wait_on_buffer (bh);
+        ext2_debug ("goal = %d.\n", goal);
+
+        tmp = ext2_alloc_block (inode, goal, err);
+        if (!tmp)
+                return NULL;
+
+        if (metadata) {
+                result = getblk (inode->i_dev, tmp, blocksize);
+                if (!buffer_uptodate(result))
+                        wait_on_buffer(result);
+                memset(result->b_data, 0, blocksize);
+                mark_buffer_uptodate(result, 1);
+                mark_buffer_dirty(result, 1);
+                if (*p) {
+                        ext2_free_blocks (inode, tmp, 1);
+                        bforget (result);
+                        goto repeat;
                 }
-                parent = nr;
+        } else {
+                if (*p) {
+                        /*
+                         * Nobody is allowed to change block allocation
+                         * state from under us:
+                         */
+                        ext2_error (inode->i_sb, "block_getblk",
+                                    "data block filled under us");
+                        BUG();
+                        ext2_free_blocks (inode, tmp, 1);
+                        goto repeat;
+                }
+                *phys = tmp;
+                result = NULL;
+                *err = 0;
+                *new = 1;
         }
-        if (n == num)
-                return 0;
+        *p = cpu_to_le32(tmp);
 
-        /* Allocation failed, free what we already allocated */
-        for (i = 1; i < n; i++)
-                bforget(branch[i].bh);
-        for (i = 0; i < n; i++)
-                ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1);
-        return err;
+        inode->u.ext2_i.i_next_alloc_block = new_block;
+        inode->u.ext2_i.i_next_alloc_goal = tmp;
+        inode->i_ctime = CURRENT_TIME;
+        inode->i_blocks += blocksize/512;
+        if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
+                ext2_sync_inode (inode);
+        else
+                mark_inode_dirty(inode);
+        return result;
 }
 
-/**
- *      ext2_splice_branch - splice the allocated branch onto inode.
- *      @inode: owner
- *      @block: (logical) number of block we are adding
- *      @chain: chain of indirect blocks (with a missing link - see
- *              ext2_alloc_branch)
- *      @where: location of missing link
- *      @num: number of blocks we are adding
+/*
+ * metadata / data
+ * possibly create / access
+ * can fail due to:     - not present
+ *                      - out of space
  *
- *      This function verifies that chain (up to the missing link) had not
- *      changed, fills the missing link and does all housekeeping needed in
- *      inode (->i_blocks, etc.). In case of success we end up with the full
- *      chain to new block and return 0. Otherwise (== chain had been changed)
- *      we free the new blocks (forgetting their buffer_heads, indeed) and
- *      return -EAGAIN.
+ * NULL return in the data case is mandatory.
  */
-
-static inline int ext2_splice_branch(struct inode *inode,
-                                     long block,
-                                     Indirect chain[4],
-                                     Indirect *where,
-                                     int num)
+static struct buffer_head * block_getblk (struct inode * inode,
+        struct buffer_head * bh, int nr,
+        int new_block, int * err, int metadata, long *phys, int *new)
 {
-        int i;
-
-        /* Verify that place we are splicing to is still there and vacant */
-
-        /* Writer: pointers, ->i_next_alloc*, ->i_blocks */
-        if (!verify_chain(chain, where-1) || *where->p)
-                /* Writer: end */
-                goto changed;
-
-        /* That's it */
-
-        *where->p = where->key;
-        inode->u.ext2_i.i_next_alloc_block = block;
-        inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);
-        inode->i_blocks += num * inode->i_sb->s_blocksize/512;
-
-        /* Writer: end */
-
-        /* We are done with atomic stuff, now do the rest of housekeeping */
-
-        inode->i_ctime = CURRENT_TIME;
+        int tmp, goal = 0;
+        u32 * p;
+        struct buffer_head * result;
+        int blocksize = inode->i_sb->s_blocksize;
 
-        /* had we spliced it onto indirect block? */
-        if (where->bh) {
-                mark_buffer_dirty(where->bh, 1);
-                if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
-                        ll_rw_block (WRITE, 1, &where->bh);
-                        wait_on_buffer(where->bh);
+        result = NULL;
+        if (!bh)
+                goto out;
+        if (!buffer_uptodate(bh)) {
+                ll_rw_block (READ, 1, &bh);
+                wait_on_buffer (bh);
+                if (!buffer_uptodate(bh))
+                        goto out;
+        }
+        p = (u32 *) bh->b_data + nr;
+repeat:
+        tmp = le32_to_cpu(*p);
+        if (tmp) {
+                if (metadata) {
+                        result = getblk (bh->b_dev, tmp, blocksize);
+                        if (tmp == le32_to_cpu(*p))
+                                goto out;
+                        brelse (result);
+                        goto repeat;
+                } else {
+                        *phys = tmp;
+                        /* result == NULL */
+                        goto out;
                 }
         }
-        if (IS_SYNC(inode) || inode->u.ext2_i.i_osync)
-                ext2_sync_inode (inode);
-        else
-                mark_inode_dirty(inode);
-        return 0;
-
-changed:
-        for (i = 1; i < num; i++)
-                bforget(where[i].bh);
-        for (i = 0; i < num; i++)
-                ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1);
-        return -EAGAIN;
+
+        if (inode->u.ext2_i.i_next_alloc_block == new_block)
+                goal = inode->u.ext2_i.i_next_alloc_goal;
+        if (!goal) {
+                for (tmp = nr - 1; tmp >= 0; tmp--) {
+                        if (le32_to_cpu(((u32 *) bh->b_data)[tmp])) {
+                                goal = le32_to_cpu(((u32 *)bh->b_data)[tmp]);
+                                break;
+                        }
+                }
+                if (!goal)
+                        goal = bh->b_blocknr;
+        }
+        tmp = ext2_alloc_block (inode, goal, err);
+        if (!tmp)
+                goto out;
+        if (metadata) {
+                result = getblk (bh->b_dev, tmp, blocksize);
+                if (!buffer_uptodate(result))
+                        wait_on_buffer(result);
+                memset(result->b_data, 0, inode->i_sb->s_blocksize);
+                mark_buffer_uptodate(result, 1);
+                mark_buffer_dirty(result, 1);
+                if (*p) {
+                        ext2_free_blocks (inode, tmp, 1);
+                        bforget (result);
+                        goto repeat;
+                }
+        } else {
+                if (*p) {
+                        /*
+                         * Nobody is allowed to change block allocation
+                         * state from under us:
+                         */
+                        ext2_error (inode->i_sb, "block_getblk",
+                                    "data block filled under us");
+                        BUG();
+                        ext2_free_blocks (inode, tmp, 1);
+                        goto repeat;
+                }
+                *phys = tmp;
+                *new = 1;
+        }
+        *p = le32_to_cpu(tmp);
+        mark_buffer_dirty(bh, 1);
+        if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) {
+                ll_rw_block (WRITE, 1, &bh);
+                wait_on_buffer (bh);
+        }
+        inode->i_ctime = CURRENT_TIME;
+        inode->i_blocks += blocksize/512;
+        mark_inode_dirty(inode);
+        inode->u.ext2_i.i_next_alloc_block = new_block;
+        inode->u.ext2_i.i_next_alloc_goal = tmp;
+        *err = 0;
+out:
+        brelse (bh);
+        return result;
 }
 
-/*
- * Allocation strategy is simple: if we have to allocate something, we will
- * have to go the whole way to leaf. So let's do it before attaching anything
- * to tree, set linkage between the newborn blocks, write them if sync is
- * required, recheck the path, free and repeat if check fails, otherwise
- * set the last missing link (that will protect us from any truncate-generated
- * removals - all blocks on the path are immune now) and possibly force the
- * write on the parent block.
- * That has a nice additional property: no special recovery from the failed
- * allocations is needed - we simply release blocks and do not touch anything
- * reachable from inode.
- */
-
 static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create)
 {
-        int err = -EIO;
+        int ret, err, new;
+        struct buffer_head *bh;
+        unsigned long phys;
         int offsets[4];
+        int *p;
         Indirect chain[4];
         Indirect *partial;
-        unsigned long goal;
-        int left;
-        int depth = ext2_block_to_path(inode, iblock, offsets);
+        int depth;
 
+        depth = ext2_block_to_path(inode, iblock, offsets);
         if (depth == 0)
-                goto out;
+                goto abort;
 
         lock_kernel();
-reread:
         partial = ext2_get_branch(inode, depth, offsets, chain, &err);
 
-        /* Simplest case - block found, no allocation needed */
         if (!partial) {
-got_it:
+                unlock_kernel();
+                for (partial = chain + depth - 1; partial > chain; partial--)
+                        brelse(partial->bh);
                 bh_result->b_dev = inode->i_dev;
                 bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key);
                 bh_result->b_state |= (1UL << BH_Mapped);
-                /* Clean up and exit */
-                partial = chain+depth-1; /* the whole chain */
-                goto cleanup;
+                return 0;
         }
 
-        /* Next simple case - plain lookup or failed read of indirect block */
-        if (!create || err == -EIO) {
-cleanup:
-                while (partial > chain) {
-                        brelse(partial->bh);
-                        partial--;
-                }
+        while (partial > chain) {
+                brelse(partial->bh);
+                partial--;
+        }
+
+        if (!create) {
                 unlock_kernel();
-out:
-                return err;
+                return 0;
         }
 
+        err = -EIO;
+        new = 0;
+        ret = 0;
+        bh = NULL;
+
         /*
-         * Indirect block might be removed by truncate while we were
-         * reading it. Handling of that case (forget what we've got and
-         * reread) is taken out of the main path.
+         * If this is a sequential block allocation, set the next_alloc_block
+         * to this block now so that all the indblock and data block
+         * allocations use the same goal zone
          */
-        if (err == -EAGAIN)
-                goto changed;
-        goal = ext2_find_goal(inode, iblock, chain, partial);
-        if (!goal)
-                goto changed;
+        ext2_debug ("block %lu, next %lu, goal %lu.\n", iblock,
+                    inode->u.ext2_i.i_next_alloc_block,
+                    inode->u.ext2_i.i_next_alloc_goal);
 
-        left = (chain + depth) - partial;
-        err = ext2_alloc_branch(inode, left, goal,
-                                offsets+(partial-chain), partial);
-        if (err)
-                goto cleanup;
-
-        if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
-                goto changed;
+        if (iblock == inode->u.ext2_i.i_next_alloc_block + 1) {
+                inode->u.ext2_i.i_next_alloc_block++;
+                inode->u.ext2_i.i_next_alloc_goal++;
+        }
 
-        bh_result->b_state |= (1UL << BH_New);
-        goto got_it;
+        err = 0;
 
-changed:
-        while (partial > chain) {
-                bforget(partial->bh);
-                partial--;
+        /*
+         * ok, these macros clean the logic up a bit and make
+         * it much more readable:
+         */
+#define GET_INODE_DATABLOCK(x) \
+        inode_getblk(inode, x, iblock, &err, 0, &phys, &new)
+#define GET_INODE_PTR(x) \
+        inode_getblk(inode, x, iblock, &err, 1, NULL, NULL)
+#define GET_INDIRECT_DATABLOCK(x) \
+        block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new);
+#define GET_INDIRECT_PTR(x) \
+        block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL);
+
+        p = offsets;
+        if (depth == 1) {
+                bh = GET_INODE_DATABLOCK(*p);
+                goto out;
         }
-        goto reread;
+        bh = GET_INODE_PTR(*p);
+        switch (depth) {
+        default:        /* case 4: */
+                bh = GET_INDIRECT_PTR(*++p);
+        case 3:
+                bh = GET_INDIRECT_PTR(*++p);
+        case 2:
+                bh = GET_INDIRECT_DATABLOCK(*++p);
+        }
+
+#undef GET_INODE_DATABLOCK
+#undef GET_INODE_PTR
+#undef GET_INDIRECT_DATABLOCK
+#undef GET_INDIRECT_PTR
+
+out:
+        if (bh)
+                BUG();          // temporary debugging check
+        if (err)
+                goto abort;
+        if (!phys)
+                BUG();          // must not happen either
+
+        bh_result->b_dev = inode->i_dev;
+        bh_result->b_blocknr = phys;
+        bh_result->b_state |= (1UL << BH_Mapped);       /* safe */
+        if (new)
+                bh_result->b_state |= (1UL << BH_New);
+        unlock_kernel();
+abort:
+        return err;
 }
 
 struct buffer_head * ext2_getblk(struct inode * inode, long block, int create, int * err)
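For orientation: the offsets[4] array and the depth that ext2_get_block() dispatches on (via GET_INODE_DATABLOCK, GET_INODE_PTR and the GET_INDIRECT_* macros above) come from ext2_block_to_path(), which this patch leaves untouched. The following stand-alone user-space sketch mirrors that logical-block-to-path decomposition; it is illustrative only and not taken from the patch — the function name, the 12-direct-block constant and the 1 KiB block size used in main() are assumptions based on the standard ext2 on-disk layout.

/*
 * Sketch only -- not part of the patch.  Mirrors the role of
 * ext2_block_to_path(): split a logical file block into per-level
 * offsets into i_data[] and the indirect blocks, and return the
 * depth (1..4) that ext2_get_block() switches on.  "ptrs" is the
 * number of block pointers per indirect block, blocksize / 4.
 */
#include <stdio.h>

static int block_to_path(long block, long ptrs, int offsets[4])
{
        const int direct = 12;                  /* EXT2_NDIR_BLOCKS */
        int n = 0;

        if (block < 0)
                return 0;                       /* 0 == out of range */
        if (block < direct) {
                offsets[n++] = block;           /* depth 1: pointer in i_data[] */
        } else if ((block -= direct) < ptrs) {
                offsets[n++] = 12;              /* EXT2_IND_BLOCK */
                offsets[n++] = block;
        } else if ((block -= ptrs) < ptrs * ptrs) {
                offsets[n++] = 13;              /* EXT2_DIND_BLOCK */
                offsets[n++] = block / ptrs;
                offsets[n++] = block % ptrs;
        } else {                                /* triple indirect; real code
                                                   also bound-checks this */
                block -= ptrs * ptrs;
                offsets[n++] = 14;              /* EXT2_TIND_BLOCK */
                offsets[n++] = block / (ptrs * ptrs);
                offsets[n++] = (block / ptrs) % ptrs;
                offsets[n++] = block % ptrs;
        }
        return n;                               /* depth, 1..4 */
}

int main(void)
{
        int offsets[4] = { 0, 0, 0, 0 };
        long ptrs = 1024 / 4;                   /* 1 KiB blocks: 256 pointers */
        int depth = block_to_path(100000, ptrs, offsets);

        printf("depth %d, offsets %d %d %d %d\n", depth,
               offsets[0], offsets[1], offsets[2], offsets[3]);
        return 0;
}

With 1 KiB blocks (256 pointers per indirect block), logical block 100000 lands in the triple-indirect tree, i.e. depth 4, the default: case of the switch in ext2_get_block() above; depth 1 is the direct case that takes the GET_INODE_DATABLOCK path.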