diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2000-08-08 18:57:46 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-08-08 18:57:46 +0000 |
commit | 2e837819b1563679b55363d469239fdf4f17fbbb (patch) | |
tree | ead655053fe63786a4f95bd32723138483a709ab /fs | |
parent | 5514f4babeeb3af00ee0c325e3cda7a562cc3d65 (diff) |
Merge with Linux 2.4.0-test6-pre5.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext2/inode.c | 535 |
1 files changed, 268 insertions, 267 deletions
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 5dfe2cf55..4f0b78da3 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -18,6 +18,8 @@ * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) + * + * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 */ #include <linux/fs.h> @@ -26,8 +28,6 @@ #include <linux/sched.h> #include <linux/highuid.h> - - static int ext2_update_inode(struct inode * inode, int do_sync); /* @@ -64,23 +64,18 @@ no_delete: clear_inode(inode); /* We must guarantee clearing of inode... */ } -/* - * ext2_discard_prealloc and ext2_alloc_block are atomic wrt. the - * superblock in the same manner as are ext2_free_blocks and - * ext2_new_block. We just wait on the super rather than locking it - * here, since ext2_new_block will do the necessary locking and we - * can't block until then. - */ void ext2_discard_prealloc (struct inode * inode) { #ifdef EXT2_PREALLOCATE - unsigned short total; - lock_kernel(); + /* Writer: ->i_prealloc* */ if (inode->u.ext2_i.i_prealloc_count) { - total = inode->u.ext2_i.i_prealloc_count; + unsigned short total = inode->u.ext2_i.i_prealloc_count; + unsigned long block = inode->u.ext2_i.i_prealloc_block; inode->u.ext2_i.i_prealloc_count = 0; - ext2_free_blocks (inode, inode->u.ext2_i.i_prealloc_block, total); + inode->u.ext2_i.i_prealloc_block = 0; + /* Writer: end */ + ext2_free_blocks (inode, block, total); } unlock_kernel(); #endif @@ -93,22 +88,26 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) #endif unsigned long result; - wait_on_super (inode->i_sb); #ifdef EXT2_PREALLOCATE + /* Writer: ->i_prealloc* */ if (inode->u.ext2_i.i_prealloc_count && (goal == inode->u.ext2_i.i_prealloc_block || goal + 1 == inode->u.ext2_i.i_prealloc_block)) { result = inode->u.ext2_i.i_prealloc_block++; inode->u.ext2_i.i_prealloc_count--; + /* Writer: end */ +#ifdef EXT2FS_DEBUG ext2_debug ("preallocation hit (%lu/%lu).\n", ++alloc_hits, ++alloc_attempts); - +#endif } else { ext2_discard_prealloc (inode); +#ifdef EXT2FS_DEBUG ext2_debug ("preallocation miss (%lu/%lu).\n", alloc_hits, ++alloc_attempts); +#endif if (S_ISREG(inode->i_mode)) result = ext2_new_block (inode, goal, &inode->u.ext2_i.i_prealloc_count, @@ -299,307 +298,309 @@ no_block: return p; } -static struct buffer_head * inode_getblk (struct inode * inode, int nr, - int new_block, int * err, int metadata, long *phys, int *new) +/** + * ext2_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the prefered place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same cylinder group. + * Caller must make sure that @ind is valid and will stay that way. + */ + +static inline unsigned long ext2_find_near(struct inode *inode, Indirect *ind) { - u32 * p; - int tmp, goal = 0; - struct buffer_head * result; - int blocksize = inode->i_sb->s_blocksize; + u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext2_i.i_data; + u32 *p; - p = inode->u.ext2_i.i_data + nr; -repeat: - tmp = le32_to_cpu(*p); - if (tmp) { - if (metadata) { - result = getblk (inode->i_dev, tmp, blocksize); - if (tmp == le32_to_cpu(*p)) - return result; - brelse (result); - goto repeat; - } else { - *phys = tmp; - return NULL; - } - } + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) + if (*p) + return le32_to_cpu(*p); - if (inode->u.ext2_i.i_next_alloc_block == new_block) - goal = inode->u.ext2_i.i_next_alloc_goal; + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; - ext2_debug ("hint = %d,", goal); + /* + * It is going to be refered from inode itself? OK, just put it into + * the same cylinder group then. + */ + return (inode->u.ext2_i.i_block_group * + EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + + le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block); +} - if (!goal) { - for (tmp = nr - 1; tmp >= 0; tmp--) { - if (inode->u.ext2_i.i_data[tmp]) { - goal = le32_to_cpu(inode->u.ext2_i.i_data[tmp]); - break; - } - } - if (!goal) - goal = (inode->u.ext2_i.i_block_group * - EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block); - } +/** + * ext2_find_goal - find a prefered place for allocation. + * @inode: owner + * @block: block we want + * @chain: chain of indirect blocks + * @partial: pointer to the last triple within a chain + * @goal: place to store the result. + * + * Normally this function find the prefered place for block allocation, + * stores it in *@goal and returns zero. If the branch had been changed + * under us we return -EAGAIN. + */ - ext2_debug ("goal = %d.\n", goal); - - tmp = ext2_alloc_block (inode, goal, err); - if (!tmp) - return NULL; - - if (metadata) { - result = getblk (inode->i_dev, tmp, blocksize); - if (!buffer_uptodate(result)) - wait_on_buffer(result); - memset(result->b_data, 0, blocksize); - mark_buffer_uptodate(result, 1); - mark_buffer_dirty(result, 1); - if (*p) { - ext2_free_blocks (inode, tmp, 1); - bforget (result); - goto repeat; - } - } else { - if (*p) { - /* - * Nobody is allowed to change block allocation - * state from under us: - */ - ext2_error (inode->i_sb, "block_getblk", - "data block filled under us"); - BUG(); - ext2_free_blocks (inode, tmp, 1); - goto repeat; - } - *phys = tmp; - result = NULL; - *err = 0; - *new = 1; +static inline int ext2_find_goal(struct inode *inode, + long block, + Indirect chain[4], + Indirect *partial, + unsigned long *goal) +{ + /* Writer: ->i_next_alloc* */ + if (block == inode->u.ext2_i.i_next_alloc_block + 1) { + inode->u.ext2_i.i_next_alloc_block++; + inode->u.ext2_i.i_next_alloc_goal++; + } + /* Writer: end */ + /* Reader: pointers, ->i_next_alloc* */ + if (verify_chain(chain, partial)) { + /* + * try the heuristic for sequential allocation, + * failing that at least try to get decent locality. + */ + if (block == inode->u.ext2_i.i_next_alloc_block) + *goal = inode->u.ext2_i.i_next_alloc_goal; + if (!*goal) + *goal = ext2_find_near(inode, partial); + return 0; } - *p = cpu_to_le32(tmp); - - inode->u.ext2_i.i_next_alloc_block = new_block; - inode->u.ext2_i.i_next_alloc_goal = tmp; - inode->i_ctime = CURRENT_TIME; - inode->i_blocks += blocksize/512; - if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) - ext2_sync_inode (inode); - else - mark_inode_dirty(inode); - return result; + /* Reader: end */ + return -EAGAIN; } -/* - * metadata / data - * possibly create / access - * can fail due to: - not present - * - out of space +/** + * ext2_alloc_branch - allocate and set up a chain of blocks. + * @inode: owner + * @num: depth of the chain (number of blocks to allocate) + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates @num blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext2_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). Upon the exit we have the same + * picture as after the successful ext2_get_block(), excpet that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. * - * NULL return in the data case is mandatory. + * If allocation fails we free all blocks we've allocated (and forget + * ther buffer_heads) and return the error value the from failed + * ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. */ -static struct buffer_head * block_getblk (struct inode * inode, - struct buffer_head * bh, int nr, - int new_block, int * err, int metadata, long *phys, int *new) + +static int ext2_alloc_branch(struct inode *inode, + int num, + unsigned long goal, + int *offsets, + Indirect *branch) { - int tmp, goal = 0; - u32 * p; - struct buffer_head * result; int blocksize = inode->i_sb->s_blocksize; + int n = 0; + int err; + int i; + int parent = ext2_alloc_block(inode, goal, &err); - result = NULL; - if (!bh) - goto out; - if (!buffer_uptodate(bh)) { - ll_rw_block (READ, 1, &bh); - wait_on_buffer (bh); + branch[0].key = cpu_to_le32(parent); + if (parent) for (n = 1; n < num; n++) { + struct buffer_head *bh; + /* Allocate the next block */ + int nr = ext2_alloc_block(inode, parent, &err); + if (!nr) + break; + branch[n].key = cpu_to_le32(nr); + /* + * Get buffer_head for parent block, zero it out and set + * the pointer to new one, then send parent to disk. + */ + bh = getblk(inode->i_dev, parent, blocksize); if (!buffer_uptodate(bh)) - goto out; - } - p = (u32 *) bh->b_data + nr; -repeat: - tmp = le32_to_cpu(*p); - if (tmp) { - if (metadata) { - result = getblk (bh->b_dev, tmp, blocksize); - if (tmp == le32_to_cpu(*p)) - goto out; - brelse (result); - goto repeat; - } else { - *phys = tmp; - /* result == NULL */ - goto out; + wait_on_buffer(bh); + memset(bh->b_data, 0, blocksize); + branch[n].bh = bh; + branch[n].p = (u32*) bh->b_data + offsets[n]; + *branch[n].p = branch[n].key; + mark_buffer_uptodate(bh, 1); + mark_buffer_dirty(bh, 1); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); } + parent = nr; } + if (n == num) + return 0; - if (inode->u.ext2_i.i_next_alloc_block == new_block) - goal = inode->u.ext2_i.i_next_alloc_goal; - if (!goal) { - for (tmp = nr - 1; tmp >= 0; tmp--) { - if (le32_to_cpu(((u32 *) bh->b_data)[tmp])) { - goal = le32_to_cpu(((u32 *)bh->b_data)[tmp]); - break; - } - } - if (!goal) - goal = bh->b_blocknr; - } - tmp = ext2_alloc_block (inode, goal, err); - if (!tmp) - goto out; - if (metadata) { - result = getblk (bh->b_dev, tmp, blocksize); - if (!buffer_uptodate(result)) - wait_on_buffer(result); - memset(result->b_data, 0, inode->i_sb->s_blocksize); - mark_buffer_uptodate(result, 1); - mark_buffer_dirty(result, 1); - if (*p) { - ext2_free_blocks (inode, tmp, 1); - bforget (result); - goto repeat; - } - } else { - if (*p) { - /* - * Nobody is allowed to change block allocation - * state from under us: - */ - ext2_error (inode->i_sb, "block_getblk", - "data block filled under us"); - BUG(); - ext2_free_blocks (inode, tmp, 1); - goto repeat; + /* Allocation failed, free what we already allocated */ + for (i = 1; i < n; i++) + bforget(branch[i].bh); + for (i = 0; i < n; i++) + ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1); + return err; +} + +/** + * ext2_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext2_alloc_branch) + * @where: location of missing link + * @num: number of blocks we are adding + * + * This function verifies that chain (up to the missing link) had not + * changed, fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. Otherwise (== chain had been changed) + * we free the new blocks (forgetting their buffer_heads, indeed) and + * return -EAGAIN. + */ + +static inline int ext2_splice_branch(struct inode *inode, + long block, + Indirect chain[4], + Indirect *where, + int num) +{ + int i; + + /* Verify that place we are splicing to is still there and vacant */ + + /* Writer: pointers, ->i_next_alloc*, ->i_blocks */ + if (!verify_chain(chain, where-1) || *where->p) + /* Writer: end */ + goto changed; + + /* That's it */ + + *where->p = where->key; + inode->u.ext2_i.i_next_alloc_block = block; + inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); + inode->i_blocks += num * inode->i_sb->s_blocksize/512; + + /* Writer: end */ + + /* We are done with atomic stuff, now do the rest of housekeeping */ + + inode->i_ctime = CURRENT_TIME; + + /* had we spliced it onto indirect block? */ + if (where->bh) { + mark_buffer_dirty(where->bh, 1); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + ll_rw_block (WRITE, 1, &where->bh); + wait_on_buffer(where->bh); } - *phys = tmp; - *new = 1; - } - *p = le32_to_cpu(tmp); - mark_buffer_dirty(bh, 1); - if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { - ll_rw_block (WRITE, 1, &bh); - wait_on_buffer (bh); } - inode->i_ctime = CURRENT_TIME; - inode->i_blocks += blocksize/512; - mark_inode_dirty(inode); - inode->u.ext2_i.i_next_alloc_block = new_block; - inode->u.ext2_i.i_next_alloc_goal = tmp; - *err = 0; -out: - brelse (bh); - return result; + + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) + ext2_sync_inode (inode); + else + mark_inode_dirty(inode); + return 0; + +changed: + for (i = 1; i < num; i++) + bforget(where[i].bh); + for (i = 0; i < num; i++) + ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1); + return -EAGAIN; } +/* + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + */ + static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) { - int ret, err, new; - struct buffer_head *bh; - unsigned long phys; + int err = -EIO; int offsets[4]; - int *p; Indirect chain[4]; Indirect *partial; - int depth; + unsigned long goal; + int left; + int depth = ext2_block_to_path(inode, iblock, offsets); - depth = ext2_block_to_path(inode, iblock, offsets); if (depth == 0) - goto abort; + goto out; lock_kernel(); +reread: partial = ext2_get_branch(inode, depth, offsets, chain, &err); + /* Simplest case - block found, no allocation needed */ if (!partial) { - unlock_kernel(); - for (partial = chain + depth - 1; partial > chain; partial--) - brelse(partial->bh); +got_it: bh_result->b_dev = inode->i_dev; bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key); bh_result->b_state |= (1UL << BH_Mapped); - return 0; + /* Clean up and exit */ + partial = chain+depth-1; /* the whole chain */ + goto cleanup; } - while (partial > chain) { - brelse(partial->bh); - partial--; - } - - if (!create) { + /* Next simple case - plain lookup or failed read of indirect block */ + if (!create || err == -EIO) { +cleanup: + while (partial > chain) { + brelse(partial->bh); + partial--; + } unlock_kernel(); - return 0; +out: + return err; } - err = -EIO; - new = 0; - ret = 0; - bh = NULL; - /* - * If this is a sequential block allocation, set the next_alloc_block - * to this block now so that all the indblock and data block - * allocations use the same goal zone + * Indirect block might be removed by truncate while we were + * reading it. Handling of that case (forget what we've got and + * reread) is taken out of the main path. */ + if (err == -EAGAIN) + goto changed; - ext2_debug ("block %lu, next %lu, goal %lu.\n", iblock, - inode->u.ext2_i.i_next_alloc_block, - inode->u.ext2_i.i_next_alloc_goal); + if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0) + goto changed; - if (iblock == inode->u.ext2_i.i_next_alloc_block + 1) { - inode->u.ext2_i.i_next_alloc_block++; - inode->u.ext2_i.i_next_alloc_goal++; - } + left = (chain + depth) - partial; + err = ext2_alloc_branch(inode, left, goal, + offsets+(partial-chain), partial); + if (err) + goto cleanup; - err = 0; + if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) + goto changed; - /* - * ok, these macros clean the logic up a bit and make - * it much more readable: - */ -#define GET_INODE_DATABLOCK(x) \ - inode_getblk(inode, x, iblock, &err, 0, &phys, &new) -#define GET_INODE_PTR(x) \ - inode_getblk(inode, x, iblock, &err, 1, NULL, NULL) -#define GET_INDIRECT_DATABLOCK(x) \ - block_getblk (inode, bh, x, iblock, &err, 0, &phys, &new); -#define GET_INDIRECT_PTR(x) \ - block_getblk (inode, bh, x, iblock, &err, 1, NULL, NULL); - - p = offsets; - if (depth == 1) { - bh = GET_INODE_DATABLOCK(*p); - goto out; - } - bh = GET_INODE_PTR(*p); - switch (depth) { - default: /* case 4: */ - bh = GET_INDIRECT_PTR(*++p); - case 3: - bh = GET_INDIRECT_PTR(*++p); - case 2: - bh = GET_INDIRECT_DATABLOCK(*++p); - } - -#undef GET_INODE_DATABLOCK -#undef GET_INODE_PTR -#undef GET_INDIRECT_DATABLOCK -#undef GET_INDIRECT_PTR + bh_result->b_state |= (1UL << BH_New); + goto got_it; -out: - if (bh) - BUG(); // temporary debugging check - if (err) - goto abort; - if (!phys) - BUG(); // must not happen either - - bh_result->b_dev = inode->i_dev; - bh_result->b_blocknr = phys; - bh_result->b_state |= (1UL << BH_Mapped); /* safe */ - if (new) - bh_result->b_state |= (1UL << BH_New); - unlock_kernel(); -abort: - return err; +changed: + while (partial > chain) { + bforget(partial->bh); + partial--; + } + goto reread; } struct buffer_head * ext2_getblk(struct inode * inode, long block, int create, int * err) |