Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c  429
1 file changed, 149 insertions(+), 280 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 0b1fd9c61..6bf84ca27 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1236,7 +1236,7 @@ static void unmap_buffer(struct buffer_head * bh)
* we have truncated the file and are going to free the
* blocks on-disk..
*/
-int block_flushpage(struct inode *inode, struct page *page, unsigned long offset)
+int block_flushpage(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
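The flushpage path is what truncate uses to drop buffers that now lie past end-of-file: every buffer whose first byte is at or beyond offset loses its mapping. A minimal sketch of that walk over the circular b_this_page list (flushpage_sketch is an illustrative name, not part of this patch):

static void flushpage_sketch(struct page *page, unsigned long offset)
{
	struct buffer_head *bh = page->buffers;
	unsigned int curr_off = 0;

	do {
		unsigned int next_off = curr_off + bh->b_size;
		/* buffer begins at or past the truncation point: unhook it from disk */
		if (offset <= curr_off)
			unmap_buffer(bh);
		curr_off = next_off;
		bh = bh->b_this_page;
	} while (bh != page->buffers);
}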
@@ -1324,9 +1324,8 @@ static void unmap_underlying_metadata(struct buffer_head * bh)
* block_write_full_page() is SMP-safe - currently it's still
* being called with the kernel lock held, but the code is ready.
*/
-int block_write_full_page(struct file *file, struct page *page)
+int block_write_full_page(struct dentry *dentry, struct page *page)
{
- struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
int err, i;
unsigned long block;
@@ -1379,24 +1378,16 @@ out:
return err;
}
-int block_write_partial_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+int block_write_zero_range(struct inode *inode, struct page *page,
+ unsigned zerofrom, unsigned from, unsigned to,
+ const char * buf)
{
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
+ unsigned zeroto = 0, block_start, block_end;
unsigned long block;
- int err, partial;
- unsigned long blocksize, start_block, end_block;
- unsigned long start_offset, start_bytes, end_bytes;
- unsigned long bbits, blocks, i, len;
- struct buffer_head *bh, *head;
- char *target_buf, *kaddr;
- int need_balance_dirty;
-
- kaddr = (char *)kmap(page);
- target_buf = kaddr + offset;
-
- if (!PageLocked(page))
- BUG();
+ int err = 0, partial = 0, need_balance_dirty = 0;
+ unsigned blocksize, bbits;
+ struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
+ char *kaddr = (char *)kmap(page);
blocksize = inode->i_sb->s_blocksize;
if (!page->buffers)
@@ -1405,49 +1396,21 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long
bbits = inode->i_sb->s_blocksize_bits;
block = page->index << (PAGE_CACHE_SHIFT - bbits);
- blocks = PAGE_CACHE_SIZE >> bbits;
- start_block = offset >> bbits;
- end_block = (offset + bytes - 1) >> bbits;
- start_offset = offset & (blocksize - 1);
- start_bytes = blocksize - start_offset;
- if (start_bytes > bytes)
- start_bytes = bytes;
- end_bytes = (offset+bytes) & (blocksize - 1);
- if (end_bytes > bytes)
- end_bytes = bytes;
-
- if (offset < 0 || offset >= PAGE_SIZE)
- BUG();
- if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
- BUG();
- if (start_block < 0 || start_block >= blocks)
- BUG();
- if (end_block < 0 || end_block >= blocks)
- BUG();
- i = 0;
- bh = head;
- partial = 0;
- need_balance_dirty = 0;
- do {
+ /*
+ * First pass - map what needs to be mapped, initiate reads
+ * on the boundaries if needed (i.e. if block is partially covered
+ * _and_ is not up-to-date _and_ is not new).
+ */
+ for(bh = head, block_start = 0; bh != head || !block_start;
+ block++, block_start=block_end, bh = bh->b_this_page) {
if (!bh)
BUG();
-
- if ((i < start_block) || (i > end_block)) {
- if (!buffer_uptodate(bh))
- partial = 1;
- goto skip;
- }
-
- /*
- * If the buffer is not up-to-date, we need to ask the low-level
- * FS to do something for us (we used to have assumptions about
- * the meaning of b_blocknr etc, that's bad).
- *
- * If "update" is set, that means that the low-level FS should
- * try to make sure that the block is up-to-date because we're
- * not going to fill it completely.
- */
+ block_end = block_start+blocksize;
+ if (block_end <= zerofrom)
+ continue;
+ if (block_start >= to)
+ break;
bh->b_end_io = end_buffer_io_sync;
if (!buffer_mapped(bh)) {
err = inode->i_op->get_block(inode, block, bh, 1);
@@ -1455,71 +1418,73 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long
goto out;
unmap_underlying_metadata(bh);
}
-
- if (!buffer_uptodate(bh) && (start_offset || (end_bytes && (i == end_block)))) {
- if (buffer_new(bh)) {
- memset(kaddr + i*blocksize, 0, blocksize);
- } else {
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- err = -EIO;
- if (!buffer_uptodate(bh))
- goto out;
- }
- }
-
- len = blocksize;
- if (start_offset) {
- len = start_bytes;
- start_offset = 0;
- } else if (end_bytes && (i == end_block)) {
- len = end_bytes;
- end_bytes = 0;
+ if (buffer_new(bh)) {
+ zeroto = block_end;
+ if (block_start < zerofrom)
+ zerofrom = block_start;
+ continue;
}
- if (target_buf >= kaddr + PAGE_SIZE)
- BUG();
- if (target_buf+len-1 >= kaddr + PAGE_SIZE)
- BUG();
- err = copy_from_user(target_buf, buf, len);
- target_buf += len;
- buf += len;
-
- /*
- * we dirty buffers only after copying the data into
- * the page - this way we can dirty the buffer even if
- * the bh is still doing IO.
- *
- * NOTE! This also does a direct dirty balace check,
- * rather than relying on bdflush just waking up every
- * once in a while. This is to catch (and slow down)
- * the processes that write tons of buffer..
- *
- * Note how we do NOT want to do this in the full block
- * case: full pages are flushed not by the people who
- * dirtied them, but by people who need memory. And we
- * should not penalize them for somebody else writing
- * lots of dirty pages.
- */
- set_bit(BH_Uptodate, &bh->b_state);
- if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
- __mark_dirty(bh, 0);
- need_balance_dirty = 1;
+ if (!buffer_uptodate(bh) &&
+ (block_start < zerofrom || block_end > to)) {
+ ll_rw_block(READ, 1, &bh);
+ *wait_bh++=bh;
}
-
- if (err) {
- err = -EFAULT;
+ }
+ /*
+ * If we issued read requests - let them complete.
+ */
+ while(wait_bh > wait) {
+ wait_on_buffer(*--wait_bh);
+ err = -EIO;
+ if (!buffer_uptodate(*wait_bh))
goto out;
+ }
+ /*
+ * Now we can copy the data.
+ */
+ if (zerofrom < from)
+ memset(kaddr+zerofrom, 0, from-zerofrom);
+ if (from < to)
+ err = copy_from_user(kaddr+from, buf, to-from);
+ if (to < zeroto)
+ memset(kaddr+to, 0, zeroto-to);
+ else
+ zeroto = to;
+ if (err < 0)
+ goto out;
+ /*
+ * Second pass: check if all out-of-range blocks are up-to-date
+ * and mark the rest up-to-date and dirty.
+ *
+ * NOTE! This also does a direct dirty balance check,
+ * rather than relying on bdflush just waking up every
+ * once in a while. This is to catch (and slow down)
+ * the processes that write tons of buffers..
+ *
+ * Note how we do NOT want to do this in the full block
+ * case: full pages are flushed not by the people who
+ * dirtied them, but by people who need memory. And we
+ * should not penalize them for somebody else writing
+ * lots of dirty pages.
+ */
+ for(bh = head, block_start = 0;
+ bh != head || !block_start;
+ block_start=block_end, bh = bh->b_this_page) {
+ block_end = block_start + blocksize;
+ if (block_end <= zerofrom || block_start >= zeroto) {
+ if (!buffer_uptodate(bh))
+ partial = 1;
+ } else {
+ set_bit(BH_Uptodate, &bh->b_state);
+ if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
+ __mark_dirty(bh, 0);
+ need_balance_dirty = 1;
+ }
}
-
-skip:
- i++;
- block++;
- bh = bh->b_this_page;
- } while (bh != head);
+ }
if (need_balance_dirty)
balance_dirty(bh->b_dev);
-
/*
* is this a partial write that happened to make all buffers
* uptodate then we can optimize away a bogus readpage() for
@@ -1529,183 +1494,48 @@ skip:
if (!partial)
SetPageUptodate(page);
kunmap(page);
- return bytes;
+ return 0;
out:
ClearPageUptodate(page);
kunmap(page);
return err;
}
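The new helper's contract is zerofrom <= from <= to: the first pass maps blocks and starts reads on partially covered, not-up-to-date ones; the copy step then zero-fills [zerofrom, from), copies [from, to) from userspace, and keeps zeroing up to zeroto wherever a freshly allocated block sticks out past the write. A userspace model of just that copy step, assuming kaddr stands in for the kmap()ed page and memcpy for copy_from_user (zero_range_model is an illustrative name):

#include <string.h>

static void zero_range_model(char *kaddr, const char *buf,
			     unsigned zerofrom, unsigned from,
			     unsigned to, unsigned zeroto)
{
	if (zerofrom < from)
		memset(kaddr + zerofrom, 0, from - zerofrom);	/* gap in front of the write */
	if (from < to)
		memcpy(kaddr + from, buf, to - from);		/* the user data itself */
	if (to < zeroto)
		memset(kaddr + to, 0, zeroto - to);		/* tail of a newly allocated block */
}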
-/*
- * For moronic filesystems that do not allow holes in file.
- * we allow offset==PAGE_SIZE, bytes==0
- */
-
-int block_write_cont_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+int block_write_partial_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
{
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
- unsigned long block;
- int err, partial;
- unsigned long blocksize, start_block, end_block;
- unsigned long start_offset, start_bytes, end_bytes;
- unsigned long bbits, blocks, i, len;
- struct buffer_head *bh, *head;
- char * target_buf, *target_data;
- unsigned long data_offset = offset;
- int need_balance_dirty;
-
- offset = inode->i_size - (page->index << PAGE_CACHE_SHIFT);
- if (page->index > (inode->i_size >> PAGE_CACHE_SHIFT))
- offset = 0;
- else if (offset >= data_offset)
- offset = data_offset;
- bytes += data_offset - offset;
-
- target_buf = (char *)page_address(page) + offset;
- target_data = (char *)page_address(page) + data_offset;
+ struct inode *inode = file->f_dentry->d_inode;
+ int err;
if (!PageLocked(page))
BUG();
-
- blocksize = inode->i_sb->s_blocksize;
- if (!page->buffers)
- create_empty_buffers(page, inode, blocksize);
- head = page->buffers;
-
- bbits = inode->i_sb->s_blocksize_bits;
- block = page->index << (PAGE_CACHE_SHIFT - bbits);
- blocks = PAGE_CACHE_SIZE >> bbits;
- start_block = offset >> bbits;
- end_block = (offset + bytes - 1) >> bbits;
- start_offset = offset & (blocksize - 1);
- start_bytes = blocksize - start_offset;
- if (start_bytes > bytes)
- start_bytes = bytes;
- end_bytes = (offset+bytes) & (blocksize - 1);
- if (end_bytes > bytes)
- end_bytes = bytes;
-
- if (offset < 0 || offset > PAGE_SIZE)
+ if (offset < 0 || offset >= PAGE_SIZE)
BUG();
if (bytes+offset < 0 || bytes+offset > PAGE_SIZE)
BUG();
- if (start_block < 0 || start_block > blocks)
- BUG();
- if (end_block < 0 || end_block >= blocks)
- BUG();
-
- i = 0;
- bh = head;
- partial = 0;
- need_balance_dirty = 0;
- do {
- if (!bh)
- BUG();
-
- if ((i < start_block) || (i > end_block)) {
- if (!buffer_uptodate(bh))
- partial = 1;
- goto skip;
- }
-
- /*
- * If the buffer is not up-to-date, we need to ask the low-level
- * FS to do something for us (we used to have assumptions about
- * the meaning of b_blocknr etc, that's bad).
- *
- * If "update" is set, that means that the low-level FS should
- * try to make sure that the block is up-to-date because we're
- * not going to fill it completely.
- */
- bh->b_end_io = end_buffer_io_sync;
- if (!buffer_mapped(bh)) {
- err = inode->i_op->get_block(inode, block, bh, 1);
- if (err)
- goto out;
- unmap_underlying_metadata(bh);
- }
-
- if (!buffer_uptodate(bh) && (start_offset || (end_bytes && (i == end_block)))) {
- if (buffer_new(bh)) {
- memset(bh->b_data, 0, bh->b_size);
- } else {
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- err = -EIO;
- if (!buffer_uptodate(bh))
- goto out;
- }
- }
-
- len = blocksize;
- if (start_offset) {
- len = start_bytes;
- start_offset = 0;
- } else if (end_bytes && (i == end_block)) {
- len = end_bytes;
- end_bytes = 0;
- }
- err = 0;
- if (target_buf+len<=target_data)
- memset(target_buf, 0, len);
- else if (target_buf<target_data) {
- memset(target_buf, 0, target_data-target_buf);
- copy_from_user(target_data, buf,
- len+target_buf-target_data);
- } else
- err = copy_from_user(target_buf, buf, len);
- target_buf += len;
- buf += len;
-
- /*
- * we dirty buffers only after copying the data into
- * the page - this way we can dirty the buffer even if
- * the bh is still doing IO.
- *
- * NOTE! This also does a direct dirty balace check,
- * rather than relying on bdflush just waking up every
- * once in a while. This is to catch (and slow down)
- * the processes that write tons of buffer..
- *
- * Note how we do NOT want to do this in the full block
- * case: full pages are flushed not by the people who
- * dirtied them, but by people who need memory. And we
- * should not penalize them for somebody else writing
- * lots of dirty pages.
- */
- set_bit(BH_Uptodate, &bh->b_state);
- if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
- __mark_dirty(bh, 0);
- need_balance_dirty = 1;
- }
- if (err) {
- err = -EFAULT;
- goto out;
- }
+ err = block_write_range(inode, page, offset, bytes, buf);
+ return err ? err : bytes;
+}
-skip:
- i++;
- block++;
- bh = bh->b_this_page;
- } while (bh != head);
+/*
+ * For moronic filesystems that do not allow holes in files,
+ * we allow offset==PAGE_SIZE, bytes==0
+ */
- if (need_balance_dirty)
- balance_dirty(bh->b_dev);
+int block_write_cont_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ int err;
+ unsigned zerofrom = offset;
- /*
- * is this a partial write that happened to make all buffers
- * uptodate then we can optimize away a bogus readpage() for
- * the next read(). Here we 'discover' wether the page went
- * uptodate as a result of this (potentially partial) write.
- */
- if (!partial)
- SetPageUptodate(page);
- return bytes;
-out:
- ClearPageUptodate(page);
- return err;
+ if (page->index > (inode->i_size >> PAGE_CACHE_SHIFT))
+ zerofrom = 0;
+ else if (page->index == (inode->i_size >> PAGE_CACHE_SHIFT) &&
+ offset > (inode->i_size & ~PAGE_CACHE_MASK))
+ zerofrom = inode->i_size & ~PAGE_CACHE_MASK;
+ err = block_write_zero_range(inode, page, zerofrom,offset,offset+bytes,
+ buf);
+ return err ? err : bytes;
}
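For such filesystems the only twist is where zeroing has to start. A worked example with hypothetical numbers: PAGE_CACHE_SIZE is 4096, i_size is 5000, and the write lands at offset 2000 of page index 1. Valid data in that page ends at 5000 & ~PAGE_CACHE_MASK = 904, so zerofrom falls back to 904 and block_write_zero_range() zero-fills [904, 2000) before copying, which is what keeps the file hole-free. The same computation pulled out into a stand-alone helper (cont_zerofrom is an illustrative name):

static unsigned cont_zerofrom(struct inode *inode, struct page *page,
			      unsigned long offset)
{
	unsigned zerofrom = offset;

	/* page lies entirely beyond the current end of file */
	if (page->index > (inode->i_size >> PAGE_CACHE_SHIFT))
		zerofrom = 0;
	/* page contains EOF and the write starts past it */
	else if (page->index == (inode->i_size >> PAGE_CACHE_SHIFT) &&
		 offset > (inode->i_size & ~PAGE_CACHE_MASK))
		zerofrom = inode->i_size & ~PAGE_CACHE_MASK;
	return zerofrom;
}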
@@ -1998,10 +1828,8 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
* mark_buffer_uptodate() functions propagate buffer state into the
* page struct once IO has completed.
*/
-int block_read_full_page(struct file * file, struct page * page)
+static inline int __block_read_full_page(struct inode *inode, struct page *page)
{
- struct dentry *dentry = file->f_dentry;
- struct inode *inode = dentry->d_inode;
unsigned long iblock;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, blocks;
@@ -2060,6 +1888,47 @@ int block_read_full_page(struct file * file, struct page * page)
return 0;
}
+int block_read_full_page(struct dentry *dentry, struct page *page)
+{
+ return __block_read_full_page(dentry->d_inode, page);
+}
+
+int block_symlink(struct inode *inode, const char *symname, int len)
+{
+ struct page *page = grab_cache_page(&inode->i_data, 0);
+ mm_segment_t fs;
+ int err = -ENOMEM;
+
+ if (!page)
+ goto fail;
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ err = block_write_range(inode, page, 0, len-1, symname);
+ set_fs(fs);
+ inode->i_size = len-1;
+ if (err)
+ goto fail_write;
+ /*
+ * Notice that we are _not_ going to block here - end of page is
+ * unmapped, so this will only try to map the rest of page, see
+ * that it is unmapped (typically even will not look into inode -
+ * ->i_size will be enough for everything) and zero it out.
+ * OTOH it's obviously correct and should make the page up-to-date.
+ */
+ err = __block_read_full_page(inode, page);
+ wait_on_page(page);
+ page_cache_release(page);
+ if (err < 0)
+ goto fail;
+ mark_inode_dirty(inode);
+ return 0;
+fail_write:
+ UnlockPage(page);
+ page_cache_release(page);
+fail:
+ return err;
+}
+
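A hedged usage sketch: a filesystem that keeps symlink bodies in the page cache would call this from its symlink() method, passing strlen(symname) + 1 so the trailing NUL is written and i_size ends up as len - 1 as above. Only block_symlink() and d_instantiate() come from this patch and the VFS; the myfs_* helpers are hypothetical:

static int myfs_symlink(struct inode *dir, struct dentry *dentry,
			const char *symname)
{
	struct inode *inode;
	int err;

	/* hypothetical helper: allocates the inode and sets up i_op
	 * (get_block, readpage, ...) for this illustration */
	inode = myfs_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
	if (!inode)
		return err;

	err = block_symlink(inode, symname, strlen(symname) + 1);
	if (err) {
		myfs_drop_inode(inode);	/* hypothetical cleanup */
		return err;
	}
	d_instantiate(dentry, inode);
	return 0;
}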
/*
* Try to increase the number of buffers available: the size argument
* is used to determine what kind of buffers we want.