diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 144 |
1 files changed, 93 insertions, 51 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7301631abe04..3d9020cf6f6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -18,10 +18,12 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include "dm-bio-list.h" #include <linux/delay.h> -#include <linux/raid/raid10.h> -#include <linux/raid/bitmap.h> +#include <linux/blkdev.h> +#include <linux/seq_file.h> +#include "md.h" +#include "raid10.h" +#include "bitmap.h" /* * RAID10 provides a combination of RAID0 and RAID1 functionality. @@ -186,7 +188,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) static void free_r10bio(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; /* * Wake up any possible resync thread that waits for the device @@ -200,7 +202,7 @@ static void free_r10bio(r10bio_t *r10_bio) static void put_buf(r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; mempool_free(r10_bio, conf->r10buf_pool); @@ -211,7 +213,7 @@ static void reschedule_retry(r10bio_t *r10_bio) { unsigned long flags; mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); @@ -243,7 +245,7 @@ static void raid_end_bio_io(r10bio_t *r10_bio) */ static inline void update_head_pos(int slot, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; conf->mirrors[r10_bio->devs[slot].devnum].head_position = r10_bio->devs[slot].addr + (r10_bio->sectors); @@ -254,7 +256,7 @@ static void raid10_end_read_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; slot = r10_bio->read_slot; @@ -295,7 +297,7 @@ static void raid10_end_write_request(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); int slot, dev; - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; for (slot = 0; slot < conf->copies; slot++) if (r10_bio->devs[slot].bio == bio) @@ -459,7 +461,7 @@ static int raid10_mergeable_bvec(struct request_queue *q, mddev_t *mddev = q->queuedata; sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; - unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int chunk_sectors = mddev->chunk_sectors; unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; @@ -594,7 +596,7 @@ rb_out: static void unplug_slaves(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; rcu_read_lock(); @@ -626,7 +628,7 @@ static void raid10_unplug(struct request_queue *q) static int raid10_congested(void *data, int bits) { mddev_t *mddev = data; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, ret = 0; rcu_read_lock(); @@ -786,7 +788,7 @@ static void unfreeze_array(conf_t *conf) static int make_request(struct request_queue *q, struct bio * bio) { mddev_t *mddev = q->queuedata; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; @@ -979,11 +981,11 @@ static int make_request(struct request_queue *q, struct bio * bio) static void status(struct seq_file *seq, mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i; if (conf->near_copies < conf->raid_disks) - seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); + seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); if (conf->near_copies > 1) seq_printf(seq, " %d near-copies", conf->near_copies); if (conf->far_copies > 1) { @@ -1004,7 +1006,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; /* * If it is not operational, then we have already marked it as dead @@ -1149,15 +1151,15 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) for ( ; mirror <= last ; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) + blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); p->head_position = 0; rdev->raid_disk = mirror; @@ -1168,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) break; } + md_integrity_add_rdev(rdev, mddev); print_conf(conf); return err; } @@ -1201,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) /* lost the race, try later */ err = -EBUSY; p->rdev = rdev; + goto abort; } + md_integrity_register(mddev); } abort: @@ -1213,7 +1218,7 @@ abort: static void end_sync_read(struct bio *bio, int error) { r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); - conf_t *conf = mddev_to_conf(r10_bio->mddev); + conf_t *conf = r10_bio->mddev->private; int i,d; for (i=0; i<conf->copies; i++) @@ -1251,7 +1256,7 @@ static void end_sync_write(struct bio *bio, int error) int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i,d; for (i = 0; i < conf->copies; i++) @@ -1298,7 +1303,7 @@ static void end_sync_write(struct bio *bio, int error) */ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, first; struct bio *tbio, *fbio; @@ -1398,7 +1403,7 @@ done: static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; int i, d; struct bio *bio, *wbio; @@ -1547,7 +1552,7 @@ static void raid10d(mddev_t *mddev) r10bio_t *r10_bio; struct bio *bio; unsigned long flags; - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; int unplug=0; mdk_rdev_t *rdev; @@ -1570,7 +1575,7 @@ static void raid10d(mddev_t *mddev) spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r10_bio->mddev; - conf = mddev_to_conf(mddev); + conf = mddev->private; if (test_bit(R10BIO_IsSync, &r10_bio->state)) { sync_request_write(mddev, r10_bio); unplug = 1; @@ -1678,7 +1683,7 @@ static int init_resync(conf_t *conf) static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; r10bio_t *r10_bio; struct bio *biolist = NULL, *bio; sector_t max_sector, nr_sectors; @@ -1695,7 +1700,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return 0; skipped: - max_sector = mddev->size << 1; + max_sector = mddev->dev_sectors; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sector = mddev->resync_max_sectors; if (sector_nr >= max_sector) { @@ -1807,17 +1812,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i r10_bio->sector = sect; raid10_find_phys(conf, r10_bio); - /* Need to check if this section will still be + + /* Need to check if the array will still be * degraded */ - for (j=0; j<conf->copies;j++) { - int d = r10_bio->devs[j].devnum; - if (conf->mirrors[d].rdev == NULL || - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { + for (j=0; j<conf->raid_disks; j++) + if (conf->mirrors[j].rdev == NULL || + test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { still_degraded = 1; break; } - } + must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); @@ -2020,18 +2025,38 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i goto skipped; } +static sector_t +raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) +{ + sector_t size; + conf_t *conf = mddev->private; + + if (!raid_disks) + raid_disks = mddev->raid_disks; + if (!sectors) + sectors = mddev->dev_sectors; + + size = sectors >> conf->chunk_shift; + sector_div(size, conf->far_copies); + size = size * raid_disks; + sector_div(size, conf->near_copies); + + return size << conf->chunk_shift; +} + static int run(mddev_t *mddev) { conf_t *conf; - int i, disk_idx; + int i, disk_idx, chunk_size; mirror_info_t *disk; mdk_rdev_t *rdev; int nc, fc, fo; sector_t stride, size; - if (mddev->chunk_size < PAGE_SIZE) { + if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || + !is_power_of_2(mddev->chunk_sectors)) { printk(KERN_ERR "md/raid10: chunk size must be " - "at least PAGE_SIZE(%ld).\n", PAGE_SIZE); + "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); return -EINVAL; } @@ -2074,9 +2099,9 @@ static int run(mddev_t *mddev) conf->far_copies = fc; conf->copies = nc*fc; conf->far_offset = fo; - conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; - conf->chunk_shift = ffz(~mddev->chunk_size) - 9; - size = mddev->size >> (conf->chunk_shift-1); + conf->chunk_mask = mddev->chunk_sectors - 1; + conf->chunk_shift = ffz(~mddev->chunk_sectors); + size = mddev->dev_sectors >> conf->chunk_shift; sector_div(size, fc); size = size * conf->raid_disks; sector_div(size, nc); @@ -2089,7 +2114,7 @@ static int run(mddev_t *mddev) */ stride += conf->raid_disks - 1; sector_div(stride, conf->raid_disks); - mddev->size = stride << (conf->chunk_shift-1); + mddev->dev_sectors = stride << conf->chunk_shift; if (fo) stride = 1; @@ -2108,6 +2133,14 @@ static int run(mddev_t *mddev) spin_lock_init(&conf->device_lock); mddev->queue->queue_lock = &conf->device_lock; + chunk_size = mddev->chunk_sectors << 9; + blk_queue_io_min(mddev->queue, chunk_size); + if (conf->raid_disks % conf->near_copies) + blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); + else + blk_queue_io_opt(mddev->queue, chunk_size * + (conf->raid_disks / conf->near_copies)); + list_for_each_entry(rdev, &mddev->disks, same_set) { disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks @@ -2116,16 +2149,15 @@ static int run(mddev_t *mddev) disk = conf->mirrors + disk_idx; disk->rdev = rdev; - - blk_queue_stack_limits(mddev->queue, - rdev->bdev->bd_disk->queue); + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_sector to one PAGE, as * a one page request is never in violation. */ if (rdev->bdev->bd_disk->queue->merge_bvec_fn && - mddev->queue->max_sectors > (PAGE_SIZE>>9)) - mddev->queue->max_sectors = (PAGE_SIZE>>9); + queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9)) + blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->head_position = 0; } @@ -2164,6 +2196,10 @@ static int run(mddev_t *mddev) goto out_free_conf; } + if (mddev->recovery_cp != MaxSector) + printk(KERN_NOTICE "raid10: %s is not clean" + " -- starting background reconstruction\n", + mdname(mddev)); printk(KERN_INFO "raid10: raid set %s active with %d out of %d devices\n", mdname(mddev), mddev->raid_disks - mddev->degraded, @@ -2171,8 +2207,8 @@ static int run(mddev_t *mddev) /* * Ok, everything is just fine now */ - mddev->array_sectors = size << conf->chunk_shift; - mddev->resync_max_sectors = size << conf->chunk_shift; + md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); + mddev->resync_max_sectors = raid10_size(mddev, 0, 0); mddev->queue->unplug_fn = raid10_unplug; mddev->queue->backing_dev_info.congested_fn = raid10_congested; @@ -2183,7 +2219,8 @@ static int run(mddev_t *mddev) * maybe... */ { - int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); + int stripe = conf->raid_disks * + ((mddev->chunk_sectors << 9) / PAGE_SIZE); stripe /= conf->near_copies; if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) mddev->queue->backing_dev_info.ra_pages = 2* stripe; @@ -2191,6 +2228,7 @@ static int run(mddev_t *mddev) if (conf->near_copies < mddev->raid_disks) blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); + md_integrity_register(mddev); return 0; out_free_conf: @@ -2206,7 +2244,10 @@ out: static int stop(mddev_t *mddev) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; + + raise_barrier(conf, 0); + lower_barrier(conf); md_unregister_thread(mddev->thread); mddev->thread = NULL; @@ -2221,7 +2262,7 @@ static int stop(mddev_t *mddev) static void raid10_quiesce(mddev_t *mddev, int state) { - conf_t *conf = mddev_to_conf(mddev); + conf_t *conf = mddev->private; switch(state) { case 1: @@ -2255,6 +2296,7 @@ static struct mdk_personality raid10_personality = .spare_active = raid10_spare_active, .sync_request = sync_request, .quiesce = raid10_quiesce, + .size = raid10_size, }; static int __init raid_init(void) |