1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
|
/*
md_k.h : kernel internal structure of the Linux MD driver
Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _MD_K_H
#define _MD_K_H
/*
 * Personality numbers.  pers_to_level() and level_to_pers() translate
 * between these values and RAID level numbers.
 *
 * MD_RESERVED (0) is what level_to_pers() returns for a level it does
 * not recognise.  RAID0 is an alias for STRIPED.  MAX_PERSONALITY is one
 * more than the highest personality number defined here.
 */
#define MD_RESERVED 0UL
#define LINEAR 1UL
#define STRIPED 2UL
#define RAID0 STRIPED
#define RAID1 3UL
#define RAID5 4UL
#define TRANSLUCENT 5UL
#define HSM 6UL
#define MAX_PERSONALITY 7UL
/*
 * Translate a personality number into the RAID level it implements.
 *
 * LINEAR, TRANSLUCENT and HSM map to the negative pseudo-levels -1, -2
 * and -3; RAID0, RAID1 and RAID5 map to 0, 1 and 5.  Any other value is
 * treated as fatal and ends in panic().
 */
extern inline int pers_to_level (int pers)
{
	switch (pers) {
	case LINEAR:
		return -1;
	case RAID0:
		return 0;
	case RAID1:
		return 1;
	case RAID5:
		return 5;
	case TRANSLUCENT:
		return -2;
	case HSM:
		return -3;
	default:
		break;
	}
	/* Unknown personality number. */
	panic("pers_to_level()");
}
/*
 * Translate a RAID level number into a personality number.
 *
 * Levels 4 and 5 both select RAID5.  The negative pseudo-levels -1, -2
 * and -3 select LINEAR, TRANSLUCENT and HSM.  A level that is not
 * recognised yields MD_RESERVED.
 */
extern inline int level_to_pers (int level)
{
	if (level == 4 || level == 5)
		return RAID5;
	if (level == 1)
		return RAID1;
	if (level == 0)
		return RAID0;
	if (level == -1)
		return LINEAR;
	if (level == -2)
		return TRANSLUCENT;
	if (level == -3)
		return HSM;

	return MD_RESERVED;
}
/*
 * Forward typedefs, so that the structures and helpers below can refer
 * to each other through pointers before the full definitions appear.
 */
typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;
/*
 * Build-time guard: compilation is refused unless MINORBITS is exactly 8.
 */
#if (MINORBITS != 8)
#error MD doesnt handle bigger kdev yet
#endif
#define MAX_REAL 12 /* Max number of disks per md device */
#define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md devices: one per possible minor number */
/*
 * Maps a kdev to an mddev/subdev.  How 'data' is handled is up to
 * the personality (e.g. HSM uses it to identify individual LVs).
 */
typedef struct dev_mapping_s {
mddev_t *mddev; /* array behind this entry; this is what kdev_to_mddev() returns */
void *data; /* personality-defined, opaque at this level */
} dev_mapping_t;
/* Table of MAX_MD_DEVS entries; kdev_to_mddev() indexes it by MINOR(dev). */
extern dev_mapping_t mddev_map [MAX_MD_DEVS];
/*
 * Look up the mddev registered for a device number.
 *
 * 'dev' must carry MD_MAJOR as its major number, otherwise BUG() is
 * hit.  The minor number selects the mddev_map[] slot whose 'mddev'
 * pointer is returned.
 */
extern inline mddev_t * kdev_to_mddev (kdev_t dev)
{
	dev_mapping_t *slot;

	if (MAJOR(dev) != MD_MAJOR)
		BUG();

	slot = &mddev_map[MINOR(dev)];
	return slot->mddev;
}
/*
 * Options passed in at raidrun time: upper bound for the chunk size,
 * 4096*1024 = 4194304.
 * NOTE(review): unit is presumably bytes -- confirm against the users.
 */
#define MAX_CHUNK_SIZE (4096*1024)
/*
 * Default readahead: 256 * 512 = 131072.
 * NOTE(review): presumably 256 sectors of 512 bytes -- confirm.
 */
#define MD_READAHEAD (256 * 512)
/*
 * Test the MD_DISK_FAULTY bit of a disk descriptor.
 * Returns the masked state word: nonzero if the bit is set (not
 * normalised to 1), zero otherwise.
 */
extern inline int disk_faulty(mdp_disk_t * d)
{
	const int faulty_mask = 1 << MD_DISK_FAULTY;

	return d->state & faulty_mask;
}
/*
 * Test the MD_DISK_ACTIVE bit of a disk descriptor.
 * Returns the masked state word: nonzero if the bit is set (not
 * normalised to 1), zero otherwise.
 */
extern inline int disk_active(mdp_disk_t * d)
{
	const int active_mask = 1 << MD_DISK_ACTIVE;

	return d->state & active_mask;
}
/*
 * Test the MD_DISK_SYNC bit of a disk descriptor.
 * Returns the masked state word: nonzero if the bit is set (not
 * normalised to 1), zero otherwise.
 */
extern inline int disk_sync(mdp_disk_t * d)
{
	const int sync_mask = 1 << MD_DISK_SYNC;

	return d->state & sync_mask;
}
/*
 * A descriptor counts as a spare when it is neither in sync, nor
 * active, nor faulty.  Returns 1 for a spare, 0 otherwise.
 */
extern inline int disk_spare(mdp_disk_t * d)
{
	/* Same left-to-right short-circuit order: sync, active, faulty. */
	return !(disk_sync(d) || disk_active(d) || disk_faulty(d));
}
/*
 * Test the MD_DISK_REMOVED bit of a disk descriptor.
 * Returns the masked state word: nonzero if the bit is set (not
 * normalised to 1), zero otherwise.
 */
extern inline int disk_removed(mdp_disk_t * d)
{
	const int removed_mask = 1 << MD_DISK_REMOVED;

	return d->state & removed_mask;
}
/* Set the MD_DISK_FAULTY bit; all other state bits are left alone. */
extern inline void mark_disk_faulty(mdp_disk_t * d)
{
	const int faulty_mask = 1 << MD_DISK_FAULTY;

	d->state |= faulty_mask;
}
/* Set the MD_DISK_ACTIVE bit; all other state bits are left alone. */
extern inline void mark_disk_active(mdp_disk_t * d)
{
	const int active_mask = 1 << MD_DISK_ACTIVE;

	d->state |= active_mask;
}
/* Set the MD_DISK_SYNC bit; all other state bits are left alone. */
extern inline void mark_disk_sync(mdp_disk_t * d)
{
	const int sync_mask = 1 << MD_DISK_SYNC;

	d->state |= sync_mask;
}
/*
 * Turn the descriptor into a spare by clearing the whole state word,
 * which is exactly the condition disk_spare() tests for.
 */
extern inline void mark_disk_spare(mdp_disk_t * d)
{
	/* No sync, active or faulty bit survives this. */
	d->state = 0;
}
/*
 * Mark the descriptor as removed.  The state word is overwritten (not
 * OR-ed): afterwards only the FAULTY and REMOVED bits are set.
 */
extern inline void mark_disk_removed(mdp_disk_t * d)
{
	const int removed_state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);

	d->state = removed_state;
}
/* Clear the MD_DISK_ACTIVE bit; all other state bits are left alone. */
extern inline void mark_disk_inactive(mdp_disk_t * d)
{
	const int active_mask = 1 << MD_DISK_ACTIVE;

	d->state &= ~active_mask;
}
/* Clear the MD_DISK_SYNC bit; all other state bits are left alone. */
extern inline void mark_disk_nonsync(mdp_disk_t * d)
{
	const int sync_mask = 1 << MD_DISK_SYNC;

	d->state &= ~sync_mask;
}
/*
 * MD's 'extended' device: the bookkeeping record kept per component
 * device.  It carries three list links (same_set, all, pending); the
 * ITERATE_RDEV, ITERATE_RDEV_ALL and ITERATE_RDEV_PENDING macros walk
 * the rings threaded through them.
 */
struct mdk_rdev_s
{
struct md_list_head same_set; /* RAID devices within the same set */
struct md_list_head all; /* all RAID devices */
struct md_list_head pending; /* undetected RAID devices */
kdev_t dev; /* Device number */
kdev_t old_dev; /* Device number when it was last imported */
int size; /* Device size (in blocks) */
mddev_t *mddev; /* RAID array if running */
unsigned long last_events; /* IO event timestamp */
struct inode *inode; /* Lock inode */
struct file filp; /* Lock file */
mdp_super_t *sb; /* superblock of this device -- NOTE(review): presumably an in-memory copy; confirm */
int sb_offset; /* superblock location on the device -- NOTE(review): units not visible here; confirm */
int faulty; /* if faulty do not issue IO requests */
int desc_nr; /* descriptor index in the superblock */
};
/*
 * Disk operations in a working array.
 * NOTE(review): these are presumably the values passed as the 'state'
 * argument of pers->diskop() -- confirm against the callers.
 */
#define DISKOP_SPARE_INACTIVE 0
#define DISKOP_SPARE_WRITE 1
#define DISKOP_SPARE_ACTIVE 2
#define DISKOP_HOT_REMOVE_DISK 3
#define DISKOP_HOT_ADD_DISK 4
/* Forward typedef: struct mddev_s refers to its personality by pointer. */
typedef struct mdk_personality_s mdk_personality_t;
/*
 * State kept for one md array.
 */
struct mddev_s
{
void *private; /* NOTE(review): presumably personality-private data -- confirm */
mdk_personality_t *pers; /* personality operations table */
int __minor; /* minor number; read it through mdidx() */
mdp_super_t *sb; /* array superblock */
int nb_dev; /* number of devices; loop bound of ITERATE_RDEV_ORDERED */
struct md_list_head disks; /* ring of member rdevs, linked through rdev->same_set */
int sb_dirty; /* NOTE(review): presumably set when sb needs writing out -- confirm */
mdu_param_t param;
int ro; /* NOTE(review): presumably a read-only flag -- confirm */
unsigned int curr_resync;
unsigned long resync_start;
char *name;
int recovery_running;
struct semaphore reconfig_sem; /* taken by lock_mddev(), released by unlock_mddev() */
struct semaphore recovery_sem;
struct semaphore resync_sem;
struct md_list_head all_mddevs; /* link in the all_mddevs ring walked by ITERATE_MDDEV */
request_queue_t queue;
};
/*
 * A personality: a name plus the table of operations it provides.
 * Every operation that concerns a particular array receives that
 * array's mddev_t.
 */
struct mdk_personality_s
{
char *name;
int (*map)(mddev_t *mddev, kdev_t dev, kdev_t *rdev,
unsigned long *rsector, unsigned long size);
int (*make_request)(request_queue_t *q, mddev_t *mddev, int rw, struct buffer_head * bh);
void (*end_request)(struct buffer_head * bh, int uptodate);
int (*run)(mddev_t *mddev);
int (*stop)(mddev_t *mddev);
int (*status)(char *page, mddev_t *mddev);
int (*ioctl)(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
int max_invalid_dev;
int (*error_handler)(mddev_t *mddev, kdev_t dev);
/*
 * Some personalities (RAID-1, RAID-5) can have disks hot-added and
 * hot-removed.  Hot removal is different from failure: a failure marks
 * a disk inactive, but the disk is still part of the array.  The
 * interface to such operations is the 'pers->diskop()' function, which
 * can be NULL.
 *
 * The diskop function can change the pointer pointing to the incoming
 * descriptor, but must do so very carefully.  (Currently only
 * SPARE_ACTIVE expects such a change.)
 */
int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);
int (*stop_resync)(mddev_t *mddev);
int (*restart_resync)(mddev_t *mddev);
};
/*
 * Index of an md array.
 *
 * Currently md_array is indexed directly, based on the minor number,
 * so the index is simply the minor stored in the mddev.  This will
 * have to change to dynamic allocation once we start supporting
 * partitioning of md devices.
 */
extern inline int mdidx (mddev_t * mddev)
{
	int minor = mddev->__minor;

	return minor;
}
/*
 * Build the device number of an md array: major MD_MAJOR, minor taken
 * from mdidx().
 */
extern inline kdev_t mddev_to_kdev(mddev_t * mddev)
{
	int minor = mdidx(mddev);

	return MKDEV(MD_MAJOR, minor);
}
/*
 * rdev lookups within one array; both are defined outside this header.
 * find_rdev() takes a device number, find_rdev_nr() takes a number that
 * ITERATE_RDEV_ORDERED supplies as a running index starting at 0.
 * NOTE(review): 'nr' presumably matches rdev->desc_nr, and a failed
 * lookup presumably returns NULL -- confirm in the implementation.
 */
extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
/*
 * Iterates through some rdev ringlist.  Expands to a 'for' header; the
 * statement that follows is the loop body.
 *
 *   head  - the ring's anchor, a struct md_list_head lvalue (the struct
 *           itself, not a pointer to it)
 *   field - name of the md_list_head member of mdk_rdev_t that links
 *           the ring
 *   rdev  - mdk_rdev_t * lvalue, set to the current element
 *   tmp   - struct md_list_head * lvalue used as the cursor
 *
 * 'tmp' is advanced to the next link before the body runs, so it is
 * safe to remove the current 'rdev' inside the body.  Do not touch
 * 'tmp' though.  All arguments are evaluated several times per
 * iteration, so they must be free of side effects.
 */
#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp)			\
									\
	for ((tmp) = (head).next;					\
		(rdev) = md_list_entry((tmp), mdk_rdev_t, field),	\
			(tmp) = (tmp)->next, (tmp)->prev != &(head)	\
		; )
/*
 * Iterates through the 'same array disks' ringlist: the ring anchored
 * at mddev->disks and linked through each rdev's 'same_set' member.
 * 'rdev' receives the current element, 'tmp' is the list cursor.
 */
#define ITERATE_RDEV(mddev,rdev,tmp) \
ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
/*
 * Same as ITERATE_RDEV, but assumes that the devices have rdev->desc_nr
 * numbered from 0 to mddev->nb_dev - 1, and iterates through the rdevs
 * in ascending order by looking each one up with find_rdev_nr().
 *
 *   mddev - mddev_t * expression (evaluated several times per iteration,
 *           so it must be free of side effects)
 *   rdev  - mdk_rdev_t * lvalue, set to the result of the lookup
 *   i     - integer lvalue used as the running index
 *
 * The lookup is evaluated before the bound check, so find_rdev_nr() is
 * also called once with i == mddev->nb_dev when the loop terminates;
 * 'rdev' holds that final result afterwards.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for ((i) = 0; (rdev) = find_rdev_nr((mddev), (i)), (i) < (mddev)->nb_dev; (i)++)
/*
 * Iterates through all 'RAID managed disks': the ring anchored at
 * all_raid_disks (declared outside this header) and linked through each
 * rdev's 'all' member.  'rdev' receives the current element, 'tmp' is
 * the list cursor.
 */
#define ITERATE_RDEV_ALL(rdev,tmp) \
ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)
/*
 * Iterates through 'pending RAID disks': the ring anchored at
 * pending_raid_disks (declared outside this header) and linked through
 * each rdev's 'pending' member.  'rdev' receives the current element,
 * 'tmp' is the list cursor.
 */
#define ITERATE_RDEV_PENDING(rdev,tmp) \
ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)
/*
 * Iterates through all used mddevs in the system, i.e. the ring
 * anchored at the all_mddevs list head and linked through each mddev's
 * 'all_mddevs' member.  Expands to a 'for' header; the statement that
 * follows is the loop body.
 *
 *   mddev - mddev_t * lvalue, set to the current element
 *   tmp   - struct md_list_head * lvalue used as the cursor
 *
 * 'tmp' is advanced before the body runs, so the current 'mddev' may be
 * unlinked inside the body; do not modify 'tmp'.  Both arguments are
 * evaluated several times per iteration and must be free of side
 * effects.
 */
#define ITERATE_MDDEV(mddev,tmp)					\
									\
	for ((tmp) = all_mddevs.next;					\
		(mddev) = md_list_entry((tmp), mddev_t, all_mddevs),	\
			(tmp) = (tmp)->next, (tmp)->prev != &all_mddevs	\
		; )
/*
 * Acquire the array's reconfiguration semaphore with
 * down_interruptible() and hand its return value back to the caller.
 */
extern inline int lock_mddev (mddev_t * mddev)
{
	struct semaphore *sem = &mddev->reconfig_sem;

	return down_interruptible(sem);
}
/* Release the array's reconfiguration semaphore (pairs with lock_mddev()). */
extern inline void unlock_mddev (mddev_t * mddev)
{
	struct semaphore *sem = &mddev->reconfig_sem;

	up(sem);
}
/*
 * Swap the values of two lvalues through a temporary that has the type
 * of 'x'.  Each argument is evaluated twice, so neither may have side
 * effects.
 */
#define xchg_values(x,y)			\
do {						\
	__typeof__(x) __tmp = (x);		\
	(x) = (y);				\
	(y) = __tmp;				\
} while (0)
/*
 * Descriptor for an md thread: a function to run, its argument, and
 * the bookkeeping that goes with it.
 */
typedef struct mdk_thread_s {
void (*run) (void *data); /* thread function; takes one opaque argument */
void *data; /* NOTE(review): presumably the argument handed to run() -- confirm */
md_wait_queue_head_t wqueue;
unsigned long flags; /* NOTE(review): presumably THREAD_WAKEUP is a bit number in here -- confirm */
struct semaphore *sem;
struct task_struct *tsk;
const char *name;
} mdk_thread_t;
#define THREAD_WAKEUP 0
/* Size of the name buffer in dev_name_t. */
#define MAX_DISKNAME_LEN 64
/*
 * List node that pairs a device number with a name held in a fixed
 * buffer of MAX_DISKNAME_LEN chars.
 */
typedef struct dev_name_s {
struct md_list_head list; /* list linkage */
kdev_t dev; /* device number the name belongs to */
char name [MAX_DISKNAME_LEN];
} dev_name_t;
#endif /* _MD_K_H */
|