diff options
Diffstat (limited to 'fs/hpfs')
-rw-r--r-- | fs/hpfs/Makefile | 4 | ||||
-rw-r--r-- | fs/hpfs/hpfs.h | 12 | ||||
-rw-r--r-- | fs/hpfs/hpfs_caps.c | 170 | ||||
-rw-r--r-- | fs/hpfs/hpfs_caps.h | 4 | ||||
-rw-r--r-- | fs/hpfs/hpfs_fs.c | 156 |
5 files changed, 261 insertions, 85 deletions
diff --git a/fs/hpfs/Makefile b/fs/hpfs/Makefile index 94ab74d5d..fec1d65f4 100644 --- a/fs/hpfs/Makefile +++ b/fs/hpfs/Makefile @@ -14,10 +14,10 @@ .s.o: $(AS) -o $*.o $< -OBJS= hpfs_fs.o +OBJS= hpfs_fs.o hpfs_caps.o hpfs.o: $(OBJS) - ln -f hpfs_fs.o hpfs.o + $(LD) -r -o hpfs.o $(OBJS) dep: $(CPP) -M *.c > .depend diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 3121a415d..53d1ab2d5 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -134,8 +134,9 @@ struct hpfs_spare_block /* The code page info pointed to by the spare block consists of an index - block and blocks containing character maps. The following is pretty - sketchy, but Linux doesn't use code pages so it doesn't matter. */ + block and blocks containing uppercasing tables. I don't know what + these are for (CHKDSK, maybe?) -- OS/2 does not seem to use them + itself. Linux doesn't use them either. */ /* block pointed to by spareblock->code_page_dir */ @@ -174,7 +175,7 @@ struct code_page_data unsigned short ix; /* index */ unsigned short code_page_number; /* code page number */ unsigned short zero1; - unsigned char map[128]; /* map for chars 80..ff */ + unsigned char map[128]; /* upcase table for chars 80..ff */ unsigned short zero2; } code_page[3]; unsigned char incognita[78]; @@ -256,10 +257,11 @@ struct hpfs_dirent { time_t creation_date; /* ctime */ unsigned ea_size; /* total EA length, bytes */ unsigned char zero1; - unsigned char locality; /* 0=unk 1=seq 2=random 3=both */ + unsigned char ix; /* code page index (of filename), see + struct code_page_data */ unsigned char namelen, name[1]; /* file name */ /* dnode_secno down; btree down pointer, if present, - follows name on next word boundary, or maybe it's + follows name on next word boundary, or maybe it precedes next dirent, which is on a word boundary. */ }; diff --git a/fs/hpfs/hpfs_caps.c b/fs/hpfs/hpfs_caps.c new file mode 100644 index 000000000..61331c1d2 --- /dev/null +++ b/fs/hpfs/hpfs_caps.c @@ -0,0 +1,170 @@ +/* Capitalization rules for HPFS */ + +/* In OS/2, HPFS filenames preserve upper and lower case letter distinctions + but filename matching ignores case. That is, creating a file "Foo" + actually creates a file named "Foo" which can be looked up as "Foo", + "foo", or "FOO", among other possibilities. + + Also, HPFS is internationalized -- a table giving the uppercase + equivalent of every character is stored in the filesystem, so that + any national character set may be used. If several different + national character sets are in use, several tables are stored + in the filesystem. + + It would be perfectly reasonable for Linux HPFS to act as a Unix + filesystem and match "Foo" only if asked for "Foo" exactly. But + the sort order of HPFS directories is case-insensitive, so Linux + still has to know the capitalization rules used by OS/2. Because + of this, it turns out to be more natural for us to be case-insensitive + than not. + + Currently the standard character set used by Linux is Latin-1. + Work is underway to permit people to use UTF-8 instead, therefore + all code that depends on the character set is segregated here. + + (It would be wonderful if Linux HPFS could be independent of what + character set is in use on the Linux side, but because of the + necessary case folding this is impossible.) + + There is a map from Latin-1 into code page 850 for every printing + character in Latin-1. The NLS documentation of OS/2 shows that + everybody has 850 available unless they don't have Western latin + chars available at all (so fitting them to Linux without Unicode + is a doomed exercise). + + It is not clear exactly how HPFS.IFS handles the situation when + multiple code pages are in use. Experiments show that + + - tables on the disk give uppercasing rules for the installed code pages + + - each directory entry is tagged with what code page was current + when that name was created + + - doing just CHCP, without changing what's on the disk in any way, + can change what DIR reports, and what name a case-folded match + will match. + + This means, I think, that HPFS.IFS operates in the current code + page, without regard to the uppercasing information recorded in + the tables on the disk. It does record the uppercasing rules + it used, perhaps for CHKDSK, but it does not appear to use them + itself. + + So: Linux, a Latin-1 system, will operate in code page 850. We + recode between 850 and Latin-1 when dealing with the names actually + on the disk. We don't use the uppercasing tables either. + + In a hypothetical UTF-8 implementation, one reasonable way to + proceed that matches OS/2 (for least surprise) is: do case + translation in UTF-8, and recode to/from one of the code pages + available on the mounted filesystem. Reject as invalid any name + containing chars that can't be represented on disk by one of the + code pages OS/2 is using. Recoding from on-disk names to UTF-8 + could use the code page tags, though this is not what OS/2 does. */ + +static const unsigned char tb_cp850_to_latin1[128] = +{ + 199, 252, 233, 226, 228, 224, 229, 231, + 234, 235, 232, 239, 238, 236, 196, 197, + 201, 230, 198, 244, 246, 242, 251, 249, + 255, 214, 220, 248, 163, 216, 215, 159, + 225, 237, 243, 250, 241, 209, 170, 186, + 191, 174, 172, 189, 188, 161, 171, 187, + 155, 156, 157, 144, 151, 193, 194, 192, + 169, 135, 128, 131, 133, 162, 165, 147, + 148, 153, 152, 150, 145, 154, 227, 195, + 132, 130, 137, 136, 134, 129, 138, 164, + 240, 208, 202, 203, 200, 158, 205, 206, + 207, 149, 146, 141, 140, 166, 204, 139, + 211, 223, 212, 210, 245, 213, 181, 254, + 222, 218, 219, 217, 253, 221, 175, 180, + 173, 177, 143, 190, 182, 167, 247, 184, + 176, 168, 183, 185, 179, 178, 142, 160, +}; + +#if 0 +static const unsigned char tb_latin1_to_cp850[128] = +{ + 186, 205, 201, 187, 200, 188, 204, 185, + 203, 202, 206, 223, 220, 219, 254, 242, + 179, 196, 218, 191, 192, 217, 195, 180, + 194, 193, 197, 176, 177, 178, 213, 159, + 255, 173, 189, 156, 207, 190, 221, 245, + 249, 184, 166, 174, 170, 240, 169, 238, + 248, 241, 253, 252, 239, 230, 244, 250, + 247, 251, 167, 175, 172, 171, 243, 168, + 183, 181, 182, 199, 142, 143, 146, 128, + 212, 144, 210, 211, 222, 214, 215, 216, + 209, 165, 227, 224, 226, 229, 153, 158, + 157, 235, 233, 234, 154, 237, 232, 225, + 133, 160, 131, 198, 132, 134, 145, 135, + 138, 130, 136, 137, 141, 161, 140, 139, + 208, 164, 149, 162, 147, 228, 148, 246, + 155, 151, 163, 150, 129, 236, 231, 152, +}; +#endif + +#define A_GRAVE 0300 +#define THORN 0336 +#define MULTIPLY 0327 +#define a_grave 0340 +#define thorn 0376 +#define divide 0367 + +static inline unsigned latin1_upcase (unsigned c) +{ + if (c - 'a' <= 'z' - 'a' + || (c - a_grave <= thorn - a_grave + && c != divide)) + return c - 'a' + 'A'; + else + return c; +} + +static inline unsigned latin1_downcase (unsigned c) +{ + if (c - 'A' <= 'Z' - 'A' + || (c - A_GRAVE <= THORN - A_GRAVE + && c != MULTIPLY)) + return c + 'a' - 'A'; + else + return c; +} + +#if 0 +static inline unsigned latin1_to_cp850 (unsigned c) +{ + if ((signed) c - 128 >= 0) + return tb_latin1_to_cp850[c - 128]; + else + return c; +} +#endif + +static inline unsigned cp850_to_latin1 (unsigned c) +{ + if ((signed) c - 128 >= 0) + return tb_cp850_to_latin1[c - 128]; + else + return c; +} + +unsigned hpfs_char_to_upper_linux (unsigned c) +{ + return latin1_upcase (cp850_to_latin1 (c)); +} + +unsigned linux_char_to_upper_linux (unsigned c) +{ + return latin1_upcase (c); +} + +unsigned hpfs_char_to_lower_linux (unsigned c) +{ + return latin1_downcase (cp850_to_latin1 (c)); +} + +unsigned hpfs_char_to_linux (unsigned c) +{ + return cp850_to_latin1 (c); +} diff --git a/fs/hpfs/hpfs_caps.h b/fs/hpfs/hpfs_caps.h new file mode 100644 index 000000000..c4e49e97d --- /dev/null +++ b/fs/hpfs/hpfs_caps.h @@ -0,0 +1,4 @@ +unsigned hpfs_char_to_linux (unsigned c); +unsigned hpfs_char_to_lower_linux (unsigned c); +unsigned hpfs_char_to_upper_linux (unsigned c); +unsigned linux_char_to_upper_linux (unsigned c); diff --git a/fs/hpfs/hpfs_fs.c b/fs/hpfs/hpfs_fs.c index c05cf56ab..ec16d8af3 100644 --- a/fs/hpfs/hpfs_fs.c +++ b/fs/hpfs/hpfs_fs.c @@ -25,6 +25,7 @@ #include <asm/segment.h> #include "hpfs.h" +#include "hpfs_caps.h" /* * HPFS is a mixture of 512-byte blocks and 2048-byte blocks. The 2k blocks @@ -117,9 +118,6 @@ /* notation */ -#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) -#define ROUND_UP(x) (((x)+3) & ~3) - #define little_ushort(x) (*(unsigned short *) &(x)) typedef void nonconst; @@ -127,7 +125,7 @@ typedef void nonconst; static void hpfs_read_inode(struct inode *); static void hpfs_put_super(struct super_block *); -static void hpfs_statfs(struct super_block *, struct statfs *); +static void hpfs_statfs(struct super_block *, struct statfs *, int); static int hpfs_remount_fs(struct super_block *, int *, char *); static const struct super_operations hpfs_sops = @@ -186,7 +184,7 @@ static const struct inode_operations hpfs_file_iops = static int hpfs_dir_read(struct inode *inode, struct file *filp, char *buf, int count); static int hpfs_readdir(struct inode *inode, struct file *filp, - struct dirent *dirent, int count); + void *dirent, filldir_t filldir); static int hpfs_lookup(struct inode *, const char *, int, struct inode **); static const struct file_operations hpfs_dir_ops = @@ -247,8 +245,6 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, struct quad_buffer_head *qbh); static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp, struct quad_buffer_head *qbh); -static void write_one_dirent(struct dirent *dirent, const unsigned char *name, - unsigned namelen, ino_t ino, int lowercase); static dnode_secno dir_subdno(struct inode *inode, unsigned pos); static struct hpfs_dirent *map_nth_dirent(dev_t dev, dnode_secno dno, int n, @@ -726,12 +722,13 @@ static void hpfs_put_super(struct super_block *s) * directory band -- not exactly right but pretty analogous. */ -static void hpfs_statfs(struct super_block *s, struct statfs *buf) +static void hpfs_statfs(struct super_block *s, struct statfs *buf, int bufsiz) { + struct statfs tmp; + /* * count the bits in the bitmaps, unless we already have */ - if (s->s_hpfs_n_free == -1) { s->s_hpfs_n_free = count_bitmap(s); s->s_hpfs_n_free_dnodes = @@ -741,15 +738,15 @@ static void hpfs_statfs(struct super_block *s, struct statfs *buf) /* * fill in the user statfs struct */ - - put_fs_long(s->s_magic, &buf->f_type); - put_fs_long(512, &buf->f_bsize); - put_fs_long(s->s_hpfs_fs_size, &buf->f_blocks); - put_fs_long(s->s_hpfs_n_free, &buf->f_bfree); - put_fs_long(s->s_hpfs_n_free, &buf->f_bavail); - put_fs_long(s->s_hpfs_dirband_size, &buf->f_files); - put_fs_long(s->s_hpfs_n_free_dnodes, &buf->f_ffree); - put_fs_long(254, &buf->f_namelen); + tmp.f_type = s->s_magic; + tmp.f_bsize = 512; + tmp.f_blocks = s->s_hpfs_fs_size; + tmp.f_bfree = s->s_hpfs_n_free; + tmp.f_bavail = s->s_hpfs_n_free; + tmp.f_files = s->s_hpfs_dirband_size; + tmp.f_ffree = s->s_hpfs_n_free_dnodes; + tmp.f_namelen = 254; + memcpy_tofs(buf, &tmp, bufsiz); } /* @@ -1209,12 +1206,8 @@ static inline int memcasecmp(const unsigned char *s1, const unsigned char *s2, if (n != 0) do { - unsigned c1 = *s1++; - unsigned c2 = *s2++; - if (c1 - 'a' < 26) - c1 -= 040; - if (c2 - 'a' < 26) - c2 -= 040; + unsigned c1 = linux_char_to_upper_linux (*s1++); + unsigned c2 = hpfs_char_to_upper_linux (*s2++); if ((t = c1 - c2) != 0) return t; } while (--n != 0); @@ -1310,6 +1303,11 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, * fixed, throw this out and just walk the tree and write records into * the user buffer.) * + * [ we now can handle multiple dirents, although the current libc doesn't + * use that. The way hpfs does this is pretty strange, as we need to do + * the name translation etc before calling "filldir()". This is untested, + * as I don't have any hpfs partitions to test against. Linus ] + * * We keep track of our position in the dnode tree with a sort of * dewey-decimal record of subtree locations. Like so: * @@ -1329,82 +1327,84 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, * we won't have to repeatedly scan the top levels of the tree. */ -static int hpfs_readdir(struct inode *inode, struct file *filp, - struct dirent *dirent, int likely_story) +/* + * Translate the given name: Blam it to lowercase if the mount option said to. + */ + +static void translate_hpfs_name(const unsigned char * from, int len, char * to, int lowercase) +{ + while (len > 0) { + unsigned t = *from; + len--; + if (lowercase) + t = hpfs_char_to_lower_linux (t); + else + t = hpfs_char_to_linux (t); + *to = t; + from++; + to++; + } +} + +static int hpfs_readdir(struct inode *inode, struct file *filp, void * dirent, + filldir_t filldir) { struct quad_buffer_head qbh; struct hpfs_dirent *de; int namelen, lc; ino_t ino; + char * tempname; + long old_pos; if (inode == 0 || inode->i_sb == 0 || !S_ISDIR(inode->i_mode)) return -EBADF; + tempname = (char *) __get_free_page(GFP_KERNEL); + if (!tempname) + return -ENOMEM; + lc = inode->i_sb->s_hpfs_lowercase; + switch ((long) filp->f_pos) { + case -2: + break; - switch ((off_t) filp->f_pos) { case 0: - write_one_dirent(dirent, ".", 1, inode->i_ino, lc); + if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino) < 0) + break; filp->f_pos = -1; - return ROUND_UP(NAME_OFFSET(dirent) + 2); + /* fall through */ case -1: - write_one_dirent(dirent, "..", 2, - inode->i_hpfs_parent_dir, lc); + if (filldir(dirent, "..", 2, filp->f_pos, inode->i_hpfs_parent_dir) < 0) + break; filp->f_pos = 1; - return ROUND_UP(NAME_OFFSET(dirent) + 3); - - case -2: - return 0; + /* fall through */ default: - de = map_pos_dirent(inode, &filp->f_pos, &qbh); - if (!de) { - filp->f_pos = -2; - return 0; + for (;;) { + old_pos = filp->f_pos; + de = map_pos_dirent(inode, &filp->f_pos, &qbh); + if (!de) { + filp->f_pos = -2; + break; + } + namelen = de->namelen; + translate_hpfs_name(de->name, namelen, tempname, lc); + if (de->directory) + ino = dir_ino(de->fnode); + else + ino = file_ino(de->fnode); + brelse4(&qbh); + if (filldir(dirent, tempname, namelen, old_pos, ino) < 0) { + filp->f_pos = old_pos; + break; + } } - - namelen = de->namelen; - if (de->directory) - ino = dir_ino(de->fnode); - else - ino = file_ino(de->fnode); - write_one_dirent(dirent, de->name, namelen, ino, lc); - brelse4(&qbh); - - return ROUND_UP(NAME_OFFSET(dirent) + namelen + 1); } -} - -/* - * Send the given name and ino off to the user dirent struct at *dirent. - * Blam it to lowercase if the mount option said to. - * - * Note that Linux d_reclen is the length of the file name, and has nothing - * to do with the length of the dirent record. - */ - -static void write_one_dirent(struct dirent *dirent, const unsigned char *name, - unsigned namelen, ino_t ino, int lowercase) -{ - unsigned n; - - put_fs_long(ino, &dirent->d_ino); - put_fs_word(namelen, &dirent->d_reclen); - - if (lowercase) - for (n = namelen; n != 0;) { - unsigned t = name[--n]; - if (t - 'A' < 26) - t += 040; - put_fs_byte(t, &dirent->d_name[n]); - } - else - memcpy_tofs(dirent->d_name, name, namelen); - - put_fs_byte(0, &dirent->d_name[namelen]); + free_page((unsigned long) tempname); + return 0; } /* |