5 files changed, 261 insertions, 85 deletions
diff --git a/fs/hpfs/Makefile b/fs/hpfs/Makefile
index 94ab74d5d..fec1d65f4 100644
--- a/fs/hpfs/Makefile
+++ b/fs/hpfs/Makefile
@@ -14,10 +14,10 @@
 .s.o:
 	$(AS) -o $*.o $<
 
-OBJS=	hpfs_fs.o
+OBJS=	hpfs_fs.o hpfs_caps.o
 
 hpfs.o: $(OBJS)
-	ln -f hpfs_fs.o hpfs.o
+	$(LD) -r -o hpfs.o $(OBJS)
 
 dep:
 	$(CPP) -M *.c > .depend
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index 3121a415d..53d1ab2d5 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -134,8 +134,9 @@ struct hpfs_spare_block
 
 
 /* The code page info pointed to by the spare block consists of an index
-   block and blocks containing character maps.  The following is pretty
-   sketchy, but Linux doesn't use code pages so it doesn't matter. */
+   block and blocks containing uppercasing tables.  I don't know what
+   these are for (CHKDSK, maybe?) -- OS/2 does not seem to use them
+   itself.  Linux doesn't use them either. */
 
 /* block pointed to by spareblock->code_page_dir */
 
@@ -174,7 +175,7 @@ struct code_page_data
     unsigned short ix;			/* index */
     unsigned short code_page_number;	/* code page number */
     unsigned short zero1;
-    unsigned char map[128];		/* map for chars 80..ff */
+    unsigned char map[128];		/* upcase table for chars 80..ff */
     unsigned short zero2;
   } code_page[3];
   unsigned char incognita[78];
@@ -256,10 +257,11 @@ struct hpfs_dirent {
   time_t creation_date;			/* ctime */
   unsigned ea_size;			/* total EA length, bytes */
   unsigned char zero1;
-  unsigned char locality;		/* 0=unk 1=seq 2=random 3=both */
+  unsigned char ix;			/* code page index (of filename), see
+					   struct code_page_data */
   unsigned char namelen, name[1];	/* file name */
   /* dnode_secno down;	  btree down pointer, if present,
-     			  follows name on next word boundary, or maybe it's
+     			  follows name on next word boundary, or maybe it
 			  precedes next dirent, which is on a word boundary. */
 };
 
diff --git a/fs/hpfs/hpfs_caps.c b/fs/hpfs/hpfs_caps.c
new file mode 100644
index 000000000..61331c1d2
--- /dev/null
+++ b/fs/hpfs/hpfs_caps.c
@@ -0,0 +1,170 @@
+/* Capitalization rules for HPFS */
+
+/* In OS/2, HPFS filenames preserve upper and lower case letter distinctions
+   but filename matching ignores case.  That is, creating a file "Foo"
+   actually creates a file named "Foo" which can be looked up as "Foo",
+   "foo", or "FOO", among other possibilities.
+
+   Also, HPFS is internationalized -- a table giving the uppercase
+   equivalent of every character is stored in the filesystem, so that
+   any national character set may be used.  If several different
+   national character sets are in use, several tables are stored
+   in the filesystem.
+
+   It would be perfectly reasonable for Linux HPFS to act as a Unix
+   filesystem and match "Foo" only if asked for "Foo" exactly.  But
+   the sort order of HPFS directories is case-insensitive, so Linux
+   still has to know the capitalization rules used by OS/2.  Because
+   of this, it turns out to be more natural for us to be case-insensitive
+   than not.
+
+   Currently the standard character set used by Linux is Latin-1.
+   Work is underway to permit people to use UTF-8 instead, therefore
+   all code that depends on the character set is segregated here.
+
+   (It would be wonderful if Linux HPFS could be independent of what
+   character set is in use on the Linux side, but because of the
+   necessary case folding this is impossible.)
+
+   There is a map from Latin-1 into code page 850 for every printing
+   character in Latin-1.  The NLS documentation of OS/2 shows that
+   everybody has 850 available unless they don't have Western latin
+   chars available at all (so fitting them to Linux without Unicode
+   is a doomed exercise).
+
+   It is not clear exactly how HPFS.IFS handles the situation when
+   multiple code pages are in use.  Experiments show that
+
+   - tables on the disk give uppercasing rules for the installed code pages
+
+   - each directory entry is tagged with what code page was current
+     when that name was created
+
+   - doing just CHCP, without changing what's on the disk in any way,
+     can change what DIR reports, and what name a case-folded match
+     will match.
+
+   This means, I think, that HPFS.IFS operates in the current code
+   page, without regard to the uppercasing information recorded in
+   the tables on the disk.  It does record the uppercasing rules
+   it used, perhaps for CHKDSK, but it does not appear to use them
+   itself.
+
+   So: Linux, a Latin-1 system, will operate in code page 850.  We
+   recode between 850 and Latin-1 when dealing with the names actually
+   on the disk.  We don't use the uppercasing tables either.
+
+   In a hypothetical UTF-8 implementation, one reasonable way to
+   proceed that matches OS/2 (for least surprise) is: do case
+   translation in UTF-8, and recode to/from one of the code pages
+   available on the mounted filesystem.  Reject as invalid any name
+   containing chars that can't be represented on disk by one of the
+   code pages OS/2 is using.  Recoding from on-disk names to UTF-8
+   could use the code page tags, though this is not what OS/2 does. */
+
+static const unsigned char tb_cp850_to_latin1[128] =	/* entry i: Latin-1 code for CP850 code 128+i */
+{
+  199, 252, 233, 226, 228, 224, 229, 231,
+  234, 235, 232, 239, 238, 236, 196, 197,
+  201, 230, 198, 244, 246, 242, 251, 249,
+  255, 214, 220, 248, 163, 216, 215, 159,
+  225, 237, 243, 250, 241, 209, 170, 186,
+  191, 174, 172, 189, 188, 161, 171, 187,
+  155, 156, 157, 144, 151, 193, 194, 192,
+  169, 135, 128, 131, 133, 162, 165, 147,
+  148, 153, 152, 150, 145, 154, 227, 195,
+  132, 130, 137, 136, 134, 129, 138, 164,
+  240, 208, 202, 203, 200, 158, 205, 206,
+  207, 149, 146, 141, 140, 166, 204, 139,
+  211, 223, 212, 210, 245, 213, 181, 254,
+  222, 218, 219, 217, 253, 221, 175, 180,
+  173, 177, 143, 190, 182, 167, 247, 184,
+  176, 168, 183, 185, 179, 178, 142, 160,
+};
+
+#if 0	/* Latin-1 -> CP850 table; compiled out, its only user below is under #if 0 too */
+static const unsigned char tb_latin1_to_cp850[128] =	/* entry i: CP850 code for Latin-1 code 128+i */
+{
+  186, 205, 201, 187, 200, 188, 204, 185,
+  203, 202, 206, 223, 220, 219, 254, 242,
+  179, 196, 218, 191, 192, 217, 195, 180,
+  194, 193, 197, 176, 177, 178, 213, 159,
+  255, 173, 189, 156, 207, 190, 221, 245,
+  249, 184, 166, 174, 170, 240, 169, 238,
+  248, 241, 253, 252, 239, 230, 244, 250,
+  247, 251, 167, 175, 172, 171, 243, 168,
+  183, 181, 182, 199, 142, 143, 146, 128,
+  212, 144, 210, 211, 222, 214, 215, 216,
+  209, 165, 227, 224, 226, 229, 153, 158,
+  157, 235, 233, 234, 154, 237, 232, 225,
+  133, 160, 131, 198, 132, 134, 145, 135,
+  138, 130, 136, 137, 141, 161, 140, 139,
+  208, 164, 149, 162, 147, 228, 148, 246,
+  155, 151, 163, 150, 129, 236, 231, 152,
+};
+#endif
+
+#define A_GRAVE 0300	/* Latin-1 capital A grave: first accented capital */
+#define THORN	0336   	/* Latin-1 capital thorn: last accented capital */
+#define MULTIPLY 0327	/* multiplication sign, a non-letter inside that run */
+#define a_grave 0340	/* Latin-1 small a grave: first accented small letter */
+#define thorn	0376	/* Latin-1 small thorn: last accented small letter */
+#define divide	0367	/* division sign, a non-letter inside that run */
+
+static inline unsigned latin1_upcase (unsigned c)
+{
+  /* Small letters are a..z and a-grave..thorn, less the division sign
+     that sits inside the second run; capitals lie 'a' - 'A' below. */
+  int plain = c - 'a' <= 'z' - 'a';
+  int accented = c - a_grave <= thorn - a_grave && c != divide;
+
+  return (plain || accented) ? c - 'a' + 'A' : c;
+}
+
+static inline unsigned latin1_downcase (unsigned c)
+{
+  /* Capitals are A..Z and A-grave..THORN, less the multiplication sign
+     that sits inside the second run; small letters lie 'a' - 'A' above. */
+  int plain = c - 'A' <= 'Z' - 'A';
+  int accented = c - A_GRAVE <= THORN - A_GRAVE && c != MULTIPLY;
+
+  return (plain || accented) ? c + 'a' - 'A' : c;
+}
+
+#if 0
+static inline unsigned latin1_to_cp850 (unsigned c)
+{
+  /* Recode one Latin-1 code to CP850; ASCII (below 128) is unchanged. */
+  if (c - 128 < 128)	/* unsigned: true only for 128..255, so */
+    return tb_latin1_to_cp850[c - 128];	/* c >= 256 cannot overrun */
+  return c;
+}
+#endif
+
+static inline unsigned cp850_to_latin1 (unsigned c)
+{
+  /* Recode one CP850 code to Latin-1; ASCII (below 128) is unchanged. */
+  if (c - 128 < 128)	/* unsigned: true only for 128..255, so */
+    return tb_cp850_to_latin1[c - 128];	/* c >= 256 cannot overrun */
+  return c;
+}
+
+unsigned hpfs_char_to_upper_linux (unsigned c)	/* c: a CP850 character code */
+{
+  return latin1_upcase (cp850_to_latin1 (c));	/* recode to Latin-1, then upcase */
+}
+
+unsigned linux_char_to_upper_linux (unsigned c)	/* c: a Latin-1 character code */
+{
+  return latin1_upcase (c);	/* already Latin-1: upcase only, no recoding */
+}
+
+unsigned hpfs_char_to_lower_linux (unsigned c)	/* c: a CP850 character code */
+{
+  return latin1_downcase (cp850_to_latin1 (c));	/* recode to Latin-1, then downcase */
+}
+
+unsigned hpfs_char_to_linux (unsigned c)	/* c: a CP850 character code */
+{
+  return cp850_to_latin1 (c);	/* recode to Latin-1, case left as it is */
+}
diff --git a/fs/hpfs/hpfs_caps.h b/fs/hpfs/hpfs_caps.h
new file mode 100644
index 000000000..c4e49e97d
--- /dev/null
+++ b/fs/hpfs/hpfs_caps.h
@@ -0,0 +1,4 @@
+unsigned hpfs_char_to_linux (unsigned c);		/* CP850 -> Latin-1 */
+unsigned hpfs_char_to_lower_linux (unsigned c);	/* CP850 -> lowercase Latin-1 */
+unsigned hpfs_char_to_upper_linux (unsigned c);	/* CP850 -> uppercase Latin-1 */
+unsigned linux_char_to_upper_linux (unsigned c);	/* Latin-1 -> uppercase Latin-1 */
diff --git a/fs/hpfs/hpfs_fs.c b/fs/hpfs/hpfs_fs.c
index c05cf56ab..ec16d8af3 100644
--- a/fs/hpfs/hpfs_fs.c
+++ b/fs/hpfs/hpfs_fs.c
@@ -25,6 +25,7 @@
 #include <asm/segment.h>
 
 #include "hpfs.h"
+#include "hpfs_caps.h"
 
 /* 
  * HPFS is a mixture of 512-byte blocks and 2048-byte blocks.  The 2k blocks
@@ -117,9 +118,6 @@
 
 /* notation */
 
-#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
-#define ROUND_UP(x) (((x)+3) & ~3)
-
 #define little_ushort(x) (*(unsigned short *) &(x))
 typedef void nonconst;
 
@@ -127,7 +125,7 @@ typedef void nonconst;
 
 static void hpfs_read_inode(struct inode *);
 static void hpfs_put_super(struct super_block *);
-static void hpfs_statfs(struct super_block *, struct statfs *);
+static void hpfs_statfs(struct super_block *, struct statfs *, int);
 static int hpfs_remount_fs(struct super_block *, int *, char *);
 
 static const struct super_operations hpfs_sops =
@@ -186,7 +184,7 @@ static const struct inode_operations hpfs_file_iops =
 static int hpfs_dir_read(struct inode *inode, struct file *filp,
 			 char *buf, int count);
 static int hpfs_readdir(struct inode *inode, struct file *filp,
-			struct dirent *dirent, int count);
+			void *dirent, filldir_t filldir);
 static int hpfs_lookup(struct inode *, const char *, int, struct inode **);
 
 static const struct file_operations hpfs_dir_ops =
@@ -247,8 +245,6 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
 				      struct quad_buffer_head *qbh);
 static struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
 					  struct quad_buffer_head *qbh);
-static void write_one_dirent(struct dirent *dirent, const unsigned char *name,
-			     unsigned namelen, ino_t ino, int lowercase);
 static dnode_secno dir_subdno(struct inode *inode, unsigned pos);
 static struct hpfs_dirent *map_nth_dirent(dev_t dev, dnode_secno dno,
 					  int n,
@@ -726,12 +722,13 @@ static void hpfs_put_super(struct super_block *s)
  * directory band -- not exactly right but pretty analogous.
  */
 
-static void hpfs_statfs(struct super_block *s, struct statfs *buf)
+static void hpfs_statfs(struct super_block *s, struct statfs *buf, int bufsiz)
 {
+	struct statfs tmp;
+
 	/*
 	 * count the bits in the bitmaps, unless we already have
 	 */
-
 	if (s->s_hpfs_n_free == -1) {
 		s->s_hpfs_n_free = count_bitmap(s);
 		s->s_hpfs_n_free_dnodes =
@@ -741,15 +738,15 @@ static void hpfs_statfs(struct super_block *s, struct statfs *buf)
 	/*
 	 * fill in the user statfs struct
 	 */
-
-	put_fs_long(s->s_magic, &buf->f_type);
-	put_fs_long(512, &buf->f_bsize);
-	put_fs_long(s->s_hpfs_fs_size, &buf->f_blocks);
-	put_fs_long(s->s_hpfs_n_free, &buf->f_bfree);
-	put_fs_long(s->s_hpfs_n_free, &buf->f_bavail);
-	put_fs_long(s->s_hpfs_dirband_size, &buf->f_files);
-	put_fs_long(s->s_hpfs_n_free_dnodes, &buf->f_ffree);
-	put_fs_long(254, &buf->f_namelen);
+	tmp.f_type = s->s_magic;
+	tmp.f_bsize = 512;
+	tmp.f_blocks = s->s_hpfs_fs_size;
+	tmp.f_bfree = s->s_hpfs_n_free;
+	tmp.f_bavail = s->s_hpfs_n_free;
+	tmp.f_files = s->s_hpfs_dirband_size;
+	tmp.f_ffree = s->s_hpfs_n_free_dnodes;
+	tmp.f_namelen = 254;
+	memcpy_tofs(buf, &tmp, bufsiz);
 }
 
 /*
@@ -1209,12 +1206,8 @@ static inline int memcasecmp(const unsigned char *s1, const unsigned char *s2,
 
 	if (n != 0)
 		do {
-			unsigned c1 = *s1++;
-			unsigned c2 = *s2++;
-			if (c1 - 'a' < 26)
-				c1 -= 040;
-			if (c2 - 'a' < 26)
-				c2 -= 040;
+			unsigned c1 = linux_char_to_upper_linux (*s1++);
+			unsigned c2 = hpfs_char_to_upper_linux (*s2++);
 			if ((t = c1 - c2) != 0)
 				return t;
 		} while (--n != 0);
@@ -1310,6 +1303,11 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
  * fixed, throw this out and just walk the tree and write records into
  * the user buffer.)
  *
+ * [ we now can handle multiple dirents, although the current libc doesn't
+ *   use that. The way hpfs does this is pretty strange, as we need to do
+ *   the name translation etc before calling "filldir()". This is untested,
+ *   as I don't have any hpfs partitions to test against.   Linus ]
+ *
  * We keep track of our position in the dnode tree with a sort of
  * dewey-decimal record of subtree locations.  Like so:
  *
@@ -1329,82 +1327,84 @@ static struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
  * we won't have to repeatedly scan the top levels of the tree. 
  */
 
-static int hpfs_readdir(struct inode *inode, struct file *filp,
-			struct dirent *dirent, int likely_story)
+/*
+ * Copy len bytes of an on-disk name from "from" to "to", recoding each
+ * one for Linux; fold to lowercase too if "lowercase" is set.  No NUL
+ * terminator is written.
+ */
+static void translate_hpfs_name(const unsigned char * from, int len, char * to, int lowercase)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		unsigned ch = from[i];
+
+		if (lowercase)
+			to[i] = hpfs_char_to_lower_linux (ch);
+		else
+			to[i] = hpfs_char_to_linux (ch);
+	}
+}
+
+static int hpfs_readdir(struct inode *inode, struct file *filp, void * dirent,
+	filldir_t filldir)	/* filldir: callback handed each entry in turn */
 {
 	struct quad_buffer_head qbh;
 	struct hpfs_dirent *de;
 	int namelen, lc;
 	ino_t ino;
+	char * tempname;	/* scratch page holding the translated name */
+	long old_pos;		/* f_pos of the entry about to be emitted */
 
 	if (inode == 0
 	    || inode->i_sb == 0
 	    || !S_ISDIR(inode->i_mode))
 		return -EBADF;
 
+	tempname = (char *) __get_free_page(GFP_KERNEL);
+	if (!tempname)
+		return -ENOMEM;
+
 	lc = inode->i_sb->s_hpfs_lowercase;
+	switch ((long) filp->f_pos) {
+	case -2:		/* end-of-directory marker stored by the loop below */
+		break;
 
-	switch ((off_t) filp->f_pos) {
 	case 0:
-		write_one_dirent(dirent, ".", 1, inode->i_ino, lc);
+		if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino) < 0)
+			break;
 		filp->f_pos = -1;
-		return ROUND_UP(NAME_OFFSET(dirent) + 2);
+		/* fall through */
 
 	case -1:
-		write_one_dirent(dirent, "..", 2,
-				 inode->i_hpfs_parent_dir, lc);
+		if (filldir(dirent, "..", 2, filp->f_pos, inode->i_hpfs_parent_dir) < 0)
+			break;
 		filp->f_pos = 1;
-		return ROUND_UP(NAME_OFFSET(dirent) + 3);
-
-	case -2:
-		return 0;
+		/* fall through */
 
 	default:
-		de = map_pos_dirent(inode, &filp->f_pos, &qbh);
-		if (!de) {
-			filp->f_pos = -2;
-			return 0;
+		for (;;) {
+			old_pos = filp->f_pos;
+			de = map_pos_dirent(inode, &filp->f_pos, &qbh);
+			if (!de) {
+				filp->f_pos = -2;
+				break;
+			}
+			namelen = de->namelen;
+			translate_hpfs_name(de->name, namelen, tempname, lc);
+			if (de->directory)
+				ino = dir_ino(de->fnode);
+			else
+				ino = file_ino(de->fnode);
+			brelse4(&qbh);	/* de not needed past here: name is in tempname, ino is saved */
+			if (filldir(dirent, tempname, namelen, old_pos, ino) < 0) {
+				filp->f_pos = old_pos;	/* filldir returned < 0: resume at this entry */
+				break;
+			}
 		}
-
-		namelen = de->namelen;
-		if (de->directory)
-			ino = dir_ino(de->fnode);
-		else
-			ino = file_ino(de->fnode);
-		write_one_dirent(dirent, de->name, namelen, ino, lc);
-		brelse4(&qbh);
-
-		return ROUND_UP(NAME_OFFSET(dirent) + namelen + 1);
 	}
-}
-
-/*
- * Send the given name and ino off to the user dirent struct at *dirent.
- * Blam it to lowercase if the mount option said to.
- *
- * Note that Linux d_reclen is the length of the file name, and has nothing
- * to do with the length of the dirent record.
- */
-
-static void write_one_dirent(struct dirent *dirent, const unsigned char *name,
-			     unsigned namelen, ino_t ino, int lowercase)
-{
-	unsigned n;
-
-	put_fs_long(ino, &dirent->d_ino);
-	put_fs_word(namelen, &dirent->d_reclen);
-
-	if (lowercase)
-		for (n = namelen; n != 0;) {
-			unsigned t = name[--n];
-			if (t - 'A' < 26)
-				t += 040;
-			put_fs_byte(t, &dirent->d_name[n]);
-		}
-	else
-		memcpy_tofs(dirent->d_name, name, namelen);
-
-	put_fs_byte(0, &dirent->d_name[namelen]);
+	free_page((unsigned long) tempname);	/* every exit after the allocation comes through here */
+	return 0;
 }
 
 /*