summaryrefslogtreecommitdiffstats
path: root/fs/umsdos/mangle.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/umsdos/mangle.c')
-rw-r--r--fs/umsdos/mangle.c478
1 files changed, 478 insertions, 0 deletions
diff --git a/fs/umsdos/mangle.c b/fs/umsdos/mangle.c
new file mode 100644
index 000000000..1f59447e9
--- /dev/null
+++ b/fs/umsdos/mangle.c
@@ -0,0 +1,478 @@
+/*
+ * linux/fs/umsdos/mangle.c
+ *
+ * Written 1993 by Jacques Gelinas
+ *
+ * Control the mangling of file names to fit the MSDOS name space.
+ * Many optimisations by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID)
+*/
+#include <linux/errno.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/umsdos_fs.h>
+
+/*
+	Complete the mangling of the MSDOS fake name of an entry, based on
+	the position of the entry in the EMD file.
+
+	umsdos_parse() built the base name and set info->msdos_reject; this
+	function appends the 3 character extension. It does nothing when
+	info->msdos_reject is 0 and clears the flag once the extension has
+	been appended, so calling it twice is harmless.
+
+	Beware that info->f_pos must be set.
+	Four characters and a terminating NUL are written at
+	info->fake.fname + info->fake.len.
+*/
+void umsdos_manglename (struct umsdos_info *info)
+{
+	if (info->msdos_reject){
+		/* #Specification: file name / non MSDOS conforming / mangling
+			Each non MSDOS conforming file has a special extension
+			built from the entry position in the EMD file.
+
+			This number is then transformed into a base 32 number, where
+			each digit is expressed like a hexadecimal number, using
+			digits and letters, except it uses 22 letters from 'a' to 'v'.
+			The number 32 comes from 2**5. It is faster to split a binary
+			number using a base which is a power of two. And I was 32
+			when I started this project. Pick your answer :-) .
+
+			If the result is '0', it is replaced with '_', simply
+			to make it odd.
+
+			This is true for the last two characters of the extension.
+			The first one is taken from a list of odd characters, which
+			are:
+
+				{ } ( ) ! ` ^ & @
+
+			With this scheme, we can produce 9216 ( 9* 32 * 32)
+			different extensions which should not clash with any useful
+			extension already popular or meaningful. Since most directories
+			have much less than 32 * 32 files in them, the first character
+			of the extension of any mangled name will be {.
+
+			Here are the reasons to do this (this kind of mangling).
+
+			-The mangling is deterministic. Just by the extension, we
+			 are able to locate the entry in the EMD file.
+
+			-By keeping the beginning of the file name almost unchanged,
+			 we are helping the MSDOS user.
+
+			-The mangling produces names not too ugly, so an msdos user
+			 may live with it (remember it, type it, etc...).
+
+			-The mangling produces names ugly enough so no one will
+			 ever think of using such a name in real life. This is not
+			 fool proof. I don't think there is a total solution to this.
+		*/
+		/*
+			Entry number in the EMD file. It is cut into three 5 bit
+			digits with shifts and masks: the layout of bit-fields in a
+			union is compiler and endianness dependent, shifts are not.
+		*/
+		unsigned int entry_num = (unsigned int)(info->f_pos / UMSDOS_REC_SIZE);
+		char *pt = info->fake.fname + info->fake.len;
+		/* lookup for encoding the first character of the extension */
+		/* It contains valid characters after the ugly ones to make sure */
+		/* even if someone overflows the 32 * 32 * 9 limit, it still does */
+		/* something */
+		#define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
+		static const char lookup3[]={
+			SPECIAL_MANGLING,
+			/* This is the start of lookup12 */
+			'_','1','2','3','4','5','6','7','8','9',
+			'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
+			'p','q','r','s','t','u','v'
+		};
+		#define lookup12 (lookup3+9)
+		/* Entries 0 .. 9215 are the ones with a unique extension */
+		if (entry_num >= (9* 32 * 32)){
+			printk ("UMSDOS: More than 9216 file in a directory.\n"
+				"This may break the mangling strategy.\n"
+				"Not a killer problem. See doc.\n");
+		}
+		*pt++ = '.';
+		*pt++ = lookup3 [(entry_num >> 10) & 0x1f];	/* bits 10-14 */
+		*pt++ = lookup12[(entry_num >> 5) & 0x1f];	/* bits 5-9 */
+		*pt++ = lookup12[entry_num & 0x1f];		/* bits 0-4 */
+		*pt = '\0';		/* help doing printk */
+		info->fake.len += 4;
+		info->msdos_reject = 0;		/* Avoid mangling twice */
+	}
+}
+
+/*
+	Compute the record space needed to store a name of len characters.
+
+	The size is the offset of the name field inside struct umsdos_dirent
+	plus len, rounded up to a multiple of UMSDOS_REC_SIZE (at least one
+	record is always counted).
+
+	GLU This should be inlined or something to speed it up to the max.
+*/
+int umsdos_evalrecsize (int len)
+{
+	struct umsdos_dirent layout;	/* Only its addresses are used, never its content */
+	int last_byte = len - 1 + (layout.name - (char*)&layout);
+	return (last_byte / UMSDOS_REC_SIZE + 1) * UMSDOS_REC_SIZE;
+}
+#ifdef TEST
+/*
+	Previous version of umsdos_evalrecsize(). Only built with TEST, where
+	main() compares its result with the one of umsdos_evalrecsize().
+*/
+int umsdos_evalrecsize_old (int len)
+{
+	struct umsdos_dirent layout;	/* Only its addresses are used */
+	int total = len + (layout.name-(char*)&layout);
+	int whole = total / UMSDOS_REC_SIZE;
+	/* A partially filled record still takes a full record */
+	if (total % UMSDOS_REC_SIZE > 0){
+		whole += 1;
+	}
+	return whole * UMSDOS_REC_SIZE;
+}
+#endif
+/*
+	Fill the struct info with the full and msdos name of a file.
+
+	fname	Name to parse. It is used together with len and does not
+		have to be NUL terminated. fname[0] is always examined.
+	len	Length of fname. Anything above UMSDOS_MAXNAME is clamped.
+	info	Receives fake.fname, fake.len, msdos_reject, entry.name,
+		entry.name_len and recsize.
+
+	When info->msdos_reject is set on return, the fake name still lacks
+	its extension; see umsdos_manglename().
+
+	Return 0. A name which is too long is truncated, not refused, so
+	there is currently no error path.
+*/
+int umsdos_parse (
+	const char *fname,
+	int len,
+	struct umsdos_info *info)
+{
+	/* #Specification: file name / too long
+		If a file name exceed UMSDOS maxima, the file name is silently
+		truncated. This makes it conformant with the other file system
+		of Linux (minix and ext2 at least).
+	*/
+	if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME;
+	{
+		const char *firstpt=NULL;	/* First place we saw a . in fname */
+		/* #Specification: file name / non MSDOS conforming / base length 0
+			file name beginning with a period '.' are invalid for MsDOS.
+			It needs absolutely a base name. So the file name is mangled
+		*/
+		int ivldchar = fname[0] == '.';/* At least one invalid character */
+		int msdos_len = len;
+		int base_len;
+		/*
+			cardinal_per_size tells if there exist at least one
+			DOS pseudo devices on length n. See the test below.
+		*/
+		static const char cardinal_per_size[9]={
+			0, 0, 0, 1, 1, 0, 1, 0, 1
+		};
+		/*
+			lkp translate all character to acceptable character (for DOS).
+			When lkp[n] == n, it means also it is an acceptable one.
+			So it serve both as a flag and as a translator.
+		*/
+		static char lkp[256];
+		static char is_init=0;
+		if (!is_init){
+			/*
+				Initialisation of the array is easier and less error prone
+				like this.
+			*/
+			int i;
+			const char *spc = "\"*+,/:;<=>?[\\]|~";
+			is_init = 1;
+			/* Control characters and space */
+			for (i=0; i<=32; i++) lkp[i] = '#';
+			for (i=33; i<'A'; i++) lkp[i] = (char)i;
+			/* Upper case is folded, so it never equals itself */
+			for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A'));
+			for (i='Z'+1; i<127; i++) lkp[i] = (char)i;
+			/*
+				DEL (127) and the 8 bit characters. DEL must be covered
+				here: left at 0 it would put a NUL in the middle of
+				the fake name.
+			*/
+			for (i=127; i<256; i++) lkp[i] = '#';
+
+			lkp['.'] = '_';
+			while (*spc != '\0') lkp[(unsigned char)(*spc++)] = '#';
+		}
+		/*	GLU
+			file name which are longer than 8+'.'+3 are invalid for MsDOS.
+			So the file name is to be mangled no more test needed.
+			This Speed Up for long and very long name.
+			The position of the last point is no more necessary anyway.
+		*/
+		if (len<=(8+1+3)){
+			const char *pt = fname;
+			const char *endpt = fname + len;
+			while (pt < endpt){
+				if (*pt == '.'){
+					if (firstpt != NULL){
+						/* 2 . in a file name. Reject */
+						ivldchar = 1;
+						break;
+					}else{
+						int extlen = (int)(endpt - pt);
+						firstpt = pt;
+						if (firstpt - fname > 8){
+							/* base name longer than 8: reject */
+							ivldchar = 1;
+							break;
+						}else if (extlen > 4){
+							/* Extension longer than 4 (including .): reject */
+							ivldchar = 1;
+							break;
+						}else if (extlen == 1){
+							/* #Specification: file name / non MSDOS conforming / last char == .
+								If the last character of a file name is
+								a period, mangling is applied. MsDOS do
+								not support those file name.
+							*/
+							ivldchar = 1;
+							break;
+						}else if (extlen == 4){
+							/* #Specification: file name / non MSDOS conforming / mangling clash
+								To avoid clash with the umsdos mangling, any file
+								with a special character as the first character
+								of the extension will be mangled. This solve the
+								following problem:
+
+								touch FILE
+								# FILE is invalid for DOS, so mangling is applied
+								# file.{_1 is created in the DOS directory
+								touch file.{_1
+								# To UMSDOS file point to a single DOS entry.
+								# So file.{_1 has to be mangled.
+							*/
+							static char special[]={
+								SPECIAL_MANGLING,'\0'
+							};
+							if (strchr(special,firstpt[1])!= NULL){
+								ivldchar = 1;
+								break;
+							}
+						}
+					}
+				}else if (lkp[(unsigned char)(*pt)] != *pt){
+					ivldchar = 1;
+					break;
+				}
+				pt++;
+			}
+		}else{
+			ivldchar = 1;
+		}
+		if (ivldchar
+			|| (firstpt == NULL && len > 8)
+			|| (len == UMSDOS_EMD_NAMELEN
+				&& memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){
+			/* #Specification: file name / --linux-.---
+				The name of the EMD file --linux-.--- is map to a mangled
+				name. So UMSDOS does not restrict its use.
+			*/
+			/* #Specification: file name / non MSDOS conforming / mangling
+				Non MSDOS conforming file name must use some alias to fit
+				in the MSDOS name space.
+
+				The strategy is simple. The name is simply truncated to
+				8 char. points are replace with underscore and a
+				number is given as an extension. This number correspond
+				to the entry number in the EMD file. The EMD file
+				only need to carry the real name.
+
+				Upper case is also convert to lower case.
+				Control character are converted to #.
+				Space are converted to #.
+				The following character are also converted to #.
+				" * + , / : ; < = > ? [ \ ] | ~
+
+				Sometime, the problem is not in MsDOS itself but in
+				command.com.
+			*/
+			int i;
+			char *pt = info->fake.fname;
+			base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len;
+			/*
+				There is no '.' any more so we know for a fact that
+				the base length is the length.
+			*/
+			memcpy (info->fake.fname,fname,msdos_len);
+			for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsigned char)(*pt)];
+			*pt = '\0';	/* GLU Now we are sure there is a 0 at the end */
+			info->msdos_reject = 1;
+			/*
+				The numeric extension is added only when we know
+				the position in the EMD file, in umsdos_newentry(),
+				umsdos_delentry(), and umsdos_findentry().
+				See umsdos_manglename().
+			*/
+		}else{
+			/* Conforming MSDOS file name */
+			/*
+				fname comes with a length and may not be NUL terminated,
+				so copy exactly len bytes (len <= 8+1+3 on this path)
+				and terminate the copy ourselves.
+			*/
+			memcpy (info->fake.fname,fname,len);
+			info->fake.fname[len] = '\0';
+			info->msdos_reject = 0;
+			base_len = firstpt != NULL ? (int)(firstpt - fname) : len;
+		}
+		if (cardinal_per_size[base_len]){
+			/* #Specification: file name / MSDOS devices / mangling
+				To avoid unreachable file from MsDOS, any MsDOS conforming
+				file with a basename equal to one of the MsDOS pseudo
+				devices will be mangled.
+
+				If a file such as "prn" was created, it would be unreachable
+				under MsDOS because prn is assumed to be the printer, even
+				if the file does have an extension.
+
+				Since the extension is unimportant to MsDOS, we must patch
+				the basename also. We simply insert a minus '-'. To avoid
+				conflict with valid file with a minus in front (such as
+				"-prn"), we add an mangled extension like any other
+				mangled file name.
+
+				Here is the list of DOS pseudo devices:
+
+				"prn","con","aux","nul",
+				"lpt1","lpt2","lpt3","lpt4",
+				"com1","com2","com3","com4",
+				"clock$"
+
+				and some standard ones for common DOS programs
+
+				"emmxxxx0","xmsxxxx0","setverxx"
+
+				(Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK>
+				 for pointing these to me).
+
+				Is there one missing ?
+			*/
+			/* This table must be ordered by length */
+			static const char *tbdev[]={
+				"prn","con","aux","nul",
+				"lpt1","lpt2","lpt3","lpt4",
+				"com1","com2","com3","com4",
+				"clock$",
+				"emmxxxx0","xmsxxxx0","setverxx"
+			};
+			/* Tell where to find in tbdev[], the first name of */
+			/* a certain length */
+			static const char start_ind_dev[9]={
+				0, 0, 0, 4, 12, 12, 13, 13, 16
+			};
+			char basen[9];
+			int i;
+			for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){
+				if (memcmp(info->fake.fname,tbdev[i],base_len)==0){
+					memcpy (basen,info->fake.fname,base_len);
+					basen[base_len] = '\0';	/* GLU Now we are sure there is a 0 at the end */
+					/*
+						GLU This is done only when needed; we try to keep
+						GLU the general (most frequent) case simple.
+					*/
+					info->fake.fname[0] = '-';
+					strcpy (info->fake.fname+1,basen);	/* GLU basen is NUL terminated, so is the copy */
+					/* An 8 character base loses its last character to the '-' */
+					msdos_len = (base_len==8) ? 8 : base_len + 1;
+					info->msdos_reject = 1;
+					break;
+				}
+			}
+		}
+		info->fake.fname[msdos_len] = '\0';	/* Help doing printk */
+		/* GLU This zero should already be there ! (invariant ?) */
+		info->fake.len = msdos_len;
+		/* GLU Why not use info->fake.len everywhere ??? longer ? */
+		memcpy (info->entry.name,fname,len);
+		info->entry.name_len = len;
+	}
+	/*
+		Evaluate how many record are needed to store this entry.
+	*/
+	info->recsize = umsdos_evalrecsize (len);
+	return 0;
+}
+
+#ifdef TEST
+
+/* One test case of the table driving the umsdos_parse() test in main() */
+struct MANG_TEST{
+	char *fname;		/* Name given to umsdos_parse() */
+	int msdos_reject;	/* Value expected in info.msdos_reject */
+	char *msname;		/* Value expected in info.fake.fname */
+};
+
+/*
+	Test cases for umsdos_parse(): name, expected msdos_reject flag and
+	expected fake name (without the extension that umsdos_manglename()
+	adds later).
+*/
+struct MANG_TEST tb[]={
+	{"hello",		0,	"hello"},
+	{"hello.1",		0,	"hello.1"},
+	{"hello.1_",		0,	"hello.1_"},
+	{"prm",			0,	"prm"},
+
+#ifdef PROPOSITION
+	{"HELLO",		1,	"hello"},
+	{"Hello.1",		1,	"hello.1"},
+	{"Hello.c",		1,	"hello.c"},
+#else
+/*
+	I find the three examples below rather "unfortunate".
+	I propose to convert to lower case in a preliminary pass,
+	and to test afterwards whether other "nasty" characters remain.
+	Well, I did not do it, because it is not that easy to change.
+	But it is for the principle.
+	Obviously this increases the chances of "collision",
+	for example between "HELLO" and "Hello", but these problems
+	can be handled elsewhere with the other collisions.
+*/
+	{"HELLO",		1,	"hello"},
+	{"Hello.1",		1,	"hello_1"},
+	{"Hello.c",		1,	"hello_c"},
+#endif
+
+	{"hello.{_1",		1,	"hello_{_"},
+	{"hello\t",		1,	"hello#"},
+	{"hello.1.1",		1,	"hello_1_"},
+	{"hel,lo",		1,	"hel#lo"},
+	{"Salut.Tu.vas.bien?",	1,	"salut_tu"},
+	{".profile",		1,	"_profile"},
+	{".xv",			1,	"_xv"},
+	{"toto.",		1,	"toto_"},
+	{"clock$.x",		1,	"-clock$"},
+	{"emmxxxx0",		1,	"-emmxxxx"},
+	{"emmxxxx0.abcd",	1,	"-emmxxxx"},
+	{"aux",			1,	"-aux"},
+	{"prn",			1,	"-prn"},
+	{"prn.abc",		1,	"-prn"},
+	{"PRN",			1,	"-prn"},
+/*
+GLU WARNING: the results of the following ones differ between my version
+GLU of the mangling and the original mangling.
+GLU CAUSE: the way the variable base_len is computed.
+GLU For you it is always 3.
+GLU For me it is respectively 7, 8 and 8.
+*/
+	{"PRN.abc",		1,	"prn_abc"},
+	{"Prn.abcd",		1,	"prn_abcd"},
+	{"prn.abcd",		1,	"prn_abcd"},
+	{"Prn.abcdefghij",	1,	"prn_abcd"}
+};
+
+/*
+	Test program (TEST builds only).
+
+	First run umsdos_parse() on every entry of tb[] and print one line per
+	entry; lines starting with **** show a fake name or a msdos_reject
+	flag different from the expected one.
+	Then compare umsdos_evalrecsize() with umsdos_evalrecsize_old() for
+	every length below UMSDOS_MAXNAME; a dot is printed for each match.
+
+	Always return 0.
+*/
+int main (int argc, char *argv[])
+{
+	const int nbtest = (int)(sizeof(tb)/sizeof(tb[0]));
+	int no;
+	int len;
+
+	printf ("Testing the umsdos_parse.\n");
+	for (no=0; no<nbtest; no++){
+		const struct MANG_TEST *expect = &tb[no];
+		struct umsdos_info info;
+
+		/* The return value is not checked */
+		(void) umsdos_parse (expect->fname,strlen(expect->fname),&info);
+		if (strcmp(info.fake.fname,expect->msname)!=0){
+			/* Wrong fake name */
+			printf ("**** %s -> ",expect->fname);
+			printf ("%s <> %s\n",info.fake.fname,expect->msname);
+			continue;
+		}
+		if (info.msdos_reject != expect->msdos_reject){
+			/* Right name, wrong flag */
+			printf ("**** %s -> %s ",expect->fname,expect->msname);
+			printf ("%d <> %d\n",info.msdos_reject,expect->msdos_reject);
+			continue;
+		}
+		printf (" %s -> %s %d\n",expect->fname,expect->msname
+			,expect->msdos_reject);
+	}
+
+	printf ("Testing the new umsdos_evalrecsize.");
+	for (len=0; len<UMSDOS_MAXNAME ; len++){
+		int size_new = umsdos_evalrecsize (len);
+		int size_old = umsdos_evalrecsize_old (len);
+
+		/* Start a new output line every UMSDOS_REC_SIZE lengths */
+		if (len%UMSDOS_REC_SIZE == 0){
+			printf ("\n%d:\t",len);
+		}
+		if (size_new==size_old){
+			printf(".");
+		}else{
+			printf ("**** %d newres: %d != %d \n", len, size_new, size_old);
+		}
+	}
+	printf ("\nEnd of Testing.\n");
+
+	return 0;
+}
+
+#endif