diff options
Diffstat (limited to 'fs/umsdos/mangle.c')
-rw-r--r-- | fs/umsdos/mangle.c | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/fs/umsdos/mangle.c b/fs/umsdos/mangle.c new file mode 100644 index 000000000..1f59447e9 --- /dev/null +++ b/fs/umsdos/mangle.c @@ -0,0 +1,478 @@ +/* + * linux/fs/umsdos/mangle.c + * + * Written 1993 by Jacques Gelinas + * + * Control the mangling of file name to fit msdos name space. + * Many optimisation by GLU == dglaude@is1.vub.ac.be (GLAUDE DAVID) +*/ +#include <linux/errno.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/umsdos_fs.h> + +/* + Complete the mangling of the MSDOS fake name + based on the position of the entry in the EMD file. + + Simply complete the job of umsdos_parse; fill the extension. + + Beware that info->f_pos must be set. +*/ +void umsdos_manglename (struct umsdos_info *info) +{ + if (info->msdos_reject){ + /* #Specification: file name / non MSDOS conforming / mangling + Each non MSDOS conforming file has a special extension + build from the entry position in the EMD file. + + This number is then transform in a base 32 number, where + each digit is expressed like hexadecimal number, using + digit and letter, except it uses 22 letters from 'a' to 'v'. + The number 32 comes from 2**5. It is faster to split a binary + number using a base which is a power of two. And I was 32 + when I started this project. Pick your answer :-) . + + If the result is '0', it is replace with '_', simply + to make it odd. + + This is true for the first two character of the extension. + The last one is taken from a list of odd character, which + are: + + { } ( ) ! ` ^ & @ + + With this scheme, we can produce 9216 ( 9* 32 * 32) + different extensions which should not clash with any useful + extension already popular or meaningful. Since most directory + have much less than 32 * 32 files in it, the first character + of the extension of any mangle name will be {. + + Here are the reason to do this (this kind of mangling). + + -The mangling is deterministic. Just by the extension, we + are able to locate the entry in the EMD file. + + -By keeping to beginning of the file name almost unchanged, + we are helping the MSDOS user. + + -The mangling produces names not too ugly, so an msdos user + may live with it (remember it, type it, etc...). + + -The mangling produces names ugly enough so no one will + ever think of using such a name in real life. This is not + fool proof. I don't think there is a total solution to this. + */ + union { + int entry_num; + struct { + unsigned num1:5,num2:5,num3:5; + }num; + } u; + char *pt = info->fake.fname + info->fake.len; + /* lookup for encoding the last character of the extension */ + /* It contain valid character after the ugly one to make sure */ + /* even if someone overflow the 32 * 32 * 9 limit, it still do */ + /* something */ + #define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@' + static char lookup3[]={ + SPECIAL_MANGLING, + /* This is the start of lookup12 */ + '_','1','2','3','4','5','6','7','8','9', + 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', + 'p','q','r','s','t','u','v' + }; + #define lookup12 (lookup3+9) + u.entry_num = info->f_pos / UMSDOS_REC_SIZE; + if (u.entry_num > (9* 32 * 32)){ + printk ("UMSDOS: More than 9216 file in a directory.\n" + "This may break the mangling strategy.\n" + "Not a killer problem. See doc.\n"); + } + *pt++ = '.'; + *pt++ = lookup3 [u.num.num3]; + *pt++ = lookup12[u.num.num2]; + *pt++ = lookup12[u.num.num1]; + *pt = '\0'; /* help doing printk */ + info->fake.len += 4; + info->msdos_reject = 0; /* Avoid mangling twice */ + } +} + +/* + Evaluate the record size needed to store of name of len character. + The value returned is a multiple of UMSDOS_REC_SIZE. +*/ +int umsdos_evalrecsize (int len) +{ + struct umsdos_dirent dirent; + int nbrec = 1+((len-1+(dirent.name-(char*)&dirent)) + / UMSDOS_REC_SIZE); + return nbrec * UMSDOS_REC_SIZE; + /* + GLU This should be inlined or something to speed it up to the max. + GLU nbrec is absolutely not needed to return the value. + */ +} +#ifdef TEST +int umsdos_evalrecsize_old (int len) +{ + struct umsdos_dirent dirent; + int size = len + (dirent.name-(char*)&dirent); + int nbrec = size / UMSDOS_REC_SIZE; + int extra = size % UMSDOS_REC_SIZE; + if (extra > 0) nbrec++; + return nbrec * UMSDOS_REC_SIZE; +} +#endif +/* + Fill the struct info with the full and msdos name of a file + Return 0 if all is ok, a negative error code otherwise. +*/ +int umsdos_parse ( + const char *fname, + int len, + struct umsdos_info *info) +{ + int ret = -ENAMETOOLONG; + /* #Specification: file name / too long + If a file name exceed UMSDOS maxima, the file name is silently + truncated. This makes it conformant with the other file system + of Linux (minix and ext2 at least). + */ + if (len > UMSDOS_MAXNAME) len = UMSDOS_MAXNAME; + { + const char *firstpt=NULL; /* First place we saw a . in fname */ + /* #Specification: file name / non MSDOS conforming / base length 0 + file name beginning with a period '.' are invalid for MsDOS. + It needs absolutely a base name. So the file name is mangled + */ + int ivldchar = fname[0] == '.';/* At least one invalid character */ + int msdos_len = len; + int base_len; + /* + cardinal_per_size tells if there exist at least one + DOS pseudo devices on length n. See the test below. + */ + static const char cardinal_per_size[9]={ + 0, 0, 0, 1, 1, 0, 1, 0, 1 + }; + /* + lkp translate all character to acceptable character (for DOS). + When lkp[n] == n, it means also it is an acceptable one. + So it serve both as a flag and as a translator. + */ + static char lkp[256]; + static char is_init=0; + if (!is_init){ + /* + Initialisation of the array is easier and less error prone + like this. + */ + int i; + static char *spc = "\"*+,/:;<=>?[\\]|~"; + is_init = 1; + for (i=0; i<=32; i++) lkp[i] = '#'; + for (i=33; i<'A'; i++) lkp[i] = (char)i; + for (i='A'; i<='Z'; i++) lkp[i] = (char)(i+('a'-'A')); + for (i='Z'+1; i<127; i++) lkp[i] = (char)i; + for (i=128; i<256; i++) lkp[i] = '#'; + + lkp['.'] = '_'; + while (*spc != '\0') lkp[(unsigned char)(*spc++)] = '#'; + } + /* GLU + file name which are longer than 8+'.'+3 are invalid for MsDOS. + So the file name is to be mangled no more test needed. + This Speed Up for long and very long name. + The position of the last point is no more necessary anyway. + */ + if (len<=(8+1+3)){ + const char *pt = fname; + const char *endpt = fname + len; + while (pt < endpt){ + if (*pt == '.'){ + if (firstpt != NULL){ + /* 2 . in a file name. Reject */ + ivldchar = 1; + break; + }else{ + int extlen = (int)(endpt - pt); + firstpt = pt; + if (firstpt - fname > 8){ + /* base name longer than 8: reject */ + ivldchar = 1; + break; + }else if (extlen > 4){ + /* Extension longer than 4 (including .): reject */ + ivldchar = 1; + break; + }else if (extlen == 1){ + /* #Specification: file name / non MSDOS conforming / last char == . + If the last character of a file name is + a period, mangling is applied. MsDOS do + not support those file name. + */ + ivldchar = 1; + break; + }else if (extlen == 4){ + /* #Specification: file name / non MSDOS conforming / mangling clash + To avoid clash with the umsdos mangling, any file + with a special character as the first character + of the extension will be mangled. This solve the + following problem: + + touch FILE + # FILE is invalid for DOS, so mangling is applied + # file.{_1 is created in the DOS directory + touch file.{_1 + # To UMSDOS file point to a single DOS entry. + # So file.{_1 has to be mangled. + */ + static char special[]={ + SPECIAL_MANGLING,'\0' + }; + if (strchr(special,firstpt[1])!= NULL){ + ivldchar = 1; + break; + } + } + } + }else if (lkp[(unsigned char)(*pt)] != *pt){ + ivldchar = 1; + break; + } + pt++; + } + }else{ + ivldchar = 1; + } + if (ivldchar + || (firstpt == NULL && len > 8) + || (len == UMSDOS_EMD_NAMELEN + && memcmp(fname,UMSDOS_EMD_FILE,UMSDOS_EMD_NAMELEN)==0)){ + /* #Specification: file name / --linux-.--- + The name of the EMD file --linux-.--- is map to a mangled + name. So UMSDOS does not restrict its use. + */ + /* #Specification: file name / non MSDOS conforming / mangling + Non MSDOS conforming file name must use some alias to fit + in the MSDOS name space. + + The strategy is simple. The name is simply truncated to + 8 char. points are replace with underscore and a + number is given as an extension. This number correspond + to the entry number in the EMD file. The EMD file + only need to carry the real name. + + Upper case is also convert to lower case. + Control character are converted to #. + Space are converted to #. + The following character are also converted to #. + " * + , / : ; < = > ? [ \ ] | ~ + + Sometime, the problem is not in MsDOS itself but in + command.com. + */ + int i; + char *pt = info->fake.fname; + base_len = msdos_len = (msdos_len>8) ? 8 : msdos_len; + /* + There is no '.' any more so we know for a fact that + the base length is the length. + */ + memcpy (info->fake.fname,fname,msdos_len); + for (i=0; i<msdos_len; i++, pt++) *pt = lkp[(unsigned char)(*pt)]; + *pt = '\0'; /* GLU C'est sur on a un 0 a la fin */ + info->msdos_reject = 1; + /* + The numeric extension is added only when we know + the position in the EMD file, in umsdos_newentry(), + umsdos_delentry(), and umsdos_findentry(). + See umsdos_manglename(). + */ + }else{ + /* Conforming MSDOS file name */ + strcpy (info->fake.fname,fname); /* GLU C'est sur on a un 0 a la fin */ + info->msdos_reject = 0; + base_len = firstpt != NULL ? (int)(firstpt - fname) : len; + } + if (cardinal_per_size[base_len]){ + /* #Specification: file name / MSDOS devices / mangling + To avoid unreachable file from MsDOS, any MsDOS conforming + file with a basename equal to one of the MsDOS pseudo + devices will be mangled. + + If a file such as "prn" was created, it would be unreachable + under MsDOS because prn is assumed to be the printer, even + if the file does have an extension. + + Since the extension is unimportant to MsDOS, we must patch + the basename also. We simply insert a minus '-'. To avoid + conflict with valid file with a minus in front (such as + "-prn"), we add an mangled extension like any other + mangled file name. + + Here is the list of DOS pseudo devices: + + "prn","con","aux","nul", + "lpt1","lpt2","lpt3","lpt4", + "com1","com2","com3","com4", + "clock$" + + and some standard ones for common DOS programs + + "emmxxxx0","xmsxxxx0","setverxx" + + (Thanks to Chris Hall <CAH17@PHOENIX.CAMBRIDGE.AC.UK> + for pointing these to me). + + Is there one missing ? + */ + /* This table must be ordered by length */ + static const char *tbdev[]={ + "prn","con","aux","nul", + "lpt1","lpt2","lpt3","lpt4", + "com1","com2","com3","com4", + "clock$", + "emmxxxx0","xmsxxxx0","setverxx" + }; + /* Tell where to find in tbdev[], the first name of */ + /* a certain length */ + static const char start_ind_dev[9]={ + 0, 0, 0, 4, 12, 12, 13, 13, 16 + }; + char basen[9]; + int i; + for (i=start_ind_dev[base_len-1]; i<start_ind_dev[base_len]; i++){ + if (memcmp(info->fake.fname,tbdev[i],base_len)==0){ + memcpy (basen,info->fake.fname,base_len); + basen[base_len] = '\0'; /* GLU C'est sur on a un 0 a la fin */ + /* + GLU On ne fait cela que si necessaire, on essaye d'etre le + GLU simple dans le cas general (le plus frequent). + */ + info->fake.fname[0] = '-'; + strcpy (info->fake.fname+1,basen); /* GLU C'est sur on a un 0 a la fin */ + msdos_len = (base_len==8) ? 8 : base_len + 1; + info->msdos_reject = 1; + break; + } + } + } + info->fake.fname[msdos_len] = '\0'; /* Help doing printk */ + /* GLU Ce zero devrais deja y etre ! (invariant ?) */ + info->fake.len = msdos_len; + /* Pourquoi ne pas utiliser info->fake.len partout ??? plus long ?*/ + memcpy (info->entry.name,fname,len); + info->entry.name_len = len; + ret = 0; + } + /* + Evaluate how many record are needed to store this entry. + */ + info->recsize = umsdos_evalrecsize (len); + return ret; +} + +#ifdef TEST + +struct MANG_TEST{ + char *fname; /* Name to validate */ + int msdos_reject; /* Expected msdos_reject flag */ + char *msname; /* Expected msdos name */ +}; + +struct MANG_TEST tb[]={ + "hello", 0, "hello", + "hello.1", 0, "hello.1", + "hello.1_", 0, "hello.1_", + "prm", 0, "prm", + +#ifdef PROPOSITION + "HELLO", 1, "hello", + "Hello.1", 1, "hello.1", + "Hello.c", 1, "hello.c", +#elseif +/* + Je trouve les trois exemples ci-dessous tres "malheureux". + Je propose de mettre en minuscule dans un passe preliminaire, + et de tester apres si il y a d'autres caracters "mechants". + Bon, je ne l'ai pas fait, parceque ce n'est pas si facilement + modifiable que ca. Mais c'est pour le principe. + Evidemment cela augmente les chances de "Collision", + par exemple: entre "HELLO" et "Hello", mais ces problemes + peuvent etre traiter ailleur avec les autres collisions. +*/ + "HELLO", 1, "hello", + "Hello.1", 1, "hello_1", + "Hello.c", 1, "hello_c", +#endif + + "hello.{_1", 1, "hello_{_", + "hello\t", 1, "hello#", + "hello.1.1", 1, "hello_1_", + "hel,lo", 1, "hel#lo", + "Salut.Tu.vas.bien?", 1, "salut_tu", + ".profile", 1, "_profile", + ".xv", 1, "_xv", + "toto.", 1, "toto_", + "clock$.x", 1, "-clock$", + "emmxxxx0", 1, "-emmxxxx", + "emmxxxx0.abcd", 1, "-emmxxxx", + "aux", 1, "-aux", + "prn", 1, "-prn", + "prn.abc", 1, "-prn", + "PRN", 1, "-prn", +/* +GLU ATTENTION : Le resultat de ceux-ci sont differents avec ma version +GLU du mangle par rapport au mangle originale. +GLU CAUSE: La maniere de calculer la variable baselen. +GLU Pour toi c'est toujours 3 +GLU Pour moi c'est respectivement 7, 8 et 8 +*/ + "PRN.abc", 1, "prn_abc", + "Prn.abcd", 1, "prn_abcd", + "prn.abcd", 1, "prn_abcd", + "Prn.abcdefghij", 1, "prn_abcd" +}; + +int main (int argc, char *argv[]) +{ + int i,rold,rnew; + printf ("Testing the umsdos_parse.\n"); + for (i=0; i<sizeof(tb)/sizeof(tb[0]); i++){ + struct MANG_TEST *pttb = tb+i; + struct umsdos_info info; + int ok = umsdos_parse (pttb->fname,strlen(pttb->fname),&info); + if (strcmp(info.fake.fname,pttb->msname)!=0){ + printf ("**** %s -> ",pttb->fname); + printf ("%s <> %s\n",info.fake.fname,pttb->msname); + }else if (info.msdos_reject != pttb->msdos_reject){ + printf ("**** %s -> %s ",pttb->fname,pttb->msname); + printf ("%d <> %d\n",info.msdos_reject,pttb->msdos_reject); + }else{ + printf (" %s -> %s %d\n",pttb->fname,pttb->msname + ,pttb->msdos_reject); + } + } + printf ("Testing the new umsdos_evalrecsize."); + for (i=0; i<UMSDOS_MAXNAME ; i++){ + rnew=umsdos_evalrecsize (i); + rold=umsdos_evalrecsize_old (i); + if (!(i%UMSDOS_REC_SIZE)){ + printf ("\n%d:\t",i); + } + if (rnew!=rold){ + printf ("**** %d newres: %d != %d \n", i, rnew, rold); + }else{ + printf("."); + } + } + printf ("\nEnd of Testing.\n"); + + return 0; +} + +#endif |