Input- to font-encoding character translation in unixTeX This file contains patches to TeX's source files so that, after recompiling, TeX reads the environment variable TEXCHARTRANSLATE and, according to its value, sets the appropriate translation table. The translation tables affect the translation between the external character encoding (`inputencoding') and TeX's internal symbol encoding (`fontencoding'). Currently, two translation tables exist: ISO 8859-1 <--> T1 (Cork) when TEXCHARTRANSLATE=="IL1.T1", and ISO 8859-2 <--> T1 (Cork) when TEXCHARTRANSLATE=="IL2.T1". If the environment variable is not set or has the value "X.X" (where X is a string not containing a dot), the mapping is the identity (trivial translation). Otherwise (for all other settings), the mapping makes all characters below ' ' and above '~' undefined (i.e. restricts to OT1 font encoding). This scheme is extensible -- please make suggestions for adding new tables. To apply these patches successfully you must * place this file in the directory where TeX's source is located (usually .../src/web2c-6.1/web2c/tex/) under the name chartranslate.patch * cd there * type the `patch <chartranslate.patch' command. Enjoy! Libor Skarvada Petr Sojka --- tex.ch *************** *** 275,287 **** %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % [2.23] Allow any character as input. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @x for i:=0 to @'37 do xchr[i]:=' '; for i:=@'177 to @'377 do xchr[i]:=' '; @y ! for i:=0 to @'37 do xchr[i]:=chr(i); ! for i:=@'177 to @'377 do xchr[i]:=chr(i); @z %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --- 275,347 ---- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % [2.23] Allow any character as input.
+ % enable T1 (Cork) <-> ISO 8859-[12] transcription { on request (if + % TEXCHARTRANSLATE environment variable is set to "IL1.T1", "IL2.T1") } %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @x + To get the most ``permissive'' character set, change |' '| on the + right of these assignment statements to |chr(i)|. + @y + To get the most ``permissive'' character set, change |' '| on the + right of these assignment statements to |chr(i)|. + + If, however, we want \TeX\ to ``understand'' more than one character + set on input, the content of the |xchr| and |xord| arrays must depend on + the encoding. + We define a function |ext_enc| (written in C) which reads the environment + variable TEXCHARTRANSLATE, whose value names the pair of character encodings + (external, internal) in use. Then the appropriate ``translation + tables'' |xchr|, |xord| are chosen. + + So far only the translations ISO~8859-1$\leftrightarrow$T1 and + ISO~8859-2$\leftrightarrow$T1 are supported (when |ext_enc| returns + the value 1 or 2 respectively). + + The following macro saves some typing. + |xc(i)(j)| stands for |xchr[i]:=chr(j)|. + @d xc(#)==xchr[#]:=cccc + @d cccc(#)==chr(#) + @z + % + @x for i:=0 to @'37 do xchr[i]:=' '; for i:=@'177 to @'377 do xchr[i]:=' '; @y ! for i:=0 to 31 do xc(i)(i); ! for i:=127 to 255 do xc(i)(32); { default for every code above 126; the cases below override it } ! case ext_enc of ! 0: { do 1-1 identity mapping } ! for i:=127 to 255 do xc(i)(i); ! 1: { external encoding is ISO~8859-1, internal encoding is T1 } ! begin ! for i:=192 to 255 do xc(i)(i); ! xc(158)(240); xc(159)(167); xc(184)(255); xc(189)(161); xc(190)(191); ! xc(191)(163); xc(215)(32); xc(223)(32); xc(240)(32); xc(247)(32); ! xc(255)(223) ! end; ! 2: { external encoding is ISO~8859-2, internal encoding is T1 } ! begin ! for i:=192 to 255 do xc(i)(32); ! xc(128)(195); xc(129)(161); xc(130)(198); xc(131)(200); xc(132)(207); ! xc(133)(204); xc(134)(202); xc(136)(197); xc(137)(165); xc(138)(163); !
xc(139)(209); xc(140)(210); xc(142)(213); xc(143)(192); xc(144)(216); ! xc(145)(166); xc(146)(169); xc(147)(170); xc(148)(171); xc(149)(222); ! xc(150)(219); xc(151)(217); xc(153)(172); xc(154)(174); xc(155)(175); ! xc(158)(240); xc(159)(167); xc(160)(227); xc(161)(177); xc(162)(230); ! xc(163)(232); xc(164)(239); xc(165)(236); xc(166)(234); xc(168)(229); ! xc(169)(181); xc(170)(179); xc(171)(241); xc(172)(242); xc(174)(245); ! xc(175)(224); xc(176)(248); xc(177)(182); xc(178)(185); xc(179)(186); ! xc(180)(187); xc(181)(254); xc(182)(251); xc(183)(249); xc(185)(188); ! xc(186)(190); xc(187)(191); xc(193)(193); xc(194)(194); xc(196)(196); ! xc(199)(199); xc(201)(201); xc(203)(203); xc(205)(205); xc(206)(206); ! xc(208)(208); xc(211)(211); xc(212)(212); xc(214)(214); xc(218)(218); ! xc(220)(220); xc(221)(221); xc(223)(223); xc(225)(225); xc(226)(226); ! xc(228)(228); xc(231)(231); xc(233)(233); xc(235)(235); xc(237)(237); ! xc(238)(238); xc(243)(243); xc(244)(244); xc(246)(246); xc(250)(250); ! xc(252)(252); xc(253)(253); xc(255)(223) ! end ! { else do nothing --- external encoding is 7-bit ASCII } ! 
endcases;@/ @z %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% *************** *** 547,552 **** --- 607,621 ---- end; exit: end; + @z + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + % [4.49] Make characters 128--255 printable + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + @x + (k<" ")or(k>"~") + @y + (k<" ")or(k=invalid_code) {or(k>=128)and(k<160)} @z %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --- ../lib/texmf.defines *************** *** 29,34 **** --- 29,35 ---- @define function inputln (); @define function wopenin (); @define function wopenout (); + @define function extenc; @define procedure bclose (); @define procedure blankrectangle (); --- ../lib/texmf.h *************** *** 10,15 **** --- 10,17 ---- #define NO_DEBUG #endif + #define NONASCII 1 + #include "cpascal.h" #ifdef TeX --- ../lib/texmf.c *************** *** 799,801 **** --- 799,821 ---- #endif /* TRAP */ } #endif /* MF */ + + + /* Read the environment variable TEXCHARTRANSLATE to find out which pair of external-internal character encodings is in use. Returns 0 (identity mapping) when the variable is unset or has the form "X.X" (the same dot-free text on both sides of one dot, compared case-sensitively), 1 for "IL1.T1", 2 for "IL2.T1" (both compared ignoring case), and -1 for any other value. */ integer extenc () { char *tt; tt = getenv("TEXCHARTRANSLATE"); if (tt == NULL) return 0; if (strcasecmp(tt,"IL1.T1")==0) return 1; if (strcasecmp(tt,"IL2.T1")==0) return 2; if (strchr(tt,'.')!=NULL) { int le; le = strcspn(tt,"."); /* the text after the dot must equal the text before it, not merely begin with it */ if (strncmp(tt,tt+le+1,le)==0 && tt[2*le+1]=='\0') return 0; } return -1; }