%{
#ifndef lint
static char *sccsid="@(#)s2l.y 1.2 (LBL) 2/4/85";
#endif
/* scribe-to-latex converter.
 *
 * copyright (c) 1984 by Van Jacobson, Lawrence Berkeley Laboratory
 * This program may be freely redistributed but not for profit.  This
 * comment must remain in the program or any derivative.
 *
 * I would appreciate comments, suggestions, bug fixes, etc.  My
 * mailing addresses are:
 *	van@lbl-csam.arpa or van@lbl-rtsg.arpa	(ARPANET or CSNET)
 *	ucbvax!lbl-csam!van			(UUCP)
 *
 * Expect to get 81 shift/reduce & 48 reduce/reduce conflicts when
 * running this thing through yacc.  The grammar does more-or-less
 * what I wanted so I didn't bother to add the precedence & assoc.
 * rules needed to get rid of the conflict msgs.
 *
 * Change this grammar with fear & trembling (I do).  No text is
 * kept around by the parser so the order of the reductions is very
 * important.  The reduction that outputs the current token has to
 * happen before the next shift wipes out that token (except in some
 * cases where the parse stack or the production encodes the token).
 *
 * Modifications.
 * --------------
 * 05Feb85, vj	Bug fixes from users: added "comment" to allowed names
 *		in -s file; added "format" as kw in addition to env;
 *		fixed bug in kw->env handling; fixed mis-count of arguments
 *		when "-s" present.
 * 30Dec84, vj	Changed description env to handle null tags.  Added
 *		a few more scribe 2-char control seq. to scanner.
 * 28Dec84, vj	Bug fixes from Richard Furuta
 * 16Aug84 Van Jacobson, written.
 */

/* NOTE(review): the names of the system headers were missing from this
 * copy of the file.  The set below covers every library call made in this
 * file; <ctype.h> is included in case the scanner (lex.yy.c, included at
 * the bottom) relies on it -- confirm against the scanner source.
 */
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symtab.h"

/* POSIX <stdio.h> declares its own dprintf(int, const char *, ...).  Map
 * our output routine onto a private name so that every existing call
 * (here and in the included scanner) keeps compiling unchanged.
 */
#define dprintf s2l_dprintf

#define DELBUFLEN 1024	/* "delayed" text buffer length (see comments
			 * with routine "dprintf") */

static struct stab *lastsym;	/* result of last "lookup" */
static int kflag = 0;		/* set to print unrecog keywords */
static int dirquote = 1;	/* set to convert quotes to directed quotes */
static int dquote=0;		/* set 1 if we're expecting a closing double quote */
static int informat=0;		/* set 1 if we're doing a "format" environment */
static int incomment=0;		/* set 1 if we're inside a comment */
static int delayout=0;		/* set 1 if we have to delay output */
static char *inputfile;		/* current input file name */
static int inputline=1;		/* line number in input file */

/* Routines defined after the grammar.  The prototypes must be visible to
 * the actions: dprintf is variadic and cannot be called correctly without
 * one, wherever yacc places the generated parser relative to the code
 * section.
 */
int yylex(void);
int yyparse(void);
void yyerror(const char *str);
void init_st(void);
void read_st(const char *fname);
void dprintf(const char *fmt, ...);
void flushout(const char *prefix);
%}

%union {
	int ival;
	struct stab *sval;
	}

%token KW_REP		/* keyword to be replaced */
%token KW_ENV		/* keyword to be made environment [begin()..end()] */
%token KW_DEL		/* keyword to be deleted */
%token KW_COM		/* comment */
%token KW_FORMAT	/* keyword introducing "format" environment */
%token FONT		/* keyword to be treated like font */
%token KW_IGN		/* keyword to ignore in @description label */
%token ITEMIZE DESCRIPT

/* The semantic value of these nonterminals is a symbol table entry: the
 * actions assign "lastsym" to $$ and dereference $1->s_reptext.
 */
%type <sval> kw_rep
%type <sval> kw_env
%type <sval> kw_format
%type <sval> itemize
%type <sval> descrip
%type <sval> istart
%type <sval> dstart

%token ENV_REP		/* environment to be renamed */
%token ENV_DEL		/* environment to be deleted */
%token ENV_KW		/* environment to be made keyword */
%token ENV_FORMAT	/* "format" (tabular) environment */
%token END_ENV
%token ENV_ITEMIZE
%token ENV_DESCRIP
%token ENV_COM		/* comments */

%type <sval> env_rep
%type <sval> env_kw
%type <sval> env_itemize
%type <sval> env_descrip
%type <sval> env_format

%token BLANKLINE
%token INCHES
%token COMMAND
%token SUP SUB
%token TAB RJUST CENTER POINT NPSPACE SETTAB LBREAK STILDE HYPHEN
%token CHAR
%token LBRACK RBRACK

%%

sdoc	: sdoc foo
	| foo
	;

foo	: control
	| ignore
	| env
	| special
	| text
	| error '\n'
	;

control	: kw_rep balbrack
	| itemize optnl items RBRACK
		{ dprintf("\\end{%s}",$1->s_reptext); }
	| itemize optnl items blanklines RBRACK
		{ dprintf("\\end{%s}",$1->s_reptext); }
	| descrip optnl ditems RBRACK
		{ dprintf("\\end{%s}",$1->s_reptext); }
	| descrip optnl ditems blanklines RBRACK
		{ dprintf("\\end{%s}",$1->s_reptext); }
	| kw_del sdoc RBRACK
	| kw_env LBRACK optnl sdoc RBRACK
		{ dprintf("\\end{%s}",$1->s_reptext); }
	| kw_format LBRACK optnl sdoc RBRACK
		{ dprintf("\\end{%s}",$1->s_reptext); informat = 0; }
	| supsub balbrack
		{ dprintf( "}$" ); }
	| font sdoc rbrack
	| kw_com sdoc RBRACK optnl
		{ dprintf("}\n"); incomment=0; }
	;

ignore	: kw_ign balbrack
	;

env	: env_rep sdoc endenv
	| env_format optnl sdoc endenv
		{ informat = 0; }
	| ENV_DEL sdoc END_ENV
	| env_kw sdoc END_ENV
		{ dprintf( "}" ); }
	| env_itemize optnl items endenv
	| env_itemize optnl items blanklines endenv
	| env_descrip optnl ditems endenv
	| env_descrip optnl ditems blanklines endenv
	| env_com sdoc endenv optnl
		{ dprintf("\n"); incomment=0; }
	;

env_rep	: ENV_REP
		{ dprintf("\\begin{%s}",lastsym->s_reptext); }
	;

env_format: ENV_FORMAT
		{ dprintf("\\begin{%s}\n",lastsym->s_reptext); informat = 1; }
	;

endenv	: END_ENV
		{ dprintf("\\end{%s}",lastsym->s_reptext); }
	;

env_kw	: ENV_KW
		{ dprintf("\\%s{",lastsym->s_reptext); }
	;

env_com	: ENV_COM
		{ dprintf("%% \\begin{%s}", lastsym->s_reptext); incomment=1; }
	;

kw_com	: KW_COM LBRACK
		{ dprintf("%% \\%s{", lastsym->s_reptext); incomment=1; }
	;

kw_rep	: KW_REP
		{ dprintf("\\%s",lastsym->s_reptext); }
	;

kw_ign	: KW_IGN
		{ dprintf("\\%s",lastsym->s_reptext); }
	;

kw_del	: KW_DEL LBRACK optnl
	;

kw_env	: KW_ENV
		{ dprintf("\\begin{%s}\n",lastsym->s_reptext); $$ = lastsym; }
	;

kw_format: KW_FORMAT
		{ dprintf("\\begin{%s}\n",lastsym->s_reptext);
		  $$ = lastsym; informat = 1; }
	;

itemize	: istart LBRACK
		{ dprintf("\\begin{%s}\n",$1->s_reptext); $$ = $1; }
	;

istart	: ITEMIZE
		{ $$ = lastsym; }
	;

descrip	: dstart LBRACK
		{ dprintf("\\begin{%s}\n",$1->s_reptext); $$ = $1; }
	;

dstart	: DESCRIPT
		{ $$ = lastsym; }
	;

env_itemize: ENV_ITEMIZE
		{ dprintf("\\begin{%s}\n", lastsym->s_reptext); }
	;

env_descrip: ENV_DESCRIP
		{ dprintf("\\begin{%s}\n", lastsym->s_reptext); }
	;

optnl	: '\n'
	| optnl BLANKLINE
	|
	;

items	: items blanklines item
	| item
	;

item	: item newline
	| item line
	| item ignore
	| item env
	| itemstrt
	;

itemstrt:
		{ dprintf("\\item "); dquote=0; }
	;

/* these productions describe the list of items for an @description environment.
 * The list is a bunch of items separated by one or more blank lines.  There
 * may also be interspersed keywords like "@index()" which we try to ignore
 * and interspersed environments which couldn't be part of a tag.
 */
ditems	: ditems blanklines ditem
	| ditem
	;

/* A single description item consists of stuff that we ignore, a first line
 * that we search for a tag & other lines.  Or it may consist entirely of
 * stuff we ignore.
 */
ditem	: dignores dfirstl lines
	| dignores
	| error '\n'
		{ flushout("\\item[] "); }
	;

dignores: dignores ignore newline
	| dignores ignore
	| dignores env newline
	| dignores env
	|
	;

dfirstl	: ditemstrt line TAB
		{ flushout("\\item["); dprintf("] "); }
	| ditemstrt line '\n'
		{ flushout("\\item[] "); dprintf("\n"); }
	;

lines	: lines newline
	| lines linepart
	| lines ignore
	| lines env
	|
	;

/* we think we have the start of an item - delay output until we know
 * whether or not to put a "\item[" in front of it
 */
ditemstrt:
		{ delayout=1; dquote=0; }
	;

line	: line linepart
	| line env
	| line ignore
	| linepart
	;

linepart: control
	| special
	| char
	;

supsub	: SUP
		{ dprintf("$^{\\hbox"); }
	| SUB
		{ dprintf("$_{\\hbox"); }
	;

font	: FONT LBRACK
		{ dprintf("{\\%s ", lastsym->s_reptext); }
	;

balbrack: lbrack sdoc rbrack
	| lbrack rbrack
	;

lbrack	: LBRACK
		{ dprintf("{"); }
	;

rbrack	: RBRACK
		{ dprintf("}"); }
	;

special	: POINT
		{ dprintf(".~"); }
	| NPSPACE
		{ dprintf("\\ "); }
	| LBREAK
		{ dprintf("\\newline{}"); }
	| STILDE
		{ dprintf("%%"); }
	| HYPHEN
		{ dprintf("\\-"); }
	| COMMAND
		{ dprintf("\\%s",&yytext[1]); }
	;

text	: text char
	| char
	| newline
	| BLANKLINE
		{ dprintf(incomment?"\n%% ":"\n"); dquote=0; }
	| TAB
		{ dprintf(" & "); }
	| CENTER
		{ dprintf(" & "); }
	| RJUST
		{ dprintf(" & "); }
	| SETTAB
		{ dprintf(" & "); }
	;

/* Ordinary text.  The scanned text is passed as an argument, never as the
 * format, so a stray '%' in it cannot be misread as a conversion.
 */
char	: CHAR
		{ dprintf("%s", yytext); }
	| '@'
		/* A literal at-sign is emitted as itself, like every other
		 * literal character below (this copy of the file emitted "j").
		 * NOTE(review): presumably produced by Scribe's "@@" escape --
		 * confirm against the scanner. */
		{ dprintf("@"); }
	| INCHES
		{ dprintf("in"); }
	| '{'
		{ dprintf("\\{"); }
	| '}'
		{ dprintf("\\}"); }
	| '#'
		{ dprintf("\\#"); }
	| '$'
		{ dprintf("\\$"); }
	| '%'
		{ dprintf("\\%%"); }
	| '&'
		{ dprintf("\\&"); }
	| '~'
		{ dprintf("{\\tt\\char`\\~}"); }
	| '_'
		{ dprintf("{\\tt\\char`\\_}"); }
	| '^'
		{ dprintf("{\\tt\\char`\\^}"); }
	| '\\'
		{ dprintf("{\\tt\\char`\\\\}"); }
	| '<'
		{ dprintf("{\\tt\\char`\\<}"); }
	| '>'
		{ dprintf("{\\tt\\char`\\>}"); }
	| '|'
		{ dprintf("{\\tt\\char`\\|}"); }
	| '"'
		{
		if( dirquote ) {
			dprintf( dquote? "'":"`");
			dquote=!dquote;
			}
		else
			dprintf("{\\tt\\char`\\\"}");
		}
	;

newline	: '\n'
		{
		if ( incomment )
			dprintf("\n%% ");
		else
			dprintf(informat?"\\\\\n":"\n");
		}
	;

blanklines: blanklines BLANKLINE
	| BLANKLINE
		{ dprintf(incomment?"\n%% ":"\n"); dquote=0; }
	;

%%

/* main - parse the command line, then convert each named file (or the
 * standard input when no file is named), writing LaTeX to standard output.
 *
 *   -s file	read additional symbol table entries from "file"
 *   -...k...	set kflag
 *   -...q...	clear dirquote (don't generate directed quotes)
 *
 * Exits with status 1 if "-s" has no file name or a file can't be opened.
 */
int
main(int argc, char **argv)
{
	char *pname = *argv;

	init_st();
	argc--, argv++;
	while (argc > 0 && argv[0][0] == '-') {
		if (strcmp(*argv, "-s") == 0) {
			/* argc still counts the "-s" itself, so a file name
			 * is present only if at least two args remain. */
			if (argc < 2) {
				fprintf( stderr, "%s: filename must follow -s\n",
					pname );
				exit(1);
			}
			argv++; argc--;
			read_st(*argv);
		} else {
			if (strchr(*argv, 'k') != NULL)
				kflag++;
			if (strchr(*argv, 'q') != NULL)
				dirquote = 0;
		}
		argv++; argc--;
	}
	if (argc > 0) {
		while (argc-- > 0) {
			if (freopen(*argv, "r", stdin) == NULL) {
				perror(*argv);
				exit(1);
			}
			inputfile = *argv;
			inputline = 1;
			yyparse();
			argv++;
		}
	} else {
		/* NOTE(review): the empty name makes diagnostics read
		 * "in  line N"; possibly a placeholder such as "<stdin>"
		 * was lost from this copy -- confirm. */
		inputfile = "";
		yyparse();
	}
	return 0;
}

/* yyerror - report a parser error, with the current input file name and
 * line number, on the standard error output.
 */
void
yyerror(const char *str)
{
	fprintf( stderr, "s2l: %s in %s line %d\n", str, inputfile, inputline );
}

/* initialize the keyword symbol table */
void
init_st(void)
{
	/* straight keyword replacement */
	enter("@begin", KW_REP, "begin");
	enter("@end", KW_REP, "end");
	enter("@unnumbered",KW_REP,"chapter*");
	enter("@bigsection",KW_REP,"chapter");
	enter("@blankspace",KW_REP,"vspace");
	enter("@ux",KW_REP,"underline");

	/* keywords to turn into environments */
	enter("@example",KW_ENV,"example");
	enter("@center",KW_ENV,"center");

	/* keywords that behave like fonts (eg, @i{} -> {\it } */
	enter("@i",FONT,"it");
	enter("@u",FONT,"it");
	enter("@un",FONT,"it");
	enter("@b",FONT,"bf");
	enter("@p",FONT,"bf");
	enter("@t",FONT,"tt");
	enter("@c",FONT,"sc");
	enter("@r",FONT,"rm");
	enter("@w",FONT,"obeyspaces");

	/* list environments */
	enter("@itemize",ITEMIZE,"itemize");
	enter("@enumerate",ITEMIZE,"enumerate");
	enter("@description",DESCRIPT,"description");
	enter("@multiple",KW_DEL,"");

	/* the begin-end form of list environments */
	enter("itemize",ENV_ITEMIZE,"itemize");
	enter("enumerate",ENV_ITEMIZE,"enumerate");
	enter("description",ENV_DESCRIP,"description");
	enter("multiple",ENV_DEL,"");

	/* environments to be renamed */
	enter("@format",KW_FORMAT,"tabular");
	enter("format",ENV_FORMAT,"tabular");

	/* comments */
	enter("@comment",KW_COM,"comment");
	enter("comment",ENV_COM,"comment");

	/* keywords to "ignore" in description env */
	enter("@tabs",KW_IGN,"tabs");
	enter("@index",KW_IGN,"index");
}

/* Read User-specified entries into the symbol table */

/* ENTER picks the keyword or the environment token type depending on
 * whether the name starts with '@'.  "typ" must be written as the pair
 * KW_type:ENV_type -- it supplies the ':' half of the conditional.
 */
#define ENTER(txt,typ,rtxt) enter(txt,*txt=='@'?typ,rtxt)

/* read_st - add the entries in file "fname" to the symbol table.
 *
 * Each entry is three whitespace-separated words: the name, a type whose
 * first letter selects the token class (R, D, F, E, I or L, T, A, N, C in
 * either case) and the replacement text.  An incomplete trailing entry is
 * skipped.  Exits with status 1 if the file can't be opened or a type
 * letter is unknown.
 *
 * NOTE(review): the word buffers are reused for every entry, so this
 * assumes enter() copies its string arguments -- confirm in the symbol
 * table code.
 */
void
read_st(const char *fname)
{
	FILE *stream;
	char text[128], stype[128], reptext[128];
	int sstat;

	if ((stream = fopen(fname, "r")) == NULL) {
		perror(fname);
		exit(1);
	}
	/* the field widths keep an over-long word from overrunning the
	 * 128-byte buffers */
	while ((sstat = fscanf( stream, "%127s %127s %127s \n",
				text, stype, reptext )) != EOF) {
		if (sstat != 3)
			continue;
		switch (stype[0]) {
		case 'R': case 'r':
			ENTER( text, KW_REP:ENV_REP, reptext );
			break;
		case 'D': case 'd':
			ENTER( text, KW_DEL:ENV_DEL, reptext );
			break;
		case 'F': case 'f':
			enter( text, FONT, reptext );
			break;
		case 'E': case 'e':
			ENTER( text, KW_ENV:ENV_KW, reptext );
			break;
		case 'I': case 'i':
		case 'L': case 'l':
			ENTER( text, ITEMIZE:ENV_ITEMIZE, reptext );
			break;
		case 'T': case 't':
			ENTER( text, DESCRIPT:ENV_DESCRIP, reptext );
			break;
		case 'A': case 'a':
			ENTER( text, KW_FORMAT:ENV_FORMAT, reptext );
			break;
		case 'N': case 'n':
			enter( text, KW_IGN, reptext );
			break;
		case 'C': case 'c':
			ENTER( text, KW_COM:ENV_COM, reptext );
			break;
		default:
			fprintf(stderr,"-unknown type `%s'\n",stype);
			exit(1);
		}
	}
	fclose( stream );
}

/* dprintf - a (possibly delayed) printf
 * This routine is used for all Latex output.  If the flag "delayout"
 * isn't set, it's just a printf.  If delayout is set, the formatted text
 * is accumulated in a local buffer instead.  The buffer is written out
 * by a call to flushout.  This routine is intended to handle cases
 * where you don't know what text to generate until several tokens later
 * in the input.  In particular, for an "@description" item you don't know
 * whether to put an "\item[" at the beginning of the line until you see a
 * tab or newline.  Because this routine uses a static buffer, it will
 * screw up on things like a description environment inside of a
 * description tag (which I can't imagine happening).  The static buffer
 * is also finite so you can't delay a very large portion of the document:
 * text that doesn't fit is reported through yyerror and the program exits
 * with status 1.
 */
static char delbuf[DELBUFLEN];	/* buffer for delayed text */
static size_t delcnt = 0;	/* count of characters in delbuf */

void
dprintf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (delayout) {
		/* we're delaying output text */
		size_t room = sizeof delbuf - delcnt;
		int n = vsnprintf(&delbuf[delcnt], room, fmt, ap);

		if (n < 0 || (size_t)n >= room) {
			va_end(ap);
			yyerror( "delayed text buffer overflow" );
			exit(1);
		}
		delcnt += (size_t)n;
	} else {
		vprintf(fmt, ap);
	}
	va_end(ap);
}

/* flush the delayed output buffer, inserting the string "prefix"
 * just after the last newline in the buffer (or in front of the whole
 * buffer if it holds no newline).  Does nothing unless output is being
 * delayed; afterwards the buffer is empty and delaying is turned off.
 */
void
flushout(const char *prefix)
{
	char *nlpos;

	if (!delayout)
		return;

	nlpos = strrchr(delbuf, '\n');
	if (nlpos != NULL) {
		*nlpos = '\0';
		printf( "%s", delbuf );
		nlpos++;
		printf( "\n%s%s", prefix, nlpos );
	} else {
		printf( "%s%s", prefix, delbuf );
	}
	/* empty the buffer so a later flush can't re-emit this text */
	delbuf[0] = '\0';
	delcnt = 0;
	delayout = 0;
}

#include "lex.yy.c"