/* Simple program to check LaTeX code for matching parentheses, etc. For installation & usage, see README and run 'check-parens --help'. TODO(?): - handling of empty '\right.' delimiter is hacky. - delimiter size detection is a bit a hack: '\big{(}' doesn't work. - false positives for special constructs like: '\newenvironment{\foo}{\begin{bar}}{\end{bar}}' Copyright (C) 2012--2015 Jaap Eldering (jaap@jaapeldering.nl). This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "check-parens-defconfig.hxx" using namespace std; #define PROGRAM "check-parens" #define VERSION "2015.07.24" #define AUTHORS "Jaap Eldering" const int EXIT_NONFATAL = 1; const int EXIT_FATAL = 2; const string defconfigfilename = ".check-parens.conf"; enum paren_type { paren_hard, paren_soft, paren_env }; char *progname; char *filename; FILE *texfile; int ignore_soft; int ignore_size; int file_line_error; int debug_level; int show_help; int show_version; struct option const long_opts[] = { {"ignore-soft", no_argument, &ignore_soft, 1 }, {"ignore-size", no_argument, &ignore_size, 1 }, {"file-line-error", no_argument, &file_line_error, 1 }, {"no-file-line-error", no_argument, &file_line_error, 0 }, {"config-file", required_argument, NULL, 'c'}, {"debug", no_argument, NULL, 'd'}, {"help", no_argument, &show_help, 1 }, {"version", no_argument, &show_version, 1 }, { NULL, 0, NULL, 0 } }; void version() { printf("\ %s -- version %s\n\ Written by %s\n\n\ %s comes with ABSOLUTELY NO WARRANTY. This is free software, and\n\ you are welcome to redistribute it under the GNU General Public Licence\n\ as published by the Free Software Foundation; either version 3, or (at your\n\ option) any later version.\n",PROGRAM,VERSION,AUTHORS,PROGRAM); exit(0); } void usage() { printf("\ Usage: %s [option]... ...\n\ Check LaTeX source for non-matching parentheses.\n\ The exitcode is the number of files with fatal errors.\n\ \n\ -i --ignore-soft ignore mismatch of \"soft\" parentheses that\n\ do not break LaTeX syntax\n\ -s --ignore-size ignore mismatch of delimiter sizes\n\ --file-line-error format mismatch messages as 'file:line:char:error'\n\ just as many compilers do (default)\n\ --no-file-line-error print messages in more human readable format\n\ -c --config-file=FILE read configuration from FILE (default: ~/%s)\n\ -d --debug add debugging output, repeat for more verbosity\n\ --help display this help and exit\n\ --version output version information and exit\n", progname, defconfigfilename.c_str()); exit(0); } void error(int errnum, const char *format, ...) { va_list ap; va_start(ap,format); fprintf(stderr,"%s",progname); if ( format!=NULL ) { fprintf(stderr,": "); vfprintf(stderr,format,ap); } if ( errnum!=0 ) { fprintf(stderr,": %s",strerror(errnum)); } if ( format==NULL && errnum==0 ) { fprintf(stderr,": unknown error"); } fprintf(stderr,"\nTry `%s --help' for more information.\n",progname); va_end(ap); exit(-1); } void debug(int level, const char *format, ...) { va_list ap; if ( debug_level paren_list; map delim_list; map paren_open, paren_close; struct paren_stack { vector data; int lasthard; paren_stack(): data(), lasthard(-1) {} bool empty() { return data.empty(); } size_t size() { return data.size(); } void push(const paren& p) { if ( !p.type==paren_soft ) lasthard = data.size(); data.push_back(p); } const paren& top() { return data.back(); } void pop() { data.pop_back(); while ( lasthard>=(int)data.size() || (lasthard>=0 && data[lasthard].type==paren_soft) ) lasthard--; } // Check if argument is at top (or highest "hard" parenthesis) int attop(const string& s, bool soft) { if ( soft ) { return data.size()>0 && s==data.back().close; } else { return lasthard>=0 && s==data[lasthard].close; } } // Remove irrelevant elements from stack, e.g. before printing it. void clean() { for(int i=data.size()-1; i>=0; --i) { paren& p = data[i]; if ( (ignore_soft && p.type==paren_soft) ) { data.erase(data.begin()+i); } } } }; int readchar() { int c = getc(texfile); pos++; if ( c==EOF ) return EOF; if ( c=='\n' ) { line++; pos = 0; } return c; } int unreadchar(int c) { if ( c=='\n' ) { line--; pos = 0; } pos--; return ungetc(c,texfile); } void message(size_t line, size_t pos, const char *format, ...) { va_list ap; va_start(ap,format); if ( file_line_error ) { if ( line!=0 ) printf("%s:%zu:%zu: ",filename,line,pos); } else { if ( line!=0 ) printf("Line %zu, char %zu: ",line,pos); } vprintf(format,ap); printf("\n"); va_end(ap); } void readconfig(istream& conf) { string line; while ( getline(conf,line) && !conf.fail() ) { istringstream ss(line); string command, par_open, par_close, delim, token; if ( !(ss >> command) || command.substr(0,1)=="#" ) continue; if ( command=="paren" ) { ss >> token; if ( token!="=" ) error(0,"expected `=' after `%s' command",command.c_str()); if ( ! (ss >> par_open >> par_close) ) error(0,"expected paren open and close"); paren_type type = paren_hard; if ( ss >> token && token=="soft" ) type = paren_soft; paren_list.push_back(paren(par_open,par_close,type)); } else if ( command=="delimsize" ) { ss >> token; if ( token!="=" ) error(0,"expected `=' after `%s' command",command.c_str()); if ( ! (ss >> delim) ) error(0,"expected a paren delimiter"); delim_list[delim] = delim; while ( ss >> token ) delim_list[token] = delim; } else { error(0,"found unknown command `%s' in configuration file",command.c_str()); } } } int readtoken(string &tok, bool verbatim = false) { int c; if ( (c=readchar())==EOF ) return 0; currline = line; currpos = pos; tok = string(1,c); // Gobble comments until end of line if ( !verbatim && c=='%' ) { while ( c!=EOF && c!='\n' ) c = readchar(); } if ( c!='\\' ) return 1; if ( (c=readchar())==EOF ) return 1; tok += c; if ( !isalpha(c) ) return 1; while ( (c=readchar())!=EOF ) { if ( isalpha(c) ) { tok += c; } else { unreadchar(c); break; } } if ( (tok=="\\begin" || tok=="\\end") && c=='{' ) { size_t saveline = currline; size_t savepos = currpos; tok += readchar(); string tok2; while ( readtoken(tok2)!=EOF && tok2!="}" ) tok += tok2; if ( tok2!="}" ) { message(saveline,savepos,"unclosed environment delimiter."); return 0; } tok += tok2; currline = saveline; currpos = savepos; } return 1; } void printstack(paren_stack stk) { message(0,0,"Parentheses stack contains %zu elements:",stk.size()); while ( !stk.empty() ) { paren p = stk.top(); stk.pop(); message(p.line,p.pos,"'%s' (requires closing '%s').", p.open.c_str(),p.close.c_str()); } } // Returns 0 on success, or EXIT_NONFATAL or EXIT_FATAL. int checkfile(char * const fname) { string tok, delimsize, delimnext; paren_stack stk; bool soft_paren; bool nonfatal_error = false; filename = fname; texfile = fopen(filename,"r"); if ( texfile==NULL ) error(errno,"opening file '%s'",filename); line = 1, pos = 0; currline = 0, currpos = 0; debug(1,"starting to check file '%s'.\n",fname); while ( readtoken(tok) ) { debug(2,"%zu:%zu tok = '%s'\n",currline,currpos,tok.c_str()); // (Re)set parenthesis delimiter size: delimsize = delimnext; delimnext = ( delim_list.count(tok) ? tok : "" ); // Gobble whitespace after delimiter size tokens if ( delimsize!="" && isspace(tok[0]) ) { delimnext = delimsize; continue; } // Skip over \verb and verbatim environments: if ( tok=="\\verb" ) { size_t saveline = currline; size_t savepos = currpos; char c, endchar = readchar(); if ( endchar==EOF ) { message(currline,currpos,"Unexpected end of file."); printstack(stk); return EXIT_FATAL; } debug(1,"%zu:%zu found '%s', searching for end character '%c'\n", saveline,savepos,tok.c_str(),endchar); do { c = readchar(); if ( c==EOF ) { message(currline,currpos,"Unexpected end of file."); printstack(stk); return EXIT_FATAL; } } while ( c!=endchar ); debug(1,"%zu:%zu found verbatim end '%c'\n",currline,currpos,c); continue; } if ( tok=="\\begin{verbatim}" ) { string endtok = "\\end"+tok.substr(6); debug(1,"%zu:%zu found '%s', searching for end token '%s'\n", currline,currpos,tok.c_str(),endtok.c_str()); do { readtoken(tok,true); } while ( tok!=endtok ); debug(1,"%zu:%zu found verbatim end '%s'\n", currline,currpos,tok.c_str()); continue; } // First try to pop token from stack, then push; // then matching of mathmode $$'s works out-of-the-box. soft_paren = false; if ( paren_close.count(tok) ) soft_paren = (paren_close[tok].type==paren_soft); if ( !stk.empty() && stk.attop(tok,soft_paren) ) { // Print warnings for any soft parentheses dropped: paren p = stk.top(); stk.pop(); while ( p.close!=tok ) { if ( !ignore_soft ) { message(p.line,p.pos, "closing '%s' at %zu:%zu does not match opening '%s'.", tok.c_str(),currline,currpos,p.open.c_str()); nonfatal_error = true; } p = stk.top(); stk.pop(); } debug(1,"pop '%s' %s %s\n",tok.c_str(),p.size.c_str(),delimsize.c_str()); // Check for same delimiter size: if ( !ignore_size && delim_list[p.size]!=delim_list[delimsize] ) { message(currline,currpos, "size '%s' of '%s' does not match " "opening size '%s' at %zu:%zu.", delimsize.c_str(),tok.c_str(), p.size.c_str(),p.line,p.pos); nonfatal_error = true; } continue; } // Special case closing '\right.' if ( !stk.empty() ) { paren p = stk.top(); if ( tok=="." && delimsize=="\\right" && p.type==paren_soft && delim_list[p.size]=="\\left" ) { stk.pop(); continue; } } // Now push new open tokens onto the stack: if ( paren_open.count(tok) ) { debug(1,"push '%s' %s\n",tok.c_str(),delimsize.c_str()); stk.push(paren(paren_open[tok],delimsize)); continue; } if ( tok.substr(0,7)=="\\begin{" ) { debug(1,"push '%s'\n",tok.c_str()); stk.push(paren(tok,"\\end"+tok.substr(6))); continue; } // Finally, any stack-able token now means a close error: if ( paren_close.count(tok) || tok.substr(0,5)=="\\end{" ) { soft_paren = false; if ( paren_close.count(tok) ) soft_paren = (paren_close[tok].type==paren_soft); if ( !soft_paren || !ignore_soft ) { if ( stk.empty() ) { message(currline,currpos, "closing '%s' without matching open.", tok.c_str()); nonfatal_error = true; } else { paren p = stk.top(); message(p.line,p.pos, "closing '%s' at %zu:%zu does not match opening '%s'.", tok.c_str(),currline,currpos,p.open.c_str()); nonfatal_error = true; } } if ( !soft_paren ) { message(0,0,"Aborting after fatal mismatch in file '%s'.",filename); printstack(stk); return EXIT_FATAL; } } } debug(1,"finished checking file '%s', checking stack.\n",fname); stk.clean(); if ( !stk.empty() ) { message(0,0,"Unmatched elements at end of file '%s'.",filename); printstack(stk); return EXIT_FATAL; } if ( fclose(texfile)!=0 ) error(errno,"closing file '%s'",filename); if ( nonfatal_error ) return EXIT_NONFATAL; return 0; } int main(int argc, char **argv) { string configfilename; int opt, res; progname = argv[0]; /* Parse command-line options */ ignore_soft = ignore_size = debug_level = 0; file_line_error = 1; show_help = show_version = 0; opterr = 0; while ( (opt = getopt_long(argc,argv,"+isc:d",long_opts,(int *) 0))!=-1 ) { switch ( opt ) { case 0: /* long-only option */ break; case 'i': ignore_soft = 1; break; case 's': ignore_size = 1; break; case 'c': configfilename = string(optarg); break; case 'd': debug_level++; break; case ':': /* getopt error */ case '?': error(0,"unknown option or missing argument `%c'",optopt); break; default: error(0,"getopt returned character code `%c' ??",(char)opt); } } if ( show_help ) usage(); if ( show_version ) version(); if ( argc<=optind ) error(0,"no file(s) specified"); // Read configuration from default file if it exists: if ( configfilename=="" && getenv("HOME")!=NULL ) { configfilename = string(getenv("HOME")) + "/" + defconfigfilename; ifstream test(configfilename.c_str()); if ( !test.good() ) configfilename = ""; } // For some reason (at least my) GCC doesn't support std::string argument? ifstream conffile(configfilename.c_str()); if ( configfilename!="" && !conffile.good() ) { error(errno,"cannot open configuration file `%s'",configfilename.c_str()); } if ( configfilename!="" ) { readconfig(conffile); debug(1,"using configuration file = '%s'\n",configfilename.c_str()); } else { istringstream confstr(defconfigstring); readconfig(confstr); debug(1,"using internal configuration\n"); } // Initialize map for opening/closing parentheses: for(size_t i=0; i0 ) printf("\n"); } else { printf("In file %s:\n",argv[optind]); } error = checkfile(argv[optind]); if ( error>=2 ) res++; if ( !file_line_error ) printf("\n"); } return res; }