### ====================================================================
###  @Awk-file{
###     author          = "Alan Jeffrey with Michal Jaegermann",
###     version         = "0.05",
###     date            = "08 March 1993",
###     time            = "21:33:01 GMT",
###     filename        = "mergevpl.awk",
###     address         = "School of Cognitive and Computing Sciences
###                        University of Sussex
###                        Brighton BN1 9QH
###                        UK",
###     telephone       = "+44 273 606755 x 3238",
###     FAX             = "+44 273 678188",
###     checksum        = "03182 531 2898 19087",
###     email           = "alanje@cogs.sussex.ac.uk",
###     codetable       = "ISO/ASCII",
###     keywords        = "vpl awk virtual fonts",
###     supported       = "yes",
###     abstract        = "This is an awk script which merges a number
###                        of vpl files.",
###     docstring       = "The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.  This is produced by Robert
###                        Solovay's checksum utility.",
###     package         = "stands alone",
###     dependencies    = "none",
###  }
### ====================================================================
#
# Copyright 1993 Alan Jeffrey and Michal Jaegermann.
#
# 15 Dec 1992, v0.01: Created file.
#
# 16 Dec 1992, v0.02: Fixed a bug with the loop
#
#    for (filenumber=1; filenumber ...
#
# [NOTE: the middle of this entry was lost when the file was extracted;
# the surviving text resumes at "2147483647", apparently describing
# numbers larger than that value.]  This means that checksums
# have to be read in as strings rather than numbers.
#
# 23 Feb 1993, v0.03: Michal Jaegermann (NTOMCZAK@vm.ucs.UAlberta.CA)
# rewrote much of the code.  The important changes are:
#
#   * We now use awk's primitives for reading through files,
#     rather than opening each of the vpl files ourselves.  Why
#     I didn't do this in the first place is beyond me...
#
#   * Looking up the ASCII value of a character, doing upper-to-lower,
#     lower-to-upper and hex-to-decimal conversion is much smoother.
#
#   * Numbers are now kept as reals rather than integers, so we no
#     longer have problems with overflow.
#
#   * We now print the value of DESIGNSIZE no matter what it is.
#
#   * A bug with some uses of gettoken() was fixed.  I had assumed
#     that if you say foo[gettoken()] = baz(gettoken()) then the
#     first gettoken() to be called will be the one inside foo[...].
#     This is not part of the AWK specification!  Indeed, some AWK
#     implementations will evaluate the RHS of an assignment before
#     the LHS.
#
# In general, the code is now much cleaner---perhaps this has something
# to do with the fact that I've never written an AWK program before!
#
# 24 Feb 1993, v0.04: Since we no longer have to worry about overflows,
# CHECKSUM and FONTCHECKSUM can be kept as numbers rather than strings.
#
# I rewrote getnumber() so that parsing BOUNDARYCHAR will return the
# current value of boundarychar.  This means that any LABEL BOUNDARYCHAR
# instructions in the LIGTABLE will refer to the current value of
# BOUNDARYCHAR, and not any later resetting of it.  It's not obvious what
# the correct behaviour in this situation should be...
#
# I removed some TABs from the output, since VPtoVF complains about
# TAB characters.
#
# Since VPL integers are always positive, I replaced the %ld format for
# integers by %lu.
#
# 8 Mar 1993, v0.05: Updated some comments.
#
# Alan.

# This is an awk script for merging .vpl files.  It's called by saying
#
#    awk -f mergevpl.awk FILENAMES
#
# The vpl files are then merged together, with precedence given to the
# later files.
# For example, if afm2tfm creates a file thingraw.vpl,
# which contains a symbol font, you can set the values of fontdimens
# 8--22 by creating a .vpl file thingdim.vpl containing
#
#    (FONTDIMEN
#       (PARAMETER D 8 R 0.04)
#       (PARAMETER D 9 R 0.12)
#       (PARAMETER D 10 R 0.17)
#       (PARAMETER D 11 R 0.2)
#       (PARAMETER D 12 R 0.6)
#       (PARAMETER D 13 R 0.11)
#    )
#
# You can then merge this with thingraw.vpl by saying
#
#    awk -f mergevpl.awk thingraw.vpl thingdim.vpl > thing.vpl
#
# You can also do things like remove italic correction from characters
# by merging the vpl file with one containing:
#
#    (CHARACTER D 123 (CHARIC D 0))
#
# This will leave the rest of the character alone, but set its italic
# correction to zero.
#
# This script uses 1 design unit to the design size, and scales all
# dimensions appropriately.  This is useful, since the maximum measurement
# that afm2tfm allows is 2.048 * design size, which is usually only
# 20.48pt!  This is a bit annoying, so you can use this script to
# scale the design units back down.  If you want a different unit, you
# can set the variable U on the command line.
#
# Alan.

# Start-up: set the defaults, print the VTITLE banner, and build the
# lookup strings used later for character, case and hex conversions.
BEGIN {
    true = 1
    nextlig = 0
    designunits = 1.0
    if (!U) {
        U = 1
    }

    # The title lists the file arguments given on the command line.
    printf "(VTITLE Created by awk -f mergevpl.awk"
    i = 1
    while (i < ARGC) {
        printf " %s", ARGV[i]
        i++
    }
    print ")"
    print "(COMMENT Please edit that VTITLE if you edit this file)"

    # ASCII table: the character with code k is at position k of
    # cstring, so getnumber() can find a code with index().
    cstring = ""
    i = 1
    while (i < 255) {
        cstring = cstring sprintf("%c", i)
        i++
    }

    # Alphabets for the case conversion done by upper().  If your awk
    # has toupper/tolower you may use those instead, and then these
    # two strings are not needed.
    lcaseletters = "abcdefghijklmnopqrstuvwxyz"
    ucaseletters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # Digits for the hex scan in hexdigit().
    hexdigs = "0123456789ABCDEF"
}

# Main loop.  Blank lines are of no interest.
NF != 0 {
    # Put blanks around parentheses so that each one is its own field.
    gsub(/\(/, " ( ", $0)
    gsub(/\)/, " ) ", $0)

    # tokenfield is the number of the last field that was read.
    tokenfield = 0
    do {
        # Every top-level construct has to open with a parenthesis.
        tokenfield++
        if ($tokenfield != "(") {
            error("I was expecting a (")
        }

        # Read the property name into token and dispatch on it.
        getcommand()
        if (token == "CHECKSUM") {
            checksum = terminal(getnumber())
        } else if (token == "DESIGNSIZE") {
            designsize = terminal(getnumber())
        } else if (token == "DESIGNUNITS") {
            designunits = terminal(getnumber())
        } else if (token == "CODINGSCHEME") {
            codingscheme = getterminal()
        } else if (token == "FAMILY") {
            family = terminal(gettoken())
        } else if (token == "FACE") {
            face = terminal(getnumber())
        } else if (token == "SEVENBITSSAFEFLAG") {
            sevenbits = terminal(gettoken())
        } else if (token == "HEADER") {
            header = getterminal()
        } else if (token == "FONTDIMEN") {
            parsefontdimen()
        } else if (token == "LIGTABLE") {
            parseligtable()
        } else if (token == "BOUNDARYCHAR") {
            boundarychar = terminal(getnumber())
        } else if (token == "CHARACTER") {
            parsecharacter()
        } else if (token == "VTITLE") {
            # Read and discard: a fresh VTITLE was printed in BEGIN.
            getterminal()
        } else if (token == "MAPFONT") {
            parsemapfont()
        } else if (token == "COMMENT") {
            # Read and discard.
            getterminal()
        } else {
            error("I was expecting a top-level property")
        }
    } while (tokenfield < NF)
}

# Write output file based on the vpl files that have been read
# in.
#
# Every property is printed only when its stored value is non-empty and
# non-zero, except DESIGNUNITS and the FONTDIMEN list, which are always
# printed.  Dimensions were already rescaled to U design units when they
# were parsed, so DESIGNUNITS is written as U.
END {
    # do not produce any output if we got an error
    if (1 == errflag) exit 1

    if (checksum) {printf "(CHECKSUM D %lu)\n", checksum}
    if (designsize) {printf "(DESIGNSIZE R %s)\n", rfmt(designsize)}
    printf "(DESIGNUNITS R %s)\n", rfmt(U)
    # codingscheme and header were stored by getterminal() and so still
    # carry their own property name.
    if (codingscheme) {printf "(%s)\n", codingscheme}
    if (family) {printf "(FAMILY %s)\n", family}
    if (face) {printf "(FACE D %lu)\n", face}
    if (sevenbits) {printf "(SEVENBITSSAFEFLAG %s)\n", sevenbits}
    if (header) {printf "(%s)\n", header}
    # The newline was missing here, which glued the following
    # "(FONTDIMEN" onto the same output line.
    if (boundarychar) {printf "(BOUNDARYCHAR D %lu)\n", boundarychar}

    print "(FONTDIMEN"
    for (n in fontdim) {
        printf "(PARAMETER D %lu R %s)\n", n, rfmt(fontdim[n])
    }
    print " )"

    for (n in fontname) {
        printf "(MAPFONT D %lu\n", n
        printf " (FONTNAME %s)\n", fontname[n]
        if (fontarea[n]) {printf " (FONTAREA %s)\n", fontarea[n]}
        if (fontchecksum[n]) {
            printf " (FONTCHECKSUM D %lu)\n", fontchecksum[n]
        }
        if (fontat[n]) {printf " (FONTAT R %s)\n", rfmt(fontat[n])}
        # FONTDSIZE is a real number and needs the R type tag, like
        # FONTAT above; the tag used to be missing.
        if (fontdsize[n]) {
            printf " (FONTDSIZE R %s)\n", rfmt(fontdsize[n])
        }
        print " )"
    }

    if (nextlig > 0) {
        print "(LIGTABLE"
        # liglabel[] keeps the labels in the order they were first seen.
        for (n = 1; n <= nextlig; n++) {
            printf " %s\n", ligs[liglabel[n]]
        }
        print " )"
    }

    for (n = 0; n < 256; n++) {
        if (char[n]) {
            printf "(CHARACTER D %lu\n", n
            if (charwd[n]) {printf " (CHARWD R %s)\n", rfmt(charwd[n])}
            if (charht[n]) {printf " (CHARHT R %s)\n", rfmt(charht[n])}
            if (chardp[n]) {printf " (CHARDP R %s)\n", rfmt(chardp[n])}
            if (charic[n]) {printf " (CHARIC R %s)\n", rfmt(charic[n])}
            if (nextlarger[n]) {
                printf " (NEXTLARGER D %lu)\n", nextlarger[n]
            }
            if (varchar[n]) {printf " (%s)\n", varchar[n]}
            # parsecharacter() stores the MAP property in charmap[];
            # reading it from map[] meant that no MAP was ever written.
            if (charmap[n]) {printf " (%s)\n", charmap[n]}
            print " )"
        }
    }
}

# An error message. Return an error value to a calling shell
# This will work only with the 'right' shell which understands "cat 1>&2"
# construct. If you are using gawk replace with 'printf ... > "/dev/stderr"'.
#
# s is the description of what was expected.  The message also shows the
# current token, the line number, the file name and the input line.
function error(s) {
    printf "%s not %s at line %lu of %s.\n%s\n",
           s, token, FNR, FILENAME, $0 | "cat 1>&2"
    errflag = 1
    exit 1
}

# Format a real number for output.  A ".0" is appended when the
# formatted value contains no "." at all.
function rfmt(val, s) {
    s = sprintf("%.15g", val)
    if (!index(s, ".")) {
        s = s ".0"
    }
    return s
}

# Return the next token.  While the current line still has unread
# fields, the next field is the token.  Otherwise a new line is read,
# skipping blank lines, and blanks are put around its parentheses so
# that they can be parsed as separate lexical units.
#
# 'getline' is therefore executed only when the current construct
# continues on a following line.  Running out of input here is an error.
function gettoken() {
    if (tokenfield >= NF) {
        for (;;) {
            if (getline <= 0) {
                token = "EOF or read error"
                error("Expected new token")
            }
            if (NF != 0) {
                break
            }
        }
        gsub(/\(/, " ( ", $0)
        gsub(/\)/, " ) ", $0)
        tokenfield = 0
    }
    return $(++tokenfield)
}

# Returns nothing; sets the global token to the upper-case form of the
# next token.
function getcommand() {
    # If your awk has the built-in toupper() required by POSIX, then
    #     token = toupper(gettoken())
    # is preferable.
    token = upper(gettoken())
}

# Upper-case conversion; not needed if toupper() is present.  Only the
# letters listed in lcaseletters are changed.
function upper(s, out, i, pos, ch) {
    out = ""
    i = 1
    while (i <= length(s)) {
        ch = substr(s, i, 1)
        pos = index(lcaseletters, ch)
        if (pos != 0) {
            ch = substr(ucaseletters, pos, 1)
        }
        out = out ch
        i++
    }
    return out
}

# To get a number, we get a token, and use that to decide whether the
# next token is ascii, decimal, octal, or 3-character Xerox.
# Reads a type letter and the value after it, and returns the value as
# a number.  The bare word BOUNDARYCHAR stands for the current value of
# boundarychar.
function getnumber() {
    getcommand()
    if (token == "C") {return index(cstring, gettoken())}
    # ".0" added to guard against overflow if your awk does not do
    # an automatic conversion to 'double'
    if (token == "D") {return gettoken() ".0"}
    if (token == "R") {return gettoken()}
    if (token == "O") {return octal(gettoken())}
    if (token == "BOUNDARYCHAR") {return boundarychar}
    # Face codes and hex digits are matched in upper case, so the value
    # is read with getcommand().  It must be read exactly once: reading
    # a second token here converted whatever followed the value.
    if (token == "F") {getcommand(); return xerox(token)}
    if (token == "H") {getcommand(); return hex(token)}
    error("I was expecting a number")
}

# Do conversions in a portable manner. If your awk follows POSIX
# standard you may replace functions below by adjusting CONVFMT
# for much faster conversions.

# Value of the octal digit string a.  The digits are not validated.
function octal(a, i, result) {
    result = 0.0
    for (i = 1; i <= length(a); i++) {
        result = (result * 8.0) + substr(a, i, 1)
    }
    return result
}

# Value of the hex digit string a; each digit must appear in hexdigs.
function hex(a, i, result) {
    result = 0.0
    for (i = 1; i <= length(a); i++) {
        result = (result * 16.0) + hexdigit(substr(a, i, 1))
    }
    return result
}

# Value of the single hex digit a, found by its position in hexdigs.
function hexdigit(a, pos) {
    if (0 != (pos = index(hexdigs, a)))
        return (pos - 1)
    else
        error("I was expecting a Hex number")
}

# Number of the 3-character Xerox face code a: MRR is 0, MIR is 1, and
# so on up to LIE, which is 17.  The codes are kept in numeric order in
# a blank-separated list, each entry four characters wide, so the
# position of " a " in the list gives the number.  (This used to be a
# chain of tests written with "=" instead of "==", which assigned to a
# and therefore always returned 0.)
function xerox(a, pos) {
    pos = index(" MRR MIR BRR BIR LRR LIR MRC MIC BRC BIC LRC LIC" \
                " MRE MIE BRE BIE LRE LIE ", " " a " ")
    if (pos > 0) {return (pos - 1) / 4}
    error("I was expecting a 3-character Xerox code")
}

# To get a number in design units, we read in a number, then scale it
# by designunits : U.
function getunit() {
    return (getnumber() * U / designunits)
}

# A terminal token is one followed by ).
# Reads the next token into the global token and checks that it is the
# closing parenthesis.  Returns a unchanged, so that a property can be
# read and stored in one go:  x = terminal(getnumber()).
function terminal(a) {
    token = gettoken()
    if (token == ")") {return a}
    else {error("I was expecting a )")}
}

# This function gets an entire string of tokens, up to a closing ).
# It balances parentheses, for example if the input is
#
#    ( COMMENT foo ( bar ) baz )
#
# then getterminal will return "COMMENT foo ( bar ) baz".
#
# The result starts with the current value of token (the property name
# that has just been read) and has a blank after every token.
function getterminal( level, result) {
    level = 1
    result = ""
    while (level > 0) {
        result = result token " "
        token = gettoken()
        if (token == "(") {level += 1}
        else if (token == ")") {level -= 1}
    }
    return result
}

# This command parses a fontdimen assuming the (FONTDIMEN has already
# been read.
#
# Named parameters are stored in fontdim[] under their parameter
# number.  SLANT is stored as read; all the others are read with
# getunit() and so are rescaled.  The names DEFAULTRULETHICKNESS and
# BIGOPSPACING1 to BIGOPSPACING5 share the numbers 8 to 13 with NUM1 to
# SUP1.
function parsefontdimen( idx) {
    token = gettoken()
    while (token == "(") {
        getcommand()
        if (token == "SLANT") {fontdim[1] = terminal(getnumber())}
        else if (token == "SPACE") {fontdim[2] = terminal(getunit())}
        else if (token == "STRETCH") {fontdim[3] = terminal(getunit())}
        else if (token == "SHRINK") {fontdim[4] = terminal(getunit())}
        else if (token == "XHEIGHT") {fontdim[5] = terminal(getunit())}
        else if (token == "QUAD") {fontdim[6] = terminal(getunit())}
        else if (token == "EXTRASPACE") {fontdim[7] = terminal(getunit())}
        else if (token == "NUM1") {fontdim[8] = terminal(getunit())}
        else if (token == "NUM2") {fontdim[9] = terminal(getunit())}
        else if (token == "NUM3") {fontdim[10] = terminal(getunit())}
        else if (token == "DENOM1") {fontdim[11] = terminal(getunit())}
        else if (token == "DENOM2") {fontdim[12] = terminal(getunit())}
        else if (token == "SUP1") {fontdim[13] = terminal(getunit())}
        else if (token == "SUP2") {fontdim[14] = terminal(getunit())}
        else if (token == "SUP3") {fontdim[15] = terminal(getunit())}
        else if (token == "SUB1") {fontdim[16] = terminal(getunit())}
        else if (token == "SUB2") {fontdim[17] = terminal(getunit())}
        else if (token == "SUPDROP") {fontdim[18] = terminal(getunit())}
        else if (token == "SUBDROP") {fontdim[19] = terminal(getunit())}
        else if (token == "DELIM1") {fontdim[20] = terminal(getunit())}
        else if (token == "DELIM2") {fontdim[21] = terminal(getunit())}
        else if (token == "AXISHEIGHT") {fontdim[22] = terminal(getunit())}
        else if (token == "DEFAULTRULETHICKNESS")
            {fontdim[8] = terminal(getunit())}
        else if (token == "BIGOPSPACING1") {fontdim[9] = terminal(getunit())}
        else if (token == "BIGOPSPACING2") {fontdim[10] = terminal(getunit())}
        else if (token == "BIGOPSPACING3") {fontdim[11] = terminal(getunit())}
        # This name used to be misspelt "BIGOPAPACING4", so the real
        # property was rejected as an unknown FONTDIMEN property.
        else if (token == "BIGOPSPACING4") {fontdim[12] = terminal(getunit())}
        else if (token == "BIGOPSPACING5") {fontdim[13] = terminal(getunit())}
        else if (token == "PARAMETER") {
            # The number is read on its own line first: awk does not
            # fix the order in which a subscript and the right-hand
            # side of an assignment are evaluated.
            idx = sprintf("%lu", getnumber())
            fontdim[idx] = terminal(getunit())
        }
        else if (token == "COMMENT") {getterminal()}
        else error("I was expecting a FONTDIMEN property")
        token = gettoken()
    }
    if (token != ")") {error("I was expecting a FONTDIMEN property")}
}

# This function parses a mapfont, assuming the (MAPFONT has already
# been read.
#
# The properties are stored in fontname[], fontarea[], fontchecksum[],
# fontat[] and fontdsize[], indexed by the font number n.  FONTAT is
# rescaled with getunit(); FONTDSIZE is stored as read.
function parsemapfont( n) {
    n = sprintf("%lu", getnumber())
    token = gettoken()
    while (token == "(") {
        getcommand()
        if (token == "FONTNAME") {fontname[n] = terminal(gettoken())}
        else if (token == "FONTAREA") {fontarea[n] = terminal(gettoken())}
        else if (token == "FONTCHECKSUM")
            {fontchecksum[n] = terminal(getnumber())}
        else if (token == "FONTAT") {fontat[n] = terminal(getunit())}
        else if (token == "FONTDSIZE") {fontdsize[n] = terminal(getnumber())}
        else if (token == "COMMENT") {getterminal()}
        else error("I was expecting a MAPFONT property")
        token = gettoken()
    }
    if (token != ")") {error("I was expecting a MAPFONT property")}
}

# This function parses a ligtable, assuming the (LIGTABLE has already
# been read.
# The program for each label is collected as one string in ligs[label],
# and liglabel[1..nextlig] records the labels in the order in which
# they were first seen.  Instructions are appended to the program of
# the most recently read LABEL.
function parseligtable( label, num) {
    token = gettoken()
    while (token == "(") {
        getcommand()
        if (token == "LABEL") {
            label = sprintf("%.15g", terminal(getnumber()))
            # A label that was seen before keeps its existing program
            # and its place in the output order.
            if (!ligs[label]) {
                nextlig += 1
                liglabel[nextlig] = label
                ligs[label] = "(LABEL D " label ")"
            }
        }
        # All eight ligature operators of the property-list format are
        # copied through unchanged.  Only the plain LIG used to be
        # accepted, so a ligtable using any other form stopped the
        # merge with an error.
        else if (token ~ /^(LIG|\/LIG|\/LIG>|LIG\/|LIG\/>|\/LIG\/|\/LIG\/>|\/LIG\/>>)$/ \
                 || token == "STOP" || token == "SKIP") {
            ligs[label] = ligs[label] " (" getterminal() ") "
        }
        else if (token == "KRN") {
            # The character is read on its own line first, so that it
            # is certain to be read before the kern amount.
            num = getnumber()
            ligs[label] = ligs[label] \
                (sprintf(" ( KRN D %lu R %f )", num, terminal(getunit())))
        }
        else if (token == "COMMENT") {getterminal()}
        else error("I was expecting a LIGTABLE property")
        token = gettoken()
    }
    if (token != ")") {error("I was expecting a LIGTABLE property")}
}

# This function parses a character, assuming the (CHARACTER has already
# been read.
#
# The character is marked in char[], and each property that is present
# overwrites the stored value for that character; properties that are
# not mentioned are left alone.  The dimensions are rescaled with
# getunit().
function parsecharacter( c) {
    c = sprintf("%lu", getnumber())
    char[c] = true
    token = gettoken()
    while (token == "(") {
        getcommand()
        if (token == "CHARWD") {charwd[c] = terminal(getunit())}
        else if (token == "CHARHT") {charht[c] = terminal(getunit())}
        else if (token == "CHARDP") {chardp[c] = terminal(getunit())}
        else if (token == "CHARIC") {charic[c] = terminal(getunit())}
        else if (token == "MAP") {charmap[c] = getterminal()}
        else if (token == "NEXTLARGER") {
            nextlarger[c] = sprintf("%.15g", terminal(getnumber()))
            # Whichever of NEXTLARGER and VARCHAR is read last replaces
            # the other.
            delete varchar[c]
        }
        else if (token == "VARCHAR") {
            varchar[c] = getterminal()
            # This used to be the bare expression "nextlarger[c]",
            # which removes nothing, so an earlier NEXTLARGER was
            # written out together with the VARCHAR.
            delete nextlarger[c]
        }
        else if (token == "COMMENT") {getterminal()}
        else error("I was expecting a CHARACTER property")
        token = gettoken()
    }
    if (token != ")") {error("I was expecting a CHARACTER property")}
}