% 
% \iffalse
%<*driver>
\documentclass{tcldoc}

\usepackage{array}


% Logos:
% 
\DeclareFontFamily{U}{logo}{}
\DeclareFontShape{U}{logo}{m}{n}{
  <-9>   logo8
  <9-10> logo9
  <10->  logo10
}{}
\DeclareFontShape{U}{logo}{bx}{n}{<-> logobf10}{}
\DeclareFontShape{U}{logo}{b}{n}{<-> ssub * logo/bx/n}{}
\DeclareFontShape{U}{logo}{m}{sl}{<-> logosl10}{}
\DeclareFontShape{U}{logo}{m}{it}{<-> ssub * logo/m/sl}{}

% \DeclareRobustCommand\META{{%
%    \fontencoding{U}\fontfamily{logo}\selectfont META%
% }}
\DeclareRobustCommand\MF{%
   {\fontencoding{U}\fontfamily{logo}\selectfont META}\-%
   {\fontencoding{U}\fontfamily{logo}\selectfont FONT}%
}
\DeclareRobustCommand\MP{%
   {\fontencoding{U}\fontfamily{logo}\selectfont META}\-%
   {\fontencoding{U}\fontfamily{logo}\selectfont POST}%
}
\DeclareRobustCommand\PS{PostScript}
\DeclareRobustCommand\TOne{Type~1}
\DeclareRobustCommand\package[1]{\textsf{#1}}


% Useful environments:
% 
% \begin{environment}{isyntax}
%   |isyntax| is for ``informal'' syntax specifications, such as
%   \begin{quote}
%     A \meta{varchar command} can be one of\\
%     \begin{quote}
%       |\vartop|\marg{glyph}\\
%       |\varmid|\marg{glyph}\\
%       |\varbot|\marg{glyph}\\
%       |\varrep|\marg{glyph}
%     \end{quote}
%   \end{quote}
%   The implementation is based on that of the \texttt{verse} 
%   environment. A plain \texttt{quote} usually works just as well, 
%   but \texttt{isyntax} handles breaking of long lines better (and 
%   more elegantly).
%   \changes{1.37}{2000/05/14}{Increased \cs{rightskip} stretchability 
%      and added setting of \cs{linepenalty}. (LH)}
%    \begin{macrocode}
\makeatletter
\newenvironment{isyntax}{%
   \let\\\@centercr
   \list{}{%
      \itemsep \z@
      \itemindent -1.5em%
      \listparindent \itemindent
      \advance \leftmargin 1.5em%
   }%
   \advance \rightskip \z@\@plus0.7\linewidth \relax
   \linepenalty=100\relax
   \item\relax
}{\endlist}
\makeatother
%    \end{macrocode}
% \end{environment}


% Tabular notes:
\makeatletter
\newcommand\tabnotemark[1]{%
   \leavevmode
   \ifhmode\edef\@x@sf{\the\spacefactor}\nobreak\fi
   \hbox{\@textsuperscript{%
      \normalfont\itshape
      \count@=#1\relax\@alph\count@
   }}%
   \ifhmode\spacefactor\@x@sf\fi
   \relax
}
\newenvironment{tabnotes}{%
   \list{\the\c@mpfootnote}{%
      \usecounter{mpfootnote}%
      \def\makelabel##1{\hfil\tabnotemark{##1}}%
      \normalfont\footnotesize
      \setlength\leftmargin{0pt}%
      \setlength\labelsep{0pt}%
      \setlength\labelwidth{1.8em}%
      \setlength\itemindent{\labelwidth}%
      \setlength\listparindent{1em}%
      \setlength\parsep{0pt plus 1pt}%
      \setlength\itemsep{\parsep}%
      \setlength\topsep{\footnotesep}%
   }%
}{\endlist}
\makeatother


\CodelineIndex

\title{\texttt{pstokens.dtx}}
\author{Lars Hellstr\"om}
\date{Work in progress}
\begin{document}
\maketitle

\DocInput{pstokens.dtx}

\end{document}
%</driver>
% \fi
% 
% \begin{abstract}
%   \package{pstokens} is a collection of utility routines for 
%   \Tcllogo\ programs that need to view a file as a sequence of 
%   \PS\ tokens rather than as a sequence of characters or lines.
% \end{abstract}
% 
% 
% \setnamespace{pstokens}
% 
% First of all, the \texttt{pstokens} namespace must be defined.
% \begin{tcl}
namespace eval pstokens {}
% \end{tcl}
% 
% \section{Reading and writing tokens}
% 
% This section contains the routines for reading a text file as a 
% sequence of tokens (tokenizing) and writing a sequence of tokens 
% to a text file (detokenizing). 
% 
% 
% \subsection{Data structures for tokens}
% 
% A \emph{token} is a \Tcllogo\ list with the format
% \begin{quote}
%   \word{type} \word{data}\regstar
% \end{quote}
% The interpretation of the \word{data} depends on what the \word{type} 
% is, as specified below. Some of the token types only occur internally 
% in the tokenizer and some others have no meaning to a \PS\ interpreter, 
% but this organisation seems to be the most convenient here.
% 
% A primary goal for the format of tokens is that if a sequence of 
% tokens are read from one file and written to another then the only 
% things that may have changed should be the whitespace between the 
% tokens, not the tokens themselves. Therefore the tokenizer generally 
% avoids interpreting the tokens any more than necessary; 
% interpretation of tokens is instead handled in the next section.
% 
% The token types that occur in \package{pstokens} are
% \begin{description}
%   \item[\texttt{number}, \texttt{radixnumber}]
%     \describestring[token type]{number}
%     \describestring[token type]{radixnumber}
%     A number (integer or real) and a radix number, respectively. There 
%     is one \word{data} item, which contains the string that 
%     represented the number in the tokenized text.
%   \item[\texttt{string}, \texttt{hexstring}, \texttt{base-85}]
%     \describestring[token type]{string}
%     \describestring[token type]{hexstring}
%     \describestring[token type]{base-85}
%     A parenthesis-delimited string, hexadecimal string, and ASCII 
%     base-85 string, respectively. There is one \word{data} item, 
%     which contains the string (minus delimiters) that represented 
%     the string in the tokenized text.
%   \item[\texttt{executable}, \texttt{literal}, \texttt{immediate}]
%     \describestring[token type]{executable}
%     \describestring[token type]{literal}
%     \describestring[token type]{immediate}
%     An executable (normal), literal, and immediately evaluated name 
%     respectively. There is one \word{data} item, which simply contains 
%     the name as a \Tcllogo\ string, not including any initial slashes.
%   \item[\texttt{procedure}]
%     \describestring[token type]{procedure}
%     An executable array. The data is simply the sequence of tokens 
%     in the array.
%   \item[\texttt{comment}]
%     \describestring[token type]{comment}
%     This is not a \PS\ token at all, but a representation of a 
%     comment. The reason comments are tokenized is that they often 
%     contain interesting information (especially structured comments).
%     The structure of this kind of token is
%     \begin{quote}
%       |comment| \word{head} \word{tail}
%     \end{quote}
%     The \word{head} is the part of the line which is after the |%| 
%     that started the comment and before the first whitespace character 
%     after it. The \word{tail} is the rest of the line (including the 
%     whitespace character, if there is one). Any of \word{head} and 
%     \word{tail} may be empty. The structured comment
%     \begin{quote}
%       |%%Pages: 2|
%     \end{quote}
%     would have \word{head} equal to \verb*"%Pages:" and \word{tail} 
%     equal to \verb*" 2".
%   \item[\texttt{eof}]
%     \describestring[token type]{eof}
%     This is a pseudotoken which denotes that the end of the file was 
%     reached while looking for the next token. The structure is
%     \begin{quote}
%       |eof| \word{token}\regopt
%     \end{quote}
%     If the \word{token} is present then it is the composite token 
%     which was being read when the file unexpectedly ended; the 
%     presence of a \word{token} thus signals an error.
%   \item[\texttt{special}]
%     \describestring[token type]{special}
%     This token type can be used to extend the token type syntax 
%     without having to change the actual routines for reading and 
%     writing tokens. The structure of a |special| token is
%     \begin{quote}
%       |special| \word{token} \word{extra data}\regstar
%     \end{quote}
%     If a |special| token gets sent to the token writer then it will be 
%     treated as the token \word{token} (which usually has another 
%     type), but other routines may treat it differently based on the 
%     information in the \word{extra data}.
%   \item[\texttt{error}]
%     \describestring[token type]{error}
%     A token of this type is returned from the tokenizer whenever it 
%     encounters anything which is syntactically incorrect. The 
%     general structure of this kind of token is
%     \begin{quote}
%       |error| \word{error type} \word{data}\regstar
%     \end{quote}
%     where the \word{error type} more precisely specifies the error. 
%     So far, the following error types are used
%     \begin{quote}
%       |error| |extra| \word{string} \word{line no.}\\
%       |error| |missing| \word{string} \word{line no.}\\
%       |error| |missing/extra| \word{m-string} \word{e-string} 
%       \word{line no.}
%     \end{quote}
%     |extra| means that there is an occurrence of \word{string} which 
%     shouldn't have been there, whereas |missing| means that there 
%     should have been a \word{string} there, but isn't. |missing/extra| 
%     combines the two: either an \word{m-string} should have been 
%     there, but isn't, or an \word{e-string} is there, but 
%     shouldn't be.
%     \word{line no.} is the line on which the error was encountered.
% \end{description}
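% 
% For example, tokenizing the \PS\ fragment
% \begin{quote}
%   |/Pages 2 def|
% \end{quote}
% yields the three tokens
% \begin{quote}
%   |literal Pages|\\
%   |number 2|\\
%   |executable def|
% \end{quote}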
% 
% 
% \subsection{Reading tokens}
% 
% \begin{arrayvar}{input_line}[file-id]
% \begin{arrayvar}{input_line_no}[file-id]
%   Since the |gets| procedure always reads an entire line (which 
%   usually contains several tokens) each time it is used, it is 
%   necessary to store the most recently read line somewhere. This is 
%   the purpose of the |input_line| array, which is indexed by the file 
%   id. In general, text is removed from the entries in |input_line| as 
%   it is tokenized.
%   
%   The |input_line_no| array stores the number of the line in the 
%   corresponding |input_line| entry. The first line in a file has 
%   number one.
% \end{arrayvar}\end{arrayvar}
% 
% \begin{proc}{open_input}
% \begin{proc}{close_input}
%   These procedures are wrappers around \Tcllogo's |open| and |close| 
%   procedures respectively which additionally initialize and unset the 
%   relevant entry of the |input_line| and |input_line_no| arrays. Thus 
%   their syntaxes are
%   \begin{quote}
%     |pstokens::open_input| \word{file-name} \word{access}\regopt\ 
%     \word{permissions}\regopt\\
%     |pstokens::close_input| \word{file-id}
%   \end{quote}
%   and they return the same things as |open| and |close| respectively.
%   
%   A possible development of the tokenizer is to implement filtered 
%   input, in which case more supporting data structures would have to 
%   be initialized, but then the |open_input| and |close_input| 
%   procedures would be redefined to cope with this.
%   \begin{tcl}
%<*tokenio>
proc pstokens::open_input {args} {
   set res [eval open $args]
   global pstokens::input_line pstokens::input_line_no
   set pstokens::input_line($res) ""
   set pstokens::input_line_no($res) 0
   return $res
}
% \medskip
proc pstokens::close_input {file_id} {
   global pstokens::input_line pstokens::input_line_no
   unset pstokens::input_line($file_id)
   unset pstokens::input_line_no($file_id)
   close $file_id
}
%   \end{tcl}
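%   
%   A typical session reading from a (hypothetical) file 
%   \texttt{doc.ps} would thus look like
%   \begin{quote}
%     |set f [pstokens::open_input doc.ps]|\\
%     \textellipsis\ read tokens from |$f| \textellipsis\\
%     |pstokens::close_input $f|
%   \end{quote}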
% \end{proc}\end{proc}
% 
% The tokenizer has a couple of extra internal token types for things 
% which later become parts of other tokens.
% \begin{description}
%   \item[\texttt{beginproc}, \texttt{endproc}]
%     \describestring[token type]{beginproc}
%     \describestring[token type]{endproc}
%     These are the |{| and |}| at the beginning and end respectively of 
%     a \PS\ procedure. There is no \word{data}.
%   \item[\texttt{beginstring}, \texttt{beginhex}, \texttt{beginbase85}]
%     \describestring[token type]{beginstring}
%     \describestring[token type]{beginhex}
%     \describestring[token type]{beginbase85}
%     These token types are the |(|, |<|, and |<~| at the beginning of 
%     a \PS\ literal, hex, and base-85 string respectively. There is no 
%     \word{data}.
% \end{description}
% 
% 
% \begin{proc}{get_primitive_token}
%   The |get_primitive_token| procedure returns the next primitive 
%   token in the input file. It takes the file id of the source to read 
%   from as its only argument.
%   
%   A \emph{primitive token} is a smallest syntactic unit in the \PS\ 
%   file. Smallest syntactic units are characterized by the property 
%   that inserting a space anywhere in them changes their meaning. One 
%   exception is however the left parenthesis character; it is 
%   convenient to include |(| among the primitive tokens even though 
%   inserting a space after it will change the meaning of 
%   the corresponding token.
%   
%   The first step in |get_primitive_token| is to read ahead until some 
%   non-whitespace (or the end of the file) is encountered.
%   \begin{tcl}
proc pstokens::get_primitive_token {file} {
   global pstokens::input_line pstokens::input_line_no
   set pstokens::input_line($file)\
      [string trimleft [set pstokens::input_line($file)] \t\n\f\r\ ]
   while {[string length [set pstokens::input_line($file)]]==0} {
      if {[gets $file pstokens::input_line($file)]==-1} then {
         return {eof}
      }
      incr pstokens::input_line_no($file)
      set pstokens::input_line($file)\
         [string trimleft [set pstokens::input_line($file)] \t\n\f\r\ ]
   }
%   \end{tcl}
%   The second step is to isolate the part of the text that constitutes 
%   the next token. This is done using regular expressions, but as these
%   get rather complicated it seems best to explain how they are 
%   constructed. The ASCII characters null (|\x00|), tab (|\x09|), line 
%   feed (|\x0a|), form feed (|\x0c|), carriage return (|\x0d|), and 
%   space (|\x20|) count as whitespace and delimit tokens, but null is 
%   currently ignored because \Tcllogo\ before v\,8 cannot keep null 
%   characters in strings anyway. The ASCII characters |(|, |)|, |<|, 
%   |>|, |[|, |]|, |{|, |}|, |/|, and |%| also delimit tokens, and 
%   therefore the regular expression for a \PS\ name is
%   \begin{quote}
%     |(/?/?)([^][()<>{}/%|\meta{tab}\meta{LF}\meta{FF}\meta{CR}^^A
%     \verb*" ]+)|//?"
%   \end{quote}
%   (Note that the first right bracket above does not end a character 
%   range, but is part of the list of characters which are excluded 
%   from it. Also note that |/| and |//| are valid \PS\ tokens.)
%   The number of initial slashes determines whether the name is 
%   \texttt{executable}, \texttt{literal}, or \texttt{immediate}.
%   
%   The executable tokens are however somewhat tricky, because the above 
%   regular expression will also match all numbers. An \emph{integer} is 
%   any token which matches the regular expression \verb"[+-]?[0-9]+", 
%   whereas a \emph{real number} is anything else which matches the 
%   regular expression
%   \begin{quote}
%     \verb"[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?"
%   \end{quote}
%   and a \emph{radix number} is anything which matches the regular 
%   expression\footnote{Actually there is also the restriction that 
%   none of the characters after the \texttt{\#} may represent a digit 
%   greater than or equal to the decimal number before the \texttt{\#}, 
%   but I'm ignoring that, for now.}
%   \begin{quote}
%     |[0-9][0-9]?#[0-9A-Za-z]+|
%   \end{quote}
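%   Thus, for example, |13| and |-3.1e2| are \texttt{number} tokens 
%   and |16#FF| is a \texttt{radixnumber} token, whereas |x2| is 
%   merely an \texttt{executable} name.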
%   \begin{tcl}
   if {[regexp "^(/?/?)(\[^\]\[()<>\{\}/%\t\n\f\r\ \]+)|^//?"\
      [set pstokens::input_line($file)] tokenstr slashes name]}\
   then {
      if {[regexp\
            {^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?$}\
            $tokenstr]}\
      then {
         set token [list number $tokenstr]
      } elseif {[regexp {^[0-9][0-9]?#[0-9A-Za-z]+$} $tokenstr]} then {
         set token [list radixnumber $tokenstr]
      } else {
         if {"$tokenstr"=="/" || "$tokenstr"=="//"}\
         then {set slashes $tokenstr}
         set token [list [lindex {executable literal immediate}\
            [string length $slashes]] $name]
      }
   }\
%   \end{tcl}
%   There are also a couple of primitive tokens which consist of 
%   non-whitespace delimiting characters, namely
%   \begin{quote}
%     |{|, |}|, |<|, |<~|, |<<|, |>>|, |[|, |]|, and |(|.
%   \end{quote}
%   The right parenthesis and greater-than characters do not appear on 
%   their own in the list because they may not appear outside a 
%   string. (Note that in the |switch| below the patterns for |<~| and 
%   |<<| must be tried before that for |<|, since \Tcllogo's |switch| 
%   selects the first matching pattern.) Finally there are the 
%   comments, which have the regular expression 
%   \begin{quote}
%     |^%([^|\meta{tab}\meta{LF}\meta{FF}\meta{CR}\verb*" ]*)(.*)$"
%   \end{quote}
%   Here the first parenthesis is the head of the comment and the second 
%   is the tail.
%   \begin{tcl}
   else {
      switch -regexp -- [set pstokens::input_line($file)] {
         ^\{ {set token beginproc ; set tokenstr \{}
         ^\} {set token endproc ; set tokenstr \}}
         ^\\\[ {set token {executable [} ; set tokenstr \[}
         ^\\\] {set token {executable ]} ; set tokenstr \]}
         ^<~ {set token beginbase85 ; set tokenstr <~}
         ^<< {set token {executable <<} ; set tokenstr <<}
         ^< {set token beginhex ; set tokenstr <}
         ^>> {set token {executable >>} ; set tokenstr >>}
         ^\\( {set token beginstring ; set tokenstr (}
         ^% {
            regexp "^%(\[^\t\n\f\r\ \]*)(.*)\$"\
              [set pstokens::input_line($file)] tokenstr head tail
            set token [list comment $head $tail]
         }
         default {
%   \end{tcl}
%   But if none of the above matched, the next character must be 
%   erroneous.
%   \begin{tcl}
            set tokenstr\
               [string index [set pstokens::input_line($file)] 0]
            set token [list error extra $tokenstr\
               [set pstokens::input_line_no($file)] ]
         }
      }
   }
   set pstokens::input_line($file)\
      [string range [set pstokens::input_line($file)]\
         [string length $tokenstr] end]
   return $token
}
%   \end{tcl}
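%   
%   For example, tokenizing the input line
%   \begin{quote}
%     |/x 10 def % set up x|
%   \end{quote}
%   by repeated calls to |get_primitive_token| yields the tokens
%   \begin{quote}
%     |literal x|\\
%     |number 10|\\
%     |executable def|\\
%     |comment {} { set up x}|
%   \end{quote}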
%   
% \end{proc}
% 
% \begin{proc}{get_string}
%   The |get_string| procedure reads a \PS\ literal string and converts 
%   it to a \texttt{string} token, which it returns. The only argument 
%   is the file id of the input file. It is assumed that the next thing 
%   on the input file line is the character after the left parenthesis 
%   that started the string.
%   
%   The only conversion of the data that |get_string| performs is that 
%   escaped newlines are skipped and non-escaped newlines are replaced 
%   by |\n| escapes. If the end-of-file is encountered while reading a 
%   string then the necessary number of right parentheses are added to 
%   the string and the resulting string token is built into an |eof| 
%   token, which is returned.
%   
%   The local variable |level| keeps track of the parenthesis nesting 
%   level, the local variable |data| is the string read so far, and the 
%   local variable |escaped| is a flag which is |1| if the next 
%   character should be escaped and |0| otherwise.
%   \begin{tcl}
proc pstokens::get_string {file} {
   upvar #0 pstokens::input_line line
   global pstokens::input_line_no
   set level 1
   set data ""
   set escaped 0
   while {$level>0} {
      while {$level>0 && [string length $line($file)]>0} {
         set c [string index $line($file) 0]
         if {$escaped} then {
            append data $c
            set escaped 0
         } else {
            switch -exact -- $c {
               ( {incr level ; append data $c}
               ) {
                  incr level -1
                  if {$level>0} then {append data $c}
               }
               \\ {set escaped 1 ; append data $c}
               default {append data $c}
            }
         }
         set line($file) [string range $line($file) 1 end]
      }
      if {$level>0} then {
         if {!$escaped} then {
            append data \\n
         } else {
            set data [string range $data 0\
               [expr {[string length $data]-2}]]
         }
         set escaped 0
         if {[gets $file line($file)]==-1} then {
            while {$level>1} {append data ) ; incr level -1}
            return [list eof [list string $data]]
         }
         incr pstokens::input_line_no($file)
      }
   }
   list string $data
}
%   \end{tcl}
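%   
%   For example, if the text following the opening |(| is 
%   \verb*"a(b)c) def" then |get_string| returns the token 
%   |string a(b)c| and leaves \verb*" def" in the line buffer.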
% \end{proc}
% 
% 
% \begin{proc}{get_hexstring}
%   The |get_hexstring| procedure reads a \PS\ hexadecimal string and 
%   converts it to a \texttt{hexstring} token, which it returns. The 
%   only argument is the file id of the input file. It is assumed that 
%   the next thing on the input file line is the character after the 
%   less than character that started the string.
%   
%   The only conversion of the data that |get_hexstring| performs is 
%   that embedded whitespace is removed. If a character which is 
%   neither whitespace, a hexadecimal digit, nor the closing |>| is 
%   encountered then an \texttt{error} token is returned and the text 
%   read is put back into the |input_line| buffer (followed by a |>| 
%   which is inserted to terminate the string). If the end-of-file is 
%   encountered while reading a string then what has been read so far 
%   is returned as a \texttt{hexstring} token built into an |eof| token.
%   \begin{tcl}
proc pstokens::get_hexstring {file} {
   upvar #0 pstokens::input_line line
   global pstokens::input_line_no
   set data ""
   while 1 {
      while {[string length $line($file)]==0} {
         if {[gets $file line($file)]==-1} then {
            return [list eof [list hexstring $data]]
         }
         incr pstokens::input_line_no($file)
      }
      set c [string index $line($file) 0]
      set line($file) [string range $line($file) 1 end]
      switch -regexp -- $c {
         {[0-9a-fA-F]} {append data $c}
         \[\t\n\f\r\ \] {}
         > {break}
         default {
            set line($file) <$data>$c$line($file)
            return [list error missing/extra > $c\
               [set pstokens::input_line_no($file)]]
         }
      }
   }
   list hexstring $data
}
%   \end{tcl}
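%   
%   For example, if the text following the opening |<| is 
%   \verb*"48 65 6C>" then |get_hexstring| returns the token 
%   |hexstring 48656C|.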
% \end{proc}
% 
% 
% \begin{proc}{get_base85string}
%   The |get_base85string| procedure reads a \PS\ base-85 string and 
%   converts it to a \texttt{base-85} token, which it returns. The 
%   only argument is the file id of the input file. It is assumed that 
%   the next thing on the input file line is the character after the 
%   |<~| characters that started the string.
%   
%   The only conversion of the data that |get_base85string| performs is 
%   that embedded whitespace is removed. If a character which is neither 
%   whitespace, a base-85 digit, the letter |z|, nor the closing |~| is 
%   encountered then an \texttt{error} token is returned and the text 
%   read is put back into the |input_line| buffer (followed by a |~>| 
%   that terminates the string). If the end-of-file is encountered while 
%   reading a string then what has been read so far is returned as a 
%   \texttt{base-85} string token built into an |eof| token.
%   \begin{tcl}
proc pstokens::get_base85string {file} {
   upvar #0 pstokens::input_line line pstokens::input_line_no line_no
   set data ""
   while 1 {
      while {[string length $line($file)]==0} {
         if {[gets $file line($file)]==-1} then {
            return [list eof [list base-85 $data]]
         }
         incr line_no($file)
      }
      set c [string index $line($file) 0]
      set line($file) [string range $line($file) 1 end]
      switch -regexp -- $c {
         {[!-uz]} {append data $c}
         \[\t\n\f\r\ \] {}
         ~ {break}
         default {
            set line($file) <~$data~>$c$line($file)
            return [list error missing/extra ~> $c $line_no($file)]
         }
      }
   }
%   \end{tcl}
%   If the character after the closing |~| is not a |>| then the entire 
%   string is put back in the |input_line| buffer and a |missing| error 
%   is issued.
%   \begin{tcl}
   if {[string length $line($file)]>0 &&\
         "[string index $line($file) 0]"==">"} then {
      set line($file) [string range $line($file) 1 end]
      list base-85 $data
   } else {
      set line($file) <~$data~>$line($file)
      list error missing > $line_no($file)
   }
}
%   \end{tcl}
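%   
%   For example, if the text following the opening |<~| is |87cUR~>| 
%   then |get_base85string| returns the token |base-85 87cUR|.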
% \end{proc}
% 
% 
% \begin{proc}{get_token}
% \begin{proc}{get_token_rek}
%   The |get_token| procedure has the syntax
%   \begin{quote}
%     |pstokens::get_token| \word{file-id}
%   \end{quote}
%   and returns the next \PS\ token in the \word{file-id} file.
%   
%   Most of the work is however done by the |get_token_rek| procedure, 
%   which is a recursive form of |get_token|. |get_token_rek| handles 
%   converting the various |begin|\textellipsis\ token types to their 
%   complete forms, and it calls itself recursively to get the tokens 
%   that form a procedure. |get_token| only checks that there are no 
%   lone |endproc| tokens.
%   \begin{tcl}
proc pstokens::get_token {file} {
   set token [pstokens::get_token_rek $file]
   if {"[lindex $token 0]" == "endproc"} then {
      global pstokens::input_line_no
      set token [list error extra \} [set pstokens::input_line_no($file)]]
   }
   return $token
}
% \medskip
proc pstokens::get_token_rek {file} {
   set token [pstokens::get_primitive_token $file]
   switch -exact [lindex $token 0] {
      beginproc {
         set token {procedure}
         while {"[lindex [set token2 [pstokens::get_token_rek $file]] 0]"\
               != "endproc" && "[lindex $token2 0]" != "eof"} {
            lappend token $token2
         }
         if {"[lindex $token2 0]" == "eof"} then {
            if {[llength $token2]>1} then {
               lappend token [lindex $token2 1]
            }
            set token [list eof $token]
         }
      }
      beginstring {set token [pstokens::get_string $file]}
      beginhex {set token [pstokens::get_hexstring $file]}
      beginbase85 {set token [pstokens::get_base85string $file]}
   }
   return $token
}
%   \end{tcl}
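%   
%   For example, the \PS\ fragment |{2 mul}| is returned by 
%   |get_token| as the single token
%   \begin{quote}
%     |procedure {number 2} {executable mul}|
%   \end{quote}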
%   
%   These procedures, and in particular |get_token_rek|, could be made 
%   more intelligent when it comes to handling errors that occur inside 
%   procedures. Currently they just embed the |error| token in the 
%   procedure.
% \end{proc}\end{proc}
% 
% 
% \subsection{Writing tokens}
% 
% Writing tokens is simpler than reading them; the only real 
% complication is that the length of lines must be bounded. In order to 
% achieve this, data is written linewise to the file (much like the way 
% it is read).
% 
% \begin{arrayvar}{output_line}[file-id]
%   The |output_line| array stores each output line until it is 
%   actually written to file. Like with the |input_line| array, each 
%   active output file has its own entry in this array.
% \end{arrayvar}
% 
% \begin{proc}{flush_line}
%   The |flush_line| procedure has the syntax
%   \begin{quote}
%     |pstokens::flush_line| \word{file-id} \word{physical}\regopt
%   \end{quote}
%   It flushes buffered data that is to be written to the file with id 
%   \word{file-id}. The \word{physical} argument controls whether the 
%   data actually should be physically written to file (when |1|) or 
%   merely flushed from the |output_line| array (when |0|). The default 
%   is |0|. Calling |flush_line| starts a new line in the output file 
%   unless the |output_line| entry is empty.
%   \begin{tcl}
proc pstokens::flush_line {fileid {physical 0}} {
   global pstokens::output_line
   if {[string length [set pstokens::output_line($fileid)]]>0} then {
      puts $fileid [set pstokens::output_line($fileid)]
      set pstokens::output_line($fileid) ""
   }
   if {$physical} then {flush $fileid}
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{open_output}
% \begin{proc}{close_output}
%   These procedures are wrappers around \Tcllogo's |open| and |close| 
%   procedures respectively which additionally initialize and unset the 
%   relevant entry of the |output_line|. Thus their syntaxes are
%   \begin{quote}
%     |pstokens::open_output| \word{file-name} \word{access}\regopt\ 
%     \word{permissions}\regopt\\
%     |pstokens::close_output| \word{file-id}
%   \end{quote}
%   and they return the same things as |open| and |close| respectively. 
%   Note however that unlike the case with |open|, the \word{access} 
%   argument of |open_output| defaults to |w|.
%   
%   A possible development of the routines for writing tokens would be 
%   to support output channels other than files. In that case more 
%   support data structures would have to be initialized, but then the 
%   |open_output| and |close_output| procedures would be redefined to 
%   cope with this.
%   \begin{tcl}
proc pstokens::open_output {name {access w} args} {
   set res [eval open \$name \$access $args]
   global pstokens::output_line
   set pstokens::output_line($res) ""
   return $res
}
% \medskip
proc pstokens::close_output {file_id} {
   pstokens::flush_line $file_id 1
   close $file_id
   global pstokens::output_line
   unset pstokens::output_line($file_id)
}
%   \end{tcl}
% \end{proc}\end{proc}
% 
% \begin{variable}{wrap_length}
%   The |wrap_length| variable stores the maximal number of characters 
%   that may appear on a single output line. The same value applies for 
%   all output files.
%   \begin{tcl}
set pstokens::wrap_length 72
%   \end{tcl}
% \end{variable}
% 
% \begin{proc}{put_word}
%   The |put_word| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_word| \word{file-id} \word{string}
%   \end{quote}
%   It writes \word{string} to the file with id \word{file-id}, 
%   preceded by a whitespace character if the current line is 
%   nonempty. The whitespace character is usually a space, but it 
%   will be a newline if the length of what has already been written 
%   to the current line is too large to also fit the \word{string} 
%   within the length specified by the |wrap_length| variable.
%   \begin{tcl}
proc pstokens::put_word {file str} {
   global pstokens::output_line pstokens::wrap_length
   set l [string length [set pstokens::output_line($file)]]
   if {$l==0} then {
      set pstokens::output_line($file) $str
   } elseif {$l + [string length $str] < ${pstokens::wrap_length}}\
   then {
      append pstokens::output_line($file) " $str"
   } else {
      puts $file [set pstokens::output_line($file)]
      set pstokens::output_line($file) $str
   }
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{put_breakable}
%   The |put_breakable| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_breakable| \word{file-id} 
%     \word{prefix} \word{text} \word{suffix}
%   \end{quote}
%   It writes \word{prefix}\word{text}\word{suffix} to the file with 
%   id \word{file-id} and inserts whitespace (newlines) into the 
%   \word{text} in such a way that the |wrap_length| linewidth isn't 
%   exceeded. This is primarily useful for hexadecimal and base-85 
%   strings.
%   \begin{tcl}
proc pstokens::put_breakable {file prefix text suffix} {
   upvar #0 pstokens::output_line line pstokens::wrap_length wrap
   if {[string length $line($file)] + [string length $prefix] +\
      [string length $text] + [string length $suffix] < $wrap} then {
      append line($file) " $prefix$text$suffix"
   } else {
      pstokens::flush_line $file
      set line($file) $prefix
      while {[string length $line($file)] + [string length $text] +\
            [string length $suffix] >= $wrap} {
         set t [expr {$wrap -[string length $line($file)]}]
         append line($file) [string range $text 0 [expr {$t-1}]]
         set text [string range $text $t end]
         pstokens::flush_line $file
      }
      append line($file) $text$suffix
   }
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{put_string}
%   The |put_string| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_string| \word{file-id} \word{text}
%   \end{quote}
%   It writes |(|\word{text}|)| to the file with id \word{file-id} and 
%   inserts backslash+newline items into the \word{text} in such a way 
%   that the |wrap_length| linewidth isn't exceeded and the 
%   tokenization of the string is unaffected.
%   
%   The only place where it is unsafe to insert a backslash+newline is 
%   inside an escape sequence. A position can only be inside such a 
%   sequence if the part of the \word{text} before it matches the 
%   regular expression
%   \begin{quote}
%     \verb"(^|[^\])(\\\\)*(\\[0-7]?[0-7]?)$"
%   \end{quote}
%   and in this case the position between the second and third 
%   parenthesis is safe.
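%   For example, if a line break would otherwise split the text 
%   |abc\05def| after five characters, i.e.\ as |abc\0| plus |5def|, 
%   then the expression matches |abc\0| with the third parenthesis 
%   capturing |\0|, so the break point is moved back two characters 
%   and the text is instead split as |abc| plus |\05def|.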
%   \begin{tcl}
proc pstokens::put_string {file text} {
   upvar #0 pstokens::output_line line pstokens::wrap_length wrap
   if {[string length $line($file)] + [string length $text] + 2 <\
         $wrap} then {
      append line($file) " ($text)"
   } else {
      pstokens::flush_line $file
      set line($file) (
      while {[string length $line($file)] + [string length $text] +\
            1 >= $wrap} {
         set t [expr {$wrap -[string length $line($file)]}]
         set s [string range $text 0 [expr {$t-1}]]
         if {[regexp {(^|[^\])(\\\\)*(\\[0-7]?[0-7]?)$} $s\
            foo bar baz escape]}\
         then {
            incr t -[string length $escape]
            set s [string range $text 0 [expr {$t-1}]]
         }
         append line($file) $s
         set text [string range $text $t end]
         pstokens::flush_line $file
      }
      append line($file) $text)
   }
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{put_token}
%   The |put_token| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_token| \word{file-id} \word{token}
%   \end{quote}
%   It writes the \PS\ token \word{token} to the file with id 
%   \word{file-id}.
%   \begin{tcl}
proc pstokens::put_token {file token} {
   switch [lindex $token 0] {
      number -
      radixnumber -
      executable {pstokens::put_word $file [lindex $token 1]}
      literal {pstokens::put_word $file /[lindex $token 1]}
      immediate {pstokens::put_word $file //[lindex $token 1]}
      string {pstokens::put_string $file [lindex $token 1]}
      comment {
         pstokens::flush_line $file 0
         puts $file %[lindex $token 1][lindex $token 2]
      }
      special {pstokens::put_token $file [lindex $token 1]}
      procedure {
         pstokens::put_word $file \{
         foreach subtok [lrange $token 1 end] {
            pstokens::put_token $file $subtok
         }
         pstokens::put_word $file \}
      }
      hexstring {pstokens::put_breakable $file < [lindex $token 1] >}
      base-85 {pstokens::put_breakable $file <~ [lindex $token 1] ~>}
      eof {
         if {[llength $token]>1} then {
            pstokens::put_token $file [lindex $token 1]
         }
         pstokens::terminal_print "Token unexpectedly terminated by\
            end of file."
      }
      error {
         pstokens::terminal_print "Error token intercepted at put:\
            [switch [lindex $token 1] {
               extra {expr {"extra [lindex $token 2] at input line\
                  [lindex $token 3]"}}
               missing {expr {"missing [lindex $token 2] at input\
                  line [lindex $token 3]"}}
               missing/extra {expr {"missing [lindex $token 2], or\
                  extra [lindex $token 3], at input line\
                  [lindex $token 4]"}}
               default {expr {"!!!-unimplemented error-!!!"}}
            }]."
      }
      default {
         pstokens::terminal_print\
            "Token of unknown type encountered: $token"
      }
   }
}
%   \end{tcl}
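%   
%   Together with the reading routines above, |put_token| suffices 
%   for a simple token-level copy of a file (the file names in this 
%   example are of course made up):
%   \begin{quote}
%     |set in [pstokens::open_input src.ps]|\\
%     |set out [pstokens::open_output dst.ps]|\\
%     |while {"[lindex [set t [pstokens::get_token $in]] 0]" != "eof"} {|\\
%     |   pstokens::put_token $out $t|\\
%     |}|\\
%     |pstokens::close_input $in|\\
%     |pstokens::close_output $out|
%   \end{quote}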
% \end{proc}


% \begin{tcl}
%</tokenio>
% \end{tcl}
% 
% \section{Token stream editing}
% 
% This section is still on my ToDo list.
% 
% 
% 
% \Finale
% 
\endinput