% 
% \iffalse
%<*driver>
\documentclass{tcldoc}

\usepackage{array}


% Logos:
% 
\DeclareFontFamily{U}{logo}{}
\DeclareFontShape{U}{logo}{m}{n}{
  <-9>   logo8
  <9-10> logo9
  <10->  logo10
}{}
\DeclareFontShape{U}{logo}{bx}{n}{<-> logobf10}{}
\DeclareFontShape{U}{logo}{b}{n}{<-> ssub * logo/bx/n}{}
\DeclareFontShape{U}{logo}{m}{sl}{<-> logosl10}{}
\DeclareFontShape{U}{logo}{m}{it}{<-> ssub * logo/m/sl}{}

% \DeclareRobustCommand\META{{%
%    \fontencoding{U}\fontfamily{logo}\selectfont META%
% }}
\DeclareRobustCommand\MF{%
   {\fontencoding{U}\fontfamily{logo}\selectfont META}\-%
   {\fontencoding{U}\fontfamily{logo}\selectfont FONT}%
}
\DeclareRobustCommand\MP{%
   {\fontencoding{U}\fontfamily{logo}\selectfont META}\-%
   {\fontencoding{U}\fontfamily{logo}\selectfont POST}%
}
\DeclareRobustCommand\PS{PostScript}
\DeclareRobustCommand\TOne{Type~1}
\DeclareRobustCommand\package[1]{\textsf{#1}}


% Useful environments:
% 
% \begin{environment}{isyntax}
%   |isyntax| is for ``informal'' syntax specifications, such as
%   \begin{quote}
%     A \meta{varchar command} can be one of\\
%     \begin{quote}
%       |\vartop|\marg{glyph}\\
%       |\varmid|\marg{glyph}\\
%       |\varbot|\marg{glyph}\\
%       |\varrep|\marg{glyph}
%     \end{quote}
%   \end{quote}
%   The implementation is based on that of the \texttt{verse} 
%   environment. A plain \texttt{quote} usually works just as well, 
%   but \texttt{isyntax} handles breaking of long lines better (and 
%   more elegantly).
%   \changes{1.37}{2000/05/14}{Increased \cs{rightskip} stretchability 
%      and added setting of \cs{linepenalty}. (LH)}
%    \begin{macrocode}
\makeatletter
\newenvironment{isyntax}{%
   \let\\\@centercr
   \list{}{%
      \itemsep \z@
      \itemindent -1.5em%
      \listparindent \itemindent
      \advance \leftmargin 1.5em%
   }%
   \advance \rightskip \z@\@plus0.7\linewidth \relax
   \linepenalty=100\relax
   \item\relax
}{\endlist}
\makeatother
%    \end{macrocode}
% \end{environment}


% Tabular notes:
\makeatletter
\newcommand\tabnotemark[1]{%
   \leavevmode
   \ifhmode\edef\@x@sf{\the\spacefactor}\nobreak\fi
   \hbox{\@textsuperscript{%
      \normalfont\itshape
      \count@=#1\relax\@alph\count@
   }}%
   \ifhmode\spacefactor\@x@sf\fi
   \relax
}
\newenvironment{tabnotes}{%
   \list{\the\c@mpfootnote}{%
      \usecounter{mpfootnote}%
      \def\makelabel##1{\hfil\tabnotemark{##1}}%
      \normalfont\footnotesize
      \setlength\leftmargin{0pt}%
      \setlength\labelsep{0pt}%
      \setlength\labelwidth{1.8em}%
      \setlength\itemindent{\labelwidth}%
      \setlength\listparindent{1em}%
      \setlength\parsep{0pt plus 1pt}%
      \setlength\itemsep{\parsep}%
      \setlength\topsep{\footnotesep}%
   }%
}{\endlist}
\makeatother


\CodelineIndex

\title{\texttt{pstokens.dtx}}
\author{Lars Hellstr\"om}
\date{Work in progress}
\begin{document}
\maketitle

\DocInput{pstokens.dtx}

\end{document}
%</driver>
% \fi
% 
% \begin{abstract}
%   \package{pstokens} is a collection of utility routines for 
%   \Tcllogo\ programs that need to view a file as a sequence of 
%   \PS\ tokens rather than as a sequence of characters or lines.
% \end{abstract}
% 
% 
% \setnamespace{pstokens}
% 
% First of all, the \texttt{pstokens} namespace must be defined.
% \begin{tcl}
namespace eval pstokens {}
% \end{tcl}
% 
% \section{Reading and writing tokens}
% 
% This section contains the routines for reading a text file as a 
% sequence of tokens (tokenizing) and writing a sequence of tokens 
% to a text file (detokenizing). 
% 
% 
% \subsection{Data structures for tokens}
% 
% A \emph{token} is a \Tcllogo\ list with the format
% \begin{quote}
%   \word{type} \word{data}\regstar
% \end{quote}
% The interpretation of the \word{data} depends on what the \word{type} 
% is, as specified below. Some of the token types only occur internally 
% in the tokenizer and some others have no meaning to a \PS\ interpreter, 
% but this organisation seems to be the most convenient here.
% 
% A primary goal for the format of tokens is that if a sequence of 
% tokens are read from one file and written to another then the only 
% things that may have changed should be the whitespace between the 
% tokens, not the tokens themselves. Therefore the tokenizer generally 
% avoids interpreting the tokens any more than necessary; 
% interpretation of tokens is instead handled in the next section.
% 
% The token types that occur in \package{pstokens} are
% \begin{description}
%   \item[\texttt{number}, \texttt{radixnumber}]
%     \describestring[token type]{number}
%     \describestring[token type]{radixnumber}
%     A number (integer or real) and a radix number, respectively. There 
%     is one \word{data} item, which contains the string that 
%     represented the number in the tokenized text.
%   \item[\texttt{string}, \texttt{hexstring}, \texttt{base-85}]
%     \describestring[token type]{string}
%     \describestring[token type]{hexstring}
%     \describestring[token type]{base-85}
%     A parenthesis-delimited string, hexadecimal string, and ASCII 
%     base-85 string, respectively. There is one \word{data} item, 
%     which contains the string (minus delimiters) that represented 
%     the string in the tokenized text.
%   \item[\texttt{executable}, \texttt{literal}, \texttt{immediate}]
%     \describestring[token type]{executable}
%     \describestring[token type]{literal}
%     \describestring[token type]{immediate}
%     An executable (normal), literal, and immediately evaluated name 
%     respectively. There is one \word{data} item, which simply contains 
%     the name as a \Tcllogo\ string, not including any initial slashes.
%   \item[\texttt{procedure}]
%     \describestring[token type]{procedure}
%     An executable array. The data is simply the sequence of tokens 
%     in the array.
%   \item[\texttt{comment}]
%     \describestring[token type]{comment}
%     This is not a \PS\ token at all, but a representation of a 
%     comment. The reason comments are tokenized is that they often 
%     contain interesting information (especially structured comments).
%     The structure of this kind of token is
%     \begin{quote}
%       |comment| \word{head} \word{tail}
%     \end{quote}
%     The \word{head} is the part of the line which is after the |%| 
%     that started the comment and before the first whitespace character 
%     after it. The \word{tail} is the rest of the line (including the 
%     whitespace character, if there is one). Any of \word{head} and 
%     \word{tail} may be empty. The structured comment
%     \begin{quote}
%       |%%Pages: 2|
%     \end{quote}
%     would have \word{head} equal to \verb*"%Pages:" and \word{tail} 
%     equal to \verb*" 2".
%   \item[\texttt{eof}]
%     \describestring[token type]{eof}
%     This is a pseudotoken which denotes that the end of the file was 
%     reached while looking for the next token. The structure is
%     \begin{quote}
%       |eof| \word{token}\regopt
%     \end{quote}
%     If the \word{token} is present then it is the composite token 
%     which was being read when the file unexpectedly ended; the 
%     presence of a \word{token} thus signals an error.
%   \item[\texttt{special}]
%     \describestring[token type]{special}
%     This token type can be used to extend the token type syntax 
%     without having to change the actual routines for reading and 
%     writing tokens. The structure of a |special| token is
%     \begin{quote}
%       |special| \word{token} \word{extra data}\regstar
%     \end{quote}
%     If a |special| token gets sent to the token writer then it will be 
%     treated as the token \word{token} (which usually has another 
%     type), but other routines may treat it differently based on the 
%     information in the \word{extra data}.
%   \item[\texttt{error}]
%     \describestring[token type]{error}
%     A token of this type is returned from the tokenizer whenever it 
%     encounters anything which is syntactically incorrect. The 
%     general structure of this kind of token is
%     \begin{quote}
%       |error| \word{error type} \word{data}\regstar
%     \end{quote}
%     where the \word{error type} more precisely specifies the error. 
%     So far, the following error types are used
%     \begin{quote}
%       |error| |extra| \word{string} \word{line no.}\\
%       |error| |missing| \word{string} \word{line no.}\\
%       |error| |missing/extra| \word{m-string} \word{e-string} 
%       \word{line no.}
%     \end{quote}
%     |extra| means that there is an occurrence of \word{string} which 
%     shouldn't have been there, whereas |missing| means that there 
%     should have been a \word{string} there, but isn't. |missing/extra| 
%     combines the two: either an \word{m-string} should have been 
%     there, but isn't, or an \word{e-string} is there, but 
%     shouldn't be.
%     \word{line no.} is the line on which the error was encountered.
% \end{description}
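% 
% For example, tokenizing the \PS\ fragment
% \begin{quote}
%   |/Pages 2 def|
% \end{quote}
% yields the three tokens
% \begin{quote}
%   |literal Pages|\\
%   |number 2|\\
%   |executable def|
% \end{quote}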
% 
% 
% \subsection{Reading tokens}
% 
% \begin{arrayvar}{input_line}[file-id]
% \begin{arrayvar}{input_line_no}[file-id]
%   Since the |gets| procedure always reads an entire line (which 
%   usually contains several tokens) each time it is used, it is 
%   necessary to store the most recently read line somewhere. This is 
%   the purpose of the |input_line| array, which is indexed by the file 
%   id. In general, text is removed from the entries in |input_line| as 
%   it is tokenized.
%   
%   The |input_line_no| array stores the number of the line in the 
%   corresponding |input_line| entry. The first line in a file has 
%   number one.
% \end{arrayvar}\end{arrayvar}
% 
% \begin{proc}{open_input}
% \begin{proc}{close_input}
%   These procedures are wrappers around \Tcllogo's |open| and |close| 
%   procedures respectively which additionally initialize and unset the 
%   relevant entry of the |input_line| and |input_line_no| arrays. Thus 
%   their syntaxes are
%   \begin{quote}
%     |pstokens::open_input| \word{file-name} \word{access}\regopt\ 
%     \word{permissions}\regopt\\
%     |pstokens::close_input| \word{file-id}
%   \end{quote}
%   and they return the same things as |open| and |close| respectively.
%   
%   A possible development of the tokenizer is to implement filtered 
%   input, in which case more supporting data structures would have to 
%   be initialized, but then the |open_input| and |close_input| 
%   procedures would be redefined to cope with this.
%   \begin{tcl}
%<*tokenio>
proc pstokens::open_input {args} {
   set res [eval open $args]
   global pstokens::input_line pstokens::input_line_no
   set pstokens::input_line($res) ""
   set pstokens::input_line_no($res) 0
   return $res
}
% \medskip
proc pstokens::close_input {file_id} {
   global pstokens::input_line pstokens::input_line_no
   unset pstokens::input_line($file_id)
   unset pstokens::input_line_no($file_id)
   close $file_id
}
%   \end{tcl}
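%   
%   A typical session reading from a (hypothetical) file 
%   \texttt{doc.ps} would thus look like
%   \begin{quote}
%     |set f [pstokens::open_input doc.ps]|\\
%     \textellipsis\ read tokens from |$f| \textellipsis\\
%     |pstokens::close_input $f|
%   \end{quote}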
% \end{proc}\end{proc}
% 
% The tokenizer has a couple of extra internal token types for things 
% which later become parts of other tokens.
% \begin{description}
%   \item[\texttt{beginproc}, \texttt{endproc}]
%     \describestring[token type]{beginproc}
%     \describestring[token type]{endproc}
%     These are the |{| and |}| at the beginning and end respectively of 
%     a \PS\ procedure. There is no \word{data}.
%   \item[\texttt{beginstring}, \texttt{beginhex}, \texttt{beginbase85}]
%     \describestring[token type]{beginstring}
%     \describestring[token type]{beginhex}
%     \describestring[token type]{beginbase85}
%     These token types are the |(|, |<|, and |<~| at the beginning of 
%     a \PS\ literal, hex, and base-85 string respectively. There is no 
%     \word{data}.
% \end{description}
% 
% 
% \begin{proc}{get_primitive_token}
%   The |get_primitive_token| procedure returns the next primitive 
%   token in the input file. It takes the file id of the source to read 
%   from as its only argument.
%   
%   A \emph{primitive token} is a smallest syntactic unit in the \PS\ 
%   file. Smallest syntactic units are characterized by the property 
%   that inserting a space anywhere in them changes their meaning. One 
%   exception is however the left parenthesis character; it is 
%   convenient to include |(| among the primitive tokens even though 
%   inserting a space after it will change the meaning of 
%   the corresponding token.
%   
%   The first step in |get_primitive_token| is to read ahead until some 
%   non-whitespace (or the end of the file) is encountered.
%   \begin{tcl}
proc pstokens::get_primitive_token {file} {
   global pstokens::input_line pstokens::input_line_no
   set pstokens::input_line($file)\
      [string trimleft [set pstokens::input_line($file)] \t\n\f\r\ ]
   while {[string length [set pstokens::input_line($file)]]==0} {
      if {[gets $file pstokens::input_line($file)]==-1} then {
         return {eof}
      }
      incr pstokens::input_line_no($file)
      set pstokens::input_line($file)\
         [string trimleft [set pstokens::input_line($file)] \t\n\f\r\ ]
   }
%   \end{tcl}
%   The second step is to isolate the part of the text that constitutes 
%   the next token. This is done using regular expressions, but as these
%   get rather complicated it seems best to explain how they are 
%   constructed. The ASCII characters null (|\x00|), tab (|\x09|), line 
%   feed (|\x0a|), form feed (|\x0c|), carriage return (|\x0d|), and 
%   space (|\x20|) count as whitespace and delimit tokens, but null is 
%   currently ignored because \Tcllogo\ before v\,8 cannot keep null 
%   characters in strings anyway. The ASCII characters |(|, |)|, |<|, 
%   |>|, |[|, |]|, |{|, |}|, |/|, and |%| also delimit tokens, and 
%   therefore the regular expression for a \PS\ name is
%   \begin{quote}
%     |(/?/?)([^][()<>{}/%|\meta{tab}\meta{LF}\meta{FF}\meta{CR}^^A
%     \verb*" ]+)|//?"
%   \end{quote}
%   (Note that the first right bracket above does not end a character 
%   range, but is part of the list of characters which are excluded 
%   from it. Also note that |/| and |//| are valid \PS\ tokens.)
%   The number of initial slashes determines whether the name is 
%   \texttt{executable}, \texttt{literal}, or \texttt{immediate}.
%   
%   The executable tokens are however somewhat tricky, because the above 
%   regular expression will also match all numbers. An \emph{integer} is 
%   any token which matches the regular expression \verb"[+-]?[0-9]+", 
%   whereas a \emph{real number} is anything else which matches the 
%   regular expression
%   \begin{quote}
%     \verb"[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?"
%   \end{quote}
%   and a \emph{radix number} is anything which matches the regular 
%   expression\footnote{Actually there is also the restriction that 
%   none of the characters after the \texttt{\#} may represent a digit 
%   greater than or equal to the decimal number before the \texttt{\#}, 
%   but I'm ignoring that, for now.}
%   \begin{quote}
%     |[0-9][0-9]?#[0-9A-Za-z]+|
%   \end{quote}
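%   Thus, for example, |13| and |-3.1e2| are \texttt{number} tokens 
%   and |16#FF| is a \texttt{radixnumber} token, whereas |x2| is 
%   merely an \texttt{executable} name.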
%   \begin{tcl}
   if {[regexp "^(/?/?)(\[^\]\[()<>\{\}/%\t\n\f\r\ \]+)|^//?"\
      [set pstokens::input_line($file)] tokenstr slashes name]}\
   then {
      if {[regexp\
            {^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?$}\
            $tokenstr]}\
      then {
         set token [list number $tokenstr]
      } elseif {[regexp {^[0-9][0-9]?#[0-9A-Za-z]+$} $tokenstr]} then {
         set token [list radixnumber $tokenstr]
      } else {
         if {"$tokenstr"=="/" || "$tokenstr"=="//"}\
         then {set slashes $tokenstr}
         set token [list [lindex {executable literal immediate}\
            [string length $slashes]] $name]
      }
   }\
%   \end{tcl}
%   There are also a couple of primitive tokens which consist of 
%   non-whitespace delimiting characters, namely
%   \begin{quote}
%     |{|, |}|, |<|, |<~|, |<<|, |>>|, |[|, |]|, and |(|.
%   \end{quote}
%   The right parenthesis and greater-than characters do not appear on 
%   their own in the list because they may not appear outside a 
%   string. (Note that in the |switch| below the patterns for |<~| and 
%   |<<| must be tried before that for |<|, since \Tcllogo's |switch| 
%   selects the first matching pattern.) Finally there are the 
%   comments, which have the regular expression 
%   \begin{quote}
%     |^%([^|\meta{tab}\meta{LF}\meta{FF}\meta{CR}\verb*" ]*)(.*)$"
%   \end{quote}
%   Here the first parenthesis is the head of the comment and the second 
%   is the tail.
%   \begin{tcl}
   else {
      switch -regexp -- [set pstokens::input_line($file)] {
         ^\{ {set token beginproc ; set tokenstr \{}
         ^\} {set token endproc ; set tokenstr \}}
         ^\\\[ {set token {executable [} ; set tokenstr \[}
         ^\\\] {set token {executable ]} ; set tokenstr \]}
         ^<~ {set token beginbase85 ; set tokenstr <~}
         ^<< {set token {executable <<} ; set tokenstr <<}
         ^< {set token beginhex ; set tokenstr <}
         ^>> {set token {executable >>} ; set tokenstr >>}
         ^\\( {set token beginstring ; set tokenstr (}
         ^% {
            regexp "^%(\[^\t\n\f\r\ \]*)(.*)\$"\
              [set pstokens::input_line($file)] tokenstr head tail
            set token [list comment $head $tail]
         }
         default {
%   \end{tcl}
%   But if none of the above matched, the next character must be 
%   erroneous.
%   \begin{tcl}
            set tokenstr\
               [string index [set pstokens::input_line($file)] 0]
            set token [list error extra $tokenstr\
               [set pstokens::input_line_no($file)] ]
         }
      }
   }
   set pstokens::input_line($file)\
      [string range [set pstokens::input_line($file)]\
         [string length $tokenstr] end]
   return $token
}
%   \end{tcl}
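%   
%   For example, tokenizing the input line
%   \begin{quote}
%     |/x 10 def % set up x|
%   \end{quote}
%   by repeated calls to |get_primitive_token| yields the tokens
%   \begin{quote}
%     |literal x|\\
%     |number 10|\\
%     |executable def|\\
%     |comment {} { set up x}|
%   \end{quote}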
%   
% \end{proc}
% 
% \begin{proc}{get_string}
%   The |get_string| procedure reads a \PS\ literal string and converts 
%   it to a \texttt{string} token, which it returns. The only argument 
%   is the file id of the input file. It is assumed that the next thing 
%   on the input file line is the character after the left parenthesis 
%   that started the string.
%   
%   The only conversion of the data that |get_string| performs is that 
%   escaped newlines are skipped and non-escaped newlines are replaced 
%   by |\n| escapes. If the end-of-file is encountered while reading a 
%   string then the necessary number of right parentheses are added to 
%   the string and the resulting string token is built into an |eof| 
%   token, which is returned.
%   
%   The local variable |level| keeps track of the parenthesis nesting 
%   level, the local variable |data| is the string read so far, and the 
%   local variable |escaped| is a flag which is |1| if the next 
%   character should be escaped and |0| otherwise.
%   \begin{tcl}
proc pstokens::get_string {file} {
   upvar #0 pstokens::input_line line
   global pstokens::input_line_no
   set level 1
   set data ""
   set escaped 0
   while {$level>0} {
      while {$level>0 && [string length $line($file)]>0} {
         set c [string index $line($file) 0]
         if {$escaped} then {
            append data $c
            set escaped 0
         } else {
            switch -exact -- $c {
               ( {incr level ; append data $c}
               ) {
                  incr level -1
                  if {$level>0} then {append data $c}
               }
               \\ {set escaped 1 ; append data $c}
               default {append data $c}
            }
         }
         set line($file) [string range $line($file) 1 end]
      }
      if {$level>0} then {
         if {!$escaped} then {
            append data \\n
         } else {
            set data [string range $data 0\
               [expr {[string length $data]-2}]]
         }
         set escaped 0
         if {[gets $file line($file)]==-1} then {
            while {$level>1} {append data ) ; incr level -1}
            return [list eof [list string $data]]
         }
         incr pstokens::input_line_no($file)
      }
   }
   list string $data
}
%   \end{tcl}
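%   
%   For example, if the text following the opening |(| is 
%   \verb*"a(b)c) def" then |get_string| returns the token 
%   |string a(b)c| and leaves \verb*" def" in the line buffer.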
% \end{proc}
% 
% 
% \begin{proc}{get_hexstring}
%   The |get_hexstring| procedure reads a \PS\ hexadecimal string and 
%   converts it to a \texttt{hexstring} token, which it returns. The 
%   only argument is the file id of the input file. It is assumed that 
%   the next thing on the input file line is the character after the 
%   less than character that started the string.
%   
%   The only conversion of the data that |get_hexstring| performs is 
%   that embedded whitespace is removed. If a character which is 
%   neither whitespace, a hexadecimal digit, nor the closing |>| is 
%   encountered then an \texttt{error} token is returned and the text 
%   read is put back into the |input_line| buffer (followed by a |>| 
%   which is inserted to terminate the string). If the end-of-file is 
%   encountered while reading a string then what has been read so far 
%   is returned as a \texttt{hexstring} token built into an |eof| token.
%   \begin{tcl}
proc pstokens::get_hexstring {file} {
   upvar #0 pstokens::input_line line
   global pstokens::input_line_no
   set data ""
   while 1 {
      while {[string length $line($file)]==0} {
         if {[gets $file line($file)]==-1} then {
            return [list eof [list hexstring $data]]
         }
         incr pstokens::input_line_no($file)
      }
      set c [string index $line($file) 0]
      set line($file) [string range $line($file) 1 end]
      switch -regexp -- $c {
         {[0-9a-fA-F]} {append data $c}
         \[\t\n\f\r\ \] {}
         > {break}
         default {
            set line($file) <$data>$c$line($file)
            return [list error missing/extra > $c\
               [set pstokens::input_line_no($file)]]
         }
      }
   }
   list hexstring $data
}
%   \end{tcl}
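%   
%   For example, if the text following the opening |<| is 
%   \verb*"48 65 6C>" then |get_hexstring| returns the token 
%   |hexstring 48656C|.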
% \end{proc}
% 
% 
% \begin{proc}{get_base85string}
%   The |get_base85string| procedure reads a \PS\ base-85 string and 
%   converts it to a \texttt{base-85} token, which it returns. The 
%   only argument is the file id of the input file. It is assumed that 
%   the next thing on the input file line is the character after the 
%   |<~| characters that started the string.
%   
%   The only conversion of the data that |get_base85string| performs is 
%   that embedded whitespace is removed. If a character which is neither 
%   whitespace, a base-85 digit, the letter |z|, nor the closing |~| is 
%   encountered then an \texttt{error} token is returned and the text 
%   read is put back into the |input_line| buffer (followed by a |~>| 
%   that terminates the string). If the end-of-file is encountered while 
%   reading a string then what has been read so far is returned as a 
%   \texttt{base-85} string token built into an |eof| token.
%   \begin{tcl}
proc pstokens::get_base85string {file} {
   upvar #0 pstokens::input_line line pstokens::input_line_no line_no
   set data ""
   while 1 {
      while {[string length $line($file)]==0} {
         if {[gets $file line($file)]==-1} then {
            return [list eof [list base-85 $data]]
         }
         incr line_no($file)
      }
      set c [string index $line($file) 0]
      set line($file) [string range $line($file) 1 end]
      switch -regexp -- $c {
         {[!-uz]} {append data $c}
         \[\t\n\f\r\ \] {}
         ~ {break}
         default {
            set line($file) <~$data~>$c$line($file)
            return [list error missing/extra ~> $c $line_no($file)]
         }
      }
   }
%   \end{tcl}
%   If the character after the closing |~| is not a |>| then the entire 
%   string is put back in the |input_line| buffer and a |missing| error 
%   is issued.
%   \begin{tcl}
   if {[string length $line($file)]>0 &&\
         "[string index $line($file) 0]"==">"} then {
      set line($file) [string range $line($file) 1 end]
      list base-85 $data
   } else {
      set line($file) <~$data~>$line($file)
      list error missing > $line_no($file)
   }
}
%   \end{tcl}
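%   
%   For example, if the text following the opening |<~| is |87cUR~>| 
%   then |get_base85string| returns the token |base-85 87cUR|.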
% \end{proc}
% 
% 
% \begin{proc}{get_token}
% \begin{proc}{get_token_rek}
%   The |get_token| procedure has the syntax
%   \begin{quote}
%     |pstokens::get_token| \word{file-id}
%   \end{quote}
%   and returns the next \PS\ token in the \word{file-id} file.
%   
%   Most of the work is however done by the |get_token_rek| procedure, 
%   which is a recursive form of |get_token|. |get_token_rek| handles 
%   converting the various |begin|\textellipsis\ token types to their 
%   complete forms, and it calls itself recursively to get the tokens 
%   that form a procedure. |get_token| only checks that there are no 
%   lone |endproc| tokens.
%   \begin{tcl}
proc pstokens::get_token {file} {
   set token [pstokens::get_token_rek $file]
   if {"[lindex $token 0]" == "endproc"} then {
      global pstokens::input_line_no
      set token [list error extra \} [set pstokens::input_line_no($file)]]
   }
   return $token
}
% \medskip
proc pstokens::get_token_rek {file} {
   set token [pstokens::get_primitive_token $file]
   switch -exact [lindex $token 0] {
      beginproc {
         set token {procedure}
         while {"[lindex [set token2 [pstokens::get_token_rek $file]] 0]"\
               != "endproc" && "[lindex $token2 0]" != "eof"} {
            lappend token $token2
         }
         if {"[lindex $token2 0]" == "eof"} then {
            if {[llength $token2]>1} then {
               lappend token [lindex $token2 1]
            }
            set token [list eof $token]
         }
      }
      beginstring {set token [pstokens::get_string $file]}
      beginhex {set token [pstokens::get_hexstring $file]}
      beginbase85 {set token [pstokens::get_base85string $file]}
   }
   return $token
}
%   \end{tcl}
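%   
%   For example, the \PS\ fragment |{2 mul}| is returned by 
%   |get_token| as the single token
%   \begin{quote}
%     |procedure {number 2} {executable mul}|
%   \end{quote}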
%   
%   These procedures, and in particular |get_token_rek|, could be made 
%   more intelligent when it comes to handling errors that occur inside 
%   procedures. Currently they just embed the |error| token in the 
%   procedure.
% \end{proc}\end{proc}
% 
% 
% \subsection{Writing tokens}
% 
% Writing tokens is simpler than reading them; the only real 
% complication is that the length of lines must be bounded. In order to 
% achieve this, data is written linewise to the file (much like the way 
% it is read).
% 
% \begin{arrayvar}{output_line}[file-id]
%   The |output_line| array stores each output line until it is 
%   actually written to file. Like with the |input_line| array, each 
%   active output file has its own entry in this array.
% \end{arrayvar}
% 
% \begin{proc}{flush_line}
%   The |flush_line| procedure has the syntax
%   \begin{quote}
%     |pstokens::flush_line| \word{file-id} \word{physical}\regopt
%   \end{quote}
%   It flushes buffered data that is to be written to the file with id 
%   \word{file-id}. The \word{physical} argument controls whether the 
%   data actually should be physically written to file (when |1|) or 
%   merely flushed from the |output_line| array (when |0|). The default 
%   is |0|. Calling |flush_line| starts a new line in the output file 
%   unless the |output_line| entry is empty.
%   \begin{tcl}
proc pstokens::flush_line {fileid {physical 0}} {
   global pstokens::output_line
   if {[string length [set pstokens::output_line($fileid)]]>0} then {
      puts $fileid [set pstokens::output_line($fileid)]
      set pstokens::output_line($fileid) ""
   }
   if {$physical} then {flush $fileid}
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{open_output}
% \begin{proc}{close_output}
%   These procedures are wrappers around \Tcllogo's |open| and |close| 
%   procedures respectively which additionally initialize and unset the 
%   relevant entry of the |output_line|. Thus their syntaxes are
%   \begin{quote}
%     |pstokens::open_output| \word{file-name} \word{access}\regopt\ 
%     \word{permissions}\regopt\\
%     |pstokens::close_output| \word{file-id}
%   \end{quote}
%   and they return the same things as |open| and |close| respectively. 
%   Note however that unlike the case with |open|, the \word{access} 
%   argument of |open_output| defaults to |w|.
%   
%   A possible development of the routines for writing tokens would be 
%   to support output channels other than files. In that case more 
%   support data structures would have to be initialized, but then the 
%   |open_output| and |close_output| procedures would be redefined to 
%   cope with this.
%   \begin{tcl}
proc pstokens::open_output {name {access w} args} {
   set res [eval open \$name \$access $args]
   global pstokens::output_line
   set pstokens::output_line($res) ""
   return $res
}
% \medskip
proc pstokens::close_output {file_id} {
   pstokens::flush_line $file_id 1
   close $file_id
   global pstokens::output_line
   unset pstokens::output_line($file_id)
}
%   \end{tcl}
% \end{proc}\end{proc}
% 
% \begin{variable}{wrap_length}
%   The |wrap_length| variable stores the maximal number of characters 
%   that may appear on a single output line. The same value applies for 
%   all output files.
%   \begin{tcl}
set pstokens::wrap_length 72
%   \end{tcl}
% \end{variable}
% 
% \begin{proc}{put_word}
%   The |put_word| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_word| \word{file-id} \word{string}
%   \end{quote}
%   It writes \word{string} to the file with id \word{file-id}, 
%   preceded by a whitespace character if the current line is 
%   nonempty. The whitespace character is usually a space, but it 
%   will be a newline if the length of what has already been written 
%   to the current line is too large to also fit the \word{string} 
%   within the length specified by the |wrap_length| variable.
%   \begin{tcl}
proc pstokens::put_word {file str} {
   global pstokens::output_line pstokens::wrap_length
   set l [string length [set pstokens::output_line($file)]]
   if {$l==0} then {
      set pstokens::output_line($file) $str
   } elseif {$l + [string length $str] < ${pstokens::wrap_length}}\
   then {
      append pstokens::output_line($file) " $str"
   } else {
      puts $file [set pstokens::output_line($file)]
      set pstokens::output_line($file) $str
   }
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{put_breakable}
%   The |put_breakable| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_breakable| \word{file-id} 
%     \word{prefix} \word{text} \word{suffix}
%   \end{quote}
%   It writes \word{prefix}\word{text}\word{suffix} to the file with 
%   id \word{file-id} and inserts whitespace (newlines) into the 
%   \word{text} in such a way that the |wrap_length| linewidth isn't 
%   exceeded. This is primarily useful for hexadecimal and base-85 
%   strings.
%   \begin{tcl}
proc pstokens::put_breakable {file prefix text suffix} {
   upvar #0 pstokens::output_line line pstokens::wrap_length wrap
   if {[string length $line($file)] + [string length $prefix] +\
      [string length $text] + [string length $suffix] < $wrap} then {
      append line($file) " $prefix$text$suffix"
   } else {
      pstokens::flush_line $file
      set line($file) $prefix
      while {[string length $line($file)] + [string length $text] +\
            [string length $suffix] >= $wrap} {
         set t [expr {$wrap -[string length $line($file)]}]
         append line($file) [string range $text 0 [expr {$t-1}]]
         set text [string range $text $t end]
         pstokens::flush_line $file
      }
      append line($file) $text$suffix
   }
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{put_string}
%   The |put_string| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_string| \word{file-id} \word{text}
%   \end{quote}
%   It writes |(|\word{text}|)| to the file with id \word{file-id} and 
%   inserts backslash+newline items into the \word{text} in such a way 
%   that the |wrap_length| linewidth isn't exceeded and the 
%   tokenization of the string is unaffected.
%   
%   The only place where it is unsafe to insert a backslash+newline is 
%   inside an escape sequence. A position can only be inside such a 
%   sequence if the part of the \word{text} before it matches the 
%   regular expression
%   \begin{quote}
%     \verb"(^|[^\])(\\\\)*(\\[0-7]?[0-7]?)$"
%   \end{quote}
%   and in this case the position between the second and third 
%   parenthesis is safe.
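%   For example, if a line break would otherwise split the text 
%   |abc\05def| after five characters, i.e.\ as |abc\0| plus |5def|, 
%   then the expression matches |abc\0| with the third parenthesis 
%   capturing |\0|, so the break point is moved back two characters 
%   and the text is instead split as |abc| plus |\05def|.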
%   \begin{tcl}
proc pstokens::put_string {file text} {
   upvar #0 pstokens::output_line line pstokens::wrap_length wrap
   if {[string length $line($file)] + [string length $text] + 2 <\
         $wrap} then {
      append line($file) " ($text)"
   } else {
      pstokens::flush_line $file
      set line($file) (
      while {[string length $line($file)] + [string length $text] +\
            1 >= $wrap} {
         set t [expr {$wrap -[string length $line($file)]}]
         set s [string range $text 0 [expr {$t-1}]]
         if {[regexp {(^|[^\])(\\\\)*(\\[0-7]?[0-7]?)$} $s\
            foo bar baz escape]}\
         then {
            incr t -[string length $escape]
            set s [string range $text 0 [expr {$t-1}]]
         }
         append line($file) $s
         set text [string range $text $t end]
         pstokens::flush_line $file
      }
      append line($file) $text)
   }
}
%   \end{tcl}
% \end{proc}
% 
% \begin{proc}{put_token}
%   The |put_token| procedure has the syntax
%   \begin{quote}
%     |pstokens::put_token| \word{file-id} \word{token}
%   \end{quote}
%   It writes the \PS\ token \word{token} to the file with id 
%   \word{file-id}.
%   \begin{tcl}
proc pstokens::put_token {file token} {
   switch [lindex $token 0] {
      number -
      radixnumber -
      executable {pstokens::put_word $file [lindex $token 1]}
      literal {pstokens::put_word $file /[lindex $token 1]}
      immediate {pstokens::put_word $file //[lindex $token 1]}
      string {pstokens::put_string $file [lindex $token 1]}
      comment {
         pstokens::flush_line $file 0
         puts $file %[lindex $token 1][lindex $token 2]
      }
      special {pstokens::put_token $file [lindex $token 1]}
      procedure {
         pstokens::put_word $file \{
         foreach subtok [lrange $token 1 end] {
            pstokens::put_token $file $subtok
         }
         pstokens::put_word $file \}
      }
      hexstring {pstokens::put_breakable $file < [lindex $token 1] >}
      base-85 {pstokens::put_breakable $file <~ [lindex $token 1] ~>}
      eof {
         if {[llength $token]>1} then {
            pstokens::put_token $file [lindex $token 1]
         }
         pstokens::terminal_print "Token unexpectedly terminated by\
            end of file."
      }
      error {
         pstokens::terminal_print "Error token intercepted at put:\
            [switch [lindex $token 1] {
               extra {expr {"extra [lindex $token 2] at input line\
                  [lindex $token 3]"}}
               missing {expr {"missing [lindex $token 2] at input\
                  line [lindex $token 3]"}}
               missing/extra {expr {"missing [lindex $token 2], or\
                  extra [lindex $token 3], at input line\
                  [lindex $token 4]"}}
               default {expr {"!!!-unimplemented error-!!!"}}
            }]."
      }
      default {
         pstokens::terminal_print\
            "Token of unknown type encountered: $token"
      }
   }
}
%   \end{tcl}
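%   
%   Together with the reading routines above, |put_token| suffices 
%   for a simple token-level copy of a file (the file names in this 
%   example are of course made up):
%   \begin{quote}
%     |set in [pstokens::open_input src.ps]|\\
%     |set out [pstokens::open_output dst.ps]|\\
%     |while {"[lindex [set t [pstokens::get_token $in]] 0]" != "eof"} {|\\
%     |   pstokens::put_token $out $t|\\
%     |}|\\
%     |pstokens::close_input $in|\\
%     |pstokens::close_output $out|
%   \end{quote}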
% \end{proc}


% \begin{tcl}
%</tokenio>
% \end{tcl}
% 
% \section{Token stream editing}
% 
% This section is still on my ToDo list.
% 
% 
% 
% \Finale
% 
\endinput