#!/usr/bin/perl
#
# ========================================================================
#  @perl-file{
#     author              = "Alan Jeffrey",
#     version             = "0.19",
#     date                = "15 January 1996",
#     time                = "12:26:55 GMT",
#     filename            = "texfaq2html",
#     address             = "School of Cognitive and Computing Sciences
#                            University of Sussex
#                            Brighton BN1 9QH
#                            UK",
#     FAX                 = "+44 1273 671320"
#     email               = "alanje@cogs.sussex.ac.uk",
#     codetable           = "ISO/ASCII",
#     keywords            = "LaTeX FAQ HTML",
#     supported           = "yes",
#     abstract            = "This perl script converts the UKTUG TeX FAQ
#                            LaTeX source document into HTML, on the fly."
#     package             = "stands alone",
#     dependencies        = "faqbody.tex, newfaq.aux, dirctan.tex,
#                            filectan.tex", 
#  }
# ========================================================================

# A script to provide a searchable WWW interface to the UKTUG TeX
# FAQ file.
#
# The script takes parameters in the form of the QUERY_STRING environment 
# variable. 

# Copyright 1994 Alan Jeffrey

# 15 Sep 1994, v0.01: Created script.
#
# 16 Sep 1994, v0.02: Added the ability to download more than one
#    question at once.  Added some extra filters.
#
# 23 Sep 1994, v0.03: Updated to new markup syntax.
#
# 5 Oct 1994, v0.04: Updated to new markup syntax.
#
# 7 Oct 1994, v0.05: Added --, ---, \CTANref and \CTANlabel.
#
# 26 Oct 1994, v0.06: Added some more markup.  Read CTAN definitions
#    from separate files.  Read the Qrefs from the aux file.
#
# 28 Oct 1994, v0.07: Added \@ and the optional argument to \Qref.
#    Changed the banner.  
#
# 8 Nov 1994, v0.08: Added footnoteenv.   Changed the \Qref syntax to
#    match Robin's.  Added \ldots and \large.  Added \ProgName.  Added
#    \&. Moved \cs before \Qref so you can have \cs's inside \Qref's.
#    Renamed newfaq.tex to faqbody.tex.
#
# 25 Nov 1994, v0.09: Added \dots, $e$, \ISBN, \#, \ ,
#    \begin{comment}...\end{comment}. 
#    \cs now takes its argument between |...|.
#
# 2 Dec 1994, v0.10: Added \protect and proglist.
#
# 6 Dec 1994, v0.11: Allow \fileversion and \filedate on the same
#    line.
#
# 9 Mar 1995, v0.12: Made relative paths explicit for the new httpd.
#
# 15 Sep 1995, v0.13: Fixed a bug where I'd assumed that html-ignored
#    material wasn't nested (in fact the document has a footnote
#    inside an htmlignore).
#
# 23 Nov 1995, v0.14: Added \MP{}.
#
# 28 Nov 1995, v0.15: Added \TUGboat{} and \Package|...|.
#
# 2 Jan 1996, v0.16: Added \AllTeX{}, \acro{...} and \nothtml{...}.
#
# 15 Jan 1996, v0.17: Added \twee.
#
# 10 Jun 1997, v0.17a: for operation at Cambridge
#
# 18 Aug 1997, v0.18: alternative sites and formats
#
# 8 Oct 1997, v0.19: perl5 update, changed format of all file-name variables
#                    so they don't contain the directory name, so that
#                    reports of problems don't constitute a security hole
#
# 14 Sep 2000: version to run with betas of the faq


require "sanitize.pl";

# Site-specific settings.
#
# $home is the directory holding the FAQ sources: the FAQ_HOME environment
# variable when that is set to a true value, otherwise a built-in path.
# The generated HTML goes into the "html" directory beneath it.  The
# remaining variables are bare file names, always used relative to $home.

$home = "$ENV{FAQ_HOME}";
$home = "/home/rf/tex/faq" unless $home;
$faqhtml = "$home/html";

($texfaq, $auxfaq) = ("faqbody.tex", "newfaq.aux");
($ctandir, $ctanfiles) = ("dirctan.tex", "filectan.tex");
$archive_list = "archive.list";

# Defaults; these have to be allowed by the $archive_list file.

$default_archive = "cam.ctan.org";
$default_format  = "tar.gz";

# pro tem -- can we set these differently?

($arch, $fmt) = ($default_archive, $default_format);

# Table of symbols we believe in, keyed by character code:
# 92 is the backslash, 123 the opening brace and 125 the closing brace.
%SymbolChar = map { ($_ => chr($_)) } (92, 123, 125);

# This script produces HTML, but we're sending the stuff to file
# so we needn't tell anyone
#
# print ("Content-type: text/html\n\n");

# Initial conversion state: converting is switched on, ignoring and
# sectioning are switched off.  $converting gates the section-heading
# handling in the main loop below; the other two are not read anywhere in
# this script (presumably sanitize.pl uses them -- confirm there).

($converting, $ignoring, $sectioning) = (1, 0, 0);

# Two things used in conversion of \item[ ]; both start out empty.

($itemset, $enditemset) = ("", "");

# Get the list of CTAN directories.
#
# Every line of $ctandir containing \CTANdirectory{key}{path} records
# "path.$fmt" (the directory as an archive file of the chosen format) under
# key in %ctanref; the starred form \CTANdirectory*{key}{path} records
# "path/" instead.  Other lines are ignored.
#
# The file is opened with an explicit read mode so that nothing in $home
# (which comes from the environment) can be taken for an open mode, and the
# handle is closed as soon as the table has been read.

open (CTANDIR, "<", "$home/$ctandir")
        || &oh_dear ("Couldn't open $ctandir");

while (<CTANDIR>) {
    if ( /\\CTANdirectory\{([^\}]*)\}\{([^\}]*)\}/ ) {
        $ctanref{$1} = "$2.$fmt";
    } elsif ( /\\CTANdirectory\*\{([^\}]*)\}\{([^\}]*)\}/ ) {
        $ctanref{$1} = "$2/";
    }
}

close (CTANDIR);

# Get the list of CTAN files.
#
# Every line of $ctanfiles containing \CTANfile{key}{path} records path
# under key in %ctanref (the same table the CTAN directories go into).
#
# The file is opened with an explicit read mode so that nothing in $home
# (which comes from the environment) can be taken for an open mode, and the
# handle is closed as soon as the table has been read.

open (CTANFILES, "<", "$home/$ctanfiles")
        || &oh_dear ("Couldn't open $ctanfiles");

while (<CTANFILES>) {
    if ( /\\CTANfile\{([^\}]*)\}\{([^\}]*)\}/ ) {
        $ctanref{$1} = "$2";
    }
}

close (CTANFILES);

# Get the list of allowable archives.
#
# Each non-blank line of $archive_list is "<archive> <root directory>";
# the root directory is stored under the archive name in %archive_root.
# The default archive must be present (with a true root directory),
# otherwise the run is abandoned; its root is kept in $root.
#
# chomp (not chop) is used so that a final line without a trailing newline
# does not lose the last character of its root directory, and blank lines
# are skipped so that they do not create an entry for an empty archive
# name.  The file is opened with an explicit read mode so that nothing in
# $home can be taken for an open mode.

open (ARCHIVE_LIST, "<", "$home/$archive_list")
    || &oh_dear ("Couldn't open $archive_list");

while (<ARCHIVE_LIST>) {
    chomp;
    next unless /\S/;
    ($archive, $root_dir) = split(/\s+/, $_, 2);
    $archive_root{$archive} = $root_dir;
}
close (ARCHIVE_LIST);

unless ( $root = $archive_root{$default_archive} ) {
    &oh_dear("Archive $default_archive isn't in my list") }

# Get the Qrefs.
#
# Every line of the aux file containing \newlabel{label}{{number}... maps
# label to the name of the HTML file written for that question,
# "FAQ<number>.html", in %qref.
#
# The file is opened with an explicit read mode so that nothing in $home
# (which comes from the environment) can be taken for an open mode, and the
# handle is closed as soon as the table has been read.

open (AUXFAQ, "<", "$home/$auxfaq")
        || &oh_dear ("Couldn't open $auxfaq");

while (<AUXFAQ>) {
    if (/\\newlabel\{([^\}]*)\}\{\{([^\}]*)\}/) {
        $qref{$1} = "FAQ$2.html";
    }
}

close (AUXFAQ);

# $labels to hold a (list of) label names for these question(s); a leading
# "+" is stripped.
# NOTE(review): nothing visible in this script assigns $labels, so this
# looks like a leftover from the query-string interface -- confirm before
# removing.
$labels =~ s/^\+//;

# Open the FAQ file.  An explicit read mode is given so that nothing in
# $home (which comes from the environment) can be taken for an open mode.

open (TEXFAQ, "<", "$home/$texfaq")
        || &oh_dear ("Couldn't open $texfaq");

# Run through to the introduction, grabbing useful info: the FAQ version
# and date, from the \def\faqfileversion{...} and \def\faqfiledate{...}
# lines.  TEXFAQ is left positioned just after the \section{Introduction}
# line.

while (<TEXFAQ>) {
    last if /\\section\{Introduction\}/;
    $fileversion=$1 if /\\def\\faqfileversion\{([^\}]*)\}/;
    $filedate=$1 if /\\def\\faqfiledate\{([^\}]*)\}/;
}

# Open the main html file (index.html in $faqhtml) and write its fixed
# header.
#
# The mode is passed to open separately from the path, and the header is
# written with print rather than printf: it is literal text, not a format
# string.

open (FAQ, ">", "$faqhtml/index.html") ||
    oh_dear ("couldn't open output file");

print FAQ ("
<html><head>
<title>TeX Frequently Asked Questions</title>
</head><body>
<h1 align=\"center\">Welcome to the UK List of<br>
                TeX Frequently Asked Questions</h1>
");

# Copy the introduction into the index page: every line up to (but not
# including) the first \section line is passed through sanitize_line and
# written out.

print FAQ "<h2>Introduction</h2>\n";
INTRO_LINE: while (<TEXFAQ>) {
    if ( /\\section\{([^\}]*)\}/ ) {
        last INTRO_LINE;
    }
    &sanitize_line;
    print FAQ $_;
}

# Start the index of questions.  $_ still holds the \section line that
# ended the loop above; its title becomes the first heading and opens the
# first list of questions.
/\\section\{([^\}]*)\}/;
$_ = $1;
&sanitize_line;
print FAQ "<h3>" . $_ . "</h3><ul>\n";

# Main conversion loop.
#
# The FAQ source already open on TEXFAQ is processed first, then each file
# named on the command line (relative to $home) in turn.  Within a file:
#
#   * while $converting is set, a \section or \subsection line becomes an
#     <h3>/<h4> heading in the index page, closing the current list of
#     questions and starting a new one;
#   * a \Question line adds an entry to the index and starts a new
#     FAQ<n>.html file, closing the previous question's file if one is open;
#   * every other line is written, sanitized, to the question file that is
#     currently open (lines before the first question of a file are dropped).
#
# The question title is copied out of $2 as soon as the match succeeds, and
# the page header is written with print, not printf: the title is data, and
# a "%" in it must not be read as a format directive.  The mode is passed
# to open separately from the path so that a file name given on the command
# line cannot smuggle in a mode or a pipe.

# first pass, we have a file open by default
$faq_file_open = 1;

while ( $faq_file_open ) {

    # no question yet open
    $qfile_open = 0;

    while (<TEXFAQ>) {
        if ($converting) {
            if ( /^\s*\\section\{(.*)\}\s*$/ ) {
                $_=$1;
                &sanitize_line;
                print FAQ "</ul><h3>$_</h3><ul>\n";
                $_="";
            } elsif ( /^\s*\\subsection\{(.*)\}\s*$/ ) {
                $_=$1;
                &sanitize_line;
                print FAQ "</ul><h4>$_</h4><ul>\n";
                $_="";
            }
        }

        &sanitize_line;
        if (/^\s*\\Question(\[.*\])?\{(.*)\}\s*$/) {
            my $title = $2;
            $qnum++;
            $entry = "<li><a href=\"FAQ$qnum.html\">$title</a>\n";
            print FAQ $entry;
            if ($qfile_open) {
                print QFILE "</body>\n";
                close QFILE;
            }
            open (QFILE, ">", "$faqhtml/FAQ$qnum.html") ||
                oh_dear ("couldn't open file for question $qnum");
            $qfile_open=1;
            print QFILE ("<head>
<title>UK TeX FAQ -- question $qnum</title>
</head><body>
<h3>$title</h3>
");
        } else {
            if ($qfile_open) { print QFILE; }
        }
    }

    # end of this source file: finish off the last question, if any
    if ($qfile_open) {
        print QFILE "</body>\n";
        close QFILE;
    }

    # now: are there any other file names in ARGV?
    if ( @ARGV ) {
        close TEXFAQ;
        $next_file = shift @ARGV;
        open (TEXFAQ, "<", "$home/$next_file") ||
            oh_dear ("Couldn't open $home/$next_file");
    } else {
        $faq_file_open = 0;
    }
}

# finish off the index.html file listing all questions
print FAQ "</ul>\n";

# Report the number of questions processed.  $qnum is only ever set by
# incrementing it for a \Question line, so it is undefined when no question
# was seen; report that case as 0 rather than as an empty count.
print +($qnum || 0), " questions processed\n";

# An error report: print the message given as the first argument on the
# currently selected output handle, then abandon the run.
#
# The same message is used for die, so that whatever catches or displays
# the failure sees the actual problem (the original died with the unrelated
# contents of $_).  The trailing newline stops perl appending the file and
# line of this sub, which say nothing about the cause.

sub oh_dear {
    my ($message) = @_;
    print $message, "\n";
    die "$message\n";
}


# Print a tail on the index page: the maintenance credit, links to the
# "Improving the FAQ" and "Extending the FAQ" answers (looked up in %qref
# under the labels Q-noans and Q-newans), and the FAQ version and date
# picked up from the source.
#
# Everything is written with print, not printf: the text interpolates data
# read from the FAQ files, and a "%" in that data must not be read as a
# format directive.

print FAQ ("
<hr><address>
   Maintenance of the
   <a href=\"http://www.tex.ac.uk/faq\">TeX FAQ</a>
   is coordinated by Robin Fairbairns.<p>
");

$qref_noans=$qref{"Q-noans"};
$qref_newans=$qref{"Q-newans"};

print FAQ ("
   Comments, suggestions, or error reports? -- see
   \"<a href=\"$qref_noans\">Improving the FAQ</a>\" or
   \"<a href=\"$qref_newans\">Extending the FAQ</a>\". 
<p>
   This is FAQ version $fileversion, last modified on $filedate.
</address>
</body></html>
");

# That's it!