diff options
Diffstat (limited to 'tools/linuxdoc-tools/LinuxDocTools.pm')
-rw-r--r-- | tools/linuxdoc-tools/LinuxDocTools.pm | 668 |
1 files changed, 668 insertions, 0 deletions
diff --git a/tools/linuxdoc-tools/LinuxDocTools.pm b/tools/linuxdoc-tools/LinuxDocTools.pm new file mode 100644 index 00000000..12b77319 --- /dev/null +++ b/tools/linuxdoc-tools/LinuxDocTools.pm @@ -0,0 +1,668 @@ +#! /usr/bin/perl +# +# LinuxDocTools.pm +# +# LinuxDoc-Tools driver core. This contains all the basic functionality +# we need to control all other components. +# +# Copyright © 1996, Cees de Groot. +# Copyright © 2000, Taketoshi Sano +# Copyright © 2006-2018, Agustin Martin +# -------------------------------------------------------------------------------- + +package LinuxDocTools; + +require 5.006; +use strict; + +=head1 NAME + +LinuxDocTools - SGML conversion utilities for LinuxDoc DTD. + +=head1 SYNOPSIS + + use LinuxDocTools; + LinuxDocTools::init; + @files = LinuxDocTools::process_options ($0, @ARGV); + for $curfile (@files) { + LinuxDocTools::process_file ($curfile); + } + +=head1 DESCRIPTION + +The LinuxDocTools package encapsulates all the functionality offered by +LinuxDoc-Tools. It is used, of course, by LinuxDoc-Tools; +but the encapsulation should provide for a simple interface for other users as well. + +=head1 FUNCTIONS + +=over 4 + +=cut + +use File::Copy; +use File::Temp qw(tempdir); +use File::Basename qw(fileparse); +use LinuxDocTools::Lang; +use LinuxDocTools::Utils qw(usage cleanup trap_signals remove_tmpfiles create_temp); +use LinuxDocTools::Vars; + +sub BEGIN +{ + # + # Make sure we're always looking here. Note that "use lib" adds + # on the front of the search path, so we first push dist, then + # site, so that site is searched first. + # + use lib "$main::DataDir/dist"; + use lib "$main::DataDir/site"; +} + +# ----------------------------------------------------------------------------------- +sub ldt_searchfile { +# ----------------------------------------------------------------------------------- +# Look for a readable file in the locations. Return first math. +# ----------------------------------------------------------------------------------- + my $files = shift; + foreach my $file ( @$files ){ + return $file if -r $file; + } +} + +# ----------------------------------------------------------------------------------- +sub ldt_getdtd_v1 { +# ----------------------------------------------------------------------------------- +# Get the dtd +# ----------------------------------------------------------------------------------- + my $file = shift; + my $error_header = "LinuxdocTools::ldt_getdtd_v1"; + my $dtd; + + open ( my $FILE, "< $file") + or die "$error_header: Could not open \"$file\" for reading. Aborting ...\n"; + + while ( <$FILE> ) { + tr/A-Z/a-z/; + # check for [<!doctype ... system] type definition + if ( /<!doctype\s*(\w*)\s*system/ ) { + $dtd = $1; + last; + # check for <!doctype ... PUBLIC ... DTD ... + } elsif ( /<!doctype\s*\w*\s*public\s*.*\/\/dtd\s*(\w*)/mi ) { + $dtd = $1; + last; + # check for <!doctype ... + # PUBLIC ... DTD ... + # (multi-line version) + } elsif ( /<!doctype\s*(\w*)/ ) { + $dtd = "precheck"; + next; + } elsif ( /\s*public\s*.*\/\/dtd\s*(\w*)/ && $dtd eq "precheck" ) { + $dtd = $1; + last; + } + } + close $FILE; + + return $dtd; +} + +# ----------------------------------------------------------------------------------- +sub ldt_getdtd_v2 { +# ----------------------------------------------------------------------------------- +# Second way of getting dtd, fron nsgmls output. +# ----------------------------------------------------------------------------------- + my $preaspout = shift; + my $error_header = "LinuxdocTools::ldt_getdtd_v2"; + my $dtd2; + + open (my $TMP,"< $preaspout") + or die "%error_header: Could not open $preaspout for reading. Aborting ...\n"; + while ( defined ($dtd2 = <$TMP>) && ! ( $dtd2 =~ /^\(/) ) { }; + close $TMP; + $dtd2 =~ s/^\(//; + $dtd2 =~ tr/A-Z/a-z/; + chomp $dtd2; + return $dtd2; +} + +# ----------------------------------------------------------------------------------- +sub ldt_latin1tosgml { +# ----------------------------------------------------------------------------------- +# Convert latin1 chars in input filehandle to sgml entities in the returned string +# ----------------------------------------------------------------------------------- + my $FILE = shift; + my $sgmlout; + + while (<$FILE>){ + # Outline these commands later on - CdG + #change latin1 characters to SGML + #by Farzad Farid, adapted by Greg Hankins + s/À/\À/g; + s/Á/\Á/g; + s/Â/\Â/g; + s/Ã/\Ã/g; + s/Ä/\Ä/g; + s/Å/\Å/g; + s/Æ/\Æ/g; + s/Ç/\Ç/g; + s/È/\È/g; + s/É/\É/g; + s/Ê/\Ê/g; + s/Ë/\Ë/g; + s/Ì/\Ì/g; + s/Í/\Í/g; + s/Î/\Î/g; + s/Ï/\Ï/g; + s/Ñ/\Ñ/g; + s/Ò/\Ò/g; + s/Ó/\Ó/g; + s/Ô/\Ô/g; + s/Õ/\Õ/g; + s/Ö/\Ö/g; + s/Ø/\Ø/g; + s/Ù/\Ù/g; + s/Ú/\Ú/g; + s/Û/\Û/g; + s/Ü/\Ü/g; + s/Ý/\Ý/g; + s/Þ/\Þ/g; + s/ß/\ß/g; + s/à/\à/g; + s/á/\á/g; + s/â/\â/g; + s/ã/\ã/g; + s/ä/\ä/g; + s/å/\å/g; + s/æ/\æ/g; + s/ç/\ç/g; + s/è/\è/g; + s/é/\é/g; + s/ê/\ê/g; + s/ë/\ë/g; + s/ì/\ì/g; + s/í/\í/g; + s/î/\î/g; + s/ï/\ï/g; + s/µ/\μ/g; + s/ð/\ð/g; + s/ñ/\ñ/g; + s/ò/\ò/g; + s/ó/\ó/g; + s/ô/\ô/g; + s/õ/\õ/g; + s/ö/\ö/g; + s/ø/\ø/g; + s/ù/\ù/g; + s/ú/\ú/g; + s/û/\û/g; + s/ü/\ü/g; + s/ý/\ý/g; + s/þ/\þ/g; + s/ÿ/\ÿ/g; + $sgmlout .= $_; + } + return $sgmlout; +} + +# ------------------------------------------------------------------------ + +=item LinuxDocTools::init + +Takes care of initialization of package-global variables (which are actually +defined in L<LinuxDocTools::Vars>). The package-global variables are I<$global>, +a reference to a hash containing numerous settings, I<%Formats>, a hash +containing all the formats, and I<%FmtList>, a hash containing the currently +active formats for help texts. + +Apart from this, C<LinuxDocTools::init> also finds all distributed and site-local +formatting backends and C<require>s them. + +=cut + +# ----------------------------------------------------------------------------------- +sub init { +# ----------------------------------------------------------------------------------- + trap_signals; + + # Register the ``global'' pseudoformat. Apart from the global settings, we + # also use $global to keep the global variable name space clean everything + # that we need to provide to other modules is stuffed into $global. + $global = {}; + $global->{NAME} = "global"; + $global->{HELP} = ""; + $global->{OPTIONS} = [ + { option => "backend", + type => "l", + 'values' => [ "html", "info", "latex", "lyx", "rtf", "txt", "check" ], + short => "B" }, + { option => "papersize", + type => "l", + 'values' => [ "a4", "letter" ], + short => "p" }, + { option => "language", + type => "l", + 'values' => [ @LinuxDocTools::Lang::Languages ], + short => "l" }, + { option => "charset", type => "l", + 'values' => [ "latin", "ascii", "nippon", "euc-kr" ], short => "c" }, + { option => "style", type => "s", short => "S" }, + { option => "tabsize", type => "i", short => "t" }, + # { option => "verbose", type => "f", short => "v" }, + { option => "debug", type => "f", short => "d" }, + { option => "define", type => "s", short => "D" }, + { option => "include", type => "s", short => "i" }, + { option => "pass", type => "s", short => "P" } + ]; + $global->{backend} = "linuxdoc"; + $global->{papersize} = "a4"; + $global->{language} = "en"; + $global->{charset} = "ascii"; + $global->{style} = ""; + $global->{tabsize} = 8; + $global->{verbose} = 0; + $global->{define} = ""; + $global->{debug} = 0; + $global->{include} = ""; + $global->{pass} = ""; + $global->{InFiles} = []; + $global->{fmtlist} = ""; # List of loaded fmt files + $Formats{$global->{NAME}} = $global; # All formats we know. + $FmtList{$global->{NAME}} = $global; # List of formats for help msgs. + + $global->{sgmlpre} = "$main::AuxBinDir/sgmlpre"; + my $error_header = "LinuxdocTools::init"; + + if ( -e "/etc/papersize" ){ + open (my $PAPERSIZE,"< /etc/papersize") || + die "$error_header: Count not open \"/etc/papersize\" for reading\n"; + chomp (my $paper = <$PAPERSIZE>); + $global->{papersize} = "letter" if ( $paper eq "letter"); + close $PAPERSIZE; + } + + # automatic language detection: disabled by default + # { + # my $lang; + # foreach $lang (@LinuxDocTools::Lang::Languages) + # { + # if (($ENV{"LC_ALL"} =~ /^$lang/i) || + # ($ENV{"LC_CTYPE"} =~ /^$lang/i) || + # ($ENV{"LANG"} =~ /^$lang/i)) { + # $global->{language} = Any2ISO($lang); + # } + # } + # } + + # -------------------------------------------------------------------------------- + $global->{preNSGMLS} = sub { + # ------------------------------------------------------------------------------ + # Define a fallback preNSGMLS. Used when the format is "global" (from sgmlcheck). + # ------------------------------------------------------------------------------ + $global->{NsgmlsOpts} .= " -s "; + $global->{NsgmlsPrePipe} = "cat $global->{file}"; + }; + + # We need to load all fmt files here, so the allowed options for all + # format are put into $global and a complete usage message is built, + # including options for all formats. + my %locations = (); + foreach my $path ("$main::DataDir/site", + "$main::DataDir/dist", + "$main::DataDir/fmt"){ + foreach my $location (<$path/fmt_*.pl>){ + my $fmt = $location; + $fmt =~ s/^.*_//; + $fmt =~ s/\.pl$//; + $locations{$fmt} = $location unless defined $locations{$fmt}; + } + } + + foreach my $fmt ( keys %locations ){ + $global->{fmtlist} .= " Loading $locations{$fmt}\n"; + require $locations{$fmt}; + } +} + +# ------------------------------------------------------------------------ + +=item LinuxDocTools::process_options ($0, @ARGV) + +This function contains all initialization that is bound to the current +invocation of LinuxDocTools. It looks in C<$0> to deduce the backend that +should be used (ld2txt activates the I<txt> backend) and parses the +options array. It returns an array of filenames it encountered during +option processing. + +As a side effect, the environment variable I<SGML_CATALOG_FILES> is +modified and, once I<$global->{format}> is known, I<SGMLDECL> is set. + +=cut + +# ------------------------------------------------------------------------ +sub process_options { +# ------------------------------------------------------------------------ + my $progname = shift; + my @tmpargs = @_; + my @args = (); + my $format = ''; + + # Try getting the format. We need to do this here so process_options + # knows which is the format and which format options are allowed + + # First, see if we have an explicit backend option by looping over command line. + # Do not shift in the while condition itself, 0 in options like '-s 0' will + # otherwise stop looping + while ( @tmpargs ){ + $_ = shift @tmpargs; + if ( s/--backend=// ){ + $format = $_; + } elsif ( $_ eq "-B" ){ + $format = shift @tmpargs; + } else { + push @args, $_; + } + } + + unless ( $format ){ + my ($tmpfmt, $dummy1, $dummy2) = fileparse($progname, ""); + if ( $tmpfmt =~ s/^sgml2// ) { # Calling program through sgml2xx symlinks + $format = $tmpfmt; + } elsif ( $tmpfmt eq "sgmlcheck" ) { # Calling program through sgmlcheck symlink + $format = "global"; + } + } + + if ( $format ) { + if ( $format eq "check" ){ + $format = "global"; + } elsif ( $format eq "latex" ){ + $format = "latex2e"; + } + $FmtList{$format} = $Formats{$format} or + usage("$format: Unknown format"); + $global->{format} = $format; + } else { + usage(""); + } + + # Parse all the options from @args, and return files. + my @files = LinuxDocTools::Utils::process_options(@args); + + # Check the number of given files + $#files > -1 || usage("No filenames given"); + + # Normalize language string + $global->{language} = Any2ISO($global->{language}); + + # Setup the SGML environment. + my @sgmlcatalogs = + (# SGML iso-entities catalog location in Debian sgml-data package + "$main::isoentities_prefix/share/sgml/entities/sgml-iso-entities-8879.1986/catalog", + # SGML iso-entities catalog location in ArchLinux, Fedora and Gentoo + "$main::isoentities_prefix/share/sgml/sgml-iso-entities-8879.1986/catalog", + # SGML iso-entities catalog location when installed from linuxdoc-tools + "$main::isoentities_prefix/share/sgml/iso-entities-8879.1986/iso-entities.cat", + # dtd/catalog for SGML-Tools + "$main::DataDir/linuxdoc-tools.catalog", + # The super catalog + "/etc/sgml/catalog"); + + @sgmlcatalogs = ($ENV{SGML_CATALOG_FILES}, @sgmlcatalogs) if defined $ENV{SGML_CATALOG_FILES}; + + $ENV{SGML_CATALOG_FILES} = join(':', @sgmlcatalogs); + + # Set to one of these if readable, nil otherwise + $ENV{SGMLDECL} = ldt_searchfile(["$main::DataDir/dtd/$global->{format}.dcl", + "$main::DataDir/dtd/$global->{style}.dcl", + "$main::DataDir/dtd/sgml.dcl"]); + + # Show the list of loaded fmt_*.pl files if debugging + print STDERR $global->{fmtlist} if $global->{debug}; + + # Return the list of files to be processed + return @files; +} + +# ------------------------------------------------------------------------ + +=item LinuxDocTools::process_file + +With all the configuration done, this routine will take a single filename +and convert it to the currently active backend format. The conversion is +done in a number of steps in tight interaction with the currently active +backend (see also L<LinuxDocTools::BackEnd>): + +=over + +=item 1. Backend: set NSGMLS options and optionally create a pre-NSGMLS pipe. + +=item 2. Here: Run the preprocessor to handle conditionals. + +=item 3. Here: Run NSGMLS. + +=item 4. Backend: run pre-ASP conversion. + +=item 5. Here: Run SGMLSASP. + +=item 6. Backend: run post-ASP conversion, generating the output. + +=back + +All stages are influenced by command-line settings, currently active format, +etcetera. See the code for details. + +=cut + +# ------------------------------------------------------------------------ +sub process_file { +# ------------------------------------------------------------------------ + my $file = $global->{origfile} = shift (@_); + my $saved_umask = umask; + my $error_header = "LinuxdocTools::process_file"; + + print "Processing file $file\n"; + umask 0077; + + my ($filename, $filepath, $filesuffix) = fileparse($file, "\.sgml"); + $global->{filename} = $filename; + $global->{filepath} = $filepath; + $global->{file} = ldt_searchfile(["$filepath/$filename.sgml", + "$filepath/$filename.SGML"]) + or die "$error_header: Cannot find $file. Aborting ...\n"; + + my $dtd = ldt_getdtd_v1("$global->{file}"); + print STDERR "DTD: " . $dtd . "\n" if $global->{debug}; + + # Prepare temporary directory + my $tmpdir = $ENV{'TMPDIR'} || '/tmp'; + $tmpdir = tempdir("linuxdoc-tools.XXXXXXXXXX", DIR => "$tmpdir"); + + # Set common base name for temp files and temp file names + my $tmpbase = $global->{tmpbase} = $tmpdir . '/sgmltmp.' . $filename; + my $precmdout = "$tmpbase.01.precmdout"; + my $nsgmlsout = "$tmpbase.02.nsgmlsout"; # Was $tmpbase.1 + my $preaspout = "$tmpbase.03.preaspout"; # Was $tmpbase.2 + my $aspout = "$tmpbase.04.aspout"; # Was $tmpbase.3 + + # Set up the preprocessing command. Conditionals have to be + # handled here until they can be moved into the DTD, otherwise + # a validating SGML parser will choke on them. + + # Check if output option for latex is pdf or not + if ($global->{format} eq "latex2e") { + if ($Formats{$global->{format}}{output} eq "pdf") { + $global->{define} .= " pdflatex=yes"; + } + } + + # Set the actual pre-processing command + my($precmd) = "| $global->{sgmlpre} output=$global->{format} $global->{define}"; + + # Make sure path of file to be processed is in SGML_SEARCH_PATH + $ENV{"SGML_SEARCH_PATH"} .= ":$filepath"; + + # You can hack $NsgmlsOpts here, etcetera. + $global->{NsgmlsOpts} .= "-D $main::prefix/share/sgml -D $main::DataDir"; + $global->{NsgmlsOpts} .= "-i$global->{include}" if ($global->{include}); + + # If a preNSGMLS function is defined in the fmt file, pipe its output to $FILE, + # otherwise just open $global->{file} as $IFILE + # ---------------------------------------------------------------------------- + my $IFILE; + if ( defined $Formats{$global->{format}}{preNSGMLS} ) { + $global->{NsgmlsPrePipe} = &{$Formats{$global->{format}}{preNSGMLS}}; + open ($IFILE,"$global->{NsgmlsPrePipe} |") + || die "$error_header: Could not open pipe from $global->{NsgmlsPrePipe}. Aborting ...\n"; + } else { + open ($IFILE,"< $global->{file}") + || die "$error_header: Could not open $global->{file} for reading. Aborting ...\n"; + } + + # Create a temp file with $precmd output + my $precmd_command = "$precmd > $precmdout"; + + open (my $PRECMDOUT, "$precmd_command") + or die "$error_header: Could not open pipe to $precmdout. Aborting ...\n"; + + if ($global->{charset} eq "latin") { + print $PRECMDOUT ldt_latin1tosgml($IFILE); + } else { + copy($IFILE,$PRECMDOUT); + } + + close $IFILE; + close $PRECMDOUT; + + # Process with nsgmls. + my $nsgmls_command = "$main::progs->{NSGMLS} $global->{NsgmlsOpts} $ENV{SGMLDECL} $precmdout > $nsgmlsout"; + system($nsgmls_command) == 0 + or die "Error: \"$nsgmls_command\" failed with exit status: ",$? >> 8,"\n"; + + # Special case: if format is global, we're just checking. + cleanup if ( $global->{format} eq "global"); + + # If output file does not exists or is empty, something went wrong. + if ( ! -e "$nsgmlsout" ) { + die "$error_header: Can't create file $nsgmlsout. Aborting ...\n"; + } elsif ( -z "$nsgmlsout" ){ + die "$error_header: $nsgmlsout empty, SGML parsing error. Aborting ...\n"; + } + + print "- Nsgmls stage finished.\n" if $global->{debug}; + + # If a preASP stage is defined, let the format handle it. + # -------------------------------------------------------- + open (my $PREASP_IN, "< $nsgmlsout") + or die "$error_header: Could not open $nsgmlsout for reading. Aborting ...\n"; + open (my $PREASP_OUT, "> $preaspout") + or die "$error_header: Could not open $preaspout for writing. Aborting ...\n"; + + if (defined $Formats{$global->{format}}{preASP}) { + # Usage: preASP ($INHANDLE, $OUTHANDLE); + &{$Formats{$global->{format}}{preASP}}($PREASP_IN, $PREASP_OUT) == 0 + or die "$error_header: Error pre-processing $global->{format}.\n"; + } else { + copy ($PREASP_IN, $PREASP_OUT); + } + + close $PREASP_IN; + close $PREASP_OUT; + + die "$error_header: Can't create $preaspout file. Aborting ...\n" + unless -e "$preaspout"; + + print "- PreASP stage finished.\n" if ( $global->{debug} ); + + # Run sgmlsasp, with an optional style if specified. + # ----------------------------------------------------------- + my $dtd2 = ldt_getdtd_v2($preaspout) + or die "$error_header: Could not read dtd from $preaspout. Aborting ...\n"; + + unless ( $dtd eq $dtd2 ){ + print STDERR "Warning: Two different values for dtd, dtd1: $dtd, dtd2: $dtd2\n"; + $dtd = $dtd2; + } + + $global->{'dtd'} = $dtd; + + # Search order: + # - datadir/site/<dtd>/<format> + # - datadir/dist/<dtd>/<format> + + my $style = ($global->{style}) ? + ldt_searchfile(["$main::DataDir/site/$dtd/$global->{format}/$global->{style}mapping", + "$main::DataDir/dist/$dtd/$global->{format}/$global->{style}mapping", + "$main::DataDir/mappings/$global->{format}/$global->{style}mapping"]) + : + ''; + + my $mapping = ldt_searchfile(["$main::DataDir/site/$dtd/$global->{format}/mapping", + "$main::DataDir/dist/$dtd/$global->{format}/mapping", + "$main::DataDir/mappings/$global->{format}/mapping"]) + or die "$error_header: Could not find mapping file for $dtd/$global->{format}. Aborting ...\n"; + + $mapping = "$style $mapping" if $style; + + $global->{charset} = "nippon" if ($global->{language} eq "ja"); + + # We don't have Korean groff so charset should be latin1. + if ($global->{language} eq "ko") { + if ($global->{format} eq "groff") { + $global->{charset} = "latin1"; + } else { + $global->{charset} = "euc-kr"; + } + } + + if ($global->{format} eq "groff"){ + if ($dtd eq "linuxdoctr") { + $mapping = "$main::DataDir/mappings/$global->{format}/tr-mapping"; + } + } + + my $sgmlsasp_command = "$main::progs->{SGMLSASP} $mapping < $preaspout | + expand -t $global->{tabsize} > $aspout"; + system ($sgmlsasp_command) == 0 + or die "$error_header: Error running $sgmlsasp_command. Aborting ...\n"; + + die "$error_header: Can't create $aspout file. Aborting ...\n" + unless -e "$aspout"; + + print "- ASP stage finished.\n" if ( $global->{debug} ); + + # If a postASP stage is defined, let the format handle it. + # ---------------------------------------------------------------- + umask $saved_umask; + + open (my $INPOSTASP, "< $aspout" ) + or die "$error_header: Could not open $aspout for reading. Aborting ...\n"; + if (defined $Formats{$global->{format}}{postASP}) { + # Usage: postASP ($INHANDLE) + # Should leave whatever it thinks is right based on $INHANDLE. + &{$Formats{$global->{format}}{postASP}}($INPOSTASP) == 0 + or die "$error_header: Error post-processing $global->{format}. Aborting ...\n"; + } + close $INPOSTASP; + + print "- postASP stage finished.\n" if ( $global->{debug} ); + + # All done, remove the temporaries. + remove_tmpfiles($tmpbase) unless ( $global->{debug} ); +} + +=pod + +=back + +=head1 SEE ALSO + +Documentation for various sub-packages of LinuxDocTools. + +=head1 AUTHOR +SGMLTools are written by Cees de Groot, C<E<lt>cg@cdegroot.comE<gt>>, +and various SGML-Tools contributors as listed in C<CONTRIBUTORS>. +Taketoshi Sano C<E<lt>sano@debian.org<gt>> rename to LinuxDocTools. + +=cut +1; |