diff options
Diffstat (limited to 'tools/linuxdoc-tools/LinuxDocTools/CharEnts.pm')
-rw-r--r-- | tools/linuxdoc-tools/LinuxDocTools/CharEnts.pm | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/tools/linuxdoc-tools/LinuxDocTools/CharEnts.pm b/tools/linuxdoc-tools/LinuxDocTools/CharEnts.pm new file mode 100644 index 00000000..b0bcd532 --- /dev/null +++ b/tools/linuxdoc-tools/LinuxDocTools/CharEnts.pm @@ -0,0 +1,176 @@ +# +# CharEnts.pm +# +# $Id: CharEnts.pm,v 1.1.1.1 2001/05/24 15:57:41 sano Exp $ +# +# SGML Character Entity utilities -- interface to Perl module +# Text::EntityMap. +# +package LinuxDocTools::CharEnts; +use strict; + +=head1 NAME + +LinuxDocTools::CharEnts - Interface to Text::EntityMap + +=head1 SYNOPSIS + + my $char_maps = load_char_maps ('.2ext', [ Text::EntityMap::sdata_dirs() ]); + + $value = parse_data ($value, $char_maps, $escape_sub); + +=head1 DESCRIPTION + +This module provides a simple interface to the entity map handling provided by +B<Text::EntityMap>. + +=head1 FUNCTIONS + +=over 4 + +=cut + +use Text::EntityMap; +use Exporter; + +use vars qw(@ISA @EXPORT $VERSION); +@ISA = qw(Exporter); +@EXPORT = qw(load_char_maps parse_data); +$VERSION = sprintf("%d.%02d", q$Revision: 1.1.1.1 $ =~ /(\d+)\.(\d+)/); + +# `%warn_map' tracks entities that were not able to be mapped so they +# are only warned once. +my %warn_map = (); + +=item parse_data ($data, $char_map, $escape_sub) + +B<parse_data> takes a string of I<$data> in the output format of +B<nsgmls> (see SP's C<sgmlsout.htm> document) without the leading dash. +B<parse_data> calls I<$char_map>'s lookup method for each sdata +entity reference. If the entity reference is undefined, it is +left alone (without the (n)sgmls C<\|>). For all remaining data, +B<parse_data> calls back into I<$escape_sub> to properly escape +characters for the backend formatter. Strings returned from the +lookup method are assumed to be already escaped. + +This routine is derived from David Megginson's SGMLSpm. 

=cut

# _resolve_sdata ($name, $char_map, $escape_sub)
#
# Private helper for parse_data: turns one SDATA entity name into output
# text.  $char_map->lookup is called in list context and only its first
# return value is used.  A defined mapping is returned with every `\'
# doubled and is NOT passed through $escape_sub.  When there is no
# mapping, a warning is issued the first time that name is seen (tracked
# in the file-level %warn_map) and the name wrapped in `{}' is returned
# after being passed through $escape_sub.
sub _resolve_sdata {
    my ($name, $char_map, $escape_sub) = @_;

    my ($mapping) = $char_map->lookup ($name);
    if (defined $mapping) {
        # escape `\' in mapping for ASP
        $mapping =~ s/\\/\\\\/g;
        return $mapping;
    }

    if (!$warn_map{$name}) {
        warn "parse_data: no entity map for \`$name'\n";
        $warn_map{$name} = 1;
    }

    # output the entity reference inside of `{}'
    return scalar $escape_sub->("{" . $name . "}");
}

# parse_data ($data, $char_map, $escape_sub)
#
# Scans $data for the backslash escapes `\\', `\n', `\|' and `\' followed
# by one to three octal digits, and returns the converted string.
#
#   $data        string to convert
#   $char_map    object whose lookup method maps an SDATA entity name to
#                replacement text (or undef when it has no mapping)
#   $escape_sub  code reference applied to all text that does not come
#                from a successful lookup
#
# A backslash followed by anything else is not an escape and stays in the
# surrounding text.  The input is walked with a /g match and the @-/@+
# offset arrays instead of $` and $', so each character is copied once
# and the program-wide cost of the pre/post-match variables is avoided.
sub parse_data {
    my ($data, $char_map, $escape_sub) = @_;

    my $result   = '';
    my $in_sdata = 0;    # true while between an opening and closing `\|'
    my $consumed = 0;    # offset in $data just past the previous escape

    while ($data =~ /\\(\\|n|\||[0-7]{1,3})/g) {
        # Save the capture and offsets right away: the helper and the
        # callback may run regexes of their own.
        my $escape = $1;
        my $text   = substr ($data, $consumed, $-[0] - $consumed);
        $consumed  = $+[0];

        if ($escape eq '|') {
            # beginning or end of SDATA; empty text produces no output
            # but the state still flips
            if ($text ne '') {
                $result .= $in_sdata
                    ? _resolve_sdata ($text, $char_map, $escape_sub)
                    : $escape_sub->($text);
            }
            $in_sdata = !$in_sdata;
        }
        elsif ($escape eq 'n') {
            # record end: pass '\\n' through to ASP.  $escape_sub is
            # called even when the preceding text is empty.
            $result .= $escape_sub->($text) . '\\n';
        }
        elsif ($escape eq '\\') {
            # backslash: flush the preceding text, then emit whatever the
            # map provides for the backslash entity.
            # NOTE(review): this key must equal the entity map's SDATA
            # string exactly, including its internal spacing -- confirm
            # against the map files.
            $result .= $escape_sub->($text);
            $result .= _resolve_sdata ('[bsol ]', $char_map, $escape_sub);
        }
        else {
            # other octal character: decode it and escape it together
            # with the preceding text
            $result .= $escape_sub->($text . chr (oct ($escape)));
        }
    }

    # Whatever follows the last escape is plain text.
    my $tail = substr ($data, $consumed);
    $result .= $escape_sub->($tail) if ($tail ne '');

    return ($result);
}

=item load_char_maps ($format, $paths)

B<load_char_maps> takes an EntityMap format suffix and loads all of the
character entity replacement sets for that suffix into an EntityMapGroup.
It searches every directory in I<@{$paths}>.

=cut

# load_char_maps ($format, $paths)
#
# Loads every entity map file whose name ends in the suffix $format from
# the directories listed in the array reference $paths, and returns the
# result of Text::EntityMap->group on the loaded maps.
#
#   $format  file name suffix, e.g. '.2ext'; it is matched literally, so
#            the leading `.' only matches a real dot
#   $paths   reference to an array of directory names; entries that are
#            not directories are skipped silently
#
# Dies if a directory cannot be opened, or if Text::EntityMap->load
# throws or returns a false value for a matching file.  Warns, but still
# returns a group, when no map was found at all.
sub load_char_maps {
    my ($format, $paths) = @_;

    my @char_maps;

    foreach my $path (@{$paths}) {
        next unless (-d $path);

        # Lexical handle: a bareword dirhandle is a package global.
        opendir (my $dir, $path)
            or die "load_char_maps: opening directory \`$path' for reading: $!\n";

        foreach my $file_name (readdir ($dir)) {
            # \Q..\E keeps regex metacharacters in the suffix literal.
            next if ($file_name !~ /\Q$format\E\z/);

            my $char_map = eval { Text::EntityMap->load ("$path/$file_name") };
            die "load_char_maps: loading \`$path/$file_name'\n$@\n"
                unless ($char_map);
            push (@char_maps, $char_map);
        }
        closedir ($dir);
    }

    warn "load_char_maps: no entity maps found\n"
        if (!@char_maps);

    return (Text::EntityMap->group (@char_maps));
}

=back

=head1 AUTHOR

Ken MacLeod, C<E<lt>ken@bitsko.slc.ut.usE<gt>>

=cut

1;