# Converter.pm: Common code for Converters.
#
# Copyright 2011-2023 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
#
# Original author: Patrice Dumas
package Texinfo::Convert::Converter;
use 5.00405;
use strict;
# To check if there is no erroneous autovivification
#no autovivification qw(fetch delete exists store strict);
# for fileparse
use File::Basename;
# for file names portability
use File::Spec;
use Encode qw(decode);
use Carp qw(cluck confess);
use Texinfo::Common;
use Texinfo::Report;
use Texinfo::Convert::Utils;
use Texinfo::Convert::Unicode;
use Texinfo::Convert::Text;
use Texinfo::Convert::NodeNameNormalization;
use Texinfo::Structuring;
use Texinfo::Translations;
require Exporter;
use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter Texinfo::Report Texinfo::Translations);
%EXPORT_TAGS = ( 'all' => [ qw(
xml_protect_text
xml_comment
xml_accent
xml_accents
) ] );
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
$VERSION = '7.1';
my %defaults = (
'documentlanguage' => undef,
);
# defaults for all converters that are not defined elsewhere.
# undef values in general marks information passed by the caller that
# is valid in the parser initialization hash, but is not considered
# as "configuration/customization". It is not available through get_conf()
# but is available directly in the converter as a hash key.
# FIXME separate the two types of information and check that those
# items are not valid customization options?
# NOTE converters for now do not add more, set converted_format, and,
# rarely, output_format. It would be good to keep it that way and add
# customization options instead.
my %common_converters_defaults = (
# Following are set in the main program
'language_config_dirs' => undef,
'converted_format' => undef,
# can be different from the converted_format, for example, epub3
# output format converted format is html.
'output_format' => undef,
'structuring' => undef,
# Not set in the main program
'translated_commands' => {'error' => 'error@arrow{}',},
# This is the default, mainly for tests; the caller should set them. These
# values are what is used in tests of the Converters. These variables are
# customization options, set in the main program when a converter is
# called from the main program.
'PACKAGE_AND_VERSION' => 'texinfo',
'PACKAGE_VERSION' => '',
'PACKAGE_URL' => 'http://www.gnu.org/software/texinfo/',
'PROGRAM' => '',
);
my %all_converters_defaults
= (%Texinfo::Common::default_converter_command_line_options,
%Texinfo::Common::default_converter_customization,
%Texinfo::Common::document_settable_unique_at_commands,
%Texinfo::Common::document_settable_multiple_at_commands,
%common_converters_defaults
);
# For translation of in document string.
if (0) {
my $self;
# TRANSLATORS: expansion of @error{} as Texinfo code
$self->gdt('error@arrow{}');
}
our %default_args_code_style = (
'email' => [1],
'anchor' => [1],
'uref' => [1],
'url' => [1],
'math' => [1],
'inforef' => [1,undef,1],
'image' => [1, 1, 1, undef, 1],
# and type?
'float' => [1],
);
foreach my $code_style_command (keys(%Texinfo::Commands::brace_code_commands)) {
$default_args_code_style{$code_style_command} = [1];
}
foreach my $ref_cmd ('pxref', 'xref', 'ref') {
$default_args_code_style{$ref_cmd} = [1, undef, undef, 1];
}
################################################################
# converter API
# convert_tree() and convert() should be implemented in converters.
# There is an implementation of output() below but in general
# output() should also be implemented by Converters.
# Functions that should be defined in specific converters
sub converter_defaults($$)
{
return %defaults;
}
sub converter_initialize($)
{
}
sub output_internal_links($)
{
my $self = shift;
return undef;
}
# this function is designed so as to be used in specific Converters
sub converter(;$)
{
my $class = shift;
my $converter = { 'set' => {} };
my $conf;
my $name = 'converter';
if (ref($class) eq 'HASH') {
$conf = $class;
bless $converter;
} elsif (defined($class)) {
bless $converter, $class;
$name = ref($converter);
$conf = shift;
} else {
bless $converter;
$conf = shift;
$name = ref($converter);
}
my %defaults = $converter->converter_defaults($conf);
foreach my $key (keys(%all_converters_defaults)) {
$defaults{$key} = $all_converters_defaults{$key}
if (!exists($defaults{$key}));
}
$converter->{'conf'} = {};
foreach my $key (keys(%defaults)) {
if (Texinfo::Common::valid_customization_option($key)) {
$converter->{'conf'}->{$key} = $defaults{$key};
} else {
$converter->{$key} = $defaults{$key};
}
}
#$converter->{'converter_pre_conf'} = \%defaults;
if (defined($conf)) {
if ($conf->{'parser'}) {
$converter->{'global_commands'}
= $conf->{'parser'}->global_commands_information();
$converter->{'parser_info'} = $conf->{'parser'}->global_information();
my $floats = $conf->{'parser'}->floats_information();
my ($labels, $targets_list, $nodes_list)
= $conf->{'parser'}->labels_information();
$converter->{'floats'} = $floats if ($floats);
$converter->{'labels'} = $labels if ($labels);
$converter->{'indices_information'}
= $conf->{'parser'}->indices_information();
$converter->{'values'} = $conf->{'parser'}->{'values'};
delete $conf->{'parser'};
}
foreach my $key (keys(%$conf)) {
if (Texinfo::Common::valid_customization_option($key)) {
$converter->{'conf'}->{$key} = $conf->{$key};
} elsif (!exists($defaults{$key})) {
warn "$key not a possible configuration in $name\n";
} else {
$converter->{$key} = $conf->{$key};
}
# configuration set here, in general coming from command-line
# will not be reset by set_conf.
$converter->{'set'}->{$key} = 1;
}
}
# set $converter->{'converter_init_conf'} to the customization
# options obtained after setting the defaults and applying
# the customization passed as argument.
$converter->{'converter_init_conf'} = { %{$converter->{'conf'}} };
foreach my $key (keys (%defaults)) {
if (defined($converter->{$key})) {
$converter->{'converter_init_conf'}->{$key} = $converter->{$key};
}
}
Texinfo::Common::set_output_encodings($converter, $converter->{'parser_info'});
# turn the array to a hash for speed. Not sure it really matters for such
# a small array.
my $expanded_formats = $converter->get_conf('EXPANDED_FORMATS');
$converter->{'expanded_formats_hash'} = {};
if (defined($expanded_formats)) {
foreach my $expanded_format (@{$converter->get_conf('EXPANDED_FORMATS')}) {
$converter->{'expanded_formats_hash'}->{$expanded_format} = 1;
}
}
# used for output files information, to register opened
# and not closed files. Accessed through output_files_information()
$converter->{'output_files'} = Texinfo::Common::output_files_initialize();
$converter->Texinfo::Report::new();
$converter->converter_initialize();
return $converter;
}
sub _convert_document_tree_units($$;$$)
{
my $self = shift;
my $root = shift;
my $tree_units = shift;
my $fh = shift;
if ($tree_units) {
my $result = '';
foreach my $tree_unit (@$tree_units) {
$result .= $self->write_or_return($self->convert_tree($tree_unit), $fh);
}
return $result;
} else {
return $self->write_or_return($self->convert_tree($root), $fh);
}
}
# the two following methods can be used to implement convert() in
# Converters.
sub convert_document_sections($$;$)
{
my $self = shift;
my $root = shift;
my $fh = shift;
my $tree_units = Texinfo::Structuring::split_by_section($root);
return $self->_convert_document_tree_units($root, $tree_units, $fh);
}
sub convert_document_nodes($$;$)
{
my $self = shift;
my $root = shift;
my $fh = shift;
my $tree_units = Texinfo::Structuring::split_by_node($root);
return $self->_convert_document_tree_units($root, $tree_units, $fh);
}
# In general, converters override this method, but simple
# converters can use it. It is used for the plaintext
# output format.
# use file_counters and out_filepaths converter states.
sub output($$)
{
my $self = shift;
my $root = shift;
my $tree_units;
if (defined($self->get_conf('OUTFILE'))
and ($Texinfo::Common::null_device_file{$self->get_conf('OUTFILE')}
or $self->get_conf('OUTFILE') eq '-'
or $self->get_conf('OUTFILE') eq '')) {
if ($self->get_conf('SPLIT')) {
$self->document_warn($self,
sprintf(__("%s: output incompatible with split"),
$self->get_conf('OUTFILE')));
$self->force_conf('SPLIT', 0);
}
}
if ($self->get_conf('SPLIT')) {
$self->set_conf('NODE_FILES', 1);
}
my ($output_file, $destination_directory, $output_filename,
$document_name) = $self->determine_files_and_directory();
my ($encoded_destination_directory, $dir_encoding)
= $self->encoded_output_file_name($destination_directory);
my $succeeded
= $self->create_destination_directory($encoded_destination_directory,
$destination_directory);
return undef unless $succeeded;
if ($self->get_conf('USE_NODES')) {
$tree_units = Texinfo::Structuring::split_by_node($root);
} else {
$tree_units = Texinfo::Structuring::split_by_section($root);
}
Texinfo::Structuring::split_pages($tree_units, $self->get_conf('SPLIT'));
# determine file names associated with the different pages
if ($output_file ne '') {
$self->_set_tree_units_files($tree_units, $output_file, $destination_directory,
$output_filename, $document_name);
}
#print STDERR "$tree_units $tree_units->[0]->{'structure'}->{'unit_filename'}\n";
# Now do the output
my $fh;
my $output = '';
if (!$tree_units or !$tree_units->[0]->{'structure'}
or !defined($tree_units->[0]->{'structure'}->{'unit_filename'})) {
# no page
my $outfile_name;
my $encoded_outfile_name;
if ($output_file ne '') {
if ($self->get_conf('SPLIT')) {
my $top_node_file_name = $self->top_node_filename($document_name);
if (defined($destination_directory) and $destination_directory ne '') {
$outfile_name = File::Spec->catfile($destination_directory,
$top_node_file_name);
} else {
$outfile_name = $top_node_file_name;
}
} else {
$outfile_name = $output_file;
}
print STDERR "DO No pages, output in $outfile_name\n"
if ($self->get_conf('DEBUG'));
my $path_encoding;
($encoded_outfile_name, $path_encoding)
= $self->encoded_output_file_name($outfile_name);
my $error_message;
($fh, $error_message) = Texinfo::Common::output_files_open_out(
$self->output_files_information(), $self,
$encoded_outfile_name);
if (!$fh) {
$self->document_error($self,
sprintf(__("could not open %s for writing: %s"),
$outfile_name, $error_message));
return undef;
}
} else {
print STDERR "DO No pages, string output\n"
if ($self->get_conf('DEBUG'));
}
if ($tree_units and @$tree_units) {
foreach my $tree_unit (@$tree_units) {
my $tree_unit_text = $self->convert_tree($tree_unit);
$output .= $self->write_or_return($tree_unit_text, $fh);
}
} else {
# REMARK right now, this code is never called, as
# Texinfo::Structuring::split_by_node or split_by_page always return
# an array containing at least one unit. But this was not the case
# in the past and could change again in the future.
#
# FIXME this is in general better to use convert(), for instance
# to have the converter output footnotes when it is not done
# as part of tree units formatting (th case here).
# However, this breaks the promise that only convert_tree is used
# in generic Converter code.
$output .= $self->write_or_return($self->convert($root), $fh);
}
# NOTE do not close STDOUT now to avoid a perl warning.
# FIXME is it still true that there is such a warning?
if ($fh and $outfile_name ne '-') {
Texinfo::Common::output_files_register_closed(
$self->output_files_information(), $encoded_outfile_name);
if (!close($fh)) {
$self->document_error($self,
sprintf(__("error on closing %s: %s"),
$outfile_name, $!));
}
}
return $output if ($output_file eq '');
} else {
# output with pages
print STDERR "DO Elements with filenames\n"
if ($self->get_conf('DEBUG'));
my %files_filehandle;
foreach my $tree_unit (@$tree_units) {
my $tree_unit_filename = $tree_unit->{'structure'}->{'unit_filename'};
my $out_filepath = $self->{'out_filepaths'}->{$tree_unit_filename};
my $file_fh;
# open the file and output the elements
if (!exists($files_filehandle{$tree_unit_filename})) {
my $error_message;
($file_fh, $error_message) = Texinfo::Common::output_files_open_out(
$self->output_files_information(), $self,
$out_filepath);
if (!$file_fh) {
$self->document_error($self,
sprintf(__("could not open %s for writing: %s"),
$out_filepath, $error_message));
return undef;
}
$files_filehandle{$tree_unit_filename} = $file_fh;
} else {
$file_fh = $files_filehandle{$tree_unit_filename};
}
my $tree_unit_text = $self->convert_tree($tree_unit);
print $file_fh $tree_unit_text;
$self->{'file_counters'}->{$tree_unit_filename}--;
if ($self->{'file_counters'}->{$tree_unit_filename} == 0) {
# NOTE do not close STDOUT here to avoid a perl warning
if ($out_filepath ne '-') {
Texinfo::Common::output_files_register_closed(
$self->output_files_information(), $out_filepath);
if (!close($file_fh)) {
$self->document_error($self,
sprintf(__("error on closing %s: %s"),
$out_filepath, $!));
return undef;
}
}
}
}
}
}
###############################################################
# Implementation of the customization API that is used in many
# Texinfo modules
sub get_conf($$)
{
my $self = shift;
my $conf = shift;
if (!Texinfo::Common::valid_customization_option($conf)) {
confess("CBUG: unknown option $conf\n");
#return undef;
}
return $self->{'conf'}->{$conf};
}
sub set_conf($$$)
{
my $self = shift;
my $conf = shift;
my $value = shift;
if (!Texinfo::Common::valid_customization_option($conf)) {
die "BBUG: unknown option $conf\n";
return undef;
}
if ($self->{'set'}->{$conf}) {
return 0;
} else {
$self->{'conf'}->{$conf} = $value;
return 1;
}
}
sub force_conf($$$)
{
my $self = shift;
my $conf = shift;
my $value = shift;
if (!Texinfo::Common::valid_customization_option($conf)) {
die "ABUG: unknown option $conf\n";
return undef;
}
$self->{'conf'}->{$conf} = $value;
return 1;
}
sub output_files_information($)
{
my $self = shift;
return $self->{'output_files'};
}
####################################################################
# Useful functions. Those in this section are used in the module and can
# also be used in other Converter modules.
# output fo $fh if defined, otherwise return the text.
sub write_or_return($$$)
{
my $self = shift;
my $text = shift;
my $fh = shift;
if ($fh) {
print $fh $text;
return '';
} else {
return $text;
}
}
my $STDIN_DOCU_NAME = 'stdin';
sub determine_files_and_directory($;$)
{
my $self = shift;
my $output_format = shift;
if (not defined($output_format)) {
$output_format = $self->{'output_format'};
}
# determine input file base name
my $input_basefile;
if (defined($self->{'parser_info'}->{'input_file_name'})) {
# 'input_file_name' is not decoded, as it is derived from input
# file which is not decoded either. We want to return only
# decoded character strings such that they can easily be mixed
# with other character strings, so we decode here.
my $input_file_name = $self->{'parser_info'}->{'input_file_name'};
my $encoding = $self->get_conf('COMMAND_LINE_ENCODING');
if (defined($encoding)) {
$input_file_name = decode($encoding, $input_file_name, sub { '?' });
# use '?' as replacement character rather than U+FFFD in case it
# is re-encoded to an encoding without this character
}
my ($directories, $suffix);
($input_basefile, $directories, $suffix) = fileparse($input_file_name);
} else {
# This could happen if called on a piece of texinfo
$input_basefile = '';
}
my $input_basename;
if ($input_basefile eq '-') {
$input_basename = $STDIN_DOCU_NAME;
} else {
$input_basename = $input_basefile;
$input_basename =~ s/\.te?x(i|info)?$//;
}
my $setfilename;
if (defined($self->get_conf('setfilename'))) {
$setfilename = $self->get_conf('setfilename');
} elsif ($self->{'global_commands'}
and $self->{'global_commands'}->{'setfilename'}
and $self->{'global_commands'}->{'setfilename'}->{'extra'}
and defined($self->{'global_commands'}->{'setfilename'}->{'extra'}->{'text_arg'})) {
$setfilename
= $self->{'global_commands'}->{'setfilename'}->{'extra'}->{'text_arg'};
}
my $input_basename_for_outfile = $input_basename;
my $setfilename_for_outfile = $setfilename;
# PREFIX overrides both setfilename and the input file base name
if (defined($self->get_conf('PREFIX'))) {
$setfilename_for_outfile = undef;
$input_basename_for_outfile = $self->get_conf('PREFIX');
}
# the document path, in general the outfile without
# extension and can be set from setfilename if outfile is not set
my $document_path;
# determine output file and output file name
my $output_file;
if (!defined($self->get_conf('OUTFILE'))) {
if (defined($setfilename_for_outfile)
and !$self->get_conf('NO_USE_SETFILENAME')) {
$output_file = $setfilename_for_outfile;
$document_path = $setfilename_for_outfile;
$document_path =~ s/\.[^\.]*$//;
if (!$self->get_conf('USE_SETFILENAME_EXTENSION')) {
$output_file =~ s/\.[^\.]*$//;
$output_file .= '.'.$self->get_conf('EXTENSION')
if (defined($self->get_conf('EXTENSION'))
and $self->get_conf('EXTENSION') ne '');
}
} elsif ($input_basename_for_outfile ne '') {
$output_file = $input_basename_for_outfile;
$document_path = $input_basename_for_outfile;
$output_file .= '.'.$self->get_conf('EXTENSION')
if (defined($self->get_conf('EXTENSION'))
and $self->get_conf('EXTENSION') ne '');
} else {
$output_file = '';
$document_path = $output_file;
}
if (defined($self->get_conf('SUBDIR')) and $output_file ne '') {
my $dir = File::Spec->canonpath($self->get_conf('SUBDIR'));
$output_file = File::Spec->catfile($dir, $output_file);
}
} else {
$document_path = $self->get_conf('OUTFILE');
$document_path =~ s/\.[^\.]*$//;
$output_file = $self->get_conf('OUTFILE');
}
# the output file path, in general same as the outfile but can be
# set from setfilename if outfile is not set.
my $output_filepath = $output_file;
# in this case one wants to get the result in a string and there
# is a setfilename. The setfilename is used to get something.
# This happens in the test suite.
if ($output_file eq '' and defined($setfilename_for_outfile)
and !$self->get_conf('NO_USE_SETFILENAME')) {
$output_filepath = $setfilename_for_outfile;
$document_path = $setfilename_for_outfile;
$document_path =~ s/\.[^\.]*$//;
}
# $document_name is the name of the document, which is the output
# file basename, $output_filename, without extension.
my ($document_name, $output_filename, $directories, $suffix);
# We may be handling setfilename there, so it is not obvious that we
# want to use fileparse and not consider unixish separators. However,
# if this is setfilename, it should be a simple file name, so it
# should hopefully be harmless to use fileparse
($document_name, $directories, $suffix) = fileparse($document_path);
($output_filename, $directories, $suffix) = fileparse($output_filepath);
my $destination_directory;
if ($self->get_conf('SPLIT')) {
if (defined($self->get_conf('OUTFILE'))) {
$destination_directory = $self->get_conf('OUTFILE');
} elsif (defined($self->get_conf('SUBDIR'))) {
$destination_directory = $self->get_conf('SUBDIR');
} else {
$destination_directory = $document_name;
if (defined($output_format) and $output_format ne '') {
$destination_directory .= '_'.$output_format;
}
}
} else {
# $output_file_filename is not used, but $output_filename should be
# the same as long as $output_file is the same as $output_filepath
# which is the case except if $output_file is ''.
my ($output_file_filename, $output_dir, $suffix) = fileparse($output_file);
if ($output_dir ne '') {
$destination_directory = $output_dir;
}
}
if (defined($destination_directory)
and $destination_directory ne '') {
$destination_directory = File::Spec->canonpath($destination_directory);
}
return ($output_file, $destination_directory, $output_filename,
$document_name, $input_basefile);
}
sub _id_to_filename($$)
{
my $self = shift;
my $id = shift;
return substr($id, 0, $self->get_conf('BASEFILENAME_LENGTH'));
}
sub normalized_sectioning_command_filename($$)
{
my $self = shift;
my $command = shift;
my $no_unidecode;
$no_unidecode = 1 if (defined($self->get_conf('USE_UNIDECODE'))
and !$self->get_conf('USE_UNIDECODE'));
my $normalized_name
= Texinfo::Convert::NodeNameNormalization::normalize_transliterate_texinfo(
{'contents' => $command->{'args'}->[0]->{'contents'}},
$no_unidecode);
my $filename = $self->_id_to_filename($normalized_name);
$filename .= '.'.$self->get_conf('EXTENSION')
if (defined($self->get_conf('EXTENSION'))
and $self->get_conf('EXTENSION') ne '');
return ($normalized_name, $filename);
}
sub node_information_filename($$$)
{
my $self = shift;
my $normalized = shift;
my $node_contents = shift;
my $no_unidecode;
$no_unidecode = 1 if (defined($self->get_conf('USE_UNIDECODE'))
and !$self->get_conf('USE_UNIDECODE'));
my $filename;
if (defined($normalized)) {
if ($self->get_conf('TRANSLITERATE_FILE_NAMES')) {
$filename = Texinfo::Convert::NodeNameNormalization::normalize_transliterate_texinfo(
{'contents' => $node_contents},
$no_unidecode);
} else {
$filename = $normalized;
}
} elsif (defined($node_contents)) {
$filename = Texinfo::Convert::NodeNameNormalization::normalize_node(
{ 'contents' => $node_contents });
} else {
$filename = '';
}
$filename = $self->_id_to_filename($filename);
return $filename;
}
sub initialize_tree_units_files($)
{
my $self = shift;
$self->{'out_filepaths'} = {};
$self->{'file_counters'} = {};
$self->{'filenames'} = {};
}
# If CASE_INSENSITIVE_FILENAMES is set, reuse the first
# filename with the same name insensitive to the case.
sub register_normalize_case_filename($$)
{
my $self = shift;
my $filename = shift;
if ($self->get_conf('CASE_INSENSITIVE_FILENAMES')) {
if (exists($self->{'filenames'}->{lc($filename)})) {
if ($self->get_conf('DEBUG')) {
print STDERR "Reusing case-insensitive ".
$self->{'filenames'}->{lc($filename)}." for $filename\n";
}
$filename = $self->{'filenames'}->{lc($filename)};
} else {
$self->{'filenames'}->{lc($filename)} = $filename;
}
} else {
if (exists($self->{'filenames'}->{$filename})) {
if ($self->get_conf('DEBUG')) {
print STDERR "Reusing ".$self->{'filenames'}->{$filename}
." for $filename\n";
}
} else {
$self->{'filenames'}->{$filename} = $filename;
}
}
return $filename;
}
# return the registered filename if there is one, else return undef.
# Note that it only tells that a file name has been its name considered,
# not that the corresponding file is created.
# currently unused.
sub registered_filename($$)
{
my $self = shift;
my $filename = shift;
if ($self->get_conf('CASE_INSENSITIVE_FILENAMES')) {
if (exists($self->{'filenames'}->{lc($filename)})) {
return $self->{'filenames'}->{lc($filename)};
}
} elsif (exists($self->{'filenames'}->{$filename})) {
return $self->{'filenames'}->{$filename};
}
return undef;
}
# Sets $tree_unit->{'structure'}->{'unit_filename'}.
sub set_tree_unit_file($$$)
{
my $self = shift;
my $tree_unit = shift;
my $filename = shift;
if (!defined($filename)) {
cluck("set_tree_unit_file: filename not defined\n");
}
if (!defined($tree_unit)) {
cluck("set_tree_unit_file: tree_unit not defined\n");
}
$filename = $self->register_normalize_case_filename($filename);
$tree_unit->{'structure'} = {} if (!$tree_unit->{'structure'});
# This should never happen, set_tree_unit_file is called once per
# tree unit.
if (exists($tree_unit->{'structure'}->{'unit_filename'})) {
if ($tree_unit->{'structure'}->{'unit_filename'} eq $filename) {
print STDERR "set_tree_unit_file: already set: $filename\n"
if ($self->get_conf('DEBUG'));
} else {
print STDERR "set_tree_unit_file: unit_filename reset: "
.$tree_unit->{'structure'}->{'unit_filename'}.", $filename\n"
if ($self->get_conf('DEBUG'));
}
}
$tree_unit->{'structure'}->{'unit_filename'} = $filename;
}
# sets out_filepaths converter state, associating a file name
# to a file path.
# $FILEPATH can be given explicitly, otherwise it is based on $FILENAME
# and $DESTINATION_DIRECTORY
sub set_file_path($$$;$)
{
my $self = shift;
my $filename = shift;
my $destination_directory = shift;
my $filepath = shift;
if (!defined($filename)) {
cluck("set_file_path: filename not defined\n");
}
$filename = $self->register_normalize_case_filename($filename);
if (not defined($filepath)) {
if (defined($destination_directory) and $destination_directory ne '') {
$filepath = File::Spec->catfile($destination_directory, $filename);
} else {
$filepath = $filename;
}
}
# should not happen, the file path should be set only once
# per file name.
if (defined($self->{'out_filepaths'}->{$filename})) {
if ($self->{'out_filepaths'}->{$filename} eq $filepath) {
print STDERR "set_file_path: filepath set: $filepath\n"
if ($self->get_conf('DEBUG'));
} else {
print STDERR "set_file_path: filepath reset: "
.$self->{'out_filepaths'}->{$filename}.", $filepath\n"
if ($self->get_conf('DEBUG'));
}
}
$self->{'out_filepaths'}->{$filename} = $filepath;
}
sub top_node_filename($$)
{
my $self = shift;
my $document_name = shift;
my $top_node_filename;
if (defined($self->get_conf('TOP_FILE'))
and $self->get_conf('TOP_FILE') ne '') {
$top_node_filename = $self->get_conf('TOP_FILE');
} else {
my $extension = '';
$extension = '.'.$self->get_conf('EXTENSION')
if (defined($self->get_conf('EXTENSION'))
and $self->get_conf('EXTENSION') ne '');
$top_node_filename = $document_name;
if (defined($top_node_filename)) {
$top_node_filename .= $extension;
}
}
return $top_node_filename;
}
sub _get_root_element($$)
{
my $self = shift;
my $command = shift;
my $find_container = shift;
my $current = $command;
while (1) {
if ($current->{'type'}) {
if ($current->{'type'} eq 'unit') {
return $current;
}
}
if ($current->{'parent'}) {
$current = $current->{'parent'};
} else {
return undef;
}
}
}
# set file_counters converter state
sub _set_tree_units_files($$$$$$)
{
my $self = shift;
my $tree_units = shift;
my $output_file = shift;
my $destination_directory = shift;
my $output_filename = shift;
my $document_name = shift;
# Ensure that the document has pages
return undef if (!defined($tree_units) or !@$tree_units);
$self->initialize_tree_units_files();
my $extension = '';
$extension = '.'.$self->get_conf('EXTENSION')
if (defined($self->get_conf('EXTENSION'))
and $self->get_conf('EXTENSION') ne '');
if (!$self->get_conf('SPLIT')) {
$self->set_file_path($output_filename, undef, $output_file);
foreach my $tree_unit (@$tree_units) {
$self->set_tree_unit_file($tree_unit, $output_filename);
}
} else {
my $node_top;
$node_top = $self->{'labels'}->{'Top'} if ($self->{'labels'});
my $top_node_filename = $self->top_node_filename($document_name);
# first determine the top node file name.
if ($node_top and defined($top_node_filename)) {
my ($node_top_unit) = $self->_get_root_element($node_top);
if (!defined($node_top_unit)) {
print STDERR "No element for top node (".scalar(@$tree_units)." units)\n"
if ($self->get_conf('DEBUG'));
} else {
$self->set_file_path($top_node_filename, $destination_directory);
$self->set_tree_unit_file($node_top_unit, $top_node_filename);
}
}
my $file_nr = 0;
my $previous_page;
foreach my $tree_unit (@$tree_units) {
# For Top node.
next if ($tree_unit->{'structure'}
and defined($tree_unit->{'structure'}->{'unit_filename'}));
my $file_tree_unit = $tree_unit->{'extra'}->{'first_in_page'};
if (!$file_tree_unit) {
cluck ("No first_in_page for $tree_unit\n");
}
if (!defined($file_tree_unit->{'structure'}->{'unit_filename'})) {
foreach my $root_command (@{$file_tree_unit->{'contents'}}) {
if ($root_command->{'cmdname'}
and $root_command->{'cmdname'} eq 'node') {
my $node_filename;
# double node are not normalized, they are handled here
if (!defined($root_command->{'extra'}->{'normalized'})
or !defined($self->{'labels'}->{
$root_command->{'extra'}->{'normalized'}})) {
$node_filename = 'unknown_node';
} else {
$node_filename
= $self->node_information_filename(
$root_command->{'extra'}->{'normalized'},
$root_command->{'args'}->[0]->{'contents'});
}
$node_filename .= $extension;
$self->set_file_path($node_filename,$destination_directory);
$self->set_tree_unit_file($file_tree_unit, $node_filename);
last;
}
}
if (!defined($file_tree_unit->{'structure'}->{'unit_filename'})) {
# use section to do the file name if there is no node
my $command = $file_tree_unit->{'extra'}->{'unit_command'};
if ($command) {
if ($command->{'cmdname'} eq 'top' and !$node_top
and defined($top_node_filename)) {
$self->set_file_path($top_node_filename, $destination_directory);
$self->set_tree_unit_file($file_tree_unit, $top_node_filename);
} else {
my ($normalized_name, $filename)
= $self->normalized_sectioning_command_filename($command);
$self->set_file_path($filename, $destination_directory);
$self->set_tree_unit_file($file_tree_unit, $filename);
}
} else {
# when everything else has failed
if ($file_nr == 0 and !$node_top and defined($top_node_filename)) {
$self->set_file_path($top_node_filename, $destination_directory);
$self->set_tree_unit_file($file_tree_unit, $top_node_filename);
} else {
my $filename = $document_name . "_$file_nr";
$filename .= $extension;
$self->set_file_path($filename, $destination_directory);
$self->set_tree_unit_file($tree_unit, $filename);
}
$file_nr++;
}
}
}
$self->set_tree_unit_file($tree_unit,
$file_tree_unit->{'structure'}->{'unit_filename'});
}
}
foreach my $tree_unit (@$tree_units) {
my $tree_unit_filename = $tree_unit->{'structure'}->{'unit_filename'};
$self->{'file_counters'}->{$tree_unit_filename} = 0
if (!exists($self->{'file_counters'}->{$tree_unit_filename}));
$self->{'file_counters'}->{$tree_unit_filename}++;
print STDERR 'Page '
# uncomment for perl object name
#."$tree_unit "
.Texinfo::Structuring::root_or_external_element_cmd_texi($tree_unit)
.": $tree_unit_filename($self->{'file_counters'}->{$tree_unit_filename})\n"
if ($self->get_conf('DEBUG'));
}
}
sub create_destination_directory($$$)
{
my $self = shift;
my $destination_directory_path = shift;
my $destination_directory_name = shift;
if (defined($destination_directory_path)
and ! -d $destination_directory_path) {
if (!mkdir($destination_directory_path, oct(755))) {
$self->document_error($self, sprintf(__(
"could not create directory `%s': %s"),
$destination_directory_name, $!));
return 0;
}
}
return 1;
}
#############################################################
# useful methods for Converters.
# determine the default, with $INIT_CONF if set, or the default common
# to all the converters
sub _command_init($$)
{
my $global_command = shift;
my $init_conf = shift;
if (exists($Texinfo::Common::document_settable_at_commands{$global_command})) {
if (defined($init_conf->{$global_command})) {
return $init_conf->{$global_command};
} elsif (defined($Texinfo::Common::document_settable_at_commands{$global_command})) {
return $Texinfo::Common::document_settable_at_commands{$global_command};
}
}
}
# $COMMANDS_LOCATION is 'before', 'last', 'preamble' or 'preamble_or_first'
# 'before' means setting to the values before the document commands
# (defaults and command-line).
# 'preamble' means setting sequentially to the values in the preamble.
# 'preamble_or_first' means setting to the first value for the command
# in the document if the first command is not in the preamble, else set
# sequentially to the values in the preamble.
# 'last' means setting to the last value for the command in the document.
#
# Notice that the only effect is to use set_conf (directly or through
# set_global_document_command), no @-commands setting side effects are done
# and associated customization variables are not set/reset either.
sub set_global_document_commands($$;$)
{
my $self = shift;
my $commands_location = shift;
my $selected_commands = shift;
my $init_conf;
if (defined($self->{'output_init_conf'})) {
# use in priority the initial customization per output
$init_conf = $self->{'output_init_conf'};
} else {
$init_conf = $self->{'converter_init_conf'};
}
if (not defined($selected_commands)) {
$selected_commands = [keys(%Texinfo::Common::document_settable_at_commands)];
}
if ($commands_location eq 'before') {
foreach my $global_command (@{$selected_commands}) {
# for commands not appearing in the document, this should set the
# same value, the converter initialization value
$self->set_conf($global_command, _command_init($global_command, $init_conf));
# NOTE if the variable is set from an handler, or in the converter after
# $init_conf was set, but before starting the conversion, it is ignored here
# and the $init_conf value is set. The previously set value could be
# in $self->get_conf(), but what is available from $self->get_conf() could
# also be a value set by a previous call of set_global_document_commands.
# There is no easy way to deal with this issue, other than making sure that
# a customization value that is expected to be set early is set in $init_conf.
}
} else {
foreach my $global_command (@{$selected_commands}) {
if ($self->get_conf('DEBUG')) {
print STDERR "SET_global($commands_location) $global_command\n";
}
my $element = Texinfo::Common::set_global_document_command($self,
$self->{'global_commands'}, $global_command, $commands_location);
if (not defined($element)) {
# commands not appearing in the document, this should set the
# same value, the converter initialization value
# the NOTE above in 'before' holds here too.
$self->set_conf($global_command,
_command_init($global_command, $init_conf));
}
}
}
}
sub present_bug_message($$;$)
{
my $self = shift;
my $message = shift;
my $current = shift;
my $line_message = '';
my $current_element_message = '';
if ($current) {
if ($current->{'source_info'}) {
my $source_info = $current->{'source_info'};
my $file = $source_info->{'file_name'};
$line_message
= "in: $source_info->{'file_name'}:$source_info->{'line_nr'}";
if ($source_info->{'macro'} ne '') {
$line_message .= " (possibly involving $source_info->{'macro'})";
}
$line_message .= "\n";
}
if ($current) {
$current_element_message = "current: ".
Texinfo::Common::debug_print_element($current, 1);
}
}
my $additional_information = '';
if ($line_message.$current_element_message ne '') {
$additional_information = "Additional information:\n".
$line_message.$current_element_message;
}
warn "You found a bug: $message\n\n".$additional_information;
}
# Reverse the decoding of the file name from the input encoding.
# A wrapper around Texinfo::Utils::encoded_input_file_name() that
# can be called in converters through an objet oriented syntax.
sub encoded_input_file_name($$;$)
{
my $self = shift;
my $file_name = shift;
my $input_file_encoding = shift;
return Texinfo::Convert::Utils::encoded_input_file_name($self, $file_name,
$input_file_encoding);
}
# A wrapper around Texinfo::Utils::encoded_output_file_name() that
# can be called in converters through an objet oriented syntax.
sub encoded_output_file_name($$)
{
my $self = shift;
my $file_name = shift;
return Texinfo::Convert::Utils::encoded_output_file_name($self, $file_name);
}
sub txt_image_text($$$)
{
my ($self, $element, $basefile) = @_;
my ($text_file_name, $file_name_encoding)
= $self->encoded_input_file_name($basefile.'.txt');
my $txt_file = Texinfo::Common::locate_include_file($self, $text_file_name);
if (!defined($txt_file)) {
return undef, undef;
} else {
my $filehandle = do { local *FH };
if (open ($filehandle, $txt_file)) {
my $encoding
= Texinfo::Common::element_associated_processing_encoding($element);
if (defined($encoding)) {
binmode($filehandle, ":encoding($encoding)");
}
my $result = '';
my $max_width = 0;
while (<$filehandle>) {
my $width = Texinfo::Convert::Unicode::string_width($_);
if ($width > $max_width) {
$max_width = $width;
}
$result .= $_;
}
if (!close ($filehandle)) {
my $decoded_file_name = $txt_file;
$decoded_file_name = Encode::decode($file_name_encoding, $txt_file)
if (defined($file_name_encoding));
$self->document_warn($self,
sprintf(__("error on closing image text file %s: %s"),
$decoded_file_name, $!));
}
return $result, $max_width;
} else {
my $decoded_file_name = $txt_file;
$decoded_file_name = Encode::decode($file_name_encoding, $txt_file)
if (defined($file_name_encoding));
$self->line_warn($self,
sprintf(__("\@image file `%s' unreadable: %s"),
$decoded_file_name, $!), $element->{'source_info'});
}
}
return undef, undef;
}
sub float_type_number($$)
{
my $self = shift;
my $float = shift;
my $type_element;
if ($float->{'extra'}->{'float_type'} ne '') {
$type_element = $float->{'args'}->[0];
}
my $tree;
if ($type_element) {
if (defined($float->{'structure'})
and defined($float->{'structure'}->{'float_number'})) {
$tree = $self->gdt("{float_type} {float_number}",
{'float_type' => $type_element,
'float_number' => $float->{'structure'}->{'float_number'}});
} else {
$tree = $self->gdt("{float_type}",
{'float_type' => $type_element});
}
} elsif (defined($float->{'structure'})
and defined($float->{'structure'}->{'float_number'})) {
$tree = $self->gdt("{float_number}",
{'float_number' => $float->{'structure'}->{'float_number'}});
}
return $tree;
}
sub float_name_caption($$)
{
my $self = shift;
my $element = shift;
my $caption;
if ($element->{'extra'} and $element->{'extra'}->{'caption'}) {
$caption = $element->{'extra'}->{'caption'};
} elsif ($element->{'extra'} and $element->{'extra'}->{'shortcaption'}) {
$caption = $element->{'extra'}->{'shortcaption'};
}
#if ($self->get_conf('DEBUG')) {
# my $caption_texi =
# Texinfo::Convert::Texinfo::convert_to_texinfo({ 'contents' => $caption->{'contents'}});
# print STDERR " CAPTION: $caption_texi\n";
#}
my $type_element;
if ($element->{'extra'}->{'float_type'} ne '') {
$type_element = $element->{'args'}->[0];
}
my $prepended;
if ($type_element) {
if ($caption) {
if ($element->{'structure'}
and defined($element->{'structure'}->{'float_number'})) {
# TRANSLATORS: added before caption
$prepended = $self->gdt('{float_type} {float_number}: ',
{'float_type' => $type_element,
'float_number' => $element->{'structure'}->{'float_number'}});
} else {
# TRANSLATORS: added before caption, no float label
$prepended = $self->gdt('{float_type}: ',
{'float_type' => $type_element});
}
} else {
if ($element->{'structure'}
and defined($element->{'structure'}->{'float_number'})) {
$prepended = $self->gdt("{float_type} {float_number}",
{'float_type' => $type_element,
'float_number' => $element->{'structure'}->{'float_number'}});
} else {
$prepended = $self->gdt("{float_type}",
{'float_type' => $type_element});
}
}
} elsif ($element->{'structure'}
and defined($element->{'structure'}->{'float_number'})) {
if ($caption) {
# TRANSLATORS: added before caption, no float type
$prepended = $self->gdt('{float_number}: ',
{'float_number' => $element->{'structure'}->{'float_number'}});
} else {
$prepended = $self->gdt("{float_number}",
{'float_number' => $element->{'structure'}->{'float_number'}});
}
}
return ($caption, $prepended);
}
# This is used when the formatted text has no comment nor new line, but
# one want to add the comment or new line from the original arg
sub format_comment_or_return_end_line($$)
{
my $self = shift;
my $element = shift;
my $end_line;
my $comment = $element->{'args'}->[-1]->{'info'}->{'comment_at_end'}
if $element->{'args'} and $element->{'args'}->[-1]->{'info'};
if ($comment) {
$end_line = $self->convert_tree($comment);
} elsif ($element->{'args'} and $element->{'args'}->[-1]->{'info'}
and $element->{'args'}->[-1]->{'info'}->{'spaces_after_argument'}) {
my $text = $element->{'args'}->[-1]
->{'info'}->{'spaces_after_argument'}->{'text'};
if (chomp($text)) {
$end_line = "\n";
} else {
$end_line = '';
}
} else {
$end_line = '';
}
return $end_line;
}
sub table_item_content_tree($$$)
{
my $self = shift;
my $element = shift;
my $contents = shift;
my $table_item_tree = {'parent' => $element};
return $table_item_tree
if (!defined($contents));
my $table_command = $element->{'parent'}->{'parent'}->{'parent'};
if ($table_command->{'extra'}
and $table_command->{'extra'}->{'command_as_argument'}) {
my $command_as_argument
= $table_command->{'extra'}->{'command_as_argument'};
my $command = {'cmdname' => $command_as_argument->{'cmdname'},
'source_info' => $element->{'source_info'},
'parent' => $table_item_tree };
if ($table_command->{'extra'}->{'command_as_argument_kbd_code'}) {
$command->{'extra'} = {} if (!$command->{'extra'});
$command->{'extra'}->{'code'} = 1;
}
if ($command_as_argument->{'type'} eq 'definfoenclose_command') {
$command->{'type'} = $command_as_argument->{'type'};
$command->{'extra'} = {} if (!$command->{'extra'});
$command->{'extra'}->{'begin'} = $command_as_argument->{'extra'}->{'begin'};
$command->{'extra'}->{'end'} = $command_as_argument->{'extra'}->{'end'};
}
my $arg = {'type' => 'brace_command_arg',
'contents' => $contents,
'parent' => $command,};
$command->{'args'} = [$arg];
$contents = [$command];
}
$table_item_tree->{'contents'} = $contents;
return $table_item_tree;
}
sub convert_accents($$$;$$)
{
my $self = shift;
my $accent = shift;
my $format_accents = shift;
my $output_encoded_characters = shift;
my $in_upper_case = shift;
my ($contents, $stack)
= Texinfo::Convert::Utils::find_innermost_accent_contents($accent);
my $result = $self->convert_tree({'contents' => $contents});
my $encoded;
if ($output_encoded_characters) {
$encoded = Texinfo::Convert::Unicode::encoded_accents($self, $result, $stack,
$self->get_conf('OUTPUT_ENCODING_NAME'),
$format_accents,
$in_upper_case);
}
if (!defined($encoded)) {
foreach my $accent_command (reverse(@$stack)) {
$result = &$format_accents ($self, $result, $accent_command,
$in_upper_case);
}
return $result;
} else {
return $encoded;
}
}
# index sub-entries specified with @subentry, separated by commas, or by
# $SEPARATOR, if set
sub comma_index_subentries_tree {
my ($self, $current_entry, $separator) = @_;
$separator = ', ' if (!defined($separator));
my @contents;
while ($current_entry->{'extra'} and $current_entry->{'extra'}->{'subentry'}) {
$current_entry = $current_entry->{'extra'}->{'subentry'};
push @contents, {'text' => $separator}, $current_entry->{'args'}->[0];
}
if (scalar(@contents)) {
return {'contents' => \@contents};
}
return undef;
}
# This method allows to count words in elements and returns an array
# and a text already formatted.
sub sort_element_counts($$;$$)
{
my $converter = shift;
my $tree = shift;
my $use_sections = shift;
my $count_words = shift;
my $elements;
if ($use_sections) {
$elements = Texinfo::Structuring::split_by_section($tree);
} else {
$elements = Texinfo::Structuring::split_by_node($tree);
}
if (!$elements) {
@$elements = ($tree);
} elsif (scalar(@$elements) >= 1
and (not $elements->[0]->{'extra'}
or not $elements->[0]->{'extra'}->{'unit_command'})) {
shift @$elements;
}
my $max_count = 0;
my @name_counts_array;
require Texinfo::Convert::Texinfo;
foreach my $element (@$elements) {
my $name;
if ($element->{'extra'} and $element->{'extra'}->{'unit_command'}) {
my $command = $element->{'extra'}->{'unit_command'};
if ($command->{'args'}->[0]->{'contents'}) {
$name = "\@$command->{'cmdname'} "
.Texinfo::Convert::Texinfo::convert_to_texinfo(
{'contents' => $command->{'args'}->[0]->{'contents'}});
}
}
$name = 'UNNAMED tree element' if (!defined($name));
my $count;
my $converted_element = $converter->convert_tree($element);
if ($count_words) {
my @res = split /\W+/, $converted_element;
$count = scalar(@res);
} else {
my @res = split /^/, $converted_element;
$count = scalar(@res);
}
push @name_counts_array, [$count, $name];
if ($count > $max_count) {
$max_count = $count;
}
}
my @sorted_name_counts_array = sort {$a->[0] <=> $b->[0]} @name_counts_array;
@sorted_name_counts_array = reverse(@sorted_name_counts_array);
my $max_length = length($max_count);
my $result = '';
foreach my $sorted_count (@sorted_name_counts_array) {
$result .= sprintf("%${max_length}d $sorted_count->[1]\n",
$sorted_count->[0]);
}
return (\@sorted_name_counts_array, $result);
}
########################################################################
# XML related methods and variables that may be used in different
# XML Converters.
my $xml_numeric_entity_mdash = ''.hex('2014').';';
my $xml_numeric_entity_ndash = ''.hex('2013').';';
my $xml_numeric_entity_ldquo = ''.hex('201C').';';
my $xml_numeric_entity_rdquo = ''.hex('201D').';';
my $xml_numeric_entity_lsquo = ''.hex('2018').';';
my $xml_numeric_entity_rsquo = ''.hex('2019').';';
sub xml_format_text_with_numeric_entities($$)
{
my $self = shift;
my $text = shift;
$text =~ s/``/$xml_numeric_entity_ldquo/g;
$text =~ s/\'\'/$xml_numeric_entity_rdquo/g;
$text =~ s/`/$xml_numeric_entity_lsquo/g;
$text =~ s/\'/$xml_numeric_entity_rsquo/g;
$text =~ s/---/$xml_numeric_entity_mdash/g;
$text =~ s/--/$xml_numeric_entity_ndash/g;
return $text;
}
sub xml_protect_text($$)
{
my $self = shift;
my $text = shift;
if (!defined($text)) {
cluck;
}
$text =~ s/&/&/g;
$text =~ s/</g;
$text =~ s/>/>/g;
$text =~ s/\"/"/g;
return $text;
}
# 'today' is not set here.
our %xml_text_entity_no_arg_commands_formatting = (
'TeX' => 'TeX',
'LaTeX' => 'LaTeX',
'bullet' => '•',
'copyright' => '©',
'registeredsymbol' => '®',
'dots' => '…',
'enddots' => '...',
'equiv' => '≡',
# in general the following is not used since error
# appears in 'translated_commands'
'error' => 'error-->',
'expansion' => '→',
'arrow' => '→',
'click' => '→',
'minus' => '−',
'point' => '∗',
'print' => '-|',
'result' => '⇒',
'aa' => 'å',
'AA' => 'Å',
'ae' => 'æ',
'oe' => 'œ', # œ not in html 3.2
'AE' => 'Æ',
'OE' => 'Œ', # Œ not in html 3.2
'o' => 'ø',
'O' => 'Ø',
'ss' => 'ß',
'DH' => 'Ð',
'dh' => 'ð',
'TH' => 'Þ',
'th' => 'þ',
'l' => 'ł',
'L' => 'Ł',
'exclamdown' => '¡',
'questiondown' => '¿',
'pounds' => '£',
'ordf' => 'ª',
'ordm' => 'º',
'comma' => ',',
'atchar' => '@',
'ampchar' => '&',
'lbracechar' => '{',
'rbracechar' => '}',
'backslashchar' => '\\',
'hashchar' => '#',
'euro' => '€',
'geq' => '≥',
'leq' => '≤',
'tie' => ' ',
'textdegree' => '°',
'quotedblleft' => '“',
'quotedblright' => '”',
'quoteleft' => '‘',
'quoteright' => '’',
'quotedblbase' => '„',
'quotesinglbase' => '‚',
'guillemetleft' => '«',
'guillemetright' => '»',
'guillemotleft' => '«',
'guillemotright' => '»',
'guilsinglleft' => '‹',
'guilsinglright' => '›',
);
foreach my $no_brace_command (keys(%Texinfo::Common::nobrace_symbol_text)) {
$xml_text_entity_no_arg_commands_formatting{$no_brace_command}
= $Texinfo::Common::nobrace_symbol_text{$no_brace_command};
}
$xml_text_entity_no_arg_commands_formatting{'&'} = '&';
sub xml_comment($$)
{
my $self = shift;
my $text = shift;
chomp $text;
$text =~ s/--+/-/go;
return '' . "\n";
}
our %xml_accent_entities = (
'"', 'uml',
'~', 'tilde',
'^', 'circ',
'`', 'grave',
"'", 'acute',
",", 'cedil',
'ringaccent', 'ring',
'ogonek', 'ogon',
'dotless', 'nodot',
);
our %xml_accent_text_with_entities = (
'ringaccent' => 'aA',
"'" => 'aeiouyAEIOUY',
',' => 'cC',
'^' => 'aeiouAEIOU',
'`' => 'aeiouAEIOU',
'~' => 'nNaoAO',
'"' => 'aeiouyAEIOU',
'dotless' => 'i',
# according to http://www2.lib.virginia.edu/small/vhp/download/ISO.txt
# however this doesn't seems to work in firefox
# 'ogonek' => 'aeiuAEIU',
);
sub xml_numeric_entity_accent($$)
{
my $accent = shift;
my $text = shift;
if (exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent})
and exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text})) {
return '' .
hex($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text}). ';';
}
if (exists($Texinfo::Convert::Unicode::unicode_diacritics{$accent})) {
my $diacritic = ''
.hex($Texinfo::Convert::Unicode::unicode_diacritics{$accent}). ';';
if ($accent ne 'tieaccent') {
return $text . $diacritic;
} else {
# tieaccent diacritic is naturally and correctly composed
# between two characters
my $remaining_text = $text;
# we consider that letters are either characters or entities
if ($remaining_text =~ s/^([\p{L}\d]|&[a-zA-Z0-9]+;)([\p{L}\d]|&[a-zA-Z0-9]+;)(.*)$/$3/) {
return $1.$diacritic.$2 . $remaining_text;
} else {
return $text . $diacritic;
}
}
}
return undef;
}
sub xml_accent($$$;$$$)
{
my $self = shift;
my $text = shift;
my $command = shift;
my $in_upper_case = shift;
my $use_numeric_entities = shift;
my $accent = $command->{'cmdname'};
if ($in_upper_case and $text =~ /^\w$/) {
$text = uc ($text);
}
# do not return a dotless i or j as such if it is further composed
# with an accented letter, return the letter as is
if ($accent eq 'dotless') {
if ($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}
and exists($Texinfo::Convert::Unicode::unicode_accented_letters{$accent}->{$text})
and ($command->{'parent'}
and $command->{'parent'}->{'parent'}
and $command->{'parent'}->{'parent'}->{'cmdname'}
and $Texinfo::Convert::Unicode::unicode_accented_letters{$command->{'parent'}
->{'parent'}->{'cmdname'}})) {
return $text;
}
}
if ($use_numeric_entities) {
my $formatted_accent = xml_numeric_entity_accent($accent, $text);
if (defined($formatted_accent)) {
return $formatted_accent;
}
} else {
return "&${text}$xml_accent_entities{$accent};"
if (defined($xml_accent_entities{$accent})
and defined($xml_accent_text_with_entities{$accent})
and ($text =~ /^[$xml_accent_text_with_entities{$accent}]$/));
my $formatted_accent = xml_numeric_entity_accent($accent, $text);
if (defined($formatted_accent)) {
return $formatted_accent;
}
}
# REMARK this code should never be run as there are diacritics for every
# accent command.
#
# TODO it is not possible to call xml_protect_text since what is in $text
# may already be xml. But this means that each time ascii_accent changes
# it should be changed here too ii ascii_accent returns invalid xml.
return $text . '<' if ($accent eq 'v');
return Texinfo::Convert::Text::ascii_accent($text, $command);
}
sub _xml_numeric_entities_accent($$$;$)
{
my $self = shift;
my $text = shift;
my $command = shift;
my $in_upper_case = shift;
return xml_accent($self, $text, $command, $in_upper_case, 1);
}
sub xml_accents($$;$)
{
my $self = shift;
my $accent = shift;
my $in_upper_case = shift;
my $format_accents;
if ($self->get_conf('USE_NUMERIC_ENTITY')) {
$format_accents = \&_xml_numeric_entities_accent;
} else {
$format_accents = \&xml_accent;
}
return $self->convert_accents($accent, $format_accents,
$self->get_conf('OUTPUT_CHARACTERS'),
$in_upper_case);
}
1;
__END__
=head1 NAME
Texinfo::Convert::Converter - Parent class for Texinfo tree converters
=head1 SYNOPSIS
package Texinfo::Convert::MyConverter;
use Texinfo::Convert::Converter;
@ISA = qw(Texinfo::Convert::Converter);
sub converter_defaults ($$) {
return %myconverter_defaults;
}
sub converter_initialize($) {
my $self = shift;
$self->{'document_context'} = [{}];
}
sub convert($$) {
...
}
sub convert_tree($$) {
...
}
sub output($$) {
...
}
# end of Texinfo::Convert::MyConverter
my $converter = Texinfo::Convert::MyConverter->converter(
{'parser' => $parser});
$converter->output($texinfo_tree);
=head1 NOTES
The Texinfo Perl module main purpose is to be used in C to convert
Texinfo to other formats. There is no promise of API stability.
=head1 DESCRIPTION
C is a super class that can be used to
simplify converters initialization. The class also provide some
useful methods.
In turn, the converter should define some methods. Two are
optional, C, C and
used for initialization, to give information to C.
X> X> X>
The C method is mandatory and should convert portions of Texinfo
tree. The C