diff --git a/MANIFEST b/MANIFEST index 5c94dd640feb..190e36267599 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1838,6 +1838,7 @@ cpan/Pod-Usage/t/pod/testcmp.pl cpan/Pod-Usage/t/pod/testp2pt.pl cpan/Pod-Usage/t/pod/usage.pod cpan/Pod-Usage/t/pod/usage2.pod +cpan/podlators/docs/docknot.yaml cpan/podlators/lib/Pod/Man.pm Convert POD data to *roff cpan/podlators/lib/Pod/ParseLink.pm Perl an L<> formatting code in POD text cpan/podlators/lib/Pod/Text.pm Pod-Parser - convert POD data to formatted ASCII text @@ -1853,7 +1854,12 @@ cpan/podlators/t/data/basic.man podlators test cpan/podlators/t/data/basic.ovr podlators test cpan/podlators/t/data/basic.pod podlators test cpan/podlators/t/data/basic.txt podlators test +cpan/podlators/t/data/man/encoding.groff +cpan/podlators/t/data/man/encoding.pod +cpan/podlators/t/data/man/encoding.roff +cpan/podlators/t/data/man/encoding.utf8 cpan/podlators/t/data/perl.conf podlators test +cpan/podlators/t/data/regenerate-data cpan/podlators/t/data/snippets/color/escape-wrapping cpan/podlators/t/data/snippets/color/tag-width cpan/podlators/t/data/snippets/color/tag-wrapping @@ -1877,9 +1883,19 @@ cpan/podlators/t/data/snippets/man/eth cpan/podlators/t/data/snippets/man/fixed-font cpan/podlators/t/data/snippets/man/fixed-font-in-item cpan/podlators/t/data/snippets/man/for-blocks +cpan/podlators/t/data/snippets/man/guesswork +cpan/podlators/t/data/snippets/man/guesswork-all +cpan/podlators/t/data/snippets/man/guesswork-no-quoting +cpan/podlators/t/data/snippets/man/guesswork-none +cpan/podlators/t/data/snippets/man/guesswork-partial +cpan/podlators/t/data/snippets/man/guesswork-quoting cpan/podlators/t/data/snippets/man/hyphen-in-s cpan/podlators/t/data/snippets/man/iso-8859-1 +cpan/podlators/t/data/snippets/man/iso-8859-1-error-die +cpan/podlators/t/data/snippets/man/iso-8859-1-error-pod +cpan/podlators/t/data/snippets/man/iso-8859-1-roff cpan/podlators/t/data/snippets/man/item-fonts +cpan/podlators/t/data/snippets/man/language 
cpan/podlators/t/data/snippets/man/link-quoting cpan/podlators/t/data/snippets/man/link-to-url cpan/podlators/t/data/snippets/man/long-quote @@ -1887,29 +1903,32 @@ cpan/podlators/t/data/snippets/man/lquote-and-quote cpan/podlators/t/data/snippets/man/lquote-rquote cpan/podlators/t/data/snippets/man/markup-in-name cpan/podlators/t/data/snippets/man/multiline-x +cpan/podlators/t/data/snippets/man/naive +cpan/podlators/t/data/snippets/man/naive-groff cpan/podlators/t/data/snippets/man/name-guesswork +cpan/podlators/t/data/snippets/man/name-quotes +cpan/podlators/t/data/snippets/man/name-quotes-none cpan/podlators/t/data/snippets/man/nested-lists cpan/podlators/t/data/snippets/man/newlines-in-c cpan/podlators/t/data/snippets/man/non-ascii +cpan/podlators/t/data/snippets/man/nonbreaking-space-l cpan/podlators/t/data/snippets/man/not-bullet cpan/podlators/t/data/snippets/man/not-numbers cpan/podlators/t/data/snippets/man/nourls -cpan/podlators/t/data/snippets/man/paired-quotes cpan/podlators/t/data/snippets/man/periods cpan/podlators/t/data/snippets/man/quote-escaping cpan/podlators/t/data/snippets/man/rquote-none -cpan/podlators/t/data/snippets/man/small-caps-magic cpan/podlators/t/data/snippets/man/soft-hyphens cpan/podlators/t/data/snippets/man/trailing-space cpan/podlators/t/data/snippets/man/true-false -cpan/podlators/t/data/snippets/man/uppercase-license cpan/podlators/t/data/snippets/man/utf8-nonbreaking cpan/podlators/t/data/snippets/man/utf8-verbatim cpan/podlators/t/data/snippets/man/x-whitespace cpan/podlators/t/data/snippets/man/x-whitespace-entry +cpan/podlators/t/data/snippets/man/zero-width-space cpan/podlators/t/data/snippets/overstrike/tag-width cpan/podlators/t/data/snippets/overstrike/wrapping -cpan/podlators/t/data/snippets/README podlators test +cpan/podlators/t/data/snippets/README.md cpan/podlators/t/data/snippets/termcap/escape-wrapping cpan/podlators/t/data/snippets/termcap/tag-width cpan/podlators/t/data/snippets/termcap/tag-wrapping @@ 
-1929,11 +1948,19 @@ cpan/podlators/t/data/snippets/text/error-stderr cpan/podlators/t/data/snippets/text/error-stderr-opt cpan/podlators/t/data/snippets/text/for cpan/podlators/t/data/snippets/text/iso-8859-1 +cpan/podlators/t/data/snippets/text/iso-8859-1-error-die +cpan/podlators/t/data/snippets/text/iso-8859-1-error-pod +cpan/podlators/t/data/snippets/text/iso-8859-1-utf8 cpan/podlators/t/data/snippets/text/late-encoding cpan/podlators/t/data/snippets/text/link-rt cpan/podlators/t/data/snippets/text/link-url cpan/podlators/t/data/snippets/text/margin +cpan/podlators/t/data/snippets/text/naive +cpan/podlators/t/data/snippets/text/name-quotes +cpan/podlators/t/data/snippets/text/name-quotes-none +cpan/podlators/t/data/snippets/text/non-latin cpan/podlators/t/data/snippets/text/nonbreaking-space +cpan/podlators/t/data/snippets/text/nonbreaking-space-l cpan/podlators/t/data/snippets/text/nourls cpan/podlators/t/data/snippets/text/periods cpan/podlators/t/data/snippets/text/quotes-opt @@ -1943,6 +1970,7 @@ cpan/podlators/t/data/snippets/text/utf8 cpan/podlators/t/data/snippets/text/utf8-iso cpan/podlators/t/data/snippets/text/verbatim cpan/podlators/t/data/termcap podlators test +cpan/podlators/t/docs/changes.t cpan/podlators/t/docs/pod.t podlators test cpan/podlators/t/docs/pod-spelling.t podlators test cpan/podlators/t/docs/spdx-license.t podlators test @@ -1957,6 +1985,7 @@ cpan/podlators/t/lib/Test/RRA/ModuleVersion.pm podlators test cpan/podlators/t/man/devise-date.t podlators test cpan/podlators/t/man/devise-title.t podlators test cpan/podlators/t/man/empty.t podlators test +cpan/podlators/t/man/encoding.t cpan/podlators/t/man/heading.t podlators test cpan/podlators/t/man/iso-8859-1.t podlators test cpan/podlators/t/man/no-encode.t podlators test diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index 74093af680d3..0872c8ddce8b 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -983,19 +983,11 @@ package Maintainers; }, 'podlators' 
=> { - 'DISTRIBUTION' => 'RRA/podlators-4.14.tar.gz', + 'DISTRIBUTION' => 'RRA/podlators-5.00.tar.gz', 'MAIN_MODULE' => 'Pod::Man', 'FILES' => q[cpan/podlators pod/perlpodstyle.pod], 'EXCLUDED' => [ - qr{^docs/metadata/}, - ], - - # https://github.com/rra/podlators/pull/15 - 'CUSTOMIZED' => [ - 't/general/basic.t', - 't/man/empty.t', - 't/man/no-encode.t', - 't/text/invalid.t', + qr{^\.github/workflows/build\.yaml}, ], 'MAP' => { diff --git a/Porting/sync-with-cpan b/Porting/sync-with-cpan index 0fc5726eeda5..2d31a28281cd 100755 --- a/Porting/sync-with-cpan +++ b/Porting/sync-with-cpan @@ -172,7 +172,7 @@ my $package_url = "http://www.cpan.org/modules/$package"; my $package_file = "$tmpdir/$package"; # this is a cache my @problematic = ( - 'podlators', # weird CUSTOMIZED section due to .PL files + # no current entries as of perl-5.37.7 (Dec 2022) ); diff --git a/cpan/podlators/Makefile.PL b/cpan/podlators/Makefile.PL index ff76df5f9f70..93fcd4f9d981 100644 --- a/cpan/podlators/Makefile.PL +++ b/cpan/podlators/Makefile.PL @@ -4,7 +4,7 @@ # which only supports that build method, and because it is a dependency of # other build systems like Module::Build. # -# Copyright 1999-2001, 2008, 2010, 2012, 2014-2016, 2018-2019 +# Copyright 1999-2001, 2008, 2010, 2012, 2014-2016, 2018-2019, 2022 # Russ Allbery # # This program is free software; you may redistribute it and/or modify it @@ -30,7 +30,7 @@ sub dist_version { open(my $fh, '<', File::Spec->catfile('lib', 'Pod', 'Man.pm')) or die "$0: cannot open lib/Pod/Man.pm: $!\n"; while (defined(my $line = <$fh>)) { - if ($line =~ m{ \A \$VERSION \s+ = \s+ '([^\']+)' }xms) { + if ($line =~ m{ \A (?:our \s+)? 
\$VERSION \s+ = \s+ '([^\']+)' }xms) { close($fh) or die "$0: cannot close lib/Pod/Man.pm\n"; return $1; } @@ -89,7 +89,7 @@ my %metadata = ( LICENSE => 'perl_5', EXE_FILES => [scripts('pod2text', 'pod2man')], VERSION_FROM => 'lib/Pod/Man.pm', - MIN_PERL_VERSION => '5.008', + MIN_PERL_VERSION => '5.010', # Use *.PL files to generate the driver scripts so that we get the correct # invocation of Perl on non-UNIX platforms. diff --git a/cpan/podlators/docs/docknot.yaml b/cpan/podlators/docs/docknot.yaml new file mode 100644 index 000000000000..4dcb50e2dab4 --- /dev/null +++ b/cpan/podlators/docs/docknot.yaml @@ -0,0 +1,144 @@ +# Package metadata for podlators. +# +# This file contains configuration for DocKnot used to generate +# documentation files (like README.md) and web pages. Other documentation +# in this package is generated automatically from these files as part of +# the release process. For more information, see DocKnot's documentation. +# +# DocKnot is available from . +# +# Copyright 1999-2010, 2012-2022 Russ Allbery +# +# SPDX-License-Identifier: MIT + +format: v1 + +name: podlators +maintainer: Russ Allbery +version: '5.00' +synopsis: format POD source into various output formats + +license: + name: Perl +copyrights: + - holder: Russ Allbery + years: 1999-2010, 2012-2022 + +build: + type: ExtUtils::MakeMaker +distribution: + cpan: podlators + section: perl + tarname: podlators + version: podlators +support: + email: rra@cpan.org + github: rra/podlators + web: https://www.eyrie.org/~eagle/software/podlators/ +vcs: + browse: https://git.eyrie.org/?p=perl/podlators.git + github: rra/podlators + openhub: https://www.openhub.net/p/podlators + status: + workflow: build + type: Git + url: https://git.eyrie.org/git/perl/podlators.git + +quote: + author: Robert Fripp + text: | + We move from making unnecessary efforts, the exertions of force, to making + necessary efforts: the direction of effortlessness. 
In this the prime + maxim is: honor necessity, honor sufficiency. + work: '"The Road to Graceland"' +docs: + api: + - name: pod-man + title: Pod::Man + - name: pod-text + title: Pod::Text + - name: pod-text-color + title: Pod::Text::Color + - name: pod-text-overstrike + title: Pod::Text::Overstrike + - name: pod-text-termcap + title: Pod::Text::Termcap + developer: + - name: todo + title: To-do list + user: + - name: perlpodstyle + title: POD style guide + - name: pod2man + title: pod2man documentation + - name: pod2text + title: pod2text documentation + - name: thanks + title: Thanks and credits + +blurb: | + podlators contains Pod::Man and Pod::Text modules which convert POD input to + *roff source output, suitable for man pages, or plain text. It also + includes several subclasses of Pod::Text for formatted output to terminals + with various capabilities. It is the source package for the Pod::Man and + Pod::Text modules included with Perl. + +description: | + POD is the Plain Old Documentation format, the documentation language used + for all of Perl's documentation. I learned it to document Perl modules, + started using it for Perl scripts as well, and discovered it was the most + convenient way I've found to write program documentation. It's extremely + simple, well-designed for writing Unix manual pages (and I'm a + traditionalist who thinks that any program should have a regular manual + page), and easily readable in the raw format by humans. + + The translators into text and nroff (for manual pages) included in the Perl + distribution had various bugs, however, and used their own ad hoc parsers, + so when I started running into those bugs and when a new generic parser + (Pod::Parser) was written, I decided to rewrite the two translators that I + use the most and fix the bugs that were bothering me. This package is the + result. + + podlators contains two main modules, Pod::Man and Pod::Text. 
The former + converts POD into nroff/troff source and the latter into plain text (with + various options controlling some of the formatting). There are also several + subclasses of Pod::Text for generating slightly formatted text using color + or other terminal control escapes, and a general utility module, + Pod::ParseLink, for parsing the POD `L<>` formatting sequences. Also + included in this package are the `pod2text` and `pod2man` driver scripts. + + Both Pod::Text and Pod::Man provide a variety of options for fine-tuning + their output. Pod::Man also tries to massage input text where appropriate + to produce better output when run through nroff or troff, such as + distinguishing between different types of hyphens. + + As of Perl 5.6.0, my implementation was included in Perl core, and each + release of Perl will have the at-the-time most current version of podlators + included. You therefore only need to install this package yourself if you + need a newer version than came with Perl (to get some bug fixes, for + example). + +requirements: | + This module requires Perl 5.10 or later. + + The troff/nroff generated by Pod::Man should be compatible with any troff or + nroff implementation with the `-man` macro set, including mandoc. It is + primarily tested by me under GNU groff, but Perl users send bug reports for + a wide variety of implementations and Pod::Man is used to generate all of + Perl's own manual pages, so hopefully most of the bugs have been weeded out. + +test: + lancaster: true + suffix: | + The following additional Perl modules will be used by the test suite if + present: + + * Test::CPAN::Changes (part of CPAN-Changes) + * Test::MinimumVersion + * Test::Pod + * Test::Spelling + * Test::Strict + * Test::Synopsis + + All are available on CPAN. Those tests will be skipped if the modules are + not available. 
diff --git a/cpan/podlators/lib/Pod/Man.pm b/cpan/podlators/lib/Pod/Man.pm index d7c029357a29..57be69f60049 100644 --- a/cpan/podlators/lib/Pod/Man.pm +++ b/cpan/podlators/lib/Pod/Man.pm @@ -14,36 +14,33 @@ package Pod::Man; -use 5.008; +use 5.010; use strict; use warnings; -use subs qw(makespace); -use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION); - use Carp qw(carp croak); use Pod::Simple (); # Conditionally import Encode and set $HAS_ENCODE if it is available. This is # required to support building as part of Perl core, since podlators is built # before Encode is. -our $HAS_ENCODE; +my $HAS_ENCODE; BEGIN { $HAS_ENCODE = eval { require Encode }; } -@ISA = qw(Pod::Simple); - -$VERSION = '4.14'; - -# Set the debugging level. If someone has inserted a debug function into this -# class already, use that. Otherwise, use any Pod::Simple debug function -# that's defined, and failing that, define a debug level of 10. -BEGIN { - my $parent = defined (&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG : undef; - unless (defined &DEBUG) { - *DEBUG = $parent || sub () { 10 }; - } +our @ISA = qw(Pod::Simple); +our $VERSION = '5.00'; + +# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available. Code +# taken from Pod::Simple 3.32, but was only added in 3.30. +my ($NBSP, $SHY); +if ($Pod::Simple::VERSION ge 3.30) { + $NBSP = $Pod::Simple::nbsp; + $SHY = $Pod::Simple::shy; +} else { + $NBSP = chr utf8::unicode_to_native(0xA0); + $SHY = chr utf8::unicode_to_native(0xAD); } # Import the ASCII constant from Pod::Simple. This is true iff we're in an @@ -57,9 +54,9 @@ BEGIN { *pretty = \&Pod::Simple::pretty } # Formatting instructions for various types of blocks. cleanup makes hyphens # hard, adds spaces between consecutive underscores, and escapes backslashes. # convert translates characters into escapes. guesswork means to apply the -# transformations done by the guesswork sub. literal says to protect literal -# quotes from being turned into UTF-8 quotes. 
By default, all transformations -# are on except literal, but some elements override. +# transformations done by the guesswork sub (if enabled). literal says to +# protect literal quotes from being turned into UTF-8 quotes. By default, all +# transformations are on except literal, but some elements override. # # DEFAULT specifies the default settings. All other elements should list only # those settings that they are overriding. Data indicates =for roff blocks, @@ -75,6 +72,84 @@ my %FORMATTING = ( X => { cleanup => 0, guesswork => 0 }, ); +# Try to map an encoding as understood by Perl Encode to an encoding +# understood by groff's preconv. Encode doesn't care about hyphens or +# capitalization, but preconv does. The key is the canonicalized Encode +# encoding, and the value is something preconv might understand. +# +# FreeBSD mandoc only understands utf-8 and iso-latin-1 as of 2022-09-24. +# groff preconv prefers iso-8859-1, but also understands iso-latin-1, so +# convert ISO-8859-1 to iso-latin-1 for FreeBSD. +my %ENCODINGS = ( + ascii => 'us-ascii', + big5 => 'big5', + big5eten => 'big5', + cp950 => 'big5', + cp1047 => 'cp1047', + euccn => 'gb2312', + eucjp => 'euc-jp', + euckr => 'euc-kr', + gb2312 => 'gb2312', + gb2312raw => 'gb2312', + iso88591 => 'iso-latin-1', + iso88592 => 'iso-8859-2', + iso88595 => 'iso-8859-5', + iso88597 => 'iso-8859-7', + iso88599 => 'iso-8859-9', + iso885913 => 'iso-8859-13', + iso885915 => 'iso-8859-15', + koi8r => 'koi8-r', + latin1 => 'iso-8859-1', + usascii => 'us-ascii', + utf8 => 'utf-8', + utf16 => 'utf-16', + utf16be => 'utf-16be', + utf16le => 'utf-16le', +); + +############################################################################## +# Translation tables +############################################################################## + +# The following table is adapted from Tom Christiansen's pod2man. It is only +# used with roff output. 
It assumes that the standard preamble has already +# been printed, since that's what defines all of the accent marks. We really +# want to do something better than this when *roff actually supports other +# character sets itself, since these results are pretty poor. +# +# This only works in an ASCII world. What to do in a non-ASCII world is very +# unclear, so we just output what we get and hope for the best. +my %ESCAPES; +@ESCAPES{0xA0 .. 0xFF} = ( + $NBSP, undef, undef, undef, undef, undef, undef, undef, + undef, undef, undef, undef, undef, $SHY, undef, undef, + + undef, undef, undef, undef, undef, undef, undef, undef, + undef, undef, undef, undef, undef, undef, undef, undef, + + "A\\*`", "A\\*'", "A\\*^", "A\\*~", "A\\*:", "A\\*o", "\\*(Ae", "C\\*,", + "E\\*`", "E\\*'", "E\\*^", "E\\*:", "I\\*`", "I\\*'", "I\\*^", "I\\*:", + + "\\*(D-", "N\\*~", "O\\*`", "O\\*'", "O\\*^", "O\\*~", "O\\*:", undef, + "O\\*/", "U\\*`", "U\\*'", "U\\*^", "U\\*:", "Y\\*'", "\\*(Th", "\\*8", + + "a\\*`", "a\\*'", "a\\*^", "a\\*~", "a\\*:", "a\\*o", "\\*(ae", "c\\*,", + "e\\*`", "e\\*'", "e\\*^", "e\\*:", "i\\*`", "i\\*'", "i\\*^", "i\\*:", + + "\\*(d-", "n\\*~", "o\\*`", "o\\*'", "o\\*^", "o\\*~", "o\\*:", undef, + "o\\*/" , "u\\*`", "u\\*'", "u\\*^", "u\\*:", "y\\*'", "\\*(th", "y\\*:", +) if ASCII; + +############################################################################## +# Utility functions +############################################################################## + +# Returns whether the given encoding needs a call to Encode::encode. +sub _needs_encode { + my ($encoding) = @_; + return $encoding ne 'roff' && $encoding ne 'groff'; +} + ############################################################################## # Object initialization ############################################################################## @@ -88,9 +163,6 @@ sub new { my $class = shift; my $self = $class->SUPER::new; - # Tell Pod::Simple not to handle S<> by automatically inserting  . 
- $self->nbsp_for_S (1); - # Tell Pod::Simple to keep whitespace whenever possible. if (my $preserve_whitespace = $self->can ('preserve_whitespace')) { $self->$preserve_whitespace (1); @@ -109,63 +181,84 @@ sub new { # to put them in our object as hash keys and values. This could cause # problems if we ever clash with Pod::Simple's own internal class # variables. - %$self = (%$self, @_); + my %opts = @_; + my @opts = map { ("opt_$_", $opts{$_}) } keys %opts; + %$self = (%$self, @opts); + + # Pod::Simple uses encoding internally, so we need to store it as + # ENCODING. Set the default to UTF-8 if not specified. + # + # Degrade to the old roff encoding if Encode is not available. + # + # Suppress the warning message when PERL_CORE is set, indicating this is + # running as part of the core Perl build. Perl builds podlators (and all + # pure Perl modules) before Encode and other XS modules, so Encode won't + # yet be available. Rely on the Perl core build to generate man pages + # later, after all the modules are available, so that UTF-8 handling will + # be correct. + my %options = @_; + if (defined $self->{opt_encoding}) { + $$self{ENCODING} = $self->{opt_encoding}; + } elsif (ASCII) { + $$self{ENCODING} = 'UTF-8'; + } else { + $$self{ENCODING} = 'groff'; + } + if (_needs_encode($$self{ENCODING}) && !$HAS_ENCODE) { + if (!$ENV{PERL_CORE}) { + carp ('encoding requested but Encode module not available,' + . ' falling back to groff escapes'); + } + $$self{ENCODING} = 'groff'; + } # Send errors to stderr if requested. - if ($$self{stderr} and not $$self{errors}) { - $$self{errors} = 'stderr'; + if ($self->{opt_stderr} and not $self->{opt_errors}) { + $self->{opt_errors} = 'stderr'; } - delete $$self{stderr}; + delete $self->{opt_stderr}; # Validate the errors parameter and act on it. 
- if (not defined $$self{errors}) { - $$self{errors} = 'pod'; - } - if ($$self{errors} eq 'stderr' || $$self{errors} eq 'die') { + $self->{opt_errors} //= 'pod'; + if ($self->{opt_errors} eq 'stderr' || $self->{opt_errors} eq 'die') { $self->no_errata_section (1); $self->complain_stderr (1); - if ($$self{errors} eq 'die') { - $$self{complain_die} = 1; + if ($self->{opt_errors} eq 'die') { + $self->{complain_die} = 1; } - } elsif ($$self{errors} eq 'pod') { + } elsif ($self->{opt_errors} eq 'pod') { $self->no_errata_section (0); $self->complain_stderr (0); - } elsif ($$self{errors} eq 'none') { + } elsif ($self->{opt_errors} eq 'none') { $self->no_errata_section (1); $self->no_whining (1); } else { - croak (qq(Invalid errors setting: "$$self{errors}")); - } - delete $$self{errors}; - - # Degrade back to non-utf8 if Encode is not available. - # - # Suppress the warning message when PERL_CORE is set, indicating this is - # running as part of the core Perl build. Perl builds podlators (and all - # pure Perl modules) before Encode and other XS modules, so Encode won't - # yet be available. Rely on the Perl core build to generate man pages - # later, after all the modules are available, so that UTF-8 handling will - # be correct. - if ($$self{utf8} and !$HAS_ENCODE) { - if (!$ENV{PERL_CORE}) { - carp ('utf8 mode requested but Encode module not available,' - . ' falling back to non-utf8'); - } - delete $$self{utf8}; + croak (qq(Invalid errors setting: "$self->{opt_errors}")); } + delete $self->{opt_errors}; # Initialize various other internal constants based on our arguments. $self->init_fonts; $self->init_quotes; $self->init_page; - # For right now, default to turning on all of the magic. - $$self{MAGIC_CPP} = 1; - $$self{MAGIC_EMDASH} = 1; - $$self{MAGIC_FUNC} = 1; - $$self{MAGIC_MANREF} = 1; - $$self{MAGIC_SMALLCAPS} = 1; - $$self{MAGIC_VARS} = 1; + # Configure guesswork based on options. 
+ my $guesswork = $self->{opt_guesswork} || q{}; + my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork); + if (!%guesswork || $guesswork{all}) { + #<<< + $$self{GUESSWORK} = { + functions => 1, + manref => 1, + quoting => 1, + variables => 1, + }; + #>>> + } elsif ($guesswork{none}) { + $$self{GUESSWORK} = {}; + } else { + $$self{GUESSWORK} = {%guesswork}; + } return $self; } @@ -183,7 +276,7 @@ sub init_fonts { # Figure out the fixed-width font. If user-supplied, make sure that they # are the right length. for (qw/fixed fixedbold fixeditalic fixedbolditalic/) { - my $font = $$self{$_}; + my $font = $self->{"opt_$_"}; if (defined ($font) && (length ($font) < 1 || length ($font) > 2)) { croak qq(roff font should be 1 or 2 chars, not "$font"); } @@ -192,19 +285,19 @@ sub init_fonts { # Set the default fonts. We can't be sure portably across different # implementations what fixed bold-italic may be called (if it's even # available), so default to just bold. - $$self{fixed} ||= 'CW'; - $$self{fixedbold} ||= 'CB'; - $$self{fixeditalic} ||= 'CI'; - $$self{fixedbolditalic} ||= 'CB'; + $self->{opt_fixed} ||= 'CW'; + $self->{opt_fixedbold} ||= 'CB'; + $self->{opt_fixeditalic} ||= 'CI'; + $self->{opt_fixedbolditalic} ||= 'CB'; # Set up a table of font escapes. First number is fixed-width, second is # bold, third is italic. $$self{FONTS} = { '000' => '\fR', '001' => '\fI', '010' => '\fB', '011' => '\f(BI', - '100' => toescape ($$self{fixed}), - '101' => toescape ($$self{fixeditalic}), - '110' => toescape ($$self{fixedbold}), - '111' => toescape ($$self{fixedbolditalic}) }; + '100' => toescape ($self->{opt_fixed}), + '101' => toescape ($self->{opt_fixeditalic}), + '110' => toescape ($self->{opt_fixedbold}), + '111' => toescape ($self->{opt_fixedbolditalic}) }; } # Initialize the quotes that we'll be using for C<> text. This requires some @@ -215,25 +308,27 @@ sub init_quotes { my ($self) = (@_); # Handle the quotes option first, which sets both quotes at once. 
- $$self{quotes} ||= '"'; - if ($$self{quotes} eq 'none') { + $self->{opt_quotes} ||= '"'; + if ($self->{opt_quotes} eq 'none') { $$self{LQUOTE} = $$self{RQUOTE} = ''; - } elsif (length ($$self{quotes}) == 1) { - $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes}; - } elsif (length ($$self{quotes}) % 2 == 0) { - my $length = length ($$self{quotes}) / 2; - $$self{LQUOTE} = substr ($$self{quotes}, 0, $length); - $$self{RQUOTE} = substr ($$self{quotes}, $length); + } elsif (length ($self->{opt_quotes}) == 1) { + $$self{LQUOTE} = $$self{RQUOTE} = $self->{opt_quotes}; + } elsif (length ($self->{opt_quotes}) % 2 == 0) { + my $length = length ($self->{opt_quotes}) / 2; + $$self{LQUOTE} = substr ($self->{opt_quotes}, 0, $length); + $$self{RQUOTE} = substr ($self->{opt_quotes}, $length); } else { - croak(qq(Invalid quote specification "$$self{quotes}")) + croak(qq(Invalid quote specification "$self->{opt_quotes}")) } # Now handle the lquote and rquote options. - if (defined $$self{lquote}) { - $$self{LQUOTE} = $$self{lquote} eq 'none' ? q{} : $$self{lquote}; + if (defined($self->{opt_lquote})) { + $self->{opt_lquote} = q{} if $self->{opt_lquote} eq 'none'; + $$self{LQUOTE} = $self->{opt_lquote}; } - if (defined $$self{rquote}) { - $$self{RQUOTE} = $$self{rquote} eq 'none' ? q{} : $$self{rquote}; + if (defined $self->{opt_rquote}) { + $self->{opt_rquote} = q{} if $self->{opt_rquote} eq 'none'; + $$self{RQUOTE} = $self->{opt_rquote}; } # Double the first quote; note that this should not be s///g as two double @@ -254,16 +349,13 @@ sub init_page { # Set the defaults for page titles and indentation if the user didn't # override anything. - $$self{center} = 'User Contributed Perl Documentation' - unless defined $$self{center}; - $$self{release} = 'perl v' . $version - unless defined $$self{release}; - $$self{indent} = 4 - unless defined $$self{indent}; + $self->{opt_center} //= 'User Contributed Perl Documentation'; + $self->{opt_release} //= 'perl v' . 
$version; + $self->{opt_indent} //= 4; # Double quotes in things that will be quoted. for (qw/center release/) { - $$self{$_} =~ s/\"/\"\"/g if $$self{$_}; + $self->{"opt_$_"} =~ s/\"/\"\"/g if $self->{"opt_$_"}; } } @@ -289,7 +381,6 @@ sub init_page { # according to the current formatting instructions as we do. sub _handle_text { my ($self, $text) = @_; - DEBUG > 3 and print "== $text\n"; my $tag = $$self{PENDING}[-1]; $$tag[2] .= $self->format_text ($$tag[1], $text); } @@ -308,7 +399,6 @@ sub method_for_element { # text and nested elements. Otherwise, if start_element is defined, call it. sub _handle_element_start { my ($self, $element, $attrs) = @_; - DEBUG > 3 and print "++ $element (<", join ('> <', %$attrs), ">)\n"; my $method = $self->method_for_element ($element); # If we have a command handler, we need to accumulate the contents of the @@ -316,7 +406,6 @@ sub _handle_element_start { # and the formatting codes so that IN_NAME isn't still set for the # first heading after the NAME heading. if ($self->can ("cmd_$method")) { - DEBUG > 2 and print "<$element> starts saving a tag\n"; $$self{IN_NAME} = 0 if ($element ne 'Para' && length ($element) > 1); # How we're going to format embedded text blocks depends on the tag @@ -328,11 +417,8 @@ sub _handle_element_start { %{ $FORMATTING{$element} || {} }, }; push (@{ $$self{PENDING} }, [ $attrs, $formatting, '' ]); - DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n"; } elsif (my $start_method = $self->can ("start_$method")) { $self->$start_method ($attrs, ''); - } else { - DEBUG > 2 and print "No $method start method, skipping\n"; } } @@ -341,16 +427,12 @@ sub _handle_element_start { # an end_ method for the element, call that. sub _handle_element_end { my ($self, $element) = @_; - DEBUG > 3 and print "-- $element\n"; my $method = $self->method_for_element ($element); # If we have a command handler, pull off the pending text and pass it to # the handler along with the saved attribute hash. 
if (my $cmd_method = $self->can ("cmd_$method")) { - DEBUG > 2 and print " stops saving a tag\n"; my $tag = pop @{ $$self{PENDING} }; - DEBUG > 4 and print "Popped: [", pretty ($tag), "]\n"; - DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n"; my $text = $self->$cmd_method ($$tag[0], $$tag[2]); if (defined $text) { if (@{ $$self{PENDING} } > 1) { @@ -361,8 +443,6 @@ sub _handle_element_end { } } elsif (my $end_method = $self->can ("end_$method")) { $self->$end_method (); - } else { - DEBUG > 2 and print "No $method end method, skipping\n"; } } @@ -381,31 +461,40 @@ sub format_text { my $literal = $$options{literal}; # Cleanup just tidies up a few things, telling *roff that the hyphens are - # hard, putting a bit of space between consecutive underscores, and - # escaping backslashes. Be careful not to mangle our character - # translations by doing this before processing character translation. + # hard, putting a bit of space between consecutive underscores, escaping + # backslashes, and converting zero-width spaces to zero-width break + # points. if ($cleanup) { $text =~ s/\\/\\e/g; $text =~ s/-/\\-/g; $text =~ s/_(?=_)/_\\|/g; + $text =~ s/\x{200B}/\\:/g; } - # Normally we do character translation, but we won't even do that in - # blocks or if UTF-8 output is desired. - if ($convert && !$$self{utf8} && ASCII) { - $text =~ s/([^\x00-\x7F])/$ESCAPES{ord ($1)} || "X"/eg; + # Except in blocks, if groff or roff encoding is requested and + # we're in an ASCII environment, do the encoding. For EBCDIC, we just + # write what we get and hope for the best. Leave non-breaking spaces and + # soft hyphens alone; we'll convert those at the last minute. + if ($convert) { + if (ASCII) { + if ($$self{ENCODING} eq 'groff') { + $text =~ s{ ([^\x00-\x7F\xA0\xAD]) }{ + '\\[u' . sprintf('%04X', ord($1)) . 
']' + }xmsge; + } elsif ($$self{ENCODING} eq 'roff') { + $text =~ s/([^\x00-\x7F\xA0\xAD])/$ESCAPES{ord ($1)} || "X"/eg; + } + } } # Ensure that *roff doesn't convert literal quotes to UTF-8 single quotes, - # but don't mess up our accept escapes. + # but don't mess up accent escapes. if ($literal) { $text =~ s/(?guesswork ($text); } @@ -420,31 +509,47 @@ sub quote_literal { my $self = shift; local $_ = shift; + # If in NAME section, just return an ASCII quoted string to avoid + # confusing tools like whatis. + if ($$self{IN_NAME}) { + my $lquote = $$self{LQUOTE} eq '""' ? '"' : $$self{LQUOTE}; + my $rquote = $$self{RQUOTE} eq '""' ? '"' : $$self{RQUOTE}; + return $lquote . $_ . $rquote; + } + # A regex that matches the portion of a variable reference that's the # array or hash index, separated out just because we want to use it in # several places in the following regex. - my $index = '(?: \[.*\] | \{.*\} )?'; - - # If in NAME section, just return an ASCII quoted string to avoid - # confusing tools like whatis. - return qq{"$_"} if $$self{IN_NAME}; + my $index = '(?: \[[^]]+\] | \{[^}]+\} )?'; # Check for things that we don't want to quote, and if we find any of # them, return the string with just a font change and no quoting. + # + # Traditionally, Pod::Man has not quoted Perl variables, functions, + # numbers, or hex constants, but this is not always desirable. Make this + # optional on the quoting guesswork flag. + my $extra = qr{(?!)}xms; # never matches + if ($$self{GUESSWORK}{quoting}) { + $extra = qr{ + \$+ [\#^]? \S $index # special ($^F, $") + | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func + | [\$\@%&*]* [:\'\w]+ + (?: \\-> )? \(\s*[^\s,\)]*\s*\) # 0/1-arg func call + | (?: [+] || \\- )? ( \d[\d.]* | \.\d+ ) + (?: [eE] (?: [+] || \\- )? \d+ )? 
# a number + | 0x [a-fA-F\d]+ # a hex constant + }xms; + } m{ ^\s* (?: - ( [\'\`\"] ) .* \1 # already quoted - | \\\*\(Aq .* \\\*\(Aq # quoted and escaped - | \\?\` .* ( \' | \\\*\(Aq ) # `quoted' - | \$+ [\#^]? \S $index # special ($^Foo, $") - | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func - | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call - | [-+]? ( \d[\d.]* | \.\d+ ) (?: [eE][-+]?\d+ )? # a number - | 0x [a-fA-F\d]+ # a hex constant + ( [\'\"] ) .* \1 # already quoted + | \\\*\(Aq .* \\\*\(Aq # quoted and escaped + | \\?\` .* ( \' | \\?\` | \\\*\(Aq ) # `quoted' or `quoted` + | $extra ) \s*\z - }xso and return '\f(FS' . $_ . '\f(FE'; + }xms and return '\f(FS' . $_ . '\f(FE'; # If we didn't return, go ahead and quote the text. return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"; @@ -453,10 +558,9 @@ sub quote_literal { # Takes a text block to perform guesswork on. Returns the text block with # formatting codes added. This is the code that marks up various Perl # constructs and things commonly used in man pages without requiring the user -# to add any explicit markup, and is applied to all non-literal text. We're -# guaranteed that the text we're applying guesswork to does not contain any -# *roff formatting codes. Note that the inserted font sequences must be -# treated later with mapfonts or textmapfonts. +# to add any explicit markup, and is applied to all non-literal text. Note +# that the inserted font sequences must be treated later with mapfonts or +# textmapfonts. # # This method is very fragile, both in the regular expressions it uses and in # the ordering of those modifications. Care and testing is required when @@ -464,7 +568,6 @@ sub quote_literal { sub guesswork { my $self = shift; local $_ = shift; - DEBUG > 5 and print " Guesswork called on [$_]\n"; # By the time we reach this point, all hyphens will be escaped by adding a # backslash. 
We want to undo that escaping if they're part of regular @@ -475,9 +578,9 @@ sub guesswork { # Note that this is not user-controllable; we pretty much have to do this # transformation or *roff will mangle the output in unacceptable ways. s{ - ( (?:\G|^|\s) [\(\"]* [a-zA-Z] ) ( \\- )? + ( (?:\G|^|\s|$NBSP) [\(\"]* [a-zA-Z] ) ( \\- )? ( (?: [a-zA-Z\']+ \\-)+ ) - ( [a-zA-Z\']+ ) (?= [\)\".?!,;:]* (?:\s|\Z|\\\ ) ) + ( [a-zA-Z\']+ ) (?= [\)\".?!,;:]* (?:\s|$NBSP|\Z|\\\ ) ) \b } { my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4); @@ -486,52 +589,17 @@ sub guesswork { $prefix . $hyphen . $main . $suffix; }egx; - # Translate "--" into a real em-dash if it's used like one. This means - # that it's either surrounded by whitespace, it follows a regular word, or - # it occurs between two regular words. - if ($$self{MAGIC_EMDASH}) { - s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx; - s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx; - } - - # Make words in all-caps a little bit smaller; they look better that way. - # However, we don't want to change Perl code (like @ARGV), nor do we want - # to fix the MIME in MIME-Version since it looks weird with the - # full-height V. - # - # We change only a string of all caps (2) either at the beginning of the - # line or following regular punctuation (like quotes) or whitespace (1), - # and followed by either similar punctuation, an em-dash, or the end of - # the line (3). - # - # Allow the text we're changing to small caps to include double quotes, - # commas, newlines, and periods as long as it doesn't otherwise interrupt - # the string of small caps and still fits the criteria. This lets us turn - # entire warranty disclaimers in man page output into small caps. - if ($$self{MAGIC_SMALLCAPS}) { - s{ - ( ^ | [\s\(\"\'\`\[\{<>] | \\[ ] ) # (1) - ( [A-Z] [A-Z] (?: \s? [/A-Z+:\d_\$&] | \\- | \s? [.,\"] )* ) # (2) - (?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | \\[ ] | $ ) # (3) - } { - $1 . '\s-1' . $2 . 
'\s0' - }egx; - } - - # Note that from this point forward, we have to adjust for \s-1 and \s-0 - # strings inserted around things that we've made small-caps if later - # transforms should work on those strings. - # Embolden functions in the form func(), including functions that are in - # all capitals, but don't embolden if there's anything between the parens. + # all capitals, but don't embolden if there's anything inside the parens. # The function must start with an alphabetic character or underscore and # then consist of word characters or colons. - if ($$self{MAGIC_FUNC}) { + if ($$self{GUESSWORK}{functions}) { s{ - ( \b | \\s-1 ) - ( [A-Za-z_] ([:\w] | \\s-?[01])+ \(\) ) + (? 5 and print " Guesswork returning [$_]\n"; return $_; } @@ -608,9 +664,9 @@ sub mapfonts { my ($fixed, $bold, $italic) = (0, 0, 0); my %magic = (F => \$fixed, B => \$bold, I => \$italic); my $last = '\fR'; - $text =~ s< + $text =~ s{ \\f\((.)(.) - > < + }{ my $sequence = ''; my $f; if ($last ne '\fR') { $sequence = '\fP' } @@ -623,25 +679,46 @@ sub mapfonts { $last = $f; $sequence; } - >gxe; + }gxe; return $text; } # Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU # groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather # than R, presumably because \f(CW doesn't actually do a font change. To work -# around this, use a separate textmapfonts for text blocks where the default -# font is always R and only use the smart mapfonts for headings. +# around this, use a separate textmapfonts for text blocks that uses \fR +# instead of \fP. +# +# Originally, this function was much simpler because it went directly from \fB +# to \f(CW and relied on \f(CW clearing bold since it wasn't \f(CB. +# Unfortunately, while this works for mandoc, this is not how groff works; +# \fBfoo\f(CWbar still prints bar in bold. Therefore, we force the font back +# to the default before each font change. 
sub textmapfonts { my ($self, $text) = @_; my ($fixed, $bold, $italic) = (0, 0, 0); my %magic = (F => \$fixed, B => \$bold, I => \$italic); - $text =~ s< + my $last = '\fR'; + $text =~ s{ \\f\((.)(.) - > < + }{ + my $sequence = q{}; + if ($last ne '\fR') { $sequence = '\fR' } ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1; - $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) }; - >gxe; + my $f = $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) }; + if ($f eq $last) { + ''; + } else { + if ($f ne '\fR') { $sequence .= $f } + $last = $f; + $sequence; + } + }gxe; + + # We can do a bit of cleanup by collapsing sequences like \fR\fB\fR\fI + # into just \fI. + $text =~ s{ (?: \\fR )? (?: \\f (.|\(..) \\fR )+ }{\\fR}xms; + return $text; } @@ -761,10 +838,22 @@ sub outindex { # Output some text, without any additional changes. sub output { my ($self, @text) = @_; - if ($$self{ENCODE}) { - print { $$self{output_fh} } Encode::encode ('UTF-8', join ('', @text)); + my $text = join('', @text); + $text =~ s{$NBSP}{\\ }g; + $text =~ s{$SHY}{\\%}g; + + if ($$self{ENCODE} && _needs_encode($$self{ENCODING})) { + my $check = sub { + my ($char) = @_; + my $display = '"\x{' . hex($char) . '}"'; + my $error = "$display does not map to $$self{ENCODING}"; + $self->whine ($self->line_count(), $error); + return Encode::encode ($$self{ENCODING}, chr($char)); + }; + my $output = Encode::encode ($$self{ENCODING}, $text, $check); + print { $$self{output_fh} } $output; } else { - print { $$self{output_fh} } @text; + print { $$self{output_fh} } $text; } } @@ -777,21 +866,20 @@ sub output { sub start_document { my ($self, $attrs) = @_; if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) { - DEBUG and print "Document is contentless\n"; $$self{CONTENTLESS} = 1; } else { delete $$self{CONTENTLESS}; } - # When UTF-8 output is set, check whether our output file handle already - # has a PerlIO encoding layer set. 
If it does not, we'll need to encode - # our output before printing it (handled in the output() sub). Wrap the - # check in an eval to handle versions of Perl without PerlIO. + # When an encoding is requested, check whether our output file handle + # already has a PerlIO encoding layer set. If it does not, we'll need to + # encode our output before printing it (handled in the output() sub). + # Wrap the check in an eval to handle versions of Perl without PerlIO. # # PerlIO::get_layers still requires its argument be a glob, so coerce the # file handle to a glob. $$self{ENCODE} = 0; - if ($$self{utf8}) { + if ($$self{ENCODING}) { $$self{ENCODE} = 1; eval { my @options = (output => 1, details => 1); @@ -806,15 +894,15 @@ sub start_document { # document was content-free. if (!$$self{CONTENTLESS}) { my ($name, $section); - if (defined $$self{name}) { - $name = $$self{name}; - $section = $$self{section} || 1; + if (defined $self->{opt_name}) { + $name = $self->{opt_name}; + $section = $self->{opt_section} || 1; } else { ($name, $section) = $self->devise_title; } - my $date = defined($$self{date}) ? $$self{date} : $self->devise_date; + my $date = $self->{opt_date} // $self->devise_date(); $self->preamble ($name, $section, $date) - unless $self->bare_output or DEBUG > 9; + unless $self->bare_output; } # Initialize a few per-document variables. @@ -839,7 +927,6 @@ sub end_document { } return if $self->bare_output; return if ($$self{CONTENTLESS} && !$$self{ALWAYS_EMIT_SOMETHING}); - $self->output (q(.\" [End document]) . 
"\n") if DEBUG; } # Try to figure out the name and section from the file name and return them as @@ -848,8 +935,8 @@ sub end_document { sub devise_title { my ($self) = @_; my $name = $self->source_filename || ''; - my $section = $$self{section} || 1; - $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i); + my $section = $self->{opt_section} || 1; + $section = 3 if (!$self->{opt_section} && $name =~ /\.pm\z/i); $name =~ s/\.p(od|[lm])\z//i; # If Pod::Parser gave us an IO::File reference as the source file name, @@ -981,7 +1068,7 @@ sub devise_date { # module, but this order is correct for both Solaris and Linux. sub preamble { my ($self, $name, $section, $date) = @_; - my $preamble = $self->preamble_template (!$$self{utf8}); + my $preamble = $self->preamble_template(); # Build the index line and make sure that it will be syntactically valid. my $index = "$name $section"; @@ -1000,7 +1087,7 @@ sub preamble { $date =~ s/\"/\"\"/g; # Substitute into the preamble the configuration options. - $preamble =~ s/\@CFONT\@/$$self{fixed}/; + $preamble =~ s/\@CFONT\@/$self->{opt_fixed}/; $preamble =~ s/\@LQUOTE\@/$$self{LQUOTE}/; $preamble =~ s/\@RQUOTE\@/$$self{RQUOTE}/; chomp $preamble; @@ -1008,7 +1095,24 @@ sub preamble { # Get the version information. my $version = $self->version_report; - # Finally output everything. + # groff's preconv script will use this line to correctly determine the + # input encoding if the encoding is one of the ones it recognizes. It + # must be the first or second line. + # + # If the output encoding is some version of Unicode, we could also add a + # Unicode Byte Order Mark to the start of the file, but I am concerned + # that may break a *roff implementation that might otherwise cope with + # Unicode. Revisit this if someone files a bug report about it. 
+ if (_needs_encode($$self{ENCODING})) { + my $normalized = lc($$self{ENCODING}); + $normalized =~ s{-}{}g; + my $coding = $ENCODINGS{$normalized} || lc($$self{ENCODING}); + if ($coding ne 'us-ascii') { + $self->output (qq{.\\\" -*- mode: troff; coding: $coding -*-\n}); + } + } + + # Output the majority of the preamble. $self->output (<<"----END OF HEADER----"); .\\" Automatically generated by $version .\\" @@ -1018,13 +1122,18 @@ $preamble .\\" ======================================================================== .\\" .IX Title "$index" -.TH $name $section "$date" "$$self{release}" "$$self{center}" +.TH $name $section "$date" "$self->{opt_release}" "$self->{opt_center}" .\\" For nroff, turn off justification. Always turn off hyphenation; it makes .\\" way too many mistakes in technical documents. .if n .ad l .nh ----END OF HEADER---- - $self->output (".\\\" [End of preamble]\n") if DEBUG; + + # If the language was specified, output the language configuration. + if ($self->{opt_language}) { + $self->output(".mso $self->{opt_language}.tmac\n"); + $self->output(".hla $self->{opt_language}\n"); + } } ############################################################################## @@ -1049,11 +1158,6 @@ sub cmd_para { $$self{SHIFTWAIT} = 0; } - # Add the line number for debugging, but not in the NAME section just in - # case the comment would confuse apropos. - $self->output (".\\\" [At source line $line]\n") - if defined ($line) && DEBUG && !$$self{IN_NAME}; - # Force exactly one newline at the end and strip unwanted trailing # whitespace at the end, but leave "\ " backslashed space from an S< > at # the end of a line. Reverse the text first, to avoid having to scan the @@ -1136,9 +1240,6 @@ sub heading_common { $self->output (".PD\n"); } - # Output the current source line. - $self->output ( ".\\\" [At source line $line]\n" ) - if defined ($line) && DEBUG; return $text; } @@ -1203,6 +1304,13 @@ sub cmd_i { return $_[0]->{IN_NAME} ? $_[2] : '\f(IS' . $_[2] . 
'\f(IE' } sub cmd_f { return $_[0]->{IN_NAME} ? $_[2] : '\f(IS' . $_[2] . '\f(IE' } sub cmd_c { return $_[0]->quote_literal ($_[2]) } +# Convert all internal whitespace to $NBSP. +sub cmd_s { + my ($self, $attrs, $text) = @_; + $text =~ s{ \s }{$NBSP}xmsg; + return $text; +} + # Index entries are just added to the pending entries. sub cmd_x { my ($self, $attrs, $text) = @_; @@ -1224,7 +1332,7 @@ sub cmd_l { } if (not defined ($to) or $to eq $text) { return "<$text>"; - } elsif ($$self{nourls}) { + } elsif ($self->{opt_nourls}) { return $text; } else { return "$text <$$attrs{to}>"; @@ -1245,12 +1353,10 @@ sub over_common_start { my ($self, $type, $attrs) = @_; my $line = $$attrs{start_line}; my $indent = $$attrs{indent}; - DEBUG > 3 and print " Starting =over $type (line $line, indent ", - ($indent || '?'), "\n"; # Find the indentation level. unless (defined ($indent) && $indent =~ /^[-+]?\d{1,4}\s*$/) { - $indent = $$self{indent}; + $indent = $self->{opt_indent}; } # If we've gotten multiple indentations in a row, we need to emit the @@ -1279,7 +1385,6 @@ sub over_common_start { # .RE and then a new .RS to unconfuse *roff. sub over_common_end { my ($self) = @_; - DEBUG > 3 and print " Ending =over\n"; $$self{INDENT} = pop @{ $$self{INDENTS} }; pop @{ $$self{ITEMTYPES} }; @@ -1318,7 +1423,6 @@ sub end_over_block { $_[0]->over_common_end } sub item_common { my ($self, $type, $attrs, $text) = @_; my $line = $$attrs{start_line}; - DEBUG > 3 and print " $type item (line $line): $text\n"; # Clean up the text. 
We want to end up with two variables, one ($text) # which contains any body text after taking out the item portion, and @@ -1447,49 +1551,16 @@ sub parse_string_document { return $self->SUPER::parse_string_document ($doc); } -############################################################################## -# Translation tables -############################################################################## - -# The following table is adapted from Tom Christiansen's pod2man. It assumes -# that the standard preamble has already been printed, since that's what -# defines all of the accent marks. We really want to do something better than -# this when *roff actually supports other character sets itself, since these -# results are pretty poor. -# -# This only works in an ASCII world. What to do in a non-ASCII world is very -# unclear -- hopefully we can assume UTF-8 and just leave well enough alone. -@ESCAPES{0xA0 .. 0xFF} = ( - "\\ ", undef, undef, undef, undef, undef, undef, undef, - undef, undef, undef, undef, undef, "\\%", undef, undef, - - undef, undef, undef, undef, undef, undef, undef, undef, - undef, undef, undef, undef, undef, undef, undef, undef, - - "A\\*`", "A\\*'", "A\\*^", "A\\*~", "A\\*:", "A\\*o", "\\*(Ae", "C\\*,", - "E\\*`", "E\\*'", "E\\*^", "E\\*:", "I\\*`", "I\\*'", "I\\*^", "I\\*:", - - "\\*(D-", "N\\*~", "O\\*`", "O\\*'", "O\\*^", "O\\*~", "O\\*:", undef, - "O\\*/", "U\\*`", "U\\*'", "U\\*^", "U\\*:", "Y\\*'", "\\*(Th", "\\*8", - - "a\\*`", "a\\*'", "a\\*^", "a\\*~", "a\\*:", "a\\*o", "\\*(ae", "c\\*,", - "e\\*`", "e\\*'", "e\\*^", "e\\*:", "i\\*`", "i\\*'", "i\\*^", "i\\*:", - - "\\*(d-", "n\\*~", "o\\*`", "o\\*'", "o\\*^", "o\\*~", "o\\*:", undef, - "o\\*/" , "u\\*`", "u\\*'", "u\\*^", "u\\*:", "y\\*'", "\\*(th", "y\\*:", -) if ASCII; - ############################################################################## # Premable ############################################################################## -# The following is the static preamble which 
starts all *roff output we -# generate. Most is static except for the font to use as a fixed-width font, -# which is designed by @CFONT@, and the left and right quotes to use for C<> -# text, designated by @LQOUTE@ and @RQUOTE@. However, the second part, which -# defines the accent marks, is only used if $escapes is set to true. +# The preamble which starts all *roff output we generate. Most is static +# except for the font to use as a fixed-width font (designated by @CFONT@), and +# the left and right quotes to use for C<> text (designated by @LQUOTE@ and +# @RQUOTE@). Accent marks are only defined if the output encoding is roff. sub preamble_template { - my ($self, $accents) = @_; + my ($self) = @_; my $preamble = <<'----END OF PREAMBLE----'; .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v @@ -1504,29 +1575,12 @@ sub preamble_template { .ft R .fi .. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. \*(C+ will -.\" give a nicer C++. Capital omega is used to do unbreakable dashes and -.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, -.\" nothing in troff, for use with C<>. -.tr \(*W- -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>. .ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" . ds C` @LQUOTE@ . ds C' @RQUOTE@ 'br\} .el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. ds R" '' . ds C` . 
ds C' 'br\} @@ -1560,7 +1614,7 @@ sub preamble_template { ----END OF PREAMBLE---- #'# for cperl-mode - if ($accents) { + if ($$self{ENCODING} eq 'roff') { $preamble .= <<'----END OF PREAMBLE----' .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). @@ -1590,7 +1644,7 @@ sub preamble_template { . ds / .\} .if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h'|\\n:u' . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' @@ -1637,10 +1691,13 @@ sub preamble_template { 1; __END__ +=encoding UTF-8 + =for stopwords en em ALLCAPS teeny fixedbold fixeditalic fixedbolditalic stderr utf8 UTF-8 Allbery Sean Burke Ossanna Solaris formatters troff uppercased Christiansen -nourls parsers Kernighan lquote rquote +nourls parsers Kernighan lquote rquote unrepresentable mandoc NetBSD PostScript +SMP macOS EBCDIC fallbacks manref reflowed reflowing FH overridable =head1 NAME @@ -1666,100 +1723,197 @@ using L, normally via L, or printing using L. It is conventionally invoked using the driver script B, but it can also be used directly. -As a derived class from Pod::Simple, Pod::Man supports the same methods and -interfaces. See L for all the details. - -new() can take options, in the form of key/value pairs that control the -behavior of the parser. See below for details. - -If no options are given, Pod::Man uses the name of the input file with any -trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to -section 1 unless the file ended in C<.pm> in which case it defaults to -section 3, to a centered title of "User Contributed Perl Documentation", to -a centered footer of the Perl version it is run with, and to a left-hand -footer of the modification date of its input (or the current date if given -C for input). - -Pod::Man assumes that your *roff formatters have a fixed-width font named -C. 
If yours is called something else (like C<CR>), use the C<fixed> -option to specify it. This generally only matters for troff output for -printing. Similarly, you can set the fonts used for bold, italic, and -bold italic fixed-width output. - -Besides the obvious pod conversions, Pod::Man also takes care of -formatting func(), func(3), and simple variable references like $foo or -@bar so you don't have to use code escapes for them; complex expressions -like C<$fred{'stuff'}> will still need to be escaped, though. It also -translates dashes that aren't used as hyphens into en dashes, makes long -dashes--like this--into proper em dashes, fixes "paired quotes," makes C++ -look right, puts a little space between double underscores, makes ALLCAPS -a teeny bit smaller in B<troff>, and escapes stuff that *roff treats as -special so that you don't have to. - -The recognized options to new() are as follows. All options take a single -argument. +By default (on non-EBCDIC systems), Pod::Man outputs UTF-8. Its output should +work with the B<man> program on systems that use B<groff> (most Linux +distributions) or B<mandoc> (most BSD variants), but may result in mangled +output on older UNIX systems. To choose a different, possibly more +backward-compatible output mangling on such systems, set the C<encoding> +option to C<roff> (the default in earlier Pod::Man versions). See the +C<encoding> option and L</ENCODING> for more details. + +See L</COMPATIBILITY> for the versions of Pod::Man with significant +backward-incompatible changes (other than constructor options, whose versions +are documented below), and the versions of Perl that included them. + +=head1 CLASS METHODS + +=over 4 + +=item new(ARGS) + +Create a new Pod::Man object. ARGS should be a list of key/value pairs, where +the keys are chosen from the following. Each option is annotated with the +version of Pod::Man in which that option was added with its current meaning. =over 4 =item center -Sets the centered page header for the C<.TH> macro. 
The default, if this -option is not specified, is "User Contributed Perl Documentation". +[1.00] Sets the centered page header for the C<.TH> macro. The default, if +this option is not specified, is C<User Contributed Perl Documentation>. =item date -Sets the left-hand footer for the C<.TH> macro. If this option is not set, -the contents of the environment variable POD_MAN_DATE, if set, will be used. -Failing that, the value of SOURCE_DATE_EPOCH, the modification date of the -input file, or the current time if stat() can't find that file (which will be -the case if the input is from C<STDIN>) will be used. If obtained from the -file modification date or the current time, the date will be formatted as -C<YYYY-MM-DD> and will be based on UTC (so that the output will be -reproducible regardless of local time zone). +[4.00] Sets the left-hand footer for the C<.TH> macro. If this option is not +set, the contents of the environment variable POD_MAN_DATE, if set, will be +used. Failing that, the value of SOURCE_DATE_EPOCH, the modification date of +the input file, or the current time if stat() can't find that file (which will +be the case if the input is from C<STDIN>) will be used. If taken from any +source other than POD_MAN_DATE (which is used verbatim), the date will be +formatted as C<YYYY-MM-DD> and will be based on UTC (so that the output will +be reproducible regardless of local time zone). + +=item encoding + +[5.00] Specifies the encoding of the output. The value must be an encoding +recognized by the L<Encode> module (see L<Encode::Supported>), or the special +values C<roff> or C<groff>. The default on non-EBCDIC systems is UTF-8. + +If the output contains characters that cannot be represented in this encoding, +that is an error that will be reported as configured by the C<errors> option. +If error handling is other than C<die>, the unrepresentable character will be +replaced with the Encode substitution character (normally C<?>). 
+ +If the C<encoding> option is set to the special value C<groff> (the default on +EBCDIC systems), or if the Encode module is not available and the encoding is +set to anything other than C<roff>, Pod::Man will translate all non-ASCII +characters to C<\[uNNNN]> Unicode escapes. These are not traditionally part +of the *roff language, but are supported by B<groff> and B<mandoc> and thus by +the majority of manual page processors in use today. + +If the C<encoding> option is set to the special value C<roff>, Pod::Man will +do its historic transformation of (some) ISO 8859-1 characters into *roff +escapes that may be adequate in troff and may be readable (if ugly) in nroff. +This was the default behavior of versions of Pod::Man before 5.00. With this +encoding, all other non-ASCII characters will be replaced with C<X>. It may +be required for very old troff and nroff implementations that do not support +UTF-8, but its representation of any non-ASCII character is very poor and +often specific to European languages. + +If the output file handle has a PerlIO encoding layer set, setting C<encoding> +to anything other than C<groff> or C<roff> will be ignored and no encoding +will be done by Pod::Man. It will instead rely on the encoding layer to make +whatever output encoding transformations are desired. + +WARNING: The input encoding of the POD source is independent from the output +encoding, and setting this option does not affect the interpretation of the +POD input. Unless your POD source is US-ASCII, its encoding should be +declared with the C<=encoding> command in the source. If this is not done, +Pod::Simple will attempt to guess the encoding and may be successful if +it's Latin-1 or UTF-8, but it will produce warnings. See L<perlpod(1)> for +more information. =item errors -How to report errors. C<die> says to throw an exception on any POD -formatting error. C<stderr> says to report errors on standard error, but -not to throw an exception. C<pod> says to include a POD ERRORS section -in the resulting documentation summarizing the errors. C<none> ignores -POD errors entirely, as much as possible. 
C ignores -POD errors entirely, as much as possible. +[2.27] How to report errors. C says to throw an exception on any POD +formatting error. C says to report errors on standard error, but not +to throw an exception. C says to include a POD ERRORS section in the +resulting documentation summarizing the errors. C ignores POD errors +entirely, as much as possible. The default is C. =item fixed -The fixed-width font to use for verbatim text and code. Defaults to -C. Some systems may want C instead. Only matters for B -output. +[1.00] The fixed-width font to use for verbatim text and code. Defaults to +C. Some systems prefer C instead. Only matters for B output. =item fixedbold -Bold version of the fixed-width font. Defaults to C. Only matters +[1.00] Bold version of the fixed-width font. Defaults to C. Only matters for B output. =item fixeditalic -Italic version of the fixed-width font (actually, something of a misnomer, -since most fixed-width fonts only have an oblique version, not an italic -version). Defaults to C. Only matters for B output. +[1.00] Italic version of the fixed-width font (something of a misnomer, since +most fixed-width fonts only have an oblique version, not an italic version). +Defaults to C. Only matters for B output. =item fixedbolditalic -Bold italic (probably actually oblique) version of the fixed-width font. -Pod::Man doesn't assume you have this, and defaults to C. Some -systems (such as Solaris) have this font available as C. Only matters -for B output. +[1.00] Bold italic (in theory, probably oblique in practice) version of the +fixed-width font. Pod::Man doesn't assume you have this, and defaults to +C. Some systems (such as Solaris) have this font available as C. +Only matters for B output. + +=item guesswork + +[5.00] By default, Pod::Man applies some default formatting rules based on +guesswork and regular expressions that are intended to make writing Perl +documentation easier and require less explicit markup. 
These rules may not +always be appropriate, particularly for documentation that isn't about Perl. +This option allows turning all or some of it off. + +The special value C<all> enables all guesswork. This is also the default for +backward compatibility reasons. The special value C<none> disables all +guesswork. Otherwise, the value of this option should be a comma-separated +list of one or more of the following keywords: + +=over 4 + +=item functions + +Convert function references like C<foo()> to bold even if they have no markup. +The function name accepts valid Perl characters for function names (including +C<:>), and the trailing parentheses must be present and empty. + +=item manref + +Make the first part (before the parentheses) of manual page references like +C<foo(1)> bold even if they have no markup. The section must be a single +number optionally followed by lowercase letters. + +=item quoting + +If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by +double quotes in nroff (terminal) output unless the contents are already +quoted. When this guesswork is enabled, quote marks will also be suppressed +for Perl variables, function names, function calls, numbers, and hex +constants. + +=item variables + +Convert Perl variable names to a fixed-width font even if they have no markup. +This transformation will only be apparent in troff output, or some other +output format (unlike nroff terminal output) that supports fixed-width fonts. + +=back + +Any unknown guesswork name is silently ignored (for potential future +compatibility), so be careful about spelling. + +=item language + +[5.00] Add commands telling B<groff> that the input file is in the given +language. The value of this setting must be a language abbreviation for which +B<groff> provides supplemental configuration, such as C<ja> (for Japanese) or +C<zh> (for Chinese). + +Specifically, this adds: + + .mso <language>.tmac + .hla <language> + +to the start of the file, which configure correct line breaking for the +specified language. 
Without these commands, groff may not know how to add +proper line breaks for Chinese and Japanese text if the manual page is +installed into the normal manual page directory, such as F</usr/share/man>. + +On many systems, this will be done automatically if the manual page is +installed into a language-specific manual page directory, such as +F</usr/share/man/zh_CN>. In that case, this option is not required. + +Unfortunately, the commands added with this option are specific to B<groff> +and will not work with other B<troff> and B<nroff> implementations. =item lquote =item rquote -Sets the quote marks used to surround CE<lt>> text. C<lquote> sets the -left quote mark and C<rquote> sets the right quote mark. Either may also -be set to the special value C<none>, in which case no quote mark is added -on that side of CE<lt>> text (but the font is still changed for troff -output). +[4.08] Sets the quote marks used to surround CE<lt>> text. C<lquote> sets the +left quote mark and C<rquote> sets the right quote mark. Either may also be +set to the special value C<none>, in which case no quote mark is added on that +side of CE<lt>> text (but the font is still changed for troff output). Also see the C<quotes> option, which can be used to set both quotes at once. If both C<quotes> and one of the other options is set, C<lquote> or C<rquote> @@ -1767,21 +1921,21 @@ overrides C<quotes>. =item name -Set the name of the manual page for the C<.TH> macro. Without this -option, the manual name is set to the uppercased base name of the file -being converted unless the manual section is 3, in which case the path is -parsed to see if it is a Perl module path. If it is, a path like -C<.../lib/Pod/Man.pm> is converted into a name like C<Pod::Man>. This -option, if given, overrides any automatic determination of the name. +[4.08] Set the name of the manual page for the C<.TH> macro. Without this +option, the manual name is set to the uppercased base name of the file being +converted unless the manual section is 3, in which case the path is parsed to +see if it is a Perl module path. 
If it is, a path like C<.../lib/Pod/Man.pm> +is converted into a name like C<Pod::Man>. This option, if given, overrides +any automatic determination of the name. If generating a manual page from standard input, the name will be set to -C<STDIN> if this option is not provided. Providing this option is strongly -recommended to set a meaningful manual page name. +C<STDIN> if this option is not provided. In this case, providing this option +is strongly recommended to set a meaningful manual page name. =item nourls -Normally, LZ<><> formatting codes with a URL but anchor text are formatted -to show both the anchor text and the URL. In other words: +[2.27] Normally, LZ<><> formatting codes with a URL but anchor text are +formatted to show both the anchor text and the URL. In other words: L<foo|http://example.com/> @@ -1796,10 +1950,10 @@ important. =item quotes -Sets the quote marks used to surround CE<lt>> text. If the value is a -single character, it is used as both the left and right quote. Otherwise, -it is split in half, and the first half of the string is used as the left -quote and the second is used as the right quote. +[4.00] Sets the quote marks used to surround CE<lt>> text. If the value is a +single character, it is used as both the left and right quote. Otherwise, it +is split in half, and the first half of the string is used as the left quote +and the second is used as the right quote. This may also be set to the special value C<none>, in which case no quote marks are added around CE<lt>> text (but the font is still changed for troff @@ -1811,82 +1965,258 @@ options is set, C<lquote> or C<rquote> overrides C<quotes>. =item release -Set the centered footer for the C<.TH> macro. By default, this is set to -the version of Perl you run Pod::Man under. Setting this to the empty -string will cause some *roff implementations to use the system default -value. +[1.00] Set the centered footer for the C<.TH> macro. By default, this is set +to the version of Perl you run Pod::Man under. 
Setting this to the empty +string will cause some *roff implementations to use the system default value. -Note that some system C macro sets assume that the centered footer -will be a modification date and will prepend something like "Last -modified: ". If this is the case for your target system, you may want to -set C to the last modified date and C to the version -number. +Note that some system C macro sets assume that the centered footer will be +a modification date and will prepend something like C. If +this is the case for your target system, you may want to set C to the +last modified date and C to the version number. =item section -Set the section for the C<.TH> macro. The standard section numbering -convention is to use 1 for user commands, 2 for system calls, 3 for -functions, 4 for devices, 5 for file formats, 6 for games, 7 for -miscellaneous information, and 8 for administrator commands. There is a lot -of variation here, however; some systems (like Solaris) use 4 for file -formats, 5 for miscellaneous information, and 7 for devices. Still others -use 1m instead of 8, or some mix of both. About the only section numbers -that are reliably consistent are 1, 2, and 3. +[1.00] Set the section for the C<.TH> macro. The standard section numbering +convention is to use 1 for user commands, 2 for system calls, 3 for functions, +4 for devices, 5 for file formats, 6 for games, 7 for miscellaneous +information, and 8 for administrator commands. There is a lot of variation +here, however; some systems (like Solaris) use 4 for file formats, 5 for +miscellaneous information, and 7 for devices. Still others use 1m instead of +8, or some mix of both. About the only section numbers that are reliably +consistent are 1, 2, and 3. By default, section 1 will be used unless the file ends in C<.pm> in which case section 3 will be selected. 
=item stderr -Send error messages about invalid POD to standard error instead of -appending a POD ERRORS section to the generated *roff output. This is -equivalent to setting C to C if C is not already -set. It is supported for backward compatibility. +[2.19] If set to a true value, send error messages about invalid POD to +standard error instead of appending a POD ERRORS section to the generated +*roff output. This is equivalent to setting C to C if +C is not already set. + +This option is for backward compatibility with Pod::Man versions that did not +support C. Normally, the C option should be used instead. =item utf8 -By default, Pod::Man produces the most conservative possible *roff output -to try to ensure that it will work with as many different *roff -implementations as possible. Many *roff implementations cannot handle -non-ASCII characters, so this means all non-ASCII characters are converted -either to a *roff escape sequence that tries to create a properly accented -character (at least for troff output) or to C. - -If this option is set, Pod::Man will instead output UTF-8. If your *roff -implementation can handle it, this is the best output format to use and -avoids corruption of documents containing non-ASCII characters. However, -be warned that *roff source with literal UTF-8 characters is not supported -by many implementations and may even result in segfaults and other bad -behavior. - -Be aware that, when using this option, the input encoding of your POD -source should be properly declared unless it's US-ASCII. Pod::Simple will -attempt to guess the encoding and may be successful if it's Latin-1 or -UTF-8, but it will produce warnings. Use the C<=encoding> command to -declare the encoding. See L for more information. +[2.21] This option used to set the output encoding to UTF-8. Since this is +now the default, it is ignored and does nothing. =back -The standard Pod::Simple method parse_file() takes one argument naming the -POD file to read from. 
By default, the output is sent to C, but -this can be changed with the output_fh() method. +=back + +=head1 INSTANCE METHODS + +As a derived class from Pod::Simple, Pod::Man supports the same methods and +interfaces. See L for all the details. This section summarizes +the most-frequently-used methods and the ones added by Pod::Man. + +=over 4 + +=item output_fh(FH) + +Direct the output from parse_file(), parse_lines(), or parse_string_document() +to the file handle FH instead of C. + +=item output_string(REF) + +Direct the output from parse_file(), parse_lines(), or parse_string_document() +to the scalar variable pointed to by REF, rather than C. For example: + + my $man = Pod::Man->new(); + my $output; + $man->output_string(\$output); + $man->parse_file('/some/input/file'); + +Be aware that the output in that variable will already be encoded in UTF-8. + +=item parse_file(PATH) + +Read the POD source from PATH and format it. By default, the output is sent +to C, but this can be changed with the output_fh() or output_string() +methods. + +=item parse_from_file(INPUT, OUTPUT) + +=item parse_from_filehandle(FH, OUTPUT) -The standard Pod::Simple method parse_from_file() takes up to two -arguments, the first being the input file to read POD from and the second -being the file to write the formatted output to. +Read the POD source from INPUT, format it, and output the results to OUTPUT. -You can also call parse_lines() to parse an array of lines or -parse_string_document() to parse a document already in memory. As with -parse_file(), parse_lines() and parse_string_document() default to sending -their output to C unless changed with the output_fh() method. Be -aware that parse_lines() and parse_string_document() both expect raw bytes, -not decoded characters. +parse_from_filehandle() is provided for backward compatibility with older +versions of Pod::Man. parse_from_file() should be used instead. 
-To put the output from any parse method into a string instead of a file -handle, call the output_string() method instead of output_fh(). +=item parse_lines(LINES[, ...[, undef]]) -See L for more specific details on the methods available to -all derived parsers. +Parse the provided lines as POD source, writing the output to either C +or the file handle set with the output_fh() or output_string() methods. This +method can be called repeatedly to provide more input lines. An explicit +C should be passed to indicate the end of input. + +This method expects raw bytes, not decoded characters. + +=item parse_string_document(INPUT) + +Parse the provided scalar variable as POD source, writing the output to either +C or the file handle set with the output_fh() or output_string() +methods. + +This method expects raw bytes, not decoded characters. + +=back + +=head1 ENCODING + +As of Pod::Man 5.00, the default output encoding for Pod::Man is UTF-8. This +should work correctly on any modern system that uses either B (most +Linux distributions) or B (Alpine Linux and most BSD variants, +including macOS). + +The user will probably have to use a UTF-8 locale to see correct output. This +may be done by default; if not, set the LANG or LC_CTYPE environment variables +to an appropriate locale. The locale C is available on most systems +if one wants correct output without changing the other things locales affect, +such as collation. + +The backward-compatible output format used in Pod::Man versions before 5.00 is +available by setting the C option to C. This may produce +marginally nicer results on older UNIX versions that do not use B or +B, but none of the available options will correctly render Unicode +characters on those systems. + +Below are some additional details about how this choice was made and some +discussion of alternatives. + +=head2 History + +The default output encoding for Pod::Man has been a long-standing problem. 
+B and B predate Unicode by a significant margin, and their +implementations for many UNIX systems reflect that legacy. It's common for +Unicode to not be supported in any form. + +Because of this, versions of Pod::Man prior to 5.00 maintained the highly +conservative output of the original pod2man, which output pure ASCII with +complex macros to simulate common western European accented characters when +processed with troff. The nroff output was awkward and sometimes incorrect, +and characters not used in western European scripts were replaced with C. +This choice maximized backwards compatibility with B and +B/B implementations at the cost of incorrect rendering of many +POD documents, particularly those containing people's names. + +The modern implementations, B (used in most Linux distributions) and +B (used by most BSD variants), do now support Unicode. Other UNIX +systems often do not, but they're now a tiny minority of the systems people +use on a daily basis. It's increasingly common (for very good reasons) to use +Unicode characters for POD documents rather than using ASCII conversions of +people's names or avoiding non-English text, making the limitations in the old +output format more apparent. + +Four options have been proposed to fix this: + +=over 2 + +=item * + +Optionally support UTF-8 output but don't change the default. This is the +approach taken since Pod::Man 2.1.0, which added the C option. Some +Pod::Man users use this option for better output on platforms known to support +Unicode, but since the defaults have not changed, people continued to +encounter (and file bug reports about) the poor default rendering. + +=item * + +Convert characters to troff C<\(xx> escapes. This requires maintaining a +large translation table and addresses only a tiny part of the problem, since +many Unicode characters have no standard troff name. B has the largest +list, but if one is willing to assume B is the formatter, the next +option is better. 
+ +=item * + +Convert characters to groff C<\[uNNNN]> escapes. This is implemented as the +C encoding for those who want to use it, and is supported by both +B and B. However, it is no better than UTF-8 output for +portability to other implementations. See L for more +details. + +=item * + +Change the default output format to UTF-8 and ask those who want maximum +backward compatibility to explicitly select the old encoding. This fixes the +issue for most users at the cost of backwards compatibility. While the +rendering of non-ASCII characters is different on older systems that don't +support UTF-8, it's not always worse than the old output. + +=back + +Pod::Man 5.00 and later makes the last choice. This arguably produces worse +output when manual pages are formatted with B into PostScript or PDF, +but doing this is rare and normally manual, so the encoding can be changed in +those cases. The older output encoding is available by setting C to +C. + +=head2 Testing results + +Here are the results of testing C values of C and C on +various operating systems. The testing methodology was to create F +in the current directory, copy F or F from the +podlators 5.00 distribution to F, and then run: + + LANG=C.UTF-8 MANPATH=$(pwd)/man man 1 encoding + +If the locale is not explicitly set to one that includes UTF-8, the Unicode +characters were usually converted to ASCII (by, for example, dropping an +accent) or deleted or replaced with C<< >> if there was no conversion. + +Tested on 2022-09-25. Many thanks to the GCC Compile Farm project for access +to testing hosts. 
+ + OS UTF-8 groff + ------------------ ------- ------- + AIX 7.1 no [1] no [2] + Alpine 3.15.0 yes yes + CentOS 7.9 yes yes + Debian 7 yes yes + FreeBSD 13.0 yes yes + NetBSD 9.2 yes yes + OpenBSD 7.1 yes yes + openSUSE Leap 15.4 yes yes + Solaris 10 yes no [2] + Solaris 11 no [3] no [3] + +I did not have access to a macOS system for testing, but since it uses +B, its behavior is probably the same as the BSD hosts. + +Notes: + +=over 4 + +=item [1] + +Unicode characters were converted to one or two random ASCII characters +unrelated to the original character. + +=item [2] + +Unicode characters were shown as the body of the groff escape rather than the +indicated character (in other words, text like C<[u00EF]>). + +=item [3] + +Unicode characters were deleted entirely, as if they weren't there. Using +C instead of B to format the page showed the same results as +Solaris 10. Using C to format the page produced the +correct output. + +=back + +PostScript and PDF output using groff on a Debian 12 system do not support +combining accent marks or SMP characters due to a lack of support in the +default output font. + +Testing on additional platforms is welcome. Please let the author know if you +have additional results. =head1 DIAGNOSTICS @@ -1922,9 +2252,9 @@ option was set to C. =item PERL_CORE -If set and Encode is not available, silently fall back to non-UTF-8 mode -without complaining to standard error. This environment variable is set -during Perl core builds, which build Encode after podlators. Encode is +If set and Encode is not available, silently fall back to an encoding of +C without complaining to standard error. This environment variable is +set during Perl core builds, which build Encode after podlators. Encode is expected to not (yet) be available in that case. =item POD_MAN_DATE @@ -1953,65 +2283,104 @@ reliable if this variable overrode the timestamp of the input file.) 
=back -=head1 BUGS +=head1 COMPATIBILITY -Encoding handling assumes that PerlIO is available and does not work -properly if it isn't. The C option is therefore not supported -unless Perl is built with PerlIO support. +Pod::Man 1.02 (based on L) was the first version included with +Perl, in Perl 5.6.0. -There is currently no way to turn off the guesswork that tries to format -unmarked text appropriately, and sometimes it isn't wanted (particularly -when using POD to document something other than Perl). Most of the work -toward fixing this has now been done, however, and all that's still needed -is a user interface. +The current API based on L was added in Pod::Man 2.00. Pod::Man +2.04 was included in Perl 5.9.3, the first version of Perl to incorporate +those changes. This is the first version that correctly supports all modern +POD syntax. The parse_from_filehandle() method was re-added for backward +compatibility in Pod::Man 2.09, included in Perl 5.9.4. -The NAME section should be recognized specially and index entries emitted -for everything in that section. This would have to be deferred until the -next section, since extraneous things in NAME tends to confuse various man -page processors. Currently, no index entries are emitted for anything in -NAME. +Support for anchor text in LZ<><> links of type URL was added in Pod::Man +2.23, included in Perl 5.11.5. -Pod::Man doesn't handle font names longer than two characters. Neither do -most B implementations, but GNU troff does as an extension. It would -be nice to support as an option for those who want to use it. +parse_lines(), parse_string_document(), and parse_file() set a default output +file handle of C if one was not already set as of Pod::Man 2.28, +included in Perl 5.19.5. + +Support for SOURCE_DATE_EPOCH and POD_MAN_DATE was added in Pod::Man 4.00, +included in Perl 5.23.7, and generated dates were changed to use UTC instead +of the local time zone. 
This is also the first release that aligned the +module version and the version of the podlators distribution. All modules +included in podlators, and the podlators distribution itself, share the same +version number from this point forward. + +Pod::Man 4.10, included in Perl 5.27.8, changed the formatting for manual page +references and function names to bold instead of italic, following the current +Linux manual page standard. -The preamble added to each output file is rather verbose, and most of it -is only necessary in the presence of non-ASCII characters. It would -ideally be nice if all of those definitions were only output if needed, -perhaps on the fly as the characters are used. +Pod::Man 5.00 changed the default output encoding to UTF-8, overridable with +the new C option. It also fixed problems with bold or italic +extending too far when used with CZ<><> escapes, and began converting Unicode +zero-width spaces (U+200B) to the C<\:> *roff escape. It also dropped +attempts to add subtle formatting corrections in the output that would only be +visible when typeset with B, which had previously been a significant +source of bugs. -Pod::Man is excessively slow. +=head1 BUGS + +There are numerous bugs and language-specific assumptions in the nroff +fallbacks for accented characters in the C encoding. Since the point of +this encoding is backward compatibility with the output from earlier versions +of Pod::Man, and it is deprecated except when necessary to support old +systems, those bugs are unlikely to ever be fixed. + +Pod::Man doesn't handle font names longer than two characters. Neither do +most B implementations, but groff does as an extension. It would be +nice to support as an option for those who want to use it. =head1 CAVEATS -If Pod::Man is given the C option, the encoding of its output file -handle will be forced to UTF-8 if possible, overriding any existing -encoding. 
This will be done even if the file handle is not created by -Pod::Man and was passed in from outside. This maintains consistency -regardless of PERL_UNICODE and other settings. +=head2 Sentence spacing -The handling of hyphens and em dashes is somewhat fragile, and one may get -the wrong one under some circumstances. This should only matter for -B output. +Pod::Man copies the input spacing verbatim to the output *roff document. This +means your output will be affected by how B generally handles sentence +spacing. -When and whether to use small caps is somewhat tricky, and Pod::Man doesn't -necessarily get it right. +B dates from an era in which it was standard to use two spaces after +sentences, and will always add two spaces after a line-ending period (or +similar punctuation) when reflowing text. For example, the following input: -Converting neutral double quotes to properly matched double quotes doesn't -work unless there are no formatting codes between the quote marks. This -only matters for troff output. + =pod + + One sentence. + Another sentence. + +will result in two spaces after the period when the text is reflowed. If you +use two spaces after sentences anyway, this will be consistent, although you +will have to be careful to not end a line with an abbreviation such as C +or C. Output will also be consistent if you use the *roff style guide +(and L) recommendation of putting a line +break after each sentence, although that will consistently produce two spaces +after each sentence, which may not be what you want. + +If you prefer one space after sentences (which is the more modern style), you +will unfortunately need to ensure that no line in the middle of a paragraph +ends in a period or similar sentence-ending punctuation. Otherwise, B +will add two spaces after that sentence when reflowing, and your output +document will have inconsistent spacing. 
+ +=head2 Hyphens + +The handling of hyphens versus dashes is somewhat fragile, and one may get +the wrong one under some circumstances. This will normally only matter for +line breaking and possibly for troff output. =head1 AUTHOR -Russ Allbery , based I heavily on the original B -by Tom Christiansen . The modifications to work with -Pod::Simple instead of Pod::Parser were originally contributed by Sean Burke - (but I've since hacked them beyond recognition and all bugs -are mine). +Written by Russ Allbery , based on the original B by +Tom Christiansen . + +The modifications to work with Pod::Simple instead of Pod::Parser were +contributed by Sean Burke , but I've since hacked them beyond +recognition and all bugs are mine. =head1 COPYRIGHT AND LICENSE -Copyright 1999-2010, 2012-2019 Russ Allbery +Copyright 1999-2010, 2012-2020, 2022 Russ Allbery Substantial contributions by Sean Burke . @@ -2020,18 +2389,19 @@ under the same terms as Perl itself. =head1 SEE ALSO -L, L, L, L, L, -L, L +L, L, L, L, +L, L, L, L Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual," Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is the best documentation of standard B and B. At the time of this writing, it's available at L. -The man page documenting the man macro set may be L instead of -L on your system. Also, please see L for extensive -documentation on writing manual pages if you've not done it before and -aren't familiar with the conventions. +The manual page documenting the man macro set may be L instead of +L on your system. + +See L for documentation on writing manual pages in POD if +you've not done it before and aren't familiar with the conventions. The current version of this module is always available from its web site at L. 
It is also part of the diff --git a/cpan/podlators/lib/Pod/ParseLink.pm b/cpan/podlators/lib/Pod/ParseLink.pm index 273c95847ac3..c4220a8d65a6 100644 --- a/cpan/podlators/lib/Pod/ParseLink.pm +++ b/cpan/podlators/lib/Pod/ParseLink.pm @@ -13,17 +13,15 @@ package Pod::ParseLink; -use 5.008; +use 5.010; use strict; use warnings; -use vars qw(@EXPORT @ISA $VERSION); - use Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(parselink); -$VERSION = '4.14'; +our @ISA = qw(Exporter); +our @EXPORT = qw(parselink); +our $VERSION = '5.00'; ############################################################################## # Implementation @@ -171,7 +169,7 @@ Russ Allbery =head1 COPYRIGHT AND LICENSE -Copyright 2001, 2008, 2009, 2014, 2018-2019 Russ Allbery +Copyright 2001, 2008, 2009, 2014, 2018-2019, 2022 Russ Allbery This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. diff --git a/cpan/podlators/lib/Pod/Text.pm b/cpan/podlators/lib/Pod/Text.pm index 56e6e78a86c3..85b4e53c517b 100644 --- a/cpan/podlators/lib/Pod/Text.pm +++ b/cpan/podlators/lib/Pod/Text.pm @@ -14,23 +14,20 @@ package Pod::Text; -use 5.008; +use 5.010; use strict; use warnings; -use vars qw(@ISA @EXPORT %ESCAPES $VERSION); - use Carp qw(carp croak); use Encode qw(encode); use Exporter (); use Pod::Simple (); -@ISA = qw(Pod::Simple Exporter); +our @ISA = qw(Pod::Simple Exporter); +our $VERSION = '5.00'; # We have to export pod2text for backward compatibility. -@EXPORT = qw(pod2text); - -$VERSION = '4.14'; +our @EXPORT = qw(pod2text); # Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available. Code # taken from Pod::Simple 3.32, but was only added in 3.30. @@ -43,6 +40,11 @@ if ($Pod::Simple::VERSION ge 3.30) { $SHY = chr utf8::unicode_to_native(0xAD); } +# Import the ASCII constant from Pod::Simple. This is true iff we're in an +# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is +# generally only false for EBCDIC. 
+BEGIN { *ASCII = \&Pod::Simple::ASCII } + ############################################################################## # Initialization ############################################################################## @@ -64,9 +66,6 @@ sub new { my $class = shift; my $self = $class->SUPER::new; - # Tell Pod::Simple to handle S<> by automatically inserting  . - $self->nbsp_for_S (1); - # Tell Pod::Simple to keep whitespace whenever possible. if ($self->can ('preserve_whitespace')) { $self->preserve_whitespace (1); @@ -89,16 +88,20 @@ sub new { my @opts = map { ("opt_$_", $opts{$_}) } keys %opts; %$self = (%$self, @opts); - # Send errors to stderr if requested. + # Backwards-compatibility support for the stderr option. if ($$self{opt_stderr} and not $$self{opt_errors}) { $$self{opt_errors} = 'stderr'; } delete $$self{opt_stderr}; - # Validate the errors parameter and act on it. - if (not defined $$self{opt_errors}) { - $$self{opt_errors} = 'pod'; + # Backwards-compatibility support for the utf8 option. + if ($$self{opt_utf8} && !$$self{opt_encoding}) { + $$self{opt_encoding} = 'UTF-8'; } + delete $$self{opt_utf8}; + + # Validate the errors parameter and act on it. + $$self{opt_errors} //= 'pod'; if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') { $self->no_errata_section (1); $self->complain_stderr (1); @@ -117,12 +120,12 @@ sub new { delete $$self{errors}; # Initialize various things from our parameters. 
- $$self{opt_alt} = 0 unless defined $$self{opt_alt}; - $$self{opt_indent} = 4 unless defined $$self{opt_indent}; - $$self{opt_margin} = 0 unless defined $$self{opt_margin}; - $$self{opt_loose} = 0 unless defined $$self{opt_loose}; - $$self{opt_sentence} = 0 unless defined $$self{opt_sentence}; - $$self{opt_width} = 76 unless defined $$self{opt_width}; + $$self{opt_alt} //= 0; + $$self{opt_indent} //= 4; + $$self{opt_margin} //= 0; + $$self{opt_loose} //= 0; + $$self{opt_sentence} //= 0; + $$self{opt_width} //= 76; # Figure out what quotes we'll be using for C<> text. $$self{opt_quotes} ||= '"'; @@ -272,9 +275,7 @@ sub reformat { } # Output text to the output device. Replace non-breaking spaces with spaces -# and soft hyphens with nothing, and then try to fix the output encoding if -# necessary to match the input encoding unless UTF-8 output is forced. This -# preserves the traditional pass-through behavior of Pod::Text. +# and soft hyphens with nothing, and then determine the output encoding. sub output { my ($self, @text) = @_; my $text = join ('', @text); @@ -284,15 +285,39 @@ sub output { if ($SHY) { $text =~ s/$SHY//g; } - unless ($$self{opt_utf8}) { - my $encoding = $$self{encoding} || ''; - if ($encoding && $encoding ne $$self{ENCODING}) { - $$self{ENCODING} = $encoding; - eval { binmode ($$self{output_fh}, ":encoding($encoding)") }; - } - } + + # The logic used here is described in the POD documentation. Prefer the + # configured encoding, then the pass-through option of using the same + # encoding as the input, and then UTF-8, but commit to an encoding for the + # document. + # + # ENCODE says whether to encode or not and is turned off if there is a + # PerlIO encoding layer (in start_document). ENCODING is the encoding + # that we previously committed to and is cleared at the start of each + # document. 
if ($$self{ENCODE}) { - print { $$self{output_fh} } encode ('UTF-8', $text); + my $encoding = $$self{ENCODING}; + if (!$encoding) { + $encoding = $self->encoding(); + if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) { + $encoding = 'UTF-8'; + } + if ($encoding) { + $$self{ENCODING} = $encoding; + } + } + if ($encoding) { + my $check = sub { + my ($char) = @_; + my $display = '"\x{' . hex($char) . '}"'; + my $error = "$display does not map to $$self{ENCODING}"; + $self->whine ($self->line_count(), $error); + return Encode::encode ($$self{ENCODING}, chr($char)); + }; + print { $$self{output_fh} } encode ($encoding, $text, $check); + } else { + print { $$self{output_fh} } $text; + } } else { print { $$self{output_fh} } $text; } @@ -322,24 +347,18 @@ sub start_document { $$self{MARGIN} = $margin; # Default left margin. $$self{PENDING} = [[]]; # Pending output. - # We have to redo encoding handling for each document. - $$self{ENCODING} = ''; - - # When UTF-8 output is set, check whether our output file handle already - # has a PerlIO encoding layer set. If it does not, we'll need to encode - # our output before printing it (handled in the output() sub). - $$self{ENCODE} = 0; - if ($$self{opt_utf8}) { - $$self{ENCODE} = 1; - eval { - my @options = (output => 1, details => 1); - my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1]; - if ($flag && ($flag & PerlIO::F_UTF8 ())) { - $$self{ENCODE} = 0; - $$self{ENCODING} = 'UTF-8'; - } - }; - } + # We have to redo encoding handling for each document. Check whether the + # output file handle already has a PerlIO encoding layer set and, if so, + # disable encoding. + $$self{ENCODE} = 1; + eval { + my @options = (output => 1, details => 1); + my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1]; + if ($flag && ($flag & PerlIO::F_UTF8 ())) { + $$self{ENCODE} = 0; + } + }; + $$self{ENCODING} = $$self{opt_encoding}; return ''; } @@ -383,8 +402,7 @@ sub item { # Calculate the indentation and margin. 
$fits is set to true if the tag # will fit into the margin of the paragraph given our indentation level. - my $indent = $$self{INDENTS}[-1]; - $indent = $$self{opt_indent} unless defined $indent; + my $indent = $$self{INDENTS}[-1] // $$self{opt_indent}; my $margin = ' ' x $$self{opt_margin}; my $tag_length = length ($self->strip_format ($tag)); my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1); @@ -588,6 +606,13 @@ sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] } sub cmd_i { return '*' . $_[2] . '*' } sub cmd_x { return '' } +# Convert all internal whitespace to $NBSP. +sub cmd_s { + my ($self, $attrs, $text) = @_; + $text =~ s{ \s }{$NBSP}xmsg; + return $text; +} + # Apply a whole bunch of messy heuristics to not quote things that don't # benefit from being quoted. These originally come from Barrie Slaymaker and # largely duplicate code in Pod::Man. @@ -766,7 +791,7 @@ __END__ =for stopwords alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls -parsers +parsers EBCDIC autodetecting superset unrepresentable FH NNN =head1 NAME @@ -785,67 +810,152 @@ Pod::Text - Convert POD data to formatted text =head1 DESCRIPTION -Pod::Text is a module that can convert documentation in the POD format -(the preferred language for documenting Perl) into formatted text. It -uses no special formatting controls or codes whatsoever, and its output is -therefore suitable for nearly any device. +Pod::Text is a module that can convert documentation in the POD format (the +preferred language for documenting Perl) into formatted text. It uses no +special formatting controls or codes, and its output is therefore suitable for +nearly any device. -As a derived class from Pod::Simple, Pod::Text supports the same methods and -interfaces. See L for all the details; briefly, one creates a -new parser with C<< Pod::Text->new() >> and then normally calls parse_file(). 
+=head2 Encoding + +Pod::Text uses the following logic to choose an output encoding, in order: + +=over 4 + +=item 1. + +If a PerlIO encoding layer is set on the output file handle, do not do any +output encoding and instead rely on the PerlIO encoding layer. + +=item 2. + +If the C or C options are set, use the output encoding +specified by those options. + +=item 3. + +If the input encoding of the POD source file was explicitly specified (using +C<=encoding>) or automatically detected by Pod::Simple, use that as the output +encoding as well. + +=item 4. + +Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output +encoding. Since this is a superset of ASCII, this will result in ASCII output +unless the POD input contains non-ASCII characters without declaring or +autodetecting an encoding (usually via EZ<><> escapes). + +=item 5. + +Otherwise, for EBCDIC systems, output without doing any encoding and hope +this works. + +=back + +One caveat: Pod::Text has to commit to an output encoding the first time it +outputs a non-ASCII character, and then has to stick with it for consistency. +However, C<=encoding> commands don't have to be at the beginning of a POD +document. If someone uses a non-ASCII character early in a document with an +escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later, +ideally Pod::Text would follow rule 3 and output the entire document as ISO +8859-1. Instead, it will commit to UTF-8 following rule 4 as soon as it sees +that escape, and then stick with that encoding for the rest of the document. + +Unfortunately, there's no universally good choice for an output encoding. +Each choice will be incorrect in some circumstances. This approach was chosen +primarily for backwards compatibility. Callers should consider forcing the +output encoding via C if they have any knowledge about what encoding +the user may expect. 
+ +In particular, consider importing the L module, if available, +and setting C to C to use an output encoding appropriate to +the user's locale. But be aware that if the user is not using locales or is +using a locale of C, Encode::Locale will set the output encoding to +US-ASCII. This will cause all non-ASCII characters to be replaced with C +and produce a flurry of warnings about unsupported characters, which may or +may not be what you want. + +=head1 CLASS METHODS + +=over 4 + +=item new(ARGS) -new() can take options, in the form of key/value pairs, that control the -behavior of the parser. The currently recognized options are: +Create a new Pod::Text object. ARGS should be a list of key/value pairs, +where the keys are chosen from the following. Each option is annotated with +the version of Pod::Text in which that option was added with its current +meaning. =over 4 =item alt -If set to a true value, selects an alternate output format that, among other -things, uses a different heading style and marks C<=item> entries with a +[2.00] If set to a true value, selects an alternate output format that, among +other things, uses a different heading style and marks C<=item> entries with a colon in the left margin. Defaults to false. =item code -If set to a true value, the non-POD parts of the input file will be included -in the output. Useful for viewing code documented with POD blocks with the -POD rendered and the code left intact. +[2.13] If set to a true value, the non-POD parts of the input file will be +included in the output. Useful for viewing code documented with POD blocks +with the POD rendered and the code left intact. + +=item encoding + +[5.00] Specifies the encoding of the output. The value must be an encoding +recognized by the L module (see L). If the output +contains characters that cannot be represented in this encoding, that is an +error that will be reported as configured by the C option. 
If error +handling is other than C, the unrepresentable character will be replaced +with the Encode substitution character (normally C). + +If the output file handle has a PerlIO encoding layer set, this parameter will +be ignored and no encoding will be done by Pod::Man. It will instead rely on +the encoding layer to make whatever output encoding transformations are +desired. + +WARNING: The input encoding of the POD source is independent from the output +encoding, and setting this option does not affect the interpretation of the +POD input. Unless your POD source is US-ASCII, its encoding should be +declared with the C<=encoding> command in the source, as near to the top of +the file as possible. If this is not done, Pod::Simple will will attempt to +guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will +produce warnings. See L for more information. =item errors -How to report errors. C says to throw an exception on any POD -formatting error. C says to report errors on standard error, but -not to throw an exception. C says to include a POD ERRORS section -in the resulting documentation summarizing the errors. C ignores -POD errors entirely, as much as possible. +[3.17] How to report errors. C says to throw an exception on any POD +formatting error. C says to report errors on standard error, but not +to throw an exception. C says to include a POD ERRORS section in the +resulting documentation summarizing the errors. C ignores POD errors +entirely, as much as possible. The default is C. =item indent -The number of spaces to indent regular text, and the default indentation for -C<=over> blocks. Defaults to 4. +[2.00] The number of spaces to indent regular text, and the default +indentation for C<=over> blocks. Defaults to 4. =item loose -If set to a true value, a blank line is printed after a C<=head1> heading. -If set to false (the default), no blank line is printed after C<=head1>, -although one is still printed after C<=head2>. 
This is the default because -it's the expected formatting for manual pages; if you're formatting +[2.00] If set to a true value, a blank line is printed after a C<=head1> +heading. If set to false (the default), no blank line is printed after +C<=head1>, although one is still printed after C<=head2>. This is the default +because it's the expected formatting for manual pages; if you're formatting arbitrary text documents, setting this to true may result in more pleasing output. =item margin -The width of the left margin in spaces. Defaults to 0. This is the margin -for all text, including headings, not the amount by which regular text is -indented; for the latter, see the I option. To set the right +[2.21] The width of the left margin in spaces. Defaults to 0. This is the +margin for all text, including headings, not the amount by which regular text +is indented; for the latter, see the I option. To set the right margin, see the I option. =item nourls -Normally, LZ<><> formatting codes with a URL but anchor text are formatted -to show both the anchor text and the URL. In other words: +[3.17] Normally, LZ<><> formatting codes with a URL but anchor text are +formatted to show both the anchor text and the URL. In other words: L @@ -853,74 +963,131 @@ is formatted as: foo -This option, if set to a true value, suppresses the URL when anchor text -is given, so this example would be formatted as just C. This can -produce less cluttered output in cases where the URLs are not particularly -important. +This option, if set to a true value, suppresses the URL when anchor text is +given, so this example would be formatted as just C. This can produce +less cluttered output in cases where the URLs are not particularly important. =item quotes -Sets the quote marks used to surround CE> text. If the value is a -single character, it is used as both the left and right quote. 
Otherwise, -it is split in half, and the first half of the string is used as the left -quote and the second is used as the right quote. +[4.00] Sets the quote marks used to surround CE> text. If the value is a +single character, it is used as both the left and right quote. Otherwise, it +is split in half, and the first half of the string is used as the left quote +and the second is used as the right quote. This may also be set to the special value C, in which case no quote marks are added around CE> text. =item sentence -If set to a true value, Pod::Text will assume that each sentence ends in two -spaces, and will try to preserve that spacing. If set to false, all -consecutive whitespace in non-verbatim paragraphs is compressed into a -single space. Defaults to false. +[3.00] If set to a true value, Pod::Text will assume that each sentence ends +in two spaces, and will try to preserve that spacing. If set to false, all +consecutive whitespace in non-verbatim paragraphs is compressed into a single +space. Defaults to false. =item stderr -Send error messages about invalid POD to standard error instead of -appending a POD ERRORS section to the generated output. This is -equivalent to setting C to C if C is not already -set. It is supported for backward compatibility. +[3.10] Send error messages about invalid POD to standard error instead of +appending a POD ERRORS section to the generated output. This is equivalent to +setting C to C if C is not already set. It is +supported for backward compatibility. =item utf8 -By default, Pod::Text uses the same output encoding as the input encoding -of the POD source (provided that Perl was built with PerlIO; otherwise, it -doesn't encode its output). If this option is given, the output encoding -is forced to UTF-8. - -Be aware that, when using this option, the input encoding of your POD -source should be properly declared unless it's US-ASCII. 
Pod::Simple will -attempt to guess the encoding and may be successful if it's Latin-1 or -UTF-8, but it will produce warnings. Use the C<=encoding> command to -declare the encoding. See L for more information. +[3.12] If this option is set to a true value, the output encoding is set to +UTF-8. This is equivalent to setting C to C if C +is not already set. It is supported for backward compatibility. =item width -The column at which to wrap text on the right-hand side. Defaults to 76. +[2.00] The column at which to wrap text on the right-hand side. Defaults to +76. =back -The standard Pod::Simple method parse_file() takes one argument naming the -POD file to read from. By default, the output is sent to C, but -this can be changed with the output_fh() method. +=back + +=head1 INSTANCE METHODS + +As a derived class from Pod::Simple, Pod::Text supports the same methods and +interfaces. See L for all the details. This section summarizes +the most-frequently-used methods and the ones added by Pod::Text. + +=over 4 + +=item output_fh(FH) + +Direct the output from parse_file(), parse_lines(), or parse_string_document() +to the file handle FH instead of C. + +=item output_string(REF) + +Direct the output from parse_file(), parse_lines(), or parse_string_document() +to the scalar variable pointed to by REF, rather than C. For example: + + my $man = Pod::Man->new(); + my $output; + $man->output_string(\$output); + $man->parse_file('/some/input/file'); + +Be aware that the output in that variable will already be encoded (see +L). + +=item parse_file(PATH) -The standard Pod::Simple method parse_from_file() takes up to two -arguments, the first being the input file to read POD from and the second -being the file to write the formatted output to. +Read the POD source from PATH and format it. By default, the output is sent +to C, but this can be changed with the output_fh() or output_string() +methods. 
-You can also call parse_lines() to parse an array of lines or -parse_string_document() to parse a document already in memory. As with -parse_file(), parse_lines() and parse_string_document() default to sending -their output to C unless changed with the output_fh() method. Be -aware that parse_lines() and parse_string_document() both expect raw bytes, -not decoded characters. +=item parse_from_file(INPUT, OUTPUT) -To put the output from any parse method into a string instead of a file -handle, call the output_string() method instead of output_fh(). +=item parse_from_filehandle(FH, OUTPUT) -See L for more specific details on the methods available to -all derived parsers. +Read the POD source from INPUT, format it, and output the results to OUTPUT. + +parse_from_filehandle() is provided for backward compatibility with older +versions of Pod::Man. parse_from_file() should be used instead. + +=item parse_lines(LINES[, ...[, undef]]) + +Parse the provided lines as POD source, writing the output to either C +or the file handle set with the output_fh() or output_string() methods. This +method can be called repeatedly to provide more input lines. An explicit +C should be passed to indicate the end of input. + +This method expects raw bytes, not decoded characters. + +=item parse_string_document(INPUT) + +Parse the provided scalar variable as POD source, writing the output to either +C or the file handle set with the output_fh() or output_string() +methods. + +This method expects raw bytes, not decoded characters. + +=back + +=head1 FUNCTIONS + +Pod::Text exports one function for backward compatibility with older versions. +This function is deprecated; instead, use the object-oriented interface +described above. + +=over 4 + +=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT]) + +Convert the POD source from INPUT to text and write it to OUTPUT. If OUTPUT +is not given, defaults to C. INPUT can be any expression supported as +the second argument to two-argument open(). 
+ +If C<-a> is given as an initial argument, pass the C option to the +Pod::Text constructor. This enables alternative formatting. + +If C<-NNN> is given as an initial argument, pass the C option to the +Pod::Text constructor with the number C as its argument. This sets the +wrap line width to NNN. + +=back =head1 DIAGNOSTICS @@ -955,61 +1122,66 @@ option was set to C. =back -=head1 BUGS +=head1 COMPATIBILITY -Encoding handling assumes that PerlIO is available and does not work -properly if it isn't. The C option is therefore not supported -unless Perl is built with PerlIO support. +Pod::Text 2.03 (based on L) was the first version of this module +included with Perl, in Perl 5.6.0. Earlier versions of Perl had a different +Pod::Text module, with a different API. -=head1 CAVEATS +The current API based on L was added in Pod::Text 3.00. +Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to +incorporate those changes. This is the first version that correctly supports +all modern POD syntax. The parse_from_filehandle() method was re-added for +backward compatibility in Pod::Text 3.07, included in Perl 5.9.4. -If Pod::Text is given the C option, the encoding of its output file -handle will be forced to UTF-8 if possible, overriding any existing -encoding. This will be done even if the file handle is not created by -Pod::Text and was passed in from outside. This maintains consistency -regardless of PERL_UNICODE and other settings. +Pod::Text 3.12, included in Perl 5.10.1, first implemented the current +practice of attempting to match the default output encoding with the input +encoding of the POD source, unless overridden by the C option or (added +later) the C option. -If the C option is not given, the encoding of its output file handle -will be forced to the detected encoding of the input POD, which preserves -whatever the input text is. 
This ensures backward compatibility with -earlier, pre-Unicode versions of this module, without large numbers of -Perl warnings. +Support for anchor text in LZ<><> links of type URL was added in Pod::Text +3.14, included in Perl 5.11.5. -This is not ideal, but it seems to be the best compromise. If it doesn't -work for you, please let me know the details of how it broke. +parse_lines(), parse_string_document(), and parse_file() set a default output +file handle of C if one was not already set as of Pod::Text 3.18, +included in Perl 5.19.5. -=head1 NOTES +Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the +version of the podlators distribution. All modules included in podlators, and +the podlators distribution itself, share the same version number from this +point forward. -This is a replacement for an earlier Pod::Text module written by Tom -Christiansen. It has a revamped interface, since it now uses Pod::Simple, -but an interface roughly compatible with the old Pod::Text::pod2text() -function is still available. Please change to the new calling convention, -though. +Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC +systems, present in all versions back to 3.00, that would cause opening +brackets to disappear. -The original Pod::Text contained code to do formatting via termcap -sequences, although it wasn't turned on by default and it was problematic to -get it to work at all. This rewrite doesn't even try to do that, but a -subclass of it does. Look for L. +Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it +sees a non-ASCII character in the input and the input encoding is not +specified. It also commits to an encoding with the first non-ASCII character +and does not change the output encoding if the input encoding changes. The +L module is now used for all output encoding rather than PerlIO +layers, which fixes earlier problems with output to scalars. 
=head1 AUTHOR -Russ Allbery , based I heavily on the original -Pod::Text by Tom Christiansen and its conversion to -Pod::Parser by Brad Appleton . Sean Burke's initial -conversion of Pod::Man to use Pod::Simple provided much-needed guidance on -how to use Pod::Simple. +Russ Allbery , based I heavily on the original Pod::Text +by Tom Christiansen and its conversion to Pod::Parser +by Brad Appleton . Sean Burke's initial conversion of +Pod::Man to use Pod::Simple provided much-needed guidance on how to use +Pod::Simple. =head1 COPYRIGHT AND LICENSE -Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019 Russ Allbery - +Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ +Allbery This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO -L, L, L, L +L, L, L, +L, L, L The current version of this module is always available from its web site at L. It is also part of the diff --git a/cpan/podlators/lib/Pod/Text/Color.pm b/cpan/podlators/lib/Pod/Text/Color.pm index 5d47c5ecb3bd..b340c5030cab 100644 --- a/cpan/podlators/lib/Pod/Text/Color.pm +++ b/cpan/podlators/lib/Pod/Text/Color.pm @@ -12,18 +12,15 @@ package Pod::Text::Color; -use 5.008; +use 5.010; use strict; use warnings; use Pod::Text (); use Term::ANSIColor qw(color colored); -use vars qw(@ISA $VERSION); - -@ISA = qw(Pod::Text); - -$VERSION = '4.14'; +our @ISA = qw(Pod::Text); +our $VERSION = '5.00'; ############################################################################## # Overrides @@ -170,11 +167,22 @@ options. Term::ANSIColor is used to get colors and therefore must be installed to use this module. -=head1 BUGS +=head1 COMPATIBILITY + +Pod::Text::Color 0.05 (based on L) was the first version of this +module included with Perl, in Perl 5.6.0. + +The current API based on L was added in Pod::Text::Color 2.00. 
+Pod::Text::Color 2.01 was included in Perl 5.9.3, the first version of Perl to +incorporate those changes. + +Several problems with wrapping and line length were fixed as recently as +Pod::Text::Color 4.11, included in Perl 5.29.1. -This is just a basic proof of concept. It should be seriously expanded to -support configurable coloration via options passed to the constructor, and -B should be taught about those. +This module inherits its API and most behavior from Pod::Text, so the details +in L also apply. Pod::Text and Pod::Text::Color have +had the same module version since 4.00, included in Perl 5.23.7. (They +unfortunately diverge in confusing ways prior to that.) =head1 AUTHOR @@ -182,7 +190,7 @@ Russ Allbery . =head1 COPYRIGHT AND LICENSE -Copyright 1999, 2001, 2004, 2006, 2008, 2009, 2018-2019 Russ Allbery +Copyright 1999, 2001, 2004, 2006, 2008, 2009, 2018-2019, 2022 Russ Allbery This program is free software; you may redistribute it and/or modify it diff --git a/cpan/podlators/lib/Pod/Text/Overstrike.pm b/cpan/podlators/lib/Pod/Text/Overstrike.pm index 53bc6afef237..a06f46feb6e7 100644 --- a/cpan/podlators/lib/Pod/Text/Overstrike.pm +++ b/cpan/podlators/lib/Pod/Text/Overstrike.pm @@ -19,17 +19,14 @@ package Pod::Text::Overstrike; -use 5.008; +use 5.010; use strict; use warnings; -use vars qw(@ISA $VERSION); - use Pod::Text (); -@ISA = qw(Pod::Text); - -$VERSION = '4.14'; +our @ISA = qw(Pod::Text); +our $VERSION = '5.00'; ############################################################################## # Overrides @@ -176,6 +173,22 @@ Currently, the outermost formatting instruction wins, so for example underlined text inside a region of bold text is displayed as simply bold. There may be some better approach possible. +=head1 COMPATIBILITY + +Pod::Text::Overstrike 1.01 (based on L) was the first version of +this module included with Perl, in Perl 5.6.1. + +The current API based on L was added in Pod::Text::Overstrike +2.00, included in Perl 5.9.3. 
+ +Several problems with wrapping and line length were fixed as recently as +Pod::Text::Overstrike 2.04, included in Perl 5.11.5. + +This module inherits its API and most behavior from Pod::Text, so the details +in L also apply. Pod::Text and Pod::Text::Overstrike +have had the same module version since 4.00, included in Perl 5.23.7. (They +unfortunately diverge in confusing ways prior to that.) + =head1 AUTHOR Originally written by Joe Smith , using the framework @@ -185,7 +198,7 @@ created by Russ Allbery . Subsequently updated by Russ Allbery. Copyright 2000 by Joe Smith -Copyright 2001, 2004, 2008, 2014, 2018-2019 by Russ Allbery +Copyright 2001, 2004, 2008, 2014, 2018-2019, 2022 by Russ Allbery This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. diff --git a/cpan/podlators/lib/Pod/Text/Termcap.pm b/cpan/podlators/lib/Pod/Text/Termcap.pm index be218f0bf044..1a13a8e1b62f 100644 --- a/cpan/podlators/lib/Pod/Text/Termcap.pm +++ b/cpan/podlators/lib/Pod/Text/Termcap.pm @@ -12,7 +12,7 @@ package Pod::Text::Termcap; -use 5.008; +use 5.010; use strict; use warnings; @@ -20,11 +20,8 @@ use Pod::Text (); use POSIX (); use Term::Cap; -use vars qw(@ISA $VERSION); - -@ISA = qw(Pod::Text); - -$VERSION = '4.14'; +our @ISA = qw(Pod::Text); +our $VERSION = '5.00'; ############################################################################## # Overrides @@ -204,13 +201,13 @@ sub wrap { 1; __END__ +=for stopwords +ECMA-48 VT100 Allbery Solaris TERMPATH unformatted + =head1 NAME Pod::Text::Termcap - Convert POD data to ASCII text with format escapes -=for stopwords -ECMA-48 VT100 Allbery Solaris TERMPATH - =head1 SYNOPSIS use Pod::Text::Termcap; @@ -235,14 +232,37 @@ and how to override that behavior if necessary. If unable to find control strings for bold and underscore formatting, that formatting is skipped, resulting in the same output as Pod::Text. 
+=head1 COMPATIBILITY + +Pod::Text::Termcap 0.04 (based on L) was the first version of +this module included with Perl, in Perl 5.6.0. + +The current API based on L was added in Pod::Text::Termcap 2.00. +Pod::Text::Termcap 2.01 was included in Perl 5.9.3, the first version of Perl +to incorporate those changes. + +Several problems with wrapping and line length were fixed as recently as +Pod::Text::Termcap 4.11, included in Perl 5.29.1. + +Pod::Text::Termcap 4.13 stopped setting the TERMPATH environment variable +during module load. It also stopped falling back on VT100 escape sequences if +Term::Cap was not able to find usable escape sequences, instead producing +unformatted output for better results on dumb terminals. The next version to +be incorporated into Perl, 4.14, was included in Perl 5.31.8. + +This module inherits its API and most behavior from Pod::Text, so the details +in L also apply. Pod::Text and Pod::Text::Termcap +have had the same module version since 4.00, included in Perl 5.23.7. (They +unfortunately diverge in confusing ways prior to that.) + =head1 AUTHOR Russ Allbery =head1 COPYRIGHT AND LICENSE -Copyright 1999, 2001-2002, 2004, 2006, 2008-2009, 2014-2015, 2018-2019 Russ -Allbery +Copyright 1999, 2001-2002, 2004, 2006, 2008-2009, 2014-2015, 2018-2019, 2022 +Russ Allbery This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. diff --git a/cpan/podlators/scripts/pod2man.PL b/cpan/podlators/scripts/pod2man.PL index d6e685d20100..5b89980c75ee 100644 --- a/cpan/podlators/scripts/pod2man.PL +++ b/cpan/podlators/scripts/pod2man.PL @@ -38,7 +38,7 @@ print "Extracting $file (with variable substitutions)\n"; print {$out} <<"PREAMBLE" or die "Cannot write to $file: $!\n"; $Config{startperl} eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' - if \$running_under_some_shell; + if 0; # ^ Run only under a shell PREAMBLE # In the following, Perl variables are not expanded during extraction. 
@@ -72,11 +72,11 @@ my $stdin; # allowing short forms as well. --lax is currently ignored. my %options; Getopt::Long::config ('bundling_override'); -GetOptions (\%options, 'center|c=s', 'date|d=s', 'errors=s', 'fixed=s', - 'fixedbold=s', 'fixeditalic=s', 'fixedbolditalic=s', 'help|h', - 'lax|l', 'lquote=s', 'name|n=s', 'nourls', 'official|o', - 'quotes|q=s', 'release|r=s', 'rquote=s', 'section|s=s', 'stderr', - 'verbose|v', 'utf8|u') +GetOptions (\%options, 'center|c=s', 'date|d=s', 'encoding|e=s', 'errors=s', + 'fixed=s', 'fixedbold=s', 'fixeditalic=s', 'fixedbolditalic=s', + 'guesswork=s', 'help|h', 'lax|l', 'language=s', 'lquote=s', + 'name|n=s', 'nourls', 'official|o', 'quotes|q=s', 'release|r=s', + 'rquote=s', 'section|s=s', 'stderr', 'verbose|v', 'utf8|u') or exit 1; pod2usage (0) if $options{help}; @@ -128,7 +128,7 @@ __END__ =for stopwords en em --stderr stderr --utf8 UTF-8 overdo markup MT-LEVEL Allbery Solaris URL troff troff-specific formatters uppercased Christiansen --nourls UTC prepend -lquote rquote +lquote rquote unrepresentable mandoc manref EBCDIC =head1 NAME @@ -136,10 +136,12 @@ pod2man - Convert POD data to formatted *roff input =head1 SYNOPSIS -pod2man [B<--center>=I] [B<--date>=I] [B<--errors>=I