#! /usr/bin/env perl

################################################################
# Convert an ordinary Encapsulated PostScript file (.eps) into #
# one that works with both LaTeX+dvips and pdfLaTeX            #
#                                                              #
# By Scott Pakin <scott+peps@pakin.org>                        #
################################################################

our $VERSION = "1.1";
require 5.006_001;
use English;
use Getopt::Long;
use FindBin qw($Script);
use File::Basename;
use File::Temp qw(tempfile);
use File::Spec;
use File::Which;
use Pod::Usage;
use Pod::Man;
use Cwd qw(cwd chdir);
use POSIX;
use warnings;
use strict;

# Global variables
my $fontmap = "mpost.fmp";  # pstoedit-style font map file (may be replaced by --fontmap)
my %TeX_to_PS;              # Maps each TeX font name listed in $fontmap to a PostScript font name
my $infile = "-";           # Input file (default: stdin)
my $outfile = "-";          # Output file (default: stdout)
my $tempbase;               # Base name of temporary files (removed, with "$tempbase.*", at exit)
my @fontmappath = (         # Directories searched, in order, for $fontmap
    "/usr/share/pstoedit/",
    "/usr/local/share/pstoedit/");

########################################################################

# At the end of the program's execution, delete all of our temporary
# files: $tempbase itself plus every file named "$tempbase.<anything>"
# in the same directory.  The directory is scanned with readdir instead
# of a glob pattern because glob() splits its argument on whitespace
# and interprets wildcard characters, so a temporary directory whose
# name contains spaces or brackets would match the wrong files (or
# none at all).
END {
    if (defined $tempbase) {
        my ($basename, $dirname) = fileparse ($tempbase);
        if (opendir (my $tempdir, $dirname)) {
            # Collect the names first so that we never unlink entries
            # while the directory handle is still being read.
            my @leftovers = grep {
                $_ eq $basename || index ($_, "$basename.") == 0
            } readdir $tempdir;
            closedir $tempdir;

            # Deletion is best-effort; a file that cannot be removed is
            # silently left behind, exactly as before.
            foreach my $leftover (@leftovers) {
                unlink File::Spec->catfile ($dirname, $leftover);
            }
        }
    }
}


# Extract the first word from the given array.  A word is either a
# single token or a quoted set of space-separated tokens.  Return the
# first word followed by the remaining tokens.
#
# Arguments: a list of whitespace-free tokens.
# Returns:   (WORD, REMAINING TOKENS...).  The surrounding double quotes
#            of a quoted word are removed and its tokens are rejoined
#            with single spaces.  WORD is undef if the list is empty or
#            if a quoted word is never closed (in which case every
#            token has been consumed).
sub extractName (@)
{
    my @tokens = @_;

    # Ensure we have a token to extract.
    return (undef, @tokens) if !@tokens;

    # Extract the first word from the token list.
    my $firstWord = shift @tokens;
    if (substr ($firstWord, 0, 1) eq '"') {
        # Keep appending tokens until the word ends with a closing
        # quote.  The word must be at least two characters long at that
        # point; otherwise a lone '"' token (as in `" foo"`) would be
        # mistaken for both the opening and the closing quote.
        while (length ($firstWord) < 2 || substr ($firstWord, -1) ne '"') {
            return (undef, @tokens) if !@tokens;
            $firstWord .= " " . shift @tokens;
        }

        # Strip the enclosing quotes.
        $firstWord = substr ($firstWord, 1, length ($firstWord) - 2);
    }

    # Return the word plus the remaining tokens.
    return ($firstWord, @tokens);
}


# Execute a shell command, and die if it fails.  The command's standard
# output is redirected to standard error so that it cannot mix with
# anything this program writes to standard output.
#
# Argument: the command line, passed to the shell as is.
# Dies:     if the command cannot be started, is killed by a signal, or
#           exits with a nonzero status.  The three cases are reported
#           separately because the exit code alone is meaningless (0)
#           for a signal death and for a launch failure.
sub executeCommand ($)
{
    my $command = $_[0];
    my $status = system "$command 1>&2";
    return if $status == 0;

    # system returns -1 (and sets $!) when the command could not be run.
    if ($status == -1) {
        die "${Script}: The following command could not be run ($!):\n    $command\n";
    }

    # The low seven bits of the wait status hold the terminating signal.
    if ($status & 127) {
        die "${Script}: The following command was killed by signal @{[$status & 127]}:\n    $command\n";
    }

    # Otherwise the high byte holds the command's exit code.
    die "${Script}: The following command failed with exit code @{[$status >> 8]}:\n    $command\n";
}


# Generate -- and optionally clean up -- a man page.
#
# Arguments: $manfile  Name of file to produce
#            $wantPS   1=produce PostScript, 0=produce troff
#            $section  Manual section that purifyeps belongs to
#
# The page is generated from this script's own POD.  For PostScript
# output the troff source is first written to $manfile, read back,
# touched up, and piped through groff, which overwrites $manfile.
# Dies if $manfile cannot be read back or if groff cannot be started or
# reports failure.
sub createManPage ($$$)
{
    my ($manfile, $wantPS, $section) = @_;

    # Produce a (*roff) man page.
    my $parser = Pod::Man->new (center  => "",
                                release => "v$VERSION",
                                section => $section);
    $parser->parse_from_file ($PROGRAM_NAME, $manfile);
    return unless $wantPS;

    # Slurp the troff source; groff is about to overwrite the file.
    open (my $roff, "<", $manfile) or die "${Script}: $! ($manfile)\n";
    my @manlines = <$roff>;
    close $roff;

    # Touch up the man page, and use groff to write
    # the result back to disk as a PostScript file.
    # The filename is quoted because the command is run by the shell.
    open (my $groff, "|-", "groff -man > \"$manfile\"")
        or die "${Script}: $! ($manfile)\n";
    foreach my $line (@manlines) {
        # Typeset the LaTeX and TeX logos.  LaTeX must be handled first
        # because its replacement ends in a "TeX" that the second
        # substitution then typesets.
        $line =~ s/LaTeX/L\\h'-0.36m'\\v'-0.15'\\s-2A\\s+2\\v'0.15'\\h'-0.15m'TeX/g;
        $line =~ s/TeX/T\\h'-0.1667m'\\v'0.20'E\\v'-0.20'\\h'-0.125m'X/g;

        # Replace the \*(-- string escape with a literal "--".
        $line =~ s/\\\*\(--/--/g;
        print {$groff} $line;
    }

    # Closing a pipe waits for the child, so this is where a groff
    # failure becomes visible.
    if (!close $groff) {
        my $reason = $! ? "$!" : "exit status @{[$? >> 8]}";
        die "${Script}: groff failed ($reason) while writing $manfile\n";
    }
    return;
}

########################################################################

# Reserve a uniquely named file in the system's temporary directory.
# Only its name is kept; it serves as the base name of every other
# temporary file.
(undef, $tempbase) = tempfile ("${Script}-XXXXXXXX", DIR => File::Spec->tmpdir());

# Replace the bare font-map filename with the first existing copy found
# along the search path.  If there is none, $fontmap is left untouched.
FONTMAP_DIR:
foreach my $candidate (map {File::Spec->catfile ($_, $fontmap)} @fontmappath) {
    next FONTMAP_DIR unless -e $candidate;
    $fontmap = $candidate;
    last FONTMAP_DIR;
}

# Process the command line.
my ($wanthelp, $wantman, $wantps, $wantversion);
my $versionmsg = <<"END_OF_VERSION_MESSAGE";
purifyeps version $VERSION

Copyright (C) 2010 Scott Pakin

This program may be distributed and/or modified under the conditions
of the LaTeX Project Public License, either version 1.3c of this
license or (at your option) any later version.

The latest version of this license is in:

   http://www.latex-project.org/lppl.txt

and version 1.3c or later is part of all distributions of LaTeX
version 2006/05/20 or later.
END_OF_VERSION_MESSAGE
my $man_section = 1;                      # Section number of man page
GetOptions ("fontmap=s"     => \$fontmap,
            "make-man:s"    => \$wantman,
            "make-ps-man:s" => \$wantps,
            "section=s"     => \$man_section,
            "V|version"     => \$wantversion,
            "help"          => \$wanthelp)
    or pod2usage (2);

# Informational options take effect immediately.
if ($wantversion) {
    print $versionmsg;
    exit 1;
}
pod2usage (1) if $wanthelp;

# Documentation-generating options.  Each entry lists the option's
# value (undef if the option was not given), whether PostScript rather
# than troff is wanted, and the extension of the default filename.
# The troff man page takes precedence if both options are given.
foreach my $docmode ([$wantman, 0, $man_section],
                     [$wantps,  1, "ps"]) {
    my ($requested, $as_postscript, $extension) = @$docmode;
    next unless defined $requested;

    # An option given without a value selects the default filename.
    my $filename = $requested;
    if ($requested eq "") {
        $filename = basename ($PROGRAM_NAME, ".pl") . ".$extension";
    }
    createManPage ($filename, $as_postscript, $man_section);
    print STDERR "Wrote $filename\n";
    exit 0;
}

# Whatever remains is an optional input file followed by an optional
# output file.
$infile = shift @ARGV if @ARGV;
$outfile = shift @ARGV if @ARGV;
pod2usage (2) if @ARGV;                   # Too many arguments.

# Refuse to continue, with a helpful error message, unless pstoedit
# can be found in the executable search path.
defined which ("pstoedit")
    or die "${Script}: pstoedit must be installed and must appear in the executable search path\n";

# Read the font map file into a hash table.  Each meaningful line holds
# a PostScript font name followed by the equivalent TeX font name;
# either name may be quoted and contain spaces.  The three-argument
# open takes the filename literally, so a --fontmap value containing
# leading/trailing whitespace or mode characters is not misinterpreted.
open (my $fontmap_fh, "<", $fontmap)
    or die "${Script}: $! ($fontmap); specify an alternative with --fontmap\n";
FONTMAP_LINE:
    while (my $line = <$fontmap_fh>) {
        # Clean up the line: drop the newline and any "%" comment, and
        # skip the line if nothing is left.
        chomp $line;
        $line =~ s/\%.*//;
        next FONTMAP_LINE if $line =~ /^\s*$/;
        my @tokens = split " ", $line;

        # Extract the PostScript name and then the TeX name, each of
        # which may be quoted and contain spaces.  That should be the
        # end of the line.
        my ($PSname, $TeXname);
        ($PSname, @tokens) = extractName (@tokens);
        ($TeXname, @tokens) = extractName (@tokens) if defined $PSname;
        if (!defined $PSname || !defined $TeXname || @tokens) {
            warn "${fontmap}:$NR: warning: incorrect format -- ignoring line\n";
            next FONTMAP_LINE;
        }

        # Store the mapping in a hash table.
        # HEURISTIC: If the mapping is not unique, map the TeX
        #            name to the *shortest* PostScript name.
        if (!defined $TeX_to_PS{$TeXname} ||
            length ($PSname) < length ($TeX_to_PS{$TeXname})) {
            $TeX_to_PS{$TeXname} = $PSname;
        }
    }
close $fontmap_fh;

# Utilize pstoedit to convert from EPS to MetaPost.
my $mpfile = $tempbase . ".mp";
executeCommand "pstoedit -ssp -f mpost -fontmap \"$fontmap\" \"$infile\" \"$mpfile\"";

# Utilize mpost to convert from MetaPost to MPS (stylized EPS).
# mpost always writes to the current directory, so run it from the
# temporary directory and come back afterwards.  Both directory changes
# are checked: a failed first chdir would scatter mpost's output in the
# user's directory, and a failed second one would make a relative
# output filename resolve against the temporary directory.
my $old_cwd = cwd();
my $mpost_dir = File::Spec->tmpdir();
chdir ($mpost_dir) or die "${Script}: $! ($mpost_dir)\n";
# "echo X" feeds an "X" to mpost's standard input -- presumably so that
# an interactive error prompt ends the run instead of hanging (TODO:
# confirm against the mpost documentation).  The filename is quoted
# because the temporary directory's name may contain spaces.
executeCommand "echo X | mpost \"$mpfile\"";
chdir ($old_cwd) or die "${Script}: $! ($old_cwd)\n";

# Post-process the MPS file into the output file.  The result is
# accumulated in memory (@purified_eps) and written out afterwards.
my $mpsfile = $tempbase . ".1";
my @purified_eps;
my $outfile_clean = ($outfile eq "-") ? "stdout" : $outfile;

# Map a TeX font name to its PostScript name.  A font that is missing
# from the font map is reported and passed through unchanged, so the
# output never contains an empty font name.
my $PS_font_name = sub {
    my $TeXname = $_[0];
    return $TeX_to_PS{$TeXname} if defined $TeX_to_PS{$TeXname};
    warn "${outfile_clean}:$NR: warning: TeX font \"$TeXname\" does not appear in $fontmap\n";
    return $TeXname;
};

open (my $mps_fh, "<", $mpsfile) or die "${Script}: $! ($mpsfile)\n";
while (my $line = <$mps_fh>) {
    chomp $line;
    my @tokens = split " ", $line;

    # Convert the list of document fonts.
    if (@tokens && $tokens[0] eq "%%DocumentFonts:") {
        my $keyword = shift @tokens;
        my @PSnames = map {$PS_font_name->($_)} @tokens;
        push @purified_eps, join (" ", $keyword, @PSnames) . "\n";
        next;
    }

    # Convert the font name definitions, which have the form
    # "/NAME /NAME def".  The leading "/" is not part of the font name.
    if (@tokens == 3 &&
        $tokens[0] eq $tokens[1] &&
        $tokens[2] eq "def") {
        my $PSname = $PS_font_name->(substr ($tokens[1], 1));
        push @purified_eps, sprintf (" %s /%s def\n", $tokens[0], $PSname);
        next;
    }

    # By default (which includes blank lines), output the line as is.
    push @purified_eps, "$line\n";
}
close $mps_fh;

# Write the output file.  An output name of "-" means standard output.
# Both the write and the close are checked so that a full disk or a
# closed pipe is reported as a failure rather than as a success.
my $out_fh;
if ($outfile eq "-") {
    $out_fh = \*STDOUT;
}
else {
    open ($out_fh, ">", $outfile) or die "${Script}: $! ($outfile_clean)\n";
}
(print {$out_fh} @purified_eps) or die "${Script}: $! ($outfile_clean)\n";
# Buffered write errors surface only when the handle is closed.
close ($out_fh) or die "${Script}: $! ($outfile_clean)\n";

# Finish up.
print STDERR "\nFile seems to have been purified successfully.\n";
exit 0;

###########################################################################

__END__


=head1 NAME

purifyeps - make an Encapsulated PostScript file work with both dvips and
pdflatex


=head1 SYNOPSIS

purifyeps
B<--help>

purifyeps
B<--version>

purifyeps
[B<--fontmap>=I<.fmp file>]
[I<.eps input file> [I<.eps output file>]]

purifyeps
B<--make-man>[=I<filename>] [B<--section>=I<section>]

purifyeps
B<--make-ps-man>[=I<filename>] [B<--section>=I<section>]


=head1 DESCRIPTION

While B<pdflatex> has a number of nice features, its primary
shortcoming relative to standard B<latex>+B<dvips> is that it is
unable to read ordinary Encapsulated PostScript (EPS) files, the most
common graphics format in the LaTeX world.  B<pdflatex> can read only
the following types of graphics files:

=over 4

=item PDF

Most people who use B<pdflatex> convert their EPS graphics to PDF using a
utility such as B<epstopdf>.  This works well and preserves the vector
nature of the original EPS.  Unfortunately, B<dvips> does not read
PDF, so two versions of the graphic must be maintained if the document
is to be processed with both B<latex>+B<dvips> and B<pdflatex>.

=item PNG

PNG is a bitmap format and therefore scales poorly.  Also, B<dvips>
does not read PNG, so two versions of the graphic must be maintained
if the document is to be processed with both B<latex>+B<dvips> and
B<pdflatex>.

=item JPEG

JPEG is a bitmap format and therefore scales poorly.  Also, B<dvips>
does not read JPEG, so two versions of the graphic must be maintained
if the document is to be processed with both B<latex>+B<dvips> and
B<pdflatex>.

=item MPS

This is probably the least-used B<pdflatex>-compatible graphics
format.  MPS is actually a stylized version of EPS that MetaPost
outputs.  Like PDF, MPS is a vector format and remains as such when
imported into a B<pdflatex> document.  Also like PDF, B<dvips> does
not read MPS, so two versions of the graphic must be maintained if the
document is to be processed with both B<latex>+B<dvips> and
B<pdflatex>.

=back

The insight behind B<purifyeps> is that there are only a few, small
differences between MPS and EPS and that a file can be converted into
a format that matches both the MPS and EPS specifications
simultaneously.  B<purifyeps> inputs an EPS file, uses B<pstoedit>'s
C<mpost> filter to convert the file to MetaPost (F<.mp>), runs
B<mpost> on the file to convert it to MPS, and finally performs some
touchups on the result to convert the file back to EPS, while
preserving its ability to be parsed by B<pdflatex>.


=head1 OPTIONS

=over 4

=item B<--help>

Display the L<"SYNOPSIS"> and L<"OPTIONS"> sections of the B<purifyeps> documentation.

=item B<-V>, B<--version>

Display the B<purifyeps> version number, copyright, and license.

=item B<--fontmap>=I<.fmp file>

Specify the name of a file that tells B<purifyeps> how to map from TeX
font names to PostScript font names.  [Default: F<mpost.fmp>]

=item B<--make-man>[=I<filename>] [B<--section>=I<section>]

Automatically create a Unix man page for B<purifyeps>.  I<section>
specifies the section [default: C<1> (User Commands)].  I<filename>
defaults to F<purifyeps.1> or an analogous filename if I<section> is
specified.

=item B<--make-ps-man>[=I<filename>] [B<--section>=I<section>]

Automatically create a PostScript version of the B<purifyeps>
documentation.  The documentation is formatted like a Unix man page.
I<section> specifies the section [default: C<1> (User Commands)].
I<filename> defaults to F<purifyeps.ps>.

=back

In normal operation (i.e., when not run with C<--help>, C<--version>,
C<--make-man>, or C<--make-ps-man>), B<purifyeps> takes the name of an
input file and an output file.  The same filename can safely be used
for both files.  If the output filename is omitted, output will go to
the standard output device.  If the input filename is omitted,
B<purifyeps> will read from the standard input device.


=head1 EXAMPLES

Create a PostScript version of the B<purifyeps> documentation, but
call it F<happydoc.ps> instead of the default, F<purifyeps.ps>:

    purifyeps --make-ps-man=happydoc.ps

Create a Unix man page for B<purifyeps> (in the usual roff format),
but indicate that it belongs in section C<LOCAL> instead of the
default of section C<1>:

    purifyeps --make-man --section=LOCAL

Purify F<sample.eps> (F<mpost.fmp> is in the current directory):

    purifyeps sample.eps sample.eps

Purify F<sample.eps> (F<mpost.fmp> is in a different location):

    purifyeps --fontmap=/usr/share/pstoedit/mpost.fmp sample.eps sample.eps

Rename the purified version while purifying:

    purifyeps sample.eps sample-pure.eps

Do the same, but in a Unix pipeline:

    cat sample.eps | purifyeps > sample-pure.eps

When you run B<purifyeps>, you should see the output from both
B<pstoedit> and B<mpost>, followed by a success message from
B<purifyeps>:

    % cat sample.eps | purifyeps > sample-pure.eps
    pstoedit: version 3.30 / DLL interface 107 (build Mar 14 2002) :
    Copyright (C) 1993 - 2001 Wolfgang Glunz
    Interpreter finished. Return status 0
    This is MetaPost, Version 0.641 (Web2C 7.3.1)
    (/tmp/purifyeps-jdeGPkh9.mp [1] )
    1 output file written: purifyeps-jdeGPkh9.1
    Transcript written on purifyeps-jdeGPkh9.log.

    File seems to have been purified successfully.


=head1 FILES

=over 4

=item F<mpost.fmp>

File containing mappings between TeX and PostScript font names.  See
L<"NOTES"> for a description of this file's contents.

=back


=head1 BUGS

Error reporting could definitely stand to be improved.  Error messages
produced by B<pstoedit> and B<mpost> are sometimes silently ignored.
Also, errors sometimes cause B<purifyeps> to leave temporary files
(F<purifyeps->I<#####>) lying around.

B<purifyeps> is subject to all of the limitations that affect
B<pstoedit> and especially the C<mpost> backend to B<pstoedit>.  As a
result, B<purifyeps> ignores certain PostScript constructs, such as
nonuniformly scaled text.


=head1 NOTES

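B<purifyeps> needs a file that tells it how to map from TeX font names
to PostScript font names.  This file must contain two space-separated
columns.  The first lists a PostScript font name, and the second lists
the TeX equivalent.  A name that contains spaces must be enclosed in
double quotes.  Blank lines are ignored, as is everything from a C<%>
to the end of a line.  If a TeX name is listed more than once, the
shortest of the corresponding PostScript names is used.  The following
is a sample F<.fmp> file: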

    % This is a sample font map for purifyeps.
    Times-Bold                           ptmb
    Times-Italic                         ptmri
    Times-Roman                          ptmr
    Helvetica                            phvr
    Helvetica-Bold                       phvb
    Helvetica-Oblique                    phvro
    Courier                              pcrr
    Courier-Bold                         pcrb
    Courier-Oblique                      pcrro

Note that this is exactly the same format that B<pstoedit> uses.  By
default, B<purifyeps> looks for a font map called F<mpost.fmp>, first
in F</usr/share/pstoedit>, then in F</usr/local/share/pstoedit>, and
finally in the current directory.  The C<--fontmap> command-line option
tells B<purifyeps> to use a different font map, which will typically be
the F<mpost.fmp> file that comes with B<pstoedit>.

Once you create purified EPS files with B<purifyeps>, you need to
instruct B<pdflatex> to use them.  The pdfLaTeX configuration of the
C<graphics> and C<graphicx> packages (F<pdftex.def>) normally ignores
F<.eps> files.  Putting the following LaTeX code in your document's
preamble tells B<pdflatex> that all F<.eps> files are in MPS format:

    % Tell pdfLaTeX that all .eps files were produced by MetaPost.
    \usepackage{graphicx} % or graphics
    \usepackage{ifpdf}
    \ifpdf
      \DeclareGraphicsRule{.eps}{mps}{*}{}
      \makeatletter
        \g@addto@macro\Gin@extensions{,.eps}
      \makeatother
    \fi

=head1 SEE ALSO

dvips(1), epstopdf(1), latex(1), mpost(1), pdflatex(1), pstoedit(1)


=head1 AUTHOR

Scott Pakin, I<scott+peps@pakin.org>