#!/usr/bin/env perl
# implements xgettext for Log::Report only, using Log::Report::Extract::PPI
# Options like GNU's xgettext

use warnings;
use strict;

use Log::Report 'log-report-lexicon';

use Getopt::Long qw/:config no_ignore_case bundling/;
use File::Find   qw/find/;
use Pod::Usage   qw/pod2usage/;

# Run mode for the dispatcher which is configured further down.  It can be
# set via --mode, via --verbose=<number>, or by repeating -v.
my $mode = 0;

# Maps a textdomain name to its configuration file (--config name=file).
# This hash needs a declaration of its own: a backslash in front of a
# parenthesized hash, like \(my %configs), does not return a hash reference
# but references to the elements of the hash.  For a new hash that is an
# empty list, so GetOptions would never store any --config value in it.
my %configs;

# All other option variables are declared in-line.  For scalars, the
# \(my $x) notation does produce the reference to the variable.
GetOptions
	'cleanup!'          => \(my $cleanup  = 1),       # kill transl from removed files, def true
	'config|c=s'        => \%configs,                 # domain configurations
	'domain|d=s'        => \(my $default_domain),     # for templates
	'files-from|f=s'    => \(my $from),               # file with filenames (MANIFEST?) or '-'
	'files-match|m=s'   => \(my $fn_match),           # select filename is dir
	'from-code=s'       => \(my $char_in),
	'help|h'            => \(my $help     = 0),
	'language|L=s'      => \(my $lang     = 'perl'),
	'mode=s'            => \$mode,
	'output-dir|p=s'    => \(my $output),
	'template|t=s'      => \(my $template),           # pattern in ::Template
	'to-code=s'         => \(my $char_out = 'utf-8'), # missing in xgettext?
	'verbose=i'         => \$mode,
	'version|V'         => \(my $version  = 0),
	'v+'                => \$mode,
   or exit 1;

# --version: report the release of Log::Report and stop successfully.
$version
	and print("Log::Report $Log::Report::VERSION\n"), exit 0;

# --help: pod2usage() shows the usage from the POD of this script; the
# argument 0 is the exit status.
pod2usage(0) if $help;

# All output goes to stderr.  The dispatcher which knows the requested run
# mode is installed before anything gets reported.  Otherwise, the trace
# messages about the domain configuration below are only offered to the
# default dispatcher, which was never told about --mode, --verbose or -v.
dispatcher FILE => stderr => to => \*STDERR , mode => $mode, format => sub {shift};
dispatcher close => 'default';

# Load domain information, for instance defining context_rules.  The
# definitions are global, so automatically find their way in the Log::Report
# internals.
#    --config domain1=filename domain2=filename
#    --config domain1=filename --config domain2=filename

# Sorted, to get the trace lines in the same order on each run.
foreach my $domain (sort keys %configs)
{	my $fn = $configs{$domain};
	trace "configuring domain $domain from $fn";
	textdomain $domain, config => $fn;
}

# Without a --template notation, the files are scanned as program text and
# the only programming language accepted is perl.
if(!$template && $lang ne 'perl')
{	error __x"programming language {lang} not supported", lang => $lang;
}

# There is no default location for the lexicons.
unless(defined $output)
{	error __"explicit output directory (-p) required";
}

# Create the output directory when it does not exist yet.  Only a single
# level is created: mkdir does not make missing parent directories.
unless(-d $output or mkdir $output)
{	fault __x"cannot create output directory {dir}", dir => $output;
}

# Collect the names of the files to be scanned: either from a list (read
# from a file or from stdin), or by searching the files and directories
# which are given on the command-line.
my @filenames;
if(defined $from)
{	!@ARGV
		or error __x"do not combine command-line filenames with --files-from";

	# A lexical handle is used for the list, so no global bareword
	# file-handle is needed.  The name '-' means: read from stdin.
	my $list;
	if($from eq '-')
	{   $list = \*STDIN;
	}
	else
	{   open $list, '<:raw', $from
			or fault __x"cannot read filename list from {fn}", fn => $from;
	}

	@filenames = <$list>;
	close $list if $from ne '-';

	# The list is read in raw mode, so a list with CRLF line endings would
	# leave a CR at the end of each name: remove the whole line terminator.
	# Blank lines do not name a file, so they are dropped.
	s/\r?\n\z// for @filenames;
	@filenames = grep length, @filenames;
}
elsif(@ARGV)
{	# find() runs the callback for each entry it meets; only plain files
	# are kept, with the path as reported in $File::Find::name.
	find sub { push @filenames, $File::Find::name if -f }, @ARGV;
}
else
{	error "give --files-from or directories to be processed";
}

# The extractor object, and the names of the files which really got
# scanned.  The latter is passed as 'keep' to the cleanup at the end.
my $extr;
my %processed;

if($template)
{	# process from template toolkit
	# The module is only needed in template mode, so it is loaded at
	# run-time.  A block eval is sufficient for a fixed module name.
	eval { require Log::Report::Template::Extract };
	panic $@ if $@;

	$default_domain
		or error __x"specify a text-domain (-d) for the templates";

	$extr = Log::Report::Template::Extract->new(
		lexicon => $output,
		charset => $char_out,
		domain  => $default_domain,
		pattern => $template,
	);

	# Default: files with extension .tt or .tt2
	$fn_match ||= qr/\.tt2?$/i;

	foreach my $filename (@filenames)
	{   unless($filename =~ $fn_match)
		{   info __x"skipping (not a template) {fn}", fn => $filename;
			next;
		}
		$extr->process($filename, charset => $char_in);
		$processed{$filename}++;
	}
}
else
{	# process the pm files
	eval { require Log::Report::Extract::PerlPPI };
	error $@ if $@;

	$extr = Log::Report::Extract::PerlPPI->new(lexicon => $output, charset => $char_out);

	# Default: files with extension .pl or .pm
	$fn_match ||= qr/\.p[lm]$/i;
	foreach my $filename (@filenames)
	{   if($filename !~ $fn_match)
		{	# The name does not match: the file is only accepted when
			# its first line (usually the shebang) mentions perl.
			open my ($fh), '<', $filename
				or warning(__x"skipping missing file {fn}", fn => $filename), next;

			# The handle is closed on every path.  An empty file has no
			# first line: undef must not be used in the pattern match.
			my $first_line = <$fh>;
			close $fh;

			defined($first_line) && $first_line =~ m!\bperl\b|\bperl[0-9]!
				or info(__x"skipping (not perl) {fn}", fn => $filename), next;
		}
		$extr->process($filename, charset => $char_in);
		$processed{$filename}++;
	}
}

# Unless --no-cleanup was given, call cleanup() with the set of scanned
# files as 'keep'.  Then report the statistics and write the result.
$extr->cleanup(keep => \%processed) if $cleanup;
$extr->showStats;
$extr->write;

__END__

=head1 NAME

xgettext-perl - extract translatable strings

=head1 SYNOPSIS

  xgettext-perl [GENERIC OPTIONS][SCRIPT   OPTIONS] directories
  xgettext-perl [GENERIC OPTIONS][TEMPLATE OPTIONS] directories

  GENERIC OPTIONS
  --config      -c %config    domain configuration
  --files-from  -f $filename  source of filenames to be processed
  --from-code      $charset   used by input files (auto-detects)
  --no-cleanup                keep unprocessed files in po table
  --output-dir  -p $directory location of lexicons (required)
  --to-code        $charset   charset of po files (default utf-8)
  --version     -V            show version of this script
  --verbose=3 -v -vv -vvv     debug mode

  TEMPLATE OPTIONS
  --domain      -d $domain    domain to be used
  --template    -t $notation  how to recognize the strings to be taken
  --files-match -m $regex     filter filenames, default .tt and .tt2

  SCRIPT OPTIONS
  --language    -L $proglang  programming language syntax (now only perl)
  --files-match -m $regex     filter filenames, default .pm and .pl

=head1 DESCRIPTION

This script will maintain PO-files: translation files.  At the moment, the
number of supported syntaxes is quite limited (see below).
There is no restriction on the syntaxes which can be supported: there just
was no practical need to implement more yet.

=head2 Complex options

=over 4

=item --config %config

Log::Report translations support complex additional features, like
context sensitive translations, which require a configuration file.
See L<Log::Report::Context>.

Say, your scripts and templates use textdomain name-spaces C<domain1>
and C<domain2> (please use better names), then you can pass their
respective configuration files as:

  --config domain1=filename domain2=filename    # or
  --config domain1=filename --config domain2=filename

=item --cleanup --no-cleanup

You should scan all script or template files in one go, because PO records
from files which are not mentioned will get removed.  That's the clean-up.
However, when you need more than one scan for a full update, you need to
use C<--no-cleanup>.  This also implies possible pollution of your
translation tables.

=back

=head2 Extracting from Perl with Log::Report syntax

When no --template notation is given, the provided file-names are expected
to contain program text.  Only Perl5 programs using the Log::Report msgid
notation (with leading '__' to mean gettext) are supported.

=head2 Extracting from Template::Toolkit

See Log::Report::Template::Extract

=cut
