Skip to content

Commit

Permalink
Added word-oriented diffs; code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
zipf committed Sep 2, 2012
1 parent 439822a commit 1bec3e9
Showing 1 changed file with 142 additions and 44 deletions.
186 changes: 142 additions & 44 deletions tools/postprocess.pl
Expand Up @@ -10,38 +10,54 @@
use File::Basename;
use Readonly;
use Pod::Tidy qw( tidy_files );
use Text::WordDiff;
use Pod::Checker;
use Getopt::Long;
#use utf8;

$|++;

my (@names, $translator);
my (@names, $nohtml, $diff);

my $result = GetOptions(
"pod=s" => \@names,
"trans=s" => \$translator,
"nohtml" => \$nohtml,
"diff" => \$diff,
);


die "Usage: perl postprocess.pl --pod <pod_name1> <pod_name2> ... [--trans <translator_name>]\n"
die "Usage: perl postprocess.pl --pod <pod_name1> <pod_name2> ... [--nohtml] [--diff]\n"
unless $names[0];


# Hard-coded paths relative to /perldoc-es/tools
# Source
Readonly my $SOURCE_PATH => "../../omegat_516/516/source";
Readonly my $TRANS_PATH => "../../omegat_516/516/target";
Readonly my $MEM_PATH => "../../omegat_516/516/omegat/project_save.tmx";
# Target
Readonly my $CLEAN_PATH => "../../omegat_clean_prj/source";
Readonly my $DISTR_PATH => "../POD2-ES/lib/POD2/ES";
Readonly my $POD_PATH => "../pod/reviewed";
Readonly my $WORK_PATH => "../memory/work/perlspanish-omegat.zipf.tmx";
Readonly my $CLEANM_PATH => "../../omegat_clean_prj/omegat/project_save.tmx";
# OmegaT project
Readonly my $SOURCE_PATH => "../../omegat_516/516/source";
Readonly my $TARGET_PATH => "../../omegat_516/516/target";
Readonly my $MEM_PATH => "../../omegat_516/516/omegat/project_save.tmx";
# Clean OmegaT project (only reviewed segments)
Readonly my $CLEAN_PATH => "../../omegat_clean_prj/source";
Readonly my $CLEANM_PATH => "../../omegat_clean_prj/omegat/project_save.tmx";
# git repository
Readonly my $TRANSPOD_PATH => "../pod/translated";
Readonly my $REVPOD_PATH => "../pod/reviewed";
Readonly my $DISTR_PATH => "../POD2-ES/lib/POD2/ES";
Readonly my $WORK_PATH => "../memory/work/perlspanish-omegat.zipf.tmx";

# Diff file boilerplate: opening HTML printed at the top of every word-diff
# report. It leaves <body> open; the report content is appended after it.
# The <title> element must live inside <head> to be valid HTML.
Readonly my $DIFF_HEADER => <<"END_HEADER";
<html>
<head>
<META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=UTF-8'>
<title>Word-oriented POD comparison</title>
<link rel='stylesheet' href='word_diff.css' type='text/css'>
</head>
<body>
END_HEADER


# read team from __DATA__ section
my %team;
my (%team, %files);

while ( <DATA> ) {

Expand All @@ -51,11 +67,14 @@

my ($alias, @details) = split /,/;

#say $alias;
#say @details;

$team{$alias} = $details[0]; # Name

if ( @details > 2 ) { # files translated by this team member

$files{$_} = $alias foreach @details[2 .. $#details];

}
}

close DATA;
Expand All @@ -65,20 +84,22 @@
# copy work memory to clean project => clean memory
copy($MEM_PATH, $CLEANM_PATH);

# copy work memory to /memory/work and rename it to perlspanish-omegat.zipf.tmx
# copy work memory to /memory/work in repository
# and rename it perlspanish-omegat.zipf.tmx
copy($MEM_PATH, $WORK_PATH);



foreach my $pod_name (@names) {

my $source = "$SOURCE_PATH/$pod_name";
my $trans = "$TRANS_PATH/$pod_name";
my $pod = "$POD_PATH/$pod_name";
my $clean = "$CLEAN_PATH/$pod_name";
my $source = "$SOURCE_PATH/$pod_name"; # src file in work OmegaT project
my $target = "$TARGET_PATH/$pod_name"; # translated file generated by OmegaT
my $trans_pod = "$TRANSPOD_PATH/$pod_name"; # file delivered by translator
my $rev_pod = "$REVPOD_PATH/$pod_name"; # file delivered by reviewer
my $clean = "$CLEAN_PATH/$pod_name"; # src file in clean OmegaT project

# Get path components
my ($name, $path, $suffix) = fileparse($trans, qr{\.pod|\.pm|\..*});
my ($name, $path, $suffix) = fileparse($target, qr{\.pod|\.pm|\..*});
say $name;
say $path;
say $suffix;
Expand All @@ -91,7 +112,7 @@
$readme++;
say "Readme file" if $readme;

$final_name = "perl$ext.pod"; # new name convention for READMEs in 5.16
$final_name = "perl$ext.pod"; # new name convention for READMEs in 5.16

} else {

Expand All @@ -104,37 +125,49 @@
# copy source file to clean project => clean memory
copy($source, $clean);

# copy generated file to github archive (won't go through postprocessing)
copy($trans, $pod);
# copy generated file to git archive (won't go through postprocessing)
copy($target, $rev_pod);

# copy generated file to distribution
copy($trans, $distr);
copy($target, $distr);



# Replace double-spaces after full-stop with single space
open my $dirty, '<:encoding(latin-1)', $distr;
open my $dirty, '<:encoding(UTF-8)', $distr; # OmegaT generates UTF-8 files

my $text = do { local $/; <$dirty> };

close $dirty;

$text =~ s/(?<=\.) (?=[A-Z])/ /g; # two white spaces after full stop

# Check if there is a =encoding utf8 command
my $utf8;
$utf8++ if $text =~ /^=encoding utf8/;
# TO DO: add more checks

say "UTF-8-encoded file" if $utf8;

# Check if there is a =encoding command
my $encoding;
if ( $utf8 ) {
$encoding = "UTF-8";
if ( $text =~ /^=encoding (\S+)/m ) {

$encoding = $1;
if ( $encoding eq 'utf8' ) {

    say "Found UTF-8 encoding command.";

} else {

    say "Found alternative encoding command in POD. Changing to UTF-8...";

    # /m lets ^ match at the start of the =encoding line wherever it sits in
    # the file; \Q...\E keeps characters of the captured encoding name from
    # being read as regex syntax. The replacement side is a plain string, so
    # it must not contain the ^ anchor.
    $text =~ s/^=encoding\s+\Q$encoding\E/=encoding utf8/m;

}

} else {
$encoding = "latin-1";

say "No encoding command found. Adding '=encoding utf8'...";
$text = "=encoding utf8\n\n$text";

}


open my $fixed, ">:encoding($encoding)", $distr;
open my $fixed, ">:encoding(UTF-8)", $distr;

if ( $readme ) {

Expand All @@ -161,15 +194,17 @@
if ( $readme ) {

# Remove added pod formatting from README files
open my $dirty, "<:encoding($encoding)", $distr;
open my $dirty, "<:encoding(UTF-8)", $distr;

my $text = do { local $/; <$dirty> };

close $dirty;


$text =~ s/^=head1 FOO\n\n//;

open my $fixed, ">:encoding($encoding)", $distr;

open my $fixed, ">:encoding(UTF-8)", $distr;

print $fixed $text;

Expand All @@ -178,13 +213,13 @@


# Add TRANSLATORS section to distribution file
open my $out, ">>:encoding($encoding)", $distr;
open my $out, ">>:encoding(UTF-8)", $distr;

my $translators_section = "\n=head1 TRADUCTORES\n\n=over\n\n";

my @file_team = ("explorer", "zipf"); # default team

unshift(@file_team, $translator) if $translator;
unshift(@file_team, $files{$name}) if $files{$name};

$translators_section .= "=item * $team{$_}\n\n" foreach @file_team;
$translators_section .= "=back\n\n";
Expand All @@ -193,21 +228,84 @@

close $out;


# Check POD syntax/formatting
say "Checking POD syntax...";
podchecker($distr);


# Generate word-oriented diff file
if ( $diff ) {

diff_file(
trans => $trans_pod,
rev => $rev_pod,
path => $REVPOD_PATH,
name => $name,
extension => $ext,
header => $DIFF_HEADER,
);

}


# Generate HTML file for proofreading
my $html = "$POD_PATH/$name$suffix.html";
system("perl -MPod::Simple::HTML -e Pod::Simple::HTML::go $distr > $html");
unless ( $nohtml ) {

    # The HTML rendering is written to the reviewed-POD directory.
    my $html = "$REVPOD_PATH/$name$suffix.html";

    # The single-string form of system() is used because the shell performs
    # the output redirection. Report a failure instead of silently leaving a
    # missing or truncated HTML file behind.
    system("perl -MPod::Simple::HTML -e Pod::Simple::HTML::go $distr > $html") == 0
        or warn "HTML generation failed for $distr (wait status $?)\n";

}

unlink "$distr~";

}


# Write a word-oriented HTML comparison of two versions of one POD file.
#
# Named parameters:
#   trans     - path of the translator's version of the file
#   rev       - path of the reviewer's version of the file
#   path      - directory in which the report is written
#   name      - base name of the file; the report is "<path>/<name>_diff.html"
#   extension - extension shown after the name in the report heading
#   header    - HTML boilerplate printed first (falls back to $DIFF_HEADER)
#
# Both files are read as UTF-8 and compared line by line. For every pair of
# lines that differ, the report shows the translator's line, the reviewer's
# line and the word_diff() rendering of the change. A line that exists in
# only one of the files is compared against an empty string.
#
# Returns the path of the report, or nothing (after a warning) when one of
# the files cannot be opened.
sub diff_file {

    my %params = @_;

    my $header = $params{header} // $DIFF_HEADER;

    my $trans_lines = _read_lines( $params{trans} ) or return;
    my $rev_lines   = _read_lines( $params{rev} )   or return;

    my $target = "$params{path}/$params{name}_diff.html";

    my $out;
    unless ( open $out, ">:encoding(UTF-8)", $target ) {
        warn "Cannot write '$target': $!\n";
        return;
    }

    my $title = _escape_html("$params{name}.$params{extension}");

    say {$out} $header;
    say {$out} "<h1>Comparison results for $title</h1>\n<br>";

    # Walk up to the last index of the LONGER file so that neither the final
    # line nor any trailing lines present in only one file are skipped.
    my $last = $#{$trans_lines} > $#{$rev_lines} ? $#{$trans_lines}
                                                 : $#{$rev_lines};

    for my $i ( 0 .. $last ) {

        my $old = $trans_lines->[$i] // '';
        my $new = $rev_lines->[$i]   // '';

        next if $old eq $new;

        my $diff = word_diff( \$old, \$new, { STYLE => 'HTML' } );

        # POD is full of angle brackets (C<...>, L<...>), so the raw lines
        # are escaped before being embedded in HTML. $diff is printed as
        # returned by word_diff(), as the original code did.
        say {$out} "<span style='color:blue'><b>TRANSLATOR:</b></span><br>"
            . _escape_html($old) . "<br>";

        say {$out} "<span style='color:red'><b>REVIEWER:</b></span><br>"
            . _escape_html($new) . "<br>";

        say {$out} "<span style='color:blueviolet'><b>CHANGES:</b></span><br>$diff<br>";

    }

    # Close the elements that the header boilerplate leaves open.
    say {$out} "</body>\n</html>";

    close $out or warn "Error closing '$target': $!\n";

    return $target;

}


# Read a UTF-8 text file and return a reference to an array of its lines,
# without line endings. Warns and returns nothing if the file cannot be opened.
sub _read_lines {

    my ($file) = @_;

    my $in;
    unless ( open $in, "<:encoding(UTF-8)", $file ) {
        warn "Cannot read '$file': $!\n";
        return;
    }

    chomp( my @lines = <$in> );
    close $in;

    return \@lines;

}


# Escape the characters that are significant in HTML text content.
sub _escape_html {

    my ($text) = @_;

    $text =~ s/&/&amp;/g;    # must run first so later entities are not re-escaped
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;

}


__DATA__
j3nnn1,Jennifer Maldonado,C< jcmm986 + POD2ES at gmail.com >
mgomez,Manuel Gómez Olmedo,C< mgomez + POD2ES at decsai.ugr.es >
mgomez,Manuel Gómez Olmedo,C< mgomez + POD2ES at decsai.ugr.es >,perlootut,perlobj
explorer,Joaquín Ferrero (Tech Lead),C< explorer + POD2ES at joaquinferrero.com >
zipf,Enrique Nell (Language Lead),C< blas.gordon + POD2ES at gmail.com >

0 comments on commit 1bec3e9

Please sign in to comment.