openwrt-luci/build/i18n-scan.pl
Paul Donald b98d8c526e build: update build/i18n-* tools
modify i18n-add-language.sh so it can:
- be run from any path
- bootstrap any (or all) po folder(s) with existing language(s)
- (partially) update luci.mk with new languages

deprecate build/i18n-init.sh

Signed-off-by: Paul Donald <newtwen@gmail.com>
2024-01-03 13:14:06 +00:00

318 lines
7.6 KiB
Perl
Executable File

#!/usr/bin/perl
use strict;
use warnings;
use IPC::Open2;
use POSIX;
use Text::Balanced qw(gen_extract_tagged);
$ENV{'LC_ALL'} = 'C';
POSIX::setlocale(POSIX::LC_ALL, 'C');
@ARGV >= 1 || die "Usage: $0 <source directory>\n";
my %keywords = (
'.js' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
'.ut' => [ '_:1', '_:1,2c', 'N_:2,3', 'N_:2,3,4c' ],
'.uc' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
'.lua' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
'.htm' => [ '_:1', '_:1,2c', 'translate:1', 'translate:1,2c', 'translatef:1', 'N_:2,3', 'N_:2,3,4c', 'ntranslate:2,3', 'ntranslate:2,3,4c' ],
'.json' => [ '_:1', '_:1,2c' ]
);
sub xgettext($@) {
my $path = shift;
my @keywords = @_;
my ($ext) = $path =~ m!(\.\w+)$!;
my @cmd = qw(xgettext --from-code=UTF-8 --no-wrap);
if ($ext eq '.htm' || $ext eq '.lua') {
push @cmd, '--language=Lua';
}
elsif ($ext eq '.ut' || $ext eq '.uc' || $ext eq '.js' || $ext eq '.json') {
push @cmd, '--language=JavaScript';
}
push @cmd, map { "--keyword=$_" } (@{$keywords{$ext}}, @keywords);
push @cmd, '-o', '-';
return @cmd;
}
sub whitespace_collapse($) {
my $s = shift;
my %r = ('n' => ' ', 't' => ' ');
# Translate \t and \n to plain spaces, leave all other escape
# sequences alone. Finally replace all consecutive spaces by
# single ones and trim leading and trailing space.
$s =~ s/\\(.)/$r{$1} || "\\$1"/eg;
$s =~ s/ {2,}/ /g;
$s =~ s/^ //;
$s =~ s/ $//;
return $s;
}
sub postprocess_pot($$) {
my ($path, $source) = @_;
my (@res, $msgid);
my $skip = 1;
$source =~ s/^#: (.+?)\n/join("\n", map { "#: $path:$_" } $1 =~ m!:(\d+)!g) . "\n"/emg;
my @lines = split /\n/, $source;
# Remove all header lines up to the first location comment
while (@lines > 0 && $lines[0] !~ m!^#: !) {
shift @lines;
}
while (@lines > 0) {
my $line = shift @lines;
# Concat multiline msgids and collapse whitespaces
if ($line =~ m!^(msg\w+) "(.*)"$!) {
my $kw = $1;
my $kv = $2;
while (@lines > 0 && $lines[0] =~ m!^"(.*)"$!) {
$kv .= ' '. $1;
shift @lines;
}
$kv = whitespace_collapse($kv);
# Filter invalid empty msgids by popping all lines in @res
# leading to this point and skip all subsequent lines in
# @lines belonging to this faulty id.
if ($kw ne 'msgstr' && $kv eq '') {
while (@res > 0 && $res[-1] !~ m!^$!) {
pop @res;
}
while (@lines > 0 && $lines[0] =~ m!^(?:msg\w+ )?"(.*)"$!) {
shift @lines;
}
next;
}
push @res, sprintf '%s "%s"', $kw, $kv;
}
# Ignore any flags added by xgettext
elsif ($line =~ m!^#, !) {
next;
}
# Pass through other lines unmodified
else {
push @res, $line;
}
}
return @res ? join("\n", '', @res, '') : '';
}
sub uniq(@) {
my %h = map { $_, 1 } @_;
return sort keys %h;
}
sub preprocess_htm($$) {
my ($path, $source) = @_;
my $sub = {
'=' => '(%s)',
'_' => 'translate([==[%s]==])',
':' => 'translate([==[%s]==])',
'+' => 'include([==[%s]==])',
'#' => '--[==[%s]==]',
'' => '%s'
};
# Translate the .htm source into a valid Lua source using bracket quotes
# to avoid the need for complex escaping.
$source =~ s!<%-?([=_:+#]?)(.*?)-?%>!
my $t = $1;
my $s = $2;
# Split translation expressions on first non-escaped pipe.
if ($t eq ':' || $t eq '_') {
$s =~ s/^((?:[^\|\\]|\\.)*)\|(.*)$/$1]==],[==[$2/;
}
sprintf "]==]; $sub->{$t}; [==[", $s
!sge;
# Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
# and return them as extra keyword so that xgettext recognizes such expressions
# as translate(...) calls.
my @extra_function_keywords =
map { ("$_:1", "$_:1,2c") }
uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
return ("[==[$source]==]", @extra_function_keywords);
}
sub preprocess_ut($$) {
my ($path, $source) = @_;
# Translate the .ut source into valid JavaScript code by enclosing template text
# in multiline comments and extracting blocks as plain code.
my $comt = gen_extract_tagged('{#', '#}', '(?s).*?(?=\{[#{%])');
my $expr = gen_extract_tagged('{{', '}}', '(?s).*?(?=\{[#{%])');
my $stmt = gen_extract_tagged('{%', '%}', '(?s).*?(?=\{[#{%])');
my $res = '';
while (length($source)) {
my ($block, $remain, $prefix);
($block, $remain, $prefix) = $comt->($source);
($block, $remain, $prefix) = $expr->($source) unless defined $block;
($block, $remain, $prefix) = $stmt->($source) unless defined $block;
last unless defined $block;
$source = $remain;
$prefix =~ s!\*/!*\\/!g;
$res .= '/*' . $prefix . '*/';
if ($block =~ s!^\{#(.*)#}$!$1!s) {
$block =~ s!\*/!*\\/!g;
$res .= '/*' . $block . '*/';
}
elsif ($block =~ s!^\{\{(.*)}}$!$1!s) {
$block =~ s!^[+-]!!;
$block =~ s![+-]$!!;
$res .= '(' . $block . ')';
}
elsif ($block =~ s!^\{%(.*)%}$!$1!s) {
$block =~ s!^[+-]!!;
$block =~ s![+-]$!!;
$res .= '{' . $block . '}';
}
}
if ($source =~ m!^(.*)\{%[+-]?(.*)$!s) {
my $prefix = $1;
my $block = $2;
$prefix =~ s!\*/!*\\/!g;
$res .= '/*' . $prefix . '*/';
$res .= '{' . $block . '}';
}
return ($res);
}
sub preprocess_lua($$) {
my ($path, $source) = @_;
# Discover expressions like "lng.translate(...)" or "luci.i18n.translate(...)"
# and return them as extra keyword so that xgettext recognizes such expressions
# as translate(...) calls.
my @extra_function_keywords =
map { ("$_:1", "$_:1,2c") }
uniq($source =~ m!((?:\w+\.)+translatef?)[ \t\n]*\(!g);
return ($source, @extra_function_keywords);
}
sub preprocess_json($$) {
my ($path, $source) = @_;
my ($file) = $path =~ m!([^/]+)$!;
$source =~ s/("(?:title|description)")\s*:\s*("(?:[^"\\]|\\.)*")/$1: _($2)/sg;
return ($source);
}
my ($msguniq_in, $msguniq_out);
my $msguniq_pid = open2($msguniq_out, $msguniq_in, 'msguniq', '-s');
print $msguniq_in "msgid \"\"\nmsgstr \"Content-Type: text/plain; charset=UTF-8\"\n";
if (open F, "find @ARGV -type f '(' -name '*.htm' -o -name '*.lua' -o -name '*.js' -o -name '*.uc' -o -name '*.ut' -o -path '*/menu.d/*.json' -o -path '*/acl.d/*.json' -o -path '*/statistics/plugins/*.json' -o -path '*/https-dns-proxy/providers/*.json' ')' |")
{
while (defined( my $file = readline F))
{
chomp $file;
if (open S, '<', $file)
{
local $/ = undef;
my $source = <S>;
my @extra_function_keywords;
if ($file =~ m!\.htm$!)
{
($source, @extra_function_keywords) = preprocess_htm($file, $source);
}
elsif ($file =~ m!\.ut$!)
{
($source, @extra_function_keywords) = preprocess_ut($file, $source);
}
elsif ($file =~ m!\.lua$!)
{
($source, @extra_function_keywords) = preprocess_lua($file, $source);
}
elsif ($file =~ m!\.json$!)
{
($source, @extra_function_keywords) = preprocess_json($file, $source);
}
my ($xgettext_in, $xgettext_out);
my $pid = open2($xgettext_out, $xgettext_in, xgettext($file, @extra_function_keywords), '-');
print $xgettext_in $source;
close $xgettext_in;
my $pot = readline $xgettext_out;
close $xgettext_out;
waitpid $pid, 0;
print $msguniq_in postprocess_pot($file, $pot);
}
}
close F;
}
close $msguniq_in;
my @pot = <$msguniq_out>;
close $msguniq_out;
waitpid $msguniq_pid, 0;
while (@pot > 0) {
my $line = shift @pot;
# Reorder the location comments in a deterministic way to
# reduce SCM noise when frequently updating templates.
if ($line =~ m!^#: !) {
my @locs = ($line);
while (@pot > 0 && $pot[0] =~ m!^#: !) {
push @locs, shift @pot;
}
print
map { join(':', @$_) . "\n" }
sort { ($a->[0] cmp $b->[0]) || ($a->[1] <=> $b->[1]) }
map { [ /^(.+):(\d+)$/ ] }
@locs
;
next;
}
print $line;
}