#!/usr/bin/perl
#
# This script reads the xx.po file and generates some statistics
# information in the form of XML output. This output is parsed by
# the translate.php file found on the KMyMoney web-site.
#
# (C) 2007 by Thomas Baumgart
#
# Syntax:
#   stats.pl <path-to-po-file>
#
# The script requires the kmymoney2.pot file to be found in the same
# directory as the po file in order to check if the po file is based
# on the current pot file.
#
# If no argument (or "-") is given, the po file is read from standard
# input; the language code is then unknown and kmymoney2.pot is looked
# up in the current directory.
#
#***************************************************************************
#* This program is free software; you can redistribute it and/or modify   *
#* it under the terms of the GNU General Public License as published by   *
#* the Free Software Foundation; either version 2 of the License, or      *
#* (at your option) any later version.                                     *
#***************************************************************************

use strict;
use warnings;

# total message counter
my $msgs = 0;

# fuzzy message counter
my $fuzzy = 0;

# translated message counter (includes fuzzy messages, they carry a msgstr)
my $translated = 0;

# untranslated message counter
my $untranslated = 0;

# state machine
# possible states:
#
# 0 - idle
# 1 - in message
# 2 - in msgid
# 3 - in empty msgstr
# 4 - in msgstr
my $state = 0;

# filename of file to process ("-" means standard input)
my $fname;

# directory (with trailing slash) that holds the po file and kmymoney2.pot;
# defaults to the current directory when the po path has no directory part
my $basedir = "./";

# language code derived from the po file name (e.g. "de" for de.po)
my $langcode = "";

# pot version information (the POT-Creation-Date line of kmymoney2.pot)
my $potVersion;

# "po file matches current pot file" flag
my $potVersionOk = "0";

# name of the last translator as found in the po header
my $author = "";

# date part of the PO-Revision-Date header
my $poRevision = "";

if (@ARGV == 0) {
    $fname = "-";
}
elsif (@ARGV == 1) {
    $fname = $ARGV[0];
    if ($fname =~ /^(.*\/)?(.*)\.po$/) {
        # $1 is undefined when the file lives in the current directory
        $basedir  = $1 if defined $1;
        $langcode = $2;
    }
    elsif ($fname =~ /^(.*\/)/) {
        # not a *.po name: still look for the pot file next to it
        $basedir = $1;
    }
}
else {
    print STDERR "Can only process one file at a time\n";
    exit 1;
}

# language code -> country name
my %country = (
    "ca"    => "Catalan",
    "cs"    => "Czech",
    "da"    => "Denmark",
    "de"    => "Germany",
    "en_GB" => "Great Britain",
    "es"    => "Spain",
    "es_AR" => "Argentina",
    "fi"    => "Finnland",
    "fr"    => "France",
    "gl"    => "Galicia",
    "it"    => "Italy",
    "lt"    => "Lithuania",
    "nl"    => "Netherlands",
    "pl"    => "Poland",
    "pt_BR" => "Brazil",
    "pt"    => "Portugal",
    "ro"    => "Romania",
    "ru"    => "Russia",
    "sk"    => "Slovakia",
    "sl"    => "Slovenia",
    "sv"    => "Sweden",
    "tr"    => "Turkey",
    "zh_CN" => "China",
);

# language code -> language name
my %language = (
    "ca"    => "Catalan",
    "cs"    => "Czech",
    "da"    => "Danish",
    "de"    => "German",
    "en_GB" => "British",
    "es"    => "Spanish (Spain)",
    "es_AR" => "Spanish (Argentina)",
    "fi"    => "Suomi",
    "fr"    => "French",
    "gl"    => "Galician",
    "it"    => "Italian",
    "lt"    => "Lithuanian",
    "nl"    => "Dutch",
    "pl"    => "Polish",
    "pt_BR" => "Portuguese (Brazil)",
    "pt"    => "Portuguese (Portugal)",
    "ro"    => "Romanian",
    "ru"    => "Russian",
    "sk"    => "Slovak",
    "sl"    => "Slovenian",
    "sv"    => "Swedish",
    "tr"    => "Turkish",
    "zh_CN" => "Simplified Chinese",
);

# language code -> flag name; codes not listed here use the language
# code itself as flag name
my %flags = (
    "es_AR" => "ar",
    "en_GB" => "gb",
    "pt_BR" => "br",
    "pt"    => "pt",
    "da"    => "dk",
    "gl"    => "Galicia",
    "zh_CN" => "cn",
);

# extract the POT-Creation-Date line of the current pot file
my $potfile = $basedir . "kmymoney2.pot";
open(my $pot, '<', $potfile) or die("POT file not found ($potfile): $!");
while (my $line = <$pot>) {
    if ($line =~ /POT-Creation-Date/) {
        chomp($line);
        $potVersion = $line;
        last;
    }
}
close($pot);

# open the po file; "-" selects standard input
my $po;
if ($fname eq "-") {
    $po = \*STDIN;
}
else {
    open($po, '<', $fname) or die("Cannot open $fname for reading: $!");
}

# NOTE(review): only "msgstr" lines are recognised below; plural entries
# using "msgstr[N]" are not handled by this state machine - confirm that
# the processed po files do not contain them.
while (my $line = <$po>) {
    # the team line is of no interest, just skip it
    if ($line =~ /Language-Team: (.*) <.*>/) {
        next;
    }
    if ($line =~ /Last-Translator: (.*) <.*>/) {
        $author = $1;
        next;
    }
    if ($line =~ /POT-Creation-Date/) {
        # the po file is current if it carries the very same
        # POT-Creation-Date line as the pot file
        chomp($line);
        $potVersionOk = "1" if (defined($potVersion) && $line eq $potVersion);
        next;
    }
    # "PO-Revision-Date: 2007-02-21 21:57+0100\n"
    if ($line =~ /PO-Revision-Date: (\d+-\d+-\d+) /) {
        $poRevision = $1;
    }

    if ($state == 0) {
        # a source reference comment starts a new message
        if ($line =~ /^\#:/) {
            $state = 1;
        }
    }
    elsif ($state == 1) {
        if ($line =~ /^msgid /) {
            $msgs++;
            $state = 2;
        }
        elsif ($line =~ /^#, fuzzy/) {
            $fuzzy++;
        }
    }
    elsif ($state == 2) {
        if ($line =~ /^msgstr ""$/) {
            # we have detected an empty msgstr. this can have two reasons
            # a) the message is untranslated, then the next line is empty
            # b) the message is too long and starts on the next line
            $state = 3;
        }
        elsif ($line =~ /^msgstr "[^"]/) {
            $translated++;
            $state = 4;
        }
    }
    elsif ($state == 3) {
        if ($line =~ /^"[^"]/) {
            $translated++;
            $state = 4;
        }
        else {
            $untranslated++;
            $state = 0;
        }
    }
    elsif ($state == 4) {
        # an empty line terminates the message
        if ($line =~ /^$/) {
            $state = 0;
        }
    }
}

# the file ended right after an empty msgstr (no trailing blank line):
# that last message is untranslated
$untranslated++ if ($state == 3);

close($po) if ($fname ne "-");

my $flag = $flags{$langcode};
$flag = $langcode if (!defined($flag) || length($flag) == 0);

# percentages; all zero if the file does not contain any message
# (avoids a division by zero)
my $ptransdisp = 0;
my $pfuzzy     = 0;
my $puntrans   = 0;
if ($msgs > 0) {
    # translated percentage with two decimals, fuzzy messages excluded
    $ptransdisp = (int(($translated - $fuzzy) * 10000 / $msgs)) / 100;
    $pfuzzy     = int(($fuzzy * 100) / $msgs);
    $puntrans   = int(($untranslated * 100) / $msgs);
}

# unknown language codes yield empty elements
my $languageName = defined($language{$langcode}) ? $language{$langcode} : "";
my $countryName  = defined($country{$langcode})  ? $country{$langcode}  : "";

print "<translation>\n";
print " <flag>$flag</flag>\n";
print " <language>$languageName</language>\n";
print " <country>$countryName</country>\n";
print " <translator>$author</translator>\n";
print " <translated>$ptransdisp</translated>\n";
print " <fuzzy>$pfuzzy</fuzzy>\n";
print " <untranslated>$puntrans</untranslated>\n";
print " <potcurrent>$potVersionOk</potcurrent>\n";
print " <porevision>$poRevision</porevision>\n";
print "</translation>\n";