/* This file is part of KBabel Copyright (C) 2002 Stefan Asserh�ll <stefan.asserhall@telia.com> 2003-2005 Stanislav Visnovsky <visnovsky@kde.org> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. In addition, as a special exception, the copyright holders give permission to link the code of this program with any edition of the TQt library by Trolltech AS, Norway (or with modified versions of TQt that use the same license as TQt), and distribute linked combinations including the two. You must obey the GNU General Public License in all respects for all of the code used other than TQt. If you modify this file, you may extend this exception to your version of the file, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. */ #include "poinfo.h" #include "catalogitem.h" #include "findoptions.h" #include "msgfmt.h" #include "resources.h" #include <kapplication.h> #include <kio/netaccess.h> #include <kstandarddirs.h> #include <ksavefile.h> #include <tqdatastream.h> #include <tqdatetime.h> #include <tqdict.h> #include <tqfile.h> #include <tqfileinfo.h> #include <tqregexp.h> #include <tqtextcodec.h> #include "libgettext/pofiles.h" #include "libgettext/tokens.h" #include <fstream> using namespace KBabel; // A PO-file cache item struct poInfoCacheItem { PoInfo info; TQDateTime lastModified; }; inline TQDataStream& operator << ( TQDataStream& stream, poInfoCacheItem* item ) { // Note: if you change anything here, do not forget to increase the #define POINFOCACHE_VERSION stream << item->info.total; stream << item->info.fuzzy; stream << item->info.untranslated; stream << item->info.project; stream << item->info.creation; stream << item->info.revision; stream << item->info.lastTranslator; stream << item->info.languageTeam; stream << item->info.mimeVersion; stream << item->info.contentType; stream << item->info.encoding; stream << item->info.others; stream << item->info.headerComment; stream << item->lastModified; return stream; } inline TQDataStream& operator >> ( TQDataStream& stream, poInfoCacheItem* item ) { stream >> item->info.total; stream >> item->info.fuzzy; stream >> item->info.untranslated; stream >> item->info.project; stream >> item->info.creation; stream >> item->info.revision; stream >> item->info.lastTranslator; stream >> item->info.languageTeam; stream >> item->info.mimeVersion; stream >> item->info.contentType; stream >> item->info.encoding; stream >> item->info.others; stream >> item->info.headerComment; stream >> item->lastModified; return stream; } // Cache of PO-file items static TQDict<poInfoCacheItem> _poInfoCache; // File name of cache static TQString _poInfoCacheName; // flag to stop current reading bool PoInfo::stopStaticRead; bool PoInfo::_gettextPluralForm; // Note: We only read the cache file if the data seems usable. If not, we will re-generate the data. void PoInfo::cacheRead() { TQFile cacheFile( _poInfoCacheName ); if( cacheFile.open( IO_ReadOnly ) ) { TQDataStream s( &cacheFile ); // Check the file cache version. // If it is not the current version, we do not read the cache file TQ_UINT32 version; s >> version; if( version != POINFOCACHE_VERSION ) { // Wrong POINFOCACHE_VERSION, so abort kdDebug(KBABEL) << "Wrong cache file version: " << version << endl; return; } /* * Check the version of the TQDataStream with which the cache file was written * * If the cache file was written by an incompatible future version of TQt, * the cache file will not be read. * * On the other side, a cache file written by a previous version of TQt can be read, * by setting the version of the TQDataStream used. */ TQ_INT32 qdatastreamVersion; s >> qdatastreamVersion; if( qdatastreamVersion > 0 && qdatastreamVersion <= s.version() ) { s.setVersion( qdatastreamVersion ); } else { // TQDataStream version seems stupid, so abort kdDebug(KBABEL) << "Wrong TQDataStream version: " << qdatastreamVersion << endl; return; } TQString url; while( !s.atEnd() ) { poInfoCacheItem* item = new poInfoCacheItem; s >> url; s >> item; _poInfoCache.insert( url, item ); } cacheFile.close(); } } void PoInfo::cacheWrite() { // We use KSaveFile as otherwise we have no management about the cache file's integrity // (especially if two instances would write into the same cache file) KSaveFile cacheFile( _poInfoCacheName ); TQDataStream* stream = cacheFile.dataStream(); if( stream ) { // Write the cache file version // We choose to fix a format (TQ_UINT32) for compatibility (TQt version, platforms, architectures) const TQ_UINT32 version = POINFOCACHE_VERSION; *stream << version; // Write the version of the TQDataStream // Here too we choose a fixed format (TQ_INT32) for compatibility const TQ_INT32 qdatastreamVersion = stream->version(); *stream << qdatastreamVersion; TQDictIterator<poInfoCacheItem> it( _poInfoCache ); // iterator for dict for ( ; it.current(); ++it ) { if( TQFile::exists( it.currentKey() ) ) { *stream << it.currentKey(); *stream << it.current(); } } if ( !cacheFile.close() ) { kdWarning(KBABEL) << "Could not write cache file: " << _poInfoCacheName << endl; } } else { kdWarning(KBABEL) << "Could not create TQDataStream for cache file: " << _poInfoCacheName << endl; cacheFile.abort(); } } bool PoInfo::cacheFind(const TQString url, PoInfo& info) { // Read cache if it has not been read, and set up post routine to write it static bool _cacheIsRead = false; if( !_cacheIsRead ) { _cacheIsRead = true; _poInfoCacheName = locateLocal("cache", "kbabel/poinfocache"); cacheRead(); } poInfoCacheItem *item = _poInfoCache.find( url ); if( item ) { TQFileInfo fi( url ); if( fi.lastModified() == item->lastModified ) { info = item->info; return true; } } return false; } void PoInfo::cacheSave(const TQString url, PoInfo& info) { poInfoCacheItem *item = new poInfoCacheItem; TQFileInfo fi( url ); item->info = info; item->lastModified = fi.lastModified(); _poInfoCache.insert( url, item ); } TQTextCodec* PoInfo::codecForFile(TQString gettextHeader) { TQRegExp regexp("Content-Type:\\s*\\w+/[-\\w]+;?\\s*charset\\s*=\\s*(\\S+)\\s*\\\\n"); if( regexp.search(gettextHeader) == -1 ) { kdDebug(KBABEL) << "no charset entry found" << endl; return 0; } const TQString charset = regexp.cap(1); kdDebug(KBABEL) << "charset: " << charset << endl; TQTextCodec* codec=0; if(!charset.isEmpty()) { // "CHARSET" is the default charset entry in a template (pot). // characters in a template should be either pure ascii or // at least utf8, so utf8-codec can be used for both. if( charset == "CHARSET") { codec=TQTextCodec::codecForName("utf8"); kdDebug(KBABEL) << TQString("file seems to be a template: using utf8 encoding.") << endl; } else { codec=TQTextCodec::codecForName(charset.latin1()); } if(!codec) { kdWarning(KBABEL) << "charset found, but no codec available, using UTF8 instead" << endl; codec=TQTextCodec::codecForName("utf8"); } } else { // No charset? So it is probably ASCII, therefore UTF-8 kdWarning(KBABEL) << "No charset defined! Assuming UTF-8!" << endl; codec=TQTextCodec::codecForName("utf8"); } return codec; } PoInfo PoInfo::headerInfo(const CatalogItem& headerItem) { // A header of a Gettext .po/.pot file is made of entries of the kind: // key:value\n // Note that the "line" defined by the \n can be different than the line of the file. // We join all lines of the header and then split the result again at the \n sequence const TQStringList header=TQStringList::split("\\n",headerItem.msgstrAsList().join(TQString())); PoInfo info; // extract information from the header TQStringList::const_iterator it; // The header of a Gettext .po file is consisted of lines of key and value for(it=header.begin();it!=header.end();++it) { bool knownKey=false; // We search for the : character, which is the separator between key and value const int res=(*it).find(':'); if (res>=0) { knownKey=true; // We know most keys, if not it will be changed to false in the "else" case const TQString key=(*it).left(res).simplifyWhiteSpace(); TQString value=(*it).mid(res+1); // "Chop" the \n at the end if (value.endsWith("\\n")) value.remove(value.length()-2,2); // ### TQt4: use value.chop(2) value=value.simplifyWhiteSpace(); kdDebug(KBABEL) << "Header key: " << key << " value: " << value << endl; if (key=="Project-Id-Version") info.project=value; else if (key=="POT-Creation-Date") info.creation=value; else if (key=="PO-Revision-Date") info.revision=value; else if (key=="Last-Translator") info.lastTranslator=value; else if (key=="Language-Team") info.languageTeam=value; else if (key=="MIME-Version") info.mimeVersion=value; else if (key=="Content-Type") info.contentType=value; else if (key=="Content-Transfer-Encoding") info.encoding=value; else { kdDebug(KBABEL)<<"Unknown key: "<<key<<endl; knownKey=false; } } if (!knownKey) { TQString line=(*it); if(line.right(2)=="\\n") line.remove(line.length()-2,2); // ### TQt4: use value.chop(2) if(!info.others.isEmpty()) info.others+='\n'; info.others+=line.simplifyWhiteSpace(); } } info.headerComment=headerItem.comment(); return info; } ConversionStatus PoInfo::info(const TQString& url, PoInfo& info, TQStringList &wordList, bool updateWordList, bool interactive) { return PoInfo::info( url, info, wordList, updateWordList, interactive, true); } ConversionStatus PoInfo::info(const TQString& url, PoInfo& info, TQStringList &wordList, bool updateWordList, bool interactive, bool msgfmt) { stopStaticRead = false; if( !updateWordList && PoInfo::cacheFind( url, info ) ) return OK; TQString target; if(TDEIO::NetAccess::download(KURL( url ), target, 0)) { TQFile file(target); if ( msgfmt ) { // First check file with msgfmt to be sure, it is syntactically correct Msgfmt msgfmt; TQString output; Msgfmt::Status stat = msgfmt.checkSyntax( target , output ); if(stat == Msgfmt::SyntaxError) { TDEIO::NetAccess::removeTempFile(target); return PARSE_ERROR; } } std::ifstream* stream = new std::ifstream( file.name().local8Bit()); if( stream->is_open() ) { CatalogItem temp; info.total=0; info.fuzzy=0; info.untranslated=0; GettextFlexLexer* lexer = new GettextFlexLexer( stream ); lexer->yylex(); // now parse the rest of the file ConversionStatus success=OK; while( lexer->lastToken != T_EOF && success==OK) { if( interactive ) kapp->processEvents(10); if( stopStaticRead ) { TDEIO::NetAccess::removeTempFile(target); delete lexer; delete stream; return OK; } success=fastRead(temp,lexer,false); if(success==OK || success==RECOVERED_PARSE_ERROR) { success=OK; if( temp.comment().contains("\n#~") ) continue; // skip obsolete if( temp.msgid().first().isEmpty()) //header { if( temp.isFuzzy() ) temp.removeFuzzy(); //find out the codec TQTextCodec* codec = codecForFile( temp.msgstr().first() ); if( !codec ) return PARSE_ERROR; // convert from UTF-8 using codec temp.setComment( codec->toUnicode(temp.comment().utf8()) ); temp.setMsgstr( codec->toUnicode(temp.msgstr().first().utf8()) ); PoInfo infoCounts = info; info=PoInfo::headerInfo(temp); info.total = infoCounts.total; info.fuzzy = infoCounts.fuzzy; info.untranslated = infoCounts.untranslated; continue; // do not update counters and word list for header } info.total++; if(temp.isFuzzy()) info.fuzzy++; else if(temp.isUntranslated()) info.untranslated++; if( updateWordList ) { // FIXME: should care about plural forms in msgid TQString st = temp.msgid().first().simplifyWhiteSpace().lower(); TQStringList sl = TQStringList::split( ' ', st ); while(!sl.isEmpty()) { TQString w = sl.first(); sl.pop_front(); if( !wordList.contains(w) ) wordList.append( w ); } st = temp.msgstr().join(" " ).simplifyWhiteSpace().lower(); sl = TQStringList::split( ' ', st ); while(!sl.isEmpty()) { TQString w = sl.first(); sl.pop_front(); if( !wordList.contains(w) ) wordList.append( w ); } st = temp.comment().simplifyWhiteSpace().lower(); sl = TQStringList::split( ' ', st ); while(!sl.isEmpty()) { TQString w = sl.first(); sl.pop_front(); if( !wordList.contains(w) ) wordList.append( w ); } } } } delete lexer; delete stream; if(success==PARSE_ERROR) { TDEIO::NetAccess::removeTempFile(target); return PARSE_ERROR; } } else { delete stream; TDEIO::NetAccess::removeTempFile(target); return NO_PERMISSIONS; } TDEIO::NetAccess::removeTempFile(target); if( target == url ) PoInfo::cacheSave( url, info ); return OK; } else { return OS_ERROR; } return OK; } bool PoInfo::findInFile( const TQString& url, FindOptions options ) { enum {Begin, Comment, Msgid, Msgstr, Msgctxt} part = Begin; stopStaticRead = false; TQString target; if(TDEIO::NetAccess::download(KURL( url ), target, 0)) { std::ifstream* stream = new std::ifstream( target.local8Bit()); if(stream->is_open()) { TDEIO::NetAccess::removeTempFile(target); GettextFlexLexer* lexer = new GettextFlexLexer( stream ); lexer->yylex(); // prepare the search TQString searchStr = options.findStr; TQRegExp regexp( searchStr ); if( options.isRegExp ) regexp.setCaseSensitive( options.caseSensitive ); // first read header CatalogItem temp; ConversionStatus status = fastRead( temp, lexer, true ); if( status != OK || !temp.msgid().first().isEmpty() ) { delete lexer; delete stream; return false; // header is not at the beginning, broken file } TQTextCodec* codec = codecForFile( temp.msgstr().first() ); if( !codec ) { return false; } // now parse the rest of the file TQString text; int pos,len; while(lexer->lastToken != T_EOF) { switch( lexer->lastToken ) { case T_COMMENT: { part = Comment; if( !options.inComment ) break; text = codec->toUnicode(lexer->YYText()); if( options.isRegExp ) pos=regexp.search(text, 0 ); else pos=text.find(searchStr,0,options.caseSensitive); if( pos >= 0) { if( options.wholeWords) { len = searchStr.length(); TQString pre = text.mid(pos-1,1); TQString post = text.mid(pos+len,1); if( !pre.contains( TQRegExp("[a-zA-Z0-9]")) && !post.contains( TQRegExp("[a-zA-Z0-9]") ) ) { delete lexer; delete stream; return true; } } else { delete lexer; delete stream; return true; }; } break; } case T_STRING: { if( part == Msgid && !options.inMsgid ) break; else if( part == Msgstr && !options.inMsgstr ) break; // HACK: We ignore any string following a msgctxt, as it does not change a statistic else if( part == Msgctxt ) break; text = codec->toUnicode(lexer->YYText()); if( options.ignoreContextInfo ) { pos = options.contextInfo.search(text); len = options.contextInfo.matchedLength(); if( pos >= 0 ) text.remove( pos, len ); } if( options.ignoreAccelMarker ) { pos = text.find( options.accelMarker ); if( pos >= 0 ) text.remove( pos, 1 ); } if( options.isRegExp ) pos=regexp.search(text, 0 ); else pos=text.find(searchStr,0,options.caseSensitive); if( pos >= 0) { if( options.wholeWords) { len = searchStr.length(); TQString pre = text.mid(pos-1,1); TQString post = text.mid(pos+len,1); if( !pre.contains( TQRegExp("[a-zA-Z0-9]")) && !post.contains( TQRegExp("[a-zA-Z0-9]") ) ) { delete lexer; delete stream; return true; } } else { delete lexer; delete stream; return true; }; } break; } case T_MSGSTR: { part = Msgstr; break; } case T_MSGID: case T_MSGIDPLURAL: { kapp->processEvents(10); // if stopped, return not found if( stopStaticRead ) { delete lexer; delete stream; return false; } part = Msgid; break; } case T_MSGCTXT: { part = Msgctxt; break; } } lexer->yylex(); } delete lexer; delete stream; } } return false; } // this does not like any incorrect files ConversionStatus PoInfo::fastRead( CatalogItem& item, GettextFlexLexer *lexer, bool storeText) { item.clear(); _gettextPluralForm = false; // comment if( lexer->lastToken == T_COMMENT ) { TQString _comment = TQString::fromUtf8(lexer->YYText()); while( lexer->yylex() == T_COMMENT ) _comment += "\n"+TQString::fromUtf8(lexer->YYText()); item.setComment( _comment ); // kdDebug(KBABEL) << "Comment: " << _comment << endl; } //obsolete if( lexer->lastToken == T_OBSOLETE ) { lexer->yylex(); item.setComment("#~\n#~"); return OK; } // msgctxt if( lexer->lastToken == T_MSGCTXT ) { // HACK: we simply ignore the context, as it does not change a statistic do { lexer->yylex(); } while ( lexer->lastToken == T_STRING ); } // msgid if( lexer->lastToken != T_MSGID ) return PARSE_ERROR; if( lexer->yylex() != T_STRING ) return PARSE_ERROR; TQStringList msgids = item.msgid(); TQStringList::Iterator it = msgids.begin(); *it = TQString::fromUtf8(lexer->YYText()); if( storeText ) while( lexer->yylex() == T_STRING ) (*it) += ("\n"+ TQString::fromUtf8(lexer->YYText()) ); else { if( lexer->yylex() == T_STRING ) // this is not header { *it = "SKIPPED"; while( lexer->yylex() == T_STRING ); } } item.setMsgid( msgids ); // kdDebug(KBABEL) << "Msgid: " << *it << endl; if( lexer->lastToken == T_MSGIDPLURAL ) { _gettextPluralForm = true; if( lexer->yylex() != T_STRING ) return PARSE_ERROR; TQStringList msgids = item.msgid(); it = msgids.fromLast(); *it = TQString::fromUtf8(lexer->YYText()); if( storeText ) while( lexer->yylex() == T_STRING ) (*it)+="\n"+ TQString::fromUtf8(lexer->YYText()); else while( lexer->yylex() == T_STRING ); item.setMsgid( msgids ); // kdDebug(KBABEL) << "Msgid_plural: " << *it << endl; } // msgstr if( lexer->lastToken != T_MSGSTR ) return PARSE_ERROR; if( !_gettextPluralForm ) { if( lexer->yylex() != T_STRING ) return PARSE_ERROR; TQStringList msgstrs = item.msgstr(); it = msgstrs.begin(); *it = TQString::fromUtf8(lexer->YYText()); if( storeText || item.msgid().first().isEmpty() ) // if we should store the text or it is a header while( lexer->yylex() == T_STRING ) (*it)+= ("\n"+ TQString::fromUtf8(lexer->YYText())); else if( lexer->yylex() == T_STRING ) // check next token, whether it is really translated { *it = "SKIPPED"; while( lexer->yylex() == T_STRING ); } item.setMsgstr( msgstrs ); // kdDebug(KBABEL) << "Msgstr: " << *it << endl; } else { TQStringList msgstrs = item.msgstr(); TQString s = TQString::fromUtf8(lexer->YYText()); while( lexer->lastToken == T_MSGSTR && s.contains( TQRegExp("^msgstr\\[[0-9]+\\]" ) ) ) { if( lexer->yylex() != T_STRING ) return PARSE_ERROR; it = msgstrs.fromLast(); *it = TQString::fromUtf8(lexer->YYText()); if( storeText ) do { (*it)+="\n"+TQString::fromUtf8(lexer->YYText()); } while( lexer->yylex() == T_STRING ); else while( lexer->yylex() == T_STRING ); // kdDebug(KBABEL) << "Msgstr: " << *it << endl; s = TQString::fromUtf8(lexer->YYText()); } item.setMsgstr( msgstrs ); } return OK; } // kate: space-indent on; indent-width 4; replace-tabs on;