/*  
  This file is part of KBabel
  Copyright (C) 2002 Stefan Asserhäll <stefan.asserhall@telia.com>
		2003-2005 Stanislav Visnovsky <visnovsky@kde.org>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  
  In addition, as a special exception, the copyright holders give
  permission to link the code of this program with any edition of
  the TQt library by Trolltech AS, Norway (or with modified versions
  of TQt that use the same license as TQt), and distribute linked
  combinations including the two.  You must obey the GNU General
  Public License in all respects for all of the code used other than
  TQt. If you modify this file, you may extend this exception to
  your version of the file, but you are not obligated to do so.  If
  you do not wish to do so, delete this exception statement from
  your version.

*/

#include "poinfo.h"

#include "catalogitem.h"
#include "findoptions.h"
#include "msgfmt.h"
#include "resources.h"

#include <kapplication.h>
#include <kio/netaccess.h>
#include <kstandarddirs.h>
#include <ksavefile.h>

#include <tqdatastream.h>
#include <tqdatetime.h>
#include <tqdict.h>
#include <tqfile.h>
#include <tqfileinfo.h>
#include <tqregexp.h>
#include <tqtextcodec.h>

#include "libgettext/pofiles.h"
#include "libgettext/tokens.h"

#include <fstream>

using namespace KBabel;

// A PO-file cache item: the information gathered for one file together with
// the modification time of the file at the moment the information was stored.
// PoInfo::cacheFind() compares lastModified against the file's current
// modification time to decide whether the cached info may be reused.
struct poInfoCacheItem
{
    // Header fields and message counters of the file
    PoInfo info;
    // Modification time of the file when "info" was stored
    TQDateTime lastModified;
};

// Writes one cache item to the stream: the three counters, the header
// fields, the header comment and finally the stored modification time.
// The field order must match the one used by operator >> below.
inline TQDataStream& operator << ( TQDataStream& stream, poInfoCacheItem* item )
{
    // Note: if you change anything here, do not forget to increase the #define POINFOCACHE_VERSION
    const PoInfo& data = item->info;

    // message counters
    stream << data.total << data.fuzzy << data.untranslated;

    // header fields
    stream << data.project << data.creation << data.revision;
    stream << data.lastTranslator << data.languageTeam;
    stream << data.mimeVersion << data.contentType << data.encoding;
    stream << data.others << data.headerComment;

    // modification time of the file the data belongs to
    stream << item->lastModified;

    return stream;
}

// Reads one cache item from the stream into *item. The fields are read in
// exactly the order in which operator << writes them.
inline TQDataStream& operator >> ( TQDataStream& stream, poInfoCacheItem* item )
{
    PoInfo& data = item->info;

    // message counters
    stream >> data.total >> data.fuzzy >> data.untranslated;

    // header fields
    stream >> data.project >> data.creation >> data.revision;
    stream >> data.lastTranslator >> data.languageTeam;
    stream >> data.mimeVersion >> data.contentType >> data.encoding;
    stream >> data.others >> data.headerComment;

    // modification time of the file the data belongs to
    stream >> item->lastModified;

    return stream;
}

// Cache of PO-file items, keyed by the url string passed to
// PoInfo::cacheFind() / PoInfo::cacheSave()
static TQDict<poInfoCacheItem> _poInfoCache;

// File name of the cache; set in PoInfo::cacheFind() before the cache is first read
static TQString _poInfoCacheName;

// Flag to stop the current reading: PoInfo::info() and PoInfo::findInFile()
// reset it to false when they start and poll it while parsing
bool PoInfo::stopStaticRead;

// Set by PoInfo::fastRead(): true if the entry read last had a msgid_plural
bool PoInfo::_gettextPluralForm;

// Note: We only read the cache file if the data seems usable. If not, we will re-generate the data.
void PoInfo::cacheRead()
{
    TQFile cacheFile( _poInfoCacheName );
    if( cacheFile.open( IO_ReadOnly ) ) {
	TQDataStream s( &cacheFile );

        // Check the file cache version.
        // If it is not the current version, we do not read the cache file
        TQ_UINT32 version;
	s >> version;
        if( version != POINFOCACHE_VERSION ) {
            // Wrong POINFOCACHE_VERSION, so abort
            kdDebug(KBABEL) << "Wrong cache file version: " << version << endl;
            return;
        }

        /*
         * Check the version of the TQDataStream with which the cache file was written
         *
         * If the cache file was written by an incompatible future version of TQt,
         * the cache file will not be read.
         *
         * On the other side, a cache file written by a previous version of TQt can be read,
         * by setting the version of the TQDataStream used.
         */
        TQ_INT32 qdatastreamVersion;
        s >> qdatastreamVersion;
        if( qdatastreamVersion > 0 &&  qdatastreamVersion <= s.version() ) {
            s.setVersion( qdatastreamVersion );
        }
        else {
            // TQDataStream version seems stupid, so abort
            kdDebug(KBABEL) << "Wrong TQDataStream version: " << qdatastreamVersion << endl;
            return;
        }

        TQString url;
        while( !s.atEnd() ) {
            poInfoCacheItem* item = new poInfoCacheItem;
            s >> url;
            s >> item;
            _poInfoCache.insert( url, item );
        }
        cacheFile.close();
    }
}

void PoInfo::cacheWrite()
{
    // We use KSaveFile as otherwise we have no management about the cache file's integrity
    // (especially if two instances would write into the same cache file)
    KSaveFile cacheFile( _poInfoCacheName );
    
    TQDataStream* stream = cacheFile.dataStream();
    
    if( stream ) {

        // Write the cache file version
        // We choose to fix a format (TQ_UINT32) for compatibility (TQt version, platforms, architectures)
	const TQ_UINT32 version = POINFOCACHE_VERSION;
	*stream << version;

        // Write the version of the TQDataStream
        // Here too we choose a fixed format (TQ_INT32) for compatibility
        const TQ_INT32 qdatastreamVersion = stream->version();
        *stream << qdatastreamVersion;
        
        TQDictIterator<poInfoCacheItem> it( _poInfoCache ); // iterator for dict
        for ( ; it.current(); ++it ) {
	    if( TQFile::exists( it.currentKey() ) ) {
		*stream << it.currentKey();
                *stream << it.current();
	    }
	}
        if ( !cacheFile.close() ) {
            kdWarning(KBABEL) << "Could not write cache file: " << _poInfoCacheName << endl;
        }
    }
    else {
        kdWarning(KBABEL) << "Could not create TQDataStream for cache file: " << _poInfoCacheName << endl;
        cacheFile.abort();
    }
}

bool PoInfo::cacheFind(const TQString url, PoInfo& info)
{
    // Read cache if it has not been read, and set up post routine to write it
    static bool _cacheIsRead = false;
    if( !_cacheIsRead ) {
	_cacheIsRead = true;
        _poInfoCacheName = locateLocal("cache", "kbabel/poinfocache");
	cacheRead();
    }

    poInfoCacheItem *item = _poInfoCache.find( url );
    if( item ) {
	TQFileInfo fi( url );

	if( fi.lastModified() == item->lastModified ) {
	    info = item->info;
	    return true;
	}
    }
    return false;
}

void PoInfo::cacheSave(const TQString url, PoInfo& info)
{
    poInfoCacheItem *item = new poInfoCacheItem;
    TQFileInfo fi( url );

    item->info = info;
    item->lastModified = fi.lastModified();
    _poInfoCache.insert( url, item );
}

// Determines the text codec from the charset given in the Content-Type entry
// of a Gettext header.
// Returns 0 if the header has no such entry. If the charset is the template
// placeholder "CHARSET", or no codec is available for it, the UTF-8 codec is
// returned instead.
TQTextCodec* PoInfo::codecForFile(TQString gettextHeader)
{
   TQRegExp charsetPattern("Content-Type:\\s*\\w+/[-\\w]+;?\\s*charset\\s*=\\s*(\\S+)\\s*\\\\n");
   const int matchPos = charsetPattern.search(gettextHeader);
   if( matchPos == -1 )
   {
       kdDebug(KBABEL) << "no charset entry found" << endl;
       return 0;
   }

   const TQString charset = charsetPattern.cap(1);
   kdDebug(KBABEL) << "charset: " << charset << endl;

   if( charset.isEmpty() )
   {
       // No charset? So it is probably ASCII, therefore UTF-8
       kdWarning(KBABEL) << "No charset defined! Assuming UTF-8!" << endl;
       return TQTextCodec::codecForName("utf8");
   }

   TQTextCodec* codec = 0;
   if( charset == "CHARSET" )
   {
       // "CHARSET" is the default charset entry in a template (pot).
       // Characters in a template should be either pure ascii or
       // at least utf8, so the utf8 codec can be used for both.
       codec = TQTextCodec::codecForName("utf8");
       kdDebug(KBABEL)
           << TQString("file seems to be a template: using utf8 encoding.")
           << endl;
   }
   else
   {
       codec = TQTextCodec::codecForName(charset.latin1());
   }

   if( codec )
       return codec;

   kdWarning(KBABEL) << "charset found, but no codec available, using UTF8 instead" << endl;
   return TQTextCodec::codecForName("utf8");
}

// Extracts the header fields from the header entry of a Gettext .po/.pot file.
//
// The header consists of entries of the kind
//    key:value\n
// where the "line" ended by the \n sequence need not coincide with a line of
// the file. The values of the known keys are stored in the matching members
// of the result; every other entry is collected, one per line, in "others".
// The comment of the header entry becomes "headerComment".
PoInfo PoInfo::headerInfo(const CatalogItem& headerItem)
{
   // Join all lines of the header, then split the result again at the \n sequence
   const TQString joined = headerItem.msgstrAsList().join(TQString());
   const TQStringList entries = TQStringList::split("\\n", joined);

   PoInfo result;

   for(TQStringList::const_iterator entry=entries.begin(); entry!=entries.end(); ++entry)
   {
      const TQString& raw = *entry;

      // The : character separates key and value
      const int colon = raw.find(':');

      // An entry with a separator is assumed to be known until its key
      // turns out not to be one of ours
      bool recognised = (colon >= 0);
      if (recognised)
      {
         const TQString key = raw.left(colon).simplifyWhiteSpace();
         TQString value = raw.mid(colon+1);
         // "Chop" the \n at the end
         if (value.endsWith("\\n"))
            value.remove(value.length()-2,2); // ### TQt4: use  value.chop(2)
         value = value.simplifyWhiteSpace();
         kdDebug(KBABEL) << "Header key: " << key << " value: " << value << endl;

         if (key=="Project-Id-Version")
            result.project = value;
         else if (key=="POT-Creation-Date")
            result.creation = value;
         else if (key=="PO-Revision-Date")
            result.revision = value;
         else if (key=="Last-Translator")
            result.lastTranslator = value;
         else if (key=="Language-Team")
            result.languageTeam = value;
         else if (key=="MIME-Version")
            result.mimeVersion = value;
         else if (key=="Content-Type")
            result.contentType = value;
         else if (key=="Content-Transfer-Encoding")
            result.encoding = value;
         else
         {
            kdDebug(KBABEL)<<"Unknown key: "<<key<<endl;
            recognised = false;
         }
      }

      if (recognised)
         continue;

      // Everything else is kept verbatim (minus a trailing \n sequence)
      TQString line = raw;
      if (line.endsWith("\\n"))
         line.remove(line.length()-2,2); // ### TQt4: use  line.chop(2)

      if (!result.others.isEmpty())
         result.others += '\n';

      result.others += line.simplifyWhiteSpace();
   }

   result.headerComment = headerItem.comment();

   return result;
}


// Convenience overload: same as the six-argument info() with the msgfmt
// syntax check switched on.
ConversionStatus PoInfo::info(const TQString& url, PoInfo& info, TQStringList &wordList, bool updateWordList, bool interactive)
{
    const bool checkWithMsgfmt = true;
    return PoInfo::info( url, info, wordList, updateWordList, interactive, checkWithMsgfmt );
}

ConversionStatus PoInfo::info(const TQString& url, PoInfo& info, TQStringList &wordList, bool updateWordList, bool interactive, bool msgfmt)
{
   stopStaticRead = false;

   if( !updateWordList && PoInfo::cacheFind( url, info ) )
       return OK;

   TQString target;
   if(TDEIO::NetAccess::download(KURL( url ), target, 0))
   {
       TQFile file(target);

       if ( msgfmt )
       {
            // First check file with msgfmt to be sure, it is syntactically correct
            Msgfmt msgfmt;
            TQString output;
            Msgfmt::Status stat = msgfmt.checkSyntax( target , output );
            if(stat == Msgfmt::SyntaxError)
            {
                TDEIO::NetAccess::removeTempFile(target);
                return PARSE_ERROR;
            }
       }


       std::ifstream* stream = new std::ifstream( file.name().local8Bit());
       if( stream->is_open() )
       {
           CatalogItem temp;

           info.total=0;
           info.fuzzy=0;
           info.untranslated=0;

	   GettextFlexLexer* lexer = new GettextFlexLexer( stream  );

	   lexer->yylex();
	   
           // now parse the rest of the file
           ConversionStatus success=OK;
	   
           while( lexer->lastToken != T_EOF && success==OK)
           {
               if( interactive ) kapp->processEvents(10);
	       
	       if( stopStaticRead )
	       {
                    TDEIO::NetAccess::removeTempFile(target);
                    delete lexer;
		    delete stream;
		    return OK;
		}
	       
	       success=fastRead(temp,lexer,false);
	       
               if(success==OK || success==RECOVERED_PARSE_ERROR)
               {
		  success=OK;
		  
		  if( temp.comment().contains("\n#~") ) continue; // skip obsolete
		  
                  if( temp.msgid().first().isEmpty()) //header
		  {
		      if( temp.isFuzzy() )  temp.removeFuzzy();
		      
		      //find out the codec
		      TQTextCodec* codec = codecForFile( temp.msgstr().first() );
		      if( !codec ) return PARSE_ERROR;
		      
		      // convert from UTF-8 using codec
		      temp.setComment( codec->toUnicode(temp.comment().utf8()) );
		      temp.setMsgstr( codec->toUnicode(temp.msgstr().first().utf8()) );
		      
		      PoInfo infoCounts = info;
		      info=PoInfo::headerInfo(temp);
		      info.total = infoCounts.total;
		      info.fuzzy = infoCounts.fuzzy;
		      info.untranslated = infoCounts.untranslated;
		      continue; // do not update counters and word list for header
		  }
		  		  
                  info.total++;

                  if(temp.isFuzzy())
                     info.fuzzy++;
                  else if(temp.isUntranslated())
                     info.untranslated++;
		     
		  if( updateWordList )
		  {
		    // FIXME: should care about plural forms in msgid
		    TQString st = temp.msgid().first().simplifyWhiteSpace().lower();
		    TQStringList sl = TQStringList::split( ' ', st );
		    while(!sl.isEmpty())
		    {
			TQString w = sl.first();
			sl.pop_front();
			if( !wordList.contains(w) ) wordList.append( w );
		    }
		    st = temp.msgstr().join(" " ).simplifyWhiteSpace().lower();
		    sl = TQStringList::split( ' ', st );
		    while(!sl.isEmpty())
		    {
			TQString w = sl.first();
			sl.pop_front();
			if( !wordList.contains(w) ) wordList.append( w );
		    }
		    st = temp.comment().simplifyWhiteSpace().lower();
		    sl = TQStringList::split( ' ', st );
		    while(!sl.isEmpty())
		    {
			TQString w = sl.first();
			sl.pop_front();
			if( !wordList.contains(w) ) wordList.append( w );
		    }
                }
	      }
           }

	   delete lexer;
	   delete stream;

           if(success==PARSE_ERROR)
           {
	       TDEIO::NetAccess::removeTempFile(target);
               return PARSE_ERROR;
           }
       }
       else
       {
          delete stream;
          TDEIO::NetAccess::removeTempFile(target);
          return NO_PERMISSIONS;
       }

	TDEIO::NetAccess::removeTempFile(target);
	if( target == url )
	    PoInfo::cacheSave( url, info );
        return OK;
   }
   else
   {
      return OS_ERROR;
   }

   return OK;
}

bool PoInfo::findInFile( const TQString& url, FindOptions options )
{
   enum {Begin, Comment, Msgid, Msgstr, Msgctxt} part = Begin;
   
   stopStaticRead = false;
   TQString target;
   if(TDEIO::NetAccess::download(KURL( url ), target, 0))
   {
       std::ifstream* stream = new std::ifstream( target.local8Bit()); 
       if(stream->is_open())
       {
           TDEIO::NetAccess::removeTempFile(target);
	   
	   GettextFlexLexer* lexer = new GettextFlexLexer( stream );

	   lexer->yylex();

           // prepare the search
	   
	   TQString searchStr = options.findStr;
	   TQRegExp regexp( searchStr );
	   
	   if( options.isRegExp ) 
		regexp.setCaseSensitive( options.caseSensitive );

           // first read header
	   CatalogItem temp;
	   
	   ConversionStatus status = fastRead( temp, lexer, true );
	   if( status != OK || !temp.msgid().first().isEmpty() ) 
	   {
		delete lexer;
		delete stream;
		return false; // header is not at the beginning, broken file
	   }

	   TQTextCodec* codec = codecForFile( temp.msgstr().first() );
	   if( !codec ) 
	   {
		return false;
	   }
	   
	   // now parse the rest of the file
	   TQString text;
	   int pos,len;
	   
           while(lexer->lastToken != T_EOF)
           {
	       switch( lexer->lastToken ) {
	           case T_COMMENT: {
			part = Comment;
			if( !options.inComment ) break;
			text = codec->toUnicode(lexer->YYText()); 
			if( options.isRegExp )
			    pos=regexp.search(text, 0 );
			else 
			    pos=text.find(searchStr,0,options.caseSensitive);
			if( pos >= 0)
			{
			    if( options.wholeWords) {
				len = searchStr.length();
				TQString pre = text.mid(pos-1,1);
				TQString post = text.mid(pos+len,1);
				if( !pre.contains( TQRegExp("[a-zA-Z0-9]")) &&
				    !post.contains( TQRegExp("[a-zA-Z0-9]") )
				) {
				    delete lexer;
				    delete stream;
				    return true;
				}
			    }
			    else {
				delete lexer;
				delete stream;
				return true;
			    };
			}
			break;
		   }
		   case T_STRING: {
			if( part == Msgid && !options.inMsgid ) break;
			else if( part == Msgstr && !options.inMsgstr ) break;
                        // HACK: We ignore any string following a msgctxt, as it does not change a statistic
                        else if( part == Msgctxt ) break;
			
			text = codec->toUnicode(lexer->YYText()); 
			
			if( options.ignoreContextInfo )
			{
			    pos = options.contextInfo.search(text);
			    len = options.contextInfo.matchedLength();
			    if( pos >= 0 )
		    		text.remove( pos, len );
			}
		    
			if( options.ignoreAccelMarker )
			{
			    pos = text.find( options.accelMarker );
			    if( pos >= 0 )
				text.remove( pos, 1 );
			}
			
			if( options.isRegExp )
			    pos=regexp.search(text, 0 );
			else 
			    pos=text.find(searchStr,0,options.caseSensitive);

			if( pos >= 0)
			{
			    if( options.wholeWords) {
				len = searchStr.length();
				TQString pre = text.mid(pos-1,1);
				TQString post = text.mid(pos+len,1);
				if( !pre.contains( TQRegExp("[a-zA-Z0-9]")) &&
				    !post.contains( TQRegExp("[a-zA-Z0-9]") )
				) {
				    delete lexer;
				    delete stream;
				    return true;
				}
			    }
			    else {
				delete lexer;
				delete stream;
				return true;
			    };
			}
			break;
		   }
		   case T_MSGSTR: {
			part = Msgstr;
			break;
		   }
		   case T_MSGID: 
		   case T_MSGIDPLURAL: {
	    		kapp->processEvents(10);
			
			// if stopped, return not found
			if( stopStaticRead ) 
			{
			    delete lexer;
			    delete stream;
			    return false;
			}
			part = Msgid;
			break;
		   }
                   case T_MSGCTXT: {
                        part = Msgctxt;
                        break;
                   }
	       }
	       lexer->yylex();
           }
	   delete lexer;
	   delete stream;
       }
    }
    return false;
}

// Reads one entry from the lexer into item, starting at lexer->lastToken.
// When the function returns OK, lexer->lastToken is the first token that was
// not consumed as part of the entry.
//
// item       cleared first, then filled with comment, msgid and msgstr
// lexer      token source
// storeText  if true, all strings of msgid/msgstr are kept, joined by "\n".
//            If false, only the first string is kept, and it is replaced by
//            "SKIPPED" when further strings follow. The msgstr of an entry
//            whose first msgid string is empty (the header) is always kept.
//
// Returns PARSE_ERROR as soon as an expected token is missing, OK otherwise.
// An obsolete entry only sets the comment of item to "#~\n#~" and returns OK.
// As a side effect _gettextPluralForm tells whether the entry had a msgid_plural.
//
// this does not like any incorrect files
ConversionStatus PoInfo::fastRead( CatalogItem& item, GettextFlexLexer *lexer, bool storeText)
{
   item.clear();
   _gettextPluralForm = false;

    // comment: consecutive comment tokens are joined by "\n"
    if( lexer->lastToken == T_COMMENT )
    {
	TQString _comment = TQString::fromUtf8(lexer->YYText());
	while( lexer->yylex() == T_COMMENT )
	    _comment += "\n"+TQString::fromUtf8(lexer->YYText());
	item.setComment( _comment );
//	kdDebug(KBABEL) << "Comment: " << _comment << endl;
    }

    //obsolete: consume the token and mark the item through its comment
    if( lexer->lastToken == T_OBSOLETE ) {
	lexer->yylex();
	item.setComment("#~\n#~");
	return OK;
    }

    // msgctxt
    if( lexer->lastToken == T_MSGCTXT ) {
        // HACK: we simply ignore the context, as it does not change a statistic
        do {
            lexer->yylex();
        } while ( lexer->lastToken == T_STRING );
    }
        
    // msgid
    if( lexer->lastToken != T_MSGID ) return PARSE_ERROR;
    
    if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
    // NOTE(review): writing through begin() assumes that item.msgid() holds at
    // least one element after item.clear() - confirm against CatalogItem
    TQStringList msgids = item.msgid();
    TQStringList::Iterator it = msgids.begin();
    *it = TQString::fromUtf8(lexer->YYText());
    if( storeText )
	while( lexer->yylex() == T_STRING )
    	    (*it) += ("\n"+ TQString::fromUtf8(lexer->YYText()) );
    else {
	if( lexer->yylex() == T_STRING ) // this is not header
	{
	    // more than one string: the text is not needed, only that it is not empty
	    *it = "SKIPPED";
	    while( lexer->yylex() == T_STRING );
	}
    }
    item.setMsgid( msgids );
    
//    kdDebug(KBABEL) << "Msgid: " << *it << endl;

    // msgid_plural
    if( lexer->lastToken == T_MSGIDPLURAL ) 
    {
	_gettextPluralForm = true;
	if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
	// NOTE(review): this writes to the last element of the list; if the list
	// has only one element, the singular msgid set above is overwritten -
	// confirm against CatalogItem whether that is intended
	TQStringList msgids = item.msgid();
	it = msgids.fromLast();
	*it = TQString::fromUtf8(lexer->YYText());
	if( storeText ) 
	    while( lexer->yylex() == T_STRING )
    		(*it)+="\n"+ TQString::fromUtf8(lexer->YYText());
	else while( lexer->yylex() == T_STRING );
	item.setMsgid( msgids );
//	kdDebug(KBABEL) << "Msgid_plural: " << *it << endl;
    }
    
    // msgstr
    if( lexer->lastToken != T_MSGSTR ) return PARSE_ERROR;

    if( !_gettextPluralForm )
    {
	if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
	
	TQStringList msgstrs = item.msgstr();
	it = msgstrs.begin();
	*it = TQString::fromUtf8(lexer->YYText());
	if( storeText || item.msgid().first().isEmpty() ) // if we should store the text or it is a header
	    while( lexer->yylex() == T_STRING )
    		(*it)+= ("\n"+ TQString::fromUtf8(lexer->YYText()));
	else 
	if( lexer->yylex() == T_STRING ) // check next token, whether it is really translated
	{
	    *it = "SKIPPED";
	    while( lexer->yylex() == T_STRING );
	}
	item.setMsgstr( msgstrs );
//	kdDebug(KBABEL) << "Msgstr: " << *it << endl;
    } 
    else 
    {
	// plural forms: a sequence of msgstr[N] tokens, each followed by its strings
	TQStringList msgstrs = item.msgstr();
	TQString s = TQString::fromUtf8(lexer->YYText());
	while( lexer->lastToken == T_MSGSTR && s.contains( TQRegExp("^msgstr\\[[0-9]+\\]" ) ) )
	{
	    if( lexer->yylex() != T_STRING ) return PARSE_ERROR;
	    // NOTE(review): every msgstr[N] is written to the last element of the
	    // list, so a later form replaces an earlier one unless the list
	    // already has one element per form - confirm against CatalogItem
	    it = msgstrs.fromLast();
	    *it = TQString::fromUtf8(lexer->YYText());
	    
	    // NOTE(review): the first pass of the do-loop appends the string that
	    // was already assigned above, so with storeText it is stored twice -
	    // confirm whether that is intended
	    if( storeText )
		do {
    		    (*it)+="\n"+TQString::fromUtf8(lexer->YYText());
		} while( lexer->yylex() == T_STRING );
	    else while( lexer->yylex() == T_STRING );
//	    kdDebug(KBABEL) << "Msgstr: " << *it << endl;
	    s = TQString::fromUtf8(lexer->YYText());
	} 
	item.setMsgstr( msgstrs );
    }
    
    return OK;
}

// kate: space-indent on; indent-width 4; replace-tabs on;