diff options
Diffstat (limited to 'src/z3950connection.cpp')
-rw-r--r-- | src/z3950connection.cpp | 589 |
1 files changed, 589 insertions, 0 deletions
diff --git a/src/z3950connection.cpp b/src/z3950connection.cpp new file mode 100644 index 0000000..ada2a1d --- /dev/null +++ b/src/z3950connection.cpp @@ -0,0 +1,589 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : $EMAIL + ***************************************************************************/ + +/*************************************************************************** + * * + * This file has been modified to match the requirements of KBibTeX. * + * In case of problems or bugs arising from this implementation, please * + * contact the KBibTeX team first. * + * Thomas Fischer <fischer@unix-ag.uni-kl.de> * + * * + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "z3950connection.h" +// #include "z3950fetcher.h" +#include "messagehandler.h" +#include "latin1literal.h" +#include <kdebug.h> +#include "iso5426converter.h" +#include "iso6937converter.h" + +#include <qapplication.h> + +#include <config.h> + +#ifdef HAVE_YAZ +extern "C" +{ +#include <yaz/zoom.h> +#include <yaz/marcdisp.h> +#include <yaz/yaz-version.h> +} +#endif + +#include <klocale.h> + +#include <qfile.h> + +namespace +{ + static const size_t Z3950_DEFAULT_MAX_RECORDS = 20; +} + +using KBibTeX::Z3950ResultFound; +using KBibTeX::Z3950Connection; + +Z3950ResultFound::Z3950ResultFound( const QString& s ) : QCustomEvent( uid() ) + , m_result( QDeepCopy<QString>( s ) ) +{ + ++Z3950Connection::resultsLeft; +} + +Z3950ResultFound::~Z3950ResultFound() +{ + --Z3950Connection::resultsLeft; +} + +class Z3950Connection::Private +{ +public: + Private() {} +#ifdef HAVE_YAZ + ~Private() + { + ZOOM_options_destroy( conn_opt ); + ZOOM_connection_destroy( conn ); + }; + + ZOOM_options conn_opt; + ZOOM_connection conn; +#endif +}; + +int Z3950Connection::resultsLeft = 0; + +// since the character set goes into a yaz api call +// I'm paranoid about user insertions, so just grab 64 +// characters at most +Z3950Connection::Z3950Connection( QObject* fetcher, + const QString& host, + uint port, + const QString& dbname, + const QString& sourceCharSet, + const QString& syntax, + const QString& esn ) + : QThread() + , d( new Private() ) + , m_connected( false ) + , m_aborted( false ) + , m_fetcher( fetcher ) + , m_host( QDeepCopy<QString>( host ) ) + , m_port( port ) + , m_dbname( QDeepCopy<QString>( dbname ) ) + , m_sourceCharSet( QDeepCopy<QString>( sourceCharSet.left( 64 ) ) ) + , m_syntax( QDeepCopy<QString>( syntax ) ) + , m_esn( QDeepCopy<QString>( esn ) ) + , m_start( 0 ) + , m_limit( Z3950_DEFAULT_MAX_RECORDS ) + , m_hasMore( false ) +{ +} + +Z3950Connection::~Z3950Connection() +{ + m_connected = false; + delete d; + d = 0; +} + +void Z3950Connection::reset() +{ + m_start = 0; + m_limit = Z3950_DEFAULT_MAX_RECORDS; +} + +void Z3950Connection::setQuery( const QString& query_, unsigned int numHits ) +{ + m_pqn = QDeepCopy<QString>( query_ ); + m_limit = Z3950_DEFAULT_MAX_RECORDS < numHits ? Z3950_DEFAULT_MAX_RECORDS : numHits; +} + +void Z3950Connection::setUserPassword( const QString& user_, const QString& pword_ ) +{ + m_user = QDeepCopy<QString>( user_ ); + m_password = QDeepCopy<QString>( pword_ ); +} + +void Z3950Connection::run() +{ +// kdDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl; + m_aborted = false; + m_hasMore = false; + resultsLeft = 0; +#ifdef HAVE_YAZ + + if ( !makeConnection() ) + { + done(); + return; + } + + ZOOM_query query = ZOOM_query_create(); + kdDebug() << "Z3950Connection::run() - pqn = " << toCString( m_pqn ) << endl; + int errcode = ZOOM_query_prefix( query, toCString( m_pqn ) ); + if ( errcode != 0 ) + { + kdDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl; + ZOOM_query_destroy( query ); + QString s = i18n( "Query error!" ); + s += ' ' + m_pqn; + done( s, MessageHandler::Error ); + return; + } + + ZOOM_resultset resultSet = ZOOM_connection_search( d->conn, query ); + + // check abort status + if ( m_aborted ) + { + done(); + return; + } + + // I know the LOC wants the syntax = "xml" and esn = "mods" + // to get MODS data, that seems a bit odd... + // esn only makes sense for marc and grs-1 + // if syntax is mods, set esn to mods too + QCString type = "raw"; + if ( m_syntax == Latin1Literal( "mods" ) ) + { + m_syntax = QString::fromLatin1( "xml" ); + ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" ); + type = "xml"; + } + else + { + ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() ); + } + ZOOM_resultset_option_set( resultSet, "start", QCString().setNum( m_start ) ); + ZOOM_resultset_option_set( resultSet, "count", QCString().setNum( m_limit - m_start ) ); + // search in default syntax, unless syntax is already set + if ( !m_syntax.isEmpty() ) + { + ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", m_syntax.latin1() ); + } + + const char* errmsg; + const char* addinfo; + errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo ); + if ( errcode != 0 ) + { + ZOOM_resultset_destroy( resultSet ); + ZOOM_query_destroy( query ); + m_connected = false; + + QString s = i18n( "Connection search error %1: %2" ).arg( errcode ).arg( toString( errmsg ) ); + if ( !QCString( addinfo ).isEmpty() ) + { + s += " (" + toString( addinfo ) + ")"; + } + kdDebug() << "Z3950Connection::run() - " << s << endl; + done( s, MessageHandler::Error ); + return; + } + + const size_t numResults = ZOOM_resultset_size( resultSet ); + + QString newSyntax = m_syntax; + if ( numResults > 0 ) + { + kdDebug() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl; + // so now we know that results exist, might have to check syntax + int len; + ZOOM_record rec = ZOOM_resultset_record( resultSet, 0 ); + // want raw unless it's mods + ZOOM_record_get( rec, type, &len ); + if ( len > 0 && m_syntax.isEmpty() ) + { + newSyntax = QString::fromLatin1( ZOOM_record_get( rec, "syntax", &len ) ).lower(); + kdDebug() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl; + if ( newSyntax == Latin1Literal( "mods" ) || newSyntax == Latin1Literal( "xml" ) ) + { + m_syntax = QString::fromLatin1( "xml" ); + ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" ); + } + else if ( newSyntax == Latin1Literal( "grs-1" ) ) + { + // if it's defaulting to grs-1, go ahead and change it to try to get a marc + // record since grs-1 is a last resort for us + newSyntax.truncate( 0 ); + } + } + // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1 + if ( newSyntax != Latin1Literal( "xml" ) && + newSyntax != Latin1Literal( "usmarc" ) && + newSyntax != Latin1Literal( "marc21" ) && + newSyntax != Latin1Literal( "unimarc" ) && + newSyntax != Latin1Literal( "grs-1" ) ) + { + kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl; + newSyntax = QString::fromLatin1( "xml" ); + ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" ); + ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() ); + rec = ZOOM_resultset_record( resultSet, 0 ); + ZOOM_record_get( rec, "xml", &len ); + if ( len == 0 ) + { + // change set name back + ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() ); + newSyntax = QString::fromLatin1( "usmarc" ); // try usmarc + kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl; + ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() ); + rec = ZOOM_resultset_record( resultSet, 0 ); + ZOOM_record_get( rec, "raw", &len ); + } + if ( len == 0 ) + { + newSyntax = QString::fromLatin1( "marc21" ); // try marc21 + kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl; + ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() ); + rec = ZOOM_resultset_record( resultSet, 0 ); + ZOOM_record_get( rec, "raw", &len ); + } + if ( len == 0 ) + { + newSyntax = QString::fromLatin1( "unimarc" ); // try unimarc + kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl; + ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() ); + rec = ZOOM_resultset_record( resultSet, 0 ); + ZOOM_record_get( rec, "raw", &len ); + } + if ( len == 0 ) + { + newSyntax = QString::fromLatin1( "grs-1" ); // try grs-1 + kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl; + ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() ); + rec = ZOOM_resultset_record( resultSet, 0 ); + ZOOM_record_get( rec, "raw", &len ); + } + if ( len == 0 ) + { + kdDebug() << "Z3950Connection::run() - giving up" << endl; + ZOOM_resultset_destroy( resultSet ); + ZOOM_query_destroy( query ); + done( i18n( "Record syntax error" ), MessageHandler::Error ); + return; + } + kdDebug() << "Z3950Connection::run() - final syntax is " << newSyntax << endl; + } + } + + // go back to fooling ourselves and calling it mods + if ( m_syntax == Latin1Literal( "xml" ) ) + { + m_syntax = QString::fromLatin1( "mods" ); + } + if ( newSyntax == Latin1Literal( "xml" ) ) + { + newSyntax = QString::fromLatin1( "mods" ); + } + // save syntax change for next time + if ( m_syntax != newSyntax ) + { + qApp->postEvent( m_fetcher, new Z3950SyntaxChange( newSyntax ) ); + m_syntax = newSyntax; + } + + if ( m_sourceCharSet.isEmpty() ) + { + m_sourceCharSet = QString::fromLatin1( "marc-8" ); + } + + const size_t realLimit = QMIN( numResults, m_limit ); + + for ( size_t i = m_start; i < realLimit && !m_aborted; ++i ) + { + kdDebug() << "Z3950Connection::run() - grabbing index " << i << endl; + ZOOM_record rec = ZOOM_resultset_record( resultSet, i ); + if ( !rec ) + { + kdDebug() << "Z3950Connection::run() - no record returned for index " << i << endl; + continue; + } + int len; + QString data; + if ( m_syntax == Latin1Literal( "mods" ) ) + { + data = toString( ZOOM_record_get( rec, "xml", &len ) ); + } + else if ( m_syntax == Latin1Literal( "grs-1" ) ) // grs-1 + { + // we're going to parse the rendered data, very ugly... + data = toString( ZOOM_record_get( rec, "render", &len ) ); + } + else + { +#if 0 + kdWarning() << "Remove debug from z3950connection.cpp" << endl; + { + QFile f1( QString::fromLatin1( "/tmp/z3950.raw" ) ); + if ( f1.open( IO_WriteOnly ) ) + { + QDataStream t( &f1 ); + t << ZOOM_record_get( rec, "raw", &len ); + } + f1.close(); + } +#endif + data = toXML( ZOOM_record_get( rec, "raw", &len ), m_sourceCharSet ); + } + Z3950ResultFound* ev = new Z3950ResultFound( data ); + QApplication::postEvent( m_fetcher, ev ); + } + + ZOOM_resultset_destroy( resultSet ); + ZOOM_query_destroy( query ); + + m_hasMore = m_limit < numResults; + if ( m_hasMore ) + { + m_start = m_limit; + m_limit += Z3950_DEFAULT_MAX_RECORDS; + } +#endif + done(); +} + +bool Z3950Connection::makeConnection() +{ + if ( m_connected ) + { + return true; + } +// kdDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl; +// I don't know what to do except assume database, user, and password are in locale encoding +#ifdef HAVE_YAZ + d->conn_opt = ZOOM_options_create(); + ZOOM_options_set( d->conn_opt, "implementationName", "KBibTeX" ); + ZOOM_options_set( d->conn_opt, "databaseName", toCString( m_dbname ) ); + ZOOM_options_set( d->conn_opt, "user", toCString( m_user ) ); + ZOOM_options_set( d->conn_opt, "password", toCString( m_password ) ); + + d->conn = ZOOM_connection_create( d->conn_opt ); + ZOOM_connection_connect( d->conn, m_host.latin1(), m_port ); + + int errcode; + const char* errmsg; // unused: carries same info as 'errcode' + const char* addinfo; + errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo ); + if ( errcode != 0 ) + { + ZOOM_options_destroy( d->conn_opt ); + ZOOM_connection_destroy( d->conn ); + m_connected = false; + + QString s = i18n( "Connection error %1: %2" ).arg( errcode ).arg( toString( errmsg ) ); + if ( !QCString( addinfo ).isEmpty() ) + { + s += " (" + toString( addinfo ) + ")"; + } + kdDebug() << "Z3950Connection::makeConnection() - " << s << endl; + done( s, MessageHandler::Error ); + return false; + } +#endif + m_connected = true; + return true; +} + +void Z3950Connection::done() +{ + checkPendingEvents(); + qApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) ); +} + +void Z3950Connection::done( const QString& msg_, int type_ ) +{ + checkPendingEvents(); + if ( m_aborted ) + { + qApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) ); + } + else + { + qApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore, msg_, type_ ) ); + } +} + +void Z3950Connection::checkPendingEvents() +{ + // if there's still some pending result events, go ahead and just wait 1 second + if ( resultsLeft > 0 ) + { + sleep( 1 ); + } +} + +inline +QCString Z3950Connection::toCString( const QString& text_ ) +{ + return iconvRun( text_.utf8(), QString::fromLatin1( "utf-8" ), m_sourceCharSet ); +} + +inline +QString Z3950Connection::toString( const QCString& text_ ) +{ + return QString::fromUtf8( iconvRun( text_, m_sourceCharSet, QString::fromLatin1( "utf-8" ) ) ); +} + +// static +QCString Z3950Connection::iconvRun( const QCString& text_, const QString& fromCharSet_, const QString& toCharSet_ ) +{ +#ifdef HAVE_YAZ + if ( text_.isEmpty() ) + { + return text_; + } + + if ( fromCharSet_ == toCharSet_ ) + { + return text_; + } + + yaz_iconv_t cd = yaz_iconv_open( toCharSet_.latin1(), fromCharSet_.latin1() ); + if ( !cd ) + { + // maybe it's iso 5426, which we sorta support + QString charSetLower = fromCharSet_.lower(); + charSetLower.remove( '-' ).remove( ' ' ); + if ( charSetLower == Latin1Literal( "iso5426" ) ) + { + return iconvRun( Iso5426Converter::toUtf8( text_ ).utf8(), QString::fromLatin1( "utf-8" ), toCharSet_ ); + } + else if ( charSetLower == Latin1Literal( "iso6937" ) ) + { + return iconvRun( Iso6937Converter::toUtf8( text_ ).utf8(), QString::fromLatin1( "utf-8" ), toCharSet_ ); + } + kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_ + << " to " << toCharSet_ << " is unsupported" << endl; + return text_; + } + + const char* input = text_; + size_t inlen = text_.length(); + + size_t outlen = 2 * inlen; // this is enough, right? + QMemArray<char> result0( outlen ); + char* result = result0.data(); + + int r = yaz_iconv( cd, const_cast<char**>( &input ), &inlen, &result, &outlen ); + if ( r <= 0 ) + { + kdDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl; + return text_; + } + // bug in yaz, need to flush buffer to catch last character + yaz_iconv( cd, 0, 0, &result, &outlen ); + + // length is pointer difference + size_t len = result - result0; + + QCString output = QCString( result0, len + 1 ); +// kdDebug() << "-------------------------------------------" << endl; +// kdDebug() << output << endl; +// kdDebug() << "-------------------------------------------" << endl; + yaz_iconv_close( cd ); + return output; +#endif + return text_; +} + +QString Z3950Connection::toXML( const QCString& marc_, const QString& charSet_ ) +{ +#ifdef HAVE_YAZ + if ( marc_.isEmpty() ) + { + kdDebug() << "Z3950Connection::toXML() - empty string" << endl; + return QString::null; + } + + yaz_iconv_t cd = yaz_iconv_open( "utf-8", charSet_.latin1() ); + if ( !cd ) + { + // maybe it's iso 5426, which we sorta support + QString charSetLower = charSet_.lower(); + charSetLower.remove( '-' ).remove( ' ' ); + if ( charSetLower == Latin1Literal( "iso5426" ) ) + { + return toXML( Iso5426Converter::toUtf8( marc_ ).utf8(), QString::fromLatin1( "utf-8" ) ); + } + else if ( charSetLower == Latin1Literal( "iso6937" ) ) + { + return toXML( Iso6937Converter::toUtf8( marc_ ).utf8(), QString::fromLatin1( "utf-8" ) ); + } + kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl; + return QString::null; + } + + yaz_marc_t mt = yaz_marc_create(); + yaz_marc_iconv( mt, cd ); + yaz_marc_xml( mt, YAZ_MARC_MARCXML ); + + // first 5 bytes are length + bool ok; +#if YAZ_VERSIONL < 0x030000 + int len = marc_.left( 5 ).toInt( &ok ); +#else + size_t len = marc_.left( 5 ).toInt( &ok ); +#endif + if ( ok && ( len < 25 || len > 100000 ) ) + { + kdDebug() << "Z3950Connection::toXML() - bad length: " << ( ok ? len : -1 ) << endl; + return QString::null; + } + +#if YAZ_VERSIONL < 0x030000 + char* result; +#else + const char* result; +#endif + int r = yaz_marc_decode_buf( mt, marc_, -1, &result, &len ); + if ( r <= 0 ) + { + kdDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl; + return QString::null; + } + + QString output = QString::fromLatin1( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ); + output += QString::fromUtf8( QCString( result, len + 1 ), len + 1 ); +// kdDebug() << QCString(result) << endl; +// kdDebug() << "-------------------------------------------" << endl; +// kdDebug() << output << endl; + yaz_iconv_close( cd ); + yaz_marc_destroy( mt ); + + return output; +#else // no yaz + return QString::null; +#endif +} |