/***************************************************************************
    copyright            : (C) 2005-2006 by Robby Stephenson
    email                : $EMAIL
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "z3950connection.h"
#include "z3950fetcher.h"
#include "messagehandler.h"
#include "../latin1literal.h"
#include "../tellico_debug.h"
#include "../iso5426converter.h"
#include "../iso6937converter.h"

#include <config.h>

#ifdef HAVE_YAZ
extern "C" {
#include <yaz/zoom.h>
#include <yaz/marcdisp.h>
#include <yaz/yaz-version.h>
}
#endif

#include <klocale.h>

#include <tqfile.h>

namespace {
  // size of the window of records requested per call to run()
  static const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
}

using Tellico::Fetch::Z3950ResultFound;
using Tellico::Fetch::Z3950Connection;

/**
 * Event carrying one fetched record. Holds a deep copy of the record text
 * and bumps the count of result events that are still alive.
 */
Z3950ResultFound::Z3950ResultFound(const TQString& s) : TQCustomEvent(uid())
    , m_result(TQDeepCopy<TQString>(s)) {
  ++Z3950Connection::resultsLeft;
}

/**
 * Decrements the count of result events that are still alive.
 */
Z3950ResultFound::~Z3950ResultFound() {
  --Z3950Connection::resultsLeft;
}

/**
 * Holder for the YAZ handles used by the connection.
 *
 * The handles start out null and are reset to null whenever they are
 * released, so they are never destroyed twice and never destroyed while
 * still uninitialized.
 */
class Z3950Connection::Private {
public:
#ifdef HAVE_YAZ
  Private() : conn_opt(0), conn(0) {}
  ~Private() { release(); }

  // Destroys whichever handles are currently held and nulls them out.
  // Options are destroyed before the connection, as in the original code.
  void release() {
    if(conn_opt) {
      ZOOM_options_destroy(conn_opt);
      conn_opt = 0;
    }
    if(conn) {
      ZOOM_connection_destroy(conn);
      conn = 0;
    }
  }

  ZOOM_options conn_opt;
  ZOOM_connection conn;
#else
  Private() {}
#endif
};

// number of Z3950ResultFound events currently alive
int Z3950Connection::resultsLeft = 0;

// since the character set goes into a yaz api call
// I'm paranoid about user insertions, so just grab 64
// characters at most
/**
 * Stores deep copies of all string arguments; the source character set
 * is truncated to 64 characters. No connection is opened here.
 */
Z3950Connection::Z3950Connection(Z3950Fetcher* fetcher,
                                 const TQString& host,
                                 uint port,
                                 const TQString& dbname,
                                 const TQString& sourceCharSet,
                                 const TQString& syntax,
                                 const TQString& esn)
    : TQThread()
    , d(new Private())
    , m_connected(false)
    , m_aborted(false)
    , m_fetcher(fetcher)
    , m_host(TQDeepCopy<TQString>(host))
    , m_port(port)
    , m_dbname(TQDeepCopy<TQString>(dbname))
    , m_sourceCharSet(TQDeepCopy<TQString>(sourceCharSet.left(64)))
    , m_syntax(TQDeepCopy<TQString>(syntax))
    , m_esn(TQDeepCopy<TQString>(esn))
    , m_start(0)
    , m_limit(Z3950_DEFAULT_MAX_RECORDS)
    , m_hasMore(false) {
}

/**
 * Marks the connection as closed and deletes the private data, whose
 * destructor releases any YAZ handles still held.
 */
Z3950Connection::~Z3950Connection() {
  m_connected = false;
  delete d;
  d = 0;
}

/**
 * Rewinds the record window to the first Z3950_DEFAULT_MAX_RECORDS records.
 */
void Z3950Connection::reset() {
  m_start = 0;
  m_limit = Z3950_DEFAULT_MAX_RECORDS;
}

/**
 * Stores a deep copy of the query; run() hands it to ZOOM_query_prefix().
 */
void Z3950Connection::setQuery(const TQString& query_) {
  m_pqn = TQDeepCopy<TQString>(query_);
}

/**
 * Stores deep copies of the credentials used by makeConnection().
 */
void Z3950Connection::setUserPassword(const TQString& user_, const TQString& pword_) {
  m_user = TQDeepCopy<TQString>(user_);
  m_password = TQDeepCopy<TQString>(pword_);
}

/**
 * Thread body: connects if necessary, runs the stored query, settles on a
 * record syntax, posts one Z3950ResultFound event per record in the current
 * window [m_start, m_limit) and finally posts a Z3950ConnectionDone event.
 *
 * Every exit path goes through done(); the result set and the query are
 * destroyed on every path once they have been created.
 */
void Z3950Connection::run() {
//  myDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
  m_aborted = false;
  m_hasMore = false;
  resultsLeft = 0;
#ifdef HAVE_YAZ

  if(!makeConnection()) {
    // NOTE(review): makeConnection() has already posted a done event carrying
    // the error message, so a second done event is posted here; kept as in
    // the original since the fetcher's handling is not visible from this file
    done();
    return;
  }

  ZOOM_query query = ZOOM_query_create();
  myLog() << "Z3950Connection::run() - pqn = " << toCString(m_pqn) << endl;
  int errcode = ZOOM_query_prefix(query, toCString(m_pqn));
  if(errcode != 0) {
    myDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
    ZOOM_query_destroy(query);
    TQString s = i18n("Query error!");
    s += ' ' + m_pqn;
    done(s, MessageHandler::Error);
    return;
  }

  ZOOM_resultset resultSet = ZOOM_connection_search(d->conn, query);

  // check abort status
  if(m_aborted) {
    // the search objects already exist at this point, so free them
    // before bailing out
    ZOOM_resultset_destroy(resultSet);
    ZOOM_query_destroy(query);
    done();
    return;
  }

  // I know the LOC wants the syntax = "xml" and esn = "mods"
  // to get MODS data, that seems a bit odd...
  // esn only makes sense for marc and grs-1
  // if syntax is mods, set esn to mods too
  TQCString type = "raw";
  if(m_syntax == Latin1Literal("mods")) {
    m_syntax = TQString::tqfromLatin1("xml");
    ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
    type = "xml";
  } else {
    ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
  }
  ZOOM_resultset_option_set(resultSet, "start", TQCString().setNum(m_start));
  ZOOM_resultset_option_set(resultSet, "count", TQCString().setNum(m_limit-m_start));
  // search in default syntax, unless syntax is already set
  if(!m_syntax.isEmpty()) {
    ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", m_syntax.latin1());
  }

  const char* errmsg;
  const char* addinfo;
  errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
  if(errcode != 0) {
    ZOOM_resultset_destroy(resultSet);
    ZOOM_query_destroy(query);
    // forces makeConnection() to build a fresh connection on the next run
    m_connected = false;

    TQString s = i18n("Connection search error %1: %2").tqarg(errcode).tqarg(toString(errmsg));
    if(!TQCString(addinfo).isEmpty()) {
      s += " (" + toString(addinfo) + ")";
    }
    myDebug() << "Z3950Connection::run() - " << s << endl;
    done(s, MessageHandler::Error);
    return;
  }

  const size_t numResults = ZOOM_resultset_size(resultSet);

  TQString newSyntax = m_syntax;
  if(numResults > 0) {
    myLog() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
    // so now we know that results exist, might have to check syntax
    // len is initialized so that the checks below stay well defined even if
    // no record comes back and ZOOM_record_get() leaves it untouched
    int len = 0;
    ZOOM_record rec = ZOOM_resultset_record(resultSet, 0);
    // want raw unless it's mods
    ZOOM_record_get(rec, type, &len);
    if(len > 0 && m_syntax.isEmpty()) {
      newSyntax = TQString::tqfromLatin1(ZOOM_record_get(rec, "syntax", &len)).lower();
      myLog() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
      if(newSyntax == Latin1Literal("mods") || newSyntax == Latin1Literal("xml")) {
        m_syntax = TQString::tqfromLatin1("xml");
        ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
      } else if(newSyntax == Latin1Literal("grs-1")) {
        // if it's defaulting to grs-1, go ahead and change it to try to get a marc
        // record since grs-1 is a last resort for us
        newSyntax.truncate(0);
      }
    }
    // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
    if(newSyntax != Latin1Literal("xml") &&
       newSyntax != Latin1Literal("usmarc") &&
       newSyntax != Latin1Literal("marc21") &&
       newSyntax != Latin1Literal("unimarc") &&
       newSyntax != Latin1Literal("grs-1")) {
      // walk through the syntaxes we understand, in order of preference,
      // until the first record comes back non-empty
      myLog() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
      newSyntax = TQString::tqfromLatin1("xml");
      ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
      ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
      rec = ZOOM_resultset_record(resultSet, 0);
      ZOOM_record_get(rec, "xml", &len);
      if(len == 0) {
        // change set name back
        ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
        newSyntax = TQString::tqfromLatin1("usmarc"); // try usmarc
        myLog() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl;
        ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
        rec = ZOOM_resultset_record(resultSet, 0);
        ZOOM_record_get(rec, "raw", &len);
      }
      if(len == 0) {
        newSyntax = TQString::tqfromLatin1("marc21"); // try marc21
        myLog() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl;
        ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
        rec = ZOOM_resultset_record(resultSet, 0);
        ZOOM_record_get(rec, "raw", &len);
      }
      if(len == 0) {
        newSyntax = TQString::tqfromLatin1("unimarc"); // try unimarc
        myLog() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl;
        ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
        rec = ZOOM_resultset_record(resultSet, 0);
        ZOOM_record_get(rec, "raw", &len);
      }
      if(len == 0) {
        newSyntax = TQString::tqfromLatin1("grs-1"); // try grs-1
        myLog() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl;
        ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
        rec = ZOOM_resultset_record(resultSet, 0);
        ZOOM_record_get(rec, "raw", &len);
      }
      if(len == 0) {
        myLog() << "Z3950Connection::run() - giving up" << endl;
        ZOOM_resultset_destroy(resultSet);
        ZOOM_query_destroy(query);
        done(i18n("Record syntax error"), MessageHandler::Error);
        return;
      }
      myLog() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
    }
  }

  // go back to fooling ourselves and calling it mods
  if(m_syntax == Latin1Literal("xml")) {
    m_syntax = TQString::tqfromLatin1("mods");
  }
  if(newSyntax == Latin1Literal("xml")) {
    newSyntax = TQString::tqfromLatin1("mods");
  }
  // save syntax change for next time
  if(m_syntax != newSyntax) {
    kapp->postEvent(m_fetcher, new Z3950SyntaxChange(newSyntax));
    m_syntax = newSyntax;
  }

  if(m_sourceCharSet.isEmpty()) {
    m_sourceCharSet = TQString::tqfromLatin1("marc-8");
  }

  const size_t realLimit = TQMIN(numResults, m_limit);

  for(size_t i = m_start; i < realLimit && !m_aborted; ++i) {
    myLog() << "Z3950Connection::run() - grabbing index " << i << endl;
    ZOOM_record rec = ZOOM_resultset_record(resultSet, i);
    if(!rec) {
      myDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
      continue;
    }
    int len = 0;
    TQString data;
    if(m_syntax == Latin1Literal("mods")) {
      data = toString(ZOOM_record_get(rec, "xml", &len));
    } else if(m_syntax == Latin1Literal("grs-1")) { // grs-1
      // we're going to parse the rendered data, very ugly...
      data = toString(ZOOM_record_get(rec, "render", &len));
    } else {
#if 0
      kdWarning() << "Remove debug from z3950connection.cpp" << endl;
      {
        TQFile f1(TQString::tqfromLatin1("/tmp/z3950.raw"));
        if(f1.open(IO_WriteOnly)) {
          TQDataStream t(&f1);
          t << ZOOM_record_get(rec, "raw", &len);
        }
        f1.close();
      }
#endif
      data = toXML(ZOOM_record_get(rec, "raw", &len), m_sourceCharSet);
    }
    Z3950ResultFound* ev = new Z3950ResultFound(data);
    TQApplication::postEvent(m_fetcher, ev);
  }

  ZOOM_resultset_destroy(resultSet);
  ZOOM_query_destroy(query);

  // slide the window forward when the server has more records than we asked for
  m_hasMore = m_limit < numResults;
  if(m_hasMore) {
    m_start = m_limit;
    m_limit += Z3950_DEFAULT_MAX_RECORDS;
  }
#endif
  done();
}

/**
 * Opens the connection unless one is already marked as open.
 *
 * On failure the YAZ handles are released, a Z3950ConnectionDone event
 * carrying the error text is posted through done(), and false is returned.
 * Without YAZ support this only sets the connected flag.
 *
 * @return true if the connection is usable
 */
bool Z3950Connection::makeConnection() {
  if(m_connected) {
    return true;
  }
//  myDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
// I don't know what to do except assume database, user, and password are in locale encoding
#ifdef HAVE_YAZ
  // run() clears m_connected after a search error without tearing the
  // connection down, so drop any handles left over from an earlier attempt
  d->release();

  d->conn_opt = ZOOM_options_create();
  ZOOM_options_set(d->conn_opt, "implementationName", "Tellico");
  ZOOM_options_set(d->conn_opt, "databaseName",       toCString(m_dbname));
  ZOOM_options_set(d->conn_opt, "user",               toCString(m_user));
  ZOOM_options_set(d->conn_opt, "password",           toCString(m_password));

  d->conn = ZOOM_connection_create(d->conn_opt);
  ZOOM_connection_connect(d->conn, m_host.latin1(), m_port);

  const char* errmsg;  // text for 'errcode'
  const char* addinfo; // additional details, may be empty
  const int errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
  if(errcode != 0) {
    // build the message first: errmsg and addinfo were obtained from the
    // connection and must not be read once it has been destroyed
    TQString s = i18n("Connection error %1: %2").tqarg(errcode).tqarg(toString(errmsg));
    if(!TQCString(addinfo).isEmpty()) {
      s += " (" + toString(addinfo) + ")";
    }

    // release() nulls the handles, so ~Private will not destroy them again
    d->release();
    m_connected = false;

    myDebug() << "Z3950Connection::makeConnection() - " << s << endl;
    done(s, MessageHandler::Error);
    return false;
  }
#endif
  m_connected = true;
  return true;
}

/**
 * Posts a Z3950ConnectionDone event without a message to the fetcher.
 */
void Z3950Connection::done() {
  checkPendingEvents();
  kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore));
}

/**
 * Posts a Z3950ConnectionDone event to the fetcher; the message and its
 * type are dropped when the search has been aborted.
 */
void Z3950Connection::done(const TQString& msg_, int type_) {
  checkPendingEvents();
  if(m_aborted) {
    kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore));
  } else {
    kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore, msg_, type_));
  }
}

void Z3950Connection::checkPendingEvents() {
  // if there's still some pending result events, go ahead and just wait 1 second
  if(resultsLeft > 0) {
    sleep(1);
  }
}

/**
 * Converts text from UTF-8 into the source character set.
 */
inline
TQCString Z3950Connection::toCString(const TQString& text_) {
  return iconvRun(text_.utf8(), TQString::tqfromLatin1("utf-8"), m_sourceCharSet);
}

/**
 * Converts text from the source character set into a Unicode string.
 */
inline
TQString Z3950Connection::toString(const TQCString& text_) {
  return TQString::fromUtf8(iconvRun(text_, m_sourceCharSet, TQString::tqfromLatin1("utf-8")));
}

/**
 * Converts text_ from one character set to another through yaz_iconv.
 *
 * The input is returned unchanged when it is empty, when both character
 * sets are equal, when the conversion is unsupported, when decoding fails,
 * or when built without YAZ. ISO 5426 and ISO 6937 input that yaz cannot
 * open is first converted to UTF-8 with the bundled converters.
 */
// static
TQCString Z3950Connection::iconvRun(const TQCString& text_, const TQString& fromCharSet_, const TQString& toCharSet_) {
#ifdef HAVE_YAZ
  if(text_.isEmpty()) {
    return text_;
  }

  if(fromCharSet_ == toCharSet_) {
    return text_;
  }

  yaz_iconv_t cd = yaz_iconv_open(toCharSet_.latin1(), fromCharSet_.latin1());
  if(!cd) {
    // maybe it's iso 5426, which we sorta support
    TQString charSetLower = fromCharSet_.lower();
    charSetLower.remove('-').remove(' ');
    if(charSetLower == Latin1Literal("iso5426")) {
      return iconvRun(Iso5426Converter::toUtf8(text_).utf8(), TQString::tqfromLatin1("utf-8"), toCharSet_);
    } else if(charSetLower == Latin1Literal("iso6937")) {
      return iconvRun(Iso6937Converter::toUtf8(text_).utf8(), TQString::tqfromLatin1("utf-8"), toCharSet_);
    }
    kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
                << " to " << toCharSet_ << " is unsupported" << endl;
    return text_;
  }

  const char* input = text_;
  size_t inlen = text_.length();

  size_t outlen = 2 * inlen;  // this is enough, right?
  TQMemArray<char> result0(outlen);
  char* result = result0.data();

  int r = yaz_iconv(cd, const_cast<char**>(&input), &inlen, &result, &outlen);
  if(r <= 0) {
    myDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
    // the descriptor has to be closed on the failure path as well
    yaz_iconv_close(cd);
    return text_;
  }
  // bug in yaz, need to flush buffer to catch last character
  yaz_iconv(cd, 0, 0, &result, &outlen);

  // length is pointer difference
  size_t len = result - result0.data();

  // the extra byte leaves room for the terminating null
  TQCString output = TQCString(result0.data(), len+1);
//  myDebug() << "-------------------------------------------" << endl;
//  myDebug() << output << endl;
//  myDebug() << "-------------------------------------------" << endl;
  yaz_iconv_close(cd);
  return output;
#else
  return text_;
#endif
}

/**
 * Converts a raw MARC record in the given character set to MARCXML,
 * prefixed with an XML declaration.
 *
 * An empty string is returned when the input is empty, the character set
 * is unsupported, the record length field is out of range, decoding fails,
 * or when built without YAZ. The yaz handles are released on every path.
 */
TQString Z3950Connection::toXML(const TQCString& marc_, const TQString& charSet_) {
#ifdef HAVE_YAZ
  if(marc_.isEmpty()) {
    myDebug() << "Z3950Connection::toXML() - empty string" << endl;
    return TQString();
  }

  yaz_iconv_t cd = yaz_iconv_open("utf-8", charSet_.latin1());
  if(!cd) {
    // maybe it's iso 5426, which we sorta support
    TQString charSetLower = charSet_.lower();
    charSetLower.remove('-').remove(' ');
    if(charSetLower == Latin1Literal("iso5426")) {
      return toXML(Iso5426Converter::toUtf8(marc_).utf8(), TQString::tqfromLatin1("utf-8"));
    } else if(charSetLower == Latin1Literal("iso6937")) {
      return toXML(Iso6937Converter::toUtf8(marc_).utf8(), TQString::tqfromLatin1("utf-8"));
    }
    kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
    return TQString();
  }

  yaz_marc_t mt = yaz_marc_create();
  yaz_marc_iconv(mt, cd);
  yaz_marc_xml(mt, YAZ_MARC_MARCXML);

  // first 5 bytes are length
  bool ok;
#if YAZ_VERSIONL < 0x030000
  int len = marc_.left(5).toInt(&ok);
#else
  size_t len = marc_.left(5).toInt(&ok);
#endif
  if(ok && (len < 25 || len > 100000)) {
    myDebug() << "Z3950Connection::toXML() - bad length: " << len << endl;
    yaz_iconv_close(cd);
    yaz_marc_destroy(mt);
    return TQString();
  }

#if YAZ_VERSIONL < 0x030000
  char* result;
#else
  const char* result;
#endif
  int r = yaz_marc_decode_buf(mt, marc_, -1, &result, &len);
  if(r <= 0) {
    myDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
    yaz_iconv_close(cd);
    yaz_marc_destroy(mt);
    return TQString();
  }

  // copy the decoded text before the marc handle is destroyed below
  TQString output = TQString::tqfromLatin1("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  output += TQString::fromUtf8(TQCString(result, len+1), len+1);
//  myDebug() << TQCString(result) << endl;
//  myDebug() << "-------------------------------------------" << endl;
//  myDebug() << output << endl;
  yaz_iconv_close(cd);
  yaz_marc_destroy(mt);

  return output;
#else // no yaz
  return TQString();
#endif
}