diff options
author | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
---|---|---|
committer | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
commit | e38d2351b83fa65c66ccde443777647ef5cb6cff (patch) | |
tree | 1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/fetch/entrezfetcher.cpp | |
download | tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip |
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/fetch/entrezfetcher.cpp')
-rw-r--r-- | src/fetch/entrezfetcher.cpp | 498 |
1 files changed, 498 insertions, 0 deletions
diff --git a/src/fetch/entrezfetcher.cpp b/src/fetch/entrezfetcher.cpp new file mode 100644 index 0000000..14b9e20 --- /dev/null +++ b/src/fetch/entrezfetcher.cpp @@ -0,0 +1,498 @@ +/*************************************************************************** + copyright : (C) 2005-2006 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "entrezfetcher.h" +#include "../tellico_kernel.h" +#include "../latin1literal.h" +#include "../collection.h" +#include "../entry.h" +#include "../filehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kconfig.h> +#include <kstandarddirs.h> +#include <kio/job.h> + +#include <qdom.h> +#include <qlabel.h> +#include <qlayout.h> +#include <qfile.h> + +//#define ENTREZ_TEST + +namespace { + static const int ENTREZ_MAX_RETURNS_TOTAL = 25; + static const char* ENTREZ_BASE_URL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; + static const char* ENTREZ_SEARCH_CGI = "esearch.fcgi"; + static const char* ENTREZ_SUMMARY_CGI = "esummary.fcgi"; + static const char* ENTREZ_FETCH_CGI = "efetch.fcgi"; + static const char* ENTREZ_LINK_CGI = "elink.fcgi"; + static const char* ENTREZ_DEFAULT_DATABASE = "pubmed"; +} + +using Tellico::Fetch::EntrezFetcher; + +EntrezFetcher::EntrezFetcher(QObject* parent_, const char* name_) : Fetcher(parent_, name_), m_xsltHandler(0), + m_step(Begin), m_started(false) { +} + +EntrezFetcher::~EntrezFetcher() { +} + +QString EntrezFetcher::defaultName() { + return i18n("Entrez Database"); +} + +QString EntrezFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool EntrezFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void EntrezFetcher::readConfigHook(const KConfigGroup& config_) { + QString s = config_.readEntry("Database", QString::fromLatin1(ENTREZ_DEFAULT_DATABASE)); // default to pubmed + if(!s.isEmpty()) { + m_dbname = s; + } + m_fields = config_.readListEntry("Custom Fields"); +} + +void EntrezFetcher::search(FetchKey key_, const QString& value_) { + m_started = true; + m_start = 1; + m_total = -1; + +// only search if current collection is a bibliography + if(!canFetch(Kernel::self()->collectionType())) { + myDebug() << "EntrezFetcher::search() - collection type mismatch, stopping" << endl; + stop(); + return; + } + if(m_dbname.isEmpty()) { + m_dbname = QString::fromLatin1(ENTREZ_DEFAULT_DATABASE); + } + +#ifdef ENTREZ_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/esearch.xml")); +#else + KURL u(QString::fromLatin1(ENTREZ_BASE_URL)); + u.addPath(QString::fromLatin1(ENTREZ_SEARCH_CGI)); + u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("usehistory"), QString::fromLatin1("y")); + u.addQueryItem(QString::fromLatin1("retmax"), QString::fromLatin1("1")); // we're just getting the count + u.addQueryItem(QString::fromLatin1("db"), m_dbname); + u.addQueryItem(QString::fromLatin1("term"), value_); + switch(key_) { + case Title: + u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("titl")); + break; + + case Person: + u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("auth")); + break; + + case Keyword: + // for Tellico Keyword searches basically mean search for any field matching +// u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("word")); + break; + + case PubmedID: + u.addQueryItem(QString::fromLatin1("field"), QString::fromLatin1("pmid")); + break; + + case DOI: + case Raw: + u.setQuery(u.query() + '&' + value_); + break; + + default: + kdWarning() << "EntrezFetcher::search() - FetchKey not supported" << endl; + stop(); + return; + } +#endif + + m_step = Search; +// myLog() << "EntrezFetcher::doSearch() - url: " << u.url() << endl; + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void EntrezFetcher::continueSearch() { + m_started = true; + doSummary(); +} + +void EntrezFetcher::stop() { + if(!m_started) { + return; + } + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + m_step = Begin; + emit signalDone(this); +} + +void EntrezFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void EntrezFetcher::slotComplete(KIO::Job* job_) { + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "EntrezFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from entrezfetcher.cpp: " << __LINE__ << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + switch(m_step) { + case Search: + searchResults(); + break; + case Summary: + summaryResults(); + break; + case Begin: + case Fetch: + default: + myLog() << "EntrezFetcher::slotComplete() - wrong step = " << m_step << endl; + stop(); + break; + } +} + +void EntrezFetcher::searchResults() { + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + kdWarning() << "EntrezFetcher::searchResults() - server did not return valid XML." << endl; + stop(); + return; + } + // find Count, QueryKey, and WebEnv elements + int count = 0; + for(QDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) { + QDomElement e = n.toElement(); + if(e.isNull()) { + continue; + } + if(e.tagName() == Latin1Literal("Count")) { + m_total = e.text().toInt(); + ++count; + } else if(e.tagName() == Latin1Literal("QueryKey")) { + m_queryKey = e.text(); + ++count; + } else if(e.tagName() == Latin1Literal("WebEnv")) { + m_webEnv = e.text(); + ++count; + } + if(count >= 3) { + break; // found them all + } + } + + m_data.truncate(0); + doSummary(); +} + +void EntrezFetcher::doSummary() { +#ifdef ENTREZ_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/esummary.xml")); +#else + KURL u(QString::fromLatin1(ENTREZ_BASE_URL)); + u.addPath(QString::fromLatin1(ENTREZ_SUMMARY_CGI)); + u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("retstart"), QString::number(m_start)); + u.addQueryItem(QString::fromLatin1("retmax"), QString::number(QMIN(m_total-m_start-1, ENTREZ_MAX_RETURNS_TOTAL))); + u.addQueryItem(QString::fromLatin1("usehistory"), QString::fromLatin1("y")); + u.addQueryItem(QString::fromLatin1("db"), m_dbname); + u.addQueryItem(QString::fromLatin1("query_key"), m_queryKey); + u.addQueryItem(QString::fromLatin1("WebEnv"), m_webEnv); +#endif + + m_step = Summary; +// myLog() << "EntrezFetcher::searchResults() - url: " << u.url() << endl; + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void EntrezFetcher::summaryResults() { + QDomDocument dom; + if(!dom.setContent(m_data, false)) { + kdWarning() << "EntrezFetcher::summaryResults() - server did not return valid XML." << endl; + stop(); + return; + } + // top child is eSummaryResult + // all children are DocSum + for(QDomNode n = dom.documentElement().firstChild(); !n.isNull(); n = n.nextSibling()) { + QDomElement e = n.toElement(); + if(e.isNull() || e.tagName() != Latin1Literal("DocSum")) { + continue; + } + QDomNodeList nodes = e.elementsByTagName(QString::fromLatin1("Id")); + if(nodes.count() == 0) { + myDebug() << "EntrezFetcher::summaryResults() - no Id elements" << endl; + continue; + } + int id = nodes.item(0).toElement().text().toInt(); + QString title, pubdate, authors; + nodes = e.elementsByTagName(QString::fromLatin1("Item")); + for(uint j = 0; j < nodes.count(); ++j) { + if(nodes.item(j).toElement().attribute(QString::fromLatin1("Name")) == Latin1Literal("Title")) { + title = nodes.item(j).toElement().text(); + } else if(nodes.item(j).toElement().attribute(QString::fromLatin1("Name")) == Latin1Literal("PubDate")) { + pubdate = nodes.item(j).toElement().text(); + } else if(nodes.item(j).toElement().attribute(QString::fromLatin1("Name")) == Latin1Literal("AuthorList")) { + QStringList list; + for(QDomNode aNode = nodes.item(j).firstChild(); !aNode.isNull(); aNode = aNode.nextSibling()) { + // lazy, assume all children Items are authors + if(aNode.nodeName() == Latin1Literal("Item")) { + list << aNode.toElement().text(); + } + } + authors = list.join(QString::fromLatin1("; ")); + } + if(!title.isEmpty() && !pubdate.isEmpty() && !authors.isEmpty()) { + break; // done now + } + } + SearchResult* r = new SearchResult(this, title, pubdate + '/' + authors, QString()); + m_matches.insert(r->uid, id); + emit signalResultFound(r); + } + m_start = m_matches.count() + 1; + m_hasMoreResults = m_start <= m_total; + stop(); // done searching +} + +Tellico::Data::EntryPtr EntrezFetcher::fetchEntry(uint uid_) { + // if we already grabbed this one, then just pull it out of the dict + Data::EntryPtr entry = m_entries[uid_]; + if(entry) { + return entry; + } + + if(!m_matches.contains(uid_)) { + return 0; + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return 0; + } + } + + int id = m_matches[uid_]; +#ifdef ENTREZ_TEST + KURL u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/pubmed.xml")); +#else + KURL u(QString::fromLatin1(ENTREZ_BASE_URL)); + u.addPath(QString::fromLatin1(ENTREZ_FETCH_CGI)); + u.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + u.addQueryItem(QString::fromLatin1("retmode"), QString::fromLatin1("xml")); + u.addQueryItem(QString::fromLatin1("rettype"), QString::fromLatin1("abstract")); + u.addQueryItem(QString::fromLatin1("db"), m_dbname); + u.addQueryItem(QString::fromLatin1("id"), QString::number(id)); +#endif + // now it's sychronous, and we know that it's utf8 + QString xmlOutput = FileHandler::readTextFile(u, false /*quiet*/, true /*utf8*/); + if(xmlOutput.isEmpty()) { + kdWarning() << "EntrezFetcher::fetchEntry() - unable to download " << u << endl; + return 0; + } +#if 0 + kdWarning() << "EntrezFetcher::fetchEntry() - turn me off!" << endl; + QFile f1(QString::fromLatin1("/tmp/test-entry.xml")); + if(f1.open(IO_WriteOnly)) { + QTextStream t(&f1); + t.setEncoding(QTextStream::UnicodeUTF8); + t << xmlOutput; + } + f1.close(); +#endif + QString str = m_xsltHandler->applyStylesheet(xmlOutput); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(!coll) { + kdWarning() << "EntrezFetcher::fetchEntry() - invalid collection" << endl; + return 0; + } + if(coll->entryCount() == 0) { + myDebug() << "EntrezFetcher::fetchEntry() - no entries in collection" << endl; + return 0; + } else if(coll->entryCount() > 1) { + myDebug() << "EntrezFetcher::fetchEntry() - collection has multiple entries, taking first one" << endl; + } + + Data::EntryPtr e = coll->entries().front(); + + // try to get a link, but only if necessary + if(m_fields.contains(QString::fromLatin1("url"))) { + KURL link(QString::fromLatin1(ENTREZ_BASE_URL)); + link.addPath(QString::fromLatin1(ENTREZ_LINK_CGI)); + link.addQueryItem(QString::fromLatin1("tool"), QString::fromLatin1("Tellico")); + link.addQueryItem(QString::fromLatin1("cmd"), QString::fromLatin1("llinks")); + link.addQueryItem(QString::fromLatin1("db"), m_dbname); + link.addQueryItem(QString::fromLatin1("dbfrom"), m_dbname); + link.addQueryItem(QString::fromLatin1("id"), QString::number(id)); + + QDomDocument linkDom = FileHandler::readXMLFile(link, false /* namespace */, true /* quiet */); + // need eLinkResult/LinkSet/IdUrlList/IdUrlSet/ObjUrl/Url + QDomNode linkNode = linkDom.namedItem(QString::fromLatin1("eLinkResult")) + .namedItem(QString::fromLatin1("LinkSet")) + .namedItem(QString::fromLatin1("IdUrlList")) + .namedItem(QString::fromLatin1("IdUrlSet")) + .namedItem(QString::fromLatin1("ObjUrl")) + .namedItem(QString::fromLatin1("Url")); + if(!linkNode.isNull()) { + QString u = linkNode.toElement().text(); +// myDebug() << u << endl; + if(!u.isEmpty()) { + if(!coll->hasField(QString::fromLatin1("url"))) { + Data::FieldPtr field = new Data::Field(QString::fromLatin1("url"), i18n("URL"), Data::Field::URL); + field->setCategory(i18n("Miscellaneous")); + coll->addField(field); + } + e->setField(QString::fromLatin1("url"), u); + } + } + } + + const StringMap customFields = EntrezFetcher::customFields(); + for(StringMap::ConstIterator it = customFields.begin(); it != customFields.end(); ++it) { + if(!m_fields.contains(it.key())) { + coll->removeField(it.key()); + } + } + + m_entries.insert(uid_, e); + return e; +} + +void EntrezFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("pubmed2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "EntrezFetcher::initXSLTHandler() - can not locate pubmed2tellico.xsl." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + if(!m_xsltHandler) { + m_xsltHandler = new XSLTHandler(u); + } + if(!m_xsltHandler->isValid()) { + kdWarning() << "EntrezFetcher::initXSLTHandler() - error in pubmed2tellico.xsl." << endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +void EntrezFetcher::updateEntry(Data::EntryPtr entry_) { +// myDebug() << "EntrezFetcher::updateEntry()" << endl; + QString s = entry_->field(QString::fromLatin1("pmid")); + if(!s.isEmpty()) { + search(PubmedID, s); + return; + } + + s = entry_->field(QString::fromLatin1("doi")); + if(!s.isEmpty()) { + search(DOI, s); + return; + } + + s = entry_->field(QString::fromLatin1("title")); + if(!s.isEmpty()) { + search(Title, s); + return; + } + + myDebug() << "EntrezFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +Tellico::Fetch::ConfigWidget* EntrezFetcher::configWidget(QWidget* parent_) const { + return new EntrezFetcher::ConfigWidget(parent_, this); +} + +EntrezFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const EntrezFetcher* fetcher_/*=0*/) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); + + // now add additional fields widget + addFieldsWidget(EntrezFetcher::customFields(), fetcher_ ? fetcher_->m_fields : QStringList()); +} + +void EntrezFetcher::ConfigWidget::saveConfig(KConfigGroup& config_) { + saveFieldsConfig(config_); + slotSetModified(false); +} + +QString EntrezFetcher::ConfigWidget::preferredName() const { + return EntrezFetcher::defaultName(); +} + +//static +Tellico::StringMap EntrezFetcher::customFields() { + StringMap map; + map[QString::fromLatin1("institution")] = i18n("Institution"); + map[QString::fromLatin1("abstract")] = i18n("Abstract"); + map[QString::fromLatin1("url")] = i18n("URL"); + return map; +} + +#include "entrezfetcher.moc" |