diff options
author | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
---|---|---|
committer | tpearson <tpearson@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> | 2010-03-01 19:17:32 +0000 |
commit | e38d2351b83fa65c66ccde443777647ef5cb6cff (patch) | |
tree | 1897fc20e9f73a81c520a5b9f76f8ed042124883 /src/fetch/arxivfetcher.cpp | |
download | tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.tar.gz tellico-e38d2351b83fa65c66ccde443777647ef5cb6cff.zip |
Added KDE3 version of Tellico
git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/applications/tellico@1097620 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
Diffstat (limited to 'src/fetch/arxivfetcher.cpp')
-rw-r--r-- | src/fetch/arxivfetcher.cpp | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/src/fetch/arxivfetcher.cpp b/src/fetch/arxivfetcher.cpp new file mode 100644 index 0000000..442ef30 --- /dev/null +++ b/src/fetch/arxivfetcher.cpp @@ -0,0 +1,366 @@ +/*************************************************************************** + copyright : (C) 2007 by Robby Stephenson + email : robby@periapsis.org + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of version 2 of the GNU General Public License as * + * published by the Free Software Foundation; * + * * + ***************************************************************************/ + +#include "arxivfetcher.h" +#include "messagehandler.h" +#include "../translators/xslthandler.h" +#include "../translators/tellicoimporter.h" +#include "../tellico_kernel.h" +#include "../tellico_utils.h" +#include "../collection.h" +#include "../entry.h" +#include "../core/netaccess.h" +#include "../imagefactory.h" +#include "../tellico_debug.h" + +#include <klocale.h> +#include <kstandarddirs.h> +#include <kconfig.h> + +#include <qdom.h> +#include <qlabel.h> +#include <qlayout.h> + +//#define ARXIV_TEST + +namespace { + static const int ARXIV_RETURNS_PER_REQUEST = 20; + static const char* ARXIV_BASE_URL = "http://export.arxiv.org/api/query"; +} + +using Tellico::Fetch::ArxivFetcher; + +ArxivFetcher::ArxivFetcher(QObject* parent_) + : Fetcher(parent_), m_xsltHandler(0), m_start(0), m_job(0), m_started(false) { +} + +ArxivFetcher::~ArxivFetcher() { + delete m_xsltHandler; + m_xsltHandler = 0; +} + +QString ArxivFetcher::defaultName() { + return i18n("arXiv.org"); +} + +QString ArxivFetcher::source() const { + return m_name.isEmpty() ? defaultName() : m_name; +} + +bool ArxivFetcher::canFetch(int type) const { + return type == Data::Collection::Bibtex; +} + +void ArxivFetcher::readConfigHook(const KConfigGroup&) { +} + +void ArxivFetcher::search(FetchKey key_, const QString& value_) { + m_key = key_; + m_value = value_.stripWhiteSpace(); + m_started = true; + m_start = 0; + m_total = -1; + doSearch(); +} + +void ArxivFetcher::continueSearch() { + m_started = true; + doSearch(); +} + +void ArxivFetcher::doSearch() { + if(!canFetch(Kernel::self()->collectionType())) { + message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning); + stop(); + return; + } + + m_data.truncate(0); + +// myDebug() << "ArxivFetcher::search() - value = " << value_ << endl; + + KURL u = searchURL(m_key, m_value); + if(u.isEmpty()) { + stop(); + return; + } + + m_job = KIO::get(u, false, false); + connect(m_job, SIGNAL(data(KIO::Job*, const QByteArray&)), + SLOT(slotData(KIO::Job*, const QByteArray&))); + connect(m_job, SIGNAL(result(KIO::Job*)), + SLOT(slotComplete(KIO::Job*))); +} + +void ArxivFetcher::stop() { + if(!m_started) { + return; + } +// myDebug() << "ArxivFetcher::stop()" << endl; + if(m_job) { + m_job->kill(); + m_job = 0; + } + m_data.truncate(0); + m_started = false; + emit signalDone(this); +} + +void ArxivFetcher::slotData(KIO::Job*, const QByteArray& data_) { + QDataStream stream(m_data, IO_WriteOnly | IO_Append); + stream.writeRawBytes(data_.data(), data_.size()); +} + +void ArxivFetcher::slotComplete(KIO::Job* job_) { +// myDebug() << "ArxivFetcher::slotComplete()" << endl; + // since the fetch is done, don't worry about holding the job pointer + m_job = 0; + + if(job_->error()) { + job_->showErrorDialog(Kernel::self()->widget()); + stop(); + return; + } + + if(m_data.isEmpty()) { + myDebug() << "ArxivFetcher::slotComplete() - no data" << endl; + stop(); + return; + } + +#if 0 + kdWarning() << "Remove debug from arxivfetcher.cpp" << endl; + QFile f(QString::fromLatin1("/tmp/test.xml")); + if(f.open(IO_WriteOnly)) { + QTextStream t(&f); + t.setEncoding(QTextStream::UnicodeUTF8); + t << QCString(m_data, m_data.size()+1); + } + f.close(); +#endif + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + stop(); + return; + } + } + + if(m_total == -1) { + QDomDocument dom; + if(!dom.setContent(m_data, true /*namespace*/)) { + kdWarning() << "ArxivFetcher::slotComplete() - server did not return valid XML." << endl; + return; + } + // total is top level element, with attribute totalResultsAvailable + QDomNodeList list = dom.elementsByTagNameNS(QString::fromLatin1("http://a9.com/-/spec/opensearch/1.1/"), + QString::fromLatin1("totalResults")); + if(list.count() > 0) { + m_total = list.item(0).toElement().text().toInt(); + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(QString::fromUtf8(m_data, m_data.size())); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + + if(!coll) { + myDebug() << "ArxivFetcher::slotComplete() - no valid result" << endl; + stop(); + return; + } + + Data::EntryVec entries = coll->entries(); + for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) { + if(!m_started) { + // might get aborted + break; + } + QString desc = entry->field(QString::fromLatin1("author")) + + QChar('/') + entry->field(QString::fromLatin1("publisher")); + if(!entry->field(QString::fromLatin1("year")).isEmpty()) { + desc += QChar('/') + entry->field(QString::fromLatin1("year")); + } + + SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(QString::fromLatin1("isbn"))); + m_entries.insert(r->uid, Data::EntryPtr(entry)); + emit signalResultFound(r); + } + + m_start = m_entries.count(); + m_hasMoreResults = m_start < m_total; + stop(); // required +} + +Tellico::Data::EntryPtr ArxivFetcher::fetchEntry(uint uid_) { + Data::EntryPtr entry = m_entries[uid_]; + // if URL but no cover image, fetch it + if(!entry->field(QString::fromLatin1("url")).isEmpty()) { + Data::CollPtr coll = entry->collection(); + Data::FieldPtr field = coll->fieldByName(QString::fromLatin1("cover")); + if(!field && !coll->imageFields().isEmpty()) { + field = coll->imageFields().front(); + } else if(!field) { + field = new Data::Field(QString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image); + coll->addField(field); + } + if(entry->field(field).isEmpty()) { + QPixmap pix = NetAccess::filePreview(entry->field(QString::fromLatin1("url"))); + if(!pix.isNull()) { + QString id = ImageFactory::addImage(pix, QString::fromLatin1("PNG")); + if(!id.isEmpty()) { + entry->setField(field, id); + } + } + } + } + return entry; +} + +void ArxivFetcher::initXSLTHandler() { + QString xsltfile = locate("appdata", QString::fromLatin1("arxiv2tellico.xsl")); + if(xsltfile.isEmpty()) { + kdWarning() << "ArxivFetcher::initXSLTHandler() - can not locate arxiv2tellico.xsl." << endl; + return; + } + + KURL u; + u.setPath(xsltfile); + + delete m_xsltHandler; + m_xsltHandler = new XSLTHandler(u); + if(!m_xsltHandler->isValid()) { + kdWarning() << "ArxivFetcher::initXSLTHandler() - error in arxiv2tellico.xsl." << endl; + delete m_xsltHandler; + m_xsltHandler = 0; + return; + } +} + +KURL ArxivFetcher::searchURL(FetchKey key_, const QString& value_) const { + KURL u(QString::fromLatin1(ARXIV_BASE_URL)); + u.addQueryItem(QString::fromLatin1("start"), QString::number(m_start)); + u.addQueryItem(QString::fromLatin1("max_results"), QString::number(ARXIV_RETURNS_PER_REQUEST)); + + // quotes should be used if spaces are present, just use all the time + QString quotedValue = '"' + value_ + '"'; + switch(key_) { + case Title: + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("ti:%1").arg(quotedValue)); + break; + + case Person: + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("au:%1").arg(quotedValue)); + break; + + case Keyword: + // keyword gets to use all the words without being quoted + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("all:%1").arg(value_)); + break; + + case ArxivID: + { + // remove prefix and/or version number + QString value = value_; + value.remove(QRegExp(QString::fromLatin1("^arxiv:"), false)); + value.remove(QRegExp(QString::fromLatin1("v\\d+$"))); + u.addQueryItem(QString::fromLatin1("search_query"), QString::fromLatin1("id:%1").arg(value)); + } + break; + + default: + kdWarning() << "ArxivFetcher::search() - key not recognized: " << m_key << endl; + return KURL(); + } + +#ifdef ARXIV_TEST + u = KURL::fromPathOrURL(QString::fromLatin1("/home/robby/arxiv.xml")); +#endif + myDebug() << "ArxivFetcher::search() - url: " << u.url() << endl; + return u; +} + +void ArxivFetcher::updateEntry(Data::EntryPtr entry_) { + QString id = entry_->field(QString::fromLatin1("arxiv")); + if(!id.isEmpty()) { + search(Fetch::ArxivID, id); + return; + } + + // optimistically try searching for title and rely on Collection::sameEntry() to figure things out + QString t = entry_->field(QString::fromLatin1("title")); + if(!t.isEmpty()) { + search(Fetch::Title, t); + return; + } + + myDebug() << "ArxivFetcher::updateEntry() - insufficient info to search" << endl; + emit signalDone(this); // always need to emit this if not continuing with the search +} + +void ArxivFetcher::updateEntrySynchronous(Data::EntryPtr entry) { + if(!entry) { + return; + } + QString arxiv = entry->field(QString::fromLatin1("arxiv")); + if(arxiv.isEmpty()) { + return; + } + + KURL u = searchURL(ArxivID, arxiv); + QString xml = FileHandler::readTextFile(u, true, true); + if(xml.isEmpty()) { + return; + } + + if(!m_xsltHandler) { + initXSLTHandler(); + if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading + return; + } + } + + // assume result is always utf-8 + QString str = m_xsltHandler->applyStylesheet(xml); + Import::TellicoImporter imp(str); + Data::CollPtr coll = imp.collection(); + if(coll && coll->entryCount() > 0) { + myLog() << "ArxivFetcher::updateEntrySynchronous() - found Arxiv result, merging" << endl; + Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/); + // the arxiv id might have a version# + entry->setField(QString::fromLatin1("arxiv"), + coll->entries().front()->field(QString::fromLatin1("arxiv"))); + } +} + +Tellico::Fetch::ConfigWidget* ArxivFetcher::configWidget(QWidget* parent_) const { + return new ArxivFetcher::ConfigWidget(parent_, this); +} + +ArxivFetcher::ConfigWidget::ConfigWidget(QWidget* parent_, const ArxivFetcher*) + : Fetch::ConfigWidget(parent_) { + QVBoxLayout* l = new QVBoxLayout(optionsWidget()); + l->addWidget(new QLabel(i18n("This source has no options."), optionsWidget())); + l->addStretch(); +} + +void ArxivFetcher::ConfigWidget::saveConfig(KConfigGroup&) { +} + +QString ArxivFetcher::ConfigWidget::preferredName() const { + return ArxivFetcher::defaultName(); +} + +#include "arxivfetcher.moc" |