/***************************************************************************
    copyright            : (C) 2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "ibsfetcher.h"
#include "messagehandler.h"
#include "../tellico_kernel.h"
#include "../tellico_utils.h"
#include "../collections/bookcollection.h"
#include "../entry.h"
#include "../filehandler.h"
#include "../latin1literal.h"
#include "../imagefactory.h"
#include "../tellico_debug.h"

// NOTE(review): the seven system header names were missing from the copy this
// file was reviewed from (bare "#include" directives). The names below are
// inferred from the symbols used in this file (i18n, TDEConfigGroup, TDEIO::get,
// TQRegExp, TQVBoxLayout, TQLabel, TQFile) -- confirm against upstream.
#include <tdelocale.h>
#include <tdeconfig.h>
#include <tdeio/job.h>

#include <tqregexp.h>
#include <tqlayout.h>
#include <tqlabel.h>
#include <tqfile.h>

//#define IBS_TEST

namespace {
  // Search page of the site; search() appends the query items to this URL.
  static const char* IBS_BASE_URL = "http://www.internetbookshop.it/ser/serpge.asp";
}

using Tellico::Fetch::IBSFetcher;

/**
 * Creates a fetcher that is not yet running a search.
 */
IBSFetcher::IBSFetcher(TQObject* parent_, const char* name_ /*=0*/)
    : Fetcher(parent_, name_), m_started(false) {
}

/**
 * Returns the translated default display name of this data source.
 */
TQString IBSFetcher::defaultName() {
  return i18n("Internet Bookshop (ibs.it)");
}

/**
 * Returns the configured name of this source, or the default name when
 * no name has been set.
 */
TQString IBSFetcher::source() const {
  return m_name.isEmpty() ? defaultName() : m_name;
}

/**
 * Only book and bibtex collections can be searched with this source.
 */
bool IBSFetcher::canFetch(int type) const {
  return type == Data::Collection::Book || type == Data::Collection::Bibtex;
}

/**
 * This source reads nothing from its configuration group.
 */
void IBSFetcher::readConfigHook(const TDEConfigGroup& config_) {
  Q_UNUSED(config_);
}

/**
 * Starts an asynchronous search.
 *
 * Builds the request URL for the given key, starts a TDEIO get job and
 * connects its result to slotCompleteISBN() for ISBN searches or to
 * slotComplete() for every other key. If the current collection type is
 * not supported or the key is not recognized, the search is stopped
 * immediately (which emits signalDone()).
 *
 * @param key_   what kind of value is being searched for
 * @param value_ the search term
 */
void IBSFetcher::search(FetchKey key_, const TQString& value_) {
  m_started = true;
  m_matches.clear();

#ifdef IBS_TEST
  KURL u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/ibs.html"));
#else
  KURL u(TQString::fromLatin1(IBS_BASE_URL));

  if(!canFetch(Kernel::self()->collectionType())) {
    message(i18n("%1 does not allow searching for this collection type.").arg(source()),
            MessageHandler::Warning);
    stop();
    return;
  }

  switch(key_) {
    case Title:
      u.addQueryItem(TQString::fromLatin1("Type"), TQString::fromLatin1("keyword"));
      u.addQueryItem(TQString::fromLatin1("T"), value_);
      break;

    case Person:
      u.addQueryItem(TQString::fromLatin1("Type"), TQString::fromLatin1("keyword"));
      u.addQueryItem(TQString::fromLatin1("A"), value_);
      break;

    case ISBN:
      {
        TQString s = value_;
        s.remove('-');
        // limit to first isbn
        s = s.section(';', 0, 0);
        // ISBN lookups go to a different page than keyword searches
        u.setFileName(TQString::fromLatin1("serdsp.asp"));
        u.addQueryItem(TQString::fromLatin1("isbn"), s);
      }
      break;

    case Keyword:
      u.addQueryItem(TQString::fromLatin1("Type"), TQString::fromLatin1("keyword"));
      u.addQueryItem(TQString::fromLatin1("S"), value_);
      break;

    default:
      kdWarning() << "IBSFetcher::search() - key not recognized: " << key_ << endl;
      stop();
      return;
  }
#endif
//  myDebug() << "IBSFetcher::search() - url: " << u.url() << endl;

  m_job = TDEIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(TDEIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(TDEIO::Job*, const TQByteArray&)));
  if(key_ == ISBN) {
    connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
            TQT_SLOT(slotCompleteISBN(TDEIO::Job*)));
  } else {
    connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
            TQT_SLOT(slotComplete(TDEIO::Job*)));
  }
}

/**
 * Ends the current search, if one is running: kills any pending job,
 * discards the downloaded data and emits signalDone(). Does nothing when
 * no search has been started.
 */
void IBSFetcher::stop() {
  if(!m_started) {
    return;
  }
  if(m_job) {
    m_job->kill();
    m_job = 0;
  }
  m_data.truncate(0);
  m_started = false;
  emit signalDone(this);
}

/**
 * Appends a chunk of downloaded bytes to the internal buffer.
 */
void IBSFetcher::slotData(TDEIO::Job*, const TQByteArray& data_) {
  TQDataStream stream(m_data, IO_WriteOnly | IO_Append);
  stream.writeRawBytes(data_.data(), data_.size());
}

/**
 * Handles the result page of a title/person/keyword search.
 *
 * Scans the downloaded HTML for anchors pointing below
 * http://www.internetbookshop.it/code/, emits one SearchResult per anchor
 * and remembers the anchor's URL in m_matches under the result's uid so
 * that fetchEntry() can download the details later. Always finishes by
 * calling stop().
 */
void IBSFetcher::slotComplete(TDEIO::Job* job_) {
  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "IBSFetcher::slotComplete() - no data" << endl;
    stop();
    return;
  }

  TQString s = Tellico::decodeHTML(TQString(m_data));
  // really specific regexp
  // NOTE(review): the opening of this pattern was lost in the copy under
  // review (it began with a dangling "]*href"). "<a[^>" is the minimal
  // reconstruction of an anchor-tag match; upstream may also have required
  // extra markup between the anchor tag and the title -- confirm.
  TQString pat = TQString::fromLatin1("http://www.internetbookshop.it/code/");
  TQRegExp anchorRx(TQString::fromLatin1("<a[^>]*href\\s*=\\s*[\"'](") +
                    TQRegExp::escape(pat) +
                    TQString::fromLatin1("[^\"]*)\"[^>]*>([^<]+)<"), false);
  anchorRx.setMinimal(true);
  // used to strip markup out of the description text
  TQRegExp tagRx(TQString::fromLatin1("<.*>"));
  tagRx.setMinimal(true);

  // url, title and description of the match currently being assembled
  TQString u, t, d;
  int pos2;
  for(int pos = anchorRx.search(s); m_started && pos > -1;
      pos = anchorRx.search(s, pos+anchorRx.matchedLength())) {
    // a new anchor was found, so the previous match is complete: emit it
    if(!u.isEmpty()) {
      SearchResult* r = new SearchResult(this, t, d, TQString());
      emit signalResultFound(r);

#ifdef IBS_TEST
      KURL url = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/ibs2.html"));
#else
      // the url probably contains &amp; so be careful
      // NOTE(review): the copy under review replaced "&" with '&', which is
      // a no-op; "&amp;" is assumed to be the intended entity -- confirm.
      KURL url = u.replace(TQString::fromLatin1("&amp;"), TQChar('&'));
#endif
      m_matches.insert(r->uid, url);

      u.truncate(0);
      t.truncate(0);
      d.truncate(0);
    }
    u = anchorRx.cap(1);
    t = anchorRx.cap(2);
    // the description is whatever sits between the next two delimiter tags
    // NOTE(review): both delimiter literals were lost in the copy under
    // review (each had collapsed to a line break); "<br>" is assumed --
    // confirm against upstream.
    pos2 = s.find(TQString::fromLatin1("<br>"), pos, false);
    if(pos2 > -1) {
      int pos3 = s.find(TQString::fromLatin1("<br>"), pos2+1, false);
      if(pos3 > -1) {
        d = s.mid(pos2, pos3-pos2).remove(tagRx).simplifyWhiteSpace();
      }
    }
  }
#ifndef IBS_TEST
  // the loop only emits a match when the following one is found,
  // so the last match still has to be emitted here
  if(!u.isEmpty()) {
    SearchResult* r = new SearchResult(this, t, d, TQString());
    emit signalResultFound(r);
    // NOTE(review): "&amp;" assumed, see the note inside the loop above
    m_matches.insert(r->uid, u.replace(TQString::fromLatin1("&amp;"), TQChar('&')));
  }
#endif

  stop();
}

/**
 * Handles the result page of an ISBN search.
 *
 * An ISBN lookup returns the detail page directly, so the page is parsed
 * into an entry right away and a single SearchResult is emitted for it.
 * Nothing is emitted when the page contains "Libro non presente".
 * Always finishes by calling stop().
 */
void IBSFetcher::slotCompleteISBN(TDEIO::Job* job_) {
  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "IBSFetcher::slotCompleteISBN() - no data" << endl;
    stop();
    return;
  }

  TQString str = Tellico::decodeHTML(TQString(m_data));
  if(str.find(TQString::fromLatin1("Libro non presente"), 0, false /* case-sensitive */) > -1) {
    stop();
    return;
  }

  Data::EntryPtr entry = parseEntry(str);
  if(entry) {
    TQString desc = entry->field(TQString::fromLatin1("author"))
                  + '/'
                  + entry->field(TQString::fromLatin1("publisher"));
    SearchResult* r = new SearchResult(this, entry->title(), desc,
                                       entry->field(TQString::fromLatin1("isbn")));
    emit signalResultFound(r);
    // NOTE(review): the cast's target type was missing from the copy under
    // review. TDEIO::TransferJob* is assumed because the job comes from
    // TDEIO::get() and url() is called on it -- confirm against upstream.
    m_matches.insert(r->uid, static_cast<TDEIO::TransferJob*>(job_)->url().url());
  }

  stop();
}

/**
 * Returns the full entry for a previously emitted search result.
 *
 * Entries already fetched are served from m_entries. Otherwise the URL
 * remembered in m_matches is downloaded, parsed with parseEntry(), and the
 * result is stored in m_entries before being returned.
 *
 * @param uid_ uid of the SearchResult
 * @return the entry, or a null pointer if no URL is known for the uid, the
 *         download produced no text, or parsing failed
 */
Tellico::Data::EntryPtr IBSFetcher::fetchEntry(uint uid_) {
  // if we already grabbed this one, then just pull it out of the dict
  Data::EntryPtr entry = m_entries[uid_];
  if(entry) {
    return entry;
  }

  KURL url = m_matches[uid_];
  if(url.isEmpty()) {
    kdWarning() << "IBSFetcher::fetchEntry() - no url in map" << endl;
    return 0;
  }

  TQString results = Tellico::decodeHTML(FileHandler::readTextFile(url, true));
  if(results.isEmpty()) {
    myDebug() << "IBSFetcher::fetchEntry() - no text results" << endl;
    return 0;
  }

//  myDebug() << url.url() << endl;
#if 0
  kdWarning() << "Remove debug from ibsfetcher.cpp" << endl;
  TQFile f(TQString::fromLatin1("/tmp/test.html"));
  if(f.open(IO_WriteOnly)) {
    TQTextStream t(&f);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << results;
  }
  f.close();
#endif

  entry = parseEntry(results);
  if(!entry) {
    myDebug() << "IBSFetcher::fetchEntry() - error in processing entry" << endl;
    return 0;
  }
  m_entries.insert(uid_, entry); // keep for later
  return entry;
}

/**
 * Builds a book entry from the HTML of a detail page.
 *
 * Extracts the ISBN from an "isbn=" occurrence, the captioned fields
 * listed in fieldMap, a cover image (when an ISBN was found) and a plot
 * description, then moves the first word of each comma-free author name
 * to the end of that name.
 *
 * @param str_ the page text
 * @return the new entry, which belongs to a freshly created book collection
 */
Tellico::Data::EntryPtr IBSFetcher::parseEntry(const TQString& str_) {
//  myDebug() << "IBSFetcher::parseEntry()" << endl;
  // class might be anime_info_top
  // %1 is the field caption; the captured group is the first non-blank text
  // following the markup after the caption
  // NOTE(review): markup may have been lost around "%1" in the copy under
  // review; the pattern is kept exactly as found -- compare with upstream.
  TQString pat = TQString::fromLatin1("%1(?:<[^>]+>)+([^<>\\s][^<>]+)");

  TQRegExp isbnRx(TQString::fromLatin1("isbn=([\\dxX]{13})"), false);
  TQString isbn;
  int pos = isbnRx.search(str_);
  if(pos > -1) {
    isbn = isbnRx.cap(1);
  }

  Data::CollPtr coll = new Data::BookCollection(true);

  // map captions in HTML to field names
  // NOTE(review): the template arguments of TQMap were missing from the copy
  // under review; <TQString, TQString> follows from the inserts below.
  TQMap<TQString, TQString> fieldMap;
  fieldMap.insert(TQString::fromLatin1("Titolo"),     TQString::fromLatin1("title"));
  fieldMap.insert(TQString::fromLatin1("Autore"),     TQString::fromLatin1("author"));
  fieldMap.insert(TQString::fromLatin1("Anno"),       TQString::fromLatin1("pub_year"));
  fieldMap.insert(TQString::fromLatin1("Categoria"),  TQString::fromLatin1("genre"));
  fieldMap.insert(TQString::fromLatin1("Rilegatura"), TQString::fromLatin1("binding"));
  fieldMap.insert(TQString::fromLatin1("Editore"),    TQString::fromLatin1("publisher"));
  fieldMap.insert(TQString::fromLatin1("Dati"),       TQString::fromLatin1("edition"));

  // page count embedded in the "Dati" text, with an optional trailing comma
  TQRegExp pagesRx(TQString::fromLatin1("(\\d+) p\\.(\\s*,\\s*)?"));
  Data::EntryPtr entry = new Data::Entry(coll);

  for(TQMap<TQString, TQString>::Iterator it = fieldMap.begin(); it != fieldMap.end(); ++it) {
    TQRegExp infoRx(pat.arg(it.key()));
    pos = infoRx.search(str_);
    if(pos == -1) {
      continue;
    }
    TQString value = infoRx.cap(1);
    // the edition text may carry the page count: store it in its own field
    // and strip it from the edition value
    if(it.data() == Latin1Literal("edition") && pagesRx.search(value) > -1) {
      entry->setField(TQString::fromLatin1("pages"), pagesRx.cap(1));
      value.remove(pagesRx);
    }
    entry->setField(it.data(), value);
  }

  // image
  if(!isbn.isEmpty()) {
    entry->setField(TQString::fromLatin1("isbn"), isbn);
#if 1
    // build the cover URL straight from the ISBN instead of scraping the page
    TQString imgURL = TQString::fromLatin1("http://giotto.ibs.it/cop/copt13.asp?f=%1").arg(isbn);
    myLog() << "IBSFetcher() - cover = " << imgURL << endl;
    TQString id = ImageFactory::addImage(imgURL, true, TQString::fromLatin1("http://internetbookshop.it"));
    if(!id.isEmpty()) {
      entry->setField(TQString::fromLatin1("cover"), id);
    }
#else
    // NOTE(review): the opening of this pattern was lost in the copy under
    // review (it began with a dangling "]*"); "<img[^>" is assumed.
    TQRegExp imgRx(TQString::fromLatin1("<img[^>]*\\s*src\\s*=\\s*\"(http://[^/]*\\.ibs\\.it/[^\"]+e=%1)").arg(isbn));
    imgRx.setMinimal(true);
    pos = imgRx.search(str_);
    if(pos > -1) {
      myLog() << "IBSFetcher() - cover = " << imgRx.cap(1) << endl;
      TQString id = ImageFactory::addImage(imgRx.cap(1), true, TQString::fromLatin1("http://internetbookshop.it"));
      if(!id.isEmpty()) {
        entry->setField(TQString::fromLatin1("cover"), id);
      }
    }
#endif
  }

  // now look for description
  // NOTE(review): with minimal matching and nothing after ".+", the capture
  // can only ever be two characters long, so a terminating tag has very
  // likely been lost from both patterns below. They are kept exactly as
  // found -- restore the terminator from upstream.
  TQRegExp descRx(TQString::fromLatin1("Descrizione(?:<[^>]+>)+([^<>\\s].+)"), false);
  descRx.setMinimal(true);
  pos = descRx.search(str_);
  if(pos == -1) {
    // fall back to the alternative caption
    descRx.setPattern(TQString::fromLatin1("In sintesi(?:<[^>]+>)+([^<>\\s].+)"));
    pos = descRx.search(str_);
  }
  if(pos > -1) {
    // the collection gets an extra paragraph field to hold the description
    Data::FieldPtr f = new Data::Field(TQString::fromLatin1("plot"), i18n("Plot Summary"), Data::Field::Para);
    coll->addField(f);
    entry->setField(f, descRx.cap(1).simplifyWhiteSpace());
  }

  // IBS lists the family name before the given name, so move the first
  // word of each name to the end
  TQStringList names = entry->fields(TQString::fromLatin1("author"), false);
  if(!names.isEmpty() && !names[0].isEmpty()) {
    for(TQStringList::Iterator it = names.begin(); it != names.end(); ++it) {
      if((*it).find(',') > -1) {
        continue; // skip if it has a comma
      }
      TQStringList words = TQStringList::split(' ', *it);
      if(words.isEmpty()) {
        continue;
      }
      // put first word in back
      words.append(words[0]);
      words.pop_front();
      *it = words.join(TQChar(' '));
    }
    entry->setField(TQString::fromLatin1("author"), names.join(TQString::fromLatin1("; ")));
  }

  return entry;
}

/**
 * Searches for updated data for an existing entry, by ISBN when the entry
 * has one, otherwise by title. When the entry has neither, signalDone() is
 * emitted right away.
 */
void IBSFetcher::updateEntry(Data::EntryPtr entry_) {
  TQString isbn = entry_->field(TQString::fromLatin1("isbn"));
  if(!isbn.isEmpty()) {
    search(Fetch::ISBN, isbn);
    return;
  }

  TQString t = entry_->field(TQString::fromLatin1("title"));
  if(!t.isEmpty()) {
    search(Fetch::Title, t);
    return;
  }

  myDebug() << "IBSFetcher::updateEntry() - insufficient info to search" << endl;
  emit signalDone(this); // always need to emit this if not continuing with the search
}

/**
 * Creates the configuration widget for this source.
 */
Tellico::Fetch::ConfigWidget* IBSFetcher::configWidget(TQWidget* parent_) const {
  return new IBSFetcher::ConfigWidget(parent_);
}

/**
 * The widget only shows a label saying that there is nothing to configure.
 */
IBSFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_)
    : Fetch::ConfigWidget(parent_) {
  TQVBoxLayout* l = new TQVBoxLayout(optionsWidget());
  l->addWidget(new TQLabel(i18n("This source has no options."), optionsWidget()));
  l->addStretch();
}

/**
 * Returns the fetcher's default name.
 */
TQString IBSFetcher::ConfigWidget::preferredName() const {
  return IBSFetcher::defaultName();
}

#include "ibsfetcher.moc"