summaryrefslogtreecommitdiffstats
path: root/src/webquerygooglescholar.cpp
diff options
context:
space:
mode:
author: Slávek Banko <slavek.banko@axis.cz> 2013-06-24 02:08:15 +0200
committer: Slávek Banko <slavek.banko@axis.cz> 2013-07-04 02:44:37 +0200
commit: 998f21e02a725cd553d7c278819f67cd81295af4 (patch)
tree: 4bd158018e9302c31367b00c01cd2b41eb228414 /src/webquerygooglescholar.cpp
download: kbibtex-998f21e02a725cd553d7c278819f67cd81295af4.tar.gz
kbibtex-998f21e02a725cd553d7c278819f67cd81295af4.zip
Initial import
Diffstat (limited to 'src/webquerygooglescholar.cpp')
-rw-r--r-- src/webquerygooglescholar.cpp 469
1 files changed, 469 insertions, 0 deletions
diff --git a/src/webquerygooglescholar.cpp b/src/webquerygooglescholar.cpp
new file mode 100644
index 0000000..a1c2d52
--- /dev/null
+++ b/src/webquerygooglescholar.cpp
@@ -0,0 +1,469 @@
+/***************************************************************************
+ * Copyright (C) 2004-2009 by Thomas Fischer *
+ * fischer@unix-ag.uni-kl.de *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+#include <qfile.h>
+#include <qapplication.h>
+#include <qregexp.h>
+#include <qtimer.h>
+#include <qmap.h>
+#include <qspinbox.h>
+#include <kconfig.h>
+
+#include <klineedit.h>
+#include <klocale.h>
+#include <kdebug.h>
+#include <kmessagebox.h>
+#include <kio/job.h>
+
+#include <dcopref.h>
+
+#include <settings.h>
+#include "webquerygooglescholar.h"
+
+namespace KBibTeX
+{
+ /**
+  * Build the Google Scholar query widget and pre-fill its query line edit
+  * with the value the settings return for the key "GoogleScholar".
+  *
+  * @param parent parent widget, handed on to WebQueryWidget
+  * @param name   widget name, handed on to WebQueryWidget
+  */
+ WebQueryGoogleScholarWidget::WebQueryGoogleScholarWidget( QWidget *parent, const char *name )
+         : WebQueryWidget( parent, name )
+ {
+     init();
+
+     /** a null string from the settings is replaced by an empty string */
+     QString storedQuery = Settings::self()->getWebQueryDefault( "GoogleScholar" );
+     if ( storedQuery == QString::null )
+         storedQuery = "";
+
+     lineEditQuery->setText( storedQuery );
+     slotTextChanged( storedQuery, true );
+ }
+
+ /**
+  * Create the Google Scholar web query: no transfer job and no transfer
+  * buffer yet, plus a freshly allocated query widget and a BibTeX importer
+  * that is told to ignore comments.
+  *
+  * @param parent parent widget for both the query and its widget
+  */
+ WebQueryGoogleScholar::WebQueryGoogleScholar( QWidget* parent )
+         : WebQuery( parent ), m_transferJob( NULL ), m_transferJobBuffer( NULL )
+ {
+     /** user interface for entering the search term */
+     m_widget = new WebQueryGoogleScholarWidget( parent );
+
+     /** importer for BibTeX data, configured to skip comments */
+     m_importer = new BibTeX::FileImporterBibTeX( FALSE );
+     m_importer->setIgnoreComments( TRUE );
+ }
+
+ /**
+  * Free the objects allocated in the constructor: the query widget first,
+  * then the BibTeX importer.
+  */
+ WebQueryGoogleScholar::~WebQueryGoogleScholar()
+ {
+     delete m_widget;
+     delete m_importer;
+ }
+
+ /** @return the translated (i18n) string "Google Scholar" */
+ QString WebQueryGoogleScholar::title()
+ {
+     const QString caption = i18n( "Google Scholar" );
+     return caption;
+ }
+
+ /** @return the translated (i18n) string "About Google Scholar" */
+ QString WebQueryGoogleScholar::disclaimer()
+ {
+     const QString label = i18n( "About Google Scholar" );
+     return label;
+ }
+
+ /** @return the fixed address of Google Scholar's "about" page */
+ QString WebQueryGoogleScholar::disclaimerURL()
+ {
+     return QString( "http://scholar.google.com/intl/en/scholar/about.html" );
+ }
+
+ /** @return the query widget created in the constructor (still owned by this object) */
+ WebQueryWidget *WebQueryGoogleScholar::widget()
+ {
+     WebQueryWidget *queryWidget = m_widget;
+     return queryWidget;
+ }
+
+ /**
+  * Start a new Google Scholar search.
+  *
+  * The search is a chain of asynchronous HTTP requests; each request is
+  * started by the result slot of the previous one:
+  *   query()                               -> start page
+  *   slotFinishedStartpage()               -> preferences page
+  *   slotFinishedLoadingSettings()         -> submit preferences
+  *   slotFinishedSavingSettings()          -> actual search
+  *   slotFinishedReceivingResultOverview() -> download of the BibTeX entries
+  *
+  * The currently running job is kept in the member m_transferJob so that
+  * cancelQuery() can kill it.
+  */
+ void WebQueryGoogleScholar::query()
+ {
+     WebQuery::query();
+
+     /** save search term in settings */
+     Settings *settings = Settings::self();
+     settings->setWebQueryDefault( "GoogleScholar", m_widget->lineEditQuery->text() );
+
+     /** generate web-safe search term */
+     m_searchTerm = m_widget->lineEditQuery->text().stripWhiteSpace().replace( '$', "" );
+     m_searchTerm = m_searchTerm.replace( "%", "%25" ).replace( "+", "%2B" ).replace( " ", "%20" ).replace( "#", "%23" ).replace( "&", "%26" ).replace( "?", "%3F" );
+     if ( m_searchTerm.isEmpty() )
+     {
+         setEndSearch( WebQuery::statusInvalidQuery );
+         return;
+     }
+
+     /** initialize variables */
+     m_abort = false;
+     m_numberOfResults = m_widget->spinBoxMaxHits->value();
+     setNumStages( m_numberOfResults + 5 );
+
+     /** reset KDE configuration for cookie handling */
+     readAndChangeConfig();
+
+     /** prepare HTTP request (buffer, signals, job) */
+     m_transferJobBuffer = new QBuffer();
+     m_transferJobBuffer->open( IO_WriteOnly );
+     /** store the job in the member (not in a shadowing local) so cancelQuery() can reach it */
+     m_transferJob = KIO::get( KURL( "http://scholar.google.com/scholar_ncr" ), false, false );
+     connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
+     connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedStartpage( KIO::Job * ) ) );
+ }
+
+ /**
+  * Abort the running search: flag the abort, kill the HTTP job that is
+  * currently in flight (if any) and report the end of the search.
+  */
+ void WebQueryGoogleScholar::cancelQuery()
+ {
+     /** user aborted search */
+     m_abort = true;
+
+     /**
+      * Forget the job before killing it: a non-quiet kill makes the job emit
+      * its result signal, so the connected slotFinished...() slot runs (and
+      * cleans up because m_abort is set) before kill() returns; the job then
+      * deletes itself.
+      */
+     KIO::Job *runningJob = m_transferJob;
+     m_transferJob = NULL;
+     if ( runningJob != NULL ) runningJob->kill( false );
+
+     setEndSearch( WebQuery::statusError );
+ }
+
+ /**
+  * Result slot for the start page request; on success requests the
+  * preferences page.
+  *
+  * @param job the finished job; only its error code is evaluated
+  */
+ void WebQueryGoogleScholar::slotFinishedStartpage( KIO::Job *job )
+ {
+     /** the job that delivered this result is finished and must not be killed later */
+     m_transferJob = NULL;
+
+     /** close and delete buffer (content does not matter) */
+     m_transferJobBuffer->close();
+     delete m_transferJobBuffer;
+     m_transferJobBuffer = NULL;
+
+     /** if aborted in the mean time, clean up everything */
+     if ( m_abort )
+     {
+         restoreConfig();
+         return;
+     }
+
+     /** error occurred */
+     if ( job->error() != 0 )
+     {
+         restoreConfig();
+         kdDebug() << "Error in slotFinishedStartpage: " << job->error() << endl;
+         setEndSearch( statusError );
+         return;
+     }
+
+     /** update progress bar */
+     enterNextStage();
+
+     /** prepare next HTTP request for preferences page (buffer, signals, job) */
+     m_transferJobBuffer = new QBuffer();
+     m_transferJobBuffer->open( IO_WriteOnly );
+     m_transferJob = KIO::get( KURL( "http://scholar.google.com/scholar_preferences?hl=en" ), false, false );
+     connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
+     connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedLoadingSettings( KIO::Job * ) ) );
+ }
+
+ /**
+  * Result slot for the preferences page request; on success takes over the
+  * form values found in the page, overrides the ones needed for BibTeX
+  * export and submits them.
+  *
+  * @param job the finished job; only its error code is evaluated
+  */
+ void WebQueryGoogleScholar::slotFinishedLoadingSettings( KIO::Job *job )
+ {
+     /** the job that delivered this result is finished and must not be killed later */
+     m_transferJob = NULL;
+
+     /** close buffer, fetch the received HTML code from it and delete it */
+     m_transferJobBuffer->close();
+     QString htmlCode = textFromBuffer( m_transferJobBuffer );
+     delete m_transferJobBuffer;
+     m_transferJobBuffer = NULL;
+
+     /** if aborted in the mean time, clean up everything */
+     if ( m_abort )
+     {
+         restoreConfig();
+         return;
+     }
+
+     /** error occurred */
+     if ( job->error() != 0 )
+     {
+         restoreConfig();
+         kdDebug() << "Error in slotFinishedLoadingSettings: " << job->error() << endl;
+         setEndSearch( statusError );
+         return;
+     }
+
+     /** update progress bar */
+     enterNextStage();
+
+     /** parse html code to get form values */
+     QMap<QString, QString> keyValues = evalFormFields( htmlCode );
+     /** set form values for BibTeX search */
+     keyValues["scis"] = "yes";
+     keyValues["scisf"] = "4";
+     keyValues["submit"] = "Save+Preferences";
+     keyValues["num"] = QString::number( m_numberOfResults );
+
+     /** prepare next HTTP request to submit preferences (buffer, signals, job) */
+     KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar_setprefs", keyValues ) );
+     m_transferJobBuffer = new QBuffer();
+     m_transferJobBuffer->open( IO_WriteOnly );
+     m_transferJob = KIO::get( nextUrl, false, false );
+     connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
+     connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedSavingSettings( KIO::Job * ) ) );
+ }
+
+ /**
+  * Result slot for the "submit preferences" request; on success takes over
+  * the form values found in the returned page, adds the search term and
+  * the number of results and starts the actual search.
+  *
+  * @param job the finished job; only its error code is evaluated
+  */
+ void WebQueryGoogleScholar::slotFinishedSavingSettings( KIO::Job *job )
+ {
+     /** the job that delivered this result is finished and must not be killed later */
+     m_transferJob = NULL;
+
+     /** close buffer, fetch the received HTML code from it and delete it */
+     m_transferJobBuffer->close();
+     QString htmlCode = textFromBuffer( m_transferJobBuffer );
+     delete m_transferJobBuffer;
+     m_transferJobBuffer = NULL;
+
+     /** if aborted in the mean time, clean up everything */
+     if ( m_abort )
+     {
+         restoreConfig();
+         return;
+     }
+
+     /** error occurred */
+     if ( job->error() != 0 )
+     {
+         restoreConfig();
+         kdDebug() << "Error in slotFinishedSavingSettings: " << job->error() << endl;
+         setEndSearch( statusError );
+         return;
+     }
+
+     /** update progress bar */
+     enterNextStage();
+
+     /** parse html code to get form values */
+     QMap<QString, QString> keyValues = evalFormFields( htmlCode );
+     /** set form values for search */
+     keyValues["q"] = m_searchTerm;
+     keyValues["num"] = QString::number( m_numberOfResults );
+
+     /** prepare next HTTP request for actual search (buffer, signals, job) */
+     KURL nextUrl( formFieldsToUrl( "http://scholar.google.com/scholar", keyValues ) );
+     m_transferJobBuffer = new QBuffer();
+     m_transferJobBuffer->open( IO_WriteOnly );
+     m_transferJob = KIO::get( nextUrl, false, false );
+     connect( m_transferJob, SIGNAL( data( KIO::Job *, const QByteArray & ) ), this, SLOT( slotData( KIO::Job *, const QByteArray & ) ) );
+     connect( m_transferJob, SIGNAL( result( KIO::Job * ) ), this, SLOT( slotFinishedReceivingResultOverview( KIO::Job * ) ) );
+ }
+
+ /**
+  * Result slot for the search request; on success downloads the BibTeX
+  * file behind every "/scholar.bib..." link of the result page, emits
+  * foundEntry() for the first entry of each file, restores the cookie
+  * configuration and reports the final status.
+  *
+  * @param job the finished job; only its error code is evaluated
+  */
+ void WebQueryGoogleScholar::slotFinishedReceivingResultOverview( KIO::Job *job )
+ {
+     /** the job that delivered this result is finished and must not be killed later */
+     m_transferJob = NULL;
+
+     /** close buffer, fetch the received HTML code from it and delete it */
+     m_transferJobBuffer->close();
+     QString htmlCode = textFromBuffer( m_transferJobBuffer );
+     delete m_transferJobBuffer;
+     m_transferJobBuffer = NULL;
+
+     /** if aborted in the mean time, clean up everything */
+     if ( m_abort )
+     {
+         restoreConfig();
+         return;
+     }
+
+     /** error occurred */
+     if ( job->error() != 0 )
+     {
+         restoreConfig();
+         kdDebug() << "Error in slotFinishedReceivingResultOverview: " << job->error() << endl;
+         setEndSearch( statusError );
+         return;
+     }
+
+     /** update progress bar */
+     enterNextStage();
+
+     /**
+      * find all links to BibTeX files in result page
+      *
+      * NOTE(review): the loop and the final status check m_aborted, whereas
+      * cancelQuery() sets m_abort. m_aborted is not declared in this file -
+      * presumably it is maintained by the WebQuery base class; confirm that
+      * both flags are kept in sync.
+      */
+     QRegExp reBibUrl( "/scholar.bib[^ \">]+" );
+     int pos = 0;
+     while ( !m_aborted && ( pos = htmlCode.find( reBibUrl, pos + 1 ) ) > 0 )
+     {
+         /** download individual BibTeX file for each search hit */
+         KURL bibUrl( "http://scholar.google.com" + reBibUrl.cap( 0 ).replace( "&amp;", "&" ) );
+         BibTeX::File *tmpBibFile = downloadBibTeXFile( bibUrl );
+
+         /** update progress bar */
+         enterNextStage();
+
+         /** parse, evaluate and store first BibTeX entry */
+         if ( tmpBibFile != NULL )
+         {
+             BibTeX::File::ElementList::iterator it = tmpBibFile->begin();
+             if ( it != tmpBibFile->end() )
+             {
+                 BibTeX::Entry *entry = dynamic_cast<BibTeX::Entry*>( *it );
+                 if ( entry != NULL )
+                     emit foundEntry( new BibTeX::Entry( entry ), false );
+             }
+             delete tmpBibFile;
+         }
+     }
+
+     /** restore old cookie configuration */
+     restoreConfig();
+
+     /** set result status */
+     if ( m_aborted )
+         setEndSearch( statusAborted );
+     else
+         setEndSearch( statusSuccess );
+ }
+
+ /**
+  * Remember the current cookie policy from the "Cookie Policy" group of
+  * "kcookiejarrc" in the m_original... members, then rewrite that group so
+  * that cookies are enabled, session cookies are accepted, the global
+  * advice is "Accept" and every already listed domain containing
+  * ".google." gets the advice "Accept". Finally the cookie jar is asked
+  * via DCOP to reload its policy.
+  *
+  * restoreConfig() writes the remembered values back.
+  */
+ void WebQueryGoogleScholar::readAndChangeConfig()
+ {
+     KConfig cfg( "kcookiejarrc" );
+     cfg.setGroup( "Cookie Policy" );
+
+     /** remember the original settings */
+     m_originalEnableCookies = cfg.readBoolEntry( "Cookies", true );
+     m_originalSessionCookies = cfg.readBoolEntry( "AcceptSessionCookies", true );
+     QStringList cookieSettingsList = QStringList::split( ',', cfg.readEntry( "CookieDomainAdvice", "" ) );
+     m_originalCookieGlobalAdvice = cfg.readEntry( "CookieGlobalAdvice", "Accept" );
+
+     /**
+      * Drop the domain advices remembered by an earlier query; otherwise
+      * entries that are no longer in the configuration would be written
+      * back into it below and again in restoreConfig().
+      */
+     m_originalCookieMap.clear();
+     for ( QStringList::Iterator it = cookieSettingsList.begin(); it != cookieSettingsList.end(); ++it )
+     {
+         /** each usable entry has the form "domain:advice" */
+         QStringList keyValue = QStringList::split( ':', *it );
+         if ( keyValue.size() == 2 )
+         {
+             m_originalCookieMap[keyValue[0]] = keyValue[1];
+         }
+     }
+
+     /** write the temporary, cookie-friendly settings */
+     cfg.writeEntry( "Cookies", true );
+     cfg.writeEntry( "CookieGlobalAdvice", "Accept" );
+     cfg.writeEntry( "AcceptSessionCookies", true );
+     cookieSettingsList.clear();
+     for ( QMap<QString, QString>::Iterator it = m_originalCookieMap.begin(); it != m_originalCookieMap.end(); ++it )
+     {
+         /** Google domains are forced to "Accept", all others keep their advice */
+         QString value = it.key().contains( ".google." ) ? "Accept" : it.data();
+         cookieSettingsList << it.key() + ":" + value;
+     }
+     cfg.writeEntry( "CookieDomainAdvice", cookieSettingsList.join( "," ) );
+     cfg.sync();
+
+     /** make the cookie jar pick up the changed policy */
+     ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" );
+ }
+
+ /**
+  * Write the cookie policy remembered in the m_original... members back
+  * into the "Cookie Policy" group of "kcookiejarrc" and notify the cookie
+  * jar via DCOP: "reloadPolicy" if cookies were originally enabled,
+  * "shutdown" otherwise.
+  */
+ void WebQueryGoogleScholar::restoreConfig()
+ {
+     KConfig cookieConfig( "kcookiejarrc" );
+     cookieConfig.setGroup( "Cookie Policy" );
+
+     /** plain settings */
+     cookieConfig.writeEntry( "CookieGlobalAdvice", m_originalCookieGlobalAdvice );
+     cookieConfig.writeEntry( "Cookies", m_originalEnableCookies );
+     cookieConfig.writeEntry( "AcceptSessionCookies", m_originalSessionCookies );
+
+     /** per-domain advices, rebuilt as comma-separated "domain:advice" pairs */
+     QStringList domainAdvices;
+     QMap<QString, QString>::Iterator entry = m_originalCookieMap.begin();
+     while ( entry != m_originalCookieMap.end() )
+     {
+         domainAdvices.append( entry.key() + ":" + entry.data() );
+         ++entry;
+     }
+     cookieConfig.writeEntry( "CookieDomainAdvice", domainAdvices.join( "," ) );
+     cookieConfig.sync();
+
+     /** tell the cookie jar about the restored policy */
+     if ( m_originalEnableCookies )
+         ( void )DCOPRef( "kded", "kcookiejar" ).send( "reloadPolicy" );
+     else
+         ( void )DCOPRef( "kded", "kcookiejar" ).send( "shutdown" );
+ }
+
+ /**
+  * Read the complete content of a buffer as text.
+  *
+  * The buffer is opened read-only, read line by line through a text stream
+  * and closed again. The lines returned by readLine() are appended directly
+  * one after another; nothing is inserted between them.
+  *
+  * @param buffer buffer to read from; must not be NULL
+  * @return the concatenated lines, an empty string if there are none
+  */
+ QString WebQueryGoogleScholar::textFromBuffer( QBuffer *buffer )
+ {
+     QString text( "" );
+
+     buffer->open( IO_ReadOnly );
+     QTextStream stream( buffer );
+     for ( ; !stream.atEnd(); )
+         text += stream.readLine();
+     buffer->close();
+
+     return text;
+ }
+
+ /**
+  * Extract form field names and values from HTML code.
+  *
+  * Two passes are made over the text:
+  *  - every "<input ...>" tag contributes its name/value pair, unless it is
+  *    a radio button or checkbox that is not checked, or a submit button
+  *    whose value is not "submit";
+  *  - every "<select name=...>...</select>" element contributes the value
+  *    of each option marked as selected (a later one overwrites an earlier).
+  * Tags are split into tokens at runs of '<', '>', '=', '"' and space; this
+  * is plain token matching, not an HTML parser.
+  *
+  * Each search starts at index 1 and stops at the first match found at an
+  * index of 5 or less (or when nothing is found).
+  *
+  * @param htmlCode HTML source to scan
+  * @return map from field name to field value
+  */
+ QMap <QString, QString> WebQueryGoogleScholar::evalFormFields( const QString &htmlCode )
+ {
+     QMap<QString, QString> formValues;
+     QRegExp reSplit( "[<>=\" ]+" );
+
+     /** first pass: input tags */
+     QRegExp reInput( "<input[^>]+>" );
+     int inputPos = htmlCode.find( reInput, 1 );
+     while ( inputPos > 5 )
+     {
+         QStringList tokens = QStringList::split( reSplit, reInput.cap( 0 ) );
+         bool checked = false;
+         bool isCheckable = false;
+         bool isSubmit = false;
+         QString key;   /** null until a name attribute is seen */
+         QString value; /** null until a value attribute is seen */
+
+         /**
+          * The checks below are deliberately independent (no else): after
+          * an attribute name has consumed the token following it, that
+          * token is still compared against the remaining keywords.
+          */
+         QStringList::Iterator tok = tokens.begin();
+         while ( tok != tokens.end() )
+         {
+             if ( *tok == "name" )
+             {
+                 ++tok;
+                 if ( tok == tokens.end() ) break;
+                 key = *tok;
+             }
+             if ( *tok == "value" )
+             {
+                 ++tok;
+                 if ( tok == tokens.end() )
+                 {
+                     /** attribute without text: empty, but not null */
+                     value = "";
+                     break;
+                 }
+                 value = *tok;
+             }
+             if ( *tok == "checked" )
+                 checked = true;
+             if ( *tok == "type" )
+             {
+                 ++tok;
+                 if ( tok == tokens.end() ) break;
+                 isCheckable = *tok == "radio" || *tok == "checkbox";
+                 isSubmit = *tok == "submit";
+             }
+             ++tok;
+         }
+
+         const bool stateAccepted = !isCheckable || checked;
+         const bool submitAccepted = !isSubmit || value == "submit";
+         if ( stateAccepted && submitAccepted && value != QString::null && key != QString::null )
+         {
+             formValues[key] = value;
+         }
+
+         inputPos = htmlCode.find( reInput, inputPos + 1 );
+     }
+
+     /** second pass: select elements and their selected options */
+     QRegExp reSelect( "<select name=([^ >\"]+).*</select>" );
+     reSelect.setMinimal( true );
+     QRegExp reOption( "<option[^>]+>" );
+     int selectPos = htmlCode.find( reSelect, 1 );
+     while ( selectPos > 5 )
+     {
+         QString key = reSelect.cap( 1 );
+         QString selectCode = reSelect.cap( 0 );
+
+         int optionPos = selectCode.find( reOption, 1 );
+         while ( optionPos > 5 )
+         {
+             QStringList tokens = QStringList::split( reSplit, reOption.cap( 0 ) );
+             bool selected = false;
+             QString value; /** null until a value attribute is seen */
+
+             QStringList::Iterator tok = tokens.begin();
+             while ( tok != tokens.end() )
+             {
+                 if ( *tok == "value" )
+                 {
+                     ++tok;
+                     if ( tok == tokens.end() )
+                     {
+                         value = "";
+                         break;
+                     }
+                     value = *tok;
+                 }
+                 if ( *tok == "selected" )
+                     selected = true;
+                 ++tok;
+             }
+
+             if ( selected && value != QString::null && key != QString::null )
+             {
+                 formValues[key] = value;
+             }
+
+             optionPos = selectCode.find( reOption, optionPos + 1 );
+         }
+
+         selectPos = htmlCode.find( reSelect, selectPos + 1 );
+     }
+
+     return formValues;
+ }
+
+ /**
+  * Append form fields as a query string to an address.
+  *
+  * The fields are added in the map's iteration order as "key=value"; the
+  * first one is introduced by "?", all following ones by "&". Keys and
+  * values are appended verbatim (no escaping is done here), so callers
+  * have to pass them in a form suitable for a URL.
+  *
+  * @param prefix    address to which the fields get appended
+  * @param keyValues form fields (name -> value)
+  * @return prefix followed by the query string; just prefix if the map is empty
+  */
+ QString WebQueryGoogleScholar::formFieldsToUrl( const QString &prefix, const QMap<QString, QString> &keyValues )
+ {
+     QString url = prefix;
+     QString separator = "?";
+
+     QMap<QString, QString>::ConstIterator field = keyValues.begin();
+     while ( field != keyValues.end() )
+     {
+         url.append( separator );
+         url.append( field.key() + "=" + field.data() );
+         separator = "&";
+         ++field;
+     }
+
+     return url;
+ }
+
+}
+#include "webquerygooglescholar.moc"