/*
    This file is part of Akregator.

    Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
                  2005 Frank Osterfeld <frank.osterfeld at kdemail.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

    As a special exception, permission is given to link this program
    with any edition of TQt, and distribute the resulting executable,
    without including the source code for TQt in the source distribution.
*/

#include "article.h"
#include "feed.h"
#include "feedstorage.h"
#include "storage.h"
#include "librss/librss.h"
#include "shared.h"
#include "utils.h"

#include <tqdatetime.h>
#include <tqdom.h>
#include <tqregexp.h>
#include <tqstringlist.h>
#include <tqvaluelist.h>

#include <krfcdate.h>
#include <kdebug.h>
#include <kurl.h>

namespace Akregator {

/**
 * Reference-counted private data shared between copies of an Article.
 */
struct Article::Private : public Shared
{
    /** The status of the article is stored in an int, the bits having the
        following meaning:

        0000 0001 Deleted
        0000 0010 Trash
        0000 0100 New
        0000 1000 Read
        0001 0000 Keep
     */
    enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10};

    TQString guid;
    Backend::FeedStorage* archive;
    Feed* feed;

    // the variables below are initialized to null values in the Article constructor
    // and then loaded on demand instead.
    //
    // to read their values, you should therefore use the accessor methods of the Article
    // hash(), pubDate(), statusBits() rather than accessing them directly.
    uint hash;
    TQDateTime pubDate;
    int status;
};

/**
 * Creates a null article: no feed, no archive, zeroed hash and status.
 * isNull() returns true for such an object.
 */
Article::Article() : d(new Private)
{
    d->hash = 0;
    d->status = 0;
    d->feed = 0;
    d->archive = 0;
}

/**
 * Creates an article handle for @p guid belonging to @p feed.
 *
 * @param guid  identifier of the article inside the feed's archive
 * @param feed  owning feed; it is dereferenced here to look up the archive,
 *              so it must not be null
 */
Article::Article(const TQString& guid, Feed* feed) : d(new Private)
{
    // this constructor should be as cheap as possible, so avoid calls to
    // read information from the archive in here if possible
    //
    // d->hash, d->pubDate and d->status are loaded on-demand by
    // the hash(), pubDate() and statusBits() methods respectively
    d->feed = feed;
    d->guid = guid;
    d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl());
    d->status = 0;
    d->hash = 0;
}

/**
 * Fills this article from a parsed RSS article and synchronises it with
 * @p archive.
 *
 * If the guid is not yet in the archive, a new entry is added and populated
 * (or only marked Read|Deleted when the article carries meta "deleted" ==
 * "true"). If the guid is already archived, the comments count is always
 * refreshed and the remaining text fields are rewritten only when the freshly
 * computed hash differs from the stored one.
 *
 * @param article  parsed article to take the data from
 * @param archive  storage backend to read from / write to
 */
void Article::initialize(RSS::Article article, Backend::FeedStorage* archive)
{
    d->archive = archive;
    d->status = Private::New;
    // The hash covers title, description, author, link and comments link;
    // the comments *count* is deliberately not part of it (see below).
    d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url()
                              + article.commentsLink().url() );

    d->guid = article.guid();

    if (!d->archive->contains(d->guid))
    {
        d->archive->addEntry(d->guid);

        if (article.meta("deleted") == "true")
        { // if article is in deleted state, we just add the status and omit the rest
            d->status = Private::Read | Private::Deleted;
            d->archive->setStatus(d->guid, d->status);
        }
        else
        { // article is not deleted, let's add it to the archive

            d->archive->setHash(d->guid, hash() );
            // articles without a title get one derived from their description
            TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
            d->archive->setTitle(d->guid, title);
            d->archive->setDescription(d->guid, article.description());
            d->archive->setLink(d->guid, article.link().url());
            d->archive->setComments(d->guid, article.comments());
            d->archive->setCommentsLink(d->guid, article.commentsLink().url());
            d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink());
            d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true");
            // fall back to "now" when the feed supplied no valid publication date
            d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime();
            d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
            d->archive->setAuthor(d->guid, article.author());

            TQValueList<RSS::Category> cats = article.categories();
            TQValueList<RSS::Category>::ConstIterator end = cats.end();

            for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it)
            {
                Backend::Category cat;

                // term and name are both taken from the RSS category string
                cat.term = (*it).category();
                cat.scheme = (*it).domain();
                cat.name = (*it).category();

                d->archive->addCategory(d->guid, cat);
            }

            if (!article.enclosure().isNull())
            {
                d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length());
            }
            else
            {
                d->archive->removeEnclosure(d->guid);
            }

            TQString status = article.meta("status");

            if (!status.isEmpty())
            {
                int statusInt = status.toInt();
                // a stored "New" status is downgraded to Unread here
                if (statusInt == New)
                    statusInt = Unread;
                setStatus(statusInt);
            }
            setKeep(article.meta("keep") == "true");
        }
    }
    else
    {
        // always update comments count, as it's not used for hash calculation
        d->archive->setComments(d->guid, article.comments());
        if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified?
        { // if yes, update
            d->pubDate.setTime_t(d->archive->pubDate(d->guid));
            d->archive->setHash(d->guid, hash() );
            TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
            d->archive->setTitle(d->guid, title);
            d->archive->setDescription(d->guid, article.description());
            d->archive->setLink(d->guid, article.link().url());
            d->archive->setCommentsLink(d->guid, article.commentsLink().url());
            d->archive->setAuthor(d->guid, article.author());
        }
    }
}

/**
 * Creates an article from a parsed RSS article, using the archive that
 * belongs to @p feed.
 *
 * @param article  parsed article
 * @param feed     owning feed; dereferenced here, so it must not be null
 */
Article::Article(RSS::Article article, Feed* feed) : d(new Private)
{
    //assert(feed)
    d->feed = feed;
    initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()));
}

/**
 * Creates a feed-less article from a parsed RSS article, backed directly by
 * @p archive.
 */
Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private)
{
    d->feed = 0;
    initialize(article, archive);
}

/** @return true if this article has no archive attached. */
bool Article::isNull() const
{
    return d->archive == 0; // TODO: use proper null state
}

/**
 * Shifts the publication date by @p secs seconds and stores the new value in
 * the archive.
 */
void Article::offsetPubDate(int secs)
{
    d->pubDate = pubDate().addSecs(secs);
    d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
}

/**
 * Marks the article as deleted in the archive and notifies the feed, if any.
 * Does nothing when the article is already deleted.
 */
void Article::setDeleted()
{
    if (isDeleted())
        return;

    // go through setStatus() first so the feed is told about the Read transition
    setStatus(Read);
    // the status word is overwritten, so any other bits (e.g. Keep) are cleared
    d->status = Private::Deleted | Private::Read;
    d->archive->setStatus(d->guid, d->status);
    d->archive->setDeleted(d->guid);

    if (d->feed)
        d->feed->setArticleDeleted(*this);
}

/** @return true if the Deleted bit is set in the status bits. */
bool Article::isDeleted() const
{
    return (statusBits() & Private::Deleted) != 0;
}

/**
 * Copy constructor: shares the private data of @p other and takes a
 * reference on it, instead of allocating a temporary Private that would be
 * discarded immediately.
 */
Article::Article(const Article &other) : d(other.d)
{
    d->ref();
}

/** Drops this object's reference; the last owner deletes the shared data. */
Article::~Article()
{
    if (d->deref())
    {
        delete d;
        d = 0;
    }
}

/**
 * Assignment: takes a reference on @p other's data and releases the one
 * currently held. Self-assignment is a no-op.
 */
Article &Article::operator=(const Article &other)
{
    if (this != &other) {
        // ref first, so the new data is held before the old data is released
        other.d->ref();
        if (d && d->deref())
            delete d;
        d = other.d;
    }
    return *this;
}

/**
 * Ordering: articles with a later publication date compare as "less";
 * equal dates are ordered by guid.
 */
bool Article::operator<(const Article &other) const
{
    return pubDate() > other.pubDate() ||
            (pubDate() == other.pubDate() && guid() < other.guid() );
}

/**
 * @return true if this article is less than @p other (see operator<) or
 *         equal to it (see operator==).
 */
bool Article::operator<=(const Article &other) const
{
    return *this < other || *this == other;
}

/** Mirror image of operator<: earlier publication date first, then guid. */
bool Article::operator>(const Article &other) const
{
    return pubDate() < other.pubDate() ||
            (pubDate() == other.pubDate() && guid() > other.guid() );
}

/**
 * @return true if this article is greater than @p other (see operator>) or
 *         equal to it (see operator==).
 */
bool Article::operator>=(const Article &other) const
{
    return *this > other || *this == other;
}

/** Two articles are equal when their guids are equal. */
bool Article::operator==(const Article &other) const
{
    return d->guid == other.guid();
}

/**
 * @return the raw status bit field (see Private::Status), loading it from
 *         the archive while the cached value is still 0.
 */
int Article::statusBits() const
{
    // delayed loading of status information from archive
    if ( d->status == 0 )
    {
        d->status = d->archive->status(d->guid);
    }

    return d->status;
}

/**
 * @return Read, New or Unread. The Read bit takes precedence over the New
 *         bit; Unread is returned when neither is set.
 */
int Article::status() const
{
    if ((statusBits() & Private::Read) != 0)
        return Read;

    if ((statusBits() & Private::New) != 0)
        return New;
    else
        return Unread;
}

/**
 * Sets the read state to @p stat (Read, Unread or New), writes the status
 * bits to the archive and notifies the feed, if any. Nothing happens when
 * the state is unchanged.
 *
 * A value other than Read/Unread/New leaves the bits untouched, but the
 * archive write and feed notification still take place.
 */
void Article::setStatus(int stat)
{
    // use status() rather than statusBits() here to filter out status flags that we are not
    // interested in
    int oldStatus = status();

    if (oldStatus != stat)
    {
        switch (stat)
        {
            case Read:
                d->status = ( d->status | Private::Read) & ~Private::New;
                break;
            case Unread:
                d->status = ( d->status & ~Private::Read) & ~Private::New;
                break;
            case New:
                d->status = ( d->status | Private::New) & ~Private::Read;
                break;
        }
        d->archive->setStatus(d->guid, d->status);
        if (d->feed)
            d->feed->setArticleChanged(*this, oldStatus);
    }
}

/** @return the title stored in the archive for this guid. */
TQString Article::title() const
{
    return d->archive->title(d->guid);
}

/** @return the author stored in the archive for this guid. */
TQString Article::author() const
{
    return d->archive->author(d->guid);
}

/** @return the link stored in the archive for this guid. */
KURL Article::link() const
{
    return d->archive->link(d->guid);
}

/** @return the description stored in the archive for this guid. */
TQString Article::description() const
{
    return d->archive->description(d->guid);
}

/** @return the guid held by this article (no archive access). */
TQString Article::guid() const
{
    return d->guid;
}

/** @return the comments link stored in the archive for this guid. */
KURL Article::commentsLink() const
{
    return d->archive->commentsLink(d->guid);
}

/** @return the comments count stored in the archive for this guid. */
int Article::comments() const
{
    return d->archive->comments(d->guid);
}

/** @return the archive's guidIsPermaLink flag for this guid. */
bool Article::guidIsPermaLink() const
{
    return d->archive->guidIsPermaLink(d->guid);
}

/** @return the archive's guidIsHash flag for this guid. */
bool Article::guidIsHash() const
{
    return d->archive->guidIsHash(d->guid);
}

/**
 * @return the article hash, loading it from the archive while the cached
 *         value is still 0.
 */
uint Article::hash() const
{
    // delayed loading of hash from archive
    if ( d->hash == 0 )
    {
        d->hash = d->archive->hash(d->guid);
    }

    return d->hash;
}

/** @return true if the Keep bit is set in the status bits. */
bool Article::keep() const
{
    return ( statusBits() & Private::Keep) != 0;
}

/**
 * @return the enclosure stored in the archive, or a default-constructed
 *         RSS::Enclosure when the archive reports none.
 */
RSS::Enclosure Article::enclosure() const
{
    bool hasEnc;
    TQString url, type;
    int length;
    // the archive fills all four out-parameters
    d->archive->enclosure(d->guid, hasEnc, url, type, length);
    return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure();
}

/**
 * Sets or clears the Keep bit, writes the status to the archive and notifies
 * the feed, if any.
 */
void Article::setKeep(bool keep)
{
    d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep);
    d->archive->setStatus(d->guid, d->status);
    if (d->feed)
        d->feed->setArticleChanged(*this);
}

/** Adds @p tag to the article in the archive and notifies the feed, if any. */
void Article::addTag(const TQString& tag)
{
    d->archive->addTag(d->guid, tag);
    if (d->feed)
        d->feed->setArticleChanged(*this);
}

/** Removes @p tag from the article in the archive and notifies the feed, if any. */
void Article::removeTag(const TQString& tag)
{
    d->archive->removeTag(d->guid, tag);
    if (d->feed)
        d->feed->setArticleChanged(*this);
}

/** @return whether the archive's tag list for this guid contains @p tag. */
bool Article::hasTag(const TQString& tag) const
{
    return d->archive->tags(d->guid).contains(tag);
}

/** @return the archive's tag list for this guid. */
TQStringList Article::tags() const
{
    return d->archive->tags(d->guid);
}

/** @return the feed this article was created for; may be null. */
Feed* Article::feed() const
{
    return d->feed;
}

/**
 * @return the publication date, loading it from the archive while the cached
 *         value is still null.
 */
const TQDateTime& Article::pubDate() const
{
    // delayed loading of publication date information from archive
    if ( d->pubDate.isNull() )
    {
        d->pubDate.setTime_t(d->archive->pubDate(d->guid));
    }

    return d->pubDate;
}

/**
 * Derives a plain-text title from an HTML description.
 *
 * Markup tags are removed, script tags are removed together with the text
 * that follows them (up to the next '<'), and br tags are turned into a
 * single space. The result is cut to 90 characters plus "..." and has its
 * whitespace simplified.
 *
 * @param description  HTML description of the article
 * @return the generated title, or an empty string if @p description contains
 *         only whitespace
 */
TQString Article::buildTitle(const TQString& description)
{
    TQString s = description;
    if (description.stripWhiteSpace().isEmpty())
        return "";

    int i = s.find('>',500); /*avoid processing too much */
    if (i != -1)
        s = s.left(i+1);

    // cap(1): the whole tag, cap(2): the tag name, cap(0): tag plus trailing text
    TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false);

    // rx.search() returns the leftmost match, which is exactly where the
    // captured tag text first occurs in s, so that position is reused for the
    // replacement.
    for (int pos = rx.search(s); pos != -1; pos = rx.search(s))
    {
        // the regexp matches case-insensitively, so compare the name that way too
        const TQString tagName = rx.cap(2).lower();

        TQString toReplace = rx.cap(1); // default: strip just the tag
        // declared per iteration: a replacement chosen for one tag must not
        // leak into the handling of the following tags
        TQString replaceWith;

        if (tagName == "script")
            toReplace = rx.cap(0); // strip tag AND tag contents
        else if (tagName.startsWith("br"))
            replaceWith = " ";

        s.replace(pos, toReplace.length(), replaceWith); // do the deed
    }

    if (s.length()> 90)
        s=s.left(90)+"...";
    return s.simplifyWhiteSpace();
}

} // namespace Akregator