/***************************************************************************
    copyright            : (C) 2003-2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "bibtexhandler.h"
#include "../collections/bibtexcollection.h"
#include "../entry.h"
#include "../field.h"
#include "../collection.h"
#include "../document.h"
#include "../filehandler.h"
#include "../latin1literal.h"
#include "../tellico_debug.h"

#include <kstandarddirs.h>
#include <kurl.h>
#include <kstringhandler.h>
#include <tdelocale.h>

#include <tqstring.h>
#include <tqstringlist.h>
#include <tqregexp.h>
#include <tqdom.h>

// don't add braces around capital letters by default
#define TELLICO_BIBTEX_BRACES 0

using Tellico::BibtexHandler;

BibtexHandler::StringListMap* BibtexHandler::s_utf8LatexMap = 0;
BibtexHandler::QuoteStyle BibtexHandler::s_quoteStyle = BibtexHandler::BRACES;
const TQRegExp BibtexHandler::s_badKeyChars(TQString::fromLatin1("[^0-9a-zA-Z-]"));

/**
 * Collects the bibtex keys of all the given entries.
 * Entries for which bibtexKey() yields an empty string are skipped.
 *
 * @param entries_ The entries to examine
 * @return The list of non-empty keys, in iteration order
 */
TQStringList BibtexHandler::bibtexKeys(const Data::EntryVec& entries_) {
  TQStringList keys;
  for(Data::EntryVec::ConstIterator it = entries_.begin(); it != entries_.end(); ++it) {
    TQString s = bibtexKey(it.data());
    if(!s.isEmpty()) {
      keys << s;
    }
  }
  return keys;
}

/**
 * Returns the bibtex key for an entry.
 *
 * If the entry's collection has a field whose bibtex name is "key" and the
 * entry has a non-empty value for it, that value is returned unchanged.
 * Otherwise a key is generated from the author, title and year values.
 *
 * @param entry_ The entry; must belong to a bibtex collection
 * @return The key, or a null string if the entry is null, has no collection,
 *         or its collection is not a bibtex collection
 */
TQString BibtexHandler::bibtexKey(Data::ConstEntryPtr entry_) {
  if(!entry_ || !entry_->collection() || entry_->collection()->type() != Data::Collection::Bibtex) {
    return TQString();
  }

  const Data::BibtexCollection* c = static_cast<const Data::BibtexCollection*>(entry_->collection().data());
  Data::FieldPtr f = c->fieldByBibtexName(TQString::fromLatin1("key"));
  if(f) {
    TQString key = entry_->field(f->name());
    if(!key.isEmpty()) {
      return key;
    }
  }

  TQString author;
  Data::FieldPtr authorField = c->fieldByBibtexName(TQString::fromLatin1("author"));
  if(authorField) {
    if(authorField->flags() & Data::Field::AllowMultiple) {
      // grab first author only;
      TQString tmp = entry_->field(authorField->name());
      author = tmp.section(';', 0, 0);
    } else {
      author = entry_->field(authorField->name());
    }
  }

  Data::FieldPtr titleField = c->fieldByBibtexName(TQString::fromLatin1("title"));
  TQString title;
  if(titleField) {
    title = entry_->field(titleField->name());
  }

  Data::FieldPtr yearField = c->fieldByBibtexName(TQString::fromLatin1("year"));
  TQString year;
  if(yearField) {
    year = entry_->field(yearField->name());
  }
  // fall back on the "pub_year" field, then the "cr_year" field
  if(year.isEmpty()) {
    year = entry_->field(TQString::fromLatin1("pub_year"));
    if(year.isEmpty()) {
      year = entry_->field(TQString::fromLatin1("cr_year"));
    }
  }
  // only use the first value if there are several
  year = year.section(';', 0, 0);

  return bibtexKey(author, title, year);
}

/**
 * Builds a bibtex key of the form "<author>-<title initials><year>".
 *
 * The author part is the lower-cased text before the first comma, or the
 * lower-cased last space-separated word if there is no comma. It and its
 * trailing '-' are omitted when the author is empty. Finally every character
 * matching s_badKeyChars is removed.
 *
 * @param author_ The author name
 * @param title_ The title, the first letter of each word is used
 * @param year_ The year, appended as-is
 * @return The generated key
 */
TQString BibtexHandler::bibtexKey(const TQString& author_, const TQString& title_, const TQString& year_) {
  TQString key;
  // if no comma, take the last word
  if(!author_.isEmpty()) {
    if(author_.find(',') == -1) {
      key += author_.section(' ', -1).lower() + '-';
    } else {
      // if there is a comma, take the string up to the first comma
      key += author_.section(',', 0, 0).lower() + '-';
    }
  }

  TQStringList words = TQStringList::split(' ', title_);
  for(TQStringList::ConstIterator it = words.begin(); it != words.end(); ++it) {
    key += (*it).left(1).lower();
  }
  key += year_;

  // bibtex key may only contain [0-9a-zA-Z-]
  return key.replace(s_badKeyChars, TQString());
}

/**
 * Fills s_utf8LatexMap from the "bibtex-translation.xml" application data file.
 *
 * Each <key char="..."> element maps its "char" attribute to the text of all
 * of its <string> descendants. If the file can not be located, the map is
 * left unallocated, so callers must check s_utf8LatexMap before using it.
 */
void BibtexHandler::loadTranslationMaps() {
  TQString mapfile = locate("appdata", TQString::fromLatin1("bibtex-translation.xml"));
  if(mapfile.isEmpty()) {
    return;
  }

  s_utf8LatexMap = new StringListMap();

  KURL u;
  u.setPath(mapfile);
  // no namespace processing
  TQDomDocument dom = FileHandler::readXMLFile(u, false);

  TQDomNodeList keyList = dom.elementsByTagName(TQString::fromLatin1("key"));

  for(unsigned i = 0; i < keyList.count(); ++i) {
    TQDomNodeList strList = keyList.item(i).toElement().elementsByTagName(TQString::fromLatin1("string"));
    // the strList might have more than one node since there are multiple ways
    // to represent a character in LaTex.
    TQString s = keyList.item(i).toElement().attribute(TQString::fromLatin1("char"));
    for(unsigned j = 0; j < strList.count(); ++j) {
      (*s_utf8LatexMap)[s].append(strList.item(j).toElement().text());
//      kdDebug() << "BibtexHandler::loadTranslationMaps - "
//       << s << " = " << strList.item(j).toElement().text() << endl;
    }
  }
}

/**
 * Converts UTF-8 encoded bibtex text into a display string.
 *
 * Every LaTeX representation listed in the translation map is replaced by the
 * character it stands for, then braces enclosing a run of capital letters,
 * such as {X}, are removed.
 *
 * @param text_ The UTF-8 encoded text
 * @return The converted string
 */
TQString BibtexHandler::importText(char* text_) {
  if(!s_utf8LatexMap) {
    loadTranslationMaps();
  }

  TQString str = TQString::fromUtf8(text_);
  // the map stays unallocated when the translation file could not be located,
  // in which case there is nothing to translate
  if(s_utf8LatexMap) {
    for(StringListMap::Iterator it = s_utf8LatexMap->begin(); it != s_utf8LatexMap->end(); ++it) {
      for(TQStringList::Iterator sit = it.data().begin(); sit != it.data().end(); ++sit) {
        str.replace(*sit, it.key());
      }
    }
  }

  // now replace capitalized letters, such as {X}, with the bare letters,
  // since we don't want the braces to show up in the imported value
  TQRegExp rx(TQString::fromLatin1("\\{([A-Z]+)\\}"));
  rx.setMinimal(true);
  str.replace(rx, TQString::fromLatin1("\\1"));

  return str;
}

/**
 * Converts a field value into text suitable for a bibtex file.
 *
 * Every character in the translation map is replaced with its first LaTeX
 * representation, and the result is wrapped in the quote characters selected
 * by s_quoteStyle. When macros are given, the text is split on '#' and tokens
 * that are macro names are left unquoted.
 *
 * @param text_ The value to export
 * @param macros_ The list of known macro names, may be empty
 * @return The quoted text
 */
TQString BibtexHandler::exportText(const TQString& text_, const TQStringList& macros_) {
  if(!s_utf8LatexMap) {
    loadTranslationMaps();
  }

  TQChar lquote, rquote;
  switch(s_quoteStyle) {
    case BRACES:
      lquote = '{';
      rquote = '}';
      break;
    case QUOTES:
      lquote = '"';
      rquote = '"';
      break;
  }

  TQString text = text_;

  // the map stays unallocated when the translation file could not be located,
  // in which case the text is exported untranslated
  if(s_utf8LatexMap) {
    for(StringListMap::Iterator it = s_utf8LatexMap->begin(); it != s_utf8LatexMap->end(); ++it) {
      text.replace(it.key(), it.data()[0]);
    }
  }

  if(macros_.isEmpty()) {
    return lquote + addBraces(text) + rquote;
  }

  // Now, split the text by the character '#', and examine each token to see if it is in
  // the macro list. If it is not, then add left-quote and right-quote around it. If it is, don't
  // change it. Then, in case '#' occurs in a non-macro string, replace any occurrences of '}#{' with '#'

  // list of new tokens
  TQStringList list;

  // first, split the text
  TQStringList tokens = TQStringList::split('#', text, true);
  for(TQStringList::Iterator it = tokens.begin(); it != tokens.end(); ++it) {
    // check to see if token is a macro
    if(macros_.findIndex((*it).stripWhiteSpace()) == -1) {
      // the token is NOT a macro, add braces around whole words and also around capitals
      list << lquote + addBraces(*it) + rquote;
    } else {
      list << *it;
    }
  }

  const TQChar octo = '#';
  text = list.join(octo);
  text.replace(TQString(rquote)+octo+lquote, octo);

  return text;
}

/**
 * Sets the value of the field with the given bibtex name in an entry.
 *
 * If the entry's collection has no field with that bibtex name, one is added:
 * a copy of the matching field in the current document's bibtex collection if
 * there is one, otherwise a new URL, Line or Para field depending on the value.
 * For fields whose bibtex name starts with "keyword", commas become
 * semi-colons and the value is appended to any existing value.
 *
 * @param entry_ The entry to modify; must belong to a bibtex collection
 * @param bibtexField_ The bibtex name of the field
 * @param value_ The value to set
 * @return The result of Entry::setField(), or false if the entry is null or
 *         does not belong to a bibtex collection
 */
bool BibtexHandler::setFieldValue(Data::EntryPtr entry_, const TQString& bibtexField_, const TQString& value_) {
  // same sanity check as in bibtexKey(), the cast below is only valid for bibtex collections
  if(!entry_ || !entry_->collection() || entry_->collection()->type() != Data::Collection::Bibtex) {
    return false;
  }

  Data::BibtexCollection* c = static_cast<Data::BibtexCollection*>(entry_->collection().data());
  Data::FieldPtr field = c->fieldByBibtexName(bibtexField_);
  if(!field) {
    // it was the case that the default bibliography did not have a bibtex property for keywords
    // so a "keywords" field would get created in the imported collection
    // but the existing collection had a field "keyword" so the values would not get imported
    // here, check to see if the current collection has a field with the same bibtex name and
    // use it instead of creating a new one
    Data::BibtexCollection* existingColl = Data::Document::self()->collection()->type() == Data::Collection::Bibtex
                                         ? static_cast<Data::BibtexCollection*>(Data::Document::self()->collection().data())
                                         : 0;
    Data::FieldPtr existingField = existingColl ? existingColl->fieldByBibtexName(bibtexField_) : 0;
    if(existingField) {
      field = new Data::Field(*existingField);
    } else if(value_.length() < 100) {
      // arbitrarily say if the value has more than 100 chars, then it's a paragraph
      TQString vlower = value_.lower();
      // special case, try to detect URLs
      // In qt 3.1, TQString::startsWith() is always case-sensitive
      if(bibtexField_ == Latin1Literal("url")
         || vlower.startsWith(TQString::fromLatin1("http")) // may also be https
         || vlower.startsWith(TQString::fromLatin1("ftp:/"))
         || vlower.startsWith(TQString::fromLatin1("file:/"))
         || vlower.startsWith(TQString::fromLatin1("/"))) { // assume this indicates a local path
        myDebug() << "BibtexHandler::setFieldValue() - creating a URL field for " << bibtexField_ << endl;
        field = new Data::Field(bibtexField_, KStringHandler::capwords(bibtexField_), Data::Field::URL);
      } else {
        field = new Data::Field(bibtexField_, KStringHandler::capwords(bibtexField_), Data::Field::Line);
      }
      field->setCategory(i18n("Unknown"));
    } else {
      field = new Data::Field(bibtexField_, KStringHandler::capwords(bibtexField_), Data::Field::Para);
    }
    field->setProperty(TQString::fromLatin1("bibtex"), bibtexField_);
    c->addField(field);
  }

  // special case keywords, replace commas with semi-colons so they get separated
  TQString value = value_;
  if(field->property(TQString::fromLatin1("bibtex")).startsWith(TQString::fromLatin1("keyword"))) {
    value.replace(',', ';');
    // special case refbase bibtex export, with multiple keywords fields
    TQString oValue = entry_->field(field);
    if(!oValue.isEmpty()) {
      value = oValue + "; " + value;
    }
  }
  return entry_->setField(field, value);
}

/**
 * Strips LaTeX markup from a string, in place.
 *
 * Removes the shortest runs from a backslash up to and including the next
 * opening brace, then all remaining braces, and turns '~' into a space.
 *
 * @param text_ The text to clean, modified in place
 * @return A reference to text_
 */
TQString& BibtexHandler::cleanText(TQString& text_) {
  // FIXME: need to improve this for removing all Latex entities
//  TQRegExp rx(TQString::fromLatin1("(?=[^\\\\])\\\\.+\\{"));
  TQRegExp rx(TQString::fromLatin1("\\\\.+\\{"));
  rx.setMinimal(true);
  text_.replace(rx, TQString());
  text_.replace(TQRegExp(TQString::fromLatin1("[{}]")), TQString());
  text_.replace('~', ' ');
  return text_;
}

/**
 * Adds braces around runs of capital letters that are not already inside
 * braces, in place. A single capital at the very start of the text is left
 * alone. Does nothing unless TELLICO_BIBTEX_BRACES is non-zero.
 *
 * @param text The text to modify
 * @return A reference to text
 */
// add braces around capital letters
TQString& BibtexHandler::addBraces(TQString& text) {
#if !TELLICO_BIBTEX_BRACES
  return text;
#else
  int inside = 0;
  uint l = text.length();
  // start at first letter, but skip if only the first is capitalized
  for(uint i = 0; i < l; ++i) {
    const TQChar c = text.at(i);
    if(inside == 0 && c >= 'A' && c <= 'Z') {
      uint j = i+1;
      // check the bound first so the string is never read past its end
      while(j < l && text.at(j) >= 'A' && text.at(j) <= 'Z') {
        ++j;
      }
      if(i == 0 && j == 1) {
        continue; // no need to do anything to first letter
      }
      text.insert(i, '{');
      // now j should be incremented
      text.insert(j+1, '}');
      i = j+1;
      l += 2; // the length changed
    } else if(c == '{') {
      ++inside;
    } else if(c == '}') {
      --inside;
    }
  }
  return text;
#endif
}