diff options
Diffstat (limited to 'tdefile-plugins/html')
-rw-r--r-- | tdefile-plugins/html/Makefile.am | 22 | ||||
-rw-r--r-- | tdefile-plugins/html/tdefile_html.cpp | 158 | ||||
-rw-r--r-- | tdefile-plugins/html/tdefile_html.desktop | 66 | ||||
-rw-r--r-- | tdefile-plugins/html/tdefile_html.h | 39 |
4 files changed, 285 insertions, 0 deletions
diff --git a/tdefile-plugins/html/Makefile.am b/tdefile-plugins/html/Makefile.am new file mode 100644 index 0000000..6097b26 --- /dev/null +++ b/tdefile-plugins/html/Makefile.am @@ -0,0 +1,22 @@ +## Makefile.am for html file meta info plugin + +# set the include path for X, qt and KDE +INCLUDES = $(all_includes) + +# these are the headers for your project +noinst_HEADERS = tdefile_html.h + +kde_module_LTLIBRARIES = tdefile_html.la + +tdefile_html_la_SOURCES = tdefile_html.cpp +tdefile_html_la_LDFLAGS = $(all_libraries) -module $(KDE_PLUGIN) +tdefile_html_la_LIBADD = $(LIB_KSYCOCA) + +# let automoc handle all of the meta source files (moc) +METASOURCES = AUTO + +messages: rc.cpp + $(XGETTEXT) tdefile_html.cpp -o $(podir)/tdefile_html.pot + +services_DATA = tdefile_html.desktop +servicesdir = $(kde_servicesdir) diff --git a/tdefile-plugins/html/tdefile_html.cpp b/tdefile-plugins/html/tdefile_html.cpp new file mode 100644 index 0000000..c59711a --- /dev/null +++ b/tdefile-plugins/html/tdefile_html.cpp @@ -0,0 +1,158 @@ +/* This file is part of the KDE project + * Copyright (C) 2001, 2002 Rolf Magnus <ramagnus@kde.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ * $Id$
+ */
+
+#include "tdefile_html.h"
+#include "tdefile_html.moc"
+#include <kgenericfactory.h>
+#include <kmimetype.h>
+#include <kurl.h>
+#include <kprocess.h>
+#include <kdebug.h>
+#include <tqcstring.h>
+#include <tqfile.h>
+#include <tqregexp.h>
+#include <tqtextcodec.h>
+
+typedef KGenericFactory<KHtmlPlugin> HtmlFactory;
+
+K_EXPORT_COMPONENT_FACTORY( tdefile_html, HtmlFactory( "tdefile_html" ) )
+
+/**
+ * Declares the meta information this plugin provides for "text/html":
+ * a "General" group with the items "Doctype" (string), "Javascript"
+ * (bool) and "Title" (string, hinted as the document name), and a
+ * "Metatags" group that accepts a variable set of string items.
+ *
+ * @param parent  forwarded to KFilePlugin
+ * @param name    forwarded to KFilePlugin
+ * @param args    forwarded to KFilePlugin
+ */
+KHtmlPlugin::KHtmlPlugin( TQObject *parent, const char *name,
+                          const TQStringList &args )
+    : KFilePlugin( parent, name, args )
+{
+    kdDebug(7034) << "html plugin\n";
+
+    KFileMimeTypeInfo* info = addMimeTypeInfo("text/html");
+
+    KFileMimeTypeInfo::GroupInfo* group;
+    KFileMimeTypeInfo::ItemInfo* item;
+
+    group = addGroupInfo(info, "General", i18n("General"));
+    addItemInfo(group, "Doctype", i18n("Document Type"), TQVariant::String);
+    addItemInfo(group, "Javascript", i18n("JavaScript"), TQVariant::Bool);
+    item = addItemInfo(group, "Title", i18n("Title"), TQVariant::String);
+    setHint(item, KFileMimeTypeInfo::Name);
+
+    group = addGroupInfo(info, "Metatags", i18n("Meta Tags"));
+    addVariableInfo(group, TQVariant::String, 0);
+}
+
+
+/**
+ * Extracts meta information from the beginning of a local HTML file.
+ *
+ * At most the first 32768 bytes of the file are examined. The document
+ * type, the title and a flag telling whether a script element was seen
+ * are stored in the "General" group. Every <meta> tag that carries both
+ * a name (or http-equiv) and a content attribute is stored in the
+ * "Metatags" group, with the content truncated to 50 characters. If a
+ * meta tag announces a charset for which a text codec is found, the
+ * title is decoded with that codec.
+ *
+ * @param info  meta info object to fill; info.path() names the file
+ * @param what  unused
+ * @return false if the file has no local path, cannot be opened or
+ *         cannot be read; true otherwise
+ */
+bool KHtmlPlugin::readInfo( KFileMetaInfo& info, uint )
+{
+    if ( info.path().isEmpty() ) // remote file
+        return false;
+
+    TQFile f(info.path());
+    if (!f.open(IO_ReadOnly))
+        return false;
+
+    // Only the document header is of interest, so a fixed-size prefix of
+    // the file is enough. Capping the buffer also keeps the plugin cheap
+    // on pathological input (think of a 0-filled 3GB file with an .html
+    // extension).
+    int maxBufSize = TQMIN(f.size(), 32768);
+    TQByteArray data(maxBufSize + 1);
+    const int bytesRead = f.readBlock(data.data(), maxBufSize);
+    if (bytesRead < 0)
+        return false;
+
+    // Terminate after the bytes that were actually read: the array is not
+    // zero-initialised, so a short read must not expose stale memory.
+    data[bytesRead] = '\0';
+
+    TQString s(data);
+
+    int start = 0, last = 0;
+    TQRegExp exp;
+    exp.setCaseSensitive(false);
+    exp.setMinimal(true);
+
+    KFileMetaInfoGroup group = appendGroup(info, "General");
+
+    exp.setPattern("\\s*<\\s*!doctype\\s*([^>]*)\\s*>");
+    if ((start = exp.search(s, last)) != -1)
+    {
+        kdDebug(7034) << "DocType: " << TQString(exp.capturedTexts().join("-")) << endl;
+        appendItem(group, "Doctype", exp.cap(1));
+        // continue behind the match, wherever in the text it started
+        last = start + exp.matchedLength();
+    }
+
+    TQString title;
+    exp.setPattern("<\\s*title\\s*>\\s*(.*)\\s*<\\s*/\\s*title\\s*>");
+    if ((start = exp.search(s, last)) != -1)
+    {
+        title = exp.cap(1);
+        last = start + exp.matchedLength();
+    }
+
+    KFileMetaInfoGroup metatags = appendGroup(info, "Metatags");
+
+    TQString meta, name, content;
+    exp.setPattern("<\\s*meta\\s*([^>]*)\\s*>");
+    TQRegExp rxName("(?:name|http-equiv)\\s*=\\s*\"([^\"]+)\"", false);
+    TQRegExp rxContent("content\\s*=\\s*\"([^\"]+)\"", false);
+    TQRegExp rxCharset("charset\\s*=\\s*(.*)", false);
+    TQTextCodec *codec = 0;
+
+    // find the meta tags; they are searched from the start of the text
+    // because they may precede the title
+    last = 0;
+    while ((start = exp.search(s, last)) != -1)
+    {
+        meta = exp.cap(1);
+        last = start + exp.matchedLength();
+
+        kdDebug(7034) << "Found Meta: " << meta << endl;
+
+        // tags without both a name and a content attribute are skipped
+        if (rxName.search(meta) == -1)
+            continue;
+        name = rxName.cap(1);
+
+        if (rxContent.search(meta) == -1)
+            continue;
+        content = rxContent.cap(1);
+
+        appendItem(metatags, name, content.left(50));
+
+        // check if it has a charset defined
+        if ( rxCharset.search(content) != -1 )
+        {
+            kdDebug(7034) << "CodecForName : " << rxCharset.cap(1) << endl;
+            codec = TQTextCodec::codecForName(rxCharset.cap(1).ascii());
+        }
+    }
+
+    if ( ! title.isEmpty() )
+    {
+        if ( codec )
+        {
+            title = codec->toUnicode(title.ascii());
+            kdDebug(7034) << "Codec : " << codec->name() << endl;
+        }
+
+        appendItem(group, "Title", title);
+    }
+
+    // find out if it contains javascript: match the opening tag whether
+    // or not it carries attributes such as type= or src=
+    exp.setPattern("<\\s*script\\b");
+
+    // the second TQVariant argument is a dummy that selects the bool
+    // constructor
+    appendItem(group, "Javascript", TQVariant( s.find(exp)!=-1, 42));
+
+    return true;
+}
+
diff --git a/tdefile-plugins/html/tdefile_html.desktop b/tdefile-plugins/html/tdefile_html.desktop
new file mode 100644
index 0000000..4dfd89c
--- /dev/null
+++ b/tdefile-plugins/html/tdefile_html.desktop
@@ -0,0 +1,66 @@
+[Desktop Entry]
+Type=Service
+Name=HTML Playlist Info
+Name[af]=Html Liedjielys Inligting
+Name[ar]=معلومات قائمة أغاني HTML
+Name[az]=HTML Çalğı Siyahısı Mə'lumatı
+Name[bg]=Информация за HTML файл
+Name[br]=Titouroù diwar-benn ar roll tonioù HTML
+Name[ca]=Info. de la llista de reproducció HTML
+Name[cs]=Informace o seznamu skladeb HTML
+Name[cy]=Gwybodaeth Rhestr Chwarae HTML
+Name[da]=Information om HTML-spilleliste
+Name[de]=HTML-Wiedergabelisteninfo
+Name[el]=Πληροφορίες λίστας αναπαραγωγής HTML
+Name[eo]=HTML-ludlistinformo
+Name[es]=Información de la lista de reproducción HTML
+Name[et]=HTML esitusnimekirja info
+Name[eu]=HTML erreprodukzio-zerrendaren informazioa
+Name[fa]=اطلاعات فهرست پخش زنگام
+Name[fi]=HTML soittolistan tiedot
+Name[fo]=HTML spælilistaupplýsingar
+Name[fr]=Informations sur une liste de lecture HTML
+Name[fy]=HTML-ôfspyllistynfo
+Name[gl]=Información de Listas de Reprodución HTML
+Name[he]=מידע רשימת ניגון HTML
+Name[hi]=एचटीएमएल प्लेलिस्ट जानकारी
+Name[hr]=HTML podaci o popisu za sviranje
+Name[hu]=HTML lejátszási lista jellemzői
+Name[is]=Upplýsingar um HTML lagalista
+Name[it]=Informazioni playlist HTML
+Name[ja]=HTML プレイリスト情報
+Name[ka]=HTML სიმღერათა სიის ინფორმაცია
+Name[kk]=HTML орындау тізім мәліметі
+Name[km]=ព័ត៌មានបញ្ជីចាក់ HTML
+Name[lt]=HTML grojaraščio informacija
+Name[mk]=Информации за HTML-листа на нумери
+Name[ms]=Maklumat Senarai Main HTML
+Name[nb]=HTML-spilleliste-informasjon +Name[nds]=HTML-Afspellistinformatschonen +Name[ne]=एचटीएमएल प्लेसूची सूचना +Name[nl]=HTML-afspeellijstinfo +Name[nn]=Informasjon om HTML-speleliste +Name[pa]=HTML ਸੰਗੀਤ-ਸੂਚੀ ਜਾਣਕਾਰੀ +Name[pl]=Informacja o liście nagrań w HTML-u +Name[pt]=Informações de Listas de Músicas HTML +Name[pt_BR]=Informação de listas de reprodução HTML +Name[ro]=Informaţii listă de redare HTML +Name[ru]=Информация о списке песен в формате HTML +Name[sk]=HTML informácie o playliste +Name[sl]=Informacije o predvajalnem seznamu HTML +Name[sr]=HTML информације о листи нумера +Name[sr@Latn]=HTML informacije o listi numera +Name[sv]=Information om HTML-spellista +Name[ta]=HTML வாசிப்புப்பட்டியல் தகவல் +Name[tg]=Ахборот дар бораи рӯйхати сурудҳо дар HTML +Name[th]=ข้อมูลรายการเล่น HTML +Name[tr]=HTML Çalma Listesi Bilgisi +Name[uk]=Інформація списку композицій в HTML +Name[vi]=Thông tin danh mục nhạc HTML +Name[xh]=Ulwazi loluhlu lodweliso lomdlali we HTML +Name[zh_CN]=HTML 播放列表信息 +Name[zh_TW]=HTML 播放清單資訊 +ServiceTypes=KFilePlugin +X-TDE-Library=tdefile_html +MimeType=text/html +PreferredItems=Title,Javascript,Doctype,Keywords diff --git a/tdefile-plugins/html/tdefile_html.h b/tdefile-plugins/html/tdefile_html.h new file mode 100644 index 0000000..858d6a1 --- /dev/null +++ b/tdefile-plugins/html/tdefile_html.h @@ -0,0 +1,39 @@ +/* This file is part of the KDE project + * Copyright (C) 2001, 2002 Rolf Magnus <ramagnus@kde.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * $Id$
+ */
+
+// Guard name avoids a double underscore: such identifiers are reserved
+// for the C++ implementation.
+#ifndef TDEFILE_HTML_H
+#define TDEFILE_HTML_H
+
+#include <tdefilemetainfo.h>
+#include <kurl.h>
+
+class TQStringList;
+
+/**
+ * File meta-information plugin for HTML documents, built on KFilePlugin.
+ */
+class KHtmlPlugin: public KFilePlugin
+{
+    Q_OBJECT
+
+public:
+    /**
+     * Creates the plugin.
+     *
+     * @param parent  forwarded to KFilePlugin
+     * @param name    forwarded to KFilePlugin
+     * @param args    forwarded to KFilePlugin
+     */
+    KHtmlPlugin( TQObject *parent, const char *name, const TQStringList& args );
+
+    /**
+     * Fills @p info with the meta information found in the file it refers to.
+     *
+     * @param info  meta info object to fill
+     * @param what  not used by this plugin's implementation
+     * @return true if the information could be read, false otherwise
+     */
+    virtual bool readInfo( KFileMetaInfo& info, uint what );
+};
+
+#endif // TDEFILE_HTML_H