Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features.

BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdewebdev@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da
author: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
committer: toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da> 2009-11-25 17:56:58 +0000
commit: e9ae80694875f869892f13f4fcaf1170a00dea41 (patch)
tree: aa2f8d8a217e2d376224c8d46b7397b68d35de2d /klinkstatus/src/parser/htmlparser.h
download: tdewebdev-e9ae80694875f869892f13f4fcaf1170a00dea41.tar.gz
tdewebdev-e9ae80694875f869892f13f4fcaf1170a00dea41.zip
1 files changed, 124 insertions, 0 deletions
diff --git a/klinkstatus/src/parser/htmlparser.h b/klinkstatus/src/parser/htmlparser.h
new file mode 100644
index 00000000..cf487ebf
--- /dev/null
+++ b/klinkstatus/src/parser/htmlparser.h
@@ -0,0 +1,124 @@
+	/***************************************************************************
+ *   Copyright (C) 2004 by Paulo Moura Guedes                              *
+ *   moura@kdewebdev.org                                                        *
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.             *
+ ***************************************************************************/
+
+#ifndef HTML_PARSER_H
+#define HTML_PARSER_H
+
+#include <qstring.h>
+
+#include <vector>
+
+
+#include "mstring.h"
+#include "node.h"
+
+#include <iostream>
+
+using namespace std;
+
+typedef unsigned int uint;
+
+
+
+class HtmlParser
+{
+public:
+
+  HtmlParser();
+  HtmlParser(QString const& documento);
+  ~HtmlParser();
+
+  vector<Node*> const& nodes() const;
+  bool hasBaseUrl() const;
+  bool hasTitle() const;
+  bool hasContentType() const;
+  NodeBASE const& baseUrl() const;
+  NodeTITLE const& title() const;
+  NodeMETA const& contentTypeMetaNode() const;
+
+  static uint estimativaLinks(uint doc_size);
+  /**
+   * Convenience function for performance as it only parse in order 
+   * to get the charset.
+   */
+  static QString findCharsetInMetaElement(QString const& html);
+
+  // test:
+  void mostra() const;
+
+private:
+
+  vector<QString> const& parseNodesOfType(QString const& element);
+  /**
+   * Vector nodes passed for performance.
+   */
+  static void parseNodesOfType(QString const& element, QString const& doc, vector<QString>& nodes);
+
+  void parseNodesOfTypeA();
+  void parseNodesOfTypeAREA();
+  void parseNodesOfTypeLINK();
+  void parseNodesOfTypeMETA();
+  void parseNodesOfTypeIMG();
+  void parseNodesOfTypeFRAME();
+  void parseNodesOfTypeIFRAME();
+  void parseNodesOfTypeBASE();
+  void parseNodesOfTypeTITLE();
+
+  void stripComments();
+  void stripScriptContent();
+
+  /**
+     Return the index of the next character of the end of tag.
+     e.g.
+     endOfTag("<img src=\"bad > luck\">") => 22 (not 15)
+  */
+  static int endOfTag(QString const& s, int index = 0, QChar end_of_tag = '>');
+
+private:
+
+  vector<QString> aux_; // for what the hell is this? looks ugly... maybe I was drunk, can't remember
+  vector<Node*> nodes_;
+  NodeBASE node_BASE_;
+  NodeTITLE node_TITLE_;
+  NodeMETA node_META_content_type_;
+  bool is_content_type_set_;
+
+  QString document_;
+  QString script_; // Fica aqui guardado (JavaScript, etc)
+  QString comments_;
+};
+
+
+inline HtmlParser::~HtmlParser()
+{
+  //kdDebug(23100) <<  "*";
+}
+
+inline uint HtmlParser::estimativaLinks(uint doc_size)
+{
+  return doc_size / 100; // valor estimado...
+}
+
+inline bool HtmlParser::hasContentType() const 
+{
+    return is_content_type_set_;
+}
+
+#endif
author	toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2009-11-25 17:56:58 +0000
committer	toma <toma@283d02a7-25f6-0310-bc7c-ecb5cbfe19da>	2009-11-25 17:56:58 +0000
commit	e9ae80694875f869892f13f4fcaf1170a00dea41 (patch)
tree	aa2f8d8a217e2d376224c8d46b7397b68d35de2d /klinkstatus/src/parser/htmlparser.h
download	tdewebdev-e9ae80694875f869892f13f4fcaf1170a00dea41.tar.gz tdewebdev-e9ae80694875f869892f13f4fcaf1170a00dea41.zip