diff options
Diffstat (limited to 'tdehtml/html/htmltokenizer.cpp')
-rw-r--r-- | tdehtml/html/htmltokenizer.cpp | 1798 |
1 files changed, 1798 insertions, 0 deletions
diff --git a/tdehtml/html/htmltokenizer.cpp b/tdehtml/html/htmltokenizer.cpp new file mode 100644 index 000000000..292d1773d --- /dev/null +++ b/tdehtml/html/htmltokenizer.cpp @@ -0,0 +1,1798 @@ +/* + This file is part of the KDE libraries + + Copyright (C) 1997 Martin Jones (mjones@kde.org) + (C) 1997 Torben Weis (weis@kde.org) + (C) 1998 Waldo Bastian (bastian@kde.org) + (C) 1999 Lars Knoll (knoll@kde.org) + (C) 1999 Antti Koivisto (koivisto@kde.org) + (C) 2001-2003 Dirk Mueller (mueller@kde.org) + (C) 2004 Apple Computer, Inc. + (C) 2006 Germain Garand (germain@ebooksfrance.org) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. 
+*/ +//---------------------------------------------------------------------------- +// +// KDE HTML Widget - Tokenizers + +//#define TOKEN_DEBUG 1 +//#define TOKEN_DEBUG 2 + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "html/htmltokenizer.h" +#include "html/html_documentimpl.h" +#include "html/htmlparser.h" +#include "html/dtd.h" + +#include "misc/loader.h" +#include "misc/htmlhashes.h" + +#include "tdehtmlview.h" +#include "tdehtml_part.h" +#include "xml/dom_docimpl.h" +#include "css/csshelper.h" +#include "ecma/kjs_proxy.h" +#include <kcharsets.h> +#include <tdeglobal.h> +#include <ctype.h> +#include <assert.h> +#include <tqvariant.h> +#include <kdebug.h> +#include <stdlib.h> + +#include "kentities.c" + +using namespace tdehtml; + +static const TQChar commentStart [] = { '<','!','-','-', TQChar::null }; + +static const char scriptEnd [] = "</script"; +static const char xmpEnd [] = "</xmp"; +static const char styleEnd [] = "</style"; +static const char textareaEnd [] = "</textarea"; +static const char titleEnd [] = "</title"; + +#define TDEHTML_ALLOC_QCHAR_VEC( N ) (TQChar*) malloc( sizeof(TQChar)*( N ) ) +#define TDEHTML_REALLOC_QCHAR_VEC(P, N ) (TQChar*) realloc(P, sizeof(TQChar)*( N )) +#define TDEHTML_DELETE_QCHAR_VEC( P ) free((char*)( P )) + +// Full support for MS Windows extensions to Latin-1. +// Technically these extensions should only be activated for pages +// marked "windows-1252" or "cp1252", but +// in the standard Microsoft way, these extensions infect hundreds of thousands +// of web pages. Note that people with non-latin-1 Microsoft extensions +// are SOL. 
+// fixUpChar(x) rewrites x in place when x.unicode() is one of the 0x80-0x9F values listed below; every other value is left untouched.
+//      See: http://www.microsoft.com/globaldev/reference/WinCP.asp
+//           http://www.bbsinc.com/iso8859.html
+//           http://www.obviously.com/
+//
+// There may be better equivalents
+#if 0 // change to 1 to compile the fix-up out: fixUpChar(x) then expands to nothing
+#define fixUpChar(x)
+#else
+#define fixUpChar(x) \
+    switch ((x).unicode()) \
+    { \
+    case 0x80: (x) = 0x20ac; break; \
+    case 0x82: (x) = 0x201a; break; \
+    case 0x83: (x) = 0x0192; break; \
+    case 0x84: (x) = 0x201e; break; \
+    case 0x85: (x) = 0x2026; break; \
+    case 0x86: (x) = 0x2020; break; \
+    case 0x87: (x) = 0x2021; break; \
+    case 0x88: (x) = 0x02C6; break; \
+    case 0x89: (x) = 0x2030; break; \
+    case 0x8A: (x) = 0x0160; break; \
+    case 0x8b: (x) = 0x2039; break; \
+    case 0x8C: (x) = 0x0152; break; \
+    case 0x8E: (x) = 0x017D; break; \
+    case 0x91: (x) = 0x2018; break; \
+    case 0x92: (x) = 0x2019; break; \
+    case 0x93: (x) = 0x201C; break; \
+    case 0x94: (x) = 0X201D; break; \
+    case 0x95: (x) = 0x2022; break; \
+    case 0x96: (x) = 0x2013; break; \
+    case 0x97: (x) = 0x2014; break; \
+    case 0x98: (x) = 0x02DC; break; \
+    case 0x99: (x) = 0x2122; break; \
+    case 0x9A: (x) = 0x0161; break; \
+    case 0x9b: (x) = 0x203A; break; \
+    case 0x9C: (x) = 0x0153; break; \
+    case 0x9E: (x) = 0x017E; break; \
+    case 0x9F: (x) = 0x0178; break; \
+    default: break; \
+    }
+#endif
+// ----------------------------------------------------------------------------
+// Tokenizer attached to a view: creates a TDEHTMLParser from (_view, _doc), zeroes the buffer/script bookkeeping and calls reset().
+HTMLTokenizer::HTMLTokenizer(DOM::DocumentImpl *_doc, TDEHTMLView *_view)
+{
+    view = _view;
+    buffer = 0; // no output buffer yet; begin() allocates it
+    scriptCode = 0;
+    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
+    charsets = TDEGlobal::charsets();
+    parser = new TDEHTMLParser(_view, _doc);
+    m_executingScript = 0; // must be 0 before reset(), which asserts on it
+    m_autoCloseTimer = 0;
+    onHold = false; // must be false before reset(), which asserts on it
+
+    reset();
+}
+// Tokenizer without a view (view = 0): the parser is created from the fragment i and _doc. NOTE(review): presumably the innerHTML-style fragment path - confirm against callers.
+HTMLTokenizer::HTMLTokenizer(DOM::DocumentImpl *_doc, DOM::DocumentFragmentImpl *i)
+{
+    view = 0; // script execution and script-src handling elsewhere in this file are guarded by 'view'
+    buffer = 0;
+    scriptCode = 0;
+    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
+    charsets = TDEGlobal::charsets();
+    parser = new TDEHTMLParser( i, _doc );
+    m_executingScript = 0;
+    m_autoCloseTimer = 0;
+    onHold = false;
+
+    reset();
+}
+// Return to the idle state: release the references on queued cached scripts, free the token and script buffers, stop the auto-close timer and clear currToken.
+void HTMLTokenizer::reset()
+{
+    assert(m_executingScript == 0); // resetting while scriptExecution() is active is a programming error
+    Q_ASSERT(onHold == false);
+    m_abort = false;
+
+    while (!cachedScript.isEmpty())
+        cachedScript.dequeue()->deref(this); // give up our reference on every script still queued
+
+    if ( buffer )
+        TDEHTML_DELETE_QCHAR_VEC(buffer);
+    buffer = dest = 0; // write() returns immediately while buffer is null
+    size = 0;
+
+    if ( scriptCode )
+        TDEHTML_DELETE_QCHAR_VEC(scriptCode);
+    scriptCode = 0;
+    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
+
+    if (m_autoCloseTimer) {
+        killTimer(m_autoCloseTimer);
+        m_autoCloseTimer = 0;
+    }
+
+    currToken.reset();
+}
+// Prepare for a new input stream: reset(), allocate the initial output buffer and put every parsing flag and counter back to its start value.
+void HTMLTokenizer::begin()
+{
+    m_executingScript = 0; // cleared first so that the asserts in reset() hold
+    onHold = false;
+    reset();
+    size = 254; // one less than the 255 TQChars allocated on the next line
+    buffer = TDEHTML_ALLOC_QCHAR_VEC( 255 );
+    dest = buffer; // write position starts at the beginning of the buffer
+    tag = NoTag;
+    pending = NonePending;
+    discard = NoneDiscard;
+    pre = false;
+    prePos = 0;
+    plaintext = false;
+    xmp = false;
+    processingInstruction = false;
+    script = false;
+    escaped = false;
+    style = false;
+    skipLF = false;
+    select = false;
+    comment = false;
+    server = false;
+    textarea = false;
+    title = false;
+    startTag = false;
+    tquote = NoQuote;
+    searchCount = 0;
+    Entity = NoEntity;
+    noMoreData = false;
+    brokenComments = false;
+    brokenServer = false;
+    brokenScript = false;
+    lineno = 0;
+    scriptStartLineno = 0;
+    tagStartLineno = 0;
+}
+// 'list' is taken by value: the loop in this function advances and thereby consumes its own copy.
+void HTMLTokenizer::processListing(TokenizerString list)
+{
+    bool old_pre = pre; // restored at the end of the function
+
+    // This function adds the listing 'list' as
+    // preformatted text-tokens to the token-collection
+    // thereby converting TABs.
+ if(!style) pre = true; + prePos = 0; + + while ( !list.isEmpty() ) + { + checkBuffer(3*TAB_SIZE); + + if (skipLF && ( *list != '\n' )) + { + skipLF = false; + } + + if (skipLF) + { + skipLF = false; + ++list; + } + else if (( *list == '\n' ) || ( *list == '\r' )) + { + if (discard == LFDiscard) + { + // Ignore this LF + discard = NoneDiscard; // We have discarded 1 LF + } + else + { + // Process this LF + if (pending) + addPending(); + + // we used to do it not at all and we want to have + // it fixed for textarea. So here we are + if ( textarea ) { + prePos++; + *dest++ = *list; + } else + pending = LFPending; + } + /* Check for MS-DOS CRLF sequence */ + if (*list == '\r') + { + skipLF = true; + } + ++list; + } + else if (( *list == ' ' ) || ( *list == '\t')) + { + if (pending) + addPending(); + if (*list == ' ') + pending = SpacePending; + else + pending = TabPending; + + ++list; + } + else + { + discard = NoneDiscard; + if (pending) + addPending(); + + prePos++; + *dest++ = *list; + ++list; + } + + } + + if ((pending == SpacePending) || (pending == TabPending)) + addPending(); + else + pending = NonePending; + + prePos = 0; + pre = old_pre; +} + +void HTMLTokenizer::parseSpecial(TokenizerString &src) +{ + assert( textarea || title || !Entity ); + assert( !tag ); + assert( xmp+textarea+title+style+script == 1 ); + if (script) + scriptStartLineno = lineno+src.lineCount(); + + if ( comment ) parseComment( src ); + + while ( !src.isEmpty() ) { + checkScriptBuffer(); + unsigned char ch = src->latin1(); + if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && TQConstString( scriptCode+scriptCodeSize-3, 3 ).string() == "<!-" ) { + comment = true; + scriptCode[ scriptCodeSize++ ] = ch; + ++src; + parseComment( src ); + continue; + } + if ( scriptCodeResync && !tquote && ( ch == '>' ) ) { + ++src; + scriptCodeSize = scriptCodeResync-1; + scriptCodeResync = 0; + scriptCode[ scriptCodeSize ] = scriptCode[ 
scriptCodeSize + 1 ] = 0; + if ( script ) + scriptHandler(); + else { + processListing(TokenizerString(scriptCode, scriptCodeSize)); + processToken(); + if ( style ) { currToken.tid = ID_STYLE + ID_CLOSE_TAG; } + else if ( textarea ) { currToken.tid = ID_TEXTAREA + ID_CLOSE_TAG; } + else if ( title ) { currToken.tid = ID_TITLE + ID_CLOSE_TAG; } + else if ( xmp ) { currToken.tid = ID_XMP + ID_CLOSE_TAG; } + processToken(); + script = style = textarea = title = xmp = false; + tquote = NoQuote; + scriptCodeSize = scriptCodeResync = 0; + } + return; + } + // possible end of tagname, lets check. + if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch && + scriptCodeSize >= searchStopperLen && + !TQConstString( scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen ).string().find( searchStopper, 0, false )) { + scriptCodeResync = scriptCodeSize-searchStopperLen+1; + tquote = NoQuote; + continue; + } + if ( scriptCodeResync && !escaped ) { + if(ch == '\"') + tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote); + else if(ch == '\'') + tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? 
DoubleQuote : NoQuote; + else if (tquote != NoQuote && (ch == '\r' || ch == '\n')) + tquote = NoQuote; + } + escaped = ( !escaped && ch == '\\' ); + if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') { + TQChar *scriptCodeDest = scriptCode+scriptCodeSize; + ++src; + parseEntity(src,scriptCodeDest,true); + scriptCodeSize = scriptCodeDest-scriptCode; + } + else { + scriptCode[ scriptCodeSize++ ] = *src; + ++src; + } + } +} + +void HTMLTokenizer::scriptHandler() +{ + TQString currentScriptSrc = scriptSrc; + scriptSrc = TQString::null; + + processListing(TokenizerString(scriptCode, scriptCodeSize)); + TQString exScript( buffer, dest-buffer ); + + processToken(); + currToken.tid = ID_SCRIPT + ID_CLOSE_TAG; + processToken(); + + // Scripts following a frameset element should not be executed or even loaded in the case of extern scripts. + bool followingFrameset = (parser->doc()->body() && parser->doc()->body()->id() == ID_FRAMESET); + bool effectiveScript = !parser->skipMode() && !followingFrameset; + bool deferredScript = false; + + if ( effectiveScript ) { + CachedScript* cs = 0; + + // forget what we just got, load from src url instead + if ( !currentScriptSrc.isEmpty() && javascript && + (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) )) { + cachedScript.enqueue(cs); + } + + if (cs) { + pendingQueue.push(src); + uint scriptCount = cachedScript.count(); + setSrc(TokenizerString()); + scriptCodeSize = scriptCodeResync = 0; + cs->ref(this); + if (cachedScript.count() == scriptCount) + deferredScript = true; + } + else if (currentScriptSrc.isEmpty() && view && javascript ) { + pendingQueue.push(src); + setSrc(TokenizerString()); + scriptCodeSize = scriptCodeResync = 0; + scriptExecution( exScript, TQString::null, tagStartLineno /*scriptStartLineno*/ ); + } else { + // script was filtered or disallowed + effectiveScript = false; + } + } + + script = false; + scriptCodeSize = scriptCodeResync = 0; + + if ( 
!effectiveScript ) // nothing was run or requested above, so no input was pushed onto pendingQueue
+        return;
+
+    if ( !m_executingScript && cachedScript.isEmpty() ) { // no script active or pending: take the saved input back into src
+        src.append(pendingQueue.pop());
+    } else if ( cachedScript.isEmpty() ) { // a script is still executing: hand the saved input to write() with appendData == false
+        write( pendingQueue.pop(), false );
+    } else if ( !deferredScript && pendingQueue.count() > 1) { // scripts still queued: prepend the popped chunk to the entry that is now on top
+        TokenizerString t = pendingQueue.pop();
+        pendingQueue.top().prepend( t );
+    }
+}
+// Run the script text 'str' through the part that owns the view; does nothing except toggle the bookkeeping when there is no view.
+void HTMLTokenizer::scriptExecution( const TQString& str, const TQString& scriptURL, // scriptURL: if null (and a view exists) the document's own URL is used
+                                    int baseLine) // baseLine: passed to executeScript() as baseLine+1
+{
+    bool oldscript = script;
+    m_executingScript++; // write() queues appended data while this is non-zero
+    script = false; // cleared for the duration of the call, restored below
+    TQString url;
+    if (scriptURL.isNull() && view)
+        url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL().url();
+    else
+        url = scriptURL;
+
+    if (view)
+        view->part()->executeScript(url,baseLine+1,Node(),str);
+    m_executingScript--;
+    script = oldscript;
+}
+// Copy comment text from src into scriptCode until the comment closes; if src runs out first, return and let write() call this again while 'comment' is still set.
+void HTMLTokenizer::parseComment(TokenizerString &src)
+{
+    // SGML strict: only for strict-mode, non-XHTML documents and never inside script or style
+    bool strict = parser->doc()->inStrictMode() && parser->doc()->htmlMode() != DocumentImpl::XHtml && !script && !style;
+    int delimiterCount = 0; // consecutive '-' seen so far (strict mode only)
+    bool canClose = false; // toggled by every complete "--" pair (strict mode only)
+
+    checkScriptBuffer(src.length());
+    while ( src.length() ) {
+        scriptCode[ scriptCodeSize++ ] = *src;
+
+#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
+        tqDebug("comment is now: *%s*", src.toString().left(16).latin1());
+#endif
+
+        if (strict)
+        {
+            if (src->unicode() == '-') {
+                delimiterCount++;
+                if (delimiterCount == 2) {
+                    delimiterCount = 0;
+                    canClose = !canClose;
+                }
+            }
+            else
+                delimiterCount = 0;
+        }
+
+        if ((!strict || canClose) && src->unicode() == '>')
+        {
+            bool handleBrokenComments = brokenComments && !( script || style ); // broken-comment mode: any '>' closes, except inside script/style
+            bool scriptEnd=false; // NOTE(review): this local shadows the file-level scriptEnd[] string; here it means "'>' is preceded by --"
+            if (!strict)
+            {
+                if ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&
+                     scriptCode[scriptCodeSize-2] == '-' )
+                    scriptEnd=true;
+            }
+
+            if (canClose || handleBrokenComments || scriptEnd ){
+                ++src;
+                if ( !( title || script || xmp || textarea || style) ) { // outside the special elements the collected comment text is dropped
+#ifdef COMMENTS_IN_DOM
+                    checkScriptBuffer();
+                    scriptCode[ scriptCodeSize ] = 0;
+                    scriptCode[ scriptCodeSize + 1 ] = 0;
+                    currToken.tid = ID_COMMENT;
+                    processListing(DOMStringIt(scriptCode, scriptCodeSize - 2));
+                    processToken();
+                    currToken.tid = ID_COMMENT + ID_CLOSE_TAG;
+                    processToken();
+#endif
+                    scriptCodeSize = 0;
+                }
+                comment = false;
+                return; // Finished parsing comment
+            }
+        }
+        ++src;
+    }
+}
+// Skip a server block: copy characters into scriptCode until a '>' directly preceded by '%' is seen, then clear 'server' and throw the collected text away.
+void HTMLTokenizer::parseServer(TokenizerString &src)
+{
+    checkScriptBuffer(src.length());
+    while ( !src.isEmpty() ) {
+        scriptCode[ scriptCodeSize++ ] = *src;
+        if (src->unicode() == '>' &&
+            scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') { // the '>' just stored sits at scriptCodeSize-1, so -2 is the previous character
+            ++src;
+            server = false;
+            scriptCodeSize = 0; // discard everything collected for the block
+            return; // Finished parsing server include
+        }
+        ++src;
+    }
+}
+// Skip a processing instruction: consume src up to and including the terminating '>', tracking quotes in tquote; nothing is copied to any buffer.
+void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src)
+{
+    char oldchar = 0; // NOTE(review): local, so a '?' ending one call is not remembered when '>' starts the next - confirm this is acceptable
+    while ( !src.isEmpty() )
+    {
+        unsigned char chbegin = src->latin1();
+        if(chbegin == '\'') {
+            tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
+        }
+        else if(chbegin == '\"') {
+            tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
+        }
+        // Look for '?>'
+        // some crappy sites omit the "?" before it, so
+        // we also accept any unquoted '>'. (IE compatible)
+        else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )
+        {
+            // We got a '?>' sequence or an unquoted '>'
+            processingInstruction = false;
+            ++src;
+            discard=LFDiscard; // write() ignores one line feed while discard is LFDiscard
+            return; // Finished parsing processing instruction
+        }
+        ++src;
+        oldchar = chbegin;
+    }
+}
+// Copy the rest of src to the output buffer as plain text, writing a single '\n' for each CR, LF or CR+LF.
+void HTMLTokenizer::parseText(TokenizerString &src)
+{
+    while ( !src.isEmpty() )
+    {
+        // do we need to enlarge the buffer?
+ checkBuffer(); + + // ascii is okay because we only do ascii comparisons + unsigned char chbegin = src->latin1(); + + if (skipLF && ( chbegin != '\n' )) + { + skipLF = false; + } + + if (skipLF) + { + skipLF = false; + ++src; + } + else if (( chbegin == '\n' ) || ( chbegin == '\r' )) + { + if (chbegin == '\r') + skipLF = true; + + *dest++ = '\n'; + ++src; + } + else { + *dest++ = *src; + ++src; + } + } +} + + +void HTMLTokenizer::parseEntity(TokenizerString &src, TQChar *&dest, bool start) +{ + if( start ) + { + cBufferPos = 0; + entityLen = 0; + Entity = SearchEntity; + } + + while( !src.isEmpty() ) + { + ushort cc = src->unicode(); + switch(Entity) { + case NoEntity: + return; + + break; + case SearchEntity: + if(cc == '#') { + cBuffer[cBufferPos++] = cc; + ++src; + Entity = NumericSearch; + } + else + Entity = EntityName; + + break; + + case NumericSearch: + if(cc == 'x' || cc == 'X') { + cBuffer[cBufferPos++] = cc; + ++src; + Entity = Hexadecimal; + } + else if(cc >= '0' && cc <= '9') + Entity = Decimal; + else + Entity = SearchSemicolon; + + break; + + case Hexadecimal: + { + int uc = EntityChar.unicode(); + int ll = kMin<uint>(src.length(), 8); + while(ll--) { + TQChar csrc(src->lower()); + cc = csrc.cell(); + + if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) { + break; + } + uc = uc*16 + (cc - ( cc < 'a' ? 
'0' : 'a' - 10)); + cBuffer[cBufferPos++] = cc; + ++src; + } + EntityChar = TQChar(uc); + Entity = SearchSemicolon; + break; + } + case Decimal: + { + int uc = EntityChar.unicode(); + int ll = kMin(src.length(), 9-cBufferPos); + while(ll--) { + cc = src->cell(); + + if(src->row() || !(cc >= '0' && cc <= '9')) { + Entity = SearchSemicolon; + break; + } + + uc = uc * 10 + (cc - '0'); + cBuffer[cBufferPos++] = cc; + ++src; + } + EntityChar = TQChar(uc); + if(cBufferPos == 9) Entity = SearchSemicolon; + break; + } + case EntityName: + { + int ll = kMin(src.length(), 9-cBufferPos); + while(ll--) { + TQChar csrc = *src; + cc = csrc.cell(); + + if(csrc.row() || !((cc >= 'a' && cc <= 'z') || + (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) { + Entity = SearchSemicolon; + break; + } + + cBuffer[cBufferPos++] = cc; + ++src; + + // be IE compatible and interpret even unterminated entities + // outside tags. like "foo  stuff bla". + if ( tag == NoTag ) { + const entity* e = kde_findEntity(cBuffer, cBufferPos); + if ( e && e->code < 256 ) { + EntityChar = e->code; + entityLen = cBufferPos; + } + } + } + if(cBufferPos == 9) Entity = SearchSemicolon; + if(Entity == SearchSemicolon) { + if(cBufferPos > 1) { + const entity *e = kde_findEntity(cBuffer, cBufferPos); + // IE only accepts unterminated entities < 256, + // Gecko accepts them all, but only outside tags + if(e && ( tag == NoTag || e->code < 256 || *src == ';' )) { + EntityChar = e->code; + entityLen = cBufferPos; + } + } + } + break; + } + case SearchSemicolon: +#ifdef TOKEN_DEBUG + kdDebug( 6036 ) << "ENTITY " << EntityChar.unicode() << endl; +#endif + fixUpChar(EntityChar); + + if (*src == ';') + ++src; + + if ( !EntityChar.isNull() ) { + checkBuffer(); + if (entityLen > 0 && entityLen < cBufferPos) { + int rem = cBufferPos - entityLen; + src.prepend( TokenizerString(TQString::fromAscii(cBuffer+entityLen, rem)) ); + } + src.push( EntityChar ); + } else { +#ifdef TOKEN_DEBUG + kdDebug( 6036 ) << "unknown entity!" 
<< endl; +#endif + checkBuffer(11); + // ignore the sequence, add it to the buffer as plaintext + *dest++ = '&'; + for(unsigned int i = 0; i < cBufferPos; i++) + dest[i] = cBuffer[i]; + dest += cBufferPos; + if (pre) + prePos += cBufferPos+1; + } + + Entity = NoEntity; + EntityChar = TQChar::null; + return; + }; + } +} + +void HTMLTokenizer::parseTag(TokenizerString &src) +{ + assert(!Entity ); + checkScriptBuffer( src.length() ); + + while ( !src.isEmpty() ) + { + checkBuffer(); +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + uint l = 0; + while(l < src.length() && (src.toString()[l]).latin1() != '>') + l++; + tqDebug("src is now: *%s*, tquote: %d", + src.toString().left(l).latin1(), tquote); +#endif + switch(tag) { + case NoTag: + return; + case TagName: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("TagName"); +#endif + if (searchCount > 0) + { + if (*src == commentStart[searchCount]) + { + searchCount++; + if (searchCount == 4) + { +#ifdef TOKEN_DEBUG + kdDebug( 6036 ) << "Found comment" << endl; +#endif + // Found '<!--' sequence + ++src; + dest = buffer; // ignore the previous part of this tag + tag = NoTag; + + comment = true; + parseComment(src); + return; // Finished parsing tag! + } + // cuts of high part, is okay + cBuffer[cBufferPos++] = src->cell(); + ++src; + break; + } + else + searchCount = 0; // Stop looking for '<!--' sequence + } + + bool finish = false; + unsigned int ll = kMin(src.length(), CBUFLEN-cBufferPos); + while(ll--) { + ushort curchar = *src; + if(curchar <= ' ' || curchar == '>' ) { + finish = true; + break; + } + // this is a nasty performance trick. will work for the A-Z + // characters, but not for others. if it contains one, + // we fail anyway + char cc = curchar; + cBuffer[cBufferPos++] = cc | 0x20; + ++src; + } + + // Disadvantage: we add the possible rest of the tag + // as attribute names. 
### judge if this causes problems + if(finish || CBUFLEN == cBufferPos) { + bool beginTag; + char* ptr = cBuffer; + unsigned int len = cBufferPos; + cBuffer[cBufferPos] = '\0'; + if ((cBufferPos > 0) && (*ptr == '/')) + { + // End Tag + beginTag = false; + ptr++; + len--; + } + else + // Start Tag + beginTag = true; + // Accept empty xml tags like <br/> + if(len > 1 && ptr[len-1] == '/' ) { + ptr[--len] = '\0'; + // if its like <br/> and not like <input/ value=foo>, take it as flat + if (*src == '>') + currToken.flat = true; + } + + uint tagID = tdehtml::getTagID(ptr, len); + if (!tagID) { +#ifdef TOKEN_DEBUG + TQCString tmp(ptr, len+1); + kdDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"" << endl; +#endif + dest = buffer; + } + else + { +#ifdef TOKEN_DEBUG + TQCString tmp(ptr, len+1); + kdDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data() << endl; +#endif + currToken.tid = beginTag ? tagID : tagID + ID_CLOSE_TAG; + dest = buffer; + } + tag = SearchAttribute; + cBufferPos = 0; + } + break; + } + case SearchAttribute: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("SearchAttribute"); +#endif + bool atespace = false; + ushort curchar; + while(!src.isEmpty()) { + curchar = *src; + if(curchar > ' ') { + if(curchar == '<' || curchar == '>') + tag = SearchEnd; + else if(atespace && (curchar == '\'' || curchar == '"')) + { + tag = SearchValue; + *dest++ = 0; + attrName = TQString::null; + } + else + tag = AttributeName; + + cBufferPos = 0; + break; + } + atespace = true; + ++src; + } + break; + } + case AttributeName: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("AttributeName"); +#endif + ushort curchar; + int ll = kMin(src.length(), CBUFLEN-cBufferPos); + + while(ll--) { + curchar = *src; + if(curchar <= '>') { + if(curchar <= ' ' || curchar == '=' || curchar == '>') { + unsigned int a; + cBuffer[cBufferPos] = '\0'; + a = tdehtml::getAttrID(cBuffer, cBufferPos); + if ( !a ) { + // did we just get /> or e.g checked/> + if 
(curchar == '>' && cBufferPos >=1 && cBuffer[cBufferPos-1] == '/') { + currToken.flat = true; + if (cBufferPos>1) + a = tdehtml::getAttrID(cBuffer, cBufferPos-1); + } + if (!a) + attrName = TQString::fromLatin1(TQCString(cBuffer, cBufferPos+1).data()); + } + + dest = buffer; + *dest++ = a; +#ifdef TOKEN_DEBUG + if (!a || (cBufferPos && *cBuffer == '!')) + kdDebug( 6036 ) << "Unknown attribute: *" << TQCString(cBuffer, cBufferPos+1).data() << "*" << endl; + else + kdDebug( 6036 ) << "Known attribute: " << TQCString(cBuffer, cBufferPos+1).data() << endl; +#endif + + tag = SearchEqual; + break; + } + } + cBuffer[cBufferPos++] = + ( curchar >= 'A' && curchar <= 'Z' ) ? curchar | 0x20 : curchar; + ++src; + } + if ( cBufferPos == CBUFLEN ) { + cBuffer[cBufferPos] = '\0'; + attrName = TQString::fromLatin1(TQCString(cBuffer, cBufferPos+1).data()); + dest = buffer; + *dest++ = 0; + tag = SearchEqual; + } + break; + } + case SearchEqual: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("SearchEqual"); +#endif + ushort curchar; + bool atespace = false; + while(!src.isEmpty()) { + curchar = src->unicode(); + if(curchar > ' ') { + if(curchar == '=') { +#ifdef TOKEN_DEBUG + kdDebug(6036) << "found equal" << endl; +#endif + tag = SearchValue; + ++src; + } + else if(atespace && (curchar == '\'' || curchar == '"')) + { + tag = SearchValue; + *dest++ = 0; + attrName = TQString::null; + } + else { + DOMString v(""); + currToken.addAttribute(parser->docPtr(), buffer, attrName, v); + dest = buffer; + tag = SearchAttribute; + } + break; + } + atespace = true; + ++src; + } + break; + } + case SearchValue: + { + ushort curchar; + while(!src.isEmpty()) { + curchar = src->unicode(); + if(curchar > ' ') { + if(( curchar == '\'' || curchar == '\"' )) { + tquote = curchar == '\"' ? 
DoubleQuote : SingleQuote; + tag = QuotedValue; + ++src; + } else + tag = Value; + + break; + } + ++src; + } + break; + } + case QuotedValue: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("QuotedValue"); +#endif + ushort curchar; + while(!src.isEmpty()) { + checkBuffer(); + + curchar = src->unicode(); + if(curchar <= '\'' && !src.escaped()) { + // ### attributes like '&{blaa....};' are supposed to be treated as jscript. + if ( curchar == '&' ) + { + ++src; + parseEntity(src, dest, true); + break; + } + else if ( (tquote == SingleQuote && curchar == '\'') || + (tquote == DoubleQuote && curchar == '\"') ) + { + // some <input type=hidden> rely on trailing spaces. argh + while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r')) + dest--; // remove trailing newlines + DOMString v(buffer+1, dest-buffer-1); + currToken.addAttribute(parser->docPtr(), buffer, attrName, v); + + dest = buffer; + tag = SearchAttribute; + tquote = NoQuote; + ++src; + break; + } + } + *dest++ = *src; + ++src; + } + break; + } + case Value: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("Value"); +#endif + ushort curchar; + while(!src.isEmpty()) { + checkBuffer(); + curchar = src->unicode(); + if(curchar <= '>' && !src.escaped()) { + // parse Entities + if ( curchar == '&' ) + { + ++src; + parseEntity(src, dest, true); + break; + } + // no quotes. Every space means end of value + // '/' does not delimit in IE! 
+ if ( curchar <= ' ' || curchar == '>' ) + { + DOMString v(buffer+1, dest-buffer-1); + currToken.addAttribute(parser->docPtr(), buffer, attrName, v); + dest = buffer; + tag = SearchAttribute; + break; + } + } + + *dest++ = *src; + ++src; + } + break; + } + case SearchEnd: + { +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1 + tqDebug("SearchEnd"); +#endif + while(!src.isEmpty()) { + if(*src == '<' || *src == '>') + break; + + if (*src == '/') + currToken.flat = true; + + ++src; + } + if(src.isEmpty() && *src != '<' && *src != '>') break; + + searchCount = 0; // Stop looking for '<!--' sequence + tag = NoTag; + tquote = NoQuote; + if ( *src == '>' ) + ++src; + + if ( !currToken.tid ) //stop if tag is unknown + return; + + uint tagID = currToken.tid; +#if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0 + kdDebug( 6036 ) << "appending Tag: " << tagID << endl; +#endif + // If the tag requires an end tag it cannot be flat, + // unless we are using the HTML parser to parse XHTML + // The only exception is SCRIPT and priority 0 tokens. + if (tagID < ID_CLOSE_TAG && tagID != ID_SCRIPT && + DOM::endTag[tagID] == DOM::REQUIRED && + parser->doc()->htmlMode() != DocumentImpl::XHtml) + currToken.flat = false; + + bool beginTag = !currToken.flat && (tagID < ID_CLOSE_TAG); + + if(tagID >= ID_CLOSE_TAG) + tagID -= ID_CLOSE_TAG; + else if ( !brokenScript && tagID == ID_SCRIPT ) { + DOMStringImpl* a = 0; + bool foundTypeAttribute = false; + scriptSrc = scriptSrcCharset = TQString::null; + if ( currToken.attrs && /* potentially have a ATTR_SRC ? */ + view && /* are we a regular tokenizer or just for innerHTML ? */ + parser->doc()->view()->part()->jScriptEnabled() /* jscript allowed at all? 
*/ + ) { + if ( ( a = currToken.attrs->getValue( ATTR_SRC ) ) ) + scriptSrc = parser->doc()->completeURL(tdehtml::parseURL( DOMString(a) ).string() ); + if ( ( a = currToken.attrs->getValue( ATTR_CHARSET ) ) ) + scriptSrcCharset = DOMString(a).string().stripWhiteSpace(); + if ( scriptSrcCharset.isEmpty() && view) + scriptSrcCharset = parser->doc()->view()->part()->encoding(); + /* Check type before language, since language is deprecated */ + if ((a = currToken.attrs->getValue(ATTR_TYPE)) != 0 && !DOMString(a).string().isEmpty()) + foundTypeAttribute = true; + else + a = currToken.attrs->getValue(ATTR_LANGUAGE); + } + javascript = true; + + if( foundTypeAttribute ) { + /* + Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does. + Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does. + Mozilla 1.5 accepts application/x-javascript, WinIE 6 doesn't. + Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't. + Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string. + We want to accept all the values that either of these browsers accept, but not other values. 
+ */ + TQString type = DOMString(a).string().stripWhiteSpace().lower(); + if( type.compare("text/javascript") != 0 && + type.compare("text/javascript1.0") != 0 && + type.compare("text/javascript1.1") != 0 && + type.compare("text/javascript1.2") != 0 && + type.compare("text/javascript1.3") != 0 && + type.compare("text/javascript1.4") != 0 && + type.compare("text/javascript1.5") != 0 && + type.compare("text/jscript") != 0 && + type.compare("text/ecmascript") != 0 && + type.compare("text/livescript") != 0 && + type.compare("application/x-javascript") != 0 && + type.compare("application/x-ecmascript") != 0 && + type.compare("application/javascript") != 0 && + type.compare("application/ecmascript") != 0 ) + javascript = false; + } else if( a ) { + /* + Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does. + Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3. + Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace. + We want to accept all the values that either of these browsers accept, but not other values. 
+ */ + TQString lang = DOMString(a).string(); + lang = lang.lower(); + if( lang.compare("") != 0 && + lang.compare("javascript") != 0 && + lang.compare("javascript1.0") != 0 && + lang.compare("javascript1.1") != 0 && + lang.compare("javascript1.2") != 0 && + lang.compare("javascript1.3") != 0 && + lang.compare("javascript1.4") != 0 && + lang.compare("javascript1.5") != 0 && + lang.compare("ecmascript") != 0 && + lang.compare("livescript") != 0 && + lang.compare("jscript") ) + javascript = false; + } + } + + processToken(); + + if ( parser->selectMode() && beginTag) + discard = AllDiscard; + + switch( tagID ) { + case ID_PRE: + pre = beginTag; + if (beginTag) + discard = LFDiscard; + prePos = 0; + break; + case ID_BR: + prePos = 0; + break; + case ID_SCRIPT: + if (beginTag) { + searchStopper = scriptEnd; + searchStopperLen = 8; + script = true; + parseSpecial(src); + } + else if (tagID < ID_CLOSE_TAG) // Handle <script src="foo"/> + scriptHandler(); + break; + case ID_STYLE: + if (beginTag) { + searchStopper = styleEnd; + searchStopperLen = 7; + style = true; + parseSpecial(src); + } + break; + case ID_TEXTAREA: + if(beginTag) { + searchStopper = textareaEnd; + searchStopperLen = 10; + textarea = true; + discard = NoneDiscard; + parseSpecial(src); + } + break; + case ID_TITLE: + if (beginTag) { + searchStopper = titleEnd; + searchStopperLen = 7; + title = true; + parseSpecial(src); + } + break; + case ID_XMP: + if (beginTag) { + searchStopper = xmpEnd; + searchStopperLen = 5; + xmp = true; + parseSpecial(src); + } + break; + case ID_SELECT: + select = beginTag; + break; + case ID_PLAINTEXT: + plaintext = beginTag; + break; + } + return; // Finished parsing tag! 
+        }
+        } // end switch
+    }
+    return;
+}
+// Write the whitespace recorded in 'pending' to the output buffer and set pending back to NonePending.
+void HTMLTokenizer::addPending()
+{
+    if ( select && !(comment || script)) // inside select (outside comment/script) every kind of pending whitespace becomes one space; prePos is not touched
+    {
+        *dest++ = ' ';
+    }
+    else if ( textarea ) // textarea: characters are written literally, a tab stays a tab
+    {
+        switch(pending) {
+        case LFPending: *dest++ = '\n'; prePos = 0; break;
+        case SpacePending: *dest++ = ' '; ++prePos; break;
+        case TabPending: *dest++ = '\t'; prePos += TAB_SIZE - (prePos % TAB_SIZE); break; // one '\t' written, column advanced to the next tab stop
+        case NonePending: // callers must only call addPending() when something is pending
+            assert(0);
+        }
+    }
+    else
+    {
+        int p; // number of spaces a pending tab expands to
+
+        switch (pending)
+        {
+        case SpacePending:
+            // Insert a breaking space
+            *dest++ = TQChar(' ');
+            prePos++;
+            break;
+
+        case LFPending:
+            *dest = '\n';
+            dest++;
+            prePos = 0; // a new line starts at column 0
+            break;
+
+        case TabPending:
+            p = TAB_SIZE - ( prePos % TAB_SIZE ); // distance from the current column to the next tab stop
+            for ( int x = 0; x < p; x++ )
+                *dest++ = TQChar(' ');
+            prePos += p;
+            break;
+
+        case NonePending: // callers must only call addPending() when something is pending
+            assert(0);
+            break;
+        }
+    }
+
+    pending = NonePending;
+}
+// Feed a chunk of input to the tokenizer: the chunk is either queued (script running or pending, or tokenizer on hold) or appended to src and tokenized by the loop below.
+void HTMLTokenizer::write( const TokenizerString &str, bool appendData ) // appendData selects which pendingQueue entry receives str when parsing is postponed
+{
+#ifdef TOKEN_DEBUG
+    kdDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")" << endl;
+#endif
+
+    if ( !buffer ) // no output buffer (it is allocated in begin() and freed in reset()): ignore the data
+        return;
+
+    if ( ( m_executingScript && appendData ) || cachedScript.count() ) {
+        // don't parse; we will do this later
+        if (pendingQueue.isEmpty())
+            pendingQueue.push(str);
+        else if (appendData)
+            pendingQueue.bottom().append(str);
+        else
+            pendingQueue.top().append(str);
+        return;
+    }
+
+    if ( onHold ) { // on hold: only collect the data, do not tokenize
+        src.append(str);
+        return;
+    }
+
+    if (!src.isEmpty())
+        src.append(str);
+    else
+        setSrc(str);
+    m_abort = false;
+
+//     if (Entity)
+//         parseEntity(src, dest);
+
+    while ( !src.isEmpty() )
+    {
+        if ( m_abort ) // checked at the top of every iteration
+            return;
+        // do we need to enlarge the buffer?
+ checkBuffer(); + + ushort cc = src->unicode(); + + if (skipLF && (cc != '\n')) + skipLF = false; + + if (skipLF) { + skipLF = false; + ++src; + } + else if ( Entity ) + parseEntity( src, dest ); + else if ( plaintext ) + parseText( src ); + else if (script) + parseSpecial(src); + else if (style) + parseSpecial(src); + else if (xmp) + parseSpecial(src); + else if (textarea) + parseSpecial(src); + else if (title) + parseSpecial(src); + else if (comment) + parseComment(src); + else if (server) + parseServer(src); + else if (processingInstruction) + parseProcessingInstruction(src); + else if (tag) + parseTag(src); + else if ( startTag ) + { + startTag = false; + bool endTag = false; + + switch(cc) { + case '/': + endTag = true; + break; + case '!': + { + // <!-- comment --> + searchCount = 1; // Look for '<!--' sequence to start comment + + break; + } + case '?': + { + // xml processing instruction + processingInstruction = true; + tquote = NoQuote; + parseProcessingInstruction(src); + continue; + + break; + } + case '%': + if (!brokenServer) { + // <% server stuff, handle as comment %> + server = true; + tquote = NoQuote; + parseServer(src); + continue; + } + // else fall through + default: + { + if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z'))) + { + // Start of a Start-Tag + } + else + { + // Invalid tag + // Add as is + if (pending) + addPending(); + *dest = '<'; + dest++; + continue; + } + } + }; // end case + + // According to SGML any LF immediately after a starttag, or + // immediately before an endtag should be ignored. 
+ // ### Gecko and MSIE though only ignores LF immediately after + // starttags and only for PRE elements -- asj (28/06-2005) + if ( pending ) + if (!select) + addPending(); + else + pending = NonePending; + + // Cancel unused discards + discard = NoneDiscard; + // if (!endTag) discard = LFDiscard; + + processToken(); + + cBufferPos = 0; + tag = TagName; + parseTag(src); + } + else if ( cc == '&' && !src.escaped()) + { + ++src; + if ( pending ) + addPending(); + discard = NoneDiscard; + parseEntity(src, dest, true); + } + else if ( cc == '<' && !src.escaped()) + { + tagStartLineno = lineno+src.lineCount(); + ++src; + discard = NoneDiscard; + startTag = true; + } + else if (( cc == '\n' ) || ( cc == '\r' )) + { + if (discard == SpaceDiscard) + discard = NoneDiscard; + + if (discard == LFDiscard) { + // Ignore one LF + discard = NoneDiscard; + } + else if (discard == AllDiscard) + { + // Ignore + } + else + { + if (select && !script) { + pending = LFPending; + } else { + if (pending) + addPending(); + pending = LFPending; + } + } + + /* Check for MS-DOS CRLF sequence */ + if (cc == '\r') + { + skipLF = true; + } + ++src; + } + else if (( cc == ' ' ) || ( cc == '\t' )) + { + if(discard == LFDiscard) + discard = NoneDiscard; + + if(discard == SpaceDiscard) { + // Ignore one space + discard = NoneDiscard; + } + else if(discard == AllDiscard) + { + // Ignore + } + else { + if (select && !script) { + if (!pending) + pending = SpacePending; + } else { + if (pending) + addPending(); + if (cc == ' ') + pending = SpacePending; + else + pending = TabPending; + } + } + + ++src; + } + else + { + if (pending) + addPending(); + + discard = NoneDiscard; + if ( pre ) + { + prePos++; + } + *dest = *src; + fixUpChar( *dest ); + ++dest; + ++src; + } + } + + if (noMoreData && cachedScript.isEmpty() && !m_executingScript) + end(); // this actually causes us to be deleted +} + +void HTMLTokenizer::timerEvent( TQTimerEvent *e ) +{ + if ( e->timerId() == m_autoCloseTimer && 
cachedScript.isEmpty() ) { + finish(); + } +} + +void HTMLTokenizer::setAutoClose( bool b ) { + killTimer( m_autoCloseTimer ); + m_autoCloseTimer = 0; + if ( b ) + m_autoCloseTimer = startTimer(100); +} + +void HTMLTokenizer::end() +{ + if ( buffer == 0 ) { + emit finishedParsing(); + return; + } + + // parseTag is using the buffer for different matters + if ( !tag ) + processToken(); + + if(buffer) + TDEHTML_DELETE_QCHAR_VEC(buffer); + + if(scriptCode) + TDEHTML_DELETE_QCHAR_VEC(scriptCode); + + scriptCode = 0; + scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0; + buffer = 0; + emit finishedParsing(); +} + +void HTMLTokenizer::finish() +{ + if ( m_autoCloseTimer ) { + killTimer( m_autoCloseTimer ); + m_autoCloseTimer = 0; + } + // do this as long as we don't find matching comment ends + while((title || script || comment || server) && scriptCode && scriptCodeSize) + { + // we've found an unmatched comment start + if (comment) + brokenComments = true; + else if (server) + brokenServer = true; + else if (script) + brokenScript = true; + + checkScriptBuffer(); + scriptCode[ scriptCodeSize ] = 0; + scriptCode[ scriptCodeSize + 1 ] = 0; + int pos; + TQString food; + if (title || style || script) + food.setUnicode(scriptCode, scriptCodeSize); + else if (server) { + food = "<"; + food += TQString(scriptCode, scriptCodeSize); + } + else { + pos = TQConstString(scriptCode, scriptCodeSize).string().find('>'); + food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy + } + TDEHTML_DELETE_QCHAR_VEC(scriptCode); + scriptCode = 0; + scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0; + if (script) + scriptHandler(); + + comment = title = server = script = false; + if ( !food.isEmpty() ) + write(food, true); + } + // this indicates we will not receive any more data... 
but if we are waiting on + // an external script to load, we can't finish parsing until that is done + noMoreData = true; + if (cachedScript.isEmpty() && !m_executingScript && !onHold) + end(); // this actually causes us to be deleted +} + +void HTMLTokenizer::processToken() +{ + KJSProxy *jsProxy = view ? view->part()->jScript() : 0L; + if (jsProxy) + jsProxy->setEventHandlerLineno(tagStartLineno+1); + if ( dest > buffer ) + { +#if 0 + if(currToken.tid) { + tqDebug( "unexpected token id: %d, str: *%s*", currToken.tid,TQConstString( buffer,dest-buffer ).string().latin1() ); + assert(0); + } + +#endif + currToken.text = new DOMStringImpl( buffer, dest - buffer ); + currToken.text->ref(); + currToken.tid = ID_TEXT; + } + else if(!currToken.tid) { + currToken.reset(); + if (jsProxy) + jsProxy->setEventHandlerLineno(lineno+src.lineCount()+1); + return; + } + + dest = buffer; + +#ifdef TOKEN_DEBUG + TQString name = TQString( getTagName(currToken.tid) ); + TQString text; + if(currToken.text) + text = TQConstString(currToken.text->s, currToken.text->l).string(); + + kdDebug( 6036 ) << "Token --> " << name << " id = " << currToken.tid << endl; + if (currToken.flat) + kdDebug( 6036 ) << "Token is FLAT!" << endl; + if(!text.isNull()) + kdDebug( 6036 ) << "text: \"" << text << "\"" << endl; + unsigned long l = currToken.attrs ? currToken.attrs->length() : 0; + if(l) { + kdDebug( 6036 ) << "Attributes: " << l << endl; + for (unsigned long i = 0; i < l; ++i) { + NodeImpl::Id tid = currToken.attrs->idAt(i); + DOMString value = currToken.attrs->valueAt(i); + kdDebug( 6036 ) << " " << tid << " " << parser->doc()->getDocument()->getName(NodeImpl::AttributeId, tid).string() + << "=\"" << value.string() << "\"" << endl; + } + } + kdDebug( 6036 ) << endl; +#endif + + // In some cases, parseToken() can cause javascript code to be executed + // (for example, when setting an attribute that causes an event handler + // to be created). 
So we need to protect against re-entrancy into the parser + m_executingScript++; + + // pass the token over to the parser, the parser DOES NOT delete the token + parser->parseToken(&currToken); + + m_executingScript--; + + if ( currToken.flat && currToken.tid != ID_TEXT && !parser->noSpaces() ) + discard = NoneDiscard; + + currToken.reset(); + if (jsProxy) + jsProxy->setEventHandlerLineno(1); +} + + +HTMLTokenizer::~HTMLTokenizer() +{ + reset(); + delete parser; +} + + +void HTMLTokenizer::enlargeBuffer(int len) +{ + int newsize = kMax(size*2, size+len); + int oldoffs = (dest - buffer); + + buffer = TDEHTML_REALLOC_QCHAR_VEC(buffer, newsize); + dest = buffer + oldoffs; + size = newsize; +} + +void HTMLTokenizer::enlargeScriptBuffer(int len) +{ + int newsize = kMax(scriptCodeMaxSize*2, scriptCodeMaxSize+len); + scriptCode = TDEHTML_REALLOC_QCHAR_VEC(scriptCode, newsize); + scriptCodeMaxSize = newsize; +} + +void HTMLTokenizer::notifyFinished(CachedObject* /*finishedObj*/) +{ + assert(!cachedScript.isEmpty()); + bool done = false; + while (!done && cachedScript.head()->isLoaded()) { + + kdDebug( 6036 ) << "Finished loading an external script" << endl; + + CachedScript* cs = cachedScript.dequeue(); + DOMString scriptSource = cs->script(); +#ifdef TOKEN_DEBUG + kdDebug( 6036 ) << "External script is:" << endl << scriptSource.string() << endl; +#endif + setSrc(TokenizerString()); + + // make sure we forget about the script before we execute the new one + // infinite recursion might happen otherwise + TQString cachedScriptUrl( cs->url().string() ); + cs->deref(this); + + scriptExecution( scriptSource.string(), cachedScriptUrl ); + + done = cachedScript.isEmpty(); + + // 'script' is true when we are called synchronously from + // scriptHandler(). In that case scriptHandler() will take care + // of 'scriptOutput'. 
+ if ( !script ) { + while (pendingQueue.count() > 1) { + TokenizerString t = pendingQueue.pop(); + pendingQueue.top().prepend( t ); + } + if (done) { + write(pendingQueue.pop(), false); + } + // we might be deleted at this point, do not + // access any members. + } + } +} + +bool HTMLTokenizer::isWaitingForScripts() const +{ + return cachedScript.count(); +} + +bool HTMLTokenizer::isExecutingScript() const +{ + return (m_executingScript > 0); +} + +void HTMLTokenizer::setSrc(const TokenizerString& source) +{ + lineno += src.lineCount(); + src = source; + src.resetLineCount(); +} + +void HTMLTokenizer::setOnHold(bool _onHold) +{ + if (onHold == _onHold) return; + onHold = _onHold; +} + |