diff options
Diffstat (limited to 'libksieve')
-rw-r--r-- | libksieve/Makefile.am | 17 | ||||
-rw-r--r-- | libksieve/impl/lexer.h | 189 | ||||
-rw-r--r-- | libksieve/impl/parser.h | 118 | ||||
-rw-r--r-- | libksieve/impl/utf8validator.h | 42 | ||||
-rw-r--r-- | libksieve/ksieve/Makefile.am | 8 | ||||
-rw-r--r-- | libksieve/ksieve/error.h | 139 | ||||
-rw-r--r-- | libksieve/ksieve/lexer.h | 108 | ||||
-rw-r--r-- | libksieve/ksieve/parser.h | 72 | ||||
-rw-r--r-- | libksieve/ksieve/scriptbuilder.h | 80 | ||||
-rw-r--r-- | libksieve/parser/Makefile.am | 12 | ||||
-rw-r--r-- | libksieve/parser/lexer.cpp | 666 | ||||
-rw-r--r-- | libksieve/parser/parser.cpp | 651 | ||||
-rw-r--r-- | libksieve/parser/utf8validator.cpp | 141 | ||||
-rw-r--r-- | libksieve/shared/Makefile.am | 8 | ||||
-rw-r--r-- | libksieve/shared/error.cpp | 247 | ||||
-rw-r--r-- | libksieve/tests/Makefile.am | 13 | ||||
-rw-r--r-- | libksieve/tests/lexertest.cpp | 484 | ||||
-rw-r--r-- | libksieve/tests/parsertest.cpp | 667 |
18 files changed, 3662 insertions, 0 deletions
diff --git a/libksieve/Makefile.am b/libksieve/Makefile.am new file mode 100644 index 000000000..fa7ff1a32 --- /dev/null +++ b/libksieve/Makefile.am @@ -0,0 +1,17 @@ +SUBDIRS = ksieve shared parser . tests + +INCLUDES = $(all_includes) + +lib_LTLIBRARIES = libksieve.la + +CLEANFILES = dummy.cpp + +libksieve_la_SOURCES = dummy.cpp +libksieve_la_LIBADD = parser/libksieve_parser.la +libksieve_la_LDFLAGS = $(all_libraries) -version-info 0:0:0 -no-undefined + +dummy.cpp: + echo > dummy.cpp + +messages: + $(XGETTEXT) shared/*.cpp parser/*.cpp impl/*.h ksieve/*.h -o $(podir)/libksieve.pot diff --git a/libksieve/impl/lexer.h b/libksieve/impl/lexer.h new file mode 100644 index 000000000..0eba0e585 --- /dev/null +++ b/libksieve/impl/lexer.h @@ -0,0 +1,189 @@ +/* -*- c++ -*- + impl/lexer.h + + Internal header file. Subject to change without notice. DO NOT USE. + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. 
You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#ifndef __KSIEVE_IMPL_LEXER_H__ +#define __KSIEVE_IMPL_LEXER_H__ + +#include <ksieve/lexer.h> +#include <ksieve/error.h> + +#include <qvaluestack.h> +#include <qcstring.h> + +namespace KSieve { + + class Lexer::Impl { + public: + Impl( const char * scursor, const char * send, int options ); + + bool ignoreComments() const { + return mIgnoreComments; + } + + bool ignoreLineFeeds() const { + return mIgnoreLF; + } + + const Error & error() const { + return mState.error; + } + + bool atEnd() const { + return mState.cursor >= mEnd; + } + + int column() const { + return mState.cursor - mState.beginOfLine; + } + + int line() const { + return mState.line; + } + + void save() { + mStateStack.push( mState ); + } + + void restore() { + mState = mStateStack.pop(); + } + + Lexer::Token nextToken( QString & tokenValue ); + + private: + /** Cursor must be positioned on the \r or the \n. */ + bool eatCRLF(); + + /** Cursor must be positioned after the opening hash (#). If + parsing is successful, cursor is positioned behind the CRLF + that ended the comment's line (or past the end). 
*/ + bool parseHashComment( QString & result, bool reallySave=false ); + + /** Cursor must be positioned after the opening slash-asterisk */ + bool parseBracketComment( QString & result, bool reallySave=false ); + + /** Cursor must be positioned on the opening '/'or '#' */ + bool parseComment( QString & result, bool reallySave=false ); + + /** Eats whitespace, but not comments */ + bool eatWS(); + + /** Eats comments and whitespace */ + bool eatCWS(); + + /** Cursor must be positioned on the first character */ + bool parseIdentifier( QString & result ); + + /** Cursor must be positioned after the initial ':' */ + bool parseTag( QString & result ); + + /** Cursor must be positioned on the first digit */ + bool parseNumber( QString & result ); + + /** Cursor must be positioned after the "text:" token. */ + bool parseMultiLine( QString & result ); + + /** Cursor must be positioned after the initial " */ + bool parseQuotedString( QString & result ); + + struct State { + State( const char * s=0 ) + : cursor( s ), line( 0 ), beginOfLine( s ), error() {} + const char * cursor; + int line; + const char * beginOfLine; + Error error; + } mState; + + const char * const mEnd; + const bool mIgnoreComments : 1; + const bool mIgnoreLF : 1; + QValueStack<State> mStateStack; + + const char * beginOfLine() const { return mState.beginOfLine; } + + int _strnicmp( const char * left, const char * right, size_t len ) const { + return charsLeft() >= len ? qstrnicmp( left, right, len ) : 1 ; + } + + void clearErrors() { mState.error = Error(); } + + unsigned int charsLeft() const { + return mEnd - mState.cursor < 0 ? 
0 : mEnd - mState.cursor ; + } + void makeError( Error::Type e ) { + makeError( e, line(), column() ); + } + void makeError( Error::Type e, int errorLine, int errorCol ) { + mState.error = Error( e, errorLine, errorCol ); + } + void makeIllegalCharError( char ch ); + void makeIllegalCharError() { + makeIllegalCharError( *mState.cursor ); + } + /** Defines the current char to end a line. + Warning: increases @p mCursor! + **/ + void newLine() { + ++mState.line; + mState.beginOfLine = ++mState.cursor; + } + bool skipTo( char c, bool acceptEnd=false ) { + while( !atEnd() ) { + if ( *mState.cursor == '\n' || *mState.cursor == '\r' ) { + if ( !eatCRLF() ) return false; + } else if ( *mState.cursor == c ) { + return true; + } else { + ++mState.cursor; + } + } + return acceptEnd; + } + bool skipToCRLF( bool acceptEnd=true ) { + for ( ; !atEnd() ; ++mState.cursor ) + if ( *mState.cursor == '\n' || *mState.cursor == '\r' ) + return eatCRLF(); + return acceptEnd; + } + void skipTo8BitEnd() { + while ( !atEnd() && (signed char)*mState.cursor < 0 ) + ++mState.cursor; + } + void skipToDelim(); + }; + +} + +#endif // __KSIEVE_IMPL_LEXER_H__ diff --git a/libksieve/impl/parser.h b/libksieve/impl/parser.h new file mode 100644 index 000000000..736678b43 --- /dev/null +++ b/libksieve/impl/parser.h @@ -0,0 +1,118 @@ +/* -*- c++ -*- + impl/parser.h + + Internal header file. Subject to change without notice. DO NOT USE. + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#ifndef __KSIEVE_IMPL_PARSER_H__ +#define __KSIEVE_IMPL_PARSER_H__ + +#include <ksieve/parser.h> + +#include <ksieve/error.h> +#include <ksieve/lexer.h> +#include <impl/lexer.h> + +#include <ksieve/scriptbuilder.h> + + +namespace KSieve { + + class Parser::Impl { + friend class Parser; + private: + Impl( const char * scursor, const char * const send, int options=0 ); + + void setScriptBuilder( ScriptBuilder * builder ) { + mBuilder = builder; + } + ScriptBuilder * scriptBuilder() const { + return mBuilder; + } + + bool parse(); + + const Error & error() const { return mError == Error::None ? 
lexer.error() : mError ; } + + bool parseCommandList(); + + bool parseCommand(); + + bool parseArgumentList(); + + bool parseArgument(); + + bool parseTestList(); + + bool parseTest(); + + bool parseBlock(); + + bool parseStringList(); + + bool parseNumber(); + + + Lexer::Token token() const { return mToken; } + QString tokenValue() const { return mTokenValue; } + + bool atEnd() const { + return !mToken && lexer.atEnd() ; + } + bool obtainToken(); + void consumeToken() { + mToken = Lexer::None; + mTokenValue = QString::null; + } + void makeError( Error::Type e, int line, int col ) { + mError = Error( e, line, col ); + if ( scriptBuilder() ) + scriptBuilder()->error( mError ); + } + void makeError( Error::Type e ) { + makeError( e, lexer.line(), lexer.column() ); + } + void makeUnexpectedTokenError( Error::Type e ) { + makeError( e ); // ### save wrong token... + } + bool isArgumentToken() const; + bool isStringToken() const; + + Error mError; + Lexer::Token mToken; + QString mTokenValue; + Lexer::Impl lexer; + ScriptBuilder * mBuilder; + }; + +} + +#endif // __KSIEVE_IMPL_PARSER_H__ diff --git a/libksieve/impl/utf8validator.h b/libksieve/impl/utf8validator.h new file mode 100644 index 000000000..aa10bad2a --- /dev/null +++ b/libksieve/impl/utf8validator.h @@ -0,0 +1,42 @@ +/* -*- c++ -*- + impl/utf8validator.h + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#ifndef __KSIEVE_UTF8VALIDATOR_H__ +#define __KSIEVE_UTF8VALIDATOR_H__ + +namespace KSieve { + + extern bool isValidUtf8( const char * s, unsigned int len ); + +} // namespace KSieve + +#endif // __KSIEVE_UTF8VALIDATOR_H__ diff --git a/libksieve/ksieve/Makefile.am b/libksieve/ksieve/Makefile.am new file mode 100644 index 000000000..9d086dd2a --- /dev/null +++ b/libksieve/ksieve/Makefile.am @@ -0,0 +1,8 @@ +# here are header files that are part of the public api: +ksievedir = $(includedir)/ksieve + +ksieve_HEADERS = \ + error.h \ + lexer.h \ + parser.h \ + scriptbuilder.h diff --git a/libksieve/ksieve/error.h b/libksieve/ksieve/error.h new file mode 100644 index 000000000..2dbed32c8 --- /dev/null +++ b/libksieve/ksieve/error.h @@ -0,0 +1,139 @@ +/* -*- c++ -*- + ksieve/error.h + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2002-2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. 
+ + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. 
+*/ + +#ifndef __KSIEVE_ERROR_H__ +#define __KSIEVE_ERROR_H__ + +#include <qstring.h> + +#include <kdepimmacros.h> + +#ifdef None // X headers +# undef None +#endif + +namespace KSieve { + + class KDE_EXPORT Error { + public: + enum Type { + None = 0, + Custom, + // parse (well-formedness in XML speak) errors: + FirstParseError, + + CRWithoutLF = FirstParseError, + SlashWithoutAsterisk, + IllegalCharacter, + UnexpectedCharacter, + NoLeadingDigits, + NonCWSAfterTextColon, + + NumberOutOfRange, + InvalidUTF8, + + UnfinishedBracketComment, + PrematureEndOfMultiLine, + PrematureEndOfQuotedString, + PrematureEndOfStringList, + PrematureEndOfTestList, + PrematureEndOfBlock, + MissingWhitespace, + MissingSemicolonOrBlock, + + ExpectedBlockOrSemicolon, + ExpectedCommand, + ConsecutiveCommasInStringList, + ConsecutiveCommasInTestList, + MissingCommaInTestList, + MissingCommaInStringList, + NonStringInStringList, + NonCommandInCommandList, + NonTestInTestList, + LastParseError = NonTestInTestList, + // validity errors: + FirstValidityError, + RequireNotFirst = FirstValidityError, // rfc3028, 3.2 + RequireMissingForCommand, + RequireMissingForTest, + RequireMissingForComparator, + UnsupportedCommand, + UnsupportedTest, + UnsupportedComparator, + TestNestingTooDeep, // site policy + BlockNestingTooDeep, // site policy + InvalidArgument, + ConflictingArguments, // e.g. rfc3028, 2.7.{1,3} + ArgumentsRepeated, // similar to ConflictingArguments, e.g. :is :is + CommandOrderingConstraintViolation, // e.g. 
else w/o if, rfc3028, 3.1 + LastValidityError = CommandOrderingConstraintViolation, + // runtime errors: + FirstRuntimeError, + IncompatibleActionsRequested = FirstRuntimeError, + MailLoopDetected, + TooManyActions, + LastRuntimeError = TooManyActions + }; + + static const char * typeToString( Type type ); + + Error( Type type=None, + const QString & s1=QString::null, const QString & s2=QString::null, + int line=-1, int col=-1 ) + : mType( type ), mLine( line ), mCol( col ), + mStringOne( s1 ), mStringTwo( s2 ) {} + Error( Type type, int line, int col ) + : mType( type ), mLine( line ), mCol( col ) {} + + QString asString() const; + + /** So you can write <pre>if( error() )</pre> with e.g. @ref Lexer */ + operator bool() const { + return type() != None; + } + + Type type() const { return mType; } + int line() const { return mLine; } + int column() const { return mCol; } + QString firstString() const { return mStringOne; } + QString secondString() const { return mStringTwo; } + + protected: + Type mType; + int mLine; + int mCol; + QString mStringOne, mStringTwo; + }; + +} // namespace KSieve + +#endif // __KSIEVE_ERROR_H__ diff --git a/libksieve/ksieve/lexer.h b/libksieve/ksieve/lexer.h new file mode 100644 index 000000000..d5bb1fc3b --- /dev/null +++ b/libksieve/ksieve/lexer.h @@ -0,0 +1,108 @@ +/* -*- c++ -*- + ksieve/lexer.h + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
class QString;

namespace KSieve {

  class Error;

  /** Public lexer facade. All work is delegated to the private
      Lexer::Impl held through a pointer; instances cannot be copied
      (copy constructor and assignment are private and undefined).
  **/
  class Lexer {
  public:
    /** Bit flags for the constructor's @p options argument. The
        Include... values are 0, i.e. they name the default. */
    enum Options {
      IncludeComments = 0,
      IgnoreComments = 1,
      IncludeLineFeeds = 0,
      IgnoreLineFeeds = 2
    };

    Lexer( const char * scursor, const char * send, int options=0 );
    ~Lexer();

    /** Tells whether comments are withheld from @ref nextToken.
        By default they are not ignored. Ignoring them can speed up
        script parsing a bit, and is appropriate when the internal
        representation of the script won't be serialized into string
        form again (or if you simply want to delete all comments).
    **/
    bool ignoreComments() const;

    /** Tells whether line feeds are withheld from @ref nextToken.
        By default they are not ignored. Ignoring them can speed up
        script parsing a bit, and is appropriate when the internal
        representation of the script won't be serialized into string
        form again.
    **/
    bool ignoreLineFeeds() const;

    const Error & error() const;

    bool atEnd() const;
    int column() const;
    int line() const;

    /** The kinds of token @ref nextToken can report. */
    enum Token {
      None = 0,
      Number,          // 1, 100, 1M, 10k, 1G, 2g, 3m
      Identifier,      // atom
      Tag,             // :tag
      Special,         // {} [] () ,;
      QuotedString,    // "foo\"bar" -> foo"bar
      MultiLineString, // text: \nfoo\n. -> foo
      HashComment,     // # foo
      BracketComment,  // /* foo */
      LineFeeds        // the number of line feeds encountered
    };

    /** Parses the next token and returns its type. @p result will
        contain the value of the token. */
    Token nextToken( QString & result );

    void save();
    void restore();

    class Impl;
  private:
    Impl * i;

  private:
    // not copyable: declared, never defined
    const Lexer & operator=( const Lexer & );
    Lexer( const Lexer & );
  };

} // namespace KSieve
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#ifndef __KSIEVE_PARSING_H__ +#define __KSIEVE_PARSING_H__ + +#include <kdepimmacros.h> + +class QString; + +namespace KSieve { + + class ScriptBuilder; + class Error; + + /** @short Parser for the Sieve grammar. + @author Marc Mutz <mutz@kde.org> + **/ + class KDE_EXPORT Parser { + public: + + Parser( const char * scursor, const char * const send, int options=0 ); + ~Parser(); + + void setScriptBuilder( ScriptBuilder * builder ); + ScriptBuilder * scriptBuilder() const; + + bool parse(); + + const Error & error() const; + + class Impl; + private: + Impl * i; + + private: + const Parser & operator=( const Parser & ); + Parser( const Parser & ); + }; + +} // namespace KSieve + +#endif // __KSIEVE_PARSING_H__ diff --git a/libksieve/ksieve/scriptbuilder.h b/libksieve/ksieve/scriptbuilder.h new file mode 100644 index 000000000..5e0a955bb --- /dev/null +++ b/libksieve/ksieve/scriptbuilder.h @@ -0,0 +1,80 @@ +/* -*- c++ -*- + ksieve/interfaces/scriptbuilder.h + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. 
+ Copyright (c) 2002-2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. 
class QString;

namespace KSieve {

  class Error;

  /** Abstract callback interface: every member is pure virtual, so
      an implementation has to handle each kind of event declared
      here (arguments, string lists, commands, tests, blocks,
      comments, line feeds, errors and the final finished()).
  **/
  class ScriptBuilder {
  public:
    virtual ~ScriptBuilder() {}

    // arguments
    virtual void taggedArgument( const QString & tag ) = 0;
    virtual void stringArgument( const QString & string, bool multiLine, const QString & embeddedHashComment ) = 0;
    virtual void numberArgument( unsigned long number, char quantifier ) = 0;

    // string lists
    virtual void stringListArgumentStart() = 0;
    virtual void stringListEntry( const QString & string, bool multiLine, const QString & embeddedHashComment ) = 0;
    virtual void stringListArgumentEnd() = 0;

    // commands
    virtual void commandStart( const QString & identifier ) = 0;
    virtual void commandEnd() = 0;

    // tests
    virtual void testStart( const QString & identifier ) = 0;
    virtual void testEnd() = 0;

    // test lists
    virtual void testListStart() = 0;
    virtual void testListEnd() = 0;

    // blocks
    virtual void blockStart() = 0;
    virtual void blockEnd() = 0;

    /** A hash comment always includes an implicit lineFeed() at its end. */
    virtual void hashComment( const QString & comment ) = 0;
    /** Bracket comments include explicit lineFeed()s in their content */
    virtual void bracketComment( const QString & comment ) = 0;

    virtual void lineFeed() = 0;

    virtual void error( const Error & error ) = 0;

    virtual void finished() = 0;
  };

} // namespace KSieve
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#include <config.h> + +#include <ksieve/lexer.h> +#include <impl/lexer.h> + +#include <impl/utf8validator.h> +#include <ksieve/error.h> + +#include <qstring.h> +#include <qstringlist.h> +#include <qtextcodec.h> + +#include <memory> // std::auto_ptr + +#include <assert.h> +#include <ctype.h> // isdigit + +#ifdef STR_DIM +# undef STR_DIM +#endif +#define STR_DIM(x) (sizeof(x) - 1) + +namespace KSieve { + + // + // + // Lexer Bridge implementation + // + // + + Lexer::Lexer( const char * scursor, const char * send, int options ) + : i( 0 ) + { + i = new Impl( scursor, send, options ); + } + + Lexer::~Lexer() { + delete i; i = 0; + } + + bool Lexer::ignoreComments() const { + assert( i ); + return i->ignoreComments(); + } + + const Error & Lexer::error() const { + assert( i ); + return i->error(); + } + + bool Lexer::atEnd() const { + assert( i ); + return i->atEnd(); + } + + int Lexer::column() const { + assert( i ); + return i->column(); + } + + int Lexer::line() const { + assert( i ); + return i->line(); + } + + void Lexer::save() { + assert( i ); + i->save(); + } + + void Lexer::restore() { + assert( i ); + i->restore(); + } + + Lexer::Token 
//
// Character classification for 7-bit characters.
//
// Each table is a 128-bit set stored as 16 bytes: the byte at index
// ch/8 holds eight consecutive characters, most significant bit
// first (mask 0x80 >> ch%8).
//

// none except a-zA-Z0-9_
static const unsigned char iTextMap[16] = {
  0x00, 0x00, 0x00, 0x00, // CTLs:        none
  0x00, 0x00, 0xFF, 0xC0, // SP ... '?':  0-9
  0x7F, 0xFF, 0xFF, 0xE1, // '@' ... '_': A-Z_
  0x7F, 0xFF, 0xFF, 0xE0  // '`' ... DEL: a-z
};

// SP, HT, CR, LF, {}[]();,#/
// ### exclude '['? Why would one want to write identifier["foo"]?
// Note: the 0x16 entries also set the bit of '^' (row 3) and of '~'
// (row 4); isDelim() cuts off above '}', so only '^' is affected.
static const unsigned char delimMap[16] = {
  0x00, 0x64, 0x00, 0x00, // CTLs:        CR, HT, LF
  0x90, 0xC9, 0x00, 0x10, // SP ... '?':  SP, #(),;
  0x00, 0x00, 0x00, 0x16, // '@' ... '_': []
  0x00, 0x00, 0x00, 0x16  // '`' ... DEL: {}
};

// All except iText, delim, "*:
static const unsigned char illegalMap[16] = {
  0xFF, 0x9B, 0xFF, 0xFF,
  0x4F, 0x16, 0x00, 0x0F,
  0x80, 0x00, 0x00, 0x0A,
  0x80, 0x00, 0x00, 0x0A
};

/** Looks up the bit for @p ch (must be below 128) in @p map. */
static inline bool isOfSet( const unsigned char map[16], unsigned char ch ) {
  assert( ch < 128 );
  const unsigned char row = map[ ch >> 3 ];
  const unsigned char bit = 0x80 >> ( ch & 7 );
  return ( row & bit ) != 0;
}

/** True for the characters in iTextMap; anything above 'z' is
    rejected before the table is consulted. */
static inline bool isIText( unsigned char ch ) {
  if ( ch > 'z' )
    return false;
  return isOfSet( iTextMap, ch );
}

/** True for the characters in delimMap; anything above '}' is
    rejected before the table is consulted. */
static inline bool isDelim( unsigned char ch ) {
  if ( ch > '}' )
    return false;
  return isOfSet( delimMap, ch );
}

/** True for '~' and everything above it, and for the characters in
    illegalMap. */
static inline bool isIllegal( unsigned char ch ) {
  if ( ch >= '~' )
    return true;
  return isOfSet( illegalMap, ch );
}

/** True when the high bit of @p ch is set. */
static inline bool is8Bit( signed char ch ) {
  return ch < 0;
}
send : scursor ), + mIgnoreComments( options & IgnoreComments ), + mIgnoreLF( options & IgnoreLineFeeds ) + { + if ( !scursor || !send ) + assert( atEnd() ); + } + + Lexer::Token Lexer::Impl::nextToken( QString & result ) { + assert( !atEnd() ); + result = QString::null; + //clearErrors(); + + const int oldLine = line(); + + const bool eatingWSSucceeded = ignoreComments() ? eatCWS() : eatWS() ; + + if ( !ignoreLineFeeds() && oldLine != line() ) { + result.setNum( line() - oldLine ); // return number of linefeeds encountered + return LineFeeds; + } + + if ( !eatingWSSucceeded ) + return None; + + if ( atEnd() ) + return None; + + switch ( *mState.cursor ) { + case '#': // HashComment + assert( !ignoreComments() ); + ++mState.cursor; + if ( !atEnd() ) + parseHashComment( result, true ); + return HashComment; + case '/': // BracketComment + assert( !ignoreComments() ); + ++mState.cursor; // eat slash + if ( atEnd() || *mState.cursor != '*' ) { + makeError( Error::SlashWithoutAsterisk ); + return BracketComment; + } + ++mState.cursor; // eat asterisk + if ( atEnd() ) { + makeError( Error::UnfinishedBracketComment ); + return BracketComment; + } + parseBracketComment( result, true ); + return BracketComment; + case ':': // Tag + ++mState.cursor; + if ( atEnd() ) { + makeError( Error::UnexpectedCharacter, line(), column() - 1 ); + return Tag; + } + if ( !isIText( *mState.cursor ) ) { + makeIllegalCharError( *mState.cursor ); + return Tag; + } + parseTag( result ); + return Tag; + case '"': // QuotedString + ++mState.cursor; + parseQuotedString( result ); + return QuotedString; + case '{': + case '}': + case '[': + case ']': + case '(': + case ')': + case ';': + case ',': // Special + result = *mState.cursor++; + return Special; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': // Number + parseNumber( result ); + return Number; + case 't': // maybe MultiLineString, else Identifier + if ( _strnicmp( 
mState.cursor, "text:", STR_DIM("text:") ) == 0 ) { + // MultiLineString + mState.cursor += STR_DIM("text:"); + parseMultiLine( result ); + // ### FIXME: There can be a hash-comment between "text:" + // and CRLF! That should be preserved somehow... + return MultiLineString; + } + // else fall through: + default: // Identifier (first must not be 0-9, and can't (caught by Number above)) + if ( !isIText( *mState.cursor ) ) { + makeError( Error::IllegalCharacter ); + return None; + } + parseIdentifier( result ); + return Identifier; + } + } + + bool Lexer::Impl::eatWS() { + while ( !atEnd() ) + switch ( *mState.cursor ) { + case '\r': + case '\n': + if ( !eatCRLF() ) + return false; + break; + case ' ': + case '\t': + ++mState.cursor; + break; + default: + return true; + } + + // at end: + return true; + } + + bool Lexer::Impl::eatCRLF() { + assert( !atEnd() ); + assert( *mState.cursor == '\n' || *mState.cursor == '\r' ); + + if ( *mState.cursor == '\r' ) { + ++mState.cursor; + if ( atEnd() || *mState.cursor != '\n' ) { + // CR w/o LF -> error + makeError( Error::CRWithoutLF ); + return false; + } else { + // good CRLF + newLine(); + return true; + } + } else /* *mState.cursor == '\n' */ { + // good, LF only + newLine(); + return true; + } + } + + + bool Lexer::Impl::parseHashComment( QString & result, bool reallySave ) { + // hash-comment := "#" *CHAR-NOT-CRLF CRLF + + // check that the caller plays by the rules: + assert( *(mState.cursor-1) == '#' ); + + const char * const commentStart = mState.cursor; + + // find next CRLF: + while ( !atEnd() ) { + if ( *mState.cursor == '\n' || *mState.cursor == '\r' ) break; + ++mState.cursor; + } + + const char * const commentEnd = mState.cursor - 1; + + if ( commentEnd == commentStart ) return true; // # was last char in script... 
+ + if ( atEnd() || eatCRLF() ) { + const int commentLength = commentEnd - commentStart + 1; + if ( commentLength > 0 ) { + if ( !isValidUtf8( commentStart, commentLength ) ) { + makeError( Error::InvalidUTF8 ); + return false; + } + if ( reallySave ) + result += QString::fromUtf8( commentStart, commentLength ); + } + return true; + } + + return false; + } + + bool Lexer::Impl::parseBracketComment( QString & result, bool reallySave ) { + // bracket-comment := "/*" *(CHAR-NOT-STAR / ("*" CHAR-NOT-SLASH )) "*/" + + // check that caller plays by the rules: + assert( *(mState.cursor-2) == '/' ); + assert( *(mState.cursor-1) == '*' ); + + const char * const commentStart = mState.cursor; + const int commentCol = column() - 2; + const int commentLine = line(); + + // find next asterisk: + do { + if ( !skipTo( '*' ) ) { + if ( !error() ) + makeError( Error::UnfinishedBracketComment, commentLine, commentCol ); + return false; + } + } while ( !atEnd() && *++mState.cursor != '/' ); + + if ( atEnd() ) { + makeError( Error::UnfinishedBracketComment, commentLine, commentCol ); + return false; + } + + assert( *mState.cursor == '/' ); + + const int commentLength = mState.cursor - commentStart - 1; + if ( commentLength > 0 ) { + if ( !isValidUtf8( commentStart, commentLength ) ) { + makeError( Error::InvalidUTF8 ); + return false; + } + if ( reallySave ) { + QString tmp = QString::fromUtf8( commentStart, commentLength ); + result += tmp.remove( '\r' ); // get rid of CR in CRLF pairs + } + } + + ++mState.cursor; // eat '/' + return true; + } + + bool Lexer::Impl::parseComment( QString & result, bool reallySave ) { + // comment := hash-comment / bracket-comment + + switch( *mState.cursor ) { + case '#': + ++mState.cursor; + return parseHashComment( result, reallySave ); + case '/': + if ( charsLeft() < 2 || mState.cursor[1] != '*' ) { + makeError( Error::IllegalCharacter ); + return false; + } else { + mState.cursor += 2; // eat "/*" + return parseBracketComment( result, reallySave 
); + } + default: + return false; // don't set an error here - there was no comment + } + } + + bool Lexer::Impl::eatCWS() { + // white-space := 1*(SP / CRLF / HTAB / comment ) + + while ( !atEnd() ) { + switch( *mState.cursor ) { + case ' ': + case '\t': // SP / HTAB + ++mState.cursor; + break;; + case '\n': + case '\r': // CRLF + if ( !eatCRLF() ) + return false; + break; + case '#': + case '/': // comments + { + QString dummy; + if ( !parseComment( dummy ) ) + return false; + } + break; + default: + return true; + } + } + return true; + } + + bool Lexer::Impl::parseIdentifier( QString & result ) { + // identifier := (ALPHA / "_") *(ALPHA DIGIT "_") + + assert( isIText( *mState.cursor ) ); + + const char * const identifierStart = mState.cursor; + + // first char: + if ( isdigit( *mState.cursor ) ) { // no digits for the first + makeError( Error::NoLeadingDigits ); + return false; + } + + // rest of identifier chars ( now digits are allowed ): + for ( ++mState.cursor ; !atEnd() && isIText( *mState.cursor ) ; ++mState.cursor ); + + const int identifierLength = mState.cursor - identifierStart; + + // Can use the fast fromLatin1 here, since identifiers are always + // in the us-ascii subset: + result += QString::fromLatin1( identifierStart, identifierLength ); + + if ( atEnd() || isDelim( *mState.cursor ) ) + return true; + + makeIllegalCharError( *mState.cursor ); + return false; + } + + bool Lexer::Impl::parseTag( QString & result ) { + // tag := ":" identifier + + // check that the caller plays by the rules: + assert( *(mState.cursor-1) == ':' ); + assert( !atEnd() ); + assert( isIText( *mState.cursor ) ); + + return parseIdentifier( result ); + } + + bool Lexer::Impl::parseNumber( QString & result ) { + // number := 1*DIGIT [QUANTIFIER] + // QUANTIFIER := "K" / "M" / "G" + + assert( isdigit( *mState.cursor ) ); + + while ( !atEnd() && isdigit( *mState.cursor ) ) + result += *mState.cursor++; + + if ( atEnd() || isDelim( *mState.cursor ) ) + return true; + + 
switch ( *mState.cursor ) { + case 'G': + case 'g': + case 'M': + case 'm': + case 'K': + case 'k': + result += *mState.cursor++; + break; + default: + makeIllegalCharError(); + return false; + } + + // quantifier found. Check for delimiter: + if ( atEnd() || isDelim( *mState.cursor ) ) + return true; + makeIllegalCharError(); + return false; + } + + bool Lexer::Impl::parseMultiLine( QString & result ) { + // multi-line := "text:" *(SP / HTAB) (hash-comment / CRLF) + // *(multi-line-literal / multi-line-dotstuff) + // "." CRLF + // multi-line-literal := [CHAR-NOT-DOT *CHAR-NOT-CRLF] CRLF + // multi-line-dotstuff := "." 1*CHAR-NOT-CRLF CRLF + // ;; A line containing only "." ends the multi-line. + // ;; Remove a leading '.' if followed by another '.'. + + assert( _strnicmp( mState.cursor - 5, "text:", STR_DIM("text:") ) == 0 ); + + const int mlBeginLine = line(); + const int mlBeginCol = column() - 5; + + while ( !atEnd() ) { + switch ( *mState.cursor ) { + case ' ': + case '\t': + ++mState.cursor; + break; + case '#': + { + ++mState.cursor; + QString dummy; + if ( !parseHashComment( dummy ) ) + return false; + goto MultiLineStart; // break from switch _and_ while + } + case '\n': + case '\r': + if ( !eatCRLF() ) return false; + goto MultiLineStart; // break from switch _and_ while + default: + makeError( Error::NonCWSAfterTextColon ); + return false; + } + } + + MultiLineStart: + if ( atEnd() ) { + makeError( Error::PrematureEndOfMultiLine, mlBeginLine, mlBeginCol ); + return false; + } + + // Now, collect the single lines until one with only a single dot is found: + QStringList lines; + while ( !atEnd() ) { + const char * const oldBeginOfLine = beginOfLine(); + if ( !skipToCRLF() ) + return false; + const int lineLength = mState.cursor - oldBeginOfLine; + if ( lineLength > 0 ) { + if ( !isValidUtf8( oldBeginOfLine, lineLength ) ) { + makeError( Error::InvalidUTF8 ); + return false; + } + const QString line = removeCRLF( QString::fromUtf8( oldBeginOfLine, 
lineLength ) ); + lines.push_back( removeDotStuff( line ) ); + if ( line == "." ) + break; + } else { + lines.push_back( QString::null ); + } + } + + if ( lines.back() != "." ) { + makeError( Error::PrematureEndOfMultiLine, mlBeginLine, mlBeginCol ); + return false; + } + + assert( !lines.empty() ); + lines.erase( --lines.end() ); // don't include the lone dot. + result = lines.join("\n"); + return true; + } + + bool Lexer::Impl::parseQuotedString( QString & result ) { + // quoted-string := DQUOTE *CHAR DQUOTE + + // check that caller plays by the rules: + assert( *(mState.cursor-1) == '"' ); + + const int qsBeginCol = column() - 1; + const int qsBeginLine = line(); + + const QTextCodec * const codec = QTextCodec::codecForMib( 106 ); // UTF-8 + assert( codec ); + const std::auto_ptr<QTextDecoder> dec( codec->makeDecoder() ); + assert( dec.get() ); + + while ( !atEnd() ) + switch ( *mState.cursor ) { + case '"': + ++mState.cursor; + return true; + case '\r': + case '\n': + if ( !eatCRLF() ) + return false; + result += '\n'; + break; + case '\\': + ++mState.cursor; + if ( atEnd() ) + break; + // else fall through: + default: + if ( !is8Bit( *mState.cursor ) ) + result += *mState.cursor++; + else { // probably UTF-8 + const char * const eightBitBegin = mState.cursor; + skipTo8BitEnd(); + const int eightBitLen = mState.cursor - eightBitBegin; + assert( eightBitLen > 0 ); + if ( isValidUtf8( eightBitBegin, eightBitLen ) ) + result += dec->toUnicode( eightBitBegin, eightBitLen ); + else { + assert( column() >= eightBitLen ); + makeError( Error::InvalidUTF8, line(), column() - eightBitLen ); + return false; + } + } + } + + makeError( Error::PrematureEndOfQuotedString, qsBeginLine, qsBeginCol ); + return false; + } + + void Lexer::Impl::makeIllegalCharError( char ch ) { + makeError( isIllegal( ch ) ? 
Error::IllegalCharacter : Error::UnexpectedCharacter ); + } + +} // namespace KSieve diff --git a/libksieve/parser/parser.cpp b/libksieve/parser/parser.cpp new file mode 100644 index 000000000..8c2db050e --- /dev/null +++ b/libksieve/parser/parser.cpp @@ -0,0 +1,651 @@ +/* -*- c++ -*- + parser/parser.cpp + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2002-2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. 
+*/ + +#include <config.h> + +#include <ksieve/parser.h> +#include <impl/parser.h> + +#include <ksieve/error.h> + +#include <qstring.h> + +#include <assert.h> +#include <limits.h> // ULONG_MAX +#include <ctype.h> // isdigit + +namespace KSieve { + + // + // + // Parser Bridge implementation + // + // + + Parser::Parser( const char * scursor, const char * const send, int options ) + : i( 0 ) + { + i = new Impl( scursor, send, options ); + } + + Parser::~Parser() { + delete i; i = 0; + } + + void Parser::setScriptBuilder( ScriptBuilder * builder ) { + assert( i ); + i->mBuilder = builder; + } + + ScriptBuilder * Parser::scriptBuilder() const { + assert( i ); + return i->mBuilder; + } + + const Error & Parser::error() const { + assert( i ); + return i->error(); + } + + bool Parser::parse() { + assert( i ); + return i->parse(); + } + +} + +static inline unsigned long factorForQuantifier( char ch ) { + switch ( ch ) { + case 'g': + case 'G': + return 1024*1024*1024; + case 'm': + case 'M': + return 1024*1024; + case 'k': + case 'K': + return 1024; + default: + assert( 0 ); // lexer should prohibit this + return 1; // make compiler happy + } +} + +static inline bool willOverflowULong( unsigned long result, unsigned long add ) { + static const unsigned long maxULongByTen = (unsigned long)(ULONG_MAX / 10.0) ; + return result > maxULongByTen || ULONG_MAX - 10 * result < add ; +} + +namespace KSieve { + + // + // + // Parser Implementation + // + // + + Parser::Impl::Impl( const char * scursor, const char * const send, int options ) + : mToken( Lexer::None ), + lexer( scursor, send, options ), + mBuilder( 0 ) + { + + } + + bool Parser::Impl::isStringToken() const { + return token() == Lexer::QuotedString || + token() == Lexer::MultiLineString ; + } + + + bool Parser::Impl::isArgumentToken() const { + return isStringToken() || + token() == Lexer::Number || + token() == Lexer::Tag || + token() == Lexer::Special && mTokenValue == "[" ; + } + + bool Parser::Impl::obtainToken() { 
+ while ( !mToken && !lexer.atEnd() && !lexer.error() ) { + mToken = lexer.nextToken( mTokenValue ); + if ( lexer.error() ) + break; + // comments and line feeds are semantically invisible and may + // appear anywhere, so we handle them here centrally: + switch ( token() ) { + case Lexer::HashComment: + if ( scriptBuilder() ) + scriptBuilder()->hashComment( tokenValue() ); + consumeToken(); + break; + case Lexer::BracketComment: + if ( scriptBuilder() ) + scriptBuilder()->bracketComment( tokenValue() ); + consumeToken(); + break; + case Lexer::LineFeeds: + for ( unsigned int i = 0, end = tokenValue().toUInt() ; i < end ; ++i ) + if ( scriptBuilder() ) // better check every iteration, b/c + // we call out to ScriptBuilder, + // where nasty things might happen! + scriptBuilder()->lineFeed(); + consumeToken(); + break; + default: ; // make compiler happy + } + } + if ( lexer.error() && scriptBuilder() ) + scriptBuilder()->error( lexer.error() ); + return !lexer.error(); + } + + bool Parser::Impl::parse() { + // this is the entry point: START := command-list + if ( !parseCommandList() ) + return false; + if ( !atEnd() ) { + makeUnexpectedTokenError( Error::ExpectedCommand ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->finished(); + return true; + } + + + bool Parser::Impl::parseCommandList() { + // our ABNF: + // command-list := *comand + + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + if ( token() == Lexer::None ) + continue; + if ( token() != Lexer::Identifier ) + return true; + if ( !parseCommand() ) { + assert( error() ); + return false; + } + } + return true; + } + + + bool Parser::Impl::parseCommand() { + // command := identifier arguments ( ";" / block ) + // arguments := *argument [ test / test-list ] + // block := "{" *command "}" + // our ABNF: + // block := "{" [ command-list ] "}" + + if ( atEnd() ) + return false; + + // + // identifier + // + + if ( !obtainToken() || token() != Lexer::Identifier ) + return false; + + 
if ( scriptBuilder() ) + scriptBuilder()->commandStart( tokenValue() ); + consumeToken(); + + // + // *argument + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( isArgumentToken() && !parseArgumentList() ) { + assert( error() ); + return false; + } + + // + // test / test-list + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list + if ( !parseTestList() ) { + assert( error() ); + return false; + } + } else if ( token() == Lexer::Identifier ) { // should be test: + if ( !parseTest() ) { + assert( error() ); + return false; + } + } + + // + // ";" / block + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( token() != Lexer::Special ) { + makeUnexpectedTokenError( Error::ExpectedBlockOrSemicolon ); + return false; + } + + if ( tokenValue() == ";" ) + consumeToken(); + else if ( tokenValue() == "{" ) { // block + if ( !parseBlock() ) + return false; // it's an error since we saw '{' + } else { + makeError( Error::MissingSemicolonOrBlock ); + return false; + } + + if ( scriptBuilder() ) + scriptBuilder()->commandEnd(); + return true; + } + + + bool Parser::Impl::parseArgumentList() { + // our ABNF: + // argument-list := *argument + + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + if ( !isArgumentToken() ) + return true; + if ( !parseArgument() ) + return !error(); + } + return true; + } + + + bool Parser::Impl::parseArgument() { + // argument := string-list / number / tag + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() == Lexer::Number ) { + if ( !parseNumber() ) { + assert( error() ); + return false; + } + return true; + } else if ( token() == Lexer::Tag ) { + if ( scriptBuilder() ) + 
scriptBuilder()->taggedArgument( tokenValue() ); + consumeToken(); + return true; + } else if ( isStringToken() ) { + if ( scriptBuilder() ) + scriptBuilder()->stringArgument( tokenValue(), token() == Lexer::MultiLineString, QString::null ); + consumeToken(); + return true; + } else if ( token() == Lexer::Special && tokenValue() == "[" ) { + if ( !parseStringList() ) { + assert( error() ); + return false; + } + return true; + } + + return false; + } + + + bool Parser::Impl::parseTestList() { + // test-list := "(" test *("," test) ")" + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Special || tokenValue() != "(" ) + return false; + if ( scriptBuilder() ) + scriptBuilder()->testListStart(); + consumeToken(); + + // generic while/switch construct for comma-separated lists. See + // parseStringList() for another one. Any fix here is like to apply there, too. + bool lastWasComma = true; + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + + switch ( token() ) { + case Lexer::None: + break; + case Lexer::Special: + assert( tokenValue().length() == 1 ); + assert( tokenValue()[0].latin1() ); + switch ( tokenValue()[0].latin1() ) { + case ')': + consumeToken(); + if ( lastWasComma ) { + makeError( Error::ConsecutiveCommasInTestList ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->testListEnd(); + return true; + case ',': + consumeToken(); + if( lastWasComma ) { + makeError( Error::ConsecutiveCommasInTestList ); + return false; + } + lastWasComma = true; + break; + default: + makeError( Error::NonStringInStringList ); + return false; + } + break; + + case Lexer::Identifier: + if ( !lastWasComma ) { + makeError( Error::MissingCommaInTestList ); + return false; + } else { + lastWasComma = false; + if ( !parseTest() ) { + assert( error() ); + return false; + } + } + break; + + default: + makeUnexpectedTokenError( Error::NonTestInTestList ); + return false; + } + } + + makeError( Error::PrematureEndOfTestList ); + 
return false; + } + + + bool Parser::Impl::parseTest() { + // test := identifier arguments + // arguments := *argument [ test / test-list ] + + // + // identifier + // + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Identifier ) + return false; + + if ( scriptBuilder() ) + scriptBuilder()->testStart( tokenValue() ); + consumeToken(); + + // + // *argument + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) // a test w/o args + goto TestEnd; + + if ( isArgumentToken() && !parseArgumentList() ) { + assert( error() ); + return false; + } + + // + // test / test-list + // + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) // a test w/o nested tests + goto TestEnd; + + if ( token() == Lexer::Special && tokenValue() == "(" ) { // test-list + if ( !parseTestList() ) { + assert( error() ); + return false; + } + } else if ( token() == Lexer::Identifier ) { // should be test: + if ( !parseTest() ) { + assert( error() ); + return false; + } + } + + TestEnd: + if ( scriptBuilder() ) + scriptBuilder()->testEnd(); + return true; + } + + + bool Parser::Impl::parseBlock() { + // our ABNF: + // block := "{" [ command-list ] "}" + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Special || tokenValue() != "{" ) + return false; + if ( scriptBuilder() ) + scriptBuilder()->blockStart(); + consumeToken(); + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::PrematureEndOfBlock ); + return false; + } + + if ( token() == Lexer::Identifier ) { + if ( !parseCommandList() ) { + assert( error() ); + return false; + } + } + + if ( !obtainToken() ) + return false; + + if ( atEnd() ) { + makeError( Error::PrematureEndOfBlock ); + return false; + } + + if ( token() != Lexer::Special || tokenValue() != "}" ) { + makeError( Error::NonCommandInCommandList ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->blockEnd(); + consumeToken(); + return true; + } + + bool 
Parser::Impl::parseStringList() { + // string-list := "[" string *("," string) "]" / string + // ;; if there is only a single string, the brackets are optional + // + // However, since strings are already handled separately from + // string lists in parseArgument(), our ABNF is modified to: + // string-list := "[" string *("," string) "]" + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Special || tokenValue() != "[" ) + return false; + + if ( scriptBuilder() ) + scriptBuilder()->stringListArgumentStart(); + consumeToken(); + + // generic while/switch construct for comma-separated lists. See + // parseTestList() for another one. Any fix here is like to apply there, too. + bool lastWasComma = true; + while ( !atEnd() ) { + if ( !obtainToken() ) + return false; + + switch ( token() ) { + case Lexer::None: + break; + case Lexer::Special: + assert( tokenValue().length() == 1 ); + switch ( tokenValue()[0].latin1() ) { + case ']': + consumeToken(); + if ( lastWasComma ) { + makeError( Error::ConsecutiveCommasInStringList ); + return false; + } + if ( scriptBuilder() ) + scriptBuilder()->stringListArgumentEnd(); + return true; + case ',': + consumeToken(); + if ( lastWasComma ) { + makeError( Error::ConsecutiveCommasInStringList ); + return false; + } + lastWasComma = true; + break; + default: + makeError( Error::NonStringInStringList ); + return false; + } + break; + + case Lexer::QuotedString: + case Lexer::MultiLineString: + if ( !lastWasComma ) { + makeError( Error::MissingCommaInStringList ); + return false; + } + lastWasComma = false; + if ( scriptBuilder() ) + scriptBuilder()->stringListEntry( tokenValue(), token() == Lexer::MultiLineString, QString::null ); + consumeToken(); + break; + + default: + makeError( Error::NonStringInStringList ); + return false; + } + } + + makeError( Error::PrematureEndOfStringList ); + return false; + } + + bool Parser::Impl::parseNumber() { + // The lexer returns the number including the quantifier as a 
+ // single token value. Here, we split is an check that the number + // is not out of range: + + if ( !obtainToken() || atEnd() ) + return false; + + if ( token() != Lexer::Number ) + return false; + + // number: + unsigned long result = 0; + unsigned int i = 0; + const QCString s = tokenValue().latin1(); + for ( const unsigned int len = s.length() ; i < len && isdigit( s[i] ) ; ++i ) { + const unsigned long digitValue = s[i] - '0' ; + if ( willOverflowULong( result, digitValue ) ) { + makeError( Error::NumberOutOfRange ); + return false; + } else { + result *= 10 ; result += digitValue ; + } + } + + // optional quantifier: + char quantifier = '\0'; + if ( i < s.length() ) { + assert( i + 1 == s.length() ); + quantifier = s[i]; + const unsigned long factor = factorForQuantifier( quantifier ); + if ( result > double(ULONG_MAX) / double(factor) ) { + makeError( Error::NumberOutOfRange ); + return false; + } + result *= factor; + } + + if ( scriptBuilder() ) + scriptBuilder()->numberArgument( result, quantifier ); + consumeToken(); + return true; + } + +} // namespace KSieve diff --git a/libksieve/parser/utf8validator.cpp b/libksieve/parser/utf8validator.cpp new file mode 100644 index 000000000..248a1f5e9 --- /dev/null +++ b/libksieve/parser/utf8validator.cpp @@ -0,0 +1,141 @@ +/* -*- c++ -*- + utf8validator.cpp + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#include <impl/utf8validator.h> + +#include <qglobal.h> +#include <qcstring.h> + +static inline bool is8Bit( signed char ch ) { + return ch < 0; +} + +static inline bool isUtf8TupelIndicator( unsigned char ch ) { + return (ch & 0xE0) == 0xC0; // 110x xxxx +} + +static inline bool isUtf8OverlongTupel( unsigned char ch ) { + return (ch & 0xFE) == 0xC0; +} + +static inline bool isUtf8TripleIndicator( unsigned char ch ) { + return (ch & 0xF0) == 0xE0; // 1110 xxxx +} + +static inline bool isUtf8OverlongTriple( unsigned char ch1, unsigned char ch2 ) { + return (ch1 & 0xFF) == 0xE0 && (ch2 & 0xE0) == 0x80 ; +} + +static inline bool isUtf8QuartetIndicator( unsigned char ch ) { + return (ch & 0xF8) == 0xF0; // 1111 0xxx +} + +static inline bool isUtf8OverlongQuartet( unsigned char ch1, unsigned char ch2 ) { + return (ch1 & 0xFF) == 0xF0 && (ch2 & 0xF0) == 0x80 ; +} + +static inline bool isUtf8QuintetIndicator( unsigned char ch ) { + return (ch & 0xFC) == 0xF8; // 1111 10xx +} + +static inline bool isUtf8OverlongQuintet( unsigned char ch1, unsigned char ch2 ) { + return (ch1 & 0xFF) == 0xF8 && (ch2 & 0xF8) == 0x80 ; +} + +static inline bool 
isUtf8SextetIndicator( unsigned char ch ) { + return (ch & 0xFE) == 0xFC; // 1111 110x +} + +static inline bool isUtf8OverlongSextet( unsigned char ch1, unsigned char ch2 ) { + return (ch1 & 0xFF) == 0xFC && (ch2 & 0xFC) == 0x80 ; +} + +static inline bool isUtf8Continuation( unsigned char ch ) { + return (ch & 0xC0) == 0x80; +} + +bool KSieve::isValidUtf8( const char * s, unsigned int len ) { + for ( unsigned int i = 0 ; i < len ; ++i ) { + const unsigned char ch = s[i]; + if ( !is8Bit( ch ) ) + continue; + if ( isUtf8TupelIndicator( ch ) ) { + if ( len - i < 1 ) // too short + return false; + if ( isUtf8OverlongTupel( ch ) ) // not minimally encoded + return false; + if ( !isUtf8Continuation( s[i+1] ) ) // not followed by 10xx xxxx + return false; + i += 1; + } else if ( isUtf8TripleIndicator( ch ) ) { + if ( len - i < 2 ) // too short + return false; + if ( isUtf8OverlongTriple( ch, s[i+1] ) ) // not minimally encoded + return false; + if ( !isUtf8Continuation( s[i+2] ) ) // not followed by 10xx xxxx + return false; + i += 2; + } else if ( isUtf8QuartetIndicator( ch ) ) { + if ( len - i < 3 ) // too short + return false; + if ( isUtf8OverlongQuartet( ch, s[i+1] ) ) // not minimally encoded + return false; + if ( !isUtf8Continuation( s[i+2] ) || + !isUtf8Continuation( s[i+3] ) ) // not followed by 2x 10xx xxxx + return false; + i += 3; + } else if ( isUtf8QuintetIndicator( ch ) ) { + if ( len - i < 4 ) // too short + return false; + if ( isUtf8OverlongQuintet( ch, s[i+1] ) ) // not minimally encoded + return false; + if ( !isUtf8Continuation( s[i+2] ) || + !isUtf8Continuation( s[i+3] ) || + !isUtf8Continuation( s[i+4] ) ) // not followed by 3x 10xx xxxx + return false; + i += 4; + } else if ( isUtf8SextetIndicator( ch ) ) { + if ( len - i < 5 ) // too short + return false; + if ( isUtf8OverlongSextet( ch, s[i+1] ) ) // not minimally encoded + return false; + if ( !isUtf8Continuation( s[i+2] ) || + !isUtf8Continuation( s[i+3] ) || + !isUtf8Continuation( s[i+4] ) || 
+ !isUtf8Continuation( s[i+5] ) ) // not followed by 4x 10xx xxxx + return false; + i += 5; + } else + return false; + } + return true; +} diff --git a/libksieve/shared/Makefile.am b/libksieve/shared/Makefile.am new file mode 100644 index 000000000..a2999f687 --- /dev/null +++ b/libksieve/shared/Makefile.am @@ -0,0 +1,8 @@ +INCLUDES = -I$(top_srcdir)/libksieve $(all_includes) + +noinst_LTLIBRARIES = libksieve_shared.la + +libksieve_shared_la_SOURCES = error.cpp +libksieve_shared_la_LIBADD = $(LIB_KDECORE) +libksieve_shared_la_LDFLAGS = $(all_libraries) -no-undefined + diff --git a/libksieve/shared/error.cpp b/libksieve/shared/error.cpp new file mode 100644 index 000000000..e53b0c252 --- /dev/null +++ b/libksieve/shared/error.cpp @@ -0,0 +1,247 @@ +/* -*- c++ -*- + error.cpp + + This file is part of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2002-2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. 
If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ + +#include <config.h> + +#include <ksieve/error.h> + +#include <klocale.h> // i18n + +#include <climits> // UINT_MAX + +namespace KSieve { + + const char * Error::typeToString( Type t ) { + switch ( t ) { +#define CASE(x) case x: return #x + CASE( None ); + CASE( Custom ); + + CASE( CRWithoutLF ); + CASE( SlashWithoutAsterisk ); + CASE( IllegalCharacter ); + CASE( UnexpectedCharacter ); + CASE( NoLeadingDigits ); + CASE( NonCWSAfterTextColon ); + + CASE( NumberOutOfRange ); + CASE( InvalidUTF8 ); + + CASE( UnfinishedBracketComment ); + CASE( PrematureEndOfMultiLine ); + CASE( PrematureEndOfQuotedString ); + CASE( PrematureEndOfStringList ); + CASE( PrematureEndOfTestList ); + CASE( PrematureEndOfBlock ); + CASE( MissingWhitespace ); + CASE( MissingSemicolonOrBlock ); + + CASE( ExpectedBlockOrSemicolon ); + CASE( ExpectedCommand ); + CASE( ConsecutiveCommasInStringList ); + CASE( ConsecutiveCommasInTestList ); + CASE( MissingCommaInTestList ); + CASE( MissingCommaInStringList ); + CASE( NonStringInStringList ); + CASE( NonCommandInCommandList ); + CASE( NonTestInTestList ); + + CASE( RequireNotFirst ); + CASE( RequireMissingForCommand ); + CASE( RequireMissingForTest ); + CASE( RequireMissingForComparator ); + CASE( UnsupportedCommand ); + CASE( UnsupportedTest ); + CASE( UnsupportedComparator ); + CASE( TestNestingTooDeep ); + CASE( BlockNestingTooDeep ); + CASE( InvalidArgument ); + CASE( ConflictingArguments ); + CASE( ArgumentsRepeated ); + CASE( CommandOrderingConstraintViolation ); + + CASE( IncompatibleActionsRequested ); + CASE( MailLoopDetected ); + CASE( TooManyActions ); +#undef CASE + default: + return "<unknown>"; + } + } + + QString Error::asString() const { + + QString err; + switch( type() ) { + case None: + return QString::null; + case 
Custom: + return mStringOne; + + // Parse errors: + case CRWithoutLF: + err = i18n("Parse error: Carriage Return (CR) without Line Feed (LF)"); + break; + case SlashWithoutAsterisk: + err = i18n("Parse error: Unquoted Slash ('/') without Asterisk ('*'). " + "Broken Comment?"); + break; + case IllegalCharacter: + err = i18n("Parse error: Illegal Character"); + break; + case UnexpectedCharacter: + err = i18n("Parse error: Unexpected Character, probably a missing space?"); + break; + case NoLeadingDigits: + err = i18n("Parse error: Tag Name has leading Digits"); + break; + case NonCWSAfterTextColon: + err = i18n("Parse error: Only whitespace and #comments may " + "follow \"text:\" on the same line"); + break; + case NumberOutOfRange: + err = i18n("Parse error: Number out of Range (must be smaller than %1)").arg(UINT_MAX); + break; + case InvalidUTF8: + err = i18n("Parse error: Invalid UTF-8 sequence"); + break; + case PrematureEndOfMultiLine: + err = i18n("Parse error: Premature end of Multiline String (did you forget the '.'?)"); + break; + case PrematureEndOfQuotedString: + err = i18n("Parse error: Premature end of Quoted String (missing closing '\"')"); + break; + case PrematureEndOfStringList: + err = i18n("Parse error: Premature end of String List (missing closing ']')"); + break; + case PrematureEndOfTestList: + err = i18n("Parse error: Premature end of Test List (missing closing ')')"); + break; + case PrematureEndOfBlock: + err = i18n("Parse error: Premature end of Block (missing closing '}')"); + break; + case MissingWhitespace: + err = i18n("Parse error: Missing Whitespace"); + break; + case MissingSemicolonOrBlock: + err = i18n("Parse error: Missing ';' or Block"); + break; + case ExpectedBlockOrSemicolon: + err = i18n("Parse error: Expected ';' or '{', got something else"); + break; + case ExpectedCommand: + err = i18n("Parse error: Expected Command, got something else"); + break; + case ConsecutiveCommasInStringList: + err = i18n("Parse error: Trailing, 
Leading or Duplicate Commas in String List"); + break; + case ConsecutiveCommasInTestList: + err = i18n("Parse error: Trailing, Leading or Duplicate Commas in Test List"); + break; + case MissingCommaInStringList: + err = i18n("Parse error: Missing ',' between Strings in String List"); + break; + case MissingCommaInTestList: + err = i18n("Parse error: Missing ',' between Tests in Test List"); + break; + case NonCommandInCommandList: + err = i18n("Parse error: Expected Command, got something else"); + break; + case NonStringInStringList: + err = i18n("Parse error: Only Strings allowed in String Lists"); + break; + case NonTestInTestList: + err = i18n("Parse error: Only Tests allowed in Test Lists"); + break; + + // validity errors: + case RequireNotFirst: + err = i18n("\"require\" must be first command"); + break; + case RequireMissingForCommand: + err = i18n("\"require\" missing for command \"%1\"").arg(mStringOne); + break; + case RequireMissingForTest: + err = i18n("\"require\" missing for test \"%1\"").arg(mStringOne); + break; + case RequireMissingForComparator: + err = i18n("\"require\" missing for comparator \"%1\"").arg(mStringOne); + break; + case UnsupportedCommand: + err = i18n("Command \"%1\" not supported").arg(mStringOne); + break; + case UnsupportedTest: + err = i18n("Test \"%1\" not supported").arg(mStringOne); + break; + case UnsupportedComparator: + err = i18n("Comparator \"%1\" not supported").arg(mStringOne); + break; + case TestNestingTooDeep: + err = i18n("Site Policy Limit Violation: Test nesting too deep (max. %1)").arg( mStringOne.toUInt() ); + break; + case BlockNestingTooDeep: + err = i18n("Site Policy Limit Violation: Block nesting too deep (max. 
%1)").arg( mStringOne.toUInt() ); + break; + case InvalidArgument: + err = i18n("Invalid Argument \"%1\" to \"%2\"").arg(mStringOne).arg(mStringTwo); + break; + case ConflictingArguments: + err = i18n("Conflicting Arguments: \"%1\" and \"%2\"").arg(mStringOne).arg(mStringTwo); + break; + case ArgumentsRepeated: + err = i18n("Argument \"%1\" Repeated").arg(mStringOne); + break; + case CommandOrderingConstraintViolation: + err = i18n("Command \"%1\" violates command ordering constraints").arg(mStringOne); + break; + + // runtime errors: + case IncompatibleActionsRequested: + err = i18n("Incompatible Actions \"%1\" and \"%2\" requested").arg(mStringOne).arg(mStringTwo); + break; + case MailLoopDetected: + err = i18n("Mail Loop detected"); + break; + case TooManyActions: + err = i18n("Site Policy Limit Violation: Too many Actions requested (max. %1)").arg( mStringOne.toUInt() ); + break; + default: + err = i18n("Unknown error"); + break; + } + + return err; + } + + +} // namespace KSieve + diff --git a/libksieve/tests/Makefile.am b/libksieve/tests/Makefile.am new file mode 100644 index 000000000..36b538408 --- /dev/null +++ b/libksieve/tests/Makefile.am @@ -0,0 +1,13 @@ + +INCLUDES = -I$(top_srcdir)/libksieve $(all_includes) +LDADD = ../libksieve.la + +# test programs: +check_PROGRAMS = \ + lexertest \ + parsertest + +TESTS = $(check_PROGRAMS) + +lexertest_SOURCES = lexertest.cpp +parsertest_SOURCES = parsertest.cpp diff --git a/libksieve/tests/lexertest.cpp b/libksieve/tests/lexertest.cpp new file mode 100644 index 000000000..461499501 --- /dev/null +++ b/libksieve/tests/lexertest.cpp @@ -0,0 +1,484 @@ +/* -*- c++ -*- + tests/lexertest.cpp + + This file is part of the testsuite of KSieve, + the KDE internet mail/usenet news message filtering library. 
+ Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. 
+*/ +#include <config.h> +#include <ksieve/lexer.h> +using KSieve::Lexer; + +#include <ksieve/error.h> +using KSieve::Error; + +#include <qcstring.h> // qstrlen +#include <qstring.h> + +#include <iostream> +using std::cout; +using std::cerr; +using std::endl; + +static const char * token2string( Lexer::Token t ) { + switch ( t ) { +#define CASE(x) case Lexer::x: return #x + CASE( None ); + CASE( HashComment ); + CASE( BracketComment ); + CASE( Identifier ); + CASE( Tag ); + CASE( Number ); + CASE( MultiLineString ); + CASE( QuotedString ); + CASE( Special ); + CASE( LineFeeds ); + } + return ""; +#undef CASE +} + +struct TestCase { + const char * name; + const char * string; + struct { + Lexer::Token token; + const char * result; + } expected[16]; // end with { None, 0 } + Error::Type expectedError; + int errorLine, errorCol; +}; + +static const TestCase testcases[] = { + // + // Whitespace: + // + + { "Null script", 0, + { { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Empty script", "", + { { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Whitespace-only script", " \t\n\t \n", + { { Lexer::LineFeeds, "2" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Lone CR", "\r", + { { Lexer::None, 0 } }, + Error::CRWithoutLF, 0, 1 + }, + + { "CR+Space", "\r ", + { { Lexer::None, 0 } }, + Error::CRWithoutLF, 0, 1 + }, + + { "CRLF alone", "\r\n", + { { Lexer::LineFeeds, "1" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + // + // hash comments: + // + + { "Basic hash comment (no newline)", "#comment", + { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Basic hash comment (LF)", "#comment\n", + { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Basic hash comment (CRLF)", "#comment\r\n", + { { Lexer::HashComment, "comment" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Basic hash comment (CR)", "#comment\r", + { { Lexer::HashComment, 0 } }, + Error::CRWithoutLF, 
0, 9 + }, + + { "Non-UTF-8 in hash comment", "#\xA9 copyright", + { { Lexer::HashComment, 0 } }, + Error::InvalidUTF8, 0, 12 + }, + + // + // bracket comments: + // + + { "Basic bracket comment", "/* comment */", + { { Lexer::BracketComment, " comment " }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Basic bracket comment - missing trailing slash", "/* comment *", + { { Lexer::BracketComment, 0 } }, + Error::UnfinishedBracketComment, 0, 0 + }, + + { "Basic bracket comment - missing trailing asterisk + slash", "/* comment ", + { { Lexer::BracketComment, 0 } }, + Error::UnfinishedBracketComment, 0, 0 + }, + + { "Basic bracket comment - missing leading slash", "* comment */", + { { Lexer::None, 0 } }, + Error::IllegalCharacter, 0, 0 + }, + + { "Basic bracket comment - missing leading asterisk + slash", "comment */", + { { Lexer::Identifier, "comment" }, { Lexer::None, 0 } }, + Error::IllegalCharacter, 0, 8 + }, + + { "Basic multiline bracket comment (LF)", "/* comment\ncomment */", + { { Lexer::BracketComment, " comment\ncomment " }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Basic multiline bracket comment (CRLF)", "/* comment\r\ncomment */", + { { Lexer::BracketComment, " comment\ncomment " }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + { "Basic multiline bracket comment (CR)", "/* comment\rcomment */", + { { Lexer::BracketComment, 0 } }, + Error::CRWithoutLF, 0, 11 + }, + + { "Non-UTF-8 in bracket comment", "/*\xA9 copyright*/", + { { Lexer::BracketComment, 0 } }, + Error::InvalidUTF8, 0, 14 + }, + + // + // numbers: + // + { "Basic number 1", "1", + { { Lexer::Number, "1" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic number 01", "01", + { { Lexer::Number, "01" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Qualified number 1k", "1k", + { { Lexer::Number, "1k" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Qualified number 1M", "1M", + { { Lexer::Number, "1M" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, 
+ { "Qualified number 1G", "1G", + { { Lexer::Number, "1G" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + // + // identifiers: + // + { "Basic identifier \"id\"", "id", + { { Lexer::Identifier, "id" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic identifier \"_id\"", "_id", + { { Lexer::Identifier, "_id" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + // + // tags: + // + { "Basic tag \":tag\"", ":tag", + { { Lexer::Tag, "tag" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic tag \":_tag\"", ":_tag", + { { Lexer::Tag, "_tag" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + // + // specials: + // + { "Basic special \"{}[]();,\"", "{}[]();,", + { { Lexer::Special, "{" }, { Lexer::Special, "}" }, + { Lexer::Special, "[" }, { Lexer::Special, "]" }, + { Lexer::Special, "(" }, { Lexer::Special, ")" }, + { Lexer::Special, ";" }, { Lexer::Special, "," }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + // + // quoted-string: + // + { "Basic quoted string \"foo\"", "\"foo\"", + { { Lexer::QuotedString, "foo" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic quoted string, UTF-8", "\"foo\xC3\xB1" "foo\"", // fooäfoo + { { Lexer::QuotedString, "foo\xC3\xB1" "foo" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Quoted string, escaped '\"'", "\"foo\\\"bar\"", + { { Lexer::QuotedString, "foo\"bar" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Quoted string, escaped '\\'", "\"foo\\\\bar\"", + { { Lexer::QuotedString, "foo\\bar" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Quoted string, excessive escapes", "\"\\fo\\o\"", + { { Lexer::QuotedString, "foo" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Quoted string across lines (LF)", "\"foo\nbar\"", + { { Lexer::QuotedString, "foo\nbar" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Quoted string across lines (CRLF)", "\"foo\r\nbar\"", + { { Lexer::QuotedString, "foo\nbar" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + // + // multiline 
strings: + // + { "Basic multiline string I (LF)", "text:\nfoo\n.", + { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic multiline string I (CRLF)", "text:\r\nfoo\r\n.", + { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic multiline string II (LF)", "text:\nfoo\n.\n", + { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Basic multiline string II (CRLF)", "text:\r\nfoo\r\n.\r\n", + { { Lexer::MultiLineString, "foo" /* "foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Dotstuffed multiline string (LF)", "text:\n..foo\n.", + { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Dotstuffed multiline string (CRLF)", "text:\r\n..foo\r\n.", + { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Incompletely dotstuffed multiline string (LF)", "text:\n.foo\n.", + { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? */ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Incompletely dotstuffed multiline string (CRLF)", "text:\r\n.foo\r\n.", + { { Lexer::MultiLineString, ".foo" /* ".foo\n" ? 
*/ }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + { "Mutiline with a line with only one '.'","text:\r\nfoo\r\n..\r\nbar\r\n.", + { { Lexer::MultiLineString, "foo\n.\nbar" }, { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + + // + // Errors in single tokens: + // + + // + // numbers: + // + { "Number, unknown qualifier", "100f", + { { Lexer::Number, "100" } }, + Error::UnexpectedCharacter, 0, 3 + }, + { "Negative number", "-100", + { { Lexer::None, 0 } }, + Error::IllegalCharacter, 0, 0 + }, + // + // identifiers: + // + { "Identifier, leading digits", "0id", + { { Lexer::Number, "0" } }, + Error::UnexpectedCharacter, 0, 1 + }, + { "Identifier, embedded umlaut", "idäid", + { { Lexer::Identifier, "id" } }, + Error::IllegalCharacter, 0, 2 + }, + // + // tags: + // + { "Lone ':' (at end)", ":", + { { Lexer::Tag, 0 } }, + Error::UnexpectedCharacter, 0, 0 + }, + { "Lone ':' (in stream)", ": ", + { { Lexer::Tag, 0 } }, + Error::UnexpectedCharacter, 0, 1 + }, + { "Tag, leading digits", ":0tag", + { { Lexer::Tag, 0 } }, + Error::NoLeadingDigits, 0, 1 + }, + { "Tag, embedded umlaut", ":tagätag", + { { Lexer::Tag, "tag" } }, + Error::IllegalCharacter, 0, 4 + }, + // + // specials: (none) + // quoted string: + // + { "Premature end of quoted string", "\"foo", + { { Lexer::QuotedString, "foo" } }, + Error::PrematureEndOfQuotedString, 0, 0 + }, + { "Invalid UTF-8 in quoted string", "\"foo\xC0\xA0" "foo\"", + { { Lexer::QuotedString, "foo" } }, + Error::InvalidUTF8, 0, 4 + }, + + // + // Whitespace / token separation: valid + // + + { "Two identifiers with linebreaks", "foo\nbar\n", + { { Lexer::Identifier, "foo" }, + { Lexer::LineFeeds, "1" }, + { Lexer::Identifier, "bar" }, + { Lexer::LineFeeds, "1" }, + { Lexer::None, 0 } }, + Error::None, 0, 0 + }, + + // + // Whitespace / token separation: invalid + // + +}; + +static const int numTestCases = sizeof testcases / sizeof *testcases ; + +int main( int argc, char * argv[] ) { + + if ( argc == 2 ) { // manual test + + const 
char * scursor = argv[1]; + const char * const send = argv[1] + qstrlen( argv[1] ); + + Lexer lexer( scursor, send ); + + cout << "Begin" << endl; + while ( !lexer.atEnd() ) { + QString result; + Lexer::Token token = lexer.nextToken( result ); + if ( lexer.error() ) { + cout << "Error " << token2string( token ) << ": \"" + << lexer.error().asString().latin1() << "\" at (" + << lexer.error().line() << "," << lexer.error().column() + << ")" << endl; + break; + } else + cout << "Got " << token2string( token ) << ": \"" + << result.utf8().data() << "\" at (" + << lexer.line() << "," << lexer.column() << ")" << endl; + } + cout << "End" << endl; + + } else if ( argc == 1 ) { // automated test + bool success = true; + for ( int i = 0 ; i < numTestCases ; ++i ) { + bool ok = true; + const TestCase & t = testcases[i]; + const char * const send = t.string + qstrlen( t.string ); + Lexer lexer( t.string, send, Lexer::IncludeComments ); + cerr << t.name << ":"; + for ( int j = 0 ; !lexer.atEnd() ; ++j ) { + QString result; + Lexer::Token token = lexer.nextToken( result ); + Error error = lexer.error(); + if ( t.expected[j].token != token ) { + ok = false; + cerr << " expected token " << token2string( t.expected[j].token ) + << ", got " << token2string( token ); + } + if ( QString::fromUtf8( t.expected[j].result ) != result ) { + ok = false; + if ( t.expected[j].result ) + cerr << " expected string \"" << t.expected[j].result << "\""; + else + cerr << " expected null string"; + if ( !result.utf8().isNull() ) + cerr << ", got \"" << result.utf8().data() << "\""; + else + cerr << ", got null string"; + } + if ( error && error.type() != t.expectedError ) { + ok = false; + cerr << " expected error #" << (int)t.expectedError + << ", got #" << (int)error.type(); + } + if ( error && ( error.line() != t.errorLine || error.column() != t.errorCol ) ) { + ok = false; + cerr << " expected position (" << t.errorLine << "," << t.errorCol + << "), got (" << error.line() << "," << 
error.column() << ")"; + } + if ( error ) + goto ErrorOut; + if ( t.expected[j].token == Lexer::None && + t.expected[j].result == 0 ) + break; + } + if ( !lexer.atEnd() ) { + ok = false; + cerr << " premature end of expected token list"; + } + ErrorOut: + if ( ok ) + cerr << " ok"; + cerr << endl; + if ( !ok ) + success = false; + } + if ( !success ) + return 1; + } else { // usage error + cerr << "usage: lexertest [ <string> ]" << endl; + exit( 1 ); + } + + return 0; +} diff --git a/libksieve/tests/parsertest.cpp b/libksieve/tests/parsertest.cpp new file mode 100644 index 000000000..e2ea0fd39 --- /dev/null +++ b/libksieve/tests/parsertest.cpp @@ -0,0 +1,667 @@ +/* -*- c++ -*- + tests/parsertest.cpp + + This file is part of the testsuite of KSieve, + the KDE internet mail/usenet news message filtering library. + Copyright (c) 2003 Marc Mutz <mutz@kde.org> + + KSieve is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License, version 2, as + published by the Free Software Foundation. + + KSieve is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + In addition, as a special exception, the copyright holders give + permission to link the code of this program with any edition of + the Qt library by Trolltech AS, Norway (or with modified versions + of Qt that use the same license as Qt), and distribute linked + combinations including the two. You must obey the GNU General + Public License in all respects for all of the code used other than + Qt. 
If you modify this file, you may extend this exception to + your version of the file, but you are not obligated to do so. If + you do not wish to do so, delete this exception statement from + your version. +*/ +#include <config.h> +#include <ksieve/parser.h> +using KSieve::Parser; + +#include <ksieve/error.h> +#include <ksieve/scriptbuilder.h> + +#include <qcstring.h> // qstrlen +#include <qstring.h> + +#include <iostream> +using std::cout; +using std::cerr; +using std::endl; + +#include <cassert> + +enum BuilderMethod { + TaggedArgument, + StringArgument, + NumberArgument, + CommandStart, + CommandEnd, + TestStart, + TestEnd, + TestListStart, + TestListEnd, + BlockStart, + BlockEnd, + StringListArgumentStart, + StringListEntry, + StringListArgumentEnd, + HashComment, + BracketComment, + Error, + Finished +}; + +static const unsigned int MAX_RESPONSES = 100; + +struct TestCase { + const char * name; + const char * script; + struct Response { + BuilderMethod method; + const char * string; + bool boolean; + } responses[MAX_RESPONSES]; +} testCases[] = { + + // + // single commands: + // + + { "Null script", + 0, + { { Finished, 0, false } } + }, + + { "Empty script", + "", + { { Finished, 0, false } } + }, + + { "WS-only script", + " \t\n\r\n", + { { Finished, 0, false } } + }, + + { "Bare hash comment", + "#comment", + { { HashComment, "comment", false }, + { Finished, 0, false } } + }, + + { "Bare bracket comment", + "/*comment*/", + { { BracketComment, "comment", false }, + { Finished, 0, false } } + }, + + { "Bare command", + "command;", + { { CommandStart, "command", false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "Bare command - missing semicolon", + "command", + { { CommandStart, "command", false }, + { Error, "MissingSemicolonOrBlock", false } } + }, + + { "surrounded by bracket comments", + "/*comment*/command/*comment*/;/*comment*/", + { { BracketComment, "comment", false }, + { CommandStart, "command", false }, + { 
BracketComment, "comment", false }, + { CommandEnd, 0, false }, + { BracketComment, "comment", false }, + { Finished, 0, false } } + }, + + { "surrounded by hash comments", + "#comment\ncommand#comment\n;#comment", + { { HashComment, "comment", false }, + { CommandStart, "command", false }, + { HashComment, "comment", false }, + { CommandEnd, 0, false }, + { HashComment, "comment", false }, + { Finished, 0, false } } + }, + + { "single tagged argument", + "command :tag;", + { { CommandStart, "command", false }, + { TaggedArgument, "tag", false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single tagged argument - missing semicolon", + "command :tag", + { { CommandStart, "command", false }, + { TaggedArgument, "tag", false }, + { Error, "MissingSemicolonOrBlock", false } } + }, + + { "single string argument - quoted string", + "command \"string\";", + { { CommandStart, "command", false }, + { StringArgument, "string", false /*quoted*/ }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single string argument - multi-line string", + "command text:\nstring\n.\n;", + { { CommandStart, "command", false }, + { StringArgument, "string", true /*multiline*/ }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single number argument - 100", + "command 100;", + { { CommandStart, "command", false }, + { NumberArgument, "100 ", false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single number argument - 100k", + "command 100k;", + { { CommandStart, "command", false }, + { NumberArgument, "102400k", false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single number argument - 100M", + "command 100M;", + { { CommandStart, "command", false }, + { NumberArgument, "104857600M", false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single number argument - 2G", + "command 2G;", + { { CommandStart, "command", false }, + { NumberArgument, "2147483648G", false 
}, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + +#if SIZEOF_UNSIGNED_LONG == 8 +# define ULONG_MAX_STRING "18446744073709551615" +# define ULONG_MAXP1_STRING "18446744073709551616" +#elif SIZEOF_UNSIGNED_LONG == 4 +# define ULONG_MAX_STRING "4294967295" +# define ULONG_MAXP1_STRING "4G" +#else +# error sizeof( unsigned long ) != 4 && sizeof( unsigned long ) != 8 ??? +#endif + + { "single number argument - ULONG_MAX + 1", + "command " ULONG_MAXP1_STRING ";", + { { CommandStart, "command", false }, + { Error, "NumberOutOfRange", false } } + }, + + { "single number argument - ULONG_MAX", + "command " ULONG_MAX_STRING ";", + { { CommandStart, "command", false }, + { NumberArgument, ULONG_MAX_STRING " ", false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single one-element string list argument - quoted string", + "command [\"string\"];", + { { CommandStart, "command", false }, + { StringListArgumentStart, 0, false }, + { StringListEntry, "string", false /*quoted*/ }, + { StringListArgumentEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single one-element string list argument - multi-line string", + "command [text:\nstring\n.\n];", + { { CommandStart, "command", false }, + { StringListArgumentStart, 0, false }, + { StringListEntry, "string", true /*multiline*/ }, + { StringListArgumentEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single two-element string list argument - quoted strings", + "command [\"string\",\"string\"];", + { { CommandStart, "command", false }, + { StringListArgumentStart, 0, false }, + { StringListEntry, "string", false /*quoted*/ }, + { StringListEntry, "string", false /*quoted*/ }, + { StringListArgumentEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single two-element string list argument - multi-line strings", + "command [text:\nstring\n.\n,text:\nstring\n.\n];", + { { CommandStart, "command", 
false }, + { StringListArgumentStart, 0, false }, + { StringListEntry, "string", true /*multiline*/ }, + { StringListEntry, "string", true /*multiline*/ }, + { StringListArgumentEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single two-element string list argument - quoted + multi-line strings", + "command [\"string\",text:\nstring\n.\n];", + { { CommandStart, "command", false }, + { StringListArgumentStart, 0, false }, + { StringListEntry, "string", false /*quoted*/ }, + { StringListEntry, "string", true /*multiline*/ }, + { StringListArgumentEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single two-element string list argument - multi-line + quoted strings", + "command [text:\nstring\n.\n,\"string\"];", + { { CommandStart, "command", false }, + { StringListArgumentStart, 0, false }, + { StringListEntry, "string", true /*multiline*/ }, + { StringListEntry, "string", false /*quoted*/ }, + { StringListArgumentEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "single bare test argument", + "command test;", + { { CommandStart, "command", false }, + { TestStart, "test", false }, + { TestEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "one-element test list argument", + "command(test);", + { { CommandStart, "command", false }, + { TestListStart, 0, false }, + { TestStart, "test", false }, + { TestEnd, 0, false }, + { TestListEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "two-element test list argument", + "command(test,test);", + { { CommandStart, "command", false }, + { TestListStart, 0, false }, + { TestStart, "test", false }, + { TestEnd, 0, false }, + { TestStart, "test", false }, + { TestEnd, 0, false }, + { TestListEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "zero-element block", + "command{}", + { { CommandStart, "command", false }, + { BlockStart, 
0, false }, + { BlockEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "one-element block", + "command{command;}", + { { CommandStart, "command", false }, + { BlockStart, 0, false }, + { CommandStart, "command", false }, + { CommandEnd, 0, false }, + { BlockEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "two-element block", + "command{command;command;}", + { { CommandStart, "command", false }, + { BlockStart, 0, false }, + { CommandStart, "command", false }, + { CommandEnd, 0, false }, + { CommandStart, "command", false }, + { CommandEnd, 0, false }, + { BlockEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + + { "command with a test with a test with a test", + "command test test test;", + { { CommandStart, "command", false }, + { TestStart, "test", false }, + { TestStart, "test", false }, + { TestStart, "test", false }, + { TestEnd, 0, false }, + { TestEnd, 0, false }, + { TestEnd, 0, false }, + { CommandEnd, 0, false }, + { Finished, 0, false } } + }, + +}; + +static const int numTestCases = sizeof testCases / sizeof *testCases ; + +// Prints out the parse tree in XML-like format. For visual inspection +// (manual tests). +class PrintingScriptBuilder : public KSieve::ScriptBuilder { +public: + PrintingScriptBuilder() + : KSieve::ScriptBuilder(), indent( 0 ) + { + write( "<script type=\"application/sieve\">" ); + ++indent; + } + virtual ~PrintingScriptBuilder() {} + + void taggedArgument( const QString & tag ) { + write( "tag", tag ); + } + void stringArgument( const QString & string, bool multiLine, const QString & /*fixme*/ ) { + write( multiLine ? "string type=\"multiline\"" : "string type=\"quoted\"", string ); + } + void numberArgument( unsigned long number, char quantifier ) { + const QString txt = "number" + ( quantifier ? 
QString(" quantifier=\"%1\"").arg( quantifier ) : QString::null ) ; + write( txt.latin1(), QString::number( number ) ); + } + void commandStart( const QString & identifier ) { + write( "<command>" ); + ++indent; + write( "identifier", identifier ); + } + void commandEnd() { + --indent; + write( "</command>" ); + } + void testStart( const QString & identifier ) { + write( "<test>" ); + ++indent; + write( "identifier", identifier ); + } + void testEnd() { + --indent; + write( "</test>" ); + } + void testListStart() { + write( "<testlist>" ); + ++indent; + } + void testListEnd() { + --indent; + write( "</testlist>" ); + } + void blockStart() { + write( "<block>" ); + ++indent; + } + void blockEnd() { + --indent; + write( "</block>" ); + } + void stringListArgumentStart() { + write( "<stringlist>" ); + ++indent; + } + void stringListArgumentEnd() { + --indent; + write( "</stringlist>" ); + } + void stringListEntry( const QString & string, bool multiline, const QString & hashComment ) { + stringArgument( string, multiline, hashComment ); + } + void hashComment( const QString & comment ) { + write( "comment type=\"hash\"", comment ); + } + void bracketComment( const QString & comment ) { + write( "comment type=\"bracket\"", comment ); + } + + void lineFeed() { + write( "<crlf/>" ); + } + + void error( const KSieve::Error & error ) { + indent = 0; + write( ("Error: " + error.asString()).latin1() ); + } + void finished() { + --indent; + write( "</script>" ); + } +private: + int indent; + void write( const char * msg ) { + for ( int i = 2*indent ; i > 0 ; --i ) + cout << " "; + cout << msg << endl; + } + void write( const QCString & key, const QString & value ) { + if ( value.isEmpty() ) { + write( "<" + key + "/>" ); + return; + } + write( "<" + key + ">" ); + ++indent; + write( value.utf8().data() ); + --indent; + write( "</" + key + ">" ); + } +}; + + +// verifes that methods get called with expected arguments (and in +// expected sequence) as specified by the TestCase. 
For automated +// tests. +class VerifyingScriptBuilder : public KSieve::ScriptBuilder { +public: + VerifyingScriptBuilder( const TestCase & testCase ) + : KSieve::ScriptBuilder(), + mNextResponse( 0 ), mTestCase( testCase ), mOk( true ) + { + } + virtual ~VerifyingScriptBuilder() {} + + bool ok() const { return mOk; } + + void taggedArgument( const QString & tag ) { + checkIs( TaggedArgument ); + checkEquals( tag ); + ++mNextResponse; + } + void stringArgument( const QString & string, bool multiline, const QString & /*fixme*/ ) { + checkIs( StringArgument ); + checkEquals( string ); + checkEquals( multiline ); + ++mNextResponse; + } + void numberArgument( unsigned long number, char quantifier ) { + checkIs( NumberArgument ); + checkEquals( QString::number( number ) + ( quantifier ? quantifier : ' ' ) ); + ++mNextResponse; + } + void commandStart( const QString & identifier ) { + checkIs( CommandStart ); + checkEquals( identifier ); + ++mNextResponse; + } + void commandEnd() { + checkIs( CommandEnd ); + ++mNextResponse; + } + void testStart( const QString & identifier ) { + checkIs( TestStart ); + checkEquals( identifier ); + ++mNextResponse; + } + void testEnd() { + checkIs( TestEnd ); + ++mNextResponse; + } + void testListStart() { + checkIs( TestListStart ); + ++mNextResponse; + } + void testListEnd() { + checkIs( TestListEnd ); + ++mNextResponse; + } + void blockStart() { + checkIs( BlockStart ); + ++mNextResponse; + } + void blockEnd() { + checkIs( BlockEnd ); + ++mNextResponse; + } + void stringListArgumentStart() { + checkIs( StringListArgumentStart ); + ++mNextResponse; + } + void stringListEntry( const QString & string, bool multiLine, const QString & /*fixme*/ ) { + checkIs( StringListEntry ); + checkEquals( string ); + checkEquals( multiLine ); + ++mNextResponse; + } + void stringListArgumentEnd() { + checkIs( StringListArgumentEnd ); + ++mNextResponse; + } + void hashComment( const QString & comment ) { + checkIs( HashComment ); + checkEquals( comment ); 
+ ++mNextResponse; + } + void bracketComment( const QString & comment ) { + checkIs( BracketComment ); + checkEquals( comment ); + ++mNextResponse; + } + void lineFeed() { + // FIXME + } + void error( const KSieve::Error & error ) { + checkIs( Error ); + checkEquals( QString( KSieve::Error::typeToString( error.type() ) ) ); + ++mNextResponse; + } + void finished() { + checkIs( Finished ); + //++mNextResponse (no!) + } + +private: + const TestCase::Response & currentResponse() const { + assert( mNextResponse <= MAX_RESPONSES ); + return mTestCase.responses[mNextResponse]; + } + + void checkIs( BuilderMethod m ) { + if ( currentResponse().method != m ) { + cerr << " expected method " << (int)currentResponse().method + << ", got " << (int)m; + mOk = false; + } + } + + void checkEquals( const QString & s ) { + if ( s != QString::fromUtf8( currentResponse().string ) ) { + cerr << " expected string arg \"" + << ( currentResponse().string ? currentResponse().string : "<null>" ) + << "\", got \"" << ( s.isNull() ? 
"<null>" : s.utf8().data() ) << "\""; + mOk = false; + } + } + void checkEquals( bool b ) { + if ( b != currentResponse().boolean ) { + cerr << " expected boolean arg <" << currentResponse().boolean + << ">, got <" << b << ">"; + mOk = false; + } + } + + unsigned int mNextResponse; + const TestCase & mTestCase; + bool mOk; +}; + + +int main( int argc, char * argv[] ) { + + if ( argc == 2 ) { // manual test + + const char * scursor = argv[1]; + const char * const send = argv[1] + qstrlen( argv[1] ); + + Parser parser( scursor, send ); + PrintingScriptBuilder psb; + parser.setScriptBuilder( &psb ); + if ( parser.parse() ) + cout << "ok" << endl; + else + cout << "bad" << endl; + + + } else if ( argc == 1 ) { // automated test + bool success = true; + for ( int i = 0 ; i < numTestCases ; ++i ) { + const TestCase & t = testCases[i]; + cerr << t.name << ":"; + VerifyingScriptBuilder v( t ); + Parser p( t.script, t.script + qstrlen( t.script ) ); + p.setScriptBuilder( &v ); + const bool ok = p.parse(); + if ( v.ok() ) + if ( ok ) + cerr << " ok"; + else + cerr << " xfail"; + else + success = false; + cerr << endl; + } + if ( !success ) + exit( 1 ); + + } else { // usage error + cerr << "usage: parsertest [ <string> ]" << endl; + exit( 1 ); + } + + return 0; +} |