diff options
Diffstat (limited to 'tools/designer/plugins/cppeditor/yyreg.cpp')
-rw-r--r-- | tools/designer/plugins/cppeditor/yyreg.cpp | 800 |
1 files changed, 800 insertions, 0 deletions
diff --git a/tools/designer/plugins/cppeditor/yyreg.cpp b/tools/designer/plugins/cppeditor/yyreg.cpp new file mode 100644 index 000000000..2e0709e46 --- /dev/null +++ b/tools/designer/plugins/cppeditor/yyreg.cpp @@ -0,0 +1,800 @@ +/********************************************************************** +** +** Copyright (C) 2005-2008 Trolltech ASA. All rights reserved. +** +** This file is part of TQt Designer. +** +** This file may be used under the terms of the GNU General +** Public License versions 2.0 or 3.0 as published by the Free +** Software Foundation and appearing in the files LICENSE.GPL2 +** and LICENSE.GPL3 included in the packaging of this file. +** Alternatively you may (at your option) use any later version +** of the GNU General Public License if such license has been +** publicly approved by Trolltech ASA (or its successors, if any) +** and the KDE Free TQt Foundation. +** +** Please review the following information to ensure GNU General +** Public Licensing retquirements will be met: +** http://trolltech.com/products/qt/licenses/licensing/opensource/. +** If you are unsure which license is appropriate for your use, please +** review the following information: +** http://trolltech.com/products/qt/licenses/licensing/licensingoverview +** or contact the sales department at sales@trolltech.com. +** +** Licensees holding valid TQt Commercial licenses may use this file in +** accordance with the TQt Commercial License Agreement provided with +** the Software. +** +** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, +** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR +** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted +** herein. +** +**********************************************************************/ + +#include <qregexp.h> + +#include <ctype.h> +#include <stdio.h> + +#include "yyreg.h" + +/* + First comes the tokenizer. We don't need something that knows much + about C++. However, we need something that gives tokens from the + end of the file to the start, which is tricky. + + If you are not familiar with hand-written tokenizers and parsers, + you might want to read other simpler parsers written in the same + style: + + $(QTDIR)/src/tools/qregexp.cpp + $(QTDIR)/tools/inspector/cppparser.cpp + + You might also want to read Section 2 in the Dragon Book. +*/ + +/* + Those are the tokens we are interested in. Tok_Something represents + any C++ token that does not interest us, but it's dangerous to + ignore tokens completely. +*/ +enum { Tok_Boi, Tok_Ampersand, Tok_Aster, Tok_LeftParen, Tok_RightParen, + Tok_Equal, Tok_LeftBrace, Tok_RightBrace, Tok_Semicolon, Tok_Colon, + Tok_LeftAngle, Tok_RightAngle, Tok_Comma, Tok_Ellipsis, Tok_Gulbrandsen, + Tok_LeftBracket, Tok_RightBracket, Tok_Tilde, Tok_Something, Tok_Comment, + Tok_Ident, + + Tok_char, Tok_const, Tok_double, Tok_int, Tok_long, Tok_operator, + Tok_short, Tok_signed, Tok_unsigned }; + +/* + The following variables store the lexical analyzer state. The best way + to understand them is to implement a function myGetToken() that calls + getToken(), to add some qDebug() statements in there and then to + #define getToken() myGetToken(). +*/ +static TQString *yyIn; // the input stream +static int yyPos; // the position of the current token in yyIn +static int yyCurPos; // the position of the next lookahead character +static char *yyLexBuf; // the lexeme buffer +static const int YYLexBufSize = 65536; // big enough for long comments +static char *yyLex; // the lexeme itself (a pointer into yyLexBuf) +static int yyCh; // the lookbehind character + +/* + Moves back to the previous character in the input stream and + updates the tokenizer state. This function is to be used only by + getToken(), which provides the right abstraction. +*/ +static inline void readChar() +{ + if ( yyCh == EOF ) + return; + + if ( yyLex > yyLexBuf ) + *--yyLex = (char) yyCh; + + if ( yyCurPos < 0 ) + yyCh = EOF; + else + yyCh = (*yyIn)[yyCurPos].unicode(); + yyCurPos--; +} + +/* + Sets up the tokenizer. +*/ +static void startTokenizer( const TQString& in ) +{ + yyIn = new TQString; + *yyIn = in; + yyPos = yyIn->length() - 1; + yyCurPos = yyPos; + yyLexBuf = new char[YYLexBufSize]; + yyLex = yyLexBuf + YYLexBufSize - 1; + *yyLex = '\0'; + yyCh = '\0'; + readChar(); +} + +/* + Frees resources allocated by the tokenizer. +*/ +static void stopTokenizer() +{ + delete yyIn; + delete[] yyLexBuf; + yyLexBuf = 0; +} + +/* + These two macros implement tquick-and-dirty hashing for telling + apart keywords fast. +*/ +#define HASH( ch, len ) ( (ch) | ((len) << 8) ) +#define CHECK( target ) \ + if ( strcmp((target), yyLex) != 0 ) \ + break; + +/* + Returns the previous token in the abstract token stream. The parser + deals only with tokens, not with characters. +*/ +static int getToken() +{ + // why "+ 2"? try putting some qDebug()'s and see + yyPos = yyCurPos + 2; + + for ( ;; ) { + /* + See if the previous token is interesting. If it isn't, we + will loop anyway an go to the token before the previous + token, and so on. + */ + + yyLex = yyLexBuf + YYLexBufSize - 1; + *yyLex = '\0'; + + if ( yyCh == EOF ) { + break; + } else if ( isspace(yyCh) ) { + bool metNL = FALSE; + do { + metNL = ( metNL || yyCh == '\n' ); + readChar(); + } while ( isspace(yyCh) ); + + if ( metNL ) { + /* + C++ style comments are tricky. In left-to-right + thinking, C++ comments start with "//" and end with + '\n'. In right-to-left thinking, they start with a + '\n'; but of course not every '\n' starts a comment. + + When we meet the '\n', we look behind, on the same + line, for a "//", and if there is one we mess + around with the tokenizer state to effectively + ignore the comment. Beware of off-by-one and + off-by-two bugs when you modify this code by adding + qDebug()'s here and there. + */ + if ( yyCurPos >= 0 ) { + int lineStart = yyIn->findRev( TQChar('\n'), yyCurPos ) + 1; + TQString line = yyIn->mid( lineStart, + yyCurPos - lineStart + 2 ); + int commentStart = line.find( TQString("//") ); + if ( commentStart != -1 ) { + yyCurPos = lineStart + commentStart - 1; + yyPos = yyCurPos + 2; + readChar(); + } + } + } + } else if ( isalnum(yyCh) || yyCh == '_' ) { + do { + readChar(); + } while ( isalnum(yyCh) || yyCh == '_' ); + + switch ( HASH(yyLex[0], strlen(yyLex)) ) { + case HASH( 'c', 4 ): + CHECK( "char" ); + return Tok_char; + case HASH( 'c', 5 ): + CHECK( "const" ); + return Tok_const; + case HASH( 'd', 6 ): + CHECK( "double" ); + return Tok_double; + case HASH( 'i', 3 ): + CHECK( "int" ); + return Tok_int; + case HASH( 'l', 4 ): + CHECK( "long" ); + return Tok_long; + case HASH( 'o', 8 ): + CHECK( "operator" ); + return Tok_operator; + case HASH( 's', 5 ): + CHECK( "short" ); + return Tok_short; + case HASH( 's', 6 ): + CHECK( "signed" ); + return Tok_signed; + case HASH( 'u', 8 ): + CHECK( "unsigned" ); + return Tok_unsigned; + } + if ( isdigit(*yyLex) ) + return Tok_Something; + else + return Tok_Ident; + } else { + int quote; + + switch ( yyCh ) { + case '!': + case '%': + case '^': + case '+': + case '-': + case '?': + case '|': + readChar(); + return Tok_Something; + case '"': + case '\'': + quote = yyCh; + readChar(); + + while ( yyCh != EOF && yyCh != '\n' ) { + if ( yyCh == quote ) { + readChar(); + if ( yyCh != '\\' ) + break; + } else { + readChar(); + } + } + return Tok_Something; + case '&': + readChar(); + if ( yyCh == '&' ) { + readChar(); + return Tok_Something; + } else { + return Tok_Ampersand; + } + case '(': + readChar(); + return Tok_LeftParen; + case ')': + readChar(); + return Tok_RightParen; + case '*': + readChar(); + return Tok_Aster; + case ',': + readChar(); + return Tok_Comma; + case '.': + readChar(); + if ( yyCh == '.' ) { + do { + readChar(); + } while ( yyCh == '.' ); + return Tok_Ellipsis; + } else { + return Tok_Something; + } + case '/': + /* + C-style comments are symmetric. C++-style comments + are handled elsewhere. + */ + readChar(); + if ( yyCh == '*' ) { + bool metAster = FALSE; + bool metAsterSlash = FALSE; + + readChar(); + + while ( !metAsterSlash ) { + if ( yyCh == EOF ) + break; + + if ( yyCh == '*' ) + metAster = TRUE; + else if ( metAster && yyCh == '/' ) + metAsterSlash = TRUE; + else + metAster = FALSE; + readChar(); + } + break; + // return Tok_Comment; + } else { + return Tok_Something; + } + case ':': + readChar(); + if ( yyCh == ':' ) { + readChar(); + return Tok_Gulbrandsen; + } else { + return Tok_Colon; + } + case ';': + readChar(); + return Tok_Semicolon; + case '<': + readChar(); + return Tok_LeftAngle; + case '=': + readChar(); + return Tok_Equal; + case '>': + readChar(); + return Tok_RightAngle; + case '[': + readChar(); + return Tok_LeftBracket; + case ']': + readChar(); + return Tok_RightBracket; + case '{': + readChar(); + return Tok_LeftBrace; + case '}': + readChar(); + return Tok_RightBrace; + case '~': + readChar(); + return Tok_Tilde; + default: + readChar(); + } + } + } + return Tok_Boi; +} + +/* + Follow the member function(s) of CppFunction. +*/ + +/* + Returns the prototype for the C++ function, without the semicolon. +*/ +TQString CppFunction::prototype() const +{ + TQString proto; + + if ( !returnType().isEmpty() ) + proto = returnType() + TQChar( ' ' ); + proto += scopedName(); + proto += TQChar( '(' ); + if ( !parameterList().isEmpty() ) { + TQStringList::ConstIterator p = parameterList().begin(); + proto += *p; + ++p; + while ( p != parameterList().end() ) { + proto += TQString( ", " ); + proto += *p; + ++p; + } + } + proto += TQChar( ')' ); + if ( isConst() ) + proto += TQString( " const" ); + return proto; +} + +/* + The parser follows. We are not really parsing C++, just trying to + find the start and end of function definitions. + + One important pitfall is that the parsed code needs not be valid. + Parsing from right to left helps cope with that, as explained in + comments below. + + In the examples, we will use the symbol @ to stand for the position + in the token stream. In "int @ x ;", the lookahead token (yyTok) is + 'int'. +*/ + +static int yyTok; // the current token + +/* + Returns TRUE if thingy is a constructor or a destructor; otherwise + returns FALSE. +*/ +static bool isCtorOrDtor( const TQString& thingy ) +{ + // e.g., Alpha<a>::Beta<Bar<b, c> >::~Beta + TQRegExp xtor( TQString( + "(?:([A-Z_a-z][0-9A-Z_a-z]*)" // class name + "(?:<(?:[^>]|<[^>]*>)*>)*" // template arguments + "::)+" // many in a row + "~?" // ctor or dtor? + "\\1") ); // function has same name as class + return xtor.exactMatch( thingy ); +} + +/* + Skips over any template arguments with balanced angle brackets, and + returns the skipped material as a string. + + Before: TQMap < TQString , TQValueList < TQString > > @ m ; + After: TQMap @ < TQString , TQValueList < TQString > > m ; +*/ +static TQString matchTemplateAngles() +{ + TQString t; + + if ( yyTok == Tok_RightAngle ) { + int depth = 0; + do { + if ( yyTok == Tok_RightAngle ) + depth++; + else if ( yyTok == Tok_LeftAngle ) + depth--; + t.prepend( yyLex ); + yyTok = getToken(); + } while ( depth > 0 && yyTok != Tok_Boi && yyTok != Tok_LeftBrace ); + } + return t; +} + +/* + Similar to matchTemplateAngles(), but for array brackets in parameter + data types (as in "int *argv[]"). +*/ +static TQString matchArrayBrackets() +{ + TQString t; + + while ( yyTok == Tok_RightBracket ) { + t.prepend( yyLex ); + yyTok = getToken(); + if ( yyTok == Tok_Something ) { + t.prepend( yyLex ); + yyTok = getToken(); + } + if ( yyTok != Tok_LeftBracket ) + return TQString::null; + t.prepend( yyLex ); + yyTok = getToken(); + } + return t; +} + +/* + Prepends prefix to *type. This operation is in theory trivial, but + for the spacing to look good, we have to do something. The original + spacing is lost as the input is tokenized. +*/ +static void prependToType( TQString *type, const TQString& prefix ) +{ + if ( !type->isEmpty() && !prefix.isEmpty() ) { + TQChar left = prefix[(int) prefix.length() - 1]; + TQChar right = (*type)[0]; + + if ( left.isLetter() && + (right.isLetter() || right == TQChar('*') || right == TQChar('&')) ) + type->prepend( TQChar(' ') ); + } + type->prepend( prefix ); +} + +static bool isModifier( int tok ) +{ + return ( tok == Tok_signed || tok == Tok_unsigned || + tok == Tok_short || tok == Tok_long ); +} + +/* + Parses a data type (backwards as usual) and returns a textual + representation of it. +*/ +static TQString matchDataType() +{ + TQString type; + + while ( yyTok == Tok_Ampersand || yyTok == Tok_Aster || + yyTok == Tok_const ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + } + + /* + This code is really hard to follow... sorry. The loop matches + Alpha::Beta::Gamma::...::Omega. + */ + for ( ;; ) { + bool modifierMet = FALSE; + + prependToType( &type, matchTemplateAngles() ); + + if ( yyTok != Tok_Ident ) { + /* + People may write 'const unsigned short' or + 'short unsigned const' or any other permutation. + */ + while ( yyTok == Tok_const || isModifier(yyTok) ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + if ( yyTok != Tok_const ) + modifierMet = TRUE; + } + + if ( yyTok == Tok_Tilde ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + } + } + + if ( !modifierMet ) { + if ( yyTok == Tok_Ellipsis || yyTok == Tok_Ident || + yyTok == Tok_char || yyTok == Tok_int || + yyTok == Tok_double ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + } else { + return TQString::null; + } + } else if ( yyTok == Tok_int || yyTok == Tok_char || + yyTok == Tok_double ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + } + + while ( yyTok == Tok_const || isModifier(yyTok) ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + } + + if ( yyTok == Tok_Gulbrandsen ) { + prependToType( &type, yyLex ); + yyTok = getToken(); + } else { + break; + } + } + return type; +} + +/* + Parses a function prototype (without the semicolon) and returns an + object that stores information about this function. +*/ +static CppFunction matchFunctionPrototype( bool stripParamNames ) +{ + CppFunction func; +#if 0 + TQString documentation; +#endif + TQString returnType; + TQString scopedName; + TQStringList params; + TQString qualifier; + bool cnst = FALSE; + + if ( yyTok == Tok_const ) { + cnst = TRUE; + yyTok = getToken(); + } + + if ( yyTok != Tok_RightParen ) + return func; + yyTok = getToken(); + + if ( yyTok != Tok_LeftParen ) { + for ( ;; ) { + TQString brackets = matchArrayBrackets(); + TQString name; + if ( yyTok == Tok_Ident ) { + name = yyLex; + yyTok = getToken(); + } + TQString type = matchDataType(); + + if ( type.isEmpty() ) { + if ( name.isEmpty() ) + return func; + type = name; + name = TQString::null; + } + if ( stripParamNames ) + name = TQString::null; + + TQString param = type + TQChar( ' ' ) + name + brackets; + params.prepend( param.stripWhiteSpace() ); + + if ( yyTok != Tok_Comma ) + break; + yyTok = getToken(); + } + if ( yyTok != Tok_LeftParen ) + return func; + } + yyTok = getToken(); + + for ( ;; ) { + scopedName.prepend( matchTemplateAngles() ); + + if ( yyTok != Tok_Ident ) { + // the operator keyword should be close + int i = 0; + while ( i < 4 && yyTok != Tok_operator ) { + scopedName.prepend( yyLex ); + i++; + } + if ( yyTok != Tok_operator ) + return func; + } + scopedName.prepend( yyLex ); + yyTok = getToken(); + + if ( yyTok != Tok_Gulbrandsen ) + break; + scopedName.prepend( yyLex ); + yyTok = getToken(); + } + + if ( !isCtorOrDtor(scopedName) ) { + returnType = matchDataType(); + if ( returnType.isEmpty() ) + return func; + } + + /* + The documentation feature is unused so far, since we cannot + really distinguist between a normal comment between two + functions and one that relates to the following function. One + good heuristic is to assume that a comment immediately followed + by a function with no blank line in between relates to the + function, but there's no easy way to find that out with a + tokenizer. + */ +#if 0 + if ( yyTok == Tok_Comment ) { + documentation = yyLex; + yyTok = getToken(); + } + + func.setDocumentation( documentation ); +#endif + func.setReturnType( returnType ); + func.setScopedName( scopedName ); + func.setParameterList( params ); + func.setConst( cnst ); + return func; +} + +/* + Try to set the body. It's not sufficient to call + func->setBody(somewhatBody), as the somewhatBody might be too large. + Case in point: + + void foo() + { + printf( "Hello" ); + } + + int n; + + void bar() + { + printf( " world!\n" ); + } + + The parser first finds bar(). Then it finds "void foo() {" and + naively expects the body to extend up to "void bar()". This + function's job is to count braces and make sure "int n;" is not + counted as part of the body. + + Cases where the closing brace of the body is missing retquire no + special processing. +*/ +static void setBody( CppFunction *func, const TQString& somewhatBody ) +{ + TQString body = somewhatBody; + + int braceDepth = 0; + int i = 0; + while ( i < (int) body.length() ) { + if ( body[i] == TQChar('{') ) { + braceDepth++; + } else if ( body[i] == TQChar('}') ) { + braceDepth--; + if ( braceDepth == 0 ) { + body.truncate( i + 1 ); + break; + } + } + i++; + } + + func->setBody( body ); +} + +/* + Parses a whole C++ file, looking for function definitions. Case in + point: + + void foo() + { + printf( "Hello" ); + + void bar() + { + printf( " world!\n" ); + } + + The parser looks for left braces and tries to parse a function + prototype backwards. First it finds "void bar() {". Then it works + up and finds "void foo() {". +*/ +static void matchTranslationUnit( TQValueList<CppFunction> *flist ) +{ + int endBody = -1; + int startBody; + + for ( ;; ) { + if ( endBody == -1 ) + endBody = yyPos; + + while ( yyTok != Tok_Boi && yyTok != Tok_LeftBrace ) + yyTok = getToken(); + if ( yyTok == Tok_Boi ) + break; + + // found a left brace + yyTok = getToken(); + startBody = yyPos; + CppFunction func = matchFunctionPrototype( FALSE ); + if ( !func.scopedName().isEmpty() ) { + TQString body = yyIn->mid( startBody, endBody - startBody ); + setBody( &func, body ); + body = func.body(); // setBody() can change the body + + /* + Compute important line numbers. + */ + int functionStartLineNo = 1 + TQConstString( yyIn->unicode(), yyPos ) + .string().contains( TQChar('\n') ); + int startLineNo = functionStartLineNo + + TQConstString( yyIn->unicode() + yyPos, startBody - yyPos ) + .string().contains( TQChar('\n') ); + int endLineNo = startLineNo + body.contains( TQChar('\n') ); + + func.setLineNums( functionStartLineNo, startLineNo, endLineNo ); + flist->prepend( func ); + endBody = -1; + } + } +} + +/* + Extracts C++ function from source code and put them in a list. +*/ +void extractCppFunctions( const TQString& code, TQValueList<CppFunction> *flist ) +{ + startTokenizer( code ); + yyTok = getToken(); + matchTranslationUnit( flist ); + stopTokenizer(); +} + +/* + Returns the prototype with the parameter names removed. +*/ +TQString canonicalCppProto( const TQString& proto ) +{ + startTokenizer( proto ); + yyTok = getToken(); + CppFunction func = matchFunctionPrototype( TRUE ); + stopTokenizer(); + return func.prototype(); +} |