1 files changed, 319 insertions, 0 deletions
diff --git a/kregexpeditor/qregexpparser.l b/kregexpeditor/qregexpparser.l
new file mode 100644
index 0000000..4fb90cc
--- /dev/null
+++ b/kregexpeditor/qregexpparser.l
@@ -0,0 +1,319 @@
+/*
+ *  Copyright (c) 2002-2003 Jesper K. Pedersen <blackie@kde.org>
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License version 2 as published by the Free Software Foundation.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ **/
+%option noyywrap
+
+%{ 
+  #include <qstring.h>
+  #include "textrangeregexp.h"
+  #include "gen_qregexpparser.hh"
+#ifdef QT_ONLY
+  #include "compat.h"
+#endif
+  void parseRange( char* txt, int* min, int* max );  
+  RegExp* parseCharClass( char* match );
+%}
+  
+Escape   \\.
+BackRef  \\[1-9][0-9]*
+CharClass \[^?\]?[^]]*\]
+Range \{[0-9]*(,[0-9]*)?\}
+HexChar \\x[0-9a-fA-F]{1,4}
+OctChar \\0[0-7]{1,4}
+SpecialEsc \\[afnrtv]
+%%
+"\\b"      return TOK_PosWordChar;
+"\\B"      return TOK_PosNonWordChar;
+"\\d"      { 
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->setDigit( true );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+"\\D"      { 
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->setNonDigit( true );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+"\\s"      { 
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->setSpace( true );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+"\\S"      { 
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->setNonSpace( true );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+"\\w"      { 
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->setWordChar( true );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+"\\W"      { 
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->setNonWordChar( true );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+{SpecialEsc} {
+               TextRangeRegExp* regexp = new TextRangeRegExp( false );
+               regexp->addCharacter( QString::fromLocal8Bit( yytext ) );
+               qregexplval.regexp = regexp;
+               return TOK_CharClass;
+             }
+
+{HexChar}  {
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->addCharacter( QString::fromLocal8Bit(yytext) );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+{OctChar}  {
+             TextRangeRegExp* regexp = new TextRangeRegExp( false ); 
+             regexp->addCharacter( QString::fromLocal8Bit(yytext) );
+             qregexplval.regexp = regexp;
+             return TOK_CharClass;
+           }
+"."        return TOK_Dot;
+"$"        return TOK_Dollar;
+"^"        return TOK_Carat;
+"(?:"      return TOK_MagicLeftParent;
+"(?="      return TOK_PosLookAhead;
+"(?!"      return TOK_NegLookAhead;
+"("        return TOK_LeftParen;
+")"        return TOK_RightParent;
+"|"        return TOK_Bar;
+"*"        { qregexplval.range.min = 0; qregexplval.range.max=-1; return TOK_Quantifier; }
+"?"        { qregexplval.range.min = 0; qregexplval.range.max=1;  return TOK_Quantifier; }
+"+"        { qregexplval.range.min = 1; qregexplval.range.max=-1; return TOK_Quantifier; }
+{Range}     { parseRange( yytext, &qregexplval.range.min, &qregexplval.range.max ); return TOK_Quantifier; }  
+{CharClass} { qregexplval.regexp = parseCharClass(yytext); return TOK_CharClass; }
+{BackRef}   { qregexplval.backRef = atoi( yytext+1 ); return TOK_BackRef; }
+{Escape}    { qregexplval.ch = yytext[1]; return TOK_EscapeChar; }
+.           { qregexplval.ch = yytext[0]; return TOK_Char; }
+
+%%
+
+void setParseData( QString qstr ) {  
+  const char* cstr;
+  if ( qstr.isNull() ) 
+    cstr = "";
+  else
+    cstr = qstr.latin1();
+  yy_switch_to_buffer( yy_scan_string( cstr ) );
+}
+
+/**
+   This function parses a range in a form similar to "{3,4}", "{,7}"
+   etc. and returns the value in the integers pointed to by min and max.
+*/
+void parseRange( char* txt, int* min, int* max ) 
+{
+
+  /*  
+      case  txt   min  max
+       1    {}     0   -1
+       2    {,}    0   -1
+       3    {5}    5    5
+       4    {5,}   5   -1
+       5    {,7}   0    7
+       6    {5,7}  5    7
+  */    
+  char c;
+  int i = 1;
+  int minimum=0, maximum=0;
+  int minFound=0, maxFound=0, commaFound = 0;
+
+  while ( (c = txt[i++]) != ',' && c != '}') {
+    minimum = minimum*10+ c-'0';
+    minFound=1;
+  }
+
+  if ( c == ',' )
+    commaFound = 1;
+
+  if ( c != '}' ) {
+    while ( (c = txt[i++]) != '}') {
+      maximum = maximum*10+ c-'0';
+      maxFound = 1;
+    }
+  }
+  
+  *min = minimum;
+  if ( maxFound ) 
+    *max = maximum;   /* case 5,6 */
+  else if ( !minFound )
+    *max = -1;        /* case 1,2 */
+  else if ( commaFound )
+    *max = -1;        /* case 4 */
+  else
+    *max = minimum;   /* case 3 */
+}
+
+
+/** 
+    This function parses a character range like "[^ab1-4]".
+*/
+RegExp* parseCharClass( char* match )
+{
+  TextRangeRegExp* res = new TextRangeRegExp( false );
+  QString txt = QString::fromLocal8Bit( match );
+  txt = txt.mid(1,txt.length()-2);
+  
+  unsigned int i = 0;
+  QChar ch = txt.at(i++);
+  QString pendingChar;
+  QString thisChar;
+  bool charPending = false;
+  bool rangePending = false;
+  bool flushPending = false;
+  
+  if ( ch == QChar('^') ) {
+    res->setNegate( true );
+    ch = txt.at(i++);
+  }
+
+  do {
+    // If a character is pending, and the next char is '-' then we are
+    // possible looking at a range.
+    if ( ch == QChar('-') && charPending ) {
+      rangePending = true;
+      ch = txt.at(i++);
+      continue;
+    }
+
+    // If we have a pending character, but do not also have a pending
+    // range, then the pending character was not part of a range, and
+    // should therefore just be added as a single character.
+    if ( charPending && !rangePending ) {
+      res->addCharacter( pendingChar );
+      charPending = false;
+    }
+
+    if ( ch == QChar('\\') ) {
+      // Handle the cases where an escape character is specified.
+      ch = txt.at(i++);
+      
+      if ( ch == QChar('a') || ch == QChar('f') || ch == QChar('n') || ch == QChar('r') || ch == QChar('t') || ch == QChar('v') ) {
+        // These are just seen as normal characters.
+        thisChar = QString::fromLocal8Bit("\\") + ch;
+      }
+      else if ( ch == QChar('d') ) {  
+        // The following characters represent character groups. If any of
+        // these are seen in a range, then the range is ignored, thus [a-\s]
+        // matches an 'a', a '-', and a space (\s means space).
+        res->setDigit( true );
+        flushPending = true;
+      }
+      else if ( ch == QChar('D') ) {
+        res->setNonDigit( true );
+        flushPending = true;
+      }
+      else if ( ch == QChar('s') ) {
+        res->setSpace( true );
+        flushPending = true;
+      }
+      else if ( ch == QChar('S') ) {
+        res->setNonSpace( true );
+        flushPending = true;
+      }
+      else if ( ch == QChar('w') ) {
+        res->setWordChar( true );
+        flushPending = true;
+      }
+      else if ( ch == QChar('W') ) {
+        res->setNonWordChar( true );
+        flushPending = true;
+      }
+      else if ( ch == QChar('x') || ch == QChar('X') ) { 
+        // This is a hexidecimal character: \xHHHH
+        QString str;
+        for ( int j=0; j<4; j++) {
+          ch = txt.at(i++);
+            if ( ch == 'a' || ch == 'A' || ch == 'b' || ch == 'B' || ch == 'c' || ch == 'C' || ch == 'd' || ch == 'D' || 
+                 ch == 'e' || ch == 'E' || ch == 'f' || ch == 'F' ||
+                 ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' || 
+                 ch == '8' || ch == '9' )
+              str += ch;
+            else
+              i--;
+        }
+        thisChar = QString::fromLocal8Bit("\\x") + str;
+      }
+      else if ( ch == QChar('0') ) {
+        // This is an octal character
+        QString str;
+        for ( int j=0; j<4; j++) {
+          ch = txt.at(i++);
+          if ( ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' )
+            str += ch;
+          else
+            i--;
+        }
+        thisChar = QString::fromLocal8Bit("\\x") + str ;
+      }
+      else {
+        // Anything else escaped just means the character itself.
+        thisChar = ch;
+      }
+    }
+    else {
+      // A non escaped character.
+      thisChar = ch;
+    }
+    
+    // The characters \s,\S,\w,\W,\d or \D, can not be part of a range,
+    // thus if they are meet in what looks like a range, then the
+    // characters of the range is justed seen as normal non range
+    // characters. thus [a-\s] matches an 'a', a '-', and a space (\s means
+    // space). 
+    if ( flushPending ) {
+      if ( charPending ) 
+        res->addCharacter( pendingChar );
+      if ( rangePending ) 
+        res->addCharacter( QString::fromLocal8Bit("-") );
+      flushPending = false; 
+      charPending = false;
+      rangePending = false;
+    }
+    else {
+      if ( rangePending ) {
+        res->addRange( pendingChar, thisChar );
+        charPending = false;
+        rangePending = false;
+      }
+      else {
+        pendingChar = thisChar;
+        charPending = true;
+      }
+    }
+    ch = txt.at(i++);
+  }
+  while ( ch != QChar(']') && i <= txt.length() );
+
+  if ( charPending ) 
+    res->addCharacter( pendingChar );
+  if ( rangePending ) 
+    res->addCharacter( QString::fromLocal8Bit("-") );
+
+  return res;
+}