diff options
Diffstat (limited to 'tdecore')
-rw-r--r-- | tdecore/CMakeLists.txt | 2 | ||||
-rw-r--r-- | tdecore/tdeglobal.cpp | 9 | ||||
-rw-r--r-- | tdecore/tdeglobal.h | 7 | ||||
-rwxr-xr-x | tdecore/tdestringmatcher.cpp | 44 | ||||
-rw-r--r-- | tdecore/tdestringmatcher.h | 95 | ||||
-rw-r--r-- | tdecore/tequivchars-mapping.h | 5 | ||||
-rwxr-xr-x | tdecore/tequivchars.cpp | 59 | ||||
-rw-r--r-- | tdecore/tequivchars.h | 10 |
8 files changed, 91 insertions, 140 deletions
diff --git a/tdecore/CMakeLists.txt b/tdecore/CMakeLists.txt index 3df630ac3..b56d98418 100644 --- a/tdecore/CMakeLists.txt +++ b/tdecore/CMakeLists.txt @@ -142,7 +142,7 @@ tde_add_library( ${target} SHARED AUTOMOC SOURCES ${${target}_SRCS} VERSION 14.1.0 EMBED tdecorenetwork-static - LINK DCOP-shared tdefx-shared ICE SM ${ZLIB_LIBRARIES} + LINK DCOP-shared tdefx-shared ICE SM ${ZLIB_LIBRARIES} ${RESOLV_LIBRARIES} LINK_PRIVATE ltdlc-static ${KDESVGICONS} ${XCOMPOSITE_LIBRARIES} ${LIBIDN_LIBRARIES} ${LIBBFD_LIBRARIES} ${LIB_UTIL} ${GAMIN_LIBRARIES} ${Backtrace_LIBRARY} diff --git a/tdecore/tdeglobal.cpp b/tdecore/tdeglobal.cpp index 03f39ac80..594ad1c3b 100644 --- a/tdecore/tdeglobal.cpp +++ b/tdecore/tdeglobal.cpp @@ -133,14 +133,6 @@ KCharsets *TDEGlobal::charsets() return _charsets; } -TEquivChars *TDEGlobal::equivChars() -{ - if( _equivChars == 0 ) { - _equivChars = new TEquivChars(); - } - return _equivChars; -} - TDEStringMatcher *TDEGlobal::hiddenFileMatcher() { if( _hiddenFileMatcher == 0 ) { @@ -251,7 +243,6 @@ TDEInstance *TDEGlobal::_instance = 0; TDEInstance *TDEGlobal::_activeInstance = 0; TDELocale *TDEGlobal::_locale = 0; KCharsets *TDEGlobal::_charsets = 0; -TEquivChars *TDEGlobal::_equivChars = 0; TDEStringMatcher *TDEGlobal::_hiddenFileMatcher = 0; KStaticDeleterList *TDEGlobal::_staticDeleters = 0; diff --git a/tdecore/tdeglobal.h b/tdecore/tdeglobal.h index ebb465cf0..997d94102 100644 --- a/tdecore/tdeglobal.h +++ b/tdecore/tdeglobal.h @@ -110,12 +110,6 @@ public: static KCharsets *charsets(); /** - * The global alphanumeric character equivalence mapper. - * @return the global alphanumeric character equivalence mapper. - */ - static TEquivChars *equivChars(); - - /** * The global hidden file matcher. * @return the global hidden file matcher */ @@ -189,7 +183,6 @@ public: static TDELocale *_locale; static KCharsets *_charsets; static TDEStringMatcher *_hiddenFileMatcher; - static TEquivChars *_equivChars; static KStaticDeleterList *_staticDeleters; diff --git a/tdecore/tdestringmatcher.cpp b/tdecore/tdestringmatcher.cpp index 7e9f1ee0d..c3c2756b4 100755 --- a/tdecore/tdestringmatcher.cpp +++ b/tdecore/tdestringmatcher.cpp @@ -5,7 +5,11 @@ #include <tqregexp.h> #include <kdebug.h> +#if __has_include( <features.h> ) // C++17 +#pragma message "Using features.h to check for __GLIBC__" #include <features.h> +#endif + #ifdef __GLIBC__ #include <fnmatch.h> #pragma message "TSM using GLIBC fnmatch() for wildcard matching" @@ -13,8 +17,6 @@ //================================================================================================ -namespace TSM { - class AuxData { public: @@ -34,15 +36,11 @@ AuxData::AuxData() fnmatchFlags = FNM_EXTMATCH; // Bash shell option 'extglob' #endif matchEngine = nullptr; - patternConverted = ""; + patternConverted = TQString::null; } -} // End of namespace TSM - //================================================================================================ -using namespace TSM; - typedef TQValueVector<AuxData> AuxDataList; class TDEStringMatcher::TDEStringMatcherPrivate { @@ -59,11 +57,11 @@ public: void TDEStringMatcher::TDEStringMatcherPrivate::clearAll() { - m_matchSpecString = ""; + m_matchSpecString = TQString::null; m_matchSpecList.clear(); for ( size_t index = 0 ; index < m_auxData.count() ; index++ ) { if ( m_auxData[index].matchEngine != nullptr ) { - TSMTRACE << "Freeing match engine " << m_auxData[index].matchEngine << endl; + TSMTRACE << "Freeing regex match engine " << m_auxData[index].matchEngine << endl; delete m_auxData[index].matchEngine; } } @@ -95,7 +93,7 @@ const TQString TDEStringMatcher::getMatchSpecString() const return d->m_matchSpecString; } -const MatchSpecList TDEStringMatcher::getMatchSpecs() const +const TDEStringMatcher::MatchSpecList TDEStringMatcher::getMatchSpecs() const { return d->m_matchSpecList; } @@ -120,7 +118,7 @@ bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList ) workArea.clearAll(); return false; } - if ( matchSpec.pattern.find( TQChar(PatterStringDivider) ) >= 0 ) { + if ( matchSpec.pattern.find( TQChar(PatternStringDivider) ) >= 0 ) { TSMTRACE << " Error: pattern contains reserved separator character" << endl; workArea.clearAll(); return false; @@ -170,7 +168,7 @@ bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList ) case ANCHandling::EQUIVALENCE : inferredOptionString += TQChar('e'); auxWork.isCaseSensitive = true; - auxWork.patternConverted = TDEGlobal::equivChars()->replaceChars( auxWork.patternConverted, true ); + auxWork.patternConverted = TEquivChars::replaceChars( auxWork.patternConverted, true ); TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl; break; default: @@ -191,7 +189,8 @@ bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList ) switch ( matchSpec.patternType ) { case PatternType::WILDCARD : -#ifdef __GLIBC__ // Test wildcard expression using a subject matter expert +#ifdef __GLIBC__ + // Test wildcard expression using a subject matter expert result = fnmatch( auxWork.patternConverted.local8Bit().data(), auxWork.patternConverted.local8Bit().data(), @@ -207,7 +206,10 @@ bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList ) return false; } break; -#endif // Otherwise we will test wildcard expression as one converted to a regex +#else + // Wildcard expression was converted to regex during earlier PatternType + // processing and will be subsequently validated as such. +#endif case PatternType::REGEX : // Prepare regex rxWork.setPattern( auxWork.patternConverted ); @@ -237,7 +239,7 @@ bool TDEStringMatcher::setMatchSpecs( MatchSpecList newMatchSpecList ) // All proposed match specifications are good, update everything accordingly workArea.m_matchSpecList = newMatchSpecList; - workArea.m_matchSpecString = newMatchSpecs.join( TQChar(PatterStringDivider) ); + workArea.m_matchSpecString = newMatchSpecs.join( TQChar(PatternStringDivider) ); d->clearAll(); *d = workArea; //-Debug: TSMTRACE << " Notifying slots of pattern change" << endl; @@ -273,7 +275,7 @@ bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString ) return true; } - TQStringList newMatchSpecs = TQStringList::split( PatterStringDivider, newMatchSpecString, true ); + TQStringList newMatchSpecs = TQStringList::split( PatternStringDivider, newMatchSpecString, true ); if ( newMatchSpecs.count() % 2 != 0 ) { TSMTRACE << " Error: match specification string must contain an even number of components" << endl; @@ -361,7 +363,7 @@ bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString ) break; case ANCHandling::EQUIVALENCE : auxWork.isCaseSensitive = true; - auxWork.patternConverted = TDEGlobal::equivChars()->replaceChars( auxWork.patternConverted, true ); + auxWork.patternConverted = TEquivChars::replaceChars( auxWork.patternConverted, true ); TSMTRACE << " Converted match pattern '" << before << "' to equivalent '" << auxWork.patternConverted << "'" << endl; break; default: break; @@ -404,7 +406,7 @@ bool TDEStringMatcher::setMatchSpecs( TQString newMatchSpecString ) if ( rxWork.isValid() ) { auxWork.matchEngine = new TQRegExp; *auxWork.matchEngine = rxWork; - TSMTRACE << " AuxData: Allocated regex engine " << auxWork.matchEngine << "for pattern: " << auxWork.matchEngine->pattern() << endl; + TSMTRACE << " AuxData: Allocated regex engine " << auxWork.matchEngine << " for pattern: " << auxWork.matchEngine->pattern() << endl; } else { TSMTRACE << " Error: invalid regex syntax" << endl; @@ -457,7 +459,8 @@ bool TDEStringMatcher::matchAny( const TQString& stringToMatch ) const if ( d->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) { if ( equivalentString.isEmpty() ) { - equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ; +//TBR equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ; + equivalentString = TEquivChars::replaceChars( stringToMatch, false ) ; } matchWhat = equivalentString; } @@ -508,7 +511,8 @@ bool TDEStringMatcher::matchAll( const TQString& stringToMatch ) const if ( d->m_matchSpecList[index].ancHandling == ANCHandling::EQUIVALENCE ) { if ( equivalentString.isEmpty() ) { - equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ; +//TBR equivalentString = TDEGlobal::equivChars()->replaceChars( stringToMatch, false ) ; + equivalentString = TEquivChars::replaceChars( stringToMatch, false ) ; } matchWhat = equivalentString; } diff --git a/tdecore/tdestringmatcher.h b/tdecore/tdestringmatcher.h index 504ccfa8c..24adeb665 100644 --- a/tdecore/tdestringmatcher.h +++ b/tdecore/tdestringmatcher.h @@ -8,55 +8,8 @@ #define TSMTRACE kdWarning() << "<TSMTRACE> " -namespace TSM -{ -/** - * Enumeration used by the TDEStringMatcher class - * defining types of patterns to be matched - */ -enum class PatternType: uchar -{ - REGEX, - WILDCARD, - SUBSTRING, - //OTHER, - DEFAULT = REGEX -}; - -/** - * Enumeration used by the TDEStringMatcher class - * defining special handling of alphanumeric characters - */ -enum class ANCHandling: uchar -{ - CASE_SENSITIVE = 0, // No handling, each character distinct - CASE_INSENSITIVE = 1, // Alphabetic case variants are same - EQUIVALENCE = 2, // Alphanumeric equivalents are same - DEFAULT = CASE_SENSITIVE -}; - -/** - * Structure used by the TDEStringMatcher class - * representing properties of a single match specification. - */ -struct MatchSpec -{ - PatternType patternType; - ANCHandling ancHandling; - bool expectMatch; // "matching" vs. "not matching" - TQString pattern; -}; - -/** - * Container used in a TDEStringMatcher object - * representing multiple match specifications. - */ -typedef TQValueVector<MatchSpec> MatchSpecList; - // Use horizontal tab as m_patternString separator -inline constexpr char PatterStringDivider { '\t' }; - -} // End of namespace TSM +inline constexpr char PatternStringDivider = '\t' ; /** @@ -67,13 +20,53 @@ class TDECORE_EXPORT TDEStringMatcher : public TQObject Q_OBJECT public: + /** + * Enumeration defining types of patterns to be matched + */ + enum class PatternType: uchar + { + REGEX, + WILDCARD, + SUBSTRING, + //OTHER, + DEFAULT = REGEX + }; + + /** + * Enumeration defining special handling of alphanumeric characters + */ + enum class ANCHandling: uchar + { + CASE_SENSITIVE = 0, // No handling, each character distinct + CASE_INSENSITIVE = 1, // Alphabetic case variants are same + EQUIVALENCE = 2, // Alphanumeric equivalents are same + DEFAULT = CASE_SENSITIVE + }; + + /** + * Structure representing properties of a single match specification. + */ + struct MatchSpec + { + PatternType patternType; + ANCHandling ancHandling; + bool expectMatch; // "matching" vs. "not matching" + TQString pattern; + }; + + /** + * Container representing multiple match specifications. + */ + typedef TQValueVector<MatchSpec> MatchSpecList; + + TDEStringMatcher(); ~TDEStringMatcher(); /** @return list of currently defined match specifications. */ - const TSM::MatchSpecList getMatchSpecs() const; + const MatchSpecList getMatchSpecs() const; /** @return string encoding list of currently defined match specifications. @@ -84,7 +77,7 @@ public: Use @param newMatchSpecList to generate the internal list of match specifications to be used for pattern matching. */ - bool setMatchSpecs( TSM::MatchSpecList newMatchSpecList ); + bool setMatchSpecs( MatchSpecList newMatchSpecList ); /** Use specially encoded @param newPatternString to generate the internal @@ -106,6 +99,8 @@ public: */ bool matchAll( const TQString& stringToMatch ) const; +protected: + /** @return a basic regular expression formed by converting the basic wildcard expression in @param wildcardPattern. diff --git a/tdecore/tequivchars-mapping.h b/tdecore/tequivchars-mapping.h index fd625189b..459c63d7a 100644 --- a/tdecore/tequivchars-mapping.h +++ b/tdecore/tequivchars-mapping.h @@ -2,7 +2,6 @@ #define TEQUIVCHARS_MAPPING_H #ifndef OPTIMIZE_ASCII_LOOKUP -[2993]={ // Make sure this dimension accurately reflects content of table below { 0x00041 , 0x00061 }, // <LATIN CAPITAL LETTER A> => <LATIN SMALL LETTER A> { 0x00042 , 0x00062 }, // <LATIN CAPITAL LETTER B> => <LATIN SMALL LETTER B> { 0x00043 , 0x00063 }, // <LATIN CAPITAL LETTER C> => <LATIN SMALL LETTER C> @@ -29,9 +28,6 @@ { 0x00058 , 0x00078 }, // <LATIN CAPITAL LETTER X> => <LATIN SMALL LETTER X> { 0x00059 , 0x00079 }, // <LATIN CAPITAL LETTER Y> => <LATIN SMALL LETTER Y> { 0x0005a , 0x0007a }, // <LATIN CAPITAL LETTER Z> => <LATIN SMALL LETTER Z> -#else -[2967]={ // Make sure this dimension accurately reflects content of table below - /* Excluded ASCII characters are handled as a special case in our code. */ #endif { 0x000aa , 0x00061 }, // <FEMININE ORDINAL INDICATOR> => <LATIN SMALL LETTER A> { 0x000b2 , 0x00032 }, // <SUPERSCRIPT TWO> => <DIGIT TWO> @@ -3000,7 +2996,6 @@ { 0x0ffda , 0x01173 }, // <HALFWIDTH HANGUL LETTER EU> => <HANGUL JUNGSEONG EU> { 0x0ffdb , 0x01174 }, // <HALFWIDTH HANGUL LETTER YI> => <HANGUL JUNGSEONG YI> { 0x0ffdc , 0x01175 } // <HALFWIDTH HANGUL LETTER I> => <HANGUL JUNGSEONG I> -}; #endif diff --git a/tdecore/tequivchars.cpp b/tdecore/tequivchars.cpp index d259946b2..65383fde0 100755 --- a/tdecore/tequivchars.cpp +++ b/tdecore/tequivchars.cpp @@ -11,35 +11,18 @@ #include "tequivchars.h" -//typedef wchar_t CHAR16; -//typedef unsigned short CHAR16; -typedef TQChar CHAR16; - -class TEquivChars_Private -{ -public: - - struct defaultCollation { - CHAR16 character; - CHAR16 collatesTo; - }; - - const defaultCollation EquivalentsTable // terminating ';' is provided in include file - #include "tequivchars-mapping.h" - uint EquivTableROWS = sizeof(EquivalentsTable)/sizeof(EquivalentsTable[0]); +struct defaultCollation { + TQChar character; + TQChar collatesTo; }; -TEquivChars::TEquivChars() -{ - p = new TEquivChars_Private; -} +static const defaultCollation EquivalentsTable[] = { +#include "tequivchars-mapping.h" +}; +uint EquivTableROWS = sizeof(EquivalentsTable)/sizeof(EquivalentsTable[0]); -TEquivChars::~TEquivChars() -{ - delete p; -} -TQString TEquivChars::replaceChars( const TQString &inputString, bool isRegex ) +const TQString TEquivChars::replaceChars( const TQString &inputString, bool isRegex ) { int inStrLen = inputString.length(); TQString outString = TQString::fromLatin1( "" ); @@ -56,11 +39,11 @@ TQString TEquivChars::replaceChars( const TQString &inputString, bool isRegex ) bool inDirective = false; // (*___) bool inGroupName = false; // (?<___> #endif // REGEXP_IS_PCRE2 - CHAR16 currChar = 0; - CHAR16 prevChar = 0; - CHAR16 nextChar = 0; + TQChar currChar = 0; + TQChar prevChar = 0; + TQChar nextChar = 0; - for ( int i = 0 ; i < inStrLen ; outString[i] = CHAR16(currChar), i++ ) { + for ( int i = 0 ; i < inStrLen ; outString[i] = currChar, i++ ) { prevChar = currChar; currChar = char16[i].unicode(); @@ -206,35 +189,33 @@ TQString TEquivChars::replaceChars( const TQString &inputString, bool isRegex ) if ( codepoint < 128 ) { if ( codepoint > 64 && codepoint < 91 ) // convert upper case ASCII currChar = TQChar(codepoint + 32 ); // to corresponding lower case + // All other ASCII characters are equivalent to themselves //-Debug: std::cerr << TQString(currChar).utf8().data() << "' (ascii)" << std::endl; continue; } #endif + // Only letters and numbers are in the table + if ( ! currChar.isLetterOrNumber() ) + continue; // Use a simple binary search to look up an equivalent character int low = 0; - int high = p->EquivTableROWS - 1; + int high = EquivTableROWS - 1; while (low <= high) { int mid = low + (high - low) / 2; - if ( currChar == p->EquivalentsTable[mid].character ) { + if ( currChar == EquivalentsTable[mid].character ) { // Found equivalent character, use it instead - currChar = p->EquivalentsTable[mid].collatesTo; + currChar = EquivalentsTable[mid].collatesTo; break; } - if ( p->EquivalentsTable[mid].character < currChar ) + if ( EquivalentsTable[mid].character < currChar ) low = mid + 1; else high = mid - 1; } //-Debug: std::cerr << TQString(currChar).utf8().data() << "'" << std::endl; - /* FIXME: Possible ideas for optimizing table lookup speed - (1) Detect & handle ASCII (<128) characters separately. *DONE* - (2) Split table into multiple lookup tables and search each - in order of descending likelihood of character match. - */ - } return outString; diff --git a/tdecore/tequivchars.h b/tdecore/tequivchars.h index 0b933ae7a..5bd007a3d 100644 --- a/tdecore/tequivchars.h +++ b/tdecore/tequivchars.h @@ -13,9 +13,6 @@ class TDECORE_EXPORT TEquivChars { public: - TEquivChars(); - ~TEquivChars(); - /** @return copy of @param inputString modified such that each alphanumeric character is replaced with it's collating character equivalent. If the @@ -23,12 +20,7 @@ public: expression and the alphabetical characters inside Posix bracket [::] expressions are left as-is */ - TQString replaceChars( const TQString &inputString, bool isRegex = false ); - - -private: - - class TEquivChars_Private *p; + static const TQString replaceChars( const TQString &inputString, bool isRegex = false ); }; #endif // TEQUIVCHARS_H |