summaryrefslogtreecommitdiffstats
path: root/kjs
diff options
context:
space:
mode:
authorMichele Calgaro <michele.calgaro@yahoo.it>2024-08-17 22:26:29 +0900
committerMichele Calgaro <michele.calgaro@yahoo.it>2024-08-18 23:02:35 +0900
commit3ccc8ad970ccf6e4c5e2abef212af44e17eb0fca (patch)
treeca31ef78594141e9a4c66cbfaac42d9a69ab8033 /kjs
parent8b5a45212267b1730a3b59a32067ec088216c725 (diff)
downloadtdelibs-3ccc8ad970ccf6e4c5e2abef212af44e17eb0fca.tar.gz
tdelibs-3ccc8ad970ccf6e4c5e2abef212af44e17eb0fca.zip
kjs: use libpcre2 instead of libpcre
Signed-off-by: Michele Calgaro <michele.calgaro@yahoo.it> (cherry picked from commit 7740e825a683a9cc84f8422c94109c5fcc4beb8e)
Diffstat (limited to 'kjs')
-rw-r--r--kjs/CMakeLists.txt6
-rw-r--r--kjs/Makefile.am6
-rw-r--r--kjs/configure.in.in51
-rw-r--r--kjs/regexp.cpp229
-rw-r--r--kjs/regexp.h21
5 files changed, 149 insertions, 164 deletions
diff --git a/kjs/CMakeLists.txt b/kjs/CMakeLists.txt
index 8e9b16849..c74bf1d5b 100644
--- a/kjs/CMakeLists.txt
+++ b/kjs/CMakeLists.txt
@@ -15,12 +15,12 @@ include_directories(
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_BINARY_DIR}
${CMAKE_SOURCE_DIR}/tdecore
- ${LIBPCRE_INCLUDEDIR}
+ ${LIBPCRE2_INCLUDEDIR}
)
link_directories(
${TDECORE_LIBRARY_DIRS}
- ${LIBPCRE_LIBDIR}
+ ${LIBPCRE2_LIBDIR}
)
@@ -62,6 +62,6 @@ tde_add_library( ${target} SHARED
SOURCES ${${target}_SRCS}
VERSION 1.2.0
LINK tdecore-shared
- LINK_PRIVATE ${LIBPCRE_LIBRARIES}
+ LINK_PRIVATE ${LIBPCRE2_LIBRARIES}
DESTINATION ${LIB_INSTALL_DIR}
)
diff --git a/kjs/Makefile.am b/kjs/Makefile.am
index 89f937906..8dc05f656 100644
--- a/kjs/Makefile.am
+++ b/kjs/Makefile.am
@@ -17,7 +17,7 @@
# Boston, MA 02110-1301, USA.
YACC = bison
-INCLUDES = $(PCRECFLAGS) $(all_includes)
+INCLUDES = $(PCRE2CFLAGS) $(all_includes)
lib_LTLIBRARIES = libkjs.la
@@ -50,7 +50,7 @@ endif
libkjs_la_LDFLAGS = -version-info 3:0:2 -no-undefined $(VSCRIPT) \
$(USER_LDFLAGS) $(all_libraries)
-libkjs_la_LIBADD = -lm $(LIBPCRE)
+libkjs_la_LIBADD = -lm $(LIBPCRE2)
EXTRA_DIST = grammar.y
@@ -93,7 +93,7 @@ CLEANFILES = $(LUT_FILES)
## test program (in one program for easier profiling/memory debugging)
EXTRA_PROGRAMS = testkjs_static
testkjs_static_SOURCES = testkjs.cpp
-testkjs_static_LDADD = $(LIBPCRE) libkjs.la
+testkjs_static_LDADD = $(LIBPCRE2) libkjs.la
testkjs_static_LDFLAGS = -static
## test program (linked to libkjs)
diff --git a/kjs/configure.in.in b/kjs/configure.in.in
index 1c4d3ac52..4e6bd9742 100644
--- a/kjs/configure.in.in
+++ b/kjs/configure.in.in
@@ -2,52 +2,55 @@ dnl KDE JavaScript specific configure tests
AC_CHECK_HEADERS(ieeefp.h float.h)
-AC_DEFUN([AC_CHECK_PCREPOSIX],
+AC_DEFUN(AC_CHECK_PCRE2POSIX],
[
- dnl define the configure option that disables pcre
- AC_ARG_ENABLE(pcre,AC_HELP_STRING([--disable-pcre],[don't require libpcre (poor RegExp support in Javascript)]),
- with_pcre=$enableval, with_pcre=yes)
+ dnl define the configure option that disables pcre2
+ AC_ARG_ENABLE(pcre2,AC_HELP_STRING([--disable-pcre],[don't require libpcre (poor RegExp support in Javascript)]),
+ with_pcre2=$enableval, with_pcre2=yes)
- if test "$with_pcre" = "yes"; then
+ if test "$with_pcre2" = "yes"; then
- KDE_FIND_PATH(pcre-config, PCRE_CONFIG, [${exec_prefix}/bin ${prefix}/bin], [PCRE_CONFIG="" ])
- if test -n "$PCRE_CONFIG" && $PCRE_CONFIG --libs >/dev/null 2>&1; then
- LIBPCRE=`$PCRE_CONFIG --libs-posix | sed -e "s,-L/usr/lib ,," -e "s,[\b-].\+pcreposix[^[:space:]]*\b,,"`
- PCRECFLAGS=`$PCRE_CONFIG --cflags`
+ KDE_FIND_PATH(pcre2-config, PCRE2_CONFIG, [${exec_prefix}/bin ${prefix}/bin], [PCRE2_CONFIG="" ])
+ if test -n "$PCRE2_CONFIG" && $PCRE2_CONFIG --libs8 >/dev/null 2>&1; then
+ LIBPCRE2=`$PCRE2_CONFIG --libs-posix | sed -e "s,-L/usr/lib ,," -e "s,[\b-].\+pcreposix[^[:space:]]*\b,,"`
+ PCRE2CFLAGS=`$PCRE2_CONFIG --cflags`
else
- LIBPCRE="-lpcre"
- PCRECFLAGS=
+ LIBPCRE2="-lpcre2-8"
+ PCRE2CFLAGS=
fi
- AC_CACHE_VAL(ac_cv_have_pcreposix, [
+ AC_CACHE_VAL(ac_cv_have_pcre2posix, [
ac_save_libs="$LIBS"
- LIBS="$LIBPCRE"
+ LIBS="$LIBPCRE2"
ac_CPPFLAGS_save="$CPPFLAGS"
- CPPFLAGS="$CPPFLAGS $PCRECFLAGS $all_includes"
+ CPPFLAGS="$CPPFLAGS $PCRE2CFLAGS $all_includes"
ac_LDFLAGS_save="$LDFLAGS"
LDFLAGS="$LDFLAGS $all_libraries"
AC_TRY_LINK(
- [#include <pcre.h>],
- [regfree(0);],
- [ac_cv_have_pcreposix="yes"],
- [ac_cv_have_pcreposix="no"]
+ [
+ #define PCRE2_CODE_UNIT_WIDTH 8
+ #include <pcre2.h>
+ ],
+ [pcre2_regfree(0);],
+ [ac_cv_have_pcre2posix="yes"],
+ [ac_cv_have_pcre2posix="no"]
)
LIBS="$ac_save_libs"
LDFLAGS="$ac_LDFLAGS_save"
CPPFLAGS="$ac_CPPFLAGS_save"
])
- if test "$ac_cv_have_pcreposix" = "yes"; then
- AC_DEFINE(HAVE_PCREPOSIX, 1, [Define if you have pcreposix libraries and header files.])
+ if test "$ac_cv_have_pcre2posix" = "yes"; then
+ AC_DEFINE(HAVE_PCRE2POSIX, 1, [Define if you have pcre2posix libraries and header files.])
else
AC_MSG_ERROR([You're missing libpcre.
-Download libpcre from http://www.pcre.org or find a binary package for your platform.
+Download libpcre2 from http://www.pcre.org or find a binary package for your platform.
Alternatively, you can specify --disable-pcre, but some web pages - using regular
expressions in Javascript code - will not work correctly, the regexp support being
quite limited if libpcre isn't present.])
fi
fi
])
-AC_CHECK_PCREPOSIX
-AC_SUBST(LIBPCRE)
-AC_SUBST(PCRECFLAGS)
+AC_CHECK_PCRE2POSIX
+AC_SUBST(LIBPCRE2)
+AC_SUBST(PCRE2CFLAGS)
AM_CONFIG_HEADER([kjs/global.h])
diff --git a/kjs/regexp.cpp b/kjs/regexp.cpp
index 0c2675588..a693fdc1a 100644
--- a/kjs/regexp.cpp
+++ b/kjs/regexp.cpp
@@ -30,21 +30,17 @@
using namespace KJS;
-#ifdef PCRE_CONFIG_UTF8
RegExp::UTF8SupportState RegExp::utf8Support = RegExp::Unknown;
-#endif
RegExp::RegExp(const UString &p, int f)
: pat(p), flgs(f), m_notEmpty(false), valid(true), buffer(0), originalPos(0)
{
// Determine whether libpcre has unicode support if need be..
-#ifdef PCRE_CONFIG_UTF8
if (utf8Support == Unknown) {
- int supported;
- pcre_config(PCRE_CONFIG_UTF8, (void*)&supported);
- utf8Support = supported ? Supported : Unsupported;
+ uint32_t supported;
+ pcre2_config(PCRE2_CONFIG_COMPILED_WIDTHS, (void*)&supported);
+ utf8Support = (supported & 0x0001) ? Supported : Unsupported;
}
-#endif
nrSubPatterns = 0; // determined in match() with POSIX regex.
@@ -63,33 +59,33 @@ RegExp::RegExp(const UString &p, int f)
escape = false;
// we only care about \u
if (c == 'u') {
- // standard unicode escape sequence looks like \uxxxx but
- // other browsers also accept less then 4 hex digits
- unsigned short u = 0;
- int j = 0;
- for (j = 0; j < 4; ++j) {
- if (i + 1 < p.size() && Lexer::isHexDigit(p[i + 1].unicode())) {
- u = (u << 4) + Lexer::convertHex(p[i + 1].unicode());
- ++i;
- } else {
- // sequence incomplete. restore index.
- // TODO: cleaner way to propagate warning
- fprintf(stderr, "KJS: saw %d digit \\u sequence.\n", j);
- i -= j;
- break;
- }
- }
- if (j < 4) {
- // sequence was incomplete. treat \u as u which IE always
- // and FF sometimes does.
- intern.append(UString('u'));
- } else {
+ // standard unicode escape sequence looks like \uxxxx but
+ // other browsers also accept less then 4 hex digits
+ unsigned short u = 0;
+ int j = 0;
+ for (j = 0; j < 4; ++j) {
+ if (i + 1 < p.size() && Lexer::isHexDigit(p[i + 1].unicode())) {
+ u = (u << 4) + Lexer::convertHex(p[i + 1].unicode());
+ ++i;
+ } else {
+ // sequence incomplete. restore index.
+ // TODO: cleaner way to propagate warning
+ fprintf(stderr, "KJS: saw %d digit \\u sequence.\n", j);
+ i -= j;
+ break;
+ }
+ }
+ if (j < 4) {
+ // sequence was incomplete. treat \u as u which IE always
+ // and FF sometimes does.
+ intern.append(UString('u'));
+ } else {
c = UChar(u);
switch (u) {
case 0:
- // Make sure to encode 0, to avoid terminating the string
- intern += UString(nil);
- break;
+ // Make sure to encode 0, to avoid terminating the string
+ intern += UString(nil);
+ break;
case '^':
case '$':
case '\\':
@@ -101,13 +97,13 @@ RegExp::RegExp(const UString &p, int f)
case '{': case '}':
case '[': case ']':
case '|':
- // escape pattern characters have to remain escaped
- intern.append(UString('\\'));
- // intentional fallthrough
+ // escape pattern characters have to remain escaped
+ intern.append(UString('\\'));
+ // intentional fallthrough
default:
- intern += UString(&c, 1);
- break;
- }
+ intern += UString(&c, 1);
+ break;
+ }
}
continue;
}
@@ -126,45 +122,46 @@ RegExp::RegExp(const UString &p, int f)
intern = p;
}
-#ifdef HAVE_PCREPOSIX
- int pcreflags = 0;
- const char *perrormsg;
- int errorOffset;
+#ifdef HAVE_PCRE2POSIX
+ uint32_t pcre2flags = 0;
+ int errorCode;
+ PCRE2_SIZE errorOffset;
if (flgs & IgnoreCase)
- pcreflags |= PCRE_CASELESS;
+ pcre2flags |= PCRE2_CASELESS;
if (flgs & Multiline)
- pcreflags |= PCRE_MULTILINE;
+ pcre2flags |= PCRE2_MULTILINE;
-#ifdef PCRE_CONFIG_UTF8
if (utf8Support == Supported)
- pcreflags |= (PCRE_UTF8 | PCRE_NO_UTF8_CHECK);
-#endif
+ pcre2flags |= (PCRE2_UTF | PCRE2_NO_UTF_CHECK);
// Fill our buffer with an encoded version, whether utf-8, or,
// if PCRE is incapable, truncated.
prepareMatch(intern);
- pcregex = pcre_compile(buffer, pcreflags,
- &perrormsg, &errorOffset, NULL);
+ pcregex = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, pcre2flags,
+ &errorCode, &errorOffset, NULL);
doneMatch(); // Cleanup buffers
if (!pcregex) {
#ifndef NDEBUG
- fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);
+ PCRE2_UCHAR errorMsg[256];
+ pcre2_get_error_message(errorCode, errorMsg, sizeof(errorMsg));
+ fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", errorMsg);
#endif
valid = false;
return;
}
-#ifdef PCRE_INFO_CAPTURECOUNT
// Get number of subpatterns that will be returned
- int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
+ int rc = pcre2_pattern_info(pcregex, PCRE2_INFO_CAPTURECOUNT, &nrSubPatterns);
if (rc != 0)
-#endif
+ {
nrSubPatterns = 0; // fallback. We always need the first pair of offsets.
+ }
-#else /* HAVE_PCREPOSIX */
+ match_data = pcre2_match_data_create_from_pattern(pcregex, NULL);
+#else
int regflags = 0;
#ifdef REG_EXTENDED
@@ -195,9 +192,15 @@ RegExp::RegExp(const UString &p, int f)
RegExp::~RegExp()
{
doneMatch(); // Be 100% sure buffers are freed
-#ifdef HAVE_PCREPOSIX
+#ifdef HAVE_PCRE2POSIX
+ if (match_data)
+ {
+ pcre2_match_data_free(match_data);
+ }
if (pcregex)
- pcre_free(pcregex);
+ {
+ pcre2_code_free(pcregex);
+ }
#else
/* TODO: is this really okay after an error ? */
regfree(&preg);
@@ -208,7 +211,7 @@ void RegExp::prepareUtf8(const UString& s)
{
// Allocate a buffer big enough to hold all the characters plus \0
const int length = s.size();
- buffer = new char[length * 3 + 1];
+ buffer = new buftype_t[length * 3 + 1];
// Also create buffer for positions. We need one extra character in there,
// even past the \0 since the non-empty handling may jump one past the end
@@ -217,7 +220,7 @@ void RegExp::prepareUtf8(const UString& s)
// Convert to runs of 8-bit characters, and generate indeces
// Note that we do NOT combine surrogate pairs here, as
// regexps operate on them as separate characters
- char *p = buffer;
+ buftype_t *p = buffer;
int *posOut = originalPos;
const UChar *d = s.data();
for (int i = 0; i != length; ++i) {
@@ -225,16 +228,16 @@ void RegExp::prepareUtf8(const UString& s)
int sequenceLen;
if (c < 0x80) {
- *p++ = (char)c;
+ *p++ = (buftype_t)c;
sequenceLen = 1;
} else if (c < 0x800) {
- *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
- *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
+ *p++ = (buftype_t)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8
+ *p++ = (buftype_t)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
sequenceLen = 2;
} else {
- *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
- *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
- *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
+ *p++ = (buftype_t)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8
+ *p++ = (buftype_t)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set
+ *p++ = (buftype_t)((c | 0x80) & 0xBF); // next 6 bits, with high bit set
sequenceLen = 3;
}
@@ -262,7 +265,7 @@ void RegExp::prepareASCII (const UString& s)
// when we don't have utf 8 available -- use
// truncated version, and pray for the best
CString truncated = s.cstring();
- buffer = new char[truncated.size() + 1];
+ buffer = new buftype_t[truncated.size() + 1];
memcpy(buffer, truncated.c_str(), truncated.size());
buffer[truncated.size()] = '\0'; // For _compile use
bufferSize = truncated.size();
@@ -272,11 +275,9 @@ void RegExp::prepareMatch(const UString &s)
{
delete[] originalPos; // Just to be sure..
delete[] buffer;
-#ifdef PCRE_CONFIG_UTF8
if (utf8Support == Supported)
prepareUtf8(s);
else
-#endif
prepareASCII(s);
#ifndef NDEBUG
@@ -308,17 +309,16 @@ UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
if (i > s.size() || s.isNull())
return UString::null;
-#ifdef HAVE_PCREPOSIX
- int ovecsize = (nrSubPatterns+1)*3; // see pcre docu
- if (ovector) *ovector = new int[ovecsize];
- if (!pcregex)
+#ifdef HAVE_PCRE2POSIX
+ if (!pcregex || !match_data)
+ return UString::null;
+ if (!ovector)
return UString::null;
int startPos;
int nextPos;
-
-#ifdef PCRE_CONFIG_UTF8
- if (utf8Support == Supported) {
+ if (utf8Support == Supported)
+ {
startPos = i;
while (originalPos[startPos] < i)
++startPos;
@@ -328,53 +328,59 @@ UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
while (originalPos[nextPos] < (i + 1))
++nextPos;
}
- } else
-#endif
+ }
+ else
{
startPos = i;
nextPos = i + (i < s.size() ? 1 : 0);
}
- int baseFlags =
-#ifdef PCRE_CONFIG_UTF8
- utf8Support == Supported ? PCRE_NO_UTF8_CHECK :
-#endif
- 0;
- int numMatches = pcre_exec(pcregex, NULL, buffer, bufferSize, startPos,
- m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED | baseFlags) : baseFlags, // see man pcretest
- ovector ? *ovector : 0L, ovecsize);
- if (numMatches < 0)
+ uint32_t baseFlags = (utf8Support == Supported ? PCRE2_NO_UTF_CHECK : 0);
+ if (m_notEmpty)
+ {
+ baseFlags |= PCRE2_NOTEMPTY | PCRE2_ANCHORED;
+ }
+ int numMatches = pcre2_match(pcregex, buffer, PCRE2_ZERO_TERMINATED, startPos, baseFlags, match_data, NULL);
+ if (numMatches <= 0)
{
// Failed to match.
- if (numMatches == PCRE_ERROR_NOMATCH && (flgs & Global) && m_notEmpty && ovector && startPos < nextPos)
+ if (numMatches == PCRE2_ERROR_NOMATCH && (flgs & Global) && m_notEmpty && startPos < nextPos)
{
// We set m_notEmpty ourselves, to look for a non-empty match
- // (see man pcretest or pcretest.c for details).
// So we don't stop here, we want to try again at i+1.
#ifdef KJS_VERBOSE
fprintf(stderr, "No match after m_notEmpty. +1 and keep going.\n");
#endif
m_notEmpty = 0;
- numMatches = pcre_exec(pcregex, NULL, buffer, bufferSize, nextPos, baseFlags,
- ovector ? *ovector : 0L, ovecsize);
- if (numMatches < 0)
+ baseFlags = (utf8Support == Supported ? PCRE2_NO_UTF_CHECK : 0);
+ numMatches = pcre2_match(pcregex, buffer, PCRE2_ZERO_TERMINATED, nextPos, baseFlags, match_data, NULL);
+ if (numMatches <= 0)
return UString::null;
}
- else // done
+ else
return UString::null;
}
- // Got a match, proceed with it.
- // But fix up the ovector if need be..
- if (ovector && originalPos) {
- for (unsigned c = 0; c < 2 * TQMIN((unsigned)numMatches, nrSubPatterns+1); ++c) {
- if ((*ovector)[c] != -1)
- (*ovector)[c] = originalPos[(*ovector)[c]];
+ PCRE2_SIZE *pcre2_ovector = pcre2_get_ovector_pointer(match_data);
+ if (!pcre2_ovector)
+ return UString::null;
+
+ uint32_t pcre2_ovecCount = pcre2_get_ovector_count(match_data);
+ *ovector = new int[pcre2_ovecCount * 2];
+ if (originalPos)
+ {
+ for (size_t c = 0; c < 2 * pcre2_ovecCount; ++c)
+ {
+ (*ovector)[c] = (pcre2_ovector[c] != -1) ? originalPos[pcre2_ovector[c]] : -1;
+ }
+ }
+ else
+ {
+ for (size_t c = 0; c < 2 * pcre2_ovecCount; ++c)
+ {
+ (*ovector)[c] = pcre2_ovector[c];
}
}
-
- if (!ovector)
- return UString::null; // don't rely on the return value if you pass ovector==0
#else
const uint maxMatch = 10;
regmatch_t rmatch[maxMatch];
@@ -419,28 +425,3 @@ UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
}
return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
}
-
-#if 0 // unused
-bool RegExp::test(const UString &s, int)
-{
-#ifdef HAVE_PCREPOSIX
- int ovector[300];
- CString buffer(s.cstring());
-
- if (s.isNull() ||
- pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0,
- 0, ovector, 300) == PCRE_ERROR_NOMATCH)
- return false;
- else
- return true;
-
-#else
-
- char *str = strdup(s.ascii());
- int r = regexec(&preg, str, 0, 0, 0);
- free(str);
-
- return r == 0;
-#endif
-}
-#endif
diff --git a/kjs/regexp.h b/kjs/regexp.h
index 88851260e..e731eb714 100644
--- a/kjs/regexp.h
+++ b/kjs/regexp.h
@@ -25,13 +25,16 @@
#include "config.h"
-#ifdef HAVE_PCREPOSIX
-#include <pcre.h>
+#ifdef HAVE_PCRE2POSIX
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+typedef PCRE2_UCHAR8 buftype_t;
#else // POSIX regex - not so good...
extern "C" { // bug with some libc5 distributions
#include <regex.h>
+typedef char buftype_t;
}
-#endif //HAVE_PCREPOSIX
+#endif
#include "ustring.h"
@@ -61,7 +64,7 @@ namespace KJS {
bool valid;
// Cached encoding info...
- char* buffer;
+ buftype_t *buffer;
int* originalPos;
int bufferSize;
@@ -71,22 +74,20 @@ namespace KJS {
UString originalS; // the original string, used for sanity-checking
#endif
-#ifndef HAVE_PCREPOSIX
+#ifndef HAVE_PCRE2POSIX
regex_t preg;
#else
- pcre *pcregex;
+ pcre2_code *pcregex;
+ pcre2_match_data *match_data;
enum UTF8SupportState {
Unknown,
Supported,
Unsupported
};
-
-#ifdef PCRE_CONFIG_UTF8
static UTF8SupportState utf8Support;
#endif
-#endif
- unsigned int nrSubPatterns;
+ uint32_t nrSubPatterns;
RegExp();
};