diff options
Diffstat (limited to 'kviewshell/plugins/djvu/libdjvu/GString.cpp')
-rw-r--r-- | kviewshell/plugins/djvu/libdjvu/GString.cpp | 2811 |
1 files changed, 2811 insertions, 0 deletions
diff --git a/kviewshell/plugins/djvu/libdjvu/GString.cpp b/kviewshell/plugins/djvu/libdjvu/GString.cpp new file mode 100644 index 00000000..a618055e --- /dev/null +++ b/kviewshell/plugins/djvu/libdjvu/GString.cpp @@ -0,0 +1,2811 @@ +//C- -*- C++ -*- +//C- ------------------------------------------------------------------- +//C- DjVuLibre-3.5 +//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. +//C- Copyright (c) 2001 AT&T +//C- +//C- This software is subject to, and may be distributed under, the +//C- GNU General Public License, Version 2. The license should have +//C- accompanied the software or you may obtain a copy of the license +//C- from the Free Software Foundation at http://www.fsf.org . +//C- +//C- This program is distributed in the hope that it will be useful, +//C- but WITHOUT ANY WARRANTY; without even the implied warranty of +//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//C- GNU General Public License for more details. +//C- +//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library +//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech +//C- Software authorized us to replace the original DjVu(r) Reference +//C- Library notice by the following text (see doc/lizard2002.djvu): +//C- +//C- ------------------------------------------------------------------ +//C- | DjVu (r) Reference Library (v. 3.5) +//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. +//C- | The DjVu Reference Library is protected by U.S. Pat. No. +//C- | 6,058,214 and patents pending. +//C- | +//C- | This software is subject to, and may be distributed under, the +//C- | GNU General Public License, Version 2. The license should have +//C- | accompanied the software or you may obtain a copy of the license +//C- | from the Free Software Foundation at http://www.fsf.org . +//C- | +//C- | The computer code originally released by LizardTech under this +//C- | license and unmodified by other parties is deemed "the LIZARDTECH +//C- | ORIGINAL CODE." Subject to any third party intellectual property +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further +//C- | modifications or combinations. +//C- | +//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY +//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF +//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. +//C- +------------------------------------------------------------------ +// +// $Id: GString.cpp,v 1.22 2005/04/27 16:34:13 leonb Exp $ +// $Name: release_3_5_15 $ + +// From: Leon Bottou, 1/31/2002 +// This file has very little to do with my initial implementation. +// It has been practically rewritten by Lizardtech for i18n changes. +// My original implementation was very small in comparison +// <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>. +// In my opinion, the duplication of the string classes is a failed +// attempt to use the type system to enforce coding policies. +// This could be fixed. But there are better things to do in djvulibre. + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#if NEED_GNUG_PRAGMAS +# pragma implementation +#endif + +#include "GString.h" +#include "GThreads.h" +#include "debug.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#if HAS_WCHAR +# include <locale.h> +# if !defined(AUTOCONF) || HAVE_WCHAR_H +# include <wchar.h> +# endif +# if HAS_WCTYPE +# include <wctype.h> +# endif +#endif +#include <ctype.h> + +#ifndef DO_CHANGELOCALE +#define DO_CHANGELOCALE 1 +#ifdef UNIX +#if THREADMODEL != COTHREADS +#if THREADMODEL != NOTHREADS +#undef DO_CHANGELOCALE +#define DO_CHANGELOCALE 0 +#endif +#endif +#endif +#endif + + +#ifdef HAVE_NAMESPACES +namespace DJVU { +# ifdef NOT_DEFINED // Just to fool emacs c++ mode +} +#endif +#endif + + +GBaseString::~GBaseString() {} +GNativeString::~GNativeString() {} +GUTF8String::~GUTF8String() {} + +#if !HAS_MBSTATE && HAS_WCHAR +// Under some systems, wctomb() and mbtowc() are not thread +// safe. In those cases, wcrtomb and mbrtowc are preferred. +// For Solaris, wctomb() and mbtowc() are thread safe, and +// wcrtomb() and mbrtowc() don't exist. + +#define wcrtomb MYwcrtomb +#define mbrtowc MYmbrtowc +#define mbrlen MYmbrlen + +static inline int +wcrtomb(char *bytes,wchar_t w,mbstate_t *) +{ + return wctomb(bytes,w); +} + +static inline int +mbrtowc(wchar_t *w,const char *source, size_t n, mbstate_t *) +{ + return mbtowc(w,source,n); +} + +static inline size_t +mbrlen(const char *s, size_t n, mbstate_t *) +{ + return mblen(s,n); +} +#endif // !HAS_MBSTATE || HAS_WCHAR + + +GP<GStringRep> +GStringRep::upcase(void) const +{ return tocase(giswupper,gtowupper); } + +GP<GStringRep> +GStringRep::downcase(void) const +{ return tocase(giswlower,gtowlower); } + +GP<GStringRep> +GStringRep::UTF8::create(const unsigned int sz) +{ + return GStringRep::create(sz,(GStringRep::UTF8 *)0); +} + +GP<GStringRep> +GStringRep::UTF8::create(const char *s) +{ + GStringRep::UTF8 dummy; + return dummy.strdup(s); +} + +GP<GStringRep> +GStringRep::UTF8::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::UTF8::create( const GP<GStringRep> &s1,const char *s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::UTF8::create( const char *s1, const GP<GStringRep> &s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::UTF8::create( const char *s1,const char *s2) +{ + GStringRep::UTF8 dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::UTF8::create(const char *s,const int start,const int length) +{ + GStringRep::UTF8 dummy; + return dummy.substr(s,start,length); +} + +GP<GStringRep> +GStringRep::UTF8::create( + const unsigned short *s,const int start,const int length) +{ + GStringRep::UTF8 dummy; + return dummy.substr(s,start,length); +} + +GP<GStringRep> +GStringRep::UTF8::create( + const unsigned long *s,const int start,const int length) +{ + GStringRep::UTF8 dummy; + return dummy.substr(s,start,length); +} + +GP<GStringRep> +GStringRep::UTF8::blank(const unsigned int sz) const +{ + return GStringRep::create(sz,(GStringRep::UTF8 *)0); +} + +bool +GStringRep::UTF8::isUTF8(void) const +{ + return true; +} + +GP<GStringRep> +GStringRep::UTF8::toThis( + const GP<GStringRep> &rep,const GP<GStringRep> &) const +{ + return rep?(rep->toUTF8(true)):rep; +} + +GP<GStringRep> +GStringRep::UTF8::create(const char fmt[],va_list& args) +{ + const GP<GStringRep> s(create(fmt)); + return (s?(s->vformat(args)):s); +} + +#if !HAS_WCHAR + +#define NATIVE_CREATE(x) UTF8::create( x ); + +#ifdef LC_ALL +#undef LC_ALL +#endif +#define LC_ALL 0 + +class GStringRep::ChangeLocale +{ +public: + ChangeLocale(const int,const char *) {} + ~ChangeLocale() {}; +}; + +GP<GStringRep> +GStringRep::NativeToUTF8( const char *s ) +{ + return GStringRep::UTF8::create(s); +} + +#else + +#define NATIVE_CREATE(x) Native::create( x ); + +// The declaration and implementation of GStringRep::ChangeLocale +// Not used in WinCE + +class GStringRep::ChangeLocale +{ +public: + ChangeLocale(const int category,const char locale[]); + ~ChangeLocale(); +private: + GUTF8String locale; + int category; +}; + +class GStringRep::Native : public GStringRep +{ +public: + // default constructor + Native(void); + // virtual destructor + virtual ~Native(); + + // Other virtual methods. + // Create an empty string. + virtual GP<GStringRep> blank(const unsigned int sz = 0) const; + // Append a string. + virtual GP<GStringRep> append(const GP<GStringRep> &s2) const; + // Test if Native. + virtual bool isNative(void) const; + // Convert to Native. + virtual GP<GStringRep> toNative( + const EscapeMode escape=UNKNOWN_ESCAPED) const; + // Convert to UTF8. + virtual GP<GStringRep> toUTF8(const bool nothrow=false) const; + // Convert to UTF8. + virtual GP<GStringRep> toThis( + const GP<GStringRep> &rep,const GP<GStringRep> &) const; + // Compare with #s2#. + virtual int cmp(const GP<GStringRep> &s2, const int len=(-1)) const; + + // Convert strings to numbers. + virtual int toInt(void) const; + virtual long toLong( + const int pos, int &endpos, const int base=10) const; + virtual unsigned long toULong( + const int pos, int &endpos, const int base=10) const; + virtual double toDouble( + const int pos, int &endpos) const; + + // Create an empty string + static GP<GStringRep> create(const unsigned int sz = 0); + + // Create a strdup string. + static GP<GStringRep> create(const char *s); + + // Creates by appending to the current string + + // Creates with a concat operation. + static GP<GStringRep> create( + const GP<GStringRep> &s1,const GP<GStringRep> &s2); + static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2); + static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2); + static GP<GStringRep> create(const char *s1,const char *s2); + + // Create with a strdup and substr operation. + static GP<GStringRep> create( + const char *s,const int start,const int length=(-1)); + static GP<GStringRep> create( + const unsigned short *s,const int start,const int length=(-1)); + static GP<GStringRep> create( + const unsigned long *s,const int start,const int length=(-1)); + + // Create with an sprintf() + static GP<GStringRep> create_format(const char fmt[],...); + static GP<GStringRep> create(const char fmt[],va_list &args); + + virtual unsigned char *UCS4toString( + const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const; + + // Tests if a string is legally encoded in the current character set. + virtual bool is_valid(void) const; + + virtual int ncopy(wchar_t * const buf, const int buflen) const; + + friend class GBaseString; +protected: + // Return the next character and increment the source pointer. + virtual unsigned long getValidUCS4(const char *&source) const; +}; + +GP<GStringRep> +GStringRep::Native::create(const unsigned int sz) +{ + return GStringRep::create(sz,(GStringRep::Native *)0); +} + + // Create a strdup string. +GP<GStringRep> +GStringRep::Native::create(const char *s) +{ + GStringRep::Native dummy; + return dummy.strdup(s); +} + +GP<GStringRep> +GStringRep::Native::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::Native::create( const GP<GStringRep> &s1,const char *s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::Native::create( const char *s1, const GP<GStringRep> &s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::Native::create(const char *s1,const char *s2) +{ + GStringRep::Native dummy; + return dummy.concat(s1,s2); +} + +GP<GStringRep> +GStringRep::Native::create( + const char *s,const int start,const int length) +{ + GStringRep::Native dummy; + return dummy.substr(s,start,length); +} + +GP<GStringRep> +GStringRep::Native::create( + const unsigned short *s,const int start,const int length) +{ + GStringRep::Native dummy; + return dummy.substr(s,start,length); +} + +GP<GStringRep> +GStringRep::Native::create( + const unsigned long *s,const int start,const int length) +{ + GStringRep::Native dummy; + return dummy.substr(s,start,length); +} + +GP<GStringRep> +GStringRep::Native::blank(const unsigned int sz) const +{ + return GStringRep::create(sz,(GStringRep::Native *)0); +} + +bool +GStringRep::Native::isNative(void) const +{ + return true; +} + +GP<GStringRep> +GStringRep::Native::toThis( + const GP<GStringRep> &rep,const GP<GStringRep> &) const +{ + return rep?(rep->toNative(NOT_ESCAPED)):rep; +} + +GP<GStringRep> +GStringRep::Native::create(const char fmt[],va_list &args) +{ + const GP<GStringRep> s(create(fmt)); + return (s?(s->vformat(args)):s); +} + +int +GStringRep::Native::ncopy( + wchar_t * const buf, const int buflen ) const +{ + return toUTF8()->ncopy(buf,buflen); +} + +GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] ) + : category(xcategory) +{ +#if DO_CHANGELOCALE + // This is disabled under UNIX because + // it does not play nice with MT. + if(xlocale) + { + locale=setlocale(xcategory,0); + if(locale.length() &&(locale!=xlocale)) + { + if(locale == setlocale(category,xlocale)) + { + locale.empty(); + } + } + else + { + locale.empty(); + } + } +#endif +} + +GStringRep::ChangeLocale::~ChangeLocale() +{ +#if DO_CHANGELOCALE + if(locale.length()) + { + setlocale(category,(const char *)locale); + } +#endif +} + +GNativeString & +GNativeString::format(const char fmt[], ... ) +{ + va_list args; + va_start(args, fmt); + return init(GStringRep::Native::create(fmt,args)); +} + +// Gather the native implementations here. Not used in WinCE. + +GStringRep::Native::Native(void) {} +GStringRep::Native::~Native() {} + +GP<GStringRep> +GStringRep::Native::append(const GP<GStringRep> &s2) const +{ + GP<GStringRep> retval; + if(s2) + { + if(s2->isUTF8()) + { + G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") ); + } + retval=concat(data,s2->data); + }else + { + retval=const_cast<GStringRep::Native *>(this); + } + return retval; +} + +GP<GStringRep> +GStringRep::Native::create_format(const char fmt[],...) +{ + va_list args; + va_start(args, fmt); + return create(fmt,args); +} + +unsigned char * +GStringRep::Native::UCS4toString( + const unsigned long w0,unsigned char *ptr, mbstate_t *ps) const +{ + return UCS4toNative(w0,ptr,ps); +} + +// Convert a UCS4 to a multibyte string in the value bytes. +// The data pointed to by ptr should be long enough to contain +// the results with a nill termination. (Normally 7 characters +// is enough.) +unsigned char * +GStringRep::UCS4toNative( + const unsigned long w0,unsigned char *ptr, mbstate_t *ps) +{ + unsigned short w1; + unsigned short w2=1; + for(int count=(sizeof(wchar_t)==sizeof(w1)) ? UCS4toUTF16(w0,w1,w2) : 1; + count; + --count,w1=w2) + { + // wchar_t can be either UCS4 or UCS2 + const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0; + int i=wcrtomb((char *)ptr,w,ps); + if(i<0) + { + break; + } + ptr[i]=0; + ptr += i; + } + ptr[0]=0; + return ptr; +} + +GP<GStringRep> +GStringRep::Native::toNative(const EscapeMode escape) const +{ + if(escape == UNKNOWN_ESCAPED) + G_THROW( ERR_MSG("GStringRep.NativeToNative") ); + return const_cast<GStringRep::Native *>(this); +} + +GP<GStringRep> +GStringRep::Native::toUTF8(const bool) const +{ + unsigned char *buf; + GPBuffer<unsigned char> gbuf(buf,size*6+1); + buf[0]=0; + if(data && size) + { + size_t n=size; + const char *source=data; + mbstate_t ps; + unsigned char *ptr=buf; + //(void)mbrlen(source, n, &ps); + memset(&ps,0,sizeof(mbstate_t)); + int i=0; + if(sizeof(wchar_t) == sizeof(unsigned long)) + { + wchar_t w = 0; + for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i) + { + ptr=UCS4toUTF8(w,ptr); + } + } + else + { + wchar_t w = 0; + for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i) + { + unsigned short s[2]; + s[0]=w; + unsigned long w0; + if(UTF16toUCS4(w0,s,s+1)<=0) + { + source+=i; + n-=i; + if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0)) + { + s[1]=w; + if(UTF16toUCS4(w0,s,s+2)<=0) + { + i=(-1); + break; + } + } + else + { + i=(-1); + break; + } + } + ptr=UCS4toUTF8(w0,ptr); + } + } + if(i<0) + { + gbuf.resize(0); + } + else + { + ptr[0]=0; + } + } + return GStringRep::UTF8::create((const char *)buf); +} + +GNativeString +GBaseString::UTF8ToNative( + const bool currentlocale,const EscapeMode escape) const +{ + const char *source=(*this); + GP<GStringRep> retval; + if(source && source[0]) + { +#if DO_CHANGELOCALE + GUTF8String lc_ctype(setlocale(LC_CTYPE,0)); +#endif + bool repeat; + for(repeat=!currentlocale;;repeat=false) + { + retval=(*this)->toNative((GStringRep::EscapeMode)escape); +#if DO_CHANGELOCALE + if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,""))) +#endif + break; + } +#if DO_CHANGELOCALE + if(!repeat) + { + setlocale(LC_CTYPE,(const char *)lc_ctype); + } +#endif + } + return GNativeString(retval); +} + +/*MBCS*/ +GNativeString +GBaseString::getUTF82Native( const EscapeMode escape ) const +{ //MBCS cvt + GNativeString retval; + + // We don't want to convert this if it + // already is known to be native... +// if (isNative()) return *this; + + const size_t slen=length()+1; + if(slen>1) + { + retval=UTF8ToNative(false,escape) ; + if(!retval.length()) + { + retval=(const char*)*this; + } + } + return retval; +} + +GUTF8String +GBaseString::NativeToUTF8(void) const +{ + GP<GStringRep> retval; + if(length()) + { + const char *source=(*this); +#if DO_CHANGELOCALE + GUTF8String lc_ctype=setlocale(LC_CTYPE,0); +#endif + bool repeat; + for(repeat=true;;repeat=false) + { + if( (retval=GStringRep::NativeToUTF8(source)) ) + { + if(GStringRep::cmp(retval->toNative(),source)) + { + retval=GStringRep::UTF8::create((unsigned int)0); + } + } +#if DO_CHANGELOCALE + if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,""))) +#endif + break; + } +#if DO_CHANGELOCALE + if(!repeat) + { + setlocale(LC_CTYPE,(const char *)lc_ctype); + } +#endif + } + return GUTF8String(retval); +} + +GUTF8String +GBaseString::getNative2UTF8(void) const +{ //MBCS cvt + + // We don't want to do a transform this + // if we already are in the given type. +// if (isUTF8()) return *this; + + const size_t slen=length()+1; + GUTF8String retval; + if(slen > 1) + { + retval=NativeToUTF8(); + if(!retval.length()) + { + retval=(const char *)(*this); + } + } + return retval; +} /*MBCS*/ + +int +GStringRep::Native::cmp(const GP<GStringRep> &s2,const int len) const +{ + int retval; + if(s2) + { + if(s2->isUTF8()) + { + const GP<GStringRep> r(toUTF8(true)); + if(r) + { + retval=GStringRep::cmp(r->data,s2->data,len); + }else + { + retval=cmp(s2->toNative(NOT_ESCAPED),len); + } + }else + { + retval=GStringRep::cmp(data,s2->data,len); + } + }else + { + retval=GStringRep::cmp(data,0,len); + } + return retval; +} + +int +GStringRep::Native::toInt() const +{ + return atoi(data); +} + +long +GStringRep::Native::toLong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + const long retval=strtol(data+pos, &edata, base); + if(edata) + { + endpos=(int)((size_t)edata-(size_t)data); + }else + { + endpos=(-1); + } + return retval; +} + +unsigned long +GStringRep::Native::toULong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + const unsigned long retval=strtoul(data+pos, &edata, base); + if(edata) + { + endpos=(int)((size_t)edata-(size_t)data); + }else + { + endpos=(-1); + } + return retval; +} + +double +GStringRep::Native::toDouble( + const int pos, int &endpos) const +{ + char *edata=0; + const double retval=strtod(data+pos, &edata); + if(edata) + { + endpos=(int)((size_t)edata-(size_t)data); + }else + { + endpos=(-1); + } + return retval; +} + +unsigned long +GStringRep::Native::getValidUCS4(const char *&source) const +{ + unsigned long retval=0; + int n=(int)((size_t)size+(size_t)data-(size_t)source); + if(source && (n > 0)) + { + mbstate_t ps; + //(void)mbrlen(source, n, &ps); + memset(&ps,0,sizeof(mbstate_t)); + wchar_t wt; + const int len=mbrtowc(&wt,source,n,&ps); + if(len>=0) + { + if(sizeof(wchar_t) == sizeof(unsigned short)) + { + source+=len; + unsigned short s[2]; + s[0]=(unsigned short)wt; + if(UTF16toUCS4(retval,s,s+1)<=0) + { + if((n-=len)>0) + { + const int len=mbrtowc(&wt,source,n,&ps); + if(len>=0) + { + s[1]=(unsigned short)wt; + unsigned long w; + if(UTF16toUCS4(w,s,s+2)>0) + { + source+=len; + retval=w; + } + } + } + } + }else + { + retval=(unsigned long)wt; + source++; + } + }else + { + source++; + } + } + return retval; +} + +// Tests if a string is legally encoded in the current character set. +bool +GStringRep::Native::is_valid(void) const +{ + bool retval=true; + if(data && size) + { + size_t n=size; + const char *s=data; + mbstate_t ps; + //(void)mbrlen(s, n, &ps); + memset(&ps,0,sizeof(mbstate_t)); + do + { + size_t m=mbrlen(s,n,&ps); + if(m > n) + { + retval=false; + break; + }else if(m) + { + s+=m; + n-=m; + }else + { + break; + } + } while(n); + } + return retval; +} + +// These are dummy functions. +void +GStringRep::set_remainder(void const * const, const unsigned int, + const EncodeType) {} +void +GStringRep::set_remainder(void const * const, const unsigned int, + const GP<GStringRep> &encoding) {} +void +GStringRep::set_remainder( const GP<GStringRep::Unicode> &) {} + +GP<GStringRep::Unicode> +GStringRep::get_remainder( void ) const +{ + return 0; +} + +GNativeString::GNativeString(const char dat) +{ + init(GStringRep::Native::create(&dat,0,1)); +} + +GNativeString::GNativeString(const char *str) +{ + init(GStringRep::Native::create(str)); +} + +GNativeString::GNativeString(const unsigned char *str) +{ + init(GStringRep::Native::create((const char *)str)); +} + +GNativeString::GNativeString(const unsigned short *str) +{ + init(GStringRep::Native::create(str,0,-1)); +} + +GNativeString::GNativeString(const unsigned long *str) +{ + init(GStringRep::Native::create(str,0,-1)); +} + +GNativeString::GNativeString(const char *dat, unsigned int len) +{ + init( + GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const unsigned short *dat, unsigned int len) +{ + init( + GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const unsigned long *dat, unsigned int len) +{ + init( + GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const GNativeString &str) +{ + init(str); +} + +GNativeString::GNativeString(const GBaseString &gs, int from, int len) +{ + init( + GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len)); +} + +GNativeString::GNativeString(const int number) +{ + init(GStringRep::Native::create_format("%d",number)); +} + +GNativeString::GNativeString(const double number) +{ + init(GStringRep::Native::create_format("%f",number)); +} + +GNativeString& +GNativeString::operator= (const char str) +{ return init(GStringRep::Native::create(&str,0,1)); } + +GNativeString& +GNativeString::operator= (const char *str) +{ return init(GStringRep::Native::create(str)); } + +GNativeString +GBaseString::operator+(const GNativeString &s2) const +{ + return GStringRep::Native::create(*this,s2); +} + +GP<GStringRep> +GStringRep::NativeToUTF8( const char *s ) +{ + return GStringRep::Native::create(s)->toUTF8(); +} + +#endif // HAS_WCHAR + +template <class TYPE> +GP<GStringRep> +GStringRep::create(const unsigned int sz, TYPE *) +{ + GP<GStringRep> gaddr; + if (sz > 0) + { + GStringRep *addr; + gaddr=(addr=new TYPE); + addr->data=(char *)(::operator new(sz+1)); + addr->size = sz; + addr->data[sz] = 0; + } + return gaddr; +} + +GP<GStringRep> +GStringRep::strdup(const char *s) const +{ + GP<GStringRep> retval; + const int length=s?strlen(s):0; + if(length>0) + { + retval=blank(length); + char const * const end=s+length; + char *ptr=retval->data; + for(;*s&&(s!=end);ptr++) + { + ptr[0]=s++[0]; + } + ptr[0]=0; + } + return retval; +} + +GP<GStringRep> +GStringRep::substr(const char *s,const int start,const int len) const +{ + GP<GStringRep> retval; + if(s && s[0]) + { + const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1); + const char *startptr, *endptr; + if(start<0) + { + startptr=s+length+start; + if(startptr<s) + startptr=s; + }else + { + startptr=s; + for(const char * const ptr=s+start;(startptr<ptr)&&*startptr;++startptr) + EMPTY_LOOP; + } + if(len<0) + { + if(s+length+1 < startptr+len) + { + endptr=startptr; + }else + { + endptr=s+length+1+len; + } + }else + { + endptr=startptr; + for(const char * const ptr=startptr+len;(endptr<ptr)&&*endptr;++endptr) + EMPTY_LOOP; + } + if(endptr>startptr) + { + retval=blank((size_t)(endptr-startptr)); + char *data=retval->data; + for(; (startptr<endptr) && *startptr; ++startptr,++data) + { + data[0]=startptr[0]; + } + data[0]=0; + } + } + return retval; +} + +GP<GStringRep> +GStringRep::substr(const unsigned short *s,const int start,const int len) const +{ + GP<GStringRep> retval; + if(s && s[0]) + { + unsigned short const *eptr; + if(len<0) + { + for(eptr=s;eptr[0];++eptr) + EMPTY_LOOP; + }else + { + eptr=&(s[len]); + } + s=&s[start]; + if((size_t)s<(size_t)eptr) + { + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + unsigned char *buf,*ptr; + GPBuffer<unsigned char> gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7); + for(ptr=buf;s[0];) + { + unsigned long w; + int i=UTF16toUCS4(w,s,eptr); + if(i<=0) + break; + s+=i; + ptr=UCS4toString(w,ptr,&ps); + } + ptr[0]=0; + retval = strdup( (const char *)buf ); + } + } + return retval; +} + +GP<GStringRep> +GStringRep::substr(const unsigned long *s,const int start,const int len) const +{ + GP<GStringRep> retval; + if(s && s[0]) + { + unsigned long const *eptr; + if(len<0) + { + for(eptr=s;eptr[0];++eptr) + EMPTY_LOOP; + }else + { + eptr=&(s[len]); + } + s=&s[start]; + if((size_t)s<(size_t)eptr) + { + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + unsigned char *buf,*ptr; + GPBuffer<unsigned char> gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7); + for(ptr=buf;s[0];++s) + { + ptr=UCS4toString(s[0],ptr,&ps); + } + ptr[0]=0; + retval = strdup( (const char *)buf ); + } + } + return retval; +} + +GP<GStringRep> +GStringRep::append(const char *s2) const +{ + GP<GStringRep> retval; + if(s2) + { + retval=concat(data,s2); + }else + { + retval=const_cast<GStringRep *>(this); + } + return retval; +} + +GP<GStringRep> +GStringRep::UTF8::append(const GP<GStringRep> &s2) const +{ + GP<GStringRep> retval; + if(s2) + { + if(s2->isNative()) + { + G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") ); + } + retval=concat(data,s2->data); + }else + { + retval=const_cast<GStringRep::UTF8 *>(this); + } + return retval; +} + +GP<GStringRep> +GStringRep::concat(const char *s1,const char *s2) const +{ + const int length1=(s1?strlen(s1):0); + const int length2=(s2?strlen(s2):0); + const int length=length1+length2; + GP<GStringRep> retval; + if(length>0) + { + retval=blank(length); + GStringRep &r=*retval; + if(length1) + { + strcpy(r.data,s1); + if(length2) + strcat(r.data,s2); + }else + { + strcpy(r.data,s2); + } + } + return retval; +} + +const char *GBaseString::nullstr = ""; + +void +GBaseString::empty( void ) +{ + init(0); +} + +GP<GStringRep> +GStringRep::getbuf(int n) const +{ + GP<GStringRep> retval; + if(n< 0) + n=strlen(data); + if(n>0) + { + retval=blank(n); + char *ndata=retval->data; + strncpy(ndata,data,n); + ndata[n]=0; + } + return retval; +} + +const char * +GStringRep::isCharType( + bool (*xiswtest)(const unsigned long wc), const char *ptr, const bool reverse) const +{ + char const * xptr=ptr; + const unsigned long w=getValidUCS4(xptr); + if((ptr != xptr) + &&(((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ||(reverse?(!xiswtest(w)):xiswtest(w)))) + { + ptr=xptr; + } + return ptr; +} + +int +GStringRep::nextCharType( + bool (*xiswtest)(const unsigned long wc), const int from, const int len, + const bool reverse) const +{ + // We want to return the position of the next + // non white space starting from the #from# + // location. isspace should work in any locale + // so we should only need to do this for the non- + // native locales (UTF8) + int retval; + if(from<size) + { + retval=from; + const char * ptr = data+from; + for( const char * const eptr=ptr+((len<0)?(size-from):len); + (ptr<eptr) && *ptr;) + { + // Skip characters that fail the isCharType test + char const * const xptr=isCharType(xiswtest,ptr,!reverse); + if(xptr == ptr) + break; + ptr=xptr; + } + retval=(int)((size_t)ptr-(size_t)data); + }else + { + retval=size; + } + return retval; +} + +bool +GStringRep::giswspace(const unsigned long w) +{ +#if HAS_WCTYPE + return + ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ||((unsigned long)iswspace((wchar_t)w)) + ||((w == '\r')||(w == '\n')); +#else + return + (w&~0xff)?(true):(((unsigned long)isspace((char)w))||((w == '\r')||(w == '\n'))); +#endif +} + +bool +GStringRep::giswupper(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?(true):((unsigned long)iswupper((wchar_t)w)?true:false); +#else + return (w&~0xff)?(true):((unsigned long)isupper((char)w)?true:false); +#endif +} + +bool +GStringRep::giswlower(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?(true):((unsigned long)iswlower((wchar_t)w)?true:false); +#else + return (w&~0xff)?(true):((unsigned long)islower((char)w)?true:false); +#endif +} + +unsigned long +GStringRep::gtowupper(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?w:((unsigned long)towupper((wchar_t)w)); +#else + return (w&~0xff)?w:((unsigned long)toupper((char)w)); +#endif +} + +unsigned long +GStringRep::gtowlower(const unsigned long w) +{ +#if HAS_WCTYPE + return ((sizeof(wchar_t) == 2)&&(w&~0xffff)) + ?w:((unsigned long)towlower((wchar_t)w)); +#else + return (w&~0xff)?w:((unsigned long)tolower((char)w)); +#endif +} + +GP<GStringRep> +GStringRep::tocase( + bool (*xiswcase)(const unsigned long wc), + unsigned long (*xtowcase)(const unsigned long wc)) const +{ + GP<GStringRep> retval; + char const * const eptr=data+size; + char const *ptr=data; + while(ptr<eptr) + { + char const * const xptr=isCharType(xiswcase,ptr,false); + if(ptr == xptr) + break; + ptr=xptr; + } + if(ptr<eptr) + { + const int n=(int)((size_t)ptr-(size_t)data); + unsigned char *buf; + GPBuffer<unsigned char> gbuf(buf,n+(1+size-n)*6); + if(n>0) + { + strncpy((char *)buf,data,n); + } + unsigned char *buf_ptr=buf+n; + for(char const *ptr=data+n;ptr<eptr;) + { + char const * const xptr=ptr; + const unsigned long w=getValidUCS4(ptr); + if(ptr == xptr) + break; + if(xiswcase(w)) + { + const int len=(int)((size_t)ptr-(size_t)xptr); + strncpy((char *)buf_ptr,xptr,len); + buf_ptr+=len; + }else + { + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + buf_ptr=UCS4toString(xtowcase(w),buf_ptr,&ps); + } + } + buf_ptr[0]=0; + retval=substr((const char *)buf,0,(int)((size_t)buf_ptr-(size_t)buf)); + }else + { + retval=const_cast<GStringRep *>(this); + } + return retval; +} + +// Returns a copy of this string with characters used in XML escaped as follows: +// '<' --> "<" +// '>' --> ">" +// '&' --> "&" +// '\'' --> "'" +// '\"' --> """ +// Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f. +GP<GStringRep> +GStringRep::toEscaped( const bool tosevenbit ) const +{ + bool modified=false; + char *ret; + GPBuffer<char> gret(ret,size*7); + ret[0]=0; + char *retptr=ret; + char const *start=data; + char const *s=start; + char const *last=s; + GP<GStringRep> special; + for(unsigned long w;(w=getValidUCS4(s));last=s) + { + char const *ss=0; + switch(w) + { + case '<': + ss="<"; + break; + case '>': + ss=">"; + break; + case '&': + ss="&"; + break; + case '\47': + ss="'"; + break; + case '\42': + ss="""; + break; + default: + if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80)))) + { + special=toThis(UTF8::create_format("&#%lu;",w)); + ss=special->data; + } + break; + } + if(ss) + { + modified=true; + if(s!=start) + { + size_t len=(size_t)last-(size_t)start; + strncpy(retptr,start,len); + retptr+=len; + start=s; + } + if(ss[0]) + { + size_t len=strlen(ss); + strcpy(retptr,ss); + retptr+=len; + } + } + } + GP<GStringRep> retval; + if(modified) + { + strcpy(retptr,start); + retval=strdup( ret ); + }else + { + retval=const_cast<GStringRep *>(this); + } +// DEBUG_MSG( "Escaped string is '" << ret << "'\n" ); + return retval; +} + + +static const GMap<GUTF8String,GUTF8String> & +BasicMap( void ) +{ + static GMap<GUTF8String,GUTF8String> Basic; + if (! Basic.size()) + { + Basic["lt"] = GUTF8String('<'); + Basic["gt"] = GUTF8String('>'); + Basic["amp"] = GUTF8String('&'); + Basic["apos"] = GUTF8String('\47'); + Basic["quot"] = GUTF8String('\42'); + } + return Basic; +} + +GUTF8String +GUTF8String::fromEscaped( const GMap<GUTF8String,GUTF8String> ConvMap ) const +{ + GUTF8String ret; // Build output string here + int start_locn = 0; // Beginning of substring to skip + int amp_locn; // Location of a found ampersand + + while( (amp_locn = search( '&', start_locn )) > -1 ) + { + // Found the next apostrophe + // Locate the closing semicolon + const int semi_locn = search( ';', amp_locn ); + // No closing semicolon, exit and copy + // the rest of the string. + if( semi_locn < 0 ) + break; + ret += substr( start_locn, amp_locn - start_locn ); + int const len = semi_locn - amp_locn - 1; + if(len) + { + GUTF8String key = substr( amp_locn+1, len); + //DEBUG_MSG( "key = '" << key << "'\n" ); + char const * s=key; + if( s[0] == '#') + { + unsigned long value; + char *ptr=0; + if(s[1] == 'x' || s[1] == 'X') + { + value=strtoul((char const *)(s+2),&ptr,16); + }else + { + value=strtoul((char const *)(s+1),&ptr,10); + } + if(ptr) + { + unsigned char utf8char[7]; + unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char); + ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char); + }else + { + ret += substr( amp_locn, semi_locn - amp_locn + 1 ); + } + }else + { + GPosition map_entry = ConvMap.contains( key ); + if( map_entry ) + { // Found in the conversion map, substitute + ret += ConvMap[map_entry]; + } else + { + static const GMap<GUTF8String,GUTF8String> &Basic = BasicMap(); + GPosition map_entry = Basic.contains( key ); + if ( map_entry ) + { + ret += Basic[map_entry]; + }else + { + ret += substr( amp_locn, len+2 ); + } + } + } + }else + { + ret += substr( amp_locn, len+2 ); + } + start_locn = semi_locn + 1; +// DEBUG_MSG( "ret = '" << ret << "'\n" ); + } + + // Copy the end of the string to the output + ret += substr( start_locn, length()-start_locn ); + +// DEBUG_MSG( "Unescaped string is '" << ret << "'\n" ); + return (ret == *this)?(*this):ret; +} + +GUTF8String +GUTF8String::fromEscaped(void) const +{ + const GMap<GUTF8String,GUTF8String> nill; + return fromEscaped(nill); +} + +GP<GStringRep> +GStringRep::setat(int n, char ch) const +{ + GP<GStringRep> retval; + if(n<0) + n+=size; + if (n < 0 || n>size) + GBaseString::throw_illegal_subscript(); + if(ch == data[n]) + { + retval=const_cast<GStringRep *>(this); + }else if(!ch) + { + retval=getbuf(n); + }else + { + retval=getbuf((n<size)?size:n); + retval->data[n]=ch; + if(n == size) + retval->data[n+1]=0; + } + return retval; +} + +#ifdef WIN32 +#define USE_VSNPRINTF _vsnprintf +#endif + +#ifdef AUTOCONF +# ifdef HAVE_VSNPRINTF +# define USE_VSNPRINTF vsnprintf +# endif +#else +# ifdef linux +# define USE_VSNPRINTF vsnprintf +# endif +#endif + +GUTF8String & +GUTF8String::format(const char fmt[], ... ) +{ + va_list args; + va_start(args, fmt); + return init(GStringRep::UTF8::create(fmt,args)); +} + +GP<GStringRep> +GStringRep::UTF8::create_format(const char fmt[],...) +{ + va_list args; + va_start(args, fmt); + return create(fmt,args); +} + +GP<GStringRep> +GStringRep::vformat(va_list args) const +{ + GP<GStringRep> retval; + if(size) + { +#ifndef WIN32 + char *nfmt; + GPBuffer<char> gnfmt(nfmt,size+1); + nfmt[0]=0; + int start=0; +#endif + int from=0; + while((from=search('%',from)) >= 0) + { + if(data[++from] != '%') + { + int m,n=0; + sscanf(data+from,"%d!%n",&m,&n); + if(n) + { +#ifdef WIN32 + char *lpszFormat=data; + LPTSTR lpszTemp; + if((!::FormatMessage( + FORMAT_MESSAGE_FROM_STRING|FORMAT_MESSAGE_ALLOCATE_BUFFER, + lpszFormat, 0, 0, (LPTSTR)&lpszTemp,0,&args)) + || !lpszTemp) + { + G_THROW(GException::outofmemory); + } + va_end(args); + retval=strdup((const char *)lpszTemp); + LocalFree(lpszTemp); + break; +#else + from+=n; + const int end=search('!',from); + if(end>=0) + { + strncat(nfmt,data+start,(int)(end-start)); + strncat(nfmt,"$",1); + start=from=end+1; + }else + { + gnfmt.resize(0); + from=(-1); + break; + } +#endif + }else + { +#ifndef WIN32 + gnfmt.resize(0); +#endif + from=(-1); + break; + } + } + } + if(from < 0) + { +#ifndef WIN32 + char const * const fmt=(nfmt&&nfmt[0])?nfmt:data; +#else + char const * const fmt=data; +#endif + int buflen=32768; + char *buffer; + GPBuffer<char> gbuffer(buffer,buflen); + + ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C")); + + // Format string +#ifdef USE_VSNPRINTF + while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0) + { + gbuffer.resize(0); + gbuffer.resize(buflen+32768); + } + va_end(args); +#else + buffer[buflen-1] = 0; + vsprintf(buffer, fmt, args); + va_end(args); + if (buffer[buflen-1]) + { + // This isn't as fatal since it is on the stack, but we + // definitely should stop the current operation. + G_THROW( ERR_MSG("GString.overwrite") ); + } +#endif + retval=strdup((const char *)buffer); + } + } + // Go altering the string + return retval; +} + +int +GStringRep::search(char c, int from) const +{ + if (from<0) + from += size; + int retval=(-1); + if (from>=0 && from<size) + { + char const *const s = strchr(data+from,c); + if(s) + retval=(int)((size_t)s-(size_t)data); + } + return retval; +} + +int +GStringRep::search(char const *ptr, int from) const +{ + if(from<0) + { + from+=size; + if(from<0) + G_THROW( ERR_MSG("GString.bad_subscript") ); + } + int retval=(-1); + if (from>=0 && from<size) + { + char const *const s = strstr(data+from,ptr); + if(s) + retval=(int)((size_t)s-(size_t)data); + } + return retval; +} + +int +GStringRep::rsearch(char c, int from) const +{ + if(from<0) + { + from+=size; + if(from<0) + G_THROW( ERR_MSG("GString.bad_subscript") ); + } + int retval=(-1); + if ((from>=0) && (from<size)) + { + char const *const s = strrchr(data+from,c); + if(s) + retval=(int)((size_t)s-(size_t)data); + } + return retval; +} + +int +GStringRep::rsearch(char const *ptr, int from) const +{ + if(from<0) + { + from+=size; + if(from<0) + G_THROW( ERR_MSG("GString.bad_subscript") ); + } + int retval=(-1); + for(int loc=from;(loc=search(ptr,loc)) >= 0;++loc) + retval=loc; + return retval; +} + +int +GStringRep::contains(const char accept[],int from) const +{ + if(from<0) + { + from+=size; + if(from<0) + G_THROW( ERR_MSG("GString.bad_subscript") ); + } + int retval=(-1); + if (accept && accept[0] && from>=0 && from<size) + { + char const * const src = data+from; + char const *ptr=strpbrk(src,accept); + if(ptr) + { + retval=(int)(ptr-src)+from; + } + } + return retval; +} + +int +GStringRep::rcontains(const char accept[],int from) const +{ + int retval=(-1); + while((from=contains(accept,from)) >= 0) + { + retval=from++; + } + return retval; +} + +bool +GBaseString::is_int(void) const +{ + bool isLong=!!ptr; + if(isLong) + { + int endpos; + (*this)->toLong(0,endpos); + if(endpos>=0) + { + isLong=((*this)->nextNonSpace(endpos) == (int)length()); + } + } + return isLong; +} + +bool +GBaseString::is_float(void) const +{ + bool isDouble=!!ptr; + if(isDouble) + { + int endpos; + (*this)->toDouble(0,endpos); + if(endpos>=0) + { + isDouble=((*this)->nextNonSpace(endpos) == (int)length()); + } + } + return isDouble; +} + +unsigned int +hash(const GBaseString &str) +{ + unsigned int x = 0; + const char *s = (const char*)str; + while (*s) + x = x ^ (x<<6) ^ (unsigned char)(*s++); + return x; +} + +void +GBaseString::throw_illegal_subscript() +{ + G_THROW( ERR_MSG("GString.bad_subscript") ); +} + +unsigned char * +GStringRep::UTF8::UCS4toString( + const unsigned long w0,unsigned char *ptr, mbstate_t *) const +{ + return UCS4toUTF8(w0,ptr); +} + +int +GStringRep::UTF8::ncopy( + wchar_t * const buf, const int buflen ) const +{ + int retval=(-1); + if(buf && buflen) + { + buf[0]=0; + if(data[0]) + { + const size_t length=strlen(data); + const unsigned char * const eptr=(const unsigned char *)(data+length); + wchar_t *r=buf; + wchar_t const * const rend=buf+buflen; + for(const unsigned char *s=(const unsigned char *)data;(r<rend)&&(s<eptr)&&*s;) + { + const unsigned long w0=UTF8toUCS4(s,eptr); + unsigned short w1; + unsigned short w2=1; + for(int count=(sizeof(wchar_t) == sizeof(w1))?UCS4toUTF16(w0,w1,w2):1; + count&&(r<rend); + --count,w1=w2,++r) + { + r[0]=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0; + } + } + if(r<rend) + { + r[0]=0; + retval=((size_t)r-(size_t)buf)/sizeof(wchar_t); + } + }else + { + retval=0; + } + } + return retval; +} + +GP<GStringRep> +GStringRep::UTF8::toNative(const EscapeMode escape) const +{ + GP<GStringRep> retval; + if(data[0]) + { + const size_t length=strlen(data); + const unsigned char * const eptr=(const unsigned char *)(data+length); + unsigned char *buf; + GPBuffer<unsigned char> gbuf(buf,12*length+12); + unsigned char *r=buf; + mbstate_t ps; + memset(&ps,0,sizeof(mbstate_t)); + for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;) + { + const unsigned long w0=UTF8toUCS4(s,eptr); + const unsigned char * const r0=r; + r=UCS4toNative(w0,r,&ps); + if(r == r0) + { + if(escape == IS_ESCAPED) + { + sprintf((char *)r,"&#%lu;",w0); + r+=strlen((char *)r); + }else + { + r=buf; + break; + } + } + } + r[0]=0; + retval = NATIVE_CREATE( (const char *)buf ); + } else + { + retval = NATIVE_CREATE( (unsigned int)0 ); + } + return retval; +} + +GP<GStringRep> +GStringRep::UTF8::toUTF8(const bool nothrow) const +{ + if(!nothrow) + G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") ); + return const_cast<GStringRep::UTF8 *>(this); +} + +// Tests if a string is legally encoded in the current character set. +bool +GStringRep::UTF8::is_valid(void) const +{ + bool retval=true; + if(data && size) + { + const unsigned char * const eptr=(const unsigned char *)(data+size); + for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;) + { + const unsigned char * const r=s; + (void)UTF8toUCS4(s,eptr); + if(r == s) + { + retval=false; + break; + } + } + } + return retval; +} + +static inline unsigned long +add_char(unsigned long const U, unsigned char const * const r) +{ + unsigned long const C=r[0]; + return ((C|0x3f) == 0xbf)?((U<<6)|(C&0x3f)):0; +} + +unsigned long +GStringRep::UTF8toUCS4( + unsigned char const *&s,void const * const eptr) +{ + unsigned long U=0; + unsigned char const *r=s; + if(r < eptr) + { + unsigned long const C1=r++[0]; + if(C1&0x80) + { + if(r < eptr) + { + U=C1; + if((U=((C1&0x40)?add_char(U,r++):0))) + { + if(C1&0x20) + { + if(r < eptr) + { + if((U=add_char(U,r++))) + { + if(C1&0x10) + { + if(r < eptr) + { + if((U=add_char(U,r++))) + { + if(C1&0x8) + { + if(r < eptr) + { + if((U=add_char(U,r++))) + { + if(C1&0x4) + { + if(r < eptr) + { + if((U=((!(C1&0x2))?(add_char(U,r++)&0x7fffffff):0))) + { + s=r; + }else + { + U=(unsigned int)(-1)-s++[0]; + } + }else + { + U=0; + } + }else if((U=((U&0x4000000)?0:(U&0x3ffffff)))) + { + s=r; + } + }else + { + U=(unsigned int)(-1)-s++[0]; + } + }else + { + U=0; + } + }else if((U=((U&0x200000)?0:(U&0x1fffff)))) + { + s=r; + } + }else + { + U=(unsigned int)(-1)-s++[0]; + } + }else + { + U=0; + } + }else if((U=((U&0x10000)?0:(U&0xffff)))) + { + s=r; + } + }else + { + U=(unsigned int)(-1)-s++[0]; + } + }else + { + U=0; + } + }else if((U=((U&0x800)?0:(U&0x7ff)))) + { + s=r; + } + }else + { + U=(unsigned int)(-1)-s++[0]; + } + }else + { + U=0; + } + }else if((U=C1)) + { + s=r; + } + } + return U; +} + +unsigned char * +GStringRep::UCS4toUTF8(const unsigned long w,unsigned char *ptr) +{ + if(w <= 0x7f) + { + *ptr++ = (unsigned char)w; + } + else if(w <= 0x7ff) + { + *ptr++ = (unsigned char)((w>>6)|0xC0); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0xFFFF) + { + *ptr++ = (unsigned char)((w>>12)|0xE0); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0x1FFFFF) + { + *ptr++ = (unsigned char)((w>>18)|0xF0); + *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0x3FFFFFF) + { + *ptr++ = (unsigned char)((w>>24)|0xF8); + *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else if(w <= 0x7FFFFFFF) + { + *ptr++ = (unsigned char)((w>>30)|0xFC); + *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF); + *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF); + *ptr++ = (unsigned char)((w|0x80)&0xBF); + } + else + { + *ptr++ = '?'; + } + return ptr; +} + + // Creates with a concat operation. +GP<GStringRep> +GStringRep::concat( const char *s1, const GP<GStringRep> &s2) const +{ + GP<GStringRep> retval; + if(s2) + { + retval=toThis(s2); + if(s1 && s1[0]) + { + if(retval) + { + retval=concat(s1,retval->data); + }else + { + retval=strdup(s1); + } + } + }else if(s1 && s1[0]) + { + retval=strdup(s1); + } + return retval; +} + + // Creates with a concat operation. + +GP<GStringRep> +GStringRep::concat( const GP<GStringRep> &s1,const char *s2) const +{ + GP<GStringRep> retval; + if(s1) + { + retval=toThis(s1); + if(s2 && s2[0]) + { + if(retval) + { + retval=retval->append(s2); + }else + { + retval=strdup(s2); + } + } + }else if(s2 && s2[0]) + { + retval=strdup(s2); + } + return retval; +} + +GP<GStringRep> +GStringRep::concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const +{ + GP<GStringRep> retval; + if(s1) + { + retval=toThis(s1,s2); + if(retval && s2) + { + retval=retval->append(toThis(s2)); + } + }else if(s2) + { + retval=toThis(s2); + } + return retval; +} + +#ifdef WIN32 +static const char *setlocale_win32(void) +{ + static const char *locale=setlocale(LC_ALL,0); + if(! locale || (locale[0] == 'C' && !locale[1])) + { + locale=setlocale(LC_ALL,""); + } + return locale; +} +#endif + +GStringRep::GStringRep(void) +{ +#ifdef WIN32 + static const char *locale=setlocale_win32(); +#endif + size=0; + data=0; +} + +GStringRep::~GStringRep() +{ + if(data) + { + data[0]=0; + ::operator delete(data); + } + data=0; +} + +GStringRep::UTF8::UTF8(void) {} + +GStringRep::UTF8::~UTF8() {} + +int +GStringRep::cmp(const char *s1,const int len) const +{ + return cmp(data,s1,len); +} + +int +GStringRep::cmp(const char *s1, const char *s2,const int len) +{ + return (len + ?((s1&&s1[0]) + ?((s2&&s2[0]) + ?((len>0) + ?strncmp(s1,s2,len) + :strcmp(s1,s2)) + :1) + :((s2&&s2[0])?(-1):0)) + :0); +} + +int +GStringRep::cmp(const GP<GStringRep> &s1, const GP<GStringRep> &s2, + const int len ) +{ + return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len)); +} + +int +GStringRep::cmp(const GP<GStringRep> &s1, const char *s2, + const int len ) +{ + return cmp((s1?s1->data:0),s2,len); +} + +int +GStringRep::cmp(const char *s1, const GP<GStringRep> &s2, + const int len ) +{ + return cmp(s1,(s2?(s2->data):0),len); +} + +int +GStringRep::UTF8::cmp(const GP<GStringRep> &s2,const int len) const +{ + int retval; + if(s2) + { + if(s2->isNative()) + { + GP<GStringRep> r(s2->toUTF8(true)); + if(r) + { + retval=GStringRep::cmp(data,r->data,len); + }else + { + retval=-(s2->cmp(toNative(NOT_ESCAPED),len)); + } + }else + { + retval=GStringRep::cmp(data,s2->data,len); + } + }else + { + retval=GStringRep::cmp(data,0,len); + } + return retval; +} + +int +GStringRep::UTF8::toInt() const +{ + int endpos; + return (int)toLong(0,endpos); +} + +static inline long +Cstrtol(char *data,char **edata, const int base) +{ + GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); + while (data && *data==' ') data++; + return strtol(data,edata,base); +} + +long +GStringRep::UTF8::toLong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + long retval=Cstrtol(data+pos,&edata, base); + if(edata) + { + endpos=edata-data; + }else + { + endpos=(-1); + GP<GStringRep> ptr=ptr->strdup(data+pos); + if(ptr) + ptr=ptr->toNative(NOT_ESCAPED); + if(ptr) + { + int xendpos; + retval=ptr->toLong(0,xendpos,base); + if(xendpos> 0) + { + endpos=(int)size; + ptr=ptr->strdup(data+xendpos); + if(ptr) + { + ptr=ptr->toUTF8(true); + if(ptr) + { + endpos-=(int)(ptr->size); + } + } + } + } + } + return retval; +} + +static inline unsigned long +Cstrtoul(char *data,char **edata, const int base) +{ + GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); + while (data && *data==' ') data++; + return strtoul(data,edata,base); +} + +unsigned long +GStringRep::UTF8::toULong( + const int pos, int &endpos, const int base) const +{ + char *edata=0; + unsigned long retval=Cstrtoul(data+pos,&edata, base); + if(edata) + { + endpos=edata-data; + }else + { + endpos=(-1); + GP<GStringRep> ptr=ptr->strdup(data+pos); + if(ptr) + ptr=ptr->toNative(NOT_ESCAPED); + if(ptr) + { + int xendpos; + retval=ptr->toULong(0,xendpos,base); + if(xendpos> 0) + { + endpos=(int)size; + ptr=ptr->strdup(data+xendpos); + if(ptr) + { + ptr=ptr->toUTF8(true); + if(ptr) + { + endpos-=(int)(ptr->size); + } + } + } + } + } + return retval; +} + +static inline double +Cstrtod(char *data,char **edata) +{ + GStringRep::ChangeLocale locale(LC_NUMERIC,"C"); + while (data && *data==' ') data++; + return strtod(data,edata); +} + +double +GStringRep::UTF8::toDouble(const int pos, int &endpos) const +{ + char *edata=0; + double retval=Cstrtod(data+pos,&edata); + if(edata) + { + endpos=edata-data; + }else + { + endpos=(-1); + GP<GStringRep> ptr=ptr->strdup(data+pos); + if(ptr) + ptr=ptr->toNative(NOT_ESCAPED); + if(ptr) + { + int xendpos; + retval=ptr->toDouble(0,xendpos); + if(xendpos >= 0) + { + endpos=(int)size; + ptr=ptr->strdup(data+xendpos); + if(ptr) + { + ptr=ptr->toUTF8(true); + if(ptr) + { + endpos-=(int)(ptr->size); + } + } + } + } + } + return retval; +} + +int +GStringRep::getUCS4(unsigned long &w, const int from) const +{ + int retval; + if(from>=size) + { + w=0; + retval=size; + }else if(from<0) + { + w=(unsigned int)(-1); + retval=(-1); + }else + { + const char *source=data+from; + w=getValidUCS4(source); + retval=(int)((size_t)source-(size_t)data); + } + return retval; +} + + +unsigned long +GStringRep::UTF8::getValidUCS4(const char *&source) const +{ + return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size); +} + +int +GStringRep::nextNonSpace(const int from,const int len) const +{ + return nextCharType(giswspace,from,len,true); +} + +int +GStringRep::nextSpace(const int from,const int len) const +{ + return nextCharType(giswspace,from,len,false); +} + +int +GStringRep::nextChar(const int from) const +{ + char const * xptr=data+from; + (void)getValidUCS4(xptr); + return (int)((size_t)xptr-(size_t)data); +} + +int +GStringRep::firstEndSpace(int from,const int len) const +{ + const int xsize=(len<0)?size:(from+len); + const int ysize=(size<xsize)?size:xsize; + int retval=ysize; + while(from<ysize) + { + from=nextNonSpace(from,ysize-from); + if(from < size) + { + const int r=nextSpace(from,ysize-from); + // If a character isn't legal, then it will return + // tru for both nextSpace and nextNonSpace. + if(r == from) + { + from++; + }else + { + from=retval=r; + } + } + } + return retval; +} + +int +GStringRep::UCS4toUTF16( + const unsigned long w,unsigned short &w1, unsigned short &w2) +{ + int retval; + if(w<0x10000) + { + w1=(unsigned short)w; + w2=0; + retval=1; + }else + { + w1=(unsigned short)((((w-0x10000)>>10)&0x3ff)+0xD800); + w2=(unsigned short)((w&0x3ff)+0xDC00); + retval=2; + } + return retval; +} + +int +GStringRep::UTF16toUCS4( + unsigned long &U,unsigned short const * const s,void const * const eptr) +{ + int retval=0; + U=0; + unsigned short const * const r=s+1; + if(r <= eptr) + { + unsigned long const W1=s[0]; + if((W1<0xD800)||(W1>0xDFFF)) + { + if((U=W1)) + { + retval=1; + } + }else if(W1<=0xDBFF) + { + unsigned short const * const rr=r+1; + if(rr <= eptr) + { + unsigned long const W2=s[1]; + if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff)))) + { + retval=2; + }else + { + retval=(-1); + } + } + } + } + return retval; +} + + +//bcr + +GUTF8String& +GUTF8String::operator+= (char ch) +{ + return init( + GStringRep::UTF8::create((const char*)*this, + GStringRep::UTF8::create(&ch,0,1))); +} + +GUTF8String& +GUTF8String::operator+= (const char *str) +{ + return init(GStringRep::UTF8::create(*this,str)); +} + +GUTF8String& +GUTF8String::operator+= (const GBaseString &str) +{ + return init(GStringRep::UTF8::create(*this,str)); +} + +GUTF8String +GUTF8String::substr(int from, int len) const +{ return GUTF8String(*this, from, len); } + +GUTF8String +GUTF8String::operator+(const GBaseString &s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +GUTF8String +GUTF8String::operator+(const GUTF8String &s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +GUTF8String +GUTF8String::operator+(const char *s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +char * +GUTF8String::getbuf(int n) +{ + if(ptr) + init((*this)->getbuf(n)); + else if(n>0) + init(GStringRep::UTF8::create(n)); + else + init(0); + return ptr?((*this)->data):0; +} + +void +GUTF8String::setat(const int n, const char ch) +{ + if((!n)&&(!ptr)) + { + init(GStringRep::UTF8::create(&ch,0,1)); + }else + { + init((*this)->setat(CheckSubscript(n),ch)); + } +} + +GP<GStringRep> +GStringRep::UTF8ToNative( const char *s, const EscapeMode escape ) +{ + return GStringRep::UTF8::create(s)->toNative(escape); +} + +GUTF8String::GUTF8String(const char dat) +{ init(GStringRep::UTF8::create(&dat,0,1)); } + +GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args) +{ + if (fmt.ptr) + init(fmt->vformat(args)); + else + init(fmt); +} + +GUTF8String::GUTF8String(const char *str) +{ init(GStringRep::UTF8::create(str)); } + +GUTF8String::GUTF8String(const unsigned char *str) +{ init(GStringRep::UTF8::create((const char *)str)); } + +GUTF8String::GUTF8String(const unsigned short *str) +{ init(GStringRep::UTF8::create(str,0,-1)); } + +GUTF8String::GUTF8String(const unsigned long *str) +{ init(GStringRep::UTF8::create(str,0,-1)); } + +GUTF8String::GUTF8String(const char *dat, unsigned int len) +{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const unsigned short *dat, unsigned int len) +{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const unsigned long *dat, unsigned int len) +{ init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const GBaseString &gs, int from, int len) +{ init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); } + +GUTF8String::GUTF8String(const int number) +{ init(GStringRep::UTF8::create_format("%d",number)); } + +GUTF8String::GUTF8String(const double number) +{ init(GStringRep::UTF8::create_format("%f",number)); } + +GUTF8String& GUTF8String::operator= (const char str) +{ return init(GStringRep::UTF8::create(&str,0,1)); } + +GUTF8String& GUTF8String::operator= (const char *str) +{ return init(GStringRep::UTF8::create(str)); } + +GUTF8String GBaseString::operator+(const GUTF8String &s2) const +{ return GStringRep::UTF8::create(*this,s2); } + +#if HAS_WCHAR +GUTF8String +GNativeString::operator+(const GUTF8String &s2) const +{ + if (ptr) + return GStringRep::UTF8::create((*this)->toUTF8(true),s2); + else + return GStringRep::UTF8::create((*this),s2); +} +#endif + +GUTF8String +GUTF8String::operator+(const GNativeString &s2) const +{ + GP<GStringRep> g = s2; + if (s2.ptr) + g = s2->toUTF8(true); + return GStringRep::UTF8::create(*this,g); +} + +GUTF8String +operator+(const char *s1, const GUTF8String &s2) +{ return GStringRep::UTF8::create(s1,s2); } + +#if HAS_WCHAR +GNativeString +operator+(const char *s1, const GNativeString &s2) +{ return GStringRep::Native::create(s1,s2); } + +GNativeString& +GNativeString::operator+= (char ch) +{ + char s[2]; s[0]=ch; s[1]=0; + return init(GStringRep::Native::create((const char*)*this, s)); +} + +GNativeString& +GNativeString::operator+= (const char *str) +{ + return init(GStringRep::Native::create(*this,str)); +} + +GNativeString& +GNativeString::operator+= (const GBaseString &str) +{ + return init(GStringRep::Native::create(*this,str)); +} + +GNativeString +GNativeString::operator+(const GBaseString &s2) const +{ return GStringRep::Native::create(*this,s2); } + +GNativeString +GNativeString::operator+(const GNativeString &s2) const +{ return GStringRep::Native::create(*this,s2); } + +GNativeString +GNativeString::operator+(const char *s2) const +{ return GStringRep::Native::create(*this,s2); } + +char * +GNativeString::getbuf(int n) +{ + if(ptr) + init((*this)->getbuf(n)); + else if(n>0) + init(GStringRep::Native::create(n)); + else + init(0); + return ptr?((*this)->data):0; +} + +void +GNativeString::setat(const int n, const char ch) +{ + if((!n)&&(!ptr)) + { + init(GStringRep::Native::create(&ch,0,1)); + }else + { + init((*this)->setat(CheckSubscript(n),ch)); + } +} + +#endif + + +#ifdef HAVE_NAMESPACES +} +# ifndef NOT_USING_DJVU_NAMESPACE +using namespace DJVU; +# endif +#endif |