summaryrefslogtreecommitdiffstats
path: root/src/codecs/qutfcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/codecs/qutfcodec.cpp')
-rw-r--r--src/codecs/qutfcodec.cpp350
1 files changed, 350 insertions, 0 deletions
diff --git a/src/codecs/qutfcodec.cpp b/src/codecs/qutfcodec.cpp
new file mode 100644
index 0000000..bc1abce
--- /dev/null
+++ b/src/codecs/qutfcodec.cpp
@@ -0,0 +1,350 @@
+/****************************************************************************
+**
+** Implementation of QUtf{8,16}Codec class
+**
+** Created : 981015
+**
+** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved.
+**
+** This file is part of the tools module of the Qt GUI Toolkit.
+**
+** This file may be used under the terms of the GNU General
+** Public License versions 2.0 or 3.0 as published by the Free
+** Software Foundation and appearing in the files LICENSE.GPL2
+** and LICENSE.GPL3 included in the packaging of this file.
+** Alternatively you may (at your option) use any later version
+** of the GNU General Public License if such license has been
+** publicly approved by Trolltech ASA (or its successors, if any)
+** and the KDE Free Qt Foundation.
+**
+** Please review the following information to ensure GNU General
+** Public Licensing requirements will be met:
+** http://trolltech.com/products/qt/licenses/licensing/opensource/.
+** If you are unsure which license is appropriate for your use, please
+** review the following information:
+** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
+** or contact the sales department at sales@trolltech.com.
+**
+** This file may be used under the terms of the Q Public License as
+** defined by Trolltech ASA and appearing in the file LICENSE.QPL
+** included in the packaging of this file. Licensees holding valid Qt
+** Commercial licenses may use this file in accordance with the Qt
+** Commercial License Agreement provided with the Software.
+**
+** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
+** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
+** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
+** herein.
+**
+**********************************************************************/
+
+#include "qutfcodec.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+int QUtf8Codec::mibEnum() const
+{
+ return 106;
+}
+
+QCString QUtf8Codec::fromUnicode(const QString& uc, int& lenInOut) const
+{
+ int l = uc.length();
+ if (lenInOut > 0)
+ l = QMIN(l, lenInOut);
+ int rlen = l*3+1;
+ QCString rstr(rlen);
+ uchar* cursor = (uchar*)rstr.data();
+ const QChar *ch = uc.unicode();
+ for (int i=0; i < l; i++) {
+ uint u = ch->unicode();
+ if ( u < 0x80 ) {
+ *cursor++ = (uchar)u;
+ } else {
+ if ( u < 0x0800 ) {
+ *cursor++ = 0xc0 | ((uchar) (u >> 6));
+ } else {
+ if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
+ unsigned short low = ch[1].unicode();
+ if (low >= 0xdc00 && low < 0xe000) {
+ ++ch;
+ ++i;
+ u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
+ }
+ }
+ if (u > 0xffff) {
+ // see QString::fromUtf8() and QString::utf8() for explanations
+ if (u > 0x10fe00 && u < 0x10ff00) {
+ *cursor++ = (u - 0x10fe00);
+ ++ch;
+ continue;
+ } else {
+ *cursor++ = 0xf0 | ((uchar) (u >> 18));
+ *cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f);
+ }
+ } else {
+ *cursor++ = 0xe0 | ((uchar) (u >> 12));
+ }
+ *cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f);
+ }
+ *cursor++ = 0x80 | ((uchar) (u&0x3f));
+ }
+ ++ch;
+ }
+ *cursor = 0;
+ lenInOut = cursor - (uchar*)rstr.data();
+ ((QByteArray&)rstr).resize(lenInOut+1);
+ return rstr;
+}
+
+QString QUtf8Codec::toUnicode(const char* chars, int len) const
+{
+ if (len > 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
+ // starts with a byte order mark
+ chars += 3;
+ len -= 3;
+ }
+ return QString::fromUtf8( chars, len );
+}
+
+
+const char* QUtf8Codec::name() const
+{
+ return "UTF-8";
+}
+
+int QUtf8Codec::heuristicContentMatch(const char* chars, int len) const
+{
+ int score = 0;
+ for (int i=0; i<len; i++) {
+ uchar ch = chars[i];
+ // No nulls allowed.
+ if ( !ch )
+ return -1;
+ if ( ch < 128 ) {
+ // Inconclusive
+ score++;
+ } else if ( (ch&0xe0) == 0xc0 ) {
+ if ( i < len-1 ) {
+ uchar c2 = chars[++i];
+ if ( (c2&0xc0) != 0x80 )
+ return -1;
+ score+=3;
+ }
+ } else if ( (ch&0xf0) == 0xe0 ) {
+ if ( i < len-1 ) {
+ uchar c2 = chars[++i];
+ if ( (c2&0xc0) != 0x80 ) {
+ return -1;
+#if 0
+ if ( i < len-1 ) {
+ uchar c3 = chars[++i];
+ if ( (c3&0xc0) != 0x80 )
+ return -1;
+ score+=3;
+ }
+#endif
+ }
+ score+=2;
+ }
+ }
+ }
+ return score;
+}
+
+
+
+
+class QUtf8Decoder : public QTextDecoder {
+ uint uc;
+ uint min_uc;
+ int need;
+ bool headerDone;
+public:
+ QUtf8Decoder() : need(0), headerDone(FALSE)
+ {
+ }
+
+ QString toUnicode(const char* chars, int len)
+ {
+ QString result;
+ result.setLength( len + 1 ); // worst case
+ QChar *qch = (QChar *)result.unicode();
+ uchar ch;
+ int error = -1;
+ for (int i=0; i<len; i++) {
+ ch = chars[i];
+ if (need) {
+ if ( (ch&0xc0) == 0x80 ) {
+ uc = (uc << 6) | (ch & 0x3f);
+ need--;
+ if ( !need ) {
+ if (uc > 0xffff) {
+ // surrogate pair
+ uc -= 0x10000;
+ unsigned short high = uc/0x400 + 0xd800;
+ unsigned short low = uc%0x400 + 0xdc00;
+ *qch++ = QChar(high);
+ *qch++ = QChar(low);
+ headerDone = TRUE;
+ } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
+ *qch++ = QChar::replacement;
+ } else {
+ if (headerDone || QChar(uc) != QChar::byteOrderMark)
+ *qch++ = uc;
+ headerDone = TRUE;
+ }
+ }
+ } else {
+ // error
+ i = error;
+ *qch++ = QChar::replacement;
+ need = 0;
+ }
+ } else {
+ if ( ch < 128 ) {
+ *qch++ = ch;
+ headerDone = TRUE;
+ } else if ((ch & 0xe0) == 0xc0) {
+ uc = ch & 0x1f;
+ need = 1;
+ error = i;
+ min_uc = 0x80;
+ } else if ((ch & 0xf0) == 0xe0) {
+ uc = ch & 0x0f;
+ need = 2;
+ error = i;
+ min_uc = 0x800;
+ } else if ((ch&0xf8) == 0xf0) {
+ uc = ch & 0x07;
+ need = 3;
+ error = i;
+ min_uc = 0x10000;
+ } else {
+ // error
+ *qch++ = QChar::replacement;
+ }
+ }
+ }
+ result.truncate( qch - result.unicode() );
+ return result;
+ }
+};
+
+QTextDecoder* QUtf8Codec::makeDecoder() const
+{
+ return new QUtf8Decoder;
+}
+
+
+
+
+
+
+int QUtf16Codec::mibEnum() const
+{
+ return 1000;
+}
+
+const char* QUtf16Codec::name() const
+{
+ return "ISO-10646-UCS-2";
+}
+
+int QUtf16Codec::heuristicContentMatch(const char* chars, int len) const
+{
+ uchar* uchars = (uchar*)chars;
+ if ( len >= 2 && (uchars[0] == 0xff && uchars[1] == 0xfe ||
+ uchars[1] == 0xff && uchars[0] == 0xfe) )
+ return len;
+ else
+ return 0;
+}
+
+
+
+
+class QUtf16Encoder : public QTextEncoder {
+ bool headerdone;
+public:
+ QUtf16Encoder() : headerdone(FALSE)
+ {
+ }
+
+ QCString fromUnicode(const QString& uc, int& lenInOut)
+ {
+ if ( headerdone ) {
+ lenInOut = uc.length()*sizeof(QChar);
+ QCString d(lenInOut);
+ memcpy(d.data(),uc.unicode(),lenInOut);
+ return d;
+ } else {
+ headerdone = TRUE;
+ lenInOut = (1+uc.length())*sizeof(QChar);
+ QCString d(lenInOut);
+ memcpy(d.data(),&QChar::byteOrderMark,sizeof(QChar));
+ memcpy(d.data()+sizeof(QChar),uc.unicode(),uc.length()*sizeof(QChar));
+ return d;
+ }
+ }
+};
+
+class QUtf16Decoder : public QTextDecoder {
+ uchar buf;
+ bool half;
+ bool swap;
+ bool headerdone;
+
+public:
+ QUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE)
+ {
+ }
+
+ QString toUnicode(const char* chars, int len)
+ {
+ QString result;
+ result.setLength( len + 1 ); // worst case
+ QChar *qch = (QChar *)result.unicode();
+ QChar ch;
+ while ( len-- ) {
+ if ( half ) {
+ if ( swap ) {
+ ch.setRow( *chars++ );
+ ch.setCell( buf );
+ } else {
+ ch.setRow( buf );
+ ch.setCell( *chars++ );
+ }
+ if ( !headerdone ) {
+ if ( ch == QChar::byteOrderSwapped ) {
+ swap = !swap;
+ } else if ( ch == QChar::byteOrderMark ) {
+ // Ignore ZWNBSP
+ } else {
+ *qch++ = ch;
+ }
+ headerdone = TRUE;
+ } else
+ *qch++ = ch;
+ half = FALSE;
+ } else {
+ buf = *chars++;
+ half = TRUE;
+ }
+ }
+ result.truncate( qch - result.unicode() );
+ return result;
+ }
+};
+
+QTextDecoder* QUtf16Codec::makeDecoder() const
+{
+ return new QUtf16Decoder;
+}
+
+QTextEncoder* QUtf16Codec::makeEncoder() const
+{
+ return new QUtf16Encoder;
+}
+
+#endif //QT_NO_TEXTCODEC