diff options
Diffstat (limited to 'src/codecs/qutfcodec.cpp')
-rw-r--r-- | src/codecs/qutfcodec.cpp | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/src/codecs/qutfcodec.cpp b/src/codecs/qutfcodec.cpp new file mode 100644 index 0000000..bc1abce --- /dev/null +++ b/src/codecs/qutfcodec.cpp @@ -0,0 +1,350 @@ +/**************************************************************************** +** +** Implementation of QUtf{8,16}Codec class +** +** Created : 981015 +** +** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved. +** +** This file is part of the tools module of the Qt GUI Toolkit. +** +** This file may be used under the terms of the GNU General +** Public License versions 2.0 or 3.0 as published by the Free +** Software Foundation and appearing in the files LICENSE.GPL2 +** and LICENSE.GPL3 included in the packaging of this file. +** Alternatively you may (at your option) use any later version +** of the GNU General Public License if such license has been +** publicly approved by Trolltech ASA (or its successors, if any) +** and the KDE Free Qt Foundation. +** +** Please review the following information to ensure GNU General +** Public Licensing requirements will be met: +** http://trolltech.com/products/qt/licenses/licensing/opensource/. +** If you are unsure which license is appropriate for your use, please +** review the following information: +** http://trolltech.com/products/qt/licenses/licensing/licensingoverview +** or contact the sales department at sales@trolltech.com. +** +** This file may be used under the terms of the Q Public License as +** defined by Trolltech ASA and appearing in the file LICENSE.QPL +** included in the packaging of this file. Licensees holding valid Qt +** Commercial licenses may use this file in accordance with the Qt +** Commercial License Agreement provided with the Software. +** +** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, +** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR +** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted +** herein. +** +**********************************************************************/ + +#include "qutfcodec.h" + +#ifndef QT_NO_TEXTCODEC + +int QUtf8Codec::mibEnum() const +{ + return 106; +} + +QCString QUtf8Codec::fromUnicode(const QString& uc, int& lenInOut) const +{ + int l = uc.length(); + if (lenInOut > 0) + l = QMIN(l, lenInOut); + int rlen = l*3+1; + QCString rstr(rlen); + uchar* cursor = (uchar*)rstr.data(); + const QChar *ch = uc.unicode(); + for (int i=0; i < l; i++) { + uint u = ch->unicode(); + if ( u < 0x80 ) { + *cursor++ = (uchar)u; + } else { + if ( u < 0x0800 ) { + *cursor++ = 0xc0 | ((uchar) (u >> 6)); + } else { + if (u >= 0xd800 && u < 0xdc00 && i < l-1) { + unsigned short low = ch[1].unicode(); + if (low >= 0xdc00 && low < 0xe000) { + ++ch; + ++i; + u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000; + } + } + if (u > 0xffff) { + // see QString::fromUtf8() and QString::utf8() for explanations + if (u > 0x10fe00 && u < 0x10ff00) { + *cursor++ = (u - 0x10fe00); + ++ch; + continue; + } else { + *cursor++ = 0xf0 | ((uchar) (u >> 18)); + *cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f); + } + } else { + *cursor++ = 0xe0 | ((uchar) (u >> 12)); + } + *cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f); + } + *cursor++ = 0x80 | ((uchar) (u&0x3f)); + } + ++ch; + } + *cursor = 0; + lenInOut = cursor - (uchar*)rstr.data(); + ((QByteArray&)rstr).resize(lenInOut+1); + return rstr; +} + +QString QUtf8Codec::toUnicode(const char* chars, int len) const +{ + if (len > 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) { + // starts with a byte order mark + chars += 3; + len -= 3; + } + return QString::fromUtf8( chars, len ); +} + + +const char* QUtf8Codec::name() const +{ + return "UTF-8"; +} + +int QUtf8Codec::heuristicContentMatch(const char* chars, int len) const +{ + int score = 0; + for (int i=0; i<len; i++) { + uchar ch = chars[i]; + // No nulls allowed. + if ( !ch ) + return -1; + if ( ch < 128 ) { + // Inconclusive + score++; + } else if ( (ch&0xe0) == 0xc0 ) { + if ( i < len-1 ) { + uchar c2 = chars[++i]; + if ( (c2&0xc0) != 0x80 ) + return -1; + score+=3; + } + } else if ( (ch&0xf0) == 0xe0 ) { + if ( i < len-1 ) { + uchar c2 = chars[++i]; + if ( (c2&0xc0) != 0x80 ) { + return -1; +#if 0 + if ( i < len-1 ) { + uchar c3 = chars[++i]; + if ( (c3&0xc0) != 0x80 ) + return -1; + score+=3; + } +#endif + } + score+=2; + } + } + } + return score; +} + + + + +class QUtf8Decoder : public QTextDecoder { + uint uc; + uint min_uc; + int need; + bool headerDone; +public: + QUtf8Decoder() : need(0), headerDone(FALSE) + { + } + + QString toUnicode(const char* chars, int len) + { + QString result; + result.setLength( len + 1 ); // worst case + QChar *qch = (QChar *)result.unicode(); + uchar ch; + int error = -1; + for (int i=0; i<len; i++) { + ch = chars[i]; + if (need) { + if ( (ch&0xc0) == 0x80 ) { + uc = (uc << 6) | (ch & 0x3f); + need--; + if ( !need ) { + if (uc > 0xffff) { + // surrogate pair + uc -= 0x10000; + unsigned short high = uc/0x400 + 0xd800; + unsigned short low = uc%0x400 + 0xdc00; + *qch++ = QChar(high); + *qch++ = QChar(low); + headerDone = TRUE; + } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { + *qch++ = QChar::replacement; + } else { + if (headerDone || QChar(uc) != QChar::byteOrderMark) + *qch++ = uc; + headerDone = TRUE; + } + } + } else { + // error + i = error; + *qch++ = QChar::replacement; + need = 0; + } + } else { + if ( ch < 128 ) { + *qch++ = ch; + headerDone = TRUE; + } else if ((ch & 0xe0) == 0xc0) { + uc = ch & 0x1f; + need = 1; + error = i; + min_uc = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + uc = ch & 0x0f; + need = 2; + error = i; + min_uc = 0x800; + } else if ((ch&0xf8) == 0xf0) { + uc = ch & 0x07; + need = 3; + error = i; + min_uc = 0x10000; + } else { + // error + *qch++ = QChar::replacement; + } + } + } + result.truncate( qch - result.unicode() ); + return result; + } +}; + +QTextDecoder* QUtf8Codec::makeDecoder() const +{ + return new QUtf8Decoder; +} + + + + + + +int QUtf16Codec::mibEnum() const +{ + return 1000; +} + +const char* QUtf16Codec::name() const +{ + return "ISO-10646-UCS-2"; +} + +int QUtf16Codec::heuristicContentMatch(const char* chars, int len) const +{ + uchar* uchars = (uchar*)chars; + if ( len >= 2 && (uchars[0] == 0xff && uchars[1] == 0xfe || + uchars[1] == 0xff && uchars[0] == 0xfe) ) + return len; + else + return 0; +} + + + + +class QUtf16Encoder : public QTextEncoder { + bool headerdone; +public: + QUtf16Encoder() : headerdone(FALSE) + { + } + + QCString fromUnicode(const QString& uc, int& lenInOut) + { + if ( headerdone ) { + lenInOut = uc.length()*sizeof(QChar); + QCString d(lenInOut); + memcpy(d.data(),uc.unicode(),lenInOut); + return d; + } else { + headerdone = TRUE; + lenInOut = (1+uc.length())*sizeof(QChar); + QCString d(lenInOut); + memcpy(d.data(),&QChar::byteOrderMark,sizeof(QChar)); + memcpy(d.data()+sizeof(QChar),uc.unicode(),uc.length()*sizeof(QChar)); + return d; + } + } +}; + +class QUtf16Decoder : public QTextDecoder { + uchar buf; + bool half; + bool swap; + bool headerdone; + +public: + QUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE) + { + } + + QString toUnicode(const char* chars, int len) + { + QString result; + result.setLength( len + 1 ); // worst case + QChar *qch = (QChar *)result.unicode(); + QChar ch; + while ( len-- ) { + if ( half ) { + if ( swap ) { + ch.setRow( *chars++ ); + ch.setCell( buf ); + } else { + ch.setRow( buf ); + ch.setCell( *chars++ ); + } + if ( !headerdone ) { + if ( ch == QChar::byteOrderSwapped ) { + swap = !swap; + } else if ( ch == QChar::byteOrderMark ) { + // Ignore ZWNBSP + } else { + *qch++ = ch; + } + headerdone = TRUE; + } else + *qch++ = ch; + half = FALSE; + } else { + buf = *chars++; + half = TRUE; + } + } + result.truncate( qch - result.unicode() ); + return result; + } +}; + +QTextDecoder* QUtf16Codec::makeDecoder() const +{ + return new QUtf16Decoder; +} + +QTextEncoder* QUtf16Codec::makeEncoder() const +{ + return new QUtf16Encoder; +} + +#endif //QT_NO_TEXTCODEC |