summaryrefslogtreecommitdiffstats
path: root/libtdepim/qutf7codec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libtdepim/qutf7codec.cpp')
-rw-r--r--libtdepim/qutf7codec.cpp550
1 files changed, 550 insertions, 0 deletions
diff --git a/libtdepim/qutf7codec.cpp b/libtdepim/qutf7codec.cpp
new file mode 100644
index 000000000..dc8ef2a2f
--- /dev/null
+++ b/libtdepim/qutf7codec.cpp
@@ -0,0 +1,550 @@
+/*
+ qutf7codec.cpp
+
+ A TQTextCodec for UTF-7 (rfc2152).
+ Copyright (c) 2001 Marc Mutz <mutz@kde.org>
+ See file COPYING for details
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2.0,
+ as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA
+
+ As a special exception, permission is granted to use this plugin
+ with any version of TQt by TrollTech AS, Norway. In this case, the
+ use of this plugin doesn't cause the resulting executable to be
+ covered by the GNU General Public License.
+ This exception does not however invalidate any other reasons why the
+ executable file might be covered by the GNU General Public License.
+*/
+
+
+#include "qutf7codec.h"
+
+#ifndef TQT_NO_TEXTCODEC
+
+// Returns the MIB enum number this codec identifies itself with
+// (1012; presumably the IANA MIBenum registered for UTF-7 - confirm
+// against the IANA character-set registry).
+int TQUtf7Codec::mibEnum() const
+{
+  const int utf7MibEnum = 1012;
+  return utf7MibEnum;
+}
+
+// Returns the MIB enum number of the strict codec variant. It is the
+// negated value of the one TQUtf7Codec::mibEnum() returns, so the two
+// codecs can be told apart by number.
+int TQStrictUtf7Codec::mibEnum() const
+{
+  const int strictUtf7MibEnum = -1012;
+  return strictUtf7MibEnum;
+}
+
+// Returns the name of this codec: "UTF-7".
+const char* TQUtf7Codec::name() const
+{
+  static const char codecName[] = "UTF-7";
+  return codecName;
+}
+
+// Returns the name of the strict codec variant: "X-QT-UTF-7-STRICT".
+const char* TQStrictUtf7Codec::name() const
+{
+  static const char codecName[] = "X-QT-UTF-7-STRICT";
+  return codecName;
+}
+
+// Returns the MIME charset name of this codec: "UTF-7".
+const char* TQUtf7Codec::mimeName() const
+{
+  static const char mimeCharset[] = "UTF-7";
+  return mimeCharset;
+}
+
+// Tells whether a single character can be encoded. The argument is
+// not inspected: the answer is TRUE for every character.
+bool TQUtf7Codec::canEncode( TQChar /*ch*/ ) const
+{
+  const bool encodable = TRUE;
+  return encodable;
+}
+
+// Tells whether a whole string can be encoded. The argument is not
+// inspected: the answer is TRUE for every string.
+bool TQUtf7Codec::canEncode( const TQString & /*str*/ ) const
+{
+  const bool encodable = TRUE;
+  return encodable;
+}
+
+// Bitmap over the 128 US-ASCII chars, one bit per char, laid out the
+// way isOfSet() reads it: char c is bit (0x80 >> c%8) of byte c/8.
+// Set bits: 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/', i.e. the base64
+// alphabet.
+static uchar base64Set[] = {
+ 0x00, 0x00, 0x00, 0x00, // '\0' ...
+ 0x00, 0x11, 0xFF, 0xC0, // ' ' ... '?'
+ 0x7F, 0xFF, 0xFF, 0xE0, // '@' ... '_'
+ 0x7F, 0xFF, 0xFF, 0xE0 // '`' ... DEL
+};
+
+// Bitmap (same layout as read by isOfSet()) of those base64 chars
+// whose 6-bit value has its two least significant bits cleared:
+// A E I M Q U Y c g k o s w 0 4 8.
+static uchar base64SetWithLastTwoBitsZero[] = {
+ 0x00, 0x00, 0x00, 0x00, // '\0' ...
+ 0x00, 0x00, 0x88, 0x80, // ' ' ... '?'
+ 0x44, 0x44, 0x44, 0x40, // '@' ... '_'
+ 0x11, 0x11, 0x11, 0x00 // '`' ... DEL
+};
+
+// Bitmap (same layout as read by isOfSet()) of the chars the encoder
+// always writes verbatim: 'A'-'Z', 'a'-'z', '0'-'9' and
+// ' ( ) , - . / : ?
+// (this matches the "Set D" of directly encoded chars in rfc2152).
+static uchar directSet[] = {
+ 0x00, 0x00, 0x00, 0x00, // '\0' ...
+ 0x01, 0xCF, 0xFF, 0xE1, // ' ' ... '?'
+ 0x7F, 0xFF, 0xFF, 0xE0, // '@' ... '_'
+ 0x7F, 0xFF, 0xFF, 0xE0 // '`' ... DEL
+};
+
+// Bitmap (same layout as read by isOfSet()) of the chars that are only
+// written verbatim when the encoder is constructed with encOpt == false:
+// ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
+// (this matches the "Set O" of optional direct chars in rfc2152).
+static uchar optDirectSet[] = {
+ 0x00, 0x00, 0x00, 0x00, // '\0' ...
+ 0x7E, 0x20, 0x00, 0x1E, // ' ' ... '?'
+ 0x80, 0x00, 0x00, 0x17, // '@' ... '_'
+ 0x80, 0x00, 0x00, 0x1C // '`' ... DEL
+};
+
+// Tests whether the char "ch" is a member of the bitmap "set".
+//
+// "set" must point to 16 bytes holding one bit per US-ASCII char:
+// char c is represented by bit (0x80 >> c%8) of byte c/8.
+// Chars outside US-ASCII (>= 128) are never members; without this
+// check they would index past the end of the 16-byte tables.
+static inline bool isOfSet(uchar ch, const uchar* set) {
+  if ( ch >= 128 )
+    return false;
+  return ( set[ ch / 8 ] & ( 0x80 >> ( ch % 8 ) ) ) != 0;
+}
+
+// Heuristically checks how much of "chars" looks like well-formed UTF-7.
+//
+// chars: the bytes to examine (need not be NUL-terminated)
+// len:   number of bytes in "chars"
+//
+// Returns the index of the first byte that violates one of the rules
+// below, or "len" if none does:
+//  - 8bit chars (>= 128) are not allowed at all,
+//  - the escaping '+' must be followed by '-' or a base64 char,
+//  - when a shifted-sequence ends, the base64 bits that do not belong
+//    to a complete 16-bit char must all be zero.
+int TQUtf7Codec::heuristicContentMatch(const char* chars, int len) const
+{
+  // Number of base64 chars seen in the current round, modulo 8.
+  // Eight base64 chars (48 bits) carry exactly three 16-bit chars, so
+  // 16-bit char boundaries fall inside the 3rd, the 6th and at the end
+  // of the 8th base64 char of each round.
+  int stepNo = 0;
+  int i;
+  bool shifted = FALSE;
+  bool rightAfterEscape = FALSE;
+  // TRUE as long as all bits seen since the last complete 16-bit char
+  // are zero, i.e. as long as the sequence may legally end here.
+  bool onlyNullBitsSinceLastBoundary = TRUE;
+
+  for ( i = 0; i < len ; i++ ) {
+    const unsigned char ch = (unsigned char)chars[i];
+
+    if ( ch >= 128 ) // 8bit chars not allowed.
+      break;
+
+    if ( !shifted ) {
+      if ( ch == '+' ) {
+        shifted = TRUE;
+        rightAfterEscape = TRUE;
+      }
+      continue;
+    }
+
+    if ( isOfSet(ch,base64Set) ) {
+      // Count the current char *before* classifying it: the cases
+      // below are numbered by how many base64 chars of the round have
+      // been seen including this one. (Classifying first, as was done
+      // previously, shifted every check by one char and made valid
+      // input such as "+AGE-" fail.)
+      stepNo = (stepNo + 1) % 8;
+      switch (stepNo) {
+      case 0:
+        // 8th char: ends exactly on a 16-bit char boundary
+        onlyNullBitsSinceLastBoundary = TRUE;
+        break;
+      case 3:
+        // 3rd char: 4 bits complete a 16-bit char, 2 bits are left over
+        onlyNullBitsSinceLastBoundary
+          = isOfSet(ch,base64SetWithLastTwoBitsZero);
+        break;
+      case 6:
+        // 6th char: 2 bits complete a 16-bit char, 4 bits are left
+        // over; 'A', 'Q', 'g' and 'w' are the base64 chars with the
+        // lower four bits cleared
+        onlyNullBitsSinceLastBoundary
+          = ( ch == 'A' || ch == 'Q' || ch == 'g' || ch == 'w' );
+        break;
+      default:
+        // all six bits are left over; 'A' is the all-zero base64 char
+        onlyNullBitsSinceLastBoundary
+          = onlyNullBitsSinceLastBoundary && (ch == 'A');
+      }
+      rightAfterEscape = FALSE;
+    } else {
+      if (rightAfterEscape && ch != '-')
+        break; // a '+' must be followed by '-' or a base64 char
+      if (!onlyNullBitsSinceLastBoundary)
+        break; // non-zero bits in the tail of the base64 encoding
+      shifted = FALSE;
+      stepNo = 0;
+    }
+  }
+  return i;
+}
+
+// Stateful decoder that turns UTF-7 (rfc2152) bytes into a TQString.
+// The complete parser state lives in the members below, so it carries
+// over from one toUnicode() call to the next.
+class TQUtf7Decoder : public TQTextDecoder {
+ // the storage for our unicode char until it's finished
+ ushort uc;
+ // the state of the base64 decoding
+ // can be 0 (just finished three unicode chars)
+ // 1 (have the upper 6 bits of uc already)
+ // 2 (have the upper 12 bits of uc already)
+ // 3 (have the upper 2 bits of uc already)
+ // ..........
+ // 7 (have the upper 10 bits of uc already)
+ // => n (have the upper (n * 6) % 16 bits of uc already)
+ // "stepNo" cycles through all its values every three
+ // unicode chars.
+ char stepNo;
+ // remembers if we are in shifted-sequence mode
+ bool shifted;
+ // remembers if we're just after the initial '+'
+ // of a shifted-sequence.
+ bool rightAfterEscape;
+public:
+ // Starts outside any shifted-sequence with no pending bits.
+ TQUtf7Decoder() : uc(0), stepNo(0), shifted(FALSE), rightAfterEscape(FALSE)
+ {
+ }
+
+private:
+ // Puts the parser back into its initial state (same values as
+ // set by the constructor). Pending bits in "uc" are discarded.
+ inline void resetParser()
+ {
+ uc = 0;
+ stepNo = 0;
+ shifted = FALSE;
+ rightAfterEscape = FALSE;
+ }
+
+public:
+ // Decodes the "len" bytes at "chars" and returns the chars that
+ // could be completed with them. Bits of a not yet complete char
+ // stay in "uc" and are continued by the next call.
+ //
+ // Ill-formed input never aborts the decoding:
+ // - an 8bit char yields TQChar::replacement and resets the parser
+ // - a non-base64 char right after '+', or non-zero left-over bits
+ // at the end of a shifted-sequence, only produce a qWarning().
+ TQString toUnicode(const char* chars, int len)
+ {
+ TQString result = "";
+ for (int i=0; i<len; i++) {
+ uchar ch = chars[i];
+
+ //
+ // check for 8bit chars:
+ //
+ if ( ch > 127 ) {
+ qWarning("TQUtf7Decoder: 8bit char found in input. "
+ "Parser has been re-initialized!");
+ resetParser();
+ result += TQChar::replacement;
+ continue;
+ }
+
+ if (shifted) { // in shifted mode
+
+ //
+ // first, we check specialities that only occur
+ // right after the escaping '+':
+ //
+ if ( rightAfterEscape && ch == '-' ) {
+ // a "+-" sequence is a short-circuit encoding
+ // for just '+':
+ resetParser();
+ result += TQChar('+');
+ // we're already done for this "ch", so
+ continue;
+ }
+
+ //
+ // Here we're going to extract the bits represented by "ch":
+ // ('A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61,
+ // '+' -> 62, '/' -> 63)
+ //
+ ushort bits;
+ if ( ch >= 'A' && ch <= 'Z' ) {
+ bits = ch - 'A';
+ } else if ( ch >= 'a' && ch <= 'z' ) {
+ bits = ch - 'a' + 26;
+ } else if ( ch >= '0' && ch <= '9' ) {
+ bits = ch - '0' + 52;
+ } else if ( ch == '+' ) {
+ bits = 62;
+ } else if ( ch == '/' ) {
+ bits = 63;
+ } else {
+ bits = 0; // keep compiler happy
+
+ //
+ // ch is not of the base64 alphabet.
+ // Here we are going to check the sequence's validity:
+ //
+ if ( rightAfterEscape ) {
+ // any non-base64 char following an escaping '+'
+ // makes for an ill-formed sequence.
+ // Note that we catch (the valid) "+-" pair
+ // right at the beginning.
+ qWarning("TQUtf7Decoder: ill-formed input: "
+ "non-base64 char after escaping \"+\"!");
+ }
+ // pending bits from base64 encoding must be all 0:
+ if (stepNo >= 1 && uc) {
+ qWarning("TQUtf7Decoder: ill-formed sequence: "
+ "non-zero bits in shifted-sequence tail!");
+ }
+ // leaving the shifted-sequence; this clears "shifted", so
+ // the "still shifted" branch below is skipped for this ch.
+ resetParser();
+
+ // a '-' signifies the end of the shifted-sequence,
+ // so we just swallow it.
+ if ( ch == '-' )
+ continue;
+ // end of validity checking. Process ch now...
+ }
+
+ if ( /*still*/ shifted ) {
+ //
+ // now we're going to stuff the "bits" bit bucket into
+ // the right position inside "uc", emitting a resulting
+ // TQChar if possible.
+ //
+ switch (stepNo) {
+ // "bits" are the 6 msb's of uc
+ case 0: uc = bits << 10; break;
+
+ case 1: uc |= bits << 4; break;
+
+ // 4 bits of "bits" complete the first ushort
+ case 2: uc |= bits >> 2; result += TQChar(uc);
+ // 2 bits of "bits" make the msb's of the next ushort
+ uc = bits << 14; break;
+ case 3: uc |= bits << 8; break;
+ case 4: uc |= bits << 2; break;
+
+ // 2 bits of "bits" complete the second ushort
+ case 5: uc |= bits >> 4; result += TQChar(uc);
+ // 4 bits of "bits" make the msb's of the next ushort
+ uc = bits << 12; break;
+ case 6: uc |= bits << 6; break;
+
+ // these 6 bits complete the third ushort
+ // and also one round of 8 chars -> 3 ushort decoding
+ case 7: uc |= bits; result += TQChar(uc);
+ uc = 0; break;
+ default: ;
+ } // switch (stepNo)
+ // increase the step counter
+ stepNo++;
+ stepNo %= 8;
+ rightAfterEscape = FALSE;
+ // and look at the next char.
+ continue;
+ } // fi (still) shifted
+ } // fi shifted
+
+ //
+ // if control reaches here, we either weren't in a
+ // shifted sequence or we just left one by seeing
+ // a non-base64-char.
+ // Either way, we have to process "ch" outside
+ // a shifted-sequence now:
+ //
+ if ( ch == '+' ) {
+ // '+' is the escape char for entering a
+ // shifted sequence:
+ shifted = TRUE;
+ stepNo = 0;
+ // also, we're right at the beginning where
+ // special rules apply:
+ rightAfterEscape = TRUE;
+ } else {
+ // US-ASCII values are directly used
+ result += TQChar(ch);
+ }
+ }
+
+ return result;
+
+ } // toUnicode()
+
+}; // class TQUtf7Decoder
+
+// Creates a fresh UTF-7 decoder on the heap and hands it to the
+// caller. Nothing here keeps track of the object, so releasing it is
+// left to the caller.
+TQTextDecoder* TQUtf7Codec::makeDecoder() const
+{
+  TQTextDecoder* decoder = new TQUtf7Decoder;
+  return decoder;
+}
+
+
+// Stateful encoder that turns TQStrings into UTF-7 (rfc2152) bytes.
+//
+// The encoder may be fed a text in several fromUnicode() calls. If a
+// call ends inside a shifted-sequence (base64 part), the terminating
+// '-' is held back so that the next call can append to the same
+// sequence. Calling fromUnicode() with a null string flushes that
+// pending '-'.
+class TQUtf7Encoder : public TQTextEncoder {
+  // bitmap over US-ASCII (layout as read by isOfSet()) of the chars
+  // that are written verbatim instead of being base64-encoded
+  uchar dontNeedEncodingSet[16];
+  // bits of the last encoded char that did not fill a whole base64
+  // char yet, already shifted to the top of a 6-bit group
+  ushort outbits;
+  // number of chars (modulo 3) put into the current shifted-sequence;
+  // three 16-bit chars make exactly eight base64 chars
+  uint stepNo : 2;
+  // true while a shifted-sequence is open inside fromUnicode()
+  bool shifted : 1;
+  // true between fromUnicode() calls if the previous call ended with
+  // a shifted-sequence whose terminating '-' has not been written
+  bool mayContinueShiftedSequence : 1;
+public:
+  // encOpt:  if true, the chars of optDirectSet are base64-encoded,
+  //          otherwise they are written verbatim.
+  // encLwsp: if true, space, tab, CR and LF are base64-encoded,
+  //          otherwise they are written verbatim.
+  TQUtf7Encoder(bool encOpt, bool encLwsp)
+    : outbits(0), stepNo(0),
+      shifted(FALSE), mayContinueShiftedSequence(FALSE)
+  {
+    for ( int i = 0; i < 16 ; i++) {
+      dontNeedEncodingSet[i] = directSet[i];
+      if (!encOpt)
+        dontNeedEncodingSet[i] |= optDirectSet[i];
+    }
+    if(!encLwsp) {
+      dontNeedEncodingSet[' '/8] |= 0x80 >> (' '%8);
+      dontNeedEncodingSet['\n'/8] |= 0x80 >> ('\n'%8);
+      dontNeedEncodingSet['\r'/8] |= 0x80 >> ('\r'%8);
+      dontNeedEncodingSet['\t'/8] |= 0x80 >> ('\t'%8);
+    }
+  }
+
+private:
+
+  // Maps a 6-bit value (0..63) to its base64 char.
+  char toBase64( ushort u ) {
+    if ( u < 26 )
+      return (char)u + 'A';
+    else if ( u < 52 )
+      return (char)u - 26 + 'a';
+    else if ( u < 62 )
+      return (char)u - 52 + '0';
+    else if ( u == 62 )
+      return '+';
+    else
+      return '/';
+  }
+
+  // Appends the 16 bits of "u" to the open shifted-sequence at "t".
+  // Bits that do not fill a whole base64 char are kept in "outbits".
+  void addToShiftedSequence(TQCString::Iterator & t, ushort u) {
+    switch (stepNo) {
+    // no outbits; use uppermost 6 bits of u
+    case 0:
+      *t++ = toBase64( u >> 10 );
+      *t++ = toBase64( (u & 0x03FF /* umask top 6 bits */ ) >> 4 );
+      // save 4 lowest-order bits in outbits[5..2]
+      outbits = (u & 0x000F) << 2;
+      break;
+
+    // outbits available; use top two bits of u to complete
+    // the previous char
+    case 1:
+      if (!mayContinueShiftedSequence) {
+        // if mayContinue, this char has already been written
+        *t++ = toBase64( outbits | ( u >> 14 ) );
+      }
+      *t++ = toBase64( (u & 0x3F00 /* mask top 2 bits */ ) >> 8 );
+      *t++ = toBase64( (u & 0x00FC /* mask msbyte */ ) >> 2 );
+      // save 2 lowest-significant bits in outbits[5..4]
+      outbits = (u & 0x0003) << 4;
+      break;
+
+    // outbits available; use top four bits of u to complete
+    // the previous char
+    case 2:
+      if (!mayContinueShiftedSequence) {
+        // if mayContinue, this char has already been written
+        *t++ = toBase64( outbits | ( u >> 12 ) );
+      }
+      *t++ = toBase64( (u & 0x0FFF) >> 6 );
+      *t++ = toBase64( u & 0x003F );
+      break;
+
+    default: ;
+    }
+    stepNo = (stepNo + 1) % 3;
+  }
+
+  // Writes the pending "outbits" (padded with zero bits) as the last
+  // base64 char of the sequence. Does not write the terminating '-'.
+  void endShiftedSequence(TQCString::Iterator & t) {
+    switch (stepNo) {
+    case 1: // four outbits still to be written
+    case 2: // two outbits still to be written
+      *t++ = toBase64( outbits );
+      break;
+    case 0: // nothing to do
+    default: ;
+    }
+    outbits = 0;
+  }
+
+  // depending on the stepNo, checks whether we can continue
+  // an already ended shifted-sequence with char "u".
+  // This is only possible if the topmost bits of "u" fit the padding
+  // bits that endShiftedSequence() has already written as zeros:
+  //  stepNo == 1: 4 outbits + 2 padding bits -> top 2 bits of u
+  //  stepNo == 2: 2 outbits + 4 padding bits -> top 4 bits of u
+  // (these are the bits addToShiftedSequence() would otherwise have
+  // or'ed into the pending base64 char).
+  bool continueOK( ushort u ) {
+    return stepNo == 0 ||
+      ( stepNo == 1 && (u & 0xC000) == 0 ) ||
+      ( stepNo == 2 && (u & 0xF000) == 0 );
+  }
+
+  // Writes "ch" verbatim, closing an open or pending shifted-sequence
+  // first.
+  void processDoesntNeedEncoding(TQCString::Iterator & t, ushort ch) {
+    // doesn't need encoding
+    if (shifted) {
+      endShiftedSequence(t);
+      // add "lead-out" to dis-ambiguate following chars:
+      if (isOfSet((uchar)ch,base64Set) || ch == '-' ) {
+        *t++ = '-';
+      }
+    } else if (mayContinueShiftedSequence) {
+      // if mayContinue is set, this means the
+      // shifted-sequence needs a lead-out.
+      mayContinueShiftedSequence = FALSE;
+      if (isOfSet((uchar)ch,base64Set) || ch == '-' ) {
+        *t++ = '-';
+      }
+    }
+    *t++ = (uchar)ch;
+    shifted = FALSE;
+    stepNo = 0;
+  }
+
+public:
+  // Encodes the first "len_in_out" chars of "uc".
+  //
+  // uc:         the text to encode; a null string requests the
+  //             return to US-ASCII, i.e. flushes a pending '-'.
+  // len_in_out: in:  number of chars of "uc" to encode
+  //             out: number of bytes written to the result
+  //                  (not counting the trailing '\0')
+  TQCString fromUnicode(const TQString & uc, int & len_in_out)
+  {
+    // allocate place for worst case:
+    // len/2 * (5+1) for an alternating sequence of e.g. "A\",
+    // + 4 for a worst-case of another +ABC encoded char
+    // + 1 for the trailing \0
+    // (the '-' that may have to be written once per call to close a
+    // pending shifted-sequence still fits into this bound)
+    //
+    int maxreslen = 3 * len_in_out + 5;
+    TQCString result( maxreslen );
+
+#if 0
+    // if (len_in_out == 1) {
+    cout << "\nlen_in_out: " << len_in_out
+         <<"; shifted: " << (shifted ? "true" : "false")
+         << ";\n" << "mayContinue: "
+         << (mayContinueShiftedSequence ? "true" : "false")
+         << "; stepNo: " << stepNo << ";\n"
+         << "outbits: " << outbits << endl;
+    // }
+#endif
+
+    // source and destination cursor
+    const TQChar * s = uc.tqunicode();
+    TQCString::Iterator t = result.data();
+
+    if ( uc.isNull() ) {
+      // return to ascii requested:
+      if ( mayContinueShiftedSequence )
+        *t++ = '-';
+      // the sequence is closed now; without clearing the flag a
+      // second flush would emit a stray (literal) '-'.
+      mayContinueShiftedSequence = FALSE;
+    } else {
+      // normal operation:
+      for (int i = 0 ; i < len_in_out ;
+           i++/*, checkOutBuf(result,maxreslen,t,i,len_in_out,5)*/ ) {
+        ushort ch = s[i].tqunicode();
+
+        //
+        // first, we check whether we might get around encoding:
+        //
+        if ( ch < 128 ) {
+          //
+          // ch is usAscii, so we have a chance that we don't
+          // need to encode it.
+          //
+          if ( isOfSet((uchar)ch,dontNeedEncodingSet) ) {
+            processDoesntNeedEncoding(t,ch);
+            continue;
+          } else if ( ch == '+' ) {
+            // '+' is the shift escape character
+            if (shifted || mayContinueShiftedSequence) {
+              // if we are already in shifted mode, we just
+              // encode the '+', too. Compare
+              // 24bits ("-+-") + some from ending the shifted-sequence
+              // with 21,33 bits
+              // ('+' has no high bits set, so a pending sequence can
+              // always be continued with it.)
+              addToShiftedSequence(t,ch);
+              mayContinueShiftedSequence = FALSE;
+              shifted = TRUE;
+            } else {
+              // shortcut encoding of '+':
+              *t++ = '+';
+              *t++ = '-';
+            }
+            continue; // done
+          } // else fall through to encoding
+        }
+        //
+        // need encoding
+        //
+        if (!shifted && (!mayContinueShiftedSequence || !continueOK(ch) ) ) {
+          if (mayContinueShiftedSequence) {
+            // The pending shifted-sequence cannot take "ch". It has
+            // to be terminated explicitly, because the '+' that opens
+            // the new sequence is itself a base64 char and would
+            // otherwise be read as part of the old one.
+            *t++ = '-';
+          }
+          *t++ = '+';
+          stepNo = 0;
+        }
+        addToShiftedSequence(t,ch);
+        shifted = TRUE;
+        mayContinueShiftedSequence = FALSE;
+      }
+
+      if ( shifted ) {
+        // write the left-over bits, but hold back the '-' so that
+        // the next call may continue this sequence
+        endShiftedSequence(t);
+        mayContinueShiftedSequence = TRUE;
+      }
+      shifted = FALSE;
+    }
+
+    *t = '\0';
+    len_in_out = t - result.data();
+
+#if 0
+    cout << "len_in_out: " << len_in_out << "; "
+         << "mayContinue: " << (mayContinueShiftedSequence ? "true" : "false")
+         << "; stepNo: " << stepNo << endl;
+#endif
+
+    Q_ASSERT(len_in_out <= maxreslen-1);
+
+    return result;
+  } // fromUnicode()
+
+}; // class TQUtf7Encoder
+
+// Creates the encoder of the regular UTF-7 codec: neither the
+// optional direct chars nor linear whitespace get base64-encoded.
+TQTextEncoder* TQUtf7Codec::makeEncoder() const
+{
+  const bool encodeOptionalDirectChars = false;
+  const bool encodeLinearWhitespace = false;
+  TQTextEncoder* encoder
+    = new TQUtf7Encoder( encodeOptionalDirectChars, encodeLinearWhitespace );
+  return encoder;
+}
+
+// Creates the encoder of the strict UTF-7 codec: the optional direct
+// chars get base64-encoded, linear whitespace does not.
+TQTextEncoder* TQStrictUtf7Codec::makeEncoder() const
+{
+  const bool encodeOptionalDirectChars = true;
+  const bool encodeLinearWhitespace = false;
+  TQTextEncoder* encoder
+    = new TQUtf7Encoder( encodeOptionalDirectChars, encodeLinearWhitespace );
+  return encoder;
+}
+
+#endif // TQT_NO_TEXTCODEC