1 files changed, 668 insertions, 0 deletions
diff --git a/tdeioslave/imap4/rfcdecoder.cc b/tdeioslave/imap4/rfcdecoder.cc
new file mode 100644
index 000000000..08b641b7d
--- /dev/null
+++ b/tdeioslave/imap4/rfcdecoder.cc
@@ -0,0 +1,668 @@
+/**********************************************************************
+ *
+ *   rfcdecoder.cc  - handler for various rfc/mime encodings
+ *   Copyright (C) 2000 s.carstens@gmx.de
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ *   Send comments and bug fixes to s.carstens@gmx.de
+ *
+ *********************************************************************/
+#include "rfcdecoder.h"
+
+#include <ctype.h>
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <tqtextcodec.h>
+#include <tqbuffer.h>
+#include <tqregexp.h>
+#include <kmdcodec.h>
+
+// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
+// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
+
+static unsigned char base64chars[] =  /* alphabet of the modified base64 used in mailbox names; it ends in "+," */
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+#define UNDEFINED 64  /* decode-table value for bytes that are not in base64chars (real indices are 0..63) */
+#define MAXLINE  76
+
+/* UTF-16 surrogate constants used when converting between UTF-16 and UCS-4 */
+#define UTF16MASK       0x03FFUL  /* the 10 payload bits of one surrogate */
+#define UTF16SHIFT      10        /* shift that places the high surrogate's payload */
+#define UTF16BASE       0x10000UL /* first code point that needs a surrogate pair */
+#define UTF16HIGHSTART  0xD800UL
+#define UTF16HIGHEND    0xDBFFUL
+#define UTF16LOSTART    0xDC00UL
+#define UTF16LOEND      0xDFFFUL
+
+/* Convert an IMAP mailbox name in modified UTF-7 to Unicode: "&-" yields '&' and each
+ * "&<base64>-" run is decoded UTF-16 -> UCS-4 -> UTF-8; the UTF-8 bytes are returned as a TQString. */
+TQString rfcDecoder::fromIMAP (const TQString & inSrc)
+{
+  unsigned char c, i, bitcount;
+  unsigned long ucs4, utf16, bitbuf;
+  unsigned char base64[256], utf8[6];
+  unsigned long srcPtr = 0;
+  TQCString dst;
+  TQCString src = inSrc.ascii ();
+  uint srcLen = inSrc.length();
+
+  /* initialize modified base64 decoding table */
+  memset (base64, UNDEFINED, sizeof (base64));
+  for (i = 0; i < sizeof (base64chars); ++i)  /* sizeof also counts the trailing NUL: base64[0] = 64 = UNDEFINED */
+  {
+    base64[(int)base64chars[i]] = i;
+  }
+
+  /* loop until end of string */
+  while (srcPtr < srcLen)
+  {
+    c = src[srcPtr++];
+    /* deal with literal characters and &- */
+    if (c != '&' || src[srcPtr] == '-')
+    {
+      /* copy the byte unchanged */
+      dst += c;
+      /* skip over the '-' if this is an &- sequence */
+      if (c == '&')
+        srcPtr++;
+    }
+    else
+    {
+      /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 */
+      bitbuf = 0;
+      bitcount = 0;
+      ucs4 = 0;
+      while ((c = base64[(unsigned char) src[srcPtr]]) != UNDEFINED)
+      {
+        ++srcPtr;
+        bitbuf = (bitbuf << 6) | c;
+        bitcount += 6;
+        /* enough bits for a UTF-16 character? */
+        if (bitcount >= 16)
+        {
+          bitcount -= 16;
+          utf16 = (bitcount ? bitbuf >> bitcount : bitbuf) & 0xffff;  /* the 16 bits above the bitcount left-over bits */
+          /* convert UTF16 to UCS4 */
+          if (utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND)
+          {
+            ucs4 = (utf16 - UTF16HIGHSTART) << UTF16SHIFT;  /* high surrogate: keep its bits and wait for the low half */
+            continue;
+          }
+          else if (utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND)
+          {
+            ucs4 += utf16 - UTF16LOSTART + UTF16BASE;  /* low surrogate: added onto whatever ucs4 currently holds */
+          }
+          else
+          {
+            ucs4 = utf16;
+          }
+          /* convert UTF-16 range of UCS4 to UTF-8 */
+          if (ucs4 <= 0x7fUL)
+          {
+            utf8[0] = ucs4;
+            i = 1;
+          }
+          else if (ucs4 <= 0x7ffUL)
+          {
+            utf8[0] = 0xc0 | (ucs4 >> 6);
+            utf8[1] = 0x80 | (ucs4 & 0x3f);
+            i = 2;
+          }
+          else if (ucs4 <= 0xffffUL)
+          {
+            utf8[0] = 0xe0 | (ucs4 >> 12);
+            utf8[1] = 0x80 | ((ucs4 >> 6) & 0x3f);
+            utf8[2] = 0x80 | (ucs4 & 0x3f);
+            i = 3;
+          }
+          else
+          {
+            utf8[0] = 0xf0 | (ucs4 >> 18);
+            utf8[1] = 0x80 | ((ucs4 >> 12) & 0x3f);
+            utf8[2] = 0x80 | ((ucs4 >> 6) & 0x3f);
+            utf8[3] = 0x80 | (ucs4 & 0x3f);
+            i = 4;
+          }
+          /* append the i UTF-8 bytes just built */
+          for (c = 0; c < i; ++c)
+          {
+            dst += utf8[c];
+          }
+        }
+      }
+      /* skip over trailing '-' in modified UTF-7 encoding */
+      if (src[srcPtr] == '-')
+        ++srcPtr;
+    }
+  }
+  return TQString::fromUtf8 (dst.data ());
+}
+
+/* Return a copy of src in which every '"' and every '\' is preceded by a backslash. */
+TQString rfcDecoder::quoteIMAP(const TQString &src)
+{
+  const uint srcLen = src.length();
+  TQString escaped;
+  escaped.reserve(2 * srcLen);  // room for the case where every character gets escaped
+  for (uint pos = 0; pos < srcLen; ++pos)
+  {
+    const TQChar ch = src[pos];
+    if (ch == '\\' || ch == '"')
+      escaped += '\\';
+    escaped += ch;
+  }
+  return escaped;
+}
+
+/* Convert a Unicode path to a modified UTF-7 IMAP mailbox name: printable ASCII is copied
+ * ('&' as "&-"), everything else becomes "&<base64 of UTF-16>-"; the result goes through quoteIMAP(). */
+TQString rfcDecoder::toIMAP (const TQString & inSrc)
+{
+  unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
+  unsigned long ucs4, bitbuf;
+  TQCString src = inSrc.utf8 ();
+  TQString dst;
+
+  ulong srcPtr = 0;
+  utf7mode = 0;
+  utf8total = 0;
+  bitstogo = 0;
+  utf8pos = 0;
+  bitbuf = 0;
+  ucs4 = 0;
+  while (srcPtr < src.length ())
+  {
+    c = (unsigned char) src[srcPtr++];
+    /* normal (printable ASCII) character? */
+    if (c >= ' ' && c <= '~')
+    {
+      /* switch out of UTF-7 mode, flushing the bits still buffered */
+      if (utf7mode)
+      {
+        if (bitstogo)
+        {
+          dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
+          bitstogo = 0;
+        }
+        dst += '-';
+        utf7mode = 0;
+      }
+      dst += c;
+      /* encode '&' as '&-' */
+      if (c == '&')
+      {
+        dst += '-';
+      }
+      continue;
+    }
+    /* switch to UTF-7 mode */
+    if (!utf7mode)
+    {
+      dst += '&';
+      utf7mode = 1;
+    }
+    /* remaining 7-bit bytes (below ' ' or above '~') are complete one-byte code points */
+    if (c < 0x80)
+    {
+      ucs4 = c;
+      utf8total = 1;
+    }
+    else if (utf8total)
+    {
+      /* continuation byte: save its 6 UTF8 bits into UCS4 */
+      ucs4 = (ucs4 << 6) | (c & 0x3FUL);
+      if (++utf8pos < utf8total)
+      {
+        continue;
+      }
+    }
+    else
+    {
+      /* lead byte: remember the sequence length and keep the payload bits */
+      utf8pos = 1;
+      if (c < 0xE0)
+      {
+        utf8total = 2;
+        ucs4 = c & 0x1F;
+      }
+      else if (c < 0xF0)
+      {
+        utf8total = 3;
+        ucs4 = c & 0x0F;
+      }
+      else
+      {
+        /* a 4-byte lead (11110xxx) carries 3 payload bits; masking with 0x03 lost the top one (U+100000 and up) */
+        utf8total = 4;  /* NOTE: can't convert UTF8 sequences longer than 4 */
+        ucs4 = c & 0x07;
+      }
+      continue;
+    }
+    /* loop to split ucs4 into two utf16 chars if necessary */
+    utf8total = 0;
+    do
+    {
+      if (ucs4 >= UTF16BASE)
+      {
+        ucs4 -= UTF16BASE;
+        bitbuf = (bitbuf << 16) | ((ucs4 >> UTF16SHIFT) + UTF16HIGHSTART);
+        ucs4 = (ucs4 & UTF16MASK) + UTF16LOSTART;
+        utf16flag = 1;
+      }
+      else
+      {
+        bitbuf = (bitbuf << 16) | ucs4;
+        utf16flag = 0;
+      }
+      bitstogo += 16;
+      /* spew out base64, six bits at a time */
+      while (bitstogo >= 6)
+      {
+        bitstogo -= 6;
+        dst += base64chars[(bitstogo ? (bitbuf >> bitstogo) : bitbuf) & 0x3F];
+      }
+    }
+    while (utf16flag);
+  }
+  /* if in UTF-7 mode, finish in ASCII */
+  if (utf7mode)
+  {
+    if (bitstogo)
+    {
+      dst += base64chars[(bitbuf << (6 - bitstogo)) & 0x3F];
+    }
+    dst += '-';
+  }
+  return quoteIMAP(dst);
+}
+
+//-----------------------------------------------------------------------------
+TQString rfcDecoder::decodeQuoting(const TQString &aStr)
+{
+  // Undo backslash quoting: each '\' is dropped and the character after it is copied verbatim.
+  TQString result;
+  for (unsigned int i = 0, strLength = aStr.length(); i < strLength; i++)
+  {
+    if (aStr[i] == '\\' && ++i >= strLength) break;  // a final '\' escapes nothing: stop, do not read past the end
+    result += aStr[i];
+  }
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Look up the TQTextCodec for a charset name; an empty name yields NULL without a lookup.
+TQTextCodec *rfcDecoder::codecForName (const TQString & _str)
+{
+  if (_str.isEmpty ()) return NULL;
+  TQString codecName = _str.lower ();   // lower-case first, then spell "windows" as "cp"
+  codecName.replace ("windows", "cp");
+  return TQTextCodec::codecForName (codecName.latin1 ());
+}
+
+//-----------------------------------------------------------------------------
+// Convenience overload: decode _str and discard the charset that was found.
+const TQString
+rfcDecoder::decodeRFC2047String (const TQString & _str)
+{
+  TQString ignoredCharset;
+  return decodeRFC2047String (_str, ignoredCharset);
+}
+
+//-----------------------------------------------------------------------------
+// Convenience overload: decode _str, hand back the charset, discard the language.
+const TQString
+rfcDecoder::decodeRFC2047String (const TQString & _str, TQString & charset)
+{
+  TQString ignoredLanguage;
+  return decodeRFC2047String (_str, charset, ignoredLanguage);
+}
+
+//-----------------------------------------------------------------------------
+const TQString
+rfcDecoder::decodeRFC2047String (const TQString & _str, TQString & charset,
+                                 TQString & language)
+{
+  // Decodes the RFC 2047 encoded-words ("=?charset?Q?text?=", "=?charset?B?text?=")
+  // found in _str. charset (and language, when the charset carries a "*lang" suffix)
+  // is overwritten each time a charset field is parsed; the collected bytes are
+  // finally converted with the codec for charset when one is available.
+  if (_str.find("=?") < 0)
+    return _str;
+
+  TQCString aStr = _str.ascii ();  // TQString.length() means Unicode chars
+  TQCString result;
+  char *pos, *beg, *end, *mid = NULL;
+  TQCString str;
+  char encoding = 0;
+  bool valid;
+  const int maxLen = 200;       // longest "=?...?=" span that is still accepted
+  int i;                        // characters consumed since the leading '='
+
+  for (pos = aStr.data (); *pos; pos++)
+  {
+    if (pos[0] != '=' || pos[1] != '?')
+    {
+      result += *pos;
+      continue;
+    }
+    beg = pos + 2;
+    end = beg;
+    valid = TRUE;
+    // parse charset name; the casts keep bytes >= 0x80 out of the negative
+    // range, which the <ctype.h> classifiers do not accept
+    for (i = 2, pos += 2;
+         i < maxLen && (*pos != '?' && (ispunct ((unsigned char) *pos)
+                                        || isalnum ((unsigned char) *pos)));
+         i++)
+      pos++;
+    if (*pos != '?' || i < 4 || i >= maxLen)
+      valid = FALSE;
+    else
+    {
+      charset = TQCString (beg, i - 1);  // -2 + 1 for the zero
+      int pt = charset.findRev('*');
+      if (pt != -1)
+      {
+        // save language for later usage
+        language = charset.right (charset.length () - pt - 1);
+
+        // tie off language as defined in rfc2047
+        charset.truncate(pt);
+      }
+      // get encoding and check delimiting question marks; testing pos[1] for
+      // the terminator first keeps pos[2] from being read past the end
+      encoding = toupper ((unsigned char) pos[1]);
+      if (pos[1] == '\0' || pos[2] != '?'
+          || (encoding != 'Q' && encoding != 'B'))
+        valid = FALSE;
+      else
+      {
+        pos += 3;
+        i += 3;
+      }
+    }
+    if (valid)
+    {
+      mid = pos;
+      // search for end of encoded part
+      while (i < maxLen && *pos && !(*pos == '?' && *(pos + 1) == '='))
+      {
+        i++;
+        pos++;
+      }
+      end = pos + 2;            //end now points to the first char after the encoded string
+      if (i >= maxLen || !*pos)
+        valid = FALSE;
+    }
+    if (valid)
+    {
+      // copy the bytes in [mid, pos); the size argument includes the terminator
+      str = TQCString (mid, (uint) (pos - mid) + 1);
+      if (encoding == 'Q')
+      {
+        // Q encoding: '_' stands for a space, everything else is quoted-printable
+        for (i = str.length () - 1; i >= 0; i--)
+          if (str[i] == '_')
+            str[i] = ' ';
+        str = KCodecs::quotedPrintableDecode(str);
+      }
+      else
+      {
+        // decode base64 text
+        str = KCodecs::base64Decode(str);
+      }
+      int len = str.length();
+      for (i = 0; i < len; i++)
+        result += (char) (TQChar) str[i];
+
+      pos = end - 1;
+    }
+    else
+    {
+      // not a well-formed encoded-word: emit the "=?" literally and resume
+      // scanning right after it
+      pos = beg - 2;
+      result += *pos++;
+      result += *pos;
+    }
+  }
+  if (!charset.isEmpty ())
+  {
+    TQTextCodec *aCodec = codecForName (charset.ascii ());
+    if (aCodec)
+    {
+      // all collected bytes are interpreted in whatever charset now holds
+      return aCodec->toUnicode (result);
+    }
+  }
+  return result;
+}
+
+
+//-----------------------------------------------------------------------------
+const char especials[17] = "()<>@,;:\"/[]?.= ";  // 16 characters the encoders in this file escape; the 17th byte is the NUL
+
+const TQString
+rfcDecoder::encodeRFC2047String (const TQString & _str)
+{
+  // Encode _str as RFC 2047 "Q" encoded-words in iso-8859-1. Text in front of a word that
+  // holds an 8-bit byte is copied unchanged; from that word on, a chunk is wrapped in =?...?=.
+  if (_str.isEmpty ())
+    return _str;
+  const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1()), *l, *start, *stop;
+  char hexcode;
+  int numQuotes, i, rptr = 0;
+  // My stats show this number results in 12 resize() out of 73,000
+  int resultLen = 3 * _str.length() / 2;
+  TQCString result(resultLen);
+  while (*latin)
+  {
+    l = latin;
+    start = latin;
+    while (*l)          // find the next 8-bit byte; start follows the last space before it
+    {
+      if (*l == 32)
+        start = l + 1;
+      if (*l < 0)
+        break;
+      l++;
+    }
+    if (*l)
+    {
+      numQuotes = 1;
+      while (*l)
+      {
+        /* The encoded word must be limited to 75 character */
+        for (i = 0; i < 16; i++)
+          if (*l == especials[i])
+            numQuotes++;
+        if (*l < 0 || *l == '_')
+          numQuotes++;
+        /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
+        if (l - start + 2 * numQuotes >= 58 || *l == 60)
+          break;
+        l++;
+      }
+      if (*l)
+      {
+        stop = l - 1;
+        while (stop >= start && *stop != 32)
+          stop--;
+        if (stop <= start)
+          stop = l;
+      }
+      else
+        stop = l;
+      if (resultLen - rptr - 1 <= start -  latin + 1 + 16 /* =?iso-88... */) {
+        resultLen += (start - latin + 1) * 2 + 20; // more space
+        result.resize(resultLen);
+      }
+      while (latin < start)
+      {
+        result[rptr++] = *latin;
+        latin++;
+      }
+      strcpy(&result[rptr], "=?iso-8859-1?q?"); rptr += 15;
+      if (resultLen - rptr - 1 <= 3*(stop - latin + 1)) {
+        resultLen += (stop - latin + 1) * 4 + 20; // more space
+        result.resize(resultLen);
+      }
+      while (latin < stop) // can add up to 3 chars/iteration
+      {
+        // '_' means space in Q encoding, so a literal underscore is escaped
+        // (=5F) along with 8-bit bytes and the especials
+        numQuotes = (*latin < 0 || *latin == '_') ? 1 : 0;
+        for (i = 0; i < 16; i++)
+          if (*latin == especials[i])
+            numQuotes = 1;
+        if (numQuotes)
+        {
+          result[rptr++] = '=';
+          hexcode = ((*latin & 0xF0) >> 4) + 48;
+          if (hexcode >= 58)
+            hexcode += 7;
+          result[rptr++] = hexcode;
+          hexcode = (*latin & 0x0F) + 48;
+          if (hexcode >= 58)
+            hexcode += 7;
+          result[rptr++] = hexcode;
+        }
+        else
+        {
+          result[rptr++] = *latin;
+        }
+        latin++;
+      }
+      result[rptr++] = '?';
+      result[rptr++] = '=';
+    }
+    else
+    {
+      while (*latin)
+      {
+        if (rptr == resultLen - 1) {
+          resultLen += 30;
+          result.resize(resultLen);
+        }
+        result[rptr++] = *latin;
+        latin++;
+      }
+    }
+  }
+  result[rptr] = 0;
+  // the buffer may be larger than the text; the NUL written above ends the string
+  return result;
+}
+
+
+//-----------------------------------------------------------------------------
+const TQString
+rfcDecoder::encodeRFC2231String (const TQString & _str)
+{
+  // Percent-encode _str (taken as Latin-1) for use as an RFC 2231 extended
+  // parameter value. Only the value part is produced: no charset'language'
+  // prefix is added here. Text without any 8-bit byte is returned unencoded.
+  if (_str.isEmpty ())
+    return _str;
+  // Work directly on the Latin-1 view of _str, as encodeRFC2047String does;
+  // this avoids the unchecked calloc()/strcpy()/free() scratch copy.
+  const signed char *latin = reinterpret_cast<const signed char *>(_str.latin1());
+  const signed char *l = latin;
+  while (*l && *l >= 0)
+    l++;
+  // pure 7-bit text needs no encoding at all
+  if (!*l)
+    return _str.ascii ();
+  TQCString result;
+  char hexcode;
+  int i;
+  bool quote;
+  for (l = latin; *l; l++)
+  {
+    // 8-bit bytes and the shared especials are always escaped. '%', '\'' and '*'
+    // are escaped too: '%' starts an escape, '\'' delimits charset'language'
+    // (decodeRFC2231String splits on it), and RFC 2231 excludes all three from
+    // attribute-char.
+    quote = *l < 0 || *l == '%' || *l == '\'' || *l == '*';
+    for (i = 0; i < 16; i++)
+      if (*l == especials[i])
+        quote = true;
+    if (quote)
+    {
+      // two upper-case hex digits: 48 is '0', the extra 7 skips from ':' to 'A'
+      result += "%";
+      hexcode = ((*l & 0xF0) >> 4) + 48;
+      if (hexcode >= 58)
+        hexcode += 7;
+      result += hexcode;
+      hexcode = (*l & 0x0F) + 48;
+      if (hexcode >= 58)
+        hexcode += 7;
+      result += hexcode;
+    }
+    else
+    {
+      result += (char) *l;
+    }
+  }
+  return result;
+}
+
+
+//-----------------------------------------------------------------------------
+const TQString
+rfcDecoder::decodeRFC2231String (const TQString & _str)
+{
+  // Decode an RFC 2231 extended value of the form charset'language'text.
+  // Input that does not contain two distinct single quotes is returned unchanged.
+  int p = _str.find ('\'');
+  int l = _str.findRev ('\'');
+  if (p < 0 || p >= l)
+    return _str;
+
+  // NOTE(review): the charset (before the first quote) and the language
+  // (between the quotes) are not used; every %XX escape becomes the character
+  // with that code, i.e. the bytes are read as Latin-1 whatever charset is named.
+  const TQString src = _str.mid (l + 1);
+  const int len = (int) src.length ();
+  TQString st;
+  for (p = 0; p < len; p++)
+  {
+    if (src.at (p) == '%' && p + 2 < len)
+    {
+      // accept both cases of hex digits; a malformed or truncated escape is
+      // copied through literally instead of producing a garbage character
+      int value = 0;
+      bool ok = true;
+      for (int k = 1; k <= 2; k++)
+      {
+        const int c = (unsigned char) src.at (p + k).latin1 ();
+        if (!isxdigit (c)) ok = false;
+        value = value * 16 + (isdigit (c) ? c - '0' : toupper (c) - 'A' + 10);
+      }
+      if (ok)
+      {
+        st += TQChar ((unsigned short) value);
+        p += 2;
+        continue;
+      }
+    }
+    st += src.at (p);
+  }
+  return st;
+}