summaryrefslogtreecommitdiffstats
path: root/libkmime/kmime_codec_qp.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libkmime/kmime_codec_qp.cpp')
-rw-r--r--libkmime/kmime_codec_qp.cpp644
1 files changed, 644 insertions, 0 deletions
diff --git a/libkmime/kmime_codec_qp.cpp b/libkmime/kmime_codec_qp.cpp
new file mode 100644
index 000000000..c867a6346
--- /dev/null
+++ b/libkmime/kmime_codec_qp.cpp
@@ -0,0 +1,644 @@
+/* -*- c++ -*-
+ kmime_codec_qp.cpp
+
+ This file is part of KMime, the KDE internet mail/usenet news message library.
+ Copyright (c) 2002 Marc Mutz <mutz@kde.org>
+
+ KMime is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License, version 2, as
+ published by the Free Software Foundation.
+
+ KMime is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ In addition, as a special exception, the copyright holders give
+ permission to link the code of this library with any edition of
+ the Qt library by Trolltech AS, Norway (or with modified versions
+ of Qt that use the same license as Qt), and distribute linked
+ combinations including the two. You must obey the GNU General
+ Public License in all respects for all of the code used other than
+ Qt. If you modify this file, you may extend this exception to
+ your version of the file, but you are not obligated to do so. If
+ you do not wish to do so, delete this exception statement from
+ your version.
+*/
+
+#include "kmime_codec_qp.h"
+
+#include "kmime_util.h"
+
+#include <kdebug.h>
+
+#include <cassert>
+
+using namespace KMime;
+
+namespace KMime {
+
+// some helpful functions:
+
+// Maps a small number to its upper-case hexadecimal digit: values up
+// to 9 become '0'..'9', larger ones are counted up from 'A'. Meant to
+// be fed with a nibble (0..15).
+static inline char binToHex( uchar value ) {
+  return ( value <= 9 ) ? value + '0' : value + 'A' - 10;
+}
+
+// Returns the upper four bits of ch, shifted down into 0..15.
+static inline uchar highNibble( uchar ch ) {
+  const uchar upper = ch >> 4;
+  return upper;
+}
+
+// Returns the lower four bits of ch (0..15).
+static inline uchar lowNibble( uchar ch ) {
+  const uchar lower = ch & 0xF;
+  return lower;
+}
+
+// True for every char that is neither '?' nor a control char below
+// SPACE; HT is the one control char that is accepted.
+static inline bool keep( uchar ch ) {
+  const bool ctlOtherThanTab = ( ch < ' ' ) && ( ch != '\t' );
+  return !ctlOtherThanTab && ch != '?';
+}
+
+//
+// QuotedPrintableCodec
+//
+
+/** Encoder for quoted-printable.
+
+    Input is staged in a 16-byte ring buffer (line endings are not
+    stored there), taken out one char at a time into @p mAccu and then
+    written either verbatim or as a three-char hex sequence, with soft
+    line breaks inserted to respect the maximum line length.
+*/
+class QuotedPrintableEncoder : public Encoder {
+  char mInputBuffer[16];            // ring buffer addressed by the two cursors
+  uchar mCurrentLineLength;         // 0..76
+  uchar mAccu;                      // the char currently being processed
+  uint mInputBufferReadCursor : 4;  // 0..15
+  uint mInputBufferWriteCursor : 4; // 0..15
+  // How mAccu has to be treated when it is written out:
+  enum {
+    Never, AtBOL, Definitely
+  } mAccuNeedsEncoding : 2;
+  bool mSawLineEnd : 1;             // a complete input line is buffered
+  bool mSawCR      : 1;             // the char buffered last was a CR
+  bool mFinishing  : 1;             // finish() has been called
+  bool mFinished   : 1;             // finish() has drained the input buffer
+protected:
+  friend class QuotedPrintableCodec;
+  QuotedPrintableEncoder( bool withCRLF=false )
+    : Encoder( withCRLF ),
+      mCurrentLineLength(0),
+      mAccu(0),
+      mInputBufferReadCursor(0),
+      mInputBufferWriteCursor(0),
+      mAccuNeedsEncoding(Never),
+      mSawLineEnd(false),
+      mSawCR(false),
+      mFinishing(false),
+      mFinished(false) {}
+
+  /** Chars that are hex-encoded wherever they appear: '=', everything
+      above '~' and all control chars except HT. */
+  bool needsEncoding( uchar ch ) {
+    if ( ch == '=' || ch > '~' )
+      return true;
+    return ch < ' ' && ch != '\t';
+  }
+  /** Whitespace (SPACE, HT) must be encoded when it ends a line. */
+  bool needsEncodingAtEOL( uchar ch ) {
+    return ( ch == '\t' || ch == ' ' );
+  }
+  /** 'F', '.' and '-' are encoded when they start a line. */
+  bool needsEncodingAtBOL( uchar ch ) {
+    switch ( ch ) {
+    case 'F':
+    case '.':
+    case '-':
+      return true;
+    default:
+      return false;
+    }
+  }
+  bool fillInputBuffer( const char* & scursor, const char * const send );
+  bool processNextChar();
+  void createOutputBuffer( char* & dcursor, const char * const dend );
+public:
+  virtual ~QuotedPrintableEncoder() {}
+
+  bool encode( const char* & scursor, const char * const send,
+               char* & dcursor, const char * const dend );
+
+  bool finish( char* & dcursor, const char * const dend );
+};
+
+
+/** Decoder used for quoted-printable as well as for the RFC 2047 "Q"
+ and RFC 2231 encodings; the codecs in this file select the variant
+ through the constructor arguments (escape char and '_' handling). */
+class QuotedPrintableDecoder : public Decoder {
+ /** The char that introduces a hexchar ('=' unless told otherwise). */
+ const char mEscapeChar;
+ /** Offending char remembered while flushing after a decoding error;
+ NUL means there is nothing to output for it. */
+ char mBadChar;
+ /** @p mAccu holds the msb nibble of the hexchar (already shifted into
+ the upper four bits) or zero. */
+ uchar mAccu;
+ /** If true, an unescaped '_' is decoded to SPACE (0x20). */
+ const bool mQEncoding;
+ /** @p mInsideHexChar is true iff we're inside an hexchar (=XY).
+ Together with @ref mHaveAccu, we can build this states:
+ @li @p mInsideHexChar == @p false:
+ normal text
+ @li @p mInsideHexChar == @p true, @p mHaveAccu == @p false:
+ saw the leading escape char
+ @li @p mInsideHexChar == @p true, @p mHaveAccu == @p true:
+ saw the first nibble '=X', which is stored in @ref mAccu
+ */
+ bool mInsideHexChar;
+ /** True while the remains of a malformed hexchar are written out
+ verbatim (see the comment at the top of the loop in decode()). */
+ bool mFlushing;
+ /** True after a CR has been read, until the next char is looked at. */
+ bool mExpectLF;
+ /** True iff @ref mAccu currently holds the first nibble of a hexchar. */
+ bool mHaveAccu;
+protected:
+ friend class QuotedPrintableCodec;
+ friend class Rfc2047QEncodingCodec;
+ friend class Rfc2231EncodingCodec;
+ /** @param withCRLF passed on to the Decoder base class
+ @param aQEncoding if true, decode '_' as SPACE
+ @param aEscapeChar the char introducing a hexchar */
+ QuotedPrintableDecoder( bool withCRLF=false,
+ bool aQEncoding=false, char aEscapeChar='=' )
+ : Decoder( withCRLF ),
+ mEscapeChar(aEscapeChar),
+ mBadChar(0),
+ mAccu(0),
+ mQEncoding(aQEncoding),
+ mInsideHexChar(false),
+ mFlushing(false),
+ mExpectLF(false),
+ mHaveAccu(false) {}
+public:
+ virtual ~QuotedPrintableDecoder() {}
+
+ bool decode( const char* & scursor, const char * const send,
+ char* & dcursor, const char * const dend );
+ // ### really no finishing needed???
+ // NOTE(review): decode() only flushes while there is still input
+ // left, so a flush or a half-read hexchar that is pending when the
+ // input ends is never written out by this no-op.
+ bool finish( char* &, const char * const ) { return true; }
+};
+
+
+/** Encoder for the RFC 2047 "Q" encoding and, when constructed with
+    '%' as escape char, for the RFC 2231 encoding.
+
+    A char that needs encoding is written in up to three steps (escape
+    char, high nibble, low nibble); @p mStepNo remembers which of them
+    comes next, so encoding can be resumed when the output runs full.
+*/
+class Rfc2047QEncodingEncoder : public Encoder {
+  uchar mAccu;                // the char currently being encoded
+  uchar mStepNo;              // 0: read next char, 1: hi-nibble, 2: lo-nibble
+  const char mEscapeChar;     // '=' (rfc2047) or '%' (rfc2231)
+  bool mInsideFinishing : 1;  // finish() has been called
+protected:
+  friend class Rfc2047QEncodingCodec;
+  friend class Rfc2231EncodingCodec;
+  Rfc2047QEncodingEncoder( bool withCRLF=false, char aEscapeChar='=' )
+    : Encoder( withCRLF ),
+      mAccu(0),
+      mStepNo(0),
+      mEscapeChar( aEscapeChar ),
+      mInsideFinishing( false )
+  {
+    // any other escape char might break an optimization in ::encode.
+    assert( aEscapeChar == '=' || aEscapeChar == '%' );
+  }
+
+  // This code relies on isEText( mEscapeChar ) being false!
+  bool needsEncoding( uchar ch ) {
+    // {|}~ DEL and 8bit chars are tested first and never reach
+    // isEText(); of the rest, everything that isn't etext
+    // (a-zA-Z0-9!/*+-) needs encoding, too:
+    if ( ch > 'z' || !isEText( ch ) )
+      return true;
+    // '*' and '/' are etext, but not allowed in rfc2231 encoding:
+    return mEscapeChar == '%' && ( ch == '*' || ch == '/' );
+  }
+
+public:
+  virtual ~Rfc2047QEncodingEncoder() {}
+
+  bool encode( const char* & scursor, const char * const send,
+               char* & dcursor, const char * const dend );
+  bool finish( char* & dcursor, const char * const dend );
+};
+
+// Upper bound for the decoded size of insize input bytes. Kept as a
+// free static function (it needs no member variables) so that the
+// virtual maxDecodedSizeFor() methods of all three codecs can share it.
+static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF ) {
+  // Worst case all chars are unencoded; with CRLF conversion each of
+  // them might even be a \n that has to become \r\n:
+  const int payload = withCRLF ? insize + insize : insize;
+
+  // plus room for a pending escape char and accu:
+  return payload + 2;
+}
+
+// Returns a newly allocated quoted-printable encoder.
+// NOTE(review): presumably the caller takes ownership - confirm.
+Encoder * QuotedPrintableCodec::makeEncoder( bool withCRLF ) const {
+  Encoder * const encoder = new QuotedPrintableEncoder( withCRLF );
+  return encoder;
+}
+
+// Returns a newly allocated quoted-printable decoder (escape char
+// '=', no special treatment of '_').
+Decoder * QuotedPrintableCodec::makeDecoder( bool withCRLF ) const {
+  Decoder * const decoder = new QuotedPrintableDecoder( withCRLF );
+  return decoder;
+}
+
+// Upper bound for the size of the decoded form of insize input bytes.
+int QuotedPrintableCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
+  const int bound = QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
+  return bound;
+}
+
+// Returns a newly allocated "Q" encoder using the default escape
+// char '='.
+Encoder * Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const {
+  Encoder * const encoder = new Rfc2047QEncodingEncoder( withCRLF );
+  return encoder;
+}
+
+// Returns a newly allocated decoder with Q-encoding turned on, i.e.
+// one that decodes '_' to SPACE.
+Decoder * Rfc2047QEncodingCodec::makeDecoder( bool withCRLF ) const {
+  Decoder * const decoder = new QuotedPrintableDecoder( withCRLF, true );
+  return decoder;
+}
+
+// Upper bound for the size of the decoded form of insize input bytes.
+int Rfc2047QEncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
+  const int bound = QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
+  return bound;
+}
+
+// Returns a newly allocated encoder that uses '%' as escape char.
+Encoder * Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const {
+  Encoder * const encoder = new Rfc2047QEncodingEncoder( withCRLF, '%' );
+  return encoder;
+}
+
+// Returns a newly allocated decoder with Q-encoding turned on and '%'
+// as escape char.
+Decoder * Rfc2231EncodingCodec::makeDecoder( bool withCRLF ) const {
+  Decoder * const decoder = new QuotedPrintableDecoder( withCRLF, true, '%' );
+  return decoder;
+}
+
+// Upper bound for the size of the decoded form of insize input bytes.
+int Rfc2231EncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const {
+  const int bound = QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
+  return bound;
+}
+
+ /********************************************************/
+ /********************************************************/
+ /********************************************************/
+
+// Decodes as much of [scursor,send) into [dcursor,dend) as fits.
+//
+// Hexchars (escape char followed by two hex digits, upper or lower
+// case) are turned into the byte they denote, an escape char directly
+// followed by LF or CRLF is a soft line break and produces no output,
+// and in Q-encoding mode '_' becomes SPACE. Malformed hexchars are
+// written out verbatim through the flushing state described below;
+// chars that are illegal in the input are dropped with a warning.
+//
+// Both cursors are advanced past what has been consumed resp.
+// written. The state is kept in members, so a hexchar may be split
+// across calls.
+//
+// Returns true iff all input has been consumed.
+bool QuotedPrintableDecoder::decode( const char* & scursor, const char * const send,
+                                     char* & dcursor, const char * const dend ) {
+  if ( mWithCRLF )
+    kdWarning() << "CRLF output for decoders isn't yet supported!" << endl;
+
+  while ( scursor != send && dcursor != dend ) {
+    if ( mFlushing ) {
+      // we have to flush chars in the aftermath of an decoding
+      // error. The way to request a flush is to
+      // - store the offending character in mBadChar and
+      // - set mFlushing to true.
+      // The supported cases are (H: hexchar, X: bad char):
+      // =X, =HX, CR
+      // mBadChar is only written out if it is not by itself illegal in
+      // quoted-printable (e.g. CTLs, 8Bits).
+      // A fast way to suppress mBadChar output is to set it to NUL.
+      // Each pass through here writes at most one char, so the loop
+      // condition keeps guarding the output buffer.
+      if ( mInsideHexChar ) {
+        // output '='
+        *dcursor++ = mEscapeChar;
+        mInsideHexChar = false;
+      } else if ( mHaveAccu ) {
+        // output the high nibble of the accumulator:
+        *dcursor++ = binToHex( highNibble( mAccu ) );
+        mHaveAccu = false;
+        mAccu = 0;
+      } else {
+        // output mBadChar
+        assert( mAccu == 0 );
+        if ( mBadChar ) {
+          // printable ascii except '=' only:
+          if ( mBadChar >= '>' && mBadChar <= '~' ||
+               mBadChar >= '!' && mBadChar <= '<' )
+            *dcursor++ = mBadChar;
+          mBadChar = 0;
+        }
+        mFlushing = false;
+      }
+      continue;
+    }
+    assert( mBadChar == 0 );
+
+    uchar ch = *scursor++;
+    uchar value = 255;
+
+    if ( mExpectLF && ch != '\n' ) {
+      kdWarning() << "QuotedPrintableDecoder: "
+        "illegally formed soft linebreak or lonely CR!" << endl;
+      mInsideHexChar = false;
+      mExpectLF = false;
+      // The CR may have followed the first nibble of a hexchar
+      // ("=H<CR>"). That nibble has to be dropped along with the
+      // escape char; left in place it would be combined with the
+      // first digit of the next hexchar.
+      mHaveAccu = false;
+      mAccu = 0;
+    }
+
+    if ( mInsideHexChar ) {
+      // next char(s) represent nibble instead of itself:
+      if ( ch <= '9' ) {
+        if ( ch >= '0' ) {
+          value = ch - '0';
+        } else {
+          switch ( ch ) {
+          case '\r':
+            mExpectLF = true;
+            break;
+          case '\n':
+            // soft line break, but only if mAccu is NUL.
+            if ( !mHaveAccu ) {
+              mExpectLF = false;
+              mInsideHexChar = false;
+              break;
+            }
+            // else fall through
+          default:
+            kdWarning() << "QuotedPrintableDecoder: "
+              "illegally formed hex char! Outputting verbatim." << endl;
+            mBadChar = ch;
+            mFlushing = true;
+          }
+          continue;
+        }
+      } else { // ch > '9'
+        if ( ch <= 'F' ) {
+          if ( ch >= 'A' ) {
+            value = 10 + ch - 'A';
+          } else { // [:-@]
+            mBadChar = ch;
+            mFlushing = true;
+            continue;
+          }
+        } else { // ch > 'F'
+          if ( ch <= 'f' && ch >= 'a' ) {
+            value = 10 + ch - 'a';
+          } else {
+            mBadChar = ch;
+            mFlushing = true;
+            continue;
+          }
+        }
+      }
+
+      assert( value < 16 );
+      assert( mBadChar == 0 );
+      assert( !mExpectLF );
+
+      if ( mHaveAccu ) {
+        // second nibble: the byte is complete
+        *dcursor++ = char( mAccu | value );
+        mAccu = 0;
+        mHaveAccu = false;
+        mInsideHexChar = false;
+      } else {
+        // first nibble: park it in the upper half of mAccu
+        mHaveAccu = true;
+        mAccu = value << 4;
+      }
+    } else { // not mInsideHexChar
+      if ( ch <= '~' && ch >= ' ' || ch == '\t' ) {
+        if ( ch == mEscapeChar ) {
+          mInsideHexChar = true;
+        } else if ( mQEncoding && ch == '_' ) {
+          *dcursor++ = char(0x20);
+        } else {
+          *dcursor++ = char(ch);
+        }
+      } else if ( ch == '\n' ) {
+        *dcursor++ = '\n';
+        mExpectLF = false;
+      } else if ( ch == '\r' ) {
+        // the CR itself is not written; only the LF is
+        mExpectLF = true;
+      } else {
+        kdWarning() << "QuotedPrintableDecoder: " << ch <<
+          " illegal character in input stream! Ignoring." << endl;
+      }
+    }
+  }
+
+  return (scursor == send);
+}
+
+// Moves chars from [scursor,send) into the input ring buffer until
+// the buffer is full, the input is used up or a line ending (LF or
+// CRLF) has been read. The line ending itself doesn't end up in the
+// buffer.
+//
+// Returns true iff a complete line is waiting in the buffer.
+bool QuotedPrintableEncoder::fillInputBuffer( const char* & scursor,
+                                              const char * const send ) {
+  // The tail of the previous line has to be consumed first:
+  if ( mSawLineEnd )
+    return true;
+
+  // One slot always stays free so that "full" and "empty" can be told
+  // apart. The write cursor is a 4-bit field and wraps by itself.
+  while ( ( mInputBufferWriteCursor + 1 ) % 16 != mInputBufferReadCursor
+          && scursor != send ) {
+    const char ch = *scursor++;
+
+    if ( ch == '\n' ) {
+      // A directly preceding CR is already in the buffer; take it out
+      // again so only the line's content remains:
+      if ( mSawCR ) {
+        mSawCR = false;
+        assert( mInputBufferWriteCursor != mInputBufferReadCursor );
+        mInputBufferWriteCursor--;
+      }
+      mSawLineEnd = true;
+      return true; // saw CRLF or LF
+    }
+
+    // A CR is buffered like any other char, we just remember that it
+    // was the last one:
+    mSawCR = ( ch == '\r' );
+    mInputBuffer[ mInputBufferWriteCursor ] = ch;
+    mInputBufferWriteCursor++;
+  }
+
+  mSawLineEnd = false;
+  return false; // no line ending so far
+}
+
+// Takes the next char out of the input buffer into mAccu and records
+// in mAccuNeedsEncoding how it has to be written.
+//
+// Returns false if no char has been taken, either because the buffer
+// is empty or because too few chars are left to rule out that the
+// current one ends its line.
+bool QuotedPrintableEncoder::processNextChar() {
+
+  // As long as neither a line ending has been seen nor the input is
+  // finished, the chars read next could be a line break. So this many
+  // chars are held back in the buffer:
+  const int minBufferFillWithoutLineEnd = 4;
+
+  assert( mOutputBufferCursor == 0 );
+
+  // number of chars in the ring buffer:
+  int pending = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor) ;
+  if ( pending < 0 )
+    pending += 16;
+
+  assert( pending >=0 && pending <= 15 );
+
+  const bool endIsKnown = mFinishing || mSawLineEnd;
+
+  if ( !endIsKnown && pending < minBufferFillWithoutLineEnd )
+    return false;
+
+  // nothing buffered at all:
+  if ( pending == 0 )
+    return false;
+
+  mAccu = mInputBuffer[ mInputBufferReadCursor++ ];
+
+  // mAccu was the only char left, so it is the last one before the
+  // end of the line resp. of the data:
+  const bool atEnd = endIsKnown && pending == 1;
+
+  if ( needsEncoding( mAccu ) || ( atEnd && needsEncodingAtEOL( mAccu ) ) )
+    mAccuNeedsEncoding = Definitely;
+  else if ( needsEncodingAtBOL( mAccu ) )
+    mAccuNeedsEncoding = AtBOL;
+  else
+    mAccuNeedsEncoding = Never;
+
+  return true;
+}
+
+// Writes mAccu, verbatim or hex-encoded as decided by
+// processNextChar(), and puts a soft line break in front of it if the
+// current output line couldn't take it anymore.
+void QuotedPrintableEncoder::createOutputBuffer( char* & dcursor,
+                                                 const char * const dend )
+{
+  const int maxLineLength = 76; // rfc 2045
+
+  assert( mOutputBufferCursor == 0 );
+
+  // mAccu ends its input line if the line is complete and nothing of
+  // it is left in the input buffer:
+  const bool lastOneOnThisLine = mSawLineEnd
+    && mInputBufferReadCursor == mInputBufferWriteCursor;
+
+  // one char verbatim, three chars hex-encoded...
+  int neededSpace = ( mAccuNeedsEncoding == Definitely ) ? 3 : 1;
+  // ...plus the '=' of a soft line break, unless a hard one follows:
+  if ( !lastOneOnThisLine )
+    ++neededSpace;
+
+  if ( mCurrentLineLength + neededSpace > maxLineLength ) {
+    // not enough room left on the current line, insert soft line break:
+    write( '=', dcursor, dend );
+    writeCRLF( dcursor, dend );
+    mCurrentLineLength = 0;
+  }
+
+  // Hex-encode what always needs it, and AtBOL chars if they really
+  // are at the beginning of the line:
+  if ( mAccuNeedsEncoding != Never &&
+       ( mAccuNeedsEncoding != AtBOL || mCurrentLineLength == 0 ) ) {
+    write( '=', dcursor, dend );
+    write( binToHex( highNibble( mAccu ) ), dcursor, dend );
+    write( binToHex( lowNibble( mAccu ) ), dcursor, dend );
+    mCurrentLineLength += 3;
+  } else {
+    write( mAccu, dcursor, dend );
+    mCurrentLineLength++;
+  }
+}
+
+
+// Encodes input from [scursor,send) into [dcursor,dend), advancing
+// both cursors. Some chars may stay behind in the input buffer; they
+// are written by a later call or by finish().
+//
+// Returns true iff all input has been read (always true once finish()
+// has been called).
+bool QuotedPrintableEncoder::encode( const char* & scursor, const char * const send,
+                                     char* & dcursor, const char * const dend )
+{
+  // support probing by the caller:
+  if ( mFinishing ) return true;
+
+  while ( scursor != send && dcursor != dend ) {
+    // Output left over from the previous round goes first:
+    if ( mOutputBufferCursor ) {
+      if ( !flushOutputBuffer( dcursor, dend ) )
+        return (scursor == send);
+    }
+
+    assert( mOutputBufferCursor == 0 );
+
+    // read until eol or until the input buffer is full, whatever
+    // comes first:
+    fillInputBuffer( scursor, send );
+
+    if ( processNextChar() ) {
+      // got a char, write it:
+      createOutputBuffer( dcursor, dend );
+      continue;
+    }
+
+    const bool lineDone = mSawLineEnd
+      && mInputBufferWriteCursor == mInputBufferReadCursor;
+    if ( !lineDone )
+      // nothing more to do with this input block for now:
+      break;
+
+    // The line has been written completely: add the hard line break
+    // and let fillInputBuffer() go on with the next line.
+    writeCRLF( dcursor, dend );
+    mSawLineEnd = false;
+    mCurrentLineLength = 0;
+  }
+
+  // Running out of _input_ is no reason to hold back _output_:
+  if ( mOutputBufferCursor ) flushOutputBuffer( dcursor, dend );
+
+  return (scursor == send);
+
+} // encode
+
+// Writes out what is still held in the input and output buffers,
+// treating the end of the data like an end of line. May have to be
+// called repeatedly if [dcursor,dend) is too small.
+//
+// Once the input buffer has been drained, the result of
+// flushOutputBuffer() is returned.
+bool QuotedPrintableEncoder::finish( char* & dcursor,
+                                     const char * const dend ) {
+  mFinishing = true;
+
+  // Input buffer already drained by an earlier call:
+  if ( mFinished )
+    return flushOutputBuffer( dcursor, dend );
+
+  while ( dcursor != dend ) {
+    if ( mOutputBufferCursor ) {
+      if ( !flushOutputBuffer( dcursor, dend ) )
+        return false;
+    }
+
+    assert( mOutputBufferCursor == 0 );
+
+    if ( processNextChar() ) {
+      // got a char, write it:
+      createOutputBuffer( dcursor, dend );
+      continue;
+    }
+
+    const bool lineDone = mSawLineEnd
+      && mInputBufferWriteCursor == mInputBufferReadCursor;
+    if ( !lineDone ) {
+      // nothing left to process:
+      mFinished = true;
+      return flushOutputBuffer( dcursor, dend );
+    }
+
+    // the buffered line is complete, add its hard line break:
+    writeCRLF( dcursor, dend );
+    mSawLineEnd = false;
+    mCurrentLineLength = 0;
+  }
+
+  return mFinished && !mOutputBufferCursor;
+
+} // finish
+
+
+// Encodes input from [scursor,send) into [dcursor,dend), advancing
+// both cursors. A char that needs encoding takes three output chars,
+// written one per loop iteration; mStepNo remembers how far we got,
+// so the output may run full in between.
+//
+// Returns true iff all input has been read (always true once finish()
+// has been called).
+bool Rfc2047QEncodingEncoder::encode( const char* & scursor, const char * const send,
+                                      char* & dcursor, const char * const dend )
+{
+  if ( mInsideFinishing ) return true;
+
+  while ( scursor != send && dcursor != dend ) {
+    if ( mStepNo == 0 ) {
+      // fetch the next char and decide if and how to encode it:
+      mAccu = *scursor++;
+      if ( !needsEncoding( mAccu ) ) {
+        *dcursor++ = char(mAccu);
+      } else if ( mEscapeChar == '=' && mAccu == 0x20 ) {
+        // rfc2047 has a shortcut for 0x20 (latin-1/us-ascii SPACE),
+        // rfc2231 encoding has not:
+        *dcursor++ = '_';
+      } else {
+        // =XY encoding, starting with the escape char:
+        *dcursor++ = mEscapeChar;
+        mStepNo = 1;
+      }
+      continue;
+    }
+
+    uchar nibble;
+    if ( mStepNo == 1 ) {
+      // the hi-nibble comes first...
+      nibble = highNibble(mAccu);
+      mStepNo = 2;
+    } else if ( mStepNo == 2 ) {
+      // ...then the lo-nibble, which completes the char:
+      nibble = lowNibble(mAccu);
+      mStepNo = 0;
+    } else {
+      assert( 0 );
+    }
+
+    *dcursor++ = binToHex( nibble );
+  }
+
+  return (scursor == send);
+} // encode
+
+#include <qstring.h>
+
+// Writes the nibbles of mAccu that encode() couldn't write anymore.
+//
+// Returns true iff nothing is left to write; if false is returned,
+// call again with more room in [dcursor,dend).
+bool Rfc2047QEncodingEncoder::finish( char* & dcursor, const char * const dend ) {
+  mInsideFinishing = true;
+
+  while ( mStepNo != 0 && dcursor != dend ) {
+    uchar nibble;
+    if ( mStepNo == 1 ) {
+      // hi-nibble still missing:
+      nibble = highNibble(mAccu);
+      mStepNo = 2;
+    } else if ( mStepNo == 2 ) {
+      // only the lo-nibble is missing:
+      nibble = lowNibble(mAccu);
+      mStepNo = 0;
+    } else {
+      assert( 0 );
+    }
+
+    *dcursor++ = binToHex( nibble );
+  }
+
+  return mStepNo == 0;
+}
+
+
+
+
+} // namespace KMime