libkdenetwork Library API Documentation

kmime_codec_qp.cpp

00001 /* -*- c++ -*- 00002 kmime_codec_qp.cpp 00003 00004 This file is part of KMime, the KDE internet mail/usenet news message library. 00005 Copyright (c) 2002 Marc Mutz <mutz@kde.org> 00006 00007 KMime is free software; you can redistribute it and/or modify it 00008 under the terms of the GNU General Public License, version 2, as 00009 published by the Free Software Foundation. 00010 00011 KMime is distributed in the hope that it will be useful, but 00012 WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this library; if not, write to the Free Software 00018 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 00020 In addition, as a special exception, the copyright holders give 00021 permission to link the code of this library with any edition of 00022 the Qt library by Trolltech AS, Norway (or with modified versions 00023 of Qt that use the same license as Qt), and distribute linked 00024 combinations including the two. You must obey the GNU General 00025 Public License in all respects for all of the code used other than 00026 Qt. If you modify this file, you may extend this exception to 00027 your version of the file, but you are not obligated to do so. If 00028 you do not wish to do so, delete this exception statement from 00029 your version. 00030 */ 00031 00032 #include "kmime_codec_qp.h" 00033 00034 #include "kmime_util.h" 00035 00036 #include <kdebug.h> 00037 00038 #include <cassert> 00039 00040 using namespace KMime; 00041 00042 namespace KMime { 00043 00044 // some helpful functions: 00045 00046 static inline char binToHex( uchar value ) { 00047 if ( value > 9 ) 00048 return value + 'A' - 10; 00049 else 00050 return value + '0'; 00051 } 00052 00053 static inline uchar highNibble( uchar ch ) { 00054 return ch >> 4; 00055 } 00056 00057 static inline uchar lowNibble( uchar ch ) { 00058 return ch & 0xF; 00059 } 00060 00061 static inline bool keep( uchar ch ) { 00062 // no CTLs, except HT and not '?' 00063 return !( ch < ' ' && ch != '\t' || ch == '?' ); 00064 } 00065 00066 // 00067 // QuotedPrintableCodec 00068 // 00069 00070 class QuotedPrintableEncoder : public Encoder { 00071 char mInputBuffer[16]; 00072 uchar mCurrentLineLength; // 0..76 00073 uchar mAccu; 00074 uint mInputBufferReadCursor : 4; // 0..15 00075 uint mInputBufferWriteCursor : 4; // 0..15 00076 enum { 00077 Never, AtBOL, Definitely 00078 } mAccuNeedsEncoding : 2; 00079 bool mSawLineEnd : 1; 00080 bool mSawCR : 1; 00081 bool mFinishing : 1; 00082 bool mFinished : 1; 00083 protected: 00084 friend class QuotedPrintableCodec; 00085 QuotedPrintableEncoder( bool withCRLF=false ) 00086 : Encoder( withCRLF ), mCurrentLineLength(0), mAccu(0), 00087 mInputBufferReadCursor(0), mInputBufferWriteCursor(0), 00088 mAccuNeedsEncoding(Never), 00089 mSawLineEnd(false), mSawCR(false), mFinishing(false), 00090 mFinished(false) {} 00091 00092 bool needsEncoding( uchar ch ) { 00093 return ( ch > '~' || ch < ' ' && ch != '\t' || ch == '=' ); 00094 } 00095 bool needsEncodingAtEOL( uchar ch ) { 00096 return ( ch == ' ' || ch == '\t' ); 00097 } 00098 bool needsEncodingAtBOL( uchar ch ) { 00099 return ( ch == 'F' || ch == '.' || ch == '-' ); 00100 } 00101 bool fillInputBuffer( const char* & scursor, const char * const send ); 00102 bool processNextChar(); 00103 void createOutputBuffer( char* & dcursor, const char * const dend ); 00104 public: 00105 virtual ~QuotedPrintableEncoder() {} 00106 00107 bool encode( const char* & scursor, const char * const send, 00108 char* & dcursor, const char * const dend ); 00109 00110 bool finish( char* & dcursor, const char * const dend ); 00111 }; 00112 00113 00114 class QuotedPrintableDecoder : public Decoder { 00115 const char mEscapeChar; 00116 char mBadChar; 00118 uchar mAccu; 00128 const bool mQEncoding : 1; 00129 bool mInsideHexChar : 1; 00130 bool mFlushing : 1; 00131 bool mExpectLF : 1; 00132 bool mHaveAccu : 1; 00133 protected: 00134 friend class QuotedPrintableCodec; 00135 friend class Rfc2047QEncodingCodec; 00136 friend class Rfc2231EncodingCodec; 00137 QuotedPrintableDecoder( bool withCRLF=false, 00138 bool aQEncoding=false, char aEscapeChar='=' ) 00139 : Decoder( withCRLF ), 00140 mEscapeChar(aEscapeChar), 00141 mBadChar(0), 00142 mAccu(0), 00143 mQEncoding(aQEncoding), 00144 mInsideHexChar(false), 00145 mFlushing(false), 00146 mExpectLF(false), 00147 mHaveAccu(false) {} 00148 public: 00149 virtual ~QuotedPrintableDecoder() {} 00150 00151 bool decode( const char* & scursor, const char * const send, 00152 char* & dcursor, const char * const dend ); 00153 // ### really no finishing needed??? 00154 bool finish( char* &, const char * const ) { return true; } 00155 }; 00156 00157 00158 class Rfc2047QEncodingEncoder : public Encoder { 00159 uchar mAccu; 00160 uchar mStepNo; 00161 const char mEscapeChar; 00162 bool mInsideFinishing : 1; 00163 protected: 00164 friend class Rfc2047QEncodingCodec; 00165 friend class Rfc2231EncodingCodec; 00166 Rfc2047QEncodingEncoder( bool withCRLF=false, char aEscapeChar='=' ) 00167 : Encoder( withCRLF ), 00168 mAccu(0), mStepNo(0), mEscapeChar( aEscapeChar ), 00169 mInsideFinishing( false ) 00170 { 00171 // else an optimization in ::encode might break. 00172 assert( aEscapeChar == '=' || aEscapeChar == '%' ); 00173 } 00174 00175 // this code assumes that isEText( mEscapeChar ) == false! 00176 bool needsEncoding( uchar ch ) { 00177 if ( ch > 'z' ) return true; // {|}~ DEL and 8bit chars need 00178 if ( !isEText( ch ) ) return true; // all but a-zA-Z0-9!/*+- need, too 00179 if ( mEscapeChar == '%' && ( ch == '*' || ch == '/' ) ) 00180 return true; // not allowed in rfc2231 encoding 00181 return false; 00182 } 00183 00184 public: 00185 virtual ~Rfc2047QEncodingEncoder() {} 00186 00187 bool encode( const char* & scursor, const char * const send, 00188 char* & dcursor, const char * const dend ); 00189 bool finish( char* & dcursor, const char * const dend ); 00190 }; 00191 00192 // this doesn't access any member variables, so it can be defined static 00193 // but then we can't call it from virtual functions 00194 static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF ) { 00195 // all chars unencoded: 00196 int result = insize; 00197 // but maybe all of them are \n and we need to make them \r\n :-o 00198 if ( withCRLF ) 00199 result += insize; 00200 00201 // there might be an accu plus escape 00202 result += 2; 00203 00204 return result; 00205 } 00206 00207 Encoder * QuotedPrintableCodec::makeEncoder( bool withCRLF ) const { 00208 return new QuotedPrintableEncoder( withCRLF ); 00209 } 00210 00211 Decoder * QuotedPrintableCodec::makeDecoder( bool withCRLF ) const { 00212 return new QuotedPrintableDecoder( withCRLF ); 00213 } 00214 00215 int QuotedPrintableCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const { 00216 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF); 00217 } 00218 00219 Encoder * Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const { 00220 return new Rfc2047QEncodingEncoder( withCRLF ); 00221 } 00222 00223 Decoder * Rfc2047QEncodingCodec::makeDecoder( bool withCRLF ) const { 00224 return new QuotedPrintableDecoder( withCRLF, true ); 00225 } 00226 00227 int Rfc2047QEncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const { 00228 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF); 00229 } 00230 00231 Encoder * Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const { 00232 return new Rfc2047QEncodingEncoder( withCRLF, '%' ); 00233 } 00234 00235 Decoder * Rfc2231EncodingCodec::makeDecoder( bool withCRLF ) const { 00236 return new QuotedPrintableDecoder( withCRLF, true, '%' ); 00237 } 00238 00239 int Rfc2231EncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const { 00240 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF); 00241 } 00242 00243 /********************************************************/ 00244 /********************************************************/ 00245 /********************************************************/ 00246 00247 bool QuotedPrintableDecoder::decode( const char* & scursor, const char * const send, 00248 char* & dcursor, const char * const dend ) { 00249 if ( mWithCRLF ) 00250 kdWarning() << "CRLF output for decoders isn't yet supported!" << endl; 00251 00252 while ( scursor != send && dcursor != dend ) { 00253 if ( mFlushing ) { 00254 // we have to flush chars in the aftermath of an decoding 00255 // error. The way to request a flush is to 00256 // - store the offending character in mBadChar and 00257 // - set mFlushing to true. 00258 // The supported cases are (H: hexchar, X: bad char): 00259 // =X, =HX, CR 00260 // mBadChar is only written out if it is not by itself illegal in 00261 // quoted-printable (e.g. CTLs, 8Bits). 00262 // A fast way to suppress mBadChar output is to set it to NUL. 00263 if ( mInsideHexChar ) { 00264 // output '=' 00265 *dcursor++ = mEscapeChar; 00266 mInsideHexChar = false; 00267 } else if ( mHaveAccu ) { 00268 // output the high nibble of the accumulator: 00269 *dcursor++ = binToHex( highNibble( mAccu ) ); 00270 mHaveAccu = false; 00271 mAccu = 0; 00272 } else { 00273 // output mBadChar 00274 assert( mAccu == 0 ); 00275 if ( mBadChar ) { 00276 if ( mBadChar >= '>' && mBadChar <= '~' || 00277 mBadChar >= '!' && mBadChar <= '<' ) 00278 *dcursor++ = mBadChar; 00279 mBadChar = 0; 00280 } 00281 mFlushing = false; 00282 } 00283 continue; 00284 } 00285 assert( mBadChar == 0 ); 00286 00287 uchar ch = *scursor++; 00288 uchar value = 255; 00289 00290 if ( mExpectLF && ch != '\n' ) { 00291 kdWarning() << "QuotedPrintableDecoder: " 00292 "illegally formed soft linebreak or lonely CR!" << endl; 00293 mInsideHexChar = false; 00294 mExpectLF = false; 00295 assert( mAccu == 0 ); 00296 } 00297 00298 if ( mInsideHexChar ) { 00299 // next char(s) represent nibble instead of itself: 00300 if ( ch <= '9' ) { 00301 if ( ch >= '0' ) { 00302 value = ch - '0'; 00303 } else { 00304 switch ( ch ) { 00305 case '\r': 00306 mExpectLF = true; 00307 break; 00308 case '\n': 00309 // soft line break, but only if mAccu is NUL. 00310 if ( !mHaveAccu ) { 00311 mExpectLF = false; 00312 mInsideHexChar = false; 00313 break; 00314 } 00315 // else fall through 00316 default: 00317 kdWarning() << "QuotedPrintableDecoder: " 00318 "illegally formed hex char! Outputting verbatim." << endl; 00319 mBadChar = ch; 00320 mFlushing = true; 00321 } 00322 continue; 00323 } 00324 } else { // ch > '9' 00325 if ( ch <= 'F' ) { 00326 if ( ch >= 'A' ) { 00327 value = 10 + ch - 'A'; 00328 } else { // [:-@] 00329 mBadChar = ch; 00330 mFlushing = true; 00331 continue; 00332 } 00333 } else { // ch > 'F' 00334 if ( ch <= 'f' && ch >= 'a' ) { 00335 value = 10 + ch - 'a'; 00336 } else { 00337 mBadChar = ch; 00338 mFlushing = true; 00339 continue; 00340 } 00341 } 00342 } 00343 00344 assert( value < 16 ); 00345 assert( mBadChar == 0 ); 00346 assert( !mExpectLF ); 00347 00348 if ( mHaveAccu ) { 00349 *dcursor++ = char( mAccu | value ); 00350 mAccu = 0; 00351 mHaveAccu = false; 00352 mInsideHexChar = false; 00353 } else { 00354 mHaveAccu = true; 00355 mAccu = value << 4; 00356 } 00357 } else { // not mInsideHexChar 00358 if ( ch <= '~' && ch >= ' ' || ch == '\t' ) { 00359 if ( ch == mEscapeChar ) { 00360 mInsideHexChar = true; 00361 } else if ( mQEncoding && ch == '_' ) { 00362 *dcursor++ = char(0x20); 00363 } else { 00364 *dcursor++ = char(ch); 00365 } 00366 } else if ( ch == '\n' ) { 00367 *dcursor++ = '\n'; 00368 mExpectLF = false; 00369 } else if ( ch == '\r' ) { 00370 mExpectLF = true; 00371 } else { 00372 kdWarning() << "QuotedPrintableDecoder: " << ch << 00373 " illegal character in input stream! Ignoring." << endl; 00374 } 00375 } 00376 } 00377 00378 return (scursor == send); 00379 } 00380 00381 bool QuotedPrintableEncoder::fillInputBuffer( const char* & scursor, 00382 const char * const send ) { 00383 // Don't read more if there's still a tail of a line in the buffer: 00384 if ( mSawLineEnd ) 00385 return true; 00386 00387 // Read until the buffer is full or we have found CRLF or LF (which 00388 // don't end up in the input buffer): 00389 for ( ; ( mInputBufferWriteCursor + 1 ) % 16 != mInputBufferReadCursor 00390 && scursor != send ; mInputBufferWriteCursor++ ) { 00391 char ch = *scursor++; 00392 if ( ch == '\r' ) { 00393 mSawCR = true; 00394 } else if ( ch == '\n' ) { 00395 // remove the CR from the input buffer (if any) and return that 00396 // we found a line ending: 00397 if ( mSawCR ) { 00398 mSawCR = false; 00399 assert( mInputBufferWriteCursor != mInputBufferReadCursor ); 00400 mInputBufferWriteCursor--; 00401 } 00402 mSawLineEnd = true; 00403 return true; // saw CRLF or LF 00404 } else { 00405 mSawCR = false; 00406 } 00407 mInputBuffer[ mInputBufferWriteCursor ] = ch; 00408 } 00409 mSawLineEnd = false; 00410 return false; // didn't see a line ending... 00411 } 00412 00413 bool QuotedPrintableEncoder::processNextChar() { 00414 00415 // If we process a buffer which doesn't end in a line break, we 00416 // can't process all of it, since the next chars that will be read 00417 // could be a line break. So we empty the buffer only until a fixed 00418 // number of chars is left (except when mFinishing, which means that 00419 // the data doesn't end in newline): 00420 const int minBufferFillWithoutLineEnd = 4; 00421 00422 assert( mOutputBufferCursor == 0 ); 00423 00424 int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor) ; 00425 if ( bufferFill < 0 ) 00426 bufferFill += 16; 00427 00428 assert( bufferFill >=0 && bufferFill <= 15 ); 00429 00430 if ( !mFinishing && !mSawLineEnd && 00431 bufferFill < minBufferFillWithoutLineEnd ) 00432 return false; 00433 00434 // buffer is empty, return false: 00435 if ( mInputBufferReadCursor == mInputBufferWriteCursor ) 00436 return false; 00437 00438 // Real processing goes here: 00439 mAccu = mInputBuffer[ mInputBufferReadCursor++ ]; 00440 if ( needsEncoding( mAccu ) ) // always needs encoding or 00441 mAccuNeedsEncoding = Definitely; 00442 else if ( ( mSawLineEnd || mFinishing ) // needs encoding at end of line 00443 && bufferFill == 1 // or end of buffer 00444 && needsEncodingAtEOL( mAccu ) ) 00445 mAccuNeedsEncoding = Definitely; 00446 else if ( needsEncodingAtBOL( mAccu ) ) 00447 mAccuNeedsEncoding = AtBOL; 00448 else 00449 // never needs encoding 00450 mAccuNeedsEncoding = Never; 00451 00452 return true; 00453 } 00454 00455 // Outputs processed (verbatim or hex-encoded) chars and inserts soft 00456 // line breaks as necessary. Depends on processNextChar's directions 00457 // on whether or not to encode the current char, and whether or not 00458 // the current char is the last one in it's input line: 00459 void QuotedPrintableEncoder::createOutputBuffer( char* & dcursor, 00460 const char * const dend ) 00461 { 00462 const int maxLineLength = 76; // rfc 2045 00463 00464 assert( mOutputBufferCursor == 0 ); 00465 00466 bool lastOneOnThisLine = mSawLineEnd 00467 && mInputBufferReadCursor == mInputBufferWriteCursor; 00468 00469 int neededSpace = 1; 00470 if ( mAccuNeedsEncoding == Definitely) 00471 neededSpace = 3; 00472 00473 // reserve space for the soft hyphen (=) 00474 if ( !lastOneOnThisLine ) 00475 neededSpace++; 00476 00477 if ( mCurrentLineLength > maxLineLength - neededSpace ) { 00478 // current line too short, insert soft line break: 00479 write( '=', dcursor, dend ); 00480 writeCRLF( dcursor, dend ); 00481 mCurrentLineLength = 0; 00482 } 00483 00484 if ( Never == mAccuNeedsEncoding || 00485 AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0 ) { 00486 write( mAccu, dcursor, dend ); 00487 mCurrentLineLength++; 00488 } else { 00489 write( '=', dcursor, dend ); 00490 write( binToHex( highNibble( mAccu ) ), dcursor, dend ); 00491 write( binToHex( lowNibble( mAccu ) ), dcursor, dend ); 00492 mCurrentLineLength += 3; 00493 } 00494 } 00495 00496 00497 bool QuotedPrintableEncoder::encode( const char* & scursor, const char * const send, 00498 char* & dcursor, const char * const dend ) 00499 { 00500 // support probing by the caller: 00501 if ( mFinishing ) return true; 00502 00503 while ( scursor != send && dcursor != dend ) { 00504 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) 00505 return (scursor == send); 00506 00507 assert( mOutputBufferCursor == 0 ); 00508 00509 // fill input buffer until eol has been reached or until the 00510 // buffer is full, whatever comes first: 00511 fillInputBuffer( scursor, send ); 00512 00513 if ( processNextChar() ) 00514 // there was one... 00515 createOutputBuffer( dcursor, dend ); 00516 else if ( mSawLineEnd && 00517 mInputBufferWriteCursor == mInputBufferReadCursor ) { 00518 // load a hard line break into output buffer: 00519 writeCRLF( dcursor, dend ); 00520 // signal fillInputBuffer() we are ready for the next line: 00521 mSawLineEnd = false; 00522 mCurrentLineLength = 0; 00523 } else 00524 // we are supposedly finished with this input block: 00525 break; 00526 } 00527 00528 // make sure we write as much as possible and don't stop _writing_ 00529 // just because we have no more _input_: 00530 if ( mOutputBufferCursor ) flushOutputBuffer( dcursor, dend ); 00531 00532 return (scursor == send); 00533 00534 } // encode 00535 00536 bool QuotedPrintableEncoder::finish( char* & dcursor, 00537 const char * const dend ) { 00538 mFinishing = true; 00539 00540 if ( mFinished ) 00541 return flushOutputBuffer( dcursor, dend ); 00542 00543 while ( dcursor != dend ) { 00544 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) 00545 return false; 00546 00547 assert( mOutputBufferCursor == 0 ); 00548 00549 if ( processNextChar() ) 00550 // there was one... 00551 createOutputBuffer( dcursor, dend ); 00552 else if ( mSawLineEnd && 00553 mInputBufferWriteCursor == mInputBufferReadCursor ) { 00554 // load a hard line break into output buffer: 00555 writeCRLF( dcursor, dend ); 00556 mSawLineEnd = false; 00557 mCurrentLineLength = 0; 00558 } else { 00559 mFinished = true; 00560 return flushOutputBuffer( dcursor, dend ); 00561 } 00562 } 00563 00564 return mFinished && !mOutputBufferCursor; 00565 00566 } // finish 00567 00568 00569 bool Rfc2047QEncodingEncoder::encode( const char* & scursor, const char * const send, 00570 char* & dcursor, const char * const dend ) 00571 { 00572 if ( mInsideFinishing ) return true; 00573 00574 while ( scursor != send && dcursor != dend ) { 00575 uchar value; 00576 switch ( mStepNo ) { 00577 case 0: 00578 // read the next char and decide if and how do encode: 00579 mAccu = *scursor++; 00580 if ( !needsEncoding( mAccu ) ) { 00581 *dcursor++ = char(mAccu); 00582 } else if ( mEscapeChar == '=' && mAccu == 0x20 ) { 00583 // shortcut encoding for 0x20 (latin-1/us-ascii SPACE) 00584 // (not for rfc2231 encoding) 00585 *dcursor++ = '_'; 00586 } else { 00587 // needs =XY encoding - write escape char: 00588 *dcursor++ = mEscapeChar; 00589 mStepNo = 1; 00590 } 00591 continue; 00592 case 1: 00593 // extract hi-nibble: 00594 value = highNibble(mAccu); 00595 mStepNo = 2; 00596 break; 00597 case 2: 00598 // extract lo-nibble: 00599 value = lowNibble(mAccu); 00600 mStepNo = 0; 00601 break; 00602 default: assert( 0 ); 00603 } 00604 00605 // and write: 00606 *dcursor++ = binToHex( value ); 00607 } 00608 00609 return (scursor == send); 00610 } // encode 00611 00612 #include <qstring.h> 00613 00614 bool Rfc2047QEncodingEncoder::finish( char* & dcursor, const char * const dend ) { 00615 mInsideFinishing = true; 00616 00617 // write the last bits of mAccu, if any: 00618 while ( mStepNo != 0 && dcursor != dend ) { 00619 uchar value; 00620 switch ( mStepNo ) { 00621 case 1: 00622 // extract hi-nibble: 00623 value = highNibble(mAccu); 00624 mStepNo = 2; 00625 break; 00626 case 2: 00627 // extract lo-nibble: 00628 value = lowNibble(mAccu); 00629 mStepNo = 0; 00630 break; 00631 default: assert( 0 ); 00632 } 00633 00634 // and write: 00635 *dcursor++ = binToHex( value ); 00636 } 00637 00638 return mStepNo == 0; 00639 } 00640 00641 00642 00643 00644 } // namespace KMime
KDE Logo
This file is part of the documentation for libkdenetwork Library Version 3.3.0.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Fri Aug 27 12:48:41 2004 by doxygen 1.3.8 written by Dimitri van Heesch, © 1997-2003