00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
#include "kmime_codec_qp.h"
00033
00034
#include "kmime_util.h"
00035
00036
#include <kdebug.h>
00037
00038
#include <cassert>
00039
00040
using namespace KMime;
00041
00042
namespace KMime {
00043
00044
00045
00046
// Converts a nibble value to its uppercase hexadecimal digit:
// 0..9 become '0'..'9', 10..15 become 'A'..'F'.
static inline char binToHex( uchar value ) {
  return ( value > 9 ) ? value + 'A' - 10
                       : value + '0';
}
00052
00053
// Returns the upper four bits of @p ch, shifted down into the range 0..15.
static inline uchar highNibble( uchar ch ) {
  const uchar upperBits = ch >> 4;
  return upperBits;
}
00056
00057
// Returns the lower four bits of @p ch (range 0..15).
static inline uchar lowNibble( uchar ch ) {
  const uchar lowerBits = ch & 0x0F;
  return lowerBits;
}
00060
00061
// Returns true for characters that are neither '?' nor a control character
// below space; the tab character is the one control character accepted.
static inline bool keep( uchar ch ) {
  if ( ch == '?' )
    return false;
  return ch >= ' ' || ch == '\t';
}
00065
00066
00067
00068
00069
00070
class QuotedPrintableEncoder :
public Encoder {
00071
char mInputBuffer[16];
00072 uchar mCurrentLineLength;
00073 uchar mAccu;
00074 uint mInputBufferReadCursor : 4;
00075 uint mInputBufferWriteCursor : 4;
00076
enum {
00077 Never, AtBOL, Definitely
00078 } mAccuNeedsEncoding : 2;
00079
bool mSawLineEnd : 1;
00080
bool mSawCR : 1;
00081
bool mFinishing : 1;
00082
bool mFinished : 1;
00083
protected:
00084
friend class QuotedPrintableCodec;
00085 QuotedPrintableEncoder(
bool withCRLF=
false )
00086 :
Encoder( withCRLF ), mCurrentLineLength(0), mAccu(0),
00087 mInputBufferReadCursor(0), mInputBufferWriteCursor(0),
00088 mAccuNeedsEncoding(Never),
00089 mSawLineEnd(false), mSawCR(false), mFinishing(false),
00090 mFinished(false) {}
00091
00092
bool needsEncoding( uchar ch ) {
00093
return ( ch >
'~' || ch <
' ' && ch !=
'\t' || ch ==
'=' );
00094 }
00095
bool needsEncodingAtEOL( uchar ch ) {
00096
return ( ch ==
' ' || ch ==
'\t' );
00097 }
00098
bool needsEncodingAtBOL( uchar ch ) {
00099
return ( ch ==
'F' || ch ==
'.' || ch ==
'-' );
00100 }
00101
bool fillInputBuffer(
const char* & scursor,
const char *
const send );
00102
bool processNextChar();
00103
void createOutputBuffer(
char* & dcursor,
const char *
const dend );
00104
public:
00105
virtual ~QuotedPrintableEncoder() {}
00106
00107
bool encode(
const char* & scursor,
const char *
const send,
00108
char* & dcursor,
const char *
const dend );
00109
00110
bool finish(
char* & dcursor,
const char *
const dend );
00111 };
00112
00113
00114
class QuotedPrintableDecoder :
public Decoder {
00115
const char mEscapeChar;
00116
char mBadChar;
00118 uchar mAccu;
00128
const bool mQEncoding : 1;
00129
bool mInsideHexChar : 1;
00130
bool mFlushing : 1;
00131
bool mExpectLF : 1;
00132
bool mHaveAccu : 1;
00133
protected:
00134
friend class QuotedPrintableCodec;
00135
friend class Rfc2047QEncodingCodec;
00136
friend class Rfc2231EncodingCodec;
00137 QuotedPrintableDecoder(
bool withCRLF=
false,
00138
bool aQEncoding=
false,
char aEscapeChar=
'=' )
00139 : Decoder( withCRLF ),
00140 mEscapeChar(aEscapeChar),
00141 mBadChar(0),
00142 mAccu(0),
00143 mQEncoding(aQEncoding),
00144 mInsideHexChar(false),
00145 mFlushing(false),
00146 mExpectLF(false),
00147 mHaveAccu(false) {}
00148
public:
00149
virtual ~QuotedPrintableDecoder() {}
00150
00151
bool decode(
const char* & scursor,
const char *
const send,
00152
char* & dcursor,
const char *
const dend );
00153
00154
bool finish(
char* &,
const char *
const ) {
return true; }
00155 };
00156
00157
00158
class Rfc2047QEncodingEncoder :
public Encoder {
00159 uchar mAccu;
00160 uchar mStepNo;
00161
const char mEscapeChar;
00162
bool mInsideFinishing : 1;
00163
protected:
00164
friend class Rfc2047QEncodingCodec;
00165
friend class Rfc2231EncodingCodec;
00166 Rfc2047QEncodingEncoder(
bool withCRLF=
false,
char aEscapeChar=
'=' )
00167 :
Encoder( withCRLF ),
00168 mAccu(0), mStepNo(0), mEscapeChar( aEscapeChar ),
00169 mInsideFinishing( false )
00170 {
00171
00172 assert( aEscapeChar ==
'=' || aEscapeChar ==
'%' );
00173 }
00174
00175
00176
bool needsEncoding( uchar ch ) {
00177
if ( ch >
'z' )
return true;
00178
if ( !isEText( ch ) )
return true;
00179
if ( mEscapeChar ==
'%' && ( ch ==
'*' || ch ==
'/' ) )
00180
return true;
00181
return false;
00182 }
00183
00184
public:
00185
virtual ~Rfc2047QEncodingEncoder() {}
00186
00187
bool encode(
const char* & scursor,
const char *
const send,
00188
char* & dcursor,
const char *
const dend );
00189
bool finish(
char* & dcursor,
const char *
const dend );
00190 };
00191
00192
00193
00194
// Upper bound for the number of bytes decoding @p insize input bytes can
// produce: one output byte per input byte, doubled when @p withCRLF is set
// (presumably because every LF could grow into CRLF), plus two bytes of slack.
static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF ) {
  const int perCharWorstCase = withCRLF ? insize + insize : insize;
  return perCharWorstCase + 2;
}
00206
00207
// Returns a newly allocated quoted-printable encoder. The object is created
// with new and is not released by this function.
Encoder * QuotedPrintableCodec::makeEncoder( bool withCRLF ) const
{
  Encoder * const encoder = new QuotedPrintableEncoder( withCRLF );
  return encoder;
}
00210
00211 Decoder * QuotedPrintableCodec::makeDecoder(
bool withCRLF )
const {
00212
return new QuotedPrintableDecoder( withCRLF );
00213 }
00214
00215
int QuotedPrintableCodec::maxDecodedSizeFor(
int insize,
bool withCRLF )
const {
00216
return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00217 }
00218
00219
// Returns a newly allocated encoder that uses the default '=' escape
// character. The object is created with new and is not released by this
// function.
Encoder * Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const
{
  Encoder * const encoder = new Rfc2047QEncodingEncoder( withCRLF );
  return encoder;
}
00222
00223 Decoder * Rfc2047QEncodingCodec::makeDecoder(
bool withCRLF )
const {
00224
return new QuotedPrintableDecoder( withCRLF,
true );
00225 }
00226
00227
int Rfc2047QEncodingCodec::maxDecodedSizeFor(
int insize,
bool withCRLF )
const {
00228
return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00229 }
00230
00231
// Returns a newly allocated encoder that escapes with '%' instead of '='.
// The object is created with new and is not released by this function.
Encoder * Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const
{
  const char escapeChar = '%';
  return new Rfc2047QEncodingEncoder( withCRLF, escapeChar );
}
00234
00235 Decoder * Rfc2231EncodingCodec::makeDecoder(
bool withCRLF )
const {
00236
return new QuotedPrintableDecoder( withCRLF,
true,
'%' );
00237 }
00238
00239
int Rfc2231EncodingCodec::maxDecodedSizeFor(
int insize,
bool withCRLF )
const {
00240
return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00241 }
00242
00243
00244
00245
00246
00247
bool QuotedPrintableDecoder::decode(
const char* & scursor,
const char *
const send,
00248
char* & dcursor,
const char *
const dend ) {
00249
if ( mWithCRLF )
00250 kdWarning() <<
"CRLF output for decoders isn't yet supported!" << endl;
00251
00252
while ( scursor != send && dcursor != dend ) {
00253
if ( mFlushing ) {
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
if ( mInsideHexChar ) {
00264
00265 *dcursor++ = mEscapeChar;
00266 mInsideHexChar =
false;
00267 }
else if ( mHaveAccu ) {
00268
00269 *dcursor++ = binToHex( highNibble( mAccu ) );
00270 mHaveAccu =
false;
00271 mAccu = 0;
00272 }
else {
00273
00274 assert( mAccu == 0 );
00275
if ( mBadChar ) {
00276
if ( mBadChar >=
'>' && mBadChar <=
'~' ||
00277 mBadChar >=
'!' && mBadChar <=
'<' )
00278 *dcursor++ = mBadChar;
00279 mBadChar = 0;
00280 }
00281 mFlushing =
false;
00282 }
00283
continue;
00284 }
00285 assert( mBadChar == 0 );
00286
00287 uchar ch = *scursor++;
00288 uchar value = 255;
00289
00290
if ( mExpectLF && ch !=
'\n' ) {
00291 kdWarning() <<
"QuotedPrintableDecoder: "
00292
"illegally formed soft linebreak or lonely CR!" << endl;
00293 mInsideHexChar =
false;
00294 mExpectLF =
false;
00295 assert( mAccu == 0 );
00296 }
00297
00298
if ( mInsideHexChar ) {
00299
00300
if ( ch <=
'9' ) {
00301
if ( ch >=
'0' ) {
00302 value = ch -
'0';
00303 }
else {
00304
switch ( ch ) {
00305
case '\r':
00306 mExpectLF =
true;
00307
break;
00308
case '\n':
00309
00310
if ( !mHaveAccu ) {
00311 mExpectLF =
false;
00312 mInsideHexChar =
false;
00313
break;
00314 }
00315
00316
default:
00317 kdWarning() <<
"QuotedPrintableDecoder: "
00318
"illegally formed hex char! Outputting verbatim." << endl;
00319 mBadChar = ch;
00320 mFlushing =
true;
00321 }
00322
continue;
00323 }
00324 }
else {
00325
if ( ch <=
'F' ) {
00326
if ( ch >=
'A' ) {
00327 value = 10 + ch -
'A';
00328 }
else {
00329 mBadChar = ch;
00330 mFlushing =
true;
00331
continue;
00332 }
00333 }
else {
00334
if ( ch <= 'f' && ch >=
'a' ) {
00335 value = 10 + ch -
'a';
00336 }
else {
00337 mBadChar = ch;
00338 mFlushing =
true;
00339
continue;
00340 }
00341 }
00342 }
00343
00344 assert( value < 16 );
00345 assert( mBadChar == 0 );
00346 assert( !mExpectLF );
00347
00348
if ( mHaveAccu ) {
00349 *dcursor++ = char( mAccu | value );
00350 mAccu = 0;
00351 mHaveAccu =
false;
00352 mInsideHexChar =
false;
00353 }
else {
00354 mHaveAccu =
true;
00355 mAccu = value << 4;
00356 }
00357 }
else {
00358
if ( ch <= '~' && ch >=
' ' || ch ==
'\t' ) {
00359
if ( ch == mEscapeChar ) {
00360 mInsideHexChar =
true;
00361 }
else if ( mQEncoding && ch ==
'_' ) {
00362 *dcursor++ = char(0x20);
00363 }
else {
00364 *dcursor++ = char(ch);
00365 }
00366 }
else if ( ch ==
'\n' ) {
00367 *dcursor++ =
'\n';
00368 mExpectLF =
false;
00369 }
else if ( ch ==
'\r' ) {
00370 mExpectLF =
true;
00371 }
else {
00372 kdWarning() <<
"QuotedPrintableDecoder: " << ch <<
00373
" illegal character in input stream! Ignoring." << endl;
00374 }
00375 }
00376 }
00377
00378
return (scursor == send);
00379 }
00380
00381
bool QuotedPrintableEncoder::fillInputBuffer(
const char* & scursor,
00382
const char *
const send ) {
00383
00384
if ( mSawLineEnd )
00385
return true;
00386
00387
00388
00389
for ( ; ( mInputBufferWriteCursor + 1 ) % 16 != mInputBufferReadCursor
00390 && scursor != send ; mInputBufferWriteCursor++ ) {
00391
char ch = *scursor++;
00392
if ( ch ==
'\r' ) {
00393 mSawCR =
true;
00394 }
else if ( ch ==
'\n' ) {
00395
00396
00397
if ( mSawCR ) {
00398 mSawCR =
false;
00399 assert( mInputBufferWriteCursor != mInputBufferReadCursor );
00400 mInputBufferWriteCursor--;
00401 }
00402 mSawLineEnd =
true;
00403
return true;
00404 }
else {
00405 mSawCR =
false;
00406 }
00407 mInputBuffer[ mInputBufferWriteCursor ] = ch;
00408 }
00409 mSawLineEnd =
false;
00410
return false;
00411 }
00412
00413
bool QuotedPrintableEncoder::processNextChar() {
00414
00415
00416
00417
00418
00419
00420
const int minBufferFillWithoutLineEnd = 4;
00421
00422 assert( mOutputBufferCursor == 0 );
00423
00424
int bufferFill = int(mInputBufferWriteCursor) - int(mInputBufferReadCursor) ;
00425
if ( bufferFill < 0 )
00426 bufferFill += 16;
00427
00428 assert( bufferFill >=0 && bufferFill <= 15 );
00429
00430
if ( !mFinishing && !mSawLineEnd &&
00431 bufferFill < minBufferFillWithoutLineEnd )
00432
return false;
00433
00434
00435
if ( mInputBufferReadCursor == mInputBufferWriteCursor )
00436
return false;
00437
00438
00439 mAccu = mInputBuffer[ mInputBufferReadCursor++ ];
00440
if ( needsEncoding( mAccu ) )
00441 mAccuNeedsEncoding = Definitely;
00442
else if ( ( mSawLineEnd || mFinishing )
00443 && bufferFill == 1
00444 && needsEncodingAtEOL( mAccu ) )
00445 mAccuNeedsEncoding = Definitely;
00446
else if ( needsEncodingAtBOL( mAccu ) )
00447 mAccuNeedsEncoding = AtBOL;
00448
else
00449
00450 mAccuNeedsEncoding = Never;
00451
00452
return true;
00453 }
00454
00455
00456
00457
00458
00459
void QuotedPrintableEncoder::createOutputBuffer(
char* & dcursor,
00460
const char *
const dend )
00461 {
00462
const int maxLineLength = 76;
00463
00464 assert( mOutputBufferCursor == 0 );
00465
00466
bool lastOneOnThisLine = mSawLineEnd
00467 && mInputBufferReadCursor == mInputBufferWriteCursor;
00468
00469
int neededSpace = 1;
00470
if ( mAccuNeedsEncoding == Definitely)
00471 neededSpace = 3;
00472
00473
00474
if ( !lastOneOnThisLine )
00475 neededSpace++;
00476
00477
if ( mCurrentLineLength > maxLineLength - neededSpace ) {
00478
00479 write(
'=', dcursor, dend );
00480 writeCRLF( dcursor, dend );
00481 mCurrentLineLength = 0;
00482 }
00483
00484
if ( Never == mAccuNeedsEncoding ||
00485 AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0 ) {
00486 write( mAccu, dcursor, dend );
00487 mCurrentLineLength++;
00488 }
else {
00489 write(
'=', dcursor, dend );
00490 write( binToHex( highNibble( mAccu ) ), dcursor, dend );
00491 write( binToHex( lowNibble( mAccu ) ), dcursor, dend );
00492 mCurrentLineLength += 3;
00493 }
00494 }
00495
00496
00497
bool QuotedPrintableEncoder::encode(
const char* & scursor,
const char *
const send,
00498
char* & dcursor,
const char *
const dend )
00499 {
00500
00501
if ( mFinishing )
return true;
00502
00503
while ( scursor != send && dcursor != dend ) {
00504
if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) )
00505
return (scursor == send);
00506
00507 assert( mOutputBufferCursor == 0 );
00508
00509
00510
00511 fillInputBuffer( scursor, send );
00512
00513
if ( processNextChar() )
00514
00515 createOutputBuffer( dcursor, dend );
00516
else if ( mSawLineEnd &&
00517 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00518
00519 writeCRLF( dcursor, dend );
00520
00521 mSawLineEnd =
false;
00522 mCurrentLineLength = 0;
00523 }
else
00524
00525
break;
00526 }
00527
00528
00529
00530
if ( mOutputBufferCursor ) flushOutputBuffer( dcursor, dend );
00531
00532
return (scursor == send);
00533
00534 }
00535
00536
bool QuotedPrintableEncoder::finish(
char* & dcursor,
00537
const char *
const dend ) {
00538 mFinishing =
true;
00539
00540
if ( mFinished )
00541
return flushOutputBuffer( dcursor, dend );
00542
00543
while ( dcursor != dend ) {
00544
if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) )
00545
return false;
00546
00547 assert( mOutputBufferCursor == 0 );
00548
00549
if ( processNextChar() )
00550
00551 createOutputBuffer( dcursor, dend );
00552
else if ( mSawLineEnd &&
00553 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00554
00555 writeCRLF( dcursor, dend );
00556 mSawLineEnd =
false;
00557 mCurrentLineLength = 0;
00558 }
else {
00559 mFinished =
true;
00560
return flushOutputBuffer( dcursor, dend );
00561 }
00562 }
00563
00564
return mFinished && !mOutputBufferCursor;
00565
00566 }
00567
00568
00569
bool Rfc2047QEncodingEncoder::encode(
const char* & scursor,
const char *
const send,
00570
char* & dcursor,
const char *
const dend )
00571 {
00572
if ( mInsideFinishing )
return true;
00573
00574
while ( scursor != send && dcursor != dend ) {
00575 uchar value;
00576
switch ( mStepNo ) {
00577
case 0:
00578
00579 mAccu = *scursor++;
00580
if ( !needsEncoding( mAccu ) ) {
00581 *dcursor++ = char(mAccu);
00582 }
else if ( mEscapeChar ==
'=' && mAccu == 0x20 ) {
00583
00584
00585 *dcursor++ =
'_';
00586 }
else {
00587
00588 *dcursor++ = mEscapeChar;
00589 mStepNo = 1;
00590 }
00591
continue;
00592
case 1:
00593
00594 value = highNibble(mAccu);
00595 mStepNo = 2;
00596
break;
00597
case 2:
00598
00599 value = lowNibble(mAccu);
00600 mStepNo = 0;
00601
break;
00602
default: assert( 0 );
00603 }
00604
00605
00606 *dcursor++ = binToHex( value );
00607 }
00608
00609
return (scursor == send);
00610 }
00611
00612
#include <qstring.h>
00613
00614
bool Rfc2047QEncodingEncoder::finish(
char* & dcursor,
const char *
const dend ) {
00615 mInsideFinishing =
true;
00616
00617
00618
while ( mStepNo != 0 && dcursor != dend ) {
00619 uchar value;
00620
switch ( mStepNo ) {
00621
case 1:
00622
00623 value = highNibble(mAccu);
00624 mStepNo = 2;
00625
break;
00626
case 2:
00627
00628 value = lowNibble(mAccu);
00629 mStepNo = 0;
00630
break;
00631
default: assert( 0 );
00632 }
00633
00634
00635 *dcursor++ = binToHex( value );
00636 }
00637
00638
return mStepNo == 0;
00639 }
00640
00641
00642
00643
00644 }