00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
#include "qutf7codec.h"
00027
00028
#ifndef QT_NO_TEXTCODEC
00029
00030
/**
 * Returns the MIB enum value under which this codec identifies itself.
 *
 * @return always 1012.
 */
int QUtf7Codec::mibEnum() const
{
  const int mib = 1012;
  return mib;
}
00033
00034
/**
 * Returns the MIB enum value under which the strict codec variant
 * identifies itself.
 *
 * @return always -1012 (a negative value, distinct from the 1012 reported
 *         by QUtf7Codec::mibEnum()).
 */
int QStrictUtf7Codec::mibEnum() const
{
  const int mib = -1012;
  return mib;
}
00037
00038
/**
 * Returns the name of this codec.
 *
 * @return the NUL-terminated string "UTF-7" (static storage).
 */
const char* QUtf7Codec::name() const
{
  static const char codecName[] = "UTF-7";
  return codecName;
}
00041
00042
/**
 * Returns the name of the strict codec variant.
 *
 * @return the NUL-terminated string "X-QT-UTF-7-STRICT" (static storage).
 */
const char* QStrictUtf7Codec::name() const
{
  static const char codecName[] = "X-QT-UTF-7-STRICT";
  return codecName;
}
00045
00046
/**
 * Returns the MIME charset name of this codec.
 *
 * @return the NUL-terminated string "UTF-7" (static storage).
 */
const char* QUtf7Codec::mimeName() const
{
  static const char mimeCharset[] = "UTF-7";
  return mimeCharset;
}
00049
00050
/**
 * Reports whether a single character can be encoded by this codec.
 *
 * The argument is not inspected; the answer is unconditionally TRUE.
 */
bool QUtf7Codec::canEncode( QChar /* unused */ ) const
{
  return TRUE;
}
00053
00054
bool QUtf7Codec::canEncode(
const QString & )
const {
00055
return TRUE;
00056 }
00057
00058
// Membership bitmap over the 128 seven-bit character codes (one bit per
// code, most significant bit of each byte first, as read by isOfSet()).
// A set bit marks a character of the base64 alphabet:
// 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/'.
static uchar base64Set[16] = {
  0x00, 0x00, 0x00, 0x00,   // 0x00-0x1F: none
  0x00, 0x11, 0xFF, 0xC0,   // 0x20-0x3F: '+' '/' '0'-'9'
  0x7F, 0xFF, 0xFF, 0xE0,   // 0x40-0x5F: 'A'-'Z'
  0x7F, 0xFF, 0xFF, 0xE0    // 0x60-0x7F: 'a'-'z'
};
00064
00065
// Membership bitmap (same layout as the other 16-byte sets in this file)
// of those base64 characters whose 6-bit value has its two least
// significant bits cleared, i.e. every fourth character of the base64
// alphabet starting with 'A' (A, E, I, ..., c, g, ..., 0, 4, 8).
static uchar base64SetWithLastTwoBitsZero[16] = {
  0x00, 0x00, 0x00, 0x00,   // 0x00-0x1F: none
  0x00, 0x00, 0x88, 0x80,   // 0x20-0x3F: '0' '4' '8'
  0x44, 0x44, 0x44, 0x40,   // 0x40-0x5F: 'A' 'E' 'I' ... 'Y'
  0x11, 0x11, 0x11, 0x00    // 0x60-0x7F: 'c' 'g' 'k' ... 'w'
};
00071
00072
// Membership bitmap (one bit per 7-bit character code, most significant
// bit of each byte first) of the characters the encoder always copies to
// the output unencoded: letters, digits and ' ( ) , - . / : ?
static uchar directSet[16] = {
  0x00, 0x00, 0x00, 0x00,   // 0x00-0x1F: none
  0x01, 0xCF, 0xFF, 0xE1,   // 0x20-0x3F: ' ( ) , - . / '0'-'9' : ?
  0x7F, 0xFF, 0xFF, 0xE0,   // 0x40-0x5F: 'A'-'Z'
  0x7F, 0xFF, 0xFF, 0xE0    // 0x60-0x7F: 'a'-'z'
};
00078
00079
// Membership bitmap (one bit per 7-bit character code, most significant
// bit of each byte first) of the characters that the encoder copies
// unencoded only when it was constructed with encOpt == false:
// ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
static uchar optDirectSet[16] = {
  0x00, 0x00, 0x00, 0x00,   // 0x00-0x1F: none
  0x7E, 0x20, 0x00, 0x1E,   // 0x20-0x3F: ! " # $ % & * ; < = >
  0x80, 0x00, 0x00, 0x17,   // 0x40-0x5F: @ [ ] ^ _
  0x80, 0x00, 0x00, 0x1C    // 0x60-0x7F: ` { | }
};
00085
00086
static inline bool isOfSet(uchar ch, uchar* set) {
00087
return set[ ch/8 ] & (0x80 >> ( ch%8 ));
00088 }
00089
00090
int QUtf7Codec::heuristicContentMatch(
const char* chars,
int len)
const
00091
{
00092
int stepNo = 0;
00093
int i;
00094
bool shifted = FALSE;
00095
bool rightAfterEscape = FALSE;
00096
bool onlyNullBitsSinceLastBoundary = TRUE;
00097
for ( i = 0; i < len ; i++ ) {
00098
if ((
unsigned char)chars[i] >= 128)
00099
break;
00100
if (shifted) {
00101
if ( isOfSet(chars[i],base64Set) ) {
00102
switch (stepNo) {
00103
case 0:
00104 onlyNullBitsSinceLastBoundary = TRUE;
00105
break;
00106
case 3:
00107 onlyNullBitsSinceLastBoundary
00108 = isOfSet(chars[i],base64SetWithLastTwoBitsZero);
00109
break;
00110
case 6:
00111 onlyNullBitsSinceLastBoundary
00112 = ( chars[i] ==
'A' || chars[i] ==
'Q' ||
00113 chars[i] ==
'g' || chars[i] ==
'w' );
00114
break;
00115
default:
00116 onlyNullBitsSinceLastBoundary
00117 = onlyNullBitsSinceLastBoundary && (chars[i] ==
'A');
00118 }
00119 stepNo = (stepNo + 1) % 8;
00120 rightAfterEscape = FALSE;
00121 }
else {
00122
if (rightAfterEscape && chars[i] !=
'-')
00123
break;
00124
if (!onlyNullBitsSinceLastBoundary)
00125
break;
00126 shifted = FALSE;
00127 stepNo = 0;
00128 }
00129 }
else {
00130
if (chars[i] ==
'+') {
00131 shifted = TRUE;
00132 rightAfterEscape = TRUE;
00133 }
00134 }
00135 }
00136
return i;
00137 }
00138
00139
class QUtf7Decoder :
public QTextDecoder {
00140
00141 ushort uc;
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
char stepNo;
00153
00154
bool shifted;
00155
00156
00157
bool rightAfterEscape;
00158
public:
00159 QUtf7Decoder() : uc(0), stepNo(0), shifted(FALSE), rightAfterEscape(FALSE)
00160 {
00161 }
00162
00163
private:
00164
inline void resetParser()
00165 {
00166 uc = 0;
00167 stepNo = 0;
00168 shifted = FALSE;
00169 rightAfterEscape = FALSE;
00170 }
00171
00172
public:
00173
QString toUnicode(
const char* chars,
int len)
00174 {
00175
QString result =
"";
00176
for (
int i=0; i<len; i++) {
00177 uchar ch = chars[i];
00178
00179
00180
00181
00182
if ( ch > 127 ) {
00183 qWarning(
"QUtf7Decoder: 8bit char found in input. "
00184
"Parser has been re-initialized!");
00185 resetParser();
00186 result += QChar::replacement;
00187
continue;
00188 }
00189
00190
if (shifted) {
00191
00192
00193
00194
00195
00196
if ( rightAfterEscape && ch ==
'-' ) {
00197
00198
00199 resetParser();
00200 result +=
QChar(
'+');
00201
00202
continue;
00203 }
00204
00205
00206
00207
00208 ushort bits;
00209
if ( ch >=
'A' && ch <=
'Z' ) {
00210 bits = ch -
'A';
00211 }
else if ( ch >=
'a' && ch <=
'z' ) {
00212 bits = ch -
'a' + 26;
00213 }
else if ( ch >=
'0' && ch <=
'9' ) {
00214 bits = ch -
'0' + 52;
00215 }
else if ( ch ==
'+' ) {
00216 bits = 62;
00217 }
else if ( ch ==
'/' ) {
00218 bits = 63;
00219 }
else {
00220 bits = 0;
00221
00222
00223
00224
00225
00226
if ( rightAfterEscape ) {
00227
00228
00229
00230
00231 qWarning(
"QUtf7Decoder: ill-formed input: "
00232
"non-base64 char after escaping \"+\"!");
00233 }
00234
00235
if (stepNo >= 1 && uc) {
00236 qWarning(
"QUtf7Decoder: ill-formed sequence: "
00237
"non-zero bits in shifted-sequence tail!");
00238 }
00239 resetParser();
00240
00241
00242
00243
if ( ch ==
'-' )
00244
continue;
00245
00246 }
00247
00248
if ( shifted ) {
00249
00250
00251
00252
00253
00254
switch (stepNo) {
00255
00256
case 0: uc = bits << 10;
break;
00257
00258
case 1: uc |= bits << 4;
break;
00259
00260
00261
case 2: uc |= bits >> 2; result +=
QChar(uc);
00262
00263 uc = bits << 14;
break;
00264
case 3: uc |= bits << 8;
break;
00265
case 4: uc |= bits << 2;
break;
00266
00267
00268
case 5: uc |= bits >> 4; result +=
QChar(uc);
00269
00270 uc = bits << 12;
break;
00271
case 6: uc |= bits << 6;
break;
00272
00273
00274
00275
case 7: uc |= bits; result +=
QChar(uc);
00276 uc = 0;
break;
00277
default: ;
00278 }
00279
00280 stepNo++;
00281 stepNo %= 8;
00282 rightAfterEscape = FALSE;
00283
00284
continue;
00285 }
00286 }
00287
00288
00289
00290
00291
00292
00293
00294
00295
if ( ch ==
'+' ) {
00296
00297
00298 shifted = TRUE;
00299 stepNo = 0;
00300
00301
00302 rightAfterEscape = TRUE;
00303 }
else {
00304
00305 result +=
QChar(ch);
00306 }
00307 }
00308
00309
return result;
00310
00311 }
00312
00313 };
00314
00315
/**
 * Creates a fresh decoder for this codec.
 *
 * @return a newly allocated QUtf7Decoder; the caller receives the raw
 *         pointer produced by new.
 */
QTextDecoder* QUtf7Codec::makeDecoder() const
{
  QTextDecoder* decoder = new QUtf7Decoder;
  return decoder;
}
00319
00320
00321
class QUtf7Encoder :
public QTextEncoder {
00322 uchar dontNeedEncodingSet[16];
00323 ushort outbits;
00324 uint stepNo : 2;
00325
bool shifted : 1;
00326
bool mayContinueShiftedSequence : 1;
00327
public:
00328 QUtf7Encoder(
bool encOpt,
bool encLwsp)
00329 : outbits(0), stepNo(0),
00330 shifted(FALSE), mayContinueShiftedSequence(FALSE)
00331 {
00332
for (
int i = 0; i < 16 ; i++) {
00333 dontNeedEncodingSet[i] = directSet[i];
00334
if (!encOpt)
00335 dontNeedEncodingSet[i] |= optDirectSet[i];
00336 }
00337
if(!encLwsp) {
00338 dontNeedEncodingSet[
' '/8] |= 0x80 >> (
' '%8);
00339 dontNeedEncodingSet[
'\n'/8] |= 0x80 >> (
'\n'%8);
00340 dontNeedEncodingSet[
'\r'/8] |= 0x80 >> (
'\r'%8);
00341 dontNeedEncodingSet[
'\t'/8] |= 0x80 >> (
'\t'%8);
00342 }
00343 }
00344
00345
private:
00346
00347
char toBase64( ushort u ) {
00348
if ( u < 26 )
00349
return (
char)u +
'A';
00350
else if ( u < 52 )
00351
return (
char)u - 26 +
'a';
00352
else if ( u < 62 )
00353
return (
char)u - 52 +
'0';
00354
else if ( u == 62 )
00355
return '+';
00356
else
00357
return '/';
00358 }
00359
00360
void addToShiftedSequence(QCString::Iterator & t, ushort u) {
00361
switch (stepNo) {
00362
00363
case 0:
00364 *t++ = toBase64( u >> 10 );
00365 *t++ = toBase64( (u & 0x03FF ) >> 4 );
00366
00367 outbits = (u & 0x000F) << 2;
00368
break;
00369
00370
00371
00372
case 1:
00373
if (!mayContinueShiftedSequence) {
00374
00375 *t++ = toBase64( outbits | ( u >> 14 ) );
00376 }
00377 *t++ = toBase64( (u & 0x3F00 ) >> 8 );
00378 *t++ = toBase64( (u & 0x00FC ) >> 2 );
00379
00380 outbits = (u & 0x0003) << 4;
00381
break;
00382
00383
00384
00385
case 2:
00386
if (!mayContinueShiftedSequence) {
00387
00388 *t++ = toBase64( outbits | ( u >> 12 ) );
00389 }
00390 *t++ = toBase64( (u & 0x0FFF) >> 6 );
00391 *t++ = toBase64( u & 0x003F );
00392
break;
00393
00394
default: ;
00395 }
00396 stepNo = (stepNo + 1) % 3;
00397 }
00398
00399
void endShiftedSequence(QCString::Iterator & t) {
00400
switch (stepNo) {
00401
case 1:
00402
case 2:
00403 *t++ = toBase64( outbits );
00404
break;
00405
case 0:
00406
default: ;
00407 }
00408 outbits = 0;
00409 }
00410
00411
00412
00413
00414
00415
bool continueOK( ushort u ) {
00416
return stepNo == 0 ||
00417 ( stepNo == 1 && (u & 0xF000) == 0 ) ||
00418 ( stepNo == 2 && (u & 0xC000) == 0 );
00419 }
00420
00421
void processDoesntNeedEncoding(QCString::Iterator & t, ushort ch) {
00422
00423
if (shifted) {
00424 endShiftedSequence(t);
00425
00426
if (isOfSet((
char)ch,base64Set) || ch ==
'-' ) {
00427 *t++ =
'-';
00428 }
00429 }
else if (mayContinueShiftedSequence) {
00430
00431
00432 mayContinueShiftedSequence = FALSE;
00433
if (isOfSet(ch,base64Set) || ch ==
'-' ) {
00434 *t++ =
'-';
00435 }
00436 }
00437 *t++ = (uchar)ch;
00438 shifted = FALSE;
00439 stepNo = 0;
00440 }
00441
00442
public:
00443
QCString fromUnicode(
const QString & uc,
int & len_in_out)
00444 {
00445
00446
00447
00448
00449
00450
int maxreslen = 3 * len_in_out + 5;
00451
QCString result( maxreslen );
00452
00453
#if 0
00454
00455 cout <<
"\nlen_in_out: " << len_in_out
00456 <<
"; shifted: " << (shifted ?
"true" :
"false")
00457 <<
";\n" <<
"mayContinue: "
00458 << (mayContinueShiftedSequence ?
"true" :
"false")
00459 <<
"; stepNo: " << stepNo <<
";\n"
00460 <<
"outbits: " << outbits << endl;
00461
00462
#endif
00463
00464
00465
const QChar * s = uc.unicode();
00466 QCString::Iterator t = result.data();
00467
00468
if ( uc.isNull() ) {
00469
00470
if ( mayContinueShiftedSequence )
00471 *t++ =
'-';
00472 }
else {
00473
00474
for (
int i = 0 ; i < len_in_out ;
00475 i++ ) {
00476 ushort ch = s[i].unicode();
00477
00478
00479
00480
00481
if ( ch < 128 ) {
00482
00483
00484
00485
00486
if ( isOfSet((uchar)ch,dontNeedEncodingSet) ) {
00487 processDoesntNeedEncoding(t,ch);
00488
continue;
00489 }
else if ( ch ==
'+' ) {
00490
00491
if (shifted || mayContinueShiftedSequence) {
00492
00493
00494
00495
00496 addToShiftedSequence(t,ch);
00497 mayContinueShiftedSequence = FALSE;
00498 shifted = TRUE;
00499 }
else {
00500
00501 *t++ =
'+';
00502 *t++ =
'-';
00503 }
00504
continue;
00505 }
00506 }
00507
00508
00509
00510
if (!shifted && (!mayContinueShiftedSequence || !continueOK(ch) ) ) {
00511 *t++ =
'+';
00512 stepNo = 0;
00513 }
00514 addToShiftedSequence(t,ch);
00515 shifted = TRUE;
00516 mayContinueShiftedSequence = FALSE;
00517 }
00518
00519
if ( shifted ) {
00520 endShiftedSequence(t);
00521 mayContinueShiftedSequence = TRUE;
00522 };
00523 shifted = FALSE;
00524 }
00525
00526 *t =
'\0';
00527 len_in_out = t - result.data();
00528
00529
#if 0
00530
cout <<
"len_in_out: " << len_in_out <<
"; "
00531 <<
"mayContinue: " << (mayContinueShiftedSequence ?
"true" :
"false")
00532 <<
"; stepNo: " << stepNo << endl;
00533
#endif
00534
00535 Q_ASSERT(len_in_out <= maxreslen-1);
00536
00537
return result;
00538 }
00539
00540 };
00541
00542
/**
 * Creates a fresh encoder for this codec.
 *
 * Both constructor flags of QUtf7Encoder (encOpt, encLwsp) are passed as
 * false.
 *
 * @return a newly allocated QUtf7Encoder.
 */
QTextEncoder* QUtf7Codec::makeEncoder() const
{
  const bool encOpt = false;
  const bool encLwsp = false;
  return new QUtf7Encoder( encOpt, encLwsp );
}
00545
00546
/**
 * Creates a fresh encoder for the strict codec variant.
 *
 * The encOpt flag of QUtf7Encoder is passed as true, the encLwsp flag as
 * false.
 *
 * @return a newly allocated QUtf7Encoder.
 */
QTextEncoder* QStrictUtf7Codec::makeEncoder() const
{
  const bool encOpt = true;
  const bool encLwsp = false;
  return new QUtf7Encoder( encOpt, encLwsp );
}
00549
00550
#endif // QT_NO_TEXTCODEC