00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
#include "kmimemagic.h"
00019
#include <kdebug.h>
00020
#include <kapplication.h>
00021
#include <qfile.h>
00022
#include <ksimpleconfig.h>
00023
#include <kstandarddirs.h>
00024
#include <kstaticdeleter.h>
00025
#include <klargefile.h>
00026
#include <assert.h>
00027
00028
static int fsmagic(
struct config_rec* conf,
const char *fn, KDE_struct_stat *sb);
00029
static void process(
struct config_rec* conf,
const QString &);
00030
static int ascmagic(
struct config_rec* conf,
unsigned char *buf,
int nbytes);
00031
static int tagmagic(
unsigned char *buf,
int nbytes);
00032
static int textmagic(
struct config_rec* conf,
unsigned char *,
int);
00033
00034
static void tryit(
struct config_rec* conf,
unsigned char *buf,
int nb);
00035
static int match(
struct config_rec* conf,
unsigned char *,
int);
00036
00037
KMimeMagic* KMimeMagic::s_pSelf;
00038
static KStaticDeleter<KMimeMagic> kmimemagicsd;
00039
00040 KMimeMagic*
KMimeMagic::self()
00041 {
00042
if( !s_pSelf )
00043 initStatic();
00044
return s_pSelf;
00045 }
00046
00047
void KMimeMagic::initStatic()
00048 {
00049 s_pSelf = kmimemagicsd.setObject( s_pSelf,
new KMimeMagic() );
00050 s_pSelf->
setFollowLinks(
true );
00051 }
00052
00053
#include <stdio.h>
00054
#include <unistd.h>
00055
#include <stdlib.h>
00056
#include <sys/wait.h>
00057
#include <sys/types.h>
00058
#include <sys/stat.h>
00059
#include <fcntl.h>
00060
#include <errno.h>
00061
#include <ctype.h>
00062
#include <time.h>
00063
#include <utime.h>
00064
#include <stdarg.h>
00065
#include <qregexp.h>
00066
#include <qstring.h>
00067
00068
00069
00070
00071
00072
00073
00074
00075
/*
 * Either debug switch also enables DEBUG_LINENUMBERS, which adds the
 * source line number to every parsed rule (struct magic::lineno).
 */
#if defined(DEBUG_MIMEMAGIC) || defined(DEBUG_APPRENTICE)
#define DEBUG_LINENUMBERS
#endif

/* Status codes used by the routines in this file. */
#define DECLINED 999
#define ERROR    998
#define OK       0
00085
00086
00087
00088
00089
/* MIME types reported for files whose content cannot be classified. */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"

/* MIME types reported by fsmagic() for non-regular files. */
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"

#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"

#define MAXMIMESTRING 256

#define HOWMANY   1024  /* number of bytes process() reads from a file */
#define MAXDESC   50    /* size of struct magic::desc */
#define MAXstring 64    /* size of VALUETYPE::s */
00110
00111
typedef union VALUETYPE {
00112
unsigned char b;
00113
unsigned short h;
00114
unsigned long l;
00115
char s[MAXstring];
00116
unsigned char hs[2];
00117
unsigned char hl[4];
00118 } VALUETYPE;
00119
00120
struct magic {
00121
struct magic *
next;
00122
#ifdef DEBUG_LINENUMBERS
00123
int lineno;
00124
#endif
00125
00126
short flag;
00127
#define INDIR 1
00128
#define UNSIGNED 2
00129
short cont_level;
00130
struct {
00131
char type;
00132
long offset;
00133 } in;
00134
long offset;
00135
unsigned char reln;
00136
char type;
00137
char vallen;
00138
#define BYTE 1
00139
#define SHORT 2
00140
#define LONG 4
00141
#define STRING 5
00142
#define DATE 6
00143
#define BESHORT 7
00144
#define BELONG 8
00145
#define BEDATE 9
00146
#define LESHORT 10
00147
#define LELONG 11
00148
#define LEDATE 12
00149
VALUETYPE value;
00150
unsigned long mask;
00151
char nospflag;
00152
00153
00154
char desc[MAXDESC];
00155 };
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
/*
 * Layout of one tar header block.
 * NOTE(review): presumably consumed by is_tar()/from_oct(), whose
 * definitions are not in this part of the file -- confirm there.
 */
#define RECORDSIZE 512
#define NAMSIZ     100
#define TUNMLEN    32
#define TGNMLEN    32

union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/*
 * Magic string expected in header.magic.
 * NOTE(review): header.magic is 8 bytes wide; confirm that the trailing
 * whitespace of this literal is intact (the classic value is "ustar"
 * followed by two blanks).
 */
#define TMAGIC "ustar "
00198
00199
00200
00201
00202
/* Forward declarations of the file-local helpers. */
static int is_tar(unsigned char *, int);
static unsigned long signextend(struct magic *, unsigned long);
static int getvalue(struct magic *, char **);
static int hextoint(int);
static char *getstr(char *, char *, int, int *);
static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
static int mcheck(union VALUETYPE *, struct magic *);
static int mconvert(union VALUETYPE *, struct magic *);
static long from_oct(int, char *);
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
/* Language flags: one bit per text type, combinable in names[].type. */
#define L_HTML 0x001
#define L_C    0x002
#define L_MAKE 0x004
#define L_PLI  0x008
#define L_MACH 0x010
#define L_PAS  0x020
#define L_JAVA 0x040
#define L_CPP  0x080
#define L_MAIL 0x100
#define L_NEWS 0x200
#define L_DIFF 0x400

/* Position of each language; L_x equals (1 << P_x). */
#define P_HTML 0
#define P_C    1
#define P_MAKE 2
#define P_PLI  3
#define P_MACH 4
#define P_PAS  5
#define P_JAVA 6
#define P_CPP  7
#define P_MAIL 8
#define P_NEWS 9
#define P_DIFF 10
00249
00250
/* One row of the types[] table. */
typedef struct asc_type {
    const char *type;   /* MIME type name */
    int         kwords;
    double      weight;
} asc_type;
00255
00256
/* Text types, listed in P_* order (types[P_HTML] is "text/html", ...). */
static const asc_type types[] = {
    { "text/html",        19, 2   },
    { "text/x-c",          9, 1.3 },
    { "text/x-makefile",   4, 1.9 },
    { "text/x-pli",        1, 3   },
    { "text/x-assembler",  6, 2.1 },
    { "text/x-pascal",     1, 1   },
    { "text/x-java",      14, 1   },
    { "text/x-c++",       14, 1   },
    { "message/rfc822",    4, 1.9 },
    { "message/news",      3, 2   },
    { "text/x-diff",       4, 2   }
};

/* Number of rows in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00271
00272
static struct names {
00273
const char *
name;
00274
short type;
00275 }
const names[] = {
00276 {
00277
"<html", L_HTML
00278 },
00279 {
00280
"<HTML", L_HTML
00281 },
00282 {
00283
"<head", L_HTML
00284 },
00285 {
00286
"<HEAD", L_HTML
00287 },
00288 {
00289
"<body", L_HTML
00290 },
00291 {
00292
"<BODY", L_HTML
00293 },
00294 {
00295
"<title", L_HTML
00296 },
00297 {
00298
"<TITLE", L_HTML
00299 },
00300 {
00301
"<h1", L_HTML
00302 },
00303 {
00304
"<H1", L_HTML
00305 },
00306 {
00307
"<a", L_HTML
00308 },
00309 {
00310
"<A", L_HTML
00311 },
00312 {
00313
"<img", L_HTML
00314 },
00315 {
00316
"<IMG", L_HTML
00317 },
00318 {
00319
"<!--", L_HTML
00320 },
00321 {
00322
"<!doctype", L_HTML
00323 },
00324 {
00325
"<!DOCTYPE", L_HTML
00326 },
00327 {
00328
"<div", L_HTML
00329 },
00330 {
00331
"<DIV", L_HTML
00332 },
00333 {
00334
"<frame", L_HTML
00335 },
00336 {
00337
"<FRAME", L_HTML
00338 },
00339 {
00340
"<frameset", L_HTML
00341 },
00342 {
00343
"<FRAMESET", L_HTML
00344 },
00345 {
00346
"<script", L_HTML
00347 },
00348 {
00349
"<SCRIPT", L_HTML
00350 },
00351 {
00352
"/*", L_C|L_CPP|L_JAVA
00353 },
00354 {
00355
"//", L_CPP|L_JAVA
00356 },
00357 {
00358
"#include", L_C|L_CPP
00359 },
00360 {
00361
"char", L_C|L_CPP|L_JAVA
00362 },
00363 {
00364
"double", L_C|L_CPP|L_JAVA
00365 },
00366 {
00367
"extern", L_C|L_CPP
00368 },
00369 {
00370
"float", L_C|L_CPP|L_JAVA
00371 },
00372 {
00373
"real", L_C|L_CPP|L_JAVA
00374 },
00375 {
00376
"struct", L_C|L_CPP
00377 },
00378 {
00379
"union", L_C|L_CPP
00380 },
00381 {
00382
"implements", L_JAVA
00383 },
00384 {
00385
"super", L_JAVA
00386 },
00387 {
00388
"import", L_JAVA
00389 },
00390 {
00391
"class", L_CPP|L_JAVA
00392 },
00393 {
00394
"public", L_CPP|L_JAVA
00395 },
00396 {
00397
"private", L_CPP|L_JAVA
00398 },
00399 {
00400
"CFLAGS", L_MAKE
00401 },
00402 {
00403
"LDFLAGS", L_MAKE
00404 },
00405 {
00406
"all:", L_MAKE
00407 },
00408 {
00409
".PHONY:", L_MAKE
00410 },
00411 {
00412
"srcdir", L_MAKE
00413 },
00414 {
00415
"exec_prefix", L_MAKE
00416 },
00417
00418
00419
00420
00421 {
00422
".ascii", L_MACH
00423 },
00424 {
00425
".asciiz", L_MACH
00426 },
00427 {
00428
".byte", L_MACH
00429 },
00430 {
00431
".even", L_MACH
00432 },
00433 {
00434
".globl", L_MACH
00435 },
00436 {
00437
"clr", L_MACH
00438 },
00439 {
00440
"(input", L_PAS
00441 },
00442 {
00443
"dcl", L_PLI
00444 },
00445 {
00446
"Received:", L_MAIL
00447 },
00448
00449
00450
00451 {
00452
"Return-Path:", L_MAIL
00453 },
00454 {
00455
"Cc:", L_MAIL
00456 },
00457 {
00458
"Newsgroups:", L_NEWS
00459 },
00460 {
00461
"Path:", L_NEWS
00462 },
00463 {
00464
"Organization:", L_NEWS
00465 },
00466 {
00467
"---", L_DIFF
00468 },
00469 {
00470
"+++", L_DIFF
00471 },
00472 {
00473
"***", L_DIFF
00474 },
00475 {
00476
"@@", L_DIFF
00477 },
00478 {
00479 NULL, 0
00480 }
00481 };
00482
00493
class KMimeMagicUtimeConf
00494 {
00495
public:
00496 KMimeMagicUtimeConf()
00497 {
00498 tmpDirs <<
QString::fromLatin1(
"/tmp");
00499
00500
00501
00502
QStringList confDirs =
KGlobal::dirs()->
resourceDirs(
"config" );
00503
if ( !confDirs.isEmpty() )
00504 {
00505
QString globalConf = confDirs.last() +
"kmimemagicrc";
00506
if (
QFile::exists( globalConf ) )
00507 {
00508
KSimpleConfig cfg( globalConf );
00509 cfg.setGroup(
"Settings" );
00510 tmpDirs = cfg.readListEntry(
"atimeDirs" );
00511 }
00512
if ( confDirs.count() > 1 )
00513 {
00514
QString localConf = confDirs.first() +
"kmimemagicrc";
00515
if (
QFile::exists( localConf ) )
00516 {
00517
KSimpleConfig cfg( localConf );
00518 cfg.setGroup(
"Settings" );
00519 tmpDirs += cfg.readListEntry(
"atimeDirs" );
00520 }
00521 }
00522
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00523 {
00524
QString dir = *it;
00525
if ( !dir.
isEmpty() && dir[ dir.
length()-1 ] !=
'/' )
00526 (*it) +=
'/';
00527 }
00528 }
00529
#if 0
00530
00531
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00532
kdDebug(7018) <<
" atimeDir: " << *it <<
endl;
00533
#endif
00534
}
00535
00536
bool restoreAccessTime(
const QString & file )
const
00537
{
00538
QString dir = file.
left( file.
findRev(
'/' ) );
00539
bool res = tmpDirs.contains( dir );
00540
00541
return res;
00542 }
00543
QStringList tmpDirs;
00544 };
00545
00546
00547
struct config_rec {
00548
bool followLinks;
00549
QString resultBuf;
00550
int accuracy;
00551
00552
struct magic *magic,
00553 *last;
00554 KMimeMagicUtimeConf * utimeConf;
00555 };
00556
00557
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid: walks the rule list and reports the first node whose address
 * consists of four printable bytes, which is taken as a sign that a
 * pointer was overwritten with text.
 *
 * NOTE(review): `conf` is not declared in the scope of this file-level
 * function, while its callers invoke it without arguments from KMimeMagic
 * member functions. Enabling MIME_MAGIC_DEBUG_TABLE therefore still needs
 * the rule list to be made reachable here.
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;

        if (isprint((addr >> 24) & 255) &&
            isprint((addr >> 16) & 255) &&
            isprint((addr >> 8) & 255) &&
            isprint(addr & 255)) {
            /* Report the four address bytes and, when known, the line of
             * the rule preceding the damaged one. */
            kdDebug(7018) << "test_table: POINTER CLOBBERED! " << "m=\""
                          << (char) ((addr >> 24) & 255)
                          << (char) ((addr >> 16) & 255)
                          << (char) ((addr >> 8) & 255)
                          << (char) (addr & 255)
                          << "\" line="
#ifdef DEBUG_LINENUMBERS
                          << (prevm ? prevm->lineno : -1)
#else
                          << -1
#endif
                          << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00583
00584
/*
 * Advances the caller's local pointer `l` past a run of ASCII whitespace.
 * Expands to a braced block; it is used as a statement ("EATAB;").
 */
#define EATAB { \
    while (isascii((unsigned char) *l) && isspace((unsigned char) *l)) \
        ++l; \
}
00586
00587
int KMimeMagic::parse_line(
char *line,
int *rule,
int lineno)
00588 {
00589
int ws_offset;
00590
00591
00592
if (line[0]) {
00593 line[strlen(line) - 1] =
'\0';
00594 }
00595
00596 ws_offset = 0;
00597
while (line[ws_offset] && isspace(line[ws_offset])) {
00598 ws_offset++;
00599 }
00600
00601
00602
if (line[ws_offset] == 0) {
00603
return 0;
00604 }
00605
00606
if (line[ws_offset] ==
'#')
00607
return 0;
00608
00609
00610 (*rule)++;
00611
00612
00613
return (parse(line + ws_offset, lineno) != 0);
00614 }
00615
00616
00617
00618
00619
int KMimeMagic::apprentice(
const QString& magicfile )
00620 {
00621 FILE *f;
00622
char line[BUFSIZ + 1];
00623
int errs = 0;
00624
int lineno;
00625
int rule = 0;
00626
QCString fname;
00627
00628
if (magicfile.
isEmpty())
00629
return -1;
00630 fname =
QFile::encodeName(magicfile);
00631 f = fopen(fname,
"r");
00632
if (f == NULL) {
00633
kdError(7018) <<
"can't read magic file " << fname.data() <<
": " << strerror(errno) <<
endl;
00634
return -1;
00635 }
00636
00637
00638
for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00639
if (parse_line(line, &rule, lineno))
00640 errs++;
00641
00642 fclose(f);
00643
00644
#ifdef DEBUG_APPRENTICE
00645
kdDebug(7018) <<
"apprentice: conf=" << conf <<
" file=" << magicfile <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
00646
kdDebug(7018) <<
"apprentice: read " << lineno <<
" lines, " << rule <<
" rules, " << errs <<
" errors" <<
endl;
00647
#endif
00648
00649
#ifdef MIME_MAGIC_DEBUG_TABLE
00650
test_table();
00651
#endif
00652
00653
return (errs ? -1 : 0);
00654 }
00655
00656
int KMimeMagic::buff_apprentice(
char *buff)
00657 {
00658
char line[BUFSIZ + 2];
00659
int errs = 0;
00660
int lineno = 1;
00661
char *start = buff;
00662
char *
end;
00663
int count = 0;
00664
int rule = 0;
00665
int len = strlen(buff) + 1;
00666
00667
00668
do {
00669 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00670 strncpy(line, start, count);
00671 line[count] =
'\0';
00672
if ((
end = strchr(line,
'\n'))) {
00673 *(++
end) =
'\0';
00674 count = strlen(line);
00675 }
else
00676 strcat(line,
"\n");
00677 start += count;
00678 len -= count;
00679
if (parse_line(line, &rule, lineno))
00680 errs++;
00681 lineno++;
00682 }
while (len > 0);
00683
00684
#ifdef DEBUG_APPRENTICE
00685
kdDebug(7018) <<
"buff_apprentice: conf=" << conf <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
00686
kdDebug(7018) <<
"buff_apprentice: read " << lineno <<
" lines, " << rule <<
" rules, " << errs <<
" errors" <<
endl;
00687
#endif
00688
00689
#ifdef MIME_MAGIC_DEBUG_TABLE
00690
test_table();
00691
#endif
00692
00693
return (errs ? -1 : 0);
00694 }
00695
00696
00697
00698
00699
static unsigned long
00700 signextend(
struct magic *m,
unsigned long v)
00701 {
00702
if (!(m->flag & UNSIGNED))
00703
switch (m->type) {
00704
00705
00706
00707
00708
00709
case BYTE:
00710 v = (
char) v;
00711
break;
00712
case SHORT:
00713
case BESHORT:
00714
case LESHORT:
00715 v = (
short) v;
00716
break;
00717
case DATE:
00718
case BEDATE:
00719
case LEDATE:
00720
case LONG:
00721
case BELONG:
00722
case LELONG:
00723 v = (
long) v;
00724
break;
00725
case STRING:
00726
break;
00727
default:
00728
kdError(7018) <<
"" <<
"signextend" <<
": can't happen: m->type=" << m->type <<
endl;
00729
return ERROR;
00730 }
00731
return v;
00732 }
00733
00734
00735
00736
00737
int KMimeMagic::parse(
char *l,
int
00738 #ifdef DEBUG_LINENUMBERS
00739 lineno
00740 #endif
00741 )
00742 {
00743
int i = 0;
00744
struct magic *m;
00745
char *t,
00746 *s;
00747
00748
if ((m = (
struct magic *) calloc(1,
sizeof(
struct magic))) == NULL) {
00749
kdError(7018) <<
"parse: Out of memory." <<
endl;
00750
return -1;
00751 }
00752
00753 m->next = NULL;
00754
if (!conf->magic || !conf->last) {
00755 conf->magic = conf->last = m;
00756 }
else {
00757 conf->last->next = m;
00758 conf->last = m;
00759 }
00760
00761
00762 m->flag = 0;
00763 m->cont_level = 0;
00764
#ifdef DEBUG_LINENUMBERS
00765
m->lineno = lineno;
00766
#endif
00767
00768
while (*l ==
'>') {
00769 ++l;
00770 m->cont_level++;
00771 }
00772
00773
if (m->cont_level != 0 && *l ==
'(') {
00774 ++l;
00775 m->flag |= INDIR;
00776 }
00777
00778 m->offset = (
int) strtol(l, &t, 0);
00779
if (l == t) {
00780
kdError(7018) <<
"parse: offset " << l <<
" invalid" <<
endl;
00781 }
00782 l = t;
00783
00784
if (m->flag & INDIR) {
00785 m->in.type = LONG;
00786 m->in.offset = 0;
00787
00788
00789
00790
if (*l ==
'.') {
00791
switch (*++l) {
00792
case 'l':
00793 m->in.type = LONG;
00794
break;
00795
case 's':
00796 m->in.type = SHORT;
00797
break;
00798
case 'b':
00799 m->in.type = BYTE;
00800
break;
00801
default:
00802
kdError(7018) <<
"parse: indirect offset type " << *l <<
" invalid" <<
endl;
00803
break;
00804 }
00805 l++;
00806 }
00807 s = l;
00808
if (*l ==
'+' || *l ==
'-')
00809 l++;
00810
if (isdigit((
unsigned char) *l)) {
00811 m->in.offset = strtol(l, &t, 0);
00812
if (*s ==
'-')
00813 m->in.offset = -m->in.offset;
00814 }
else
00815 t = l;
00816
if (*t++ !=
')') {
00817
kdError(7018) <<
"parse: missing ')' in indirect offset" <<
endl;
00818 }
00819 l = t;
00820 }
00821
while (isascii((
unsigned char) *l) && isdigit((
unsigned char) *l))
00822 ++l;
00823 EATAB;
00824
00825
#define NBYTE 4
00826
#define NSHORT 5
00827
#define NLONG 4
00828
#define NSTRING 6
00829
#define NDATE 4
00830
#define NBESHORT 7
00831
#define NBELONG 6
00832
#define NBEDATE 6
00833
#define NLESHORT 7
00834
#define NLELONG 6
00835
#define NLEDATE 6
00836
00837
if (*l ==
'u') {
00838 ++l;
00839 m->flag |= UNSIGNED;
00840 }
00841
00842
if (strncmp(l,
"byte", NBYTE) == 0) {
00843 m->type = BYTE;
00844 l += NBYTE;
00845 }
else if (strncmp(l,
"short", NSHORT) == 0) {
00846 m->type = SHORT;
00847 l += NSHORT;
00848 }
else if (strncmp(l,
"long", NLONG) == 0) {
00849 m->type = LONG;
00850 l += NLONG;
00851 }
else if (strncmp(l,
"string", NSTRING) == 0) {
00852 m->type = STRING;
00853 l += NSTRING;
00854 }
else if (strncmp(l,
"date", NDATE) == 0) {
00855 m->type = DATE;
00856 l += NDATE;
00857 }
else if (strncmp(l,
"beshort", NBESHORT) == 0) {
00858 m->type = BESHORT;
00859 l += NBESHORT;
00860 }
else if (strncmp(l,
"belong", NBELONG) == 0) {
00861 m->type = BELONG;
00862 l += NBELONG;
00863 }
else if (strncmp(l,
"bedate", NBEDATE) == 0) {
00864 m->type = BEDATE;
00865 l += NBEDATE;
00866 }
else if (strncmp(l,
"leshort", NLESHORT) == 0) {
00867 m->type = LESHORT;
00868 l += NLESHORT;
00869 }
else if (strncmp(l,
"lelong", NLELONG) == 0) {
00870 m->type = LELONG;
00871 l += NLELONG;
00872 }
else if (strncmp(l,
"ledate", NLEDATE) == 0) {
00873 m->type = LEDATE;
00874 l += NLEDATE;
00875 }
else {
00876
kdError(7018) <<
"parse: type " << l <<
" invalid" <<
endl;
00877
return -1;
00878 }
00879
00880
if (*l ==
'&') {
00881 ++l;
00882 m->mask = signextend(m, strtol(l, &l, 0));
00883 }
else
00884 m->mask = (
unsigned long) ~0L;
00885 EATAB;
00886
00887
switch (*l) {
00888
case '>':
00889
case '<':
00890
00891
case '&':
00892
case '^':
00893
case '=':
00894 m->reln = *l;
00895 ++l;
00896
break;
00897
case '!':
00898
if (m->type != STRING) {
00899 m->reln = *l;
00900 ++l;
00901
break;
00902 }
00903
00904
default:
00905
if (*l ==
'x' && isascii((
unsigned char) l[1]) &&
00906 isspace((
unsigned char) l[1])) {
00907 m->reln = *l;
00908 ++l;
00909
goto GetDesc;
00910 }
00911 m->reln =
'=';
00912
break;
00913 }
00914 EATAB;
00915
00916
if (getvalue(m, &l))
00917
return -1;
00918
00919
00920
00921 GetDesc:
00922 EATAB;
00923
if (l[0] ==
'\b') {
00924 ++l;
00925 m->nospflag = 1;
00926 }
else if ((l[0] ==
'\\') && (l[1] ==
'b')) {
00927 ++l;
00928 ++l;
00929 m->nospflag = 1;
00930 }
else
00931 m->nospflag = 0;
00932
00933
while (*l !=
'\0' && *l !=
'#' && i < MAXDESC-1)
00934 m->desc[i++] = *l++;
00935 m->desc[i] =
'\0';
00936
00937
while (--i>0 && isspace( m->desc[i] ))
00938 m->desc[i] =
'\0';
00939
00940
00941
00942
00943
#ifdef DEBUG_APPRENTICE
00944
kdDebug(7018) <<
"parse: line=" << lineno <<
" m=" << m <<
" next=" << m->next <<
" cont=" << m->cont_level <<
" desc=" << (m->desc ? m->desc :
"NULL") <<
endl;
00945
#endif
00946
return 0;
00947 }
00948
00949
00950
00951
00952
00953
00954
static int
00955 getvalue(
struct magic *m,
char **p)
00956 {
00957
int slen;
00958
00959
if (m->type == STRING) {
00960 *p = getstr(*p, m->value.s,
sizeof(m->value.s), &slen);
00961 m->vallen = slen;
00962 }
else if (m->reln !=
'x')
00963 m->value.l = signextend(m, strtol(*p, p, 0));
00964
return 0;
00965 }
00966
00967
00968
00969
00970
00971
00972
static char *
00973 getstr(
register char *s,
register char *p,
int plen,
int *slen)
00974 {
00975
char *origs = s,
00976 *origp = p;
00977
char *pmax = p + plen - 1;
00978
register int c;
00979
register int val;
00980
00981
while ((c = *s++) !=
'\0') {
00982
if (isspace((
unsigned char) c))
00983
break;
00984
if (p >= pmax) {
00985
kdError(7018) <<
"String too long: " << origs <<
endl;
00986
break;
00987 }
00988
if (c ==
'\\') {
00989
switch (c = *s++) {
00990
00991
case '\0':
00992
goto out;
00993
00994
default:
00995 *p++ = (
char) c;
00996
break;
00997
00998
case 'n':
00999 *p++ =
'\n';
01000
break;
01001
01002
case 'r':
01003 *p++ =
'\r';
01004
break;
01005
01006
case 'b':
01007 *p++ =
'\b';
01008
break;
01009
01010
case 't':
01011 *p++ =
'\t';
01012
break;
01013
01014
case 'f':
01015 *p++ =
'\f';
01016
break;
01017
01018
case 'v':
01019 *p++ =
'\v';
01020
break;
01021
01022
01023
case '0':
01024
case '1':
01025
case '2':
01026
case '3':
01027
case '4':
01028
case '5':
01029
case '6':
01030
case '7':
01031 val = c -
'0';
01032 c = *s++;
01033
if (c >=
'0' && c <=
'7') {
01034 val = (val << 3) | (c -
'0');
01035 c = *s++;
01036
if (c >=
'0' && c <=
'7')
01037 val = (val << 3) | (c -
'0');
01038
else
01039 --s;
01040 }
else
01041 --s;
01042 *p++ = (
char) val;
01043
break;
01044
01045
01046
case 'x':
01047 val =
'x';
01048 c = hextoint(*s++);
01049
if (c >= 0) {
01050 val = c;
01051 c = hextoint(*s++);
01052
if (c >= 0) {
01053 val = (val << 4) + c;
01054 c = hextoint(*s++);
01055
if (c >= 0) {
01056 val = (val << 4) + c;
01057 }
else
01058 --s;
01059 }
else
01060 --s;
01061 }
else
01062 --s;
01063 *p++ = (
char) val;
01064
break;
01065 }
01066 }
else
01067 *p++ = (
char) c;
01068 }
01069 out:
01070 *p =
'\0';
01071 *slen = p - origp;
01072
01073
01074
return s;
01075 }
01076
01077
01078
01079
/*
 * Returns the numeric value of the hexadecimal digit @p c,
 * or -1 if @p c is not a hexadecimal digit.
 */
static int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (!isascii(uc))
        return -1;

    if (isdigit(uc))
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;

    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}
01092
01093
01094
01095
01096
static int
01097 mconvert(
union VALUETYPE *p,
struct magic *m)
01098 {
01099
switch (m->type) {
01100
case BYTE:
01101
return 1;
01102
case STRING:
01103
01104 p->s[
sizeof(p->s) - 1] =
'\0';
01105
return 1;
01106
#ifndef WORDS_BIGENDIAN
01107
case SHORT:
01108
#endif
01109
case BESHORT:
01110 p->h = (
short) ((p->hs[0] << 8) | (p->hs[1]));
01111
return 1;
01112
#ifndef WORDS_BIGENDIAN
01113
case LONG:
01114
case DATE:
01115
#endif
01116
case BELONG:
01117
case BEDATE:
01118 p->l = (
long)
01119 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01120
return 1;
01121
#ifdef WORDS_BIGENDIAN
01122
case SHORT:
01123
#endif
01124
case LESHORT:
01125 p->h = (
short) ((p->hs[1] << 8) | (p->hs[0]));
01126
return 1;
01127
#ifdef WORDS_BIGENDIAN
01128
case LONG:
01129
case DATE:
01130
#endif
01131
case LELONG:
01132
case LEDATE:
01133 p->l = (
long)
01134 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01135
return 1;
01136
default:
01137
kdError(7018) <<
"mconvert: invalid type " << m->type <<
endl;
01138
return 0;
01139 }
01140 }
01141
01142
01143
static int
01144 mget(
union VALUETYPE *p,
unsigned char *s,
struct magic *m,
01145
int nbytes)
01146 {
01147
long offset = m->offset;
01148
01149
01150
01151
if (offset + (
int)
sizeof(
union VALUETYPE) > nbytes)
01152 {
01153
int have = nbytes - offset;
01154 memset(p, 0,
sizeof(
union VALUETYPE));
01155
if (have > 0)
01156 memcpy(p, s + offset, have);
01157 }
else
01158 memcpy(p, s + offset,
sizeof(
union VALUETYPE));
01159
01160
if (!mconvert(p, m))
01161
return 0;
01162
01163
if (m->flag & INDIR) {
01164
01165
switch (m->in.type) {
01166
case BYTE:
01167 offset = p->b + m->in.offset;
01168
break;
01169
case SHORT:
01170 offset = p->h + m->in.offset;
01171
break;
01172
case LONG:
01173 offset = p->l + m->in.offset;
01174
break;
01175 }
01176
01177
if (offset + (
int)
sizeof(
union VALUETYPE) > nbytes)
01178 return 0;
01179
01180 memcpy(p, s + offset,
sizeof(
union VALUETYPE));
01181
01182
if (!mconvert(p, m))
01183
return 0;
01184 }
01185
return 1;
01186 }
01187
01188
static int
01189 mcheck(
union VALUETYPE *p,
struct magic *m)
01190 {
01191
register unsigned long l = m->value.l;
01192
register unsigned long v;
01193
int matched;
01194
01195
if ((m->value.s[0] ==
'x') && (m->value.s[1] ==
'\0')) {
01196
kdError(7018) <<
"BOINK" <<
endl;
01197
return 1;
01198 }
01199
switch (m->type) {
01200
case BYTE:
01201 v = p->b;
01202
break;
01203
01204
case SHORT:
01205
case BESHORT:
01206
case LESHORT:
01207 v = p->h;
01208
break;
01209
01210
case LONG:
01211
case BELONG:
01212
case LELONG:
01213
case DATE:
01214
case BEDATE:
01215
case LEDATE:
01216 v = p->l;
01217
break;
01218
01219
case STRING:
01220 l = 0;
01221
01222
01223
01224
01225
01226 v = 0;
01227 {
01228
register unsigned char *a = (
unsigned char *) m->value.s;
01229
register unsigned char *b = (
unsigned char *) p->s;
01230
register int len = m->vallen;
01231 Q_ASSERT(len);
01232
01233
while (--len >= 0)
01234
if ((v = *b++ - *a++) != 0)
01235
break;
01236 }
01237
break;
01238
default:
01239
kdError(7018) <<
"mcheck: invalid type " << m->type <<
endl;
01240
return 0;
01241 }
01242
#if 0
01243
qDebug(
"Before signextend %08x", v);
01244
#endif
01245
v = signextend(m, v) & m->mask;
01246
#if 0
01247
qDebug(
"After signextend %08x", v);
01248
#endif
01249
01250
switch (m->reln) {
01251
case 'x':
01252 matched = 1;
01253
break;
01254
01255
case '!':
01256 matched = v != l;
01257
break;
01258
01259
case '=':
01260 matched = v == l;
01261
break;
01262
01263
case '>':
01264
if (m->flag & UNSIGNED)
01265 matched = v > l;
01266
else
01267 matched = (
long) v > (
long) l;
01268
break;
01269
01270
case '<':
01271
if (m->flag & UNSIGNED)
01272 matched = v < l;
01273
else
01274 matched = (
long) v < (
long) l;
01275
break;
01276
01277
case '&':
01278 matched = (v & l) == l;
01279
break;
01280
01281
case '^':
01282 matched = (v & l) != l;
01283
break;
01284
01285
default:
01286 matched = 0;
01287
kdError(7018) <<
"mcheck: can't happen: invalid relation " << m->reln <<
"." <<
endl;
01288
break;
01289 }
01290
01291
return matched;
01292 }
01293
01294
#if 0
/* This block is compiled out. */

/* States of the scanner in finishResult(). */
typedef enum {
    rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
} rsl_states;

/*
 * Scans the result buffer, which is expected to hold "type/subtype"
 * optionally followed by whitespace and an encoding, and stores the two
 * parts in magicResult.
 *
 * Returns OK on success, DECLINED if the buffer does not have that form
 * and -1 if a part could not be stored.
 */
int
KMimeMagic::finishResult()
{
    int state = rsl_leading_space;
    int type_pos = 0, type_len = 0;
    int encoding_pos = 0, encoding_len = 0;
    int cur_pos;

    /* Loop through the characters in the MIME type */
    for (cur_pos = 0; cur_pos < (int)resultBuf.length(); cur_pos++) {
        if (resultBuf[cur_pos].isSpace()) {
            /* whitespace */
            if (state == rsl_leading_space) {
                /* still in the leading space */
                continue;
            }
            if (state == rsl_type) {
                /* whitespace inside the type: give up */
                return DECLINED;
            }
            if (state == rsl_subtype) {
                /* end of the subtype */
                state++;
                continue;
            }
            if (state == rsl_separator) {
                /* still between subtype and encoding */
                continue;
            }
            if (state == rsl_encoding) {
                /* end of the encoding: done */
                break;
            }
            kdError(7018) << "KMimeMagic::finishResult: bad state " << state << " (ws)" << endl;
            return DECLINED;
        } else if (state == rsl_type &&
                   resultBuf.at(cur_pos) == '/') {
            /* the slash between type and subtype */
            type_len++;
            state++;
        } else {
            /* any other character */
            if (state == rsl_leading_space) {
                /* first character of the type */
                state++;
                type_pos = cur_pos;
                type_len = 1;
                continue;
            }
            if (state == rsl_type ||
                state == rsl_subtype) {
                type_len++;
                continue;
            }
            if (state == rsl_separator) {
                /* first character of the encoding */
                state++;
                encoding_pos = cur_pos;
                encoding_len = 1;
                continue;
            }
            if (state == rsl_encoding) {
                encoding_len++;
                continue;
            }
            kdError(7018) << " KMimeMagic::finishResult: bad state " << state << " (ns)" << endl;
            return DECLINED;
        }
    }

    /* The scan must have got at least as far as the subtype. */
    if (state != rsl_subtype && state != rsl_separator &&
        state != rsl_encoding) {
        return DECLINED;
    }

    /* Store the type. */
    if (state == rsl_subtype || state == rsl_encoding ||
        state == rsl_encoding || state == rsl_separator) {
        magicResult->setMimeType(conf->resultBuf.mid(type_pos, type_len).ascii());
    }

    /* Store the encoding, if there was one. */
    if (state == rsl_encoding)
        magicResult->setEncoding(conf->resultBuf.mid(encoding_pos,
                                 encoding_len).ascii());

    /* Detect failures to store. */
    if (!magicResult->mimeType() ||
        (state == rsl_encoding && !magicResult->encoding())) {
        return -1;
    }

    /* Success. */
    return OK;
}
#endif
01407
01408
01409
01410
01411
01412
01413
static void process(
struct config_rec* conf,
const QString & fn)
01414 {
01415
int fd = 0;
01416
unsigned char buf[HOWMANY + 1];
01417 KDE_struct_stat sb;
01418
int nbytes = 0;
01419
int tagbytes = 0;
01420
QCString fileName =
QFile::encodeName( fn );
01421
01422
01423
01424
01425
if (fsmagic(conf, fileName, &sb) != 0) {
01426
01427
return;
01428 }
01429
if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01430
01431
01432
01433
01434
01435
01436 conf->resultBuf = MIME_BINARY_UNREADABLE;
01437
return;
01438 }
01439
01440
01441
01442
if ((nbytes = read(fd, (
char *) buf, HOWMANY)) == -1) {
01443
kdError(7018) <<
"" << fn <<
" read failed (" << strerror(errno) <<
")." <<
endl;
01444 conf->resultBuf = MIME_BINARY_UNREADABLE;
01445
return;
01446 }
01447
if ((tagbytes = tagmagic(buf, nbytes))) {
01448
01449 lseek(fd, tagbytes, SEEK_SET);
01450 nbytes = read(fd, (
char*)buf, HOWMANY);
01451
if (nbytes < 0) {
01452 conf->resultBuf = MIME_BINARY_UNREADABLE;
01453
return;
01454 }
01455 }
01456
if (nbytes == 0) {
01457 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01458 }
else {
01459 buf[nbytes++] =
'\0';
01460 tryit(conf, buf, nbytes);
01461 }
01462
01463
if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01464 {
01465
01466
01467
01468
01469
01470
struct utimbuf utbuf;
01471 utbuf.actime = sb.st_atime;
01472 utbuf.modtime = sb.st_mtime;
01473 (
void) utime(fileName, &utbuf);
01474 }
01475 (
void)
close(fd);
01476 }
01477
01478
01479
static void tryit(
struct config_rec* conf,
unsigned char *buf,
int nb)
01480 {
01481
01482
if (match(conf, buf, nb))
01483
return;
01484
01485
01486
if (ascmagic(conf, buf, nb) == 1)
01487
return;
01488
01489
01490
if (textmagic(conf, buf, nb))
01491
return;
01492
01493
01494 conf->resultBuf = MIME_BINARY_UNKNOWN;
01495 conf->accuracy = 0;
01496 }
01497
01498
static int
01499 fsmagic(
struct config_rec* conf,
const char *fn, KDE_struct_stat *sb)
01500 {
01501
int ret = 0;
01502
01503
01504
01505
01506
01507 ret = KDE_lstat(fn, sb);
01508
01509
if (ret) {
01510
return 1;
01511
01512 }
01513
01514
01515
01516
01517
01518
01519
switch (sb->st_mode & S_IFMT) {
01520
case S_IFDIR:
01521 conf->resultBuf = MIME_INODE_DIR;
01522
return 1;
01523
case S_IFCHR:
01524 conf->resultBuf = MIME_INODE_CDEV;
01525
return 1;
01526
case S_IFBLK:
01527 conf->resultBuf = MIME_INODE_BDEV;
01528
return 1;
01529
01530
#ifdef S_IFIFO
01531
case S_IFIFO:
01532 conf->resultBuf = MIME_INODE_FIFO;
01533
return 1;
01534
#endif
01535
#ifdef S_IFLNK
01536
case S_IFLNK:
01537 {
01538
char buf[BUFSIZ + BUFSIZ + 4];
01539
register int nch;
01540 KDE_struct_stat tstatbuf;
01541
01542
if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01543 conf->resultBuf = MIME_INODE_LINK;
01544
01545
return 1;
01546 }
01547 buf[nch] =
'\0';
01548
01549
if (*buf ==
'/') {
01550
if (KDE_stat(buf, &tstatbuf) < 0) {
01551 conf->resultBuf = MIME_INODE_LINK;
01552
01553
return 1;
01554 }
01555 }
else {
01556
char *tmp;
01557
char buf2[BUFSIZ + BUFSIZ + 4];
01558
01559 strncpy(buf2, fn, BUFSIZ);
01560 buf2[BUFSIZ] = 0;
01561
01562
if ((tmp = strrchr(buf2,
'/')) == NULL) {
01563 tmp = buf;
01564 }
else {
01565
01566 *++tmp =
'\0';
01567 strcat(buf2, buf);
01568 tmp = buf2;
01569 }
01570
if (KDE_stat(tmp, &tstatbuf) < 0) {
01571 conf->resultBuf = MIME_INODE_LINK;
01572
01573
return 1;
01574 }
else
01575 strcpy(buf, tmp);
01576 }
01577
if (conf->followLinks)
01578 process( conf, QFile::decodeName( buf ) );
01579
else
01580 conf->resultBuf = MIME_INODE_LINK;
01581
return 1;
01582 }
01583
return 1;
01584
#endif
01585
#ifdef S_IFSOCK
01586
#ifndef __COHERENT__
01587
case S_IFSOCK:
01588 conf->resultBuf = MIME_INODE_SOCK;
01589
return 1;
01590
#endif
01591
#endif
01592
case S_IFREG:
01593
break;
01594
default:
01595
kdError(7018) <<
"KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode <<
"." <<
endl;
01596
01597 }
01598
01599
01600
01601
01602
if (sb->st_size == 0) {
01603 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01604
return 1;
01605 }
01606
return 0;
01607 }
01608
01609
01610
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634
01635
static int
01636 match(
struct config_rec* conf,
unsigned char *s,
int nbytes)
01637 {
01638
int cont_level = 0;
01639
union VALUETYPE p;
01640
struct magic *m;
01641
01642
#ifdef DEBUG_MIMEMAGIC
01643
kdDebug(7018) <<
"match: conf=" << conf <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
01644
for (m = conf->magic; m; m = m->next) {
01645
if (isprint((((
unsigned long) m) >> 24) & 255) &&
01646 isprint((((
unsigned long) m) >> 16) & 255) &&
01647 isprint((((
unsigned long) m) >> 8) & 255) &&
01648 isprint(((
unsigned long) m) & 255)) {
01649
kdDebug(7018) <<
"match: POINTER CLOBBERED! " <<
endl;
01650
break;
01651 }
01652 }
01653
#endif
01654
01655
for (m = conf->magic; m; m = m->next) {
01656
#ifdef DEBUG_MIMEMAGIC
01657
kdDebug(7018) <<
"match: line=" << m->lineno <<
" desc=" << m->desc <<
endl;
01658
#endif
01659
memset(&p, 0,
sizeof(
union VALUETYPE));
01660
01661
01662
if (!mget(&p, s, m, nbytes) ||
01663 !mcheck(&p, m)) {
01664
struct magic *m_cont;
01665
01666
01667
01668
01669
if (!m->next || (m->next->cont_level == 0)) {
01670
continue;
01671 }
01672 m_cont = m->next;
01673
while (m_cont && (m_cont->cont_level != 0)) {
01674
#ifdef DEBUG_MIMEMAGIC
01675
kdDebug(7018) <<
"match: line=" << m->lineno <<
" cont=" << m_cont->cont_level <<
" mc=" << m_cont->lineno <<
" mc->next=" << m_cont <<
" " <<
endl;
01676
#endif
01677
01678
01679
01680
01681 m = m_cont;
01682 m_cont = m_cont->next;
01683 }
01684
continue;
01685 }
01686
01687
01688
#ifdef DEBUG_MIMEMAGIC
01689
kdDebug(7018) <<
"match: rule matched, line=" << m->lineno <<
" type=" << m->type <<
" " << ((m->type == STRING) ? m->value.s :
"") <<
endl;
01690
#endif
01691
01692
01693 conf->resultBuf = m->desc;
01694
01695 cont_level++;
01696
01697
01698
01699
01700 m = m->next;
01701
while (m && (m->cont_level != 0)) {
01702
#ifdef DEBUG_MIMEMAGIC
01703
kdDebug(7018) <<
"match: line=" << m->lineno <<
" cont=" << m->cont_level <<
" type=" << m->type <<
" " << ((m->type == STRING) ? m->value.s :
"") <<
endl;
01704
#endif
01705
if (cont_level >= m->cont_level) {
01706
if (cont_level > m->cont_level) {
01707
01708
01709
01710
01711 cont_level = m->cont_level;
01712 }
01713
if (mget(&p, s, m, nbytes) &&
01714 mcheck(&p, m)) {
01715
01716
01717
01718
01719
01720
01721
#ifdef DEBUG_MIMEMAGIC
01722
kdDebug(7018) <<
"continuation matched" <<
endl;
01723
#endif
01724
conf->resultBuf = m->desc;
01725 cont_level++;
01726 }
01727 }
01728
01729 m = m->next;
01730 }
01731
01732
01733
if ( !conf->resultBuf.isEmpty() )
01734 {
01735
#ifdef DEBUG_MIMEMAGIC
01736
kdDebug(7018) <<
"match: matched" <<
endl;
01737
#endif
01738
return 1;
01739 }
01740 }
01741
#ifdef DEBUG_MIMEMAGIC
01742
kdDebug(7018) <<
"match: failed" <<
endl;
01743
#endif
01744
return 0;
01745 }
01746
01747
01748
01749
/*
 * Detect an ID3v2 tag at the start of buf and compute its total length.
 *
 * buf     data to inspect.
 * nbytes  number of valid bytes in buf; fewer than 40 is never examined.
 *
 * Returns the number of bytes occupied by the tag (10-byte header, the
 * 28-bit payload size stored in bytes 6..9, plus 10 more when the footer
 * flag 0x10 is set), or 0 when buf does not start with a plausible tag.
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    /* Major versions above 4 are not accepted. */
    if (buf[3] > 4)
        return 0;
    /* The low four flag bits must be clear. */
    if (buf[5] & 0x0F)
        return 0;
    /*
     * The size is stored as four 7-bit ("synchsafe") bytes; a set top bit
     * means this is not a real tag header, so do not report a bogus length.
     */
    if ((buf[6] | buf[7] | buf[8] | buf[9]) & 0x80)
        return 0;

    int size = 10;          /* the header itself */
    if (buf[5] & 0x10)
        size += 10;         /* footer present */

    size += buf[9];
    size += buf[8] << 7;
    size += buf[7] << 14;
    size += buf[6] << 21;
    return size;
}
01768
01769
01770
01771
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
01772
01773
static int ascmagic(
struct config_rec* conf,
unsigned char *buf,
int nbytes)
01774 {
01775
int i;
01776
double pct, maxpct, pctsum;
01777
double pcts[NTYPES];
01778
int mostaccurate, tokencount;
01779
int typeset, jonly, conly, jconly, cppcomm, ccomm;
01780
int has_escapes = 0;
01781
unsigned char *s;
01782
char nbuf[HOWMANY + 1];
01783
char *token;
01784
register const struct names *p;
01785
int typecount[NTYPES];
01786
01787
01788 conf->accuracy = 70;
01789
01790
01791
01792
01793
01794
01795
if (*buf ==
'.') {
01796
unsigned char *tp = buf + 1;
01797
01798
while (isascii(*tp) && isspace(*tp))
01799 ++tp;
01800
if ((isascii(*tp) && (isalnum(*tp) || *tp ==
'\\') &&
01801 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp ==
'"'))) {
01802 conf->resultBuf = MIME_APPL_TROFF;
01803
return 1;
01804 }
01805 }
01806
if ((*buf ==
'c' || *buf ==
'C') &&
01807 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01808
01809 conf->resultBuf = MIME_TEXT_FORTRAN;
01810
return 1;
01811 }
01812 assert(nbytes-1 < HOWMANY + 1);
01813
01814
01815 s = (
unsigned char *) memcpy(nbuf, buf, nbytes);
01816 s[nbytes-1] =
'\0';
01817 has_escapes = (memchr(s,
'\033', nbytes) != NULL);
01818
01819
01820
01821
01822 memset(&typecount, 0,
sizeof(typecount));
01823 typeset = 0;
01824 jonly = 0;
01825 conly = 0;
01826 jconly = 0;
01827 cppcomm = 0;
01828 ccomm = 0;
01829 tokencount = 0;
01830
bool foundClass =
false;
01831
01832
01833
while ((token = strtok((
char *) s,
" \t\n\r\f,;>")) != NULL) {
01834 s = NULL;
01835
#ifdef DEBUG_MIMEMAGIC
01836
kdDebug(7018) <<
"KMimeMagic::ascmagic token=" << token <<
endl;
01837
#endif
01838
for (p = names; p->name ; p++) {
01839
if (STREQ(p->name, token)) {
01840
#ifdef DEBUG_MIMEMAGIC
01841
kdDebug(7018) <<
"KMimeMagic::ascmagic token matches ! name=" << p->name <<
" type=" << p->type <<
endl;
01842
#endif
01843
tokencount++;
01844 typeset |= p->type;
01845
if (p->type == L_JAVA)
01846 jonly++;
01847
if ((p->type & (L_C|L_CPP|L_JAVA))
01848 == (L_CPP|L_JAVA)) {
01849 jconly++;
01850
if ( !foundClass && STREQ(
"class", token) )
01851 foundClass =
true;
01852 }
01853
if ((p->type & (L_C|L_CPP|L_JAVA))
01854 == (L_C|L_CPP))
01855 conly++;
01856
if (STREQ(token,
"//"))
01857 cppcomm++;
01858
if (STREQ(token,
"/*"))
01859 ccomm++;
01860
for (i = 0; i < (
int)NTYPES; i++)
01861
if ((1 << i) & p->type)
01862 typecount[i]++;
01863 }
01864 }
01865 }
01866
01867
if (typeset & (L_C|L_CPP|L_JAVA)) {
01868 conf->accuracy = 40;
01869
if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01870
#ifdef DEBUG_MIMEMAGIC
01871
kdDebug(7018) <<
"C/C++/Java: jonly=" << jonly <<
" conly=" << conly <<
" jconly=" << jconly <<
" ccomm=" << ccomm <<
endl;
01872
#endif
01873
if (jonly && conly)
01874
01875
if ( jonly > conly )
01876 conly = 0;
01877
else
01878 jonly = 0;
01879
if (jonly > 1 && foundClass) {
01880
01881 conf->resultBuf =
QString(types[P_JAVA].type);
01882
return 1;
01883 }
01884
if (jconly > 1) {
01885
01886
if (typecount[P_JAVA] > typecount[P_CPP])
01887 conf->resultBuf =
QString(types[P_JAVA].type);
01888
else
01889 conf->resultBuf =
QString(types[P_CPP].type);
01890
return 1;
01891 }
01892
if (conly) {
01893
01894
if (cppcomm)
01895 conf->resultBuf =
QString(types[P_CPP].type);
01896
else
01897 conf->resultBuf =
QString(types[P_C].type);
01898
return 1;
01899 }
01900
if (ccomm) {
01901 conf->resultBuf =
QString(types[P_C].type);
01902
return 1;
01903 }
01904 }
01905 }
01906
01907
01908
01909
01910
01911 mostaccurate = -1;
01912 maxpct = pctsum = 0.0;
01913
for (i = 0; i < (
int)NTYPES; i++) {
01914
if (typecount[i] > 1) {
01915 pct = (
double)typecount[i] / (
double)types[i].kwords *
01916 (
double)types[i].weight;
01917 pcts[i] = pct;
01918 pctsum += pct;
01919
if (pct > maxpct) {
01920 maxpct = pct;
01921 mostaccurate = i;
01922 }
01923
#ifdef DEBUG_MIMEMAGIC
01924
kdDebug(7018) <<
"" << types[i].type <<
" has " << typecount[i] <<
" hits, " << types[i].kwords <<
" kw, weight " << types[i].weight <<
", " << pct <<
" -> max = " << maxpct <<
"\n" <<
endl;
01925
#endif
01926
}
01927 }
01928
if (mostaccurate >= 0) {
01929
if ( mostaccurate != P_JAVA || foundClass )
01930 {
01931 conf->accuracy = (
int)(pcts[mostaccurate] / pctsum * 60);
01932
#ifdef DEBUG_MIMEMAGIC
01933
kdDebug(7018) <<
"mostaccurate=" << mostaccurate <<
" pcts=" << pcts[mostaccurate] <<
" pctsum=" << pctsum <<
" accuracy=" << accuracy <<
endl;
01934
#endif
01935
conf->resultBuf =
QString(types[mostaccurate].type);
01936
return 1;
01937 }
01938 }
01939
01940
switch (is_tar(buf, nbytes)) {
01941
case 1:
01942
01943 conf->resultBuf = MIME_APPL_TAR;
01944 conf->accuracy = 90;
01945
return 1;
01946
case 2:
01947
01948 conf->resultBuf = MIME_APPL_TAR;
01949 conf->accuracy = 90;
01950
return 1;
01951 }
01952
01953
for (i = 0; i < nbytes; i++) {
01954
if (!isascii(*(buf + i)))
01955
return 0;
01956 }
01957
01958
01959 conf->accuracy = 90;
01960
if (has_escapes) {
01961
01962
01963 conf->resultBuf = MIME_TEXT_UNKNOWN;
01964 }
else {
01965
01966 conf->resultBuf = MIME_TEXT_PLAIN;
01967 }
01968
return 1;
01969 }
01970
01971
01972
#define TEXT_MAXLINELEN 300
01973
01974
01975
01976
01977
01978
static int textmagic(
struct config_rec* conf,
unsigned char * buf,
int nbytes)
01979 {
01980
int i;
01981
unsigned char *cp;
01982
01983 nbytes--;
01984
01985
01986
for (i = 0, cp = buf; i < nbytes; i++, cp++)
01987
if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01988
return 0;
01989
01990
01991
01992
01993
for (i = 0; i < nbytes;) {
01994 cp = (
unsigned char *) memchr(buf,
'\n', nbytes - i);
01995
if (cp == NULL) {
01996
01997
if (i + TEXT_MAXLINELEN >= nbytes)
01998
break;
01999
else
02000
return 0;
02001 }
02002
if (cp - buf > TEXT_MAXLINELEN)
02003
return 0;
02004 i += (cp - buf + 1);
02005 buf = cp + 1;
02006 }
02007 conf->resultBuf = MIME_TEXT_PLAIN;
02008
return 1;
02009 }
02010
02011
02012
02013
02014
02015
02016
02017
02018
02019
02020
02021
02022
02023
02024
02025
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
02026
02027
02028
02029
02030
02031
02032
static int
02033 is_tar(
unsigned char *buf,
int nbytes)
02034 {
02035
register union record *header = (
union record *) buf;
02036
register int i;
02037
register long sum,
02038 recsum;
02039
register char *p;
02040
02041
if (nbytes < (
int)
sizeof(
union record))
02042 return 0;
02043
02044 recsum = from_oct(8, header->header.chksum);
02045
02046 sum = 0;
02047 p = header->charptr;
02048
for (i =
sizeof(
union record); --i >= 0;) {
02049
02050
02051
02052
02053 sum += 0xFF & *p++;
02054 }
02055
02056
02057
for (i =
sizeof(header->header.chksum); --i >= 0;)
02058 sum -= 0xFF & header->header.chksum[i];
02059 sum +=
' ' *
sizeof header->header.chksum;
02060
02061
if (sum != recsum)
02062
return 0;
02063
02064
if (0 == strcmp(header->header.magic, TMAGIC))
02065
return 2;
02066
02067
return 1;
02068 }
02069
02070
02071
02072
02073
02074
02075
02076
/*
 * Parse an octal number from a fixed-width field.
 *
 * digs   width of the field in characters.
 * where  start of the field; leading white space is skipped.
 *
 * Returns the parsed value, or -1 when the field consists only of white
 * space or the digits are followed, inside the field, by anything other than
 * white space or a NUL.
 */
static long
from_oct(int digs, char *where)
{
    long value;

    /*
     * <ctype.h> functions require a value representable as unsigned char;
     * a plain char with the top bit set would otherwise be passed as a
     * negative int, which is undefined behaviour.
     */
    while (isspace((unsigned char) *where)) {   /* skip leading spaces */
        where++;
        if (--digs <= 0)
            return -1;                          /* all-blank field */
    }
    value = 0;
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    /* Trailing junk inside the field makes the whole number invalid. */
    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;

    return value;
}
02097
02098 KMimeMagic::KMimeMagic()
02099 {
02100
02101
QString mimefile =
locate(
"mime",
"magic" );
02102 init( mimefile );
02103
02104
QStringList snippets =
KGlobal::dirs()->
findAllResources(
"config",
"magic/*.magic",
true );
02105
for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02106
if ( !
mergeConfig( *it ) )
02107
kdWarning() <<
k_funcinfo <<
"Failed to parse " << *it <<
endl;
02108 }
02109
02110 KMimeMagic::KMimeMagic(
const QString & _configfile)
02111 {
02112 init( _configfile );
02113 }
02114
02115
void KMimeMagic::init(
const QString& _configfile )
02116 {
02117
int result;
02118 conf =
new config_rec;
02119
02120
02121 conf->magic = conf->last = NULL;
02122 magicResult = NULL;
02123 conf->followLinks =
false;
02124
02125 conf->utimeConf = 0L;
02126
02127 result = apprentice(_configfile);
02128
if (result == -1)
02129
return;
02130
#ifdef MIME_MAGIC_DEBUG_TABLE
02131
test_table();
02132
#endif
02133
}
02134
02135
02136
02137
02138
02139 KMimeMagic::~KMimeMagic()
02140 {
02141
if (conf) {
02142
struct magic *p = conf->magic;
02143
struct magic *q;
02144
while (p) {
02145 q = p;
02146 p = p->next;
02147 free(q);
02148 }
02149
delete conf->utimeConf;
02150
delete conf;
02151 }
02152
delete magicResult;
02153 }
02154
02155
bool
02156 KMimeMagic::mergeConfig(
const QString & _configfile)
02157 {
02158
kdDebug(7018) <<
k_funcinfo << _configfile <<
endl;
02159
int result;
02160
02161
if (_configfile.
isEmpty())
02162
return false;
02163 result = apprentice(_configfile);
02164
if (result == -1) {
02165
return false;
02166 }
02167
#ifdef MIME_MAGIC_DEBUG_TABLE
02168
test_table();
02169
#endif
02170
return true;
02171 }
02172
02173
bool
02174 KMimeMagic::mergeBufConfig(
char * _configbuf)
02175 {
02176
int result;
02177
02178
if (conf) {
02179 result = buff_apprentice(_configbuf);
02180
if (result == -1)
02181
return false;
02182
#ifdef MIME_MAGIC_DEBUG_TABLE
02183
test_table();
02184
#endif
02185
return true;
02186 }
02187
return false;
02188 }
02189
02190
void
02191 KMimeMagic::setFollowLinks(
bool _enable )
02192 {
02193 conf->followLinks = _enable;
02194 }
02195
02196
/*
 * Determine the MIME type of in-memory data.  At most HOWMANY bytes of array
 * are examined.  The returned object is owned by this KMimeMagic and is
 * reused by later calls.
 */
KMimeMagicResult *
KMimeMagic::findBufferType(const QByteArray &array)
{
    unsigned char buf[HOWMANY + 1];

    /* Reset the state left over from a previous lookup. */
    conf->resultBuf = QString::null;
    if ( !magicResult )
        magicResult = new KMimeMagicResult();
    magicResult->setInvalid();
    conf->accuracy = 100;

    /* Work on a private copy, truncated to HOWMANY bytes. */
    int nbytes = array.size();
    if (nbytes > HOWMANY)
        nbytes = HOWMANY;
    memcpy(buf, array.data(), nbytes);

    if (nbytes != 0) {
        /* The detectors expect a terminating NUL that is counted in the length. */
        buf[nbytes] = '\0';
        tryit(conf, buf, nbytes + 1);
    }
    else {
        conf->resultBuf = MIME_BINARY_ZEROSIZE;
    }

    /* Publish the outcome through the cached result object. */
    magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
    magicResult->setAccuracy(conf->accuracy);
    return magicResult;
}
02224
02225
static void
02226 refineResult(
KMimeMagicResult *r,
const QString & _filename)
02227 {
02228
QString tmp = r->
mimeType();
02229
if (tmp.
isEmpty())
02230
return;
02231
if ( tmp ==
"text/x-c" ||
02232 tmp ==
"text/x-c++" )
02233 {
02234
if ( _filename.
right(2) ==
".h" )
02235 tmp +=
"hdr";
02236
else
02237 tmp +=
"src";
02238 r->
setMimeType(tmp);
02239 }
02240 }
02241
02242
/*
 * Like findBufferType(), but additionally refines the result with the help
 * of the file name fn (see refineResult()).
 */
KMimeMagicResult *
KMimeMagic::findBufferFileType( const QByteArray &data,
                                const QString &fn)
{
    KMimeMagicResult * const result = findBufferType( data );
    refineResult( result, fn );
    return result;
}
02250
02251
02252
02253
02254 KMimeMagicResult*
KMimeMagic::findFileType(
const QString & fn)
02255 {
02256
#ifdef DEBUG_MIMEMAGIC
02257
kdDebug(7018) <<
"KMimeMagic::findFileType " << fn <<
endl;
02258
#endif
02259
conf->resultBuf = QString::null;
02260
02261
if ( !
magicResult )
02262
magicResult =
new KMimeMagicResult();
02263
magicResult->
setInvalid();
02264 conf->accuracy = 100;
02265
02266
if ( !conf->utimeConf )
02267 conf->utimeConf =
new KMimeMagicUtimeConf();
02268
02269
02270 process(conf, fn );
02271
02272
02273
02274
magicResult->
setMimeType(conf->resultBuf.stripWhiteSpace());
02275
magicResult->
setAccuracy(conf->accuracy);
02276 refineResult(
magicResult, fn);
02277
return magicResult;
02278 }