00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "kmimemagic.h"
00019 #include <kdebug.h>
00020 #include <kapplication.h>
00021 #include <qfile.h>
00022 #include <ksimpleconfig.h>
00023 #include <kstandarddirs.h>
00024 #include <kstaticdeleter.h>
00025 #include <klargefile.h>
00026 #include <assert.h>
00027
00028 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00029 static void process(struct config_rec* conf, const QString &);
00030 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00031 static int tagmagic(unsigned char *buf, int nbytes);
00032 static int textmagic(struct config_rec* conf, unsigned char *, int);
00033
00034 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00035 static int match(struct config_rec* conf, unsigned char *, int);
00036
00037 KMimeMagic* KMimeMagic::s_pSelf;
00038 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00039
00040 KMimeMagic* KMimeMagic::self()
00041 {
00042 if( !s_pSelf )
00043 initStatic();
00044 return s_pSelf;
00045 }
00046
00047 void KMimeMagic::initStatic()
00048 {
00049 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00050 s_pSelf->setFollowLinks( true );
00051 }
00052
00053 #include <stdio.h>
00054 #include <unistd.h>
00055 #include <stdlib.h>
00056 #include <sys/wait.h>
00057 #include <sys/types.h>
00058 #include <sys/stat.h>
00059 #include <fcntl.h>
00060 #include <errno.h>
00061 #include <ctype.h>
00062 #include <time.h>
00063 #include <utime.h>
00064 #include <stdarg.h>
00065 #include <qregexp.h>
00066 #include <qstring.h>
00067
00068
00069
00070
00071
00072
00073
00074
/* Either debug switch also enables per-rule line numbers in struct magic. */
#if defined(DEBUG_MIMEMAGIC) || defined(DEBUG_APPRENTICE)
#define DEBUG_LINENUMBERS
#endif

/* Status codes used by the helpers in this file. */
#define DECLINED 999
#define ERROR    998
#define OK       0

/* MIME type strings reported for special cases. */
#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE   "application/x-zerosize"
#define MIME_TEXT_UNKNOWN      "text/plain"
#define MIME_TEXT_PLAIN        "text/plain"
#define MIME_INODE_DIR         "inode/directory"
#define MIME_INODE_CDEV        "inode/chardevice"
#define MIME_INODE_BDEV        "inode/blockdevice"
#define MIME_INODE_FIFO        "inode/fifo"
#define MIME_INODE_LINK        "inode/link"
#define MIME_INODE_SOCK        "inode/socket"

#define MIME_APPL_TROFF        "application/x-troff"
#define MIME_APPL_TAR          "application/x-tar"
#define MIME_TEXT_FORTRAN      "text/x-fortran"

#define MAXMIMESTRING 256

#define HOWMANY   1024   /* bytes read from the start of a file (see process()) */
#define MAXDESC   50     /* size of the description buffer in struct magic */
#define MAXstring 64     /* size of the string member of VALUETYPE */
00110
00111 typedef union VALUETYPE {
00112 unsigned char b;
00113 unsigned short h;
00114 unsigned long l;
00115 char s[MAXstring];
00116 unsigned char hs[2];
00117 unsigned char hl[4];
00118 } VALUETYPE;
00119
00120 struct magic {
00121 struct magic *next;
00122 #ifdef DEBUG_LINENUMBERS
00123 int lineno;
00124 #endif
00125
00126 short flag;
00127 #define INDIR 1
00128 #define UNSIGNED 2
00129 short cont_level;
00130 struct {
00131 char type;
00132 long offset;
00133 } in;
00134 long offset;
00135 unsigned char reln;
00136 char type;
00137 char vallen;
00138 #define BYTE 1
00139 #define SHORT 2
00140 #define LONG 4
00141 #define STRING 5
00142 #define DATE 6
00143 #define BESHORT 7
00144 #define BELONG 8
00145 #define BEDATE 9
00146 #define LESHORT 10
00147 #define LELONG 11
00148 #define LEDATE 12
00149 VALUETYPE value;
00150 unsigned long mask;
00151 char nospflag;
00152
00153
00154 char desc[MAXDESC];
00155 };
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
/* Layout of one tar header block. */
#define RECORDSIZE 512
#define NAMSIZ     100
#define TUNMLEN    32
#define TGNMLEN    32

union record {
    char charptr[RECORDSIZE];      /* the block as raw bytes */
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;                      /* the same block as named fields */
};

/* Contents of the magic field of a tar header. */
#define TMAGIC "ustar "
00198
00199
00200
00201
/* Prototypes of the file-local parsing and matching helpers. */
static int           is_tar(unsigned char *buf, int nbytes);
static unsigned long signextend(struct magic *m, unsigned long v);
static int           getvalue(struct magic *m, char **p);
static int           hextoint(int c);
static char         *getstr(char *s, char *p, int plen, int *slen);
static int           mget(union VALUETYPE *p, unsigned char *s, struct magic *m, int nbytes);
static int           mcheck(union VALUETYPE *p, struct magic *m);
static int           mconvert(union VALUETYPE *p, struct magic *m);
static long          from_oct(int digs, char *where);
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
/* Bit flags naming the text types a keyword in names[] may indicate. */
#define L_HTML 0x001
#define L_C    0x002
#define L_MAKE 0x004
#define L_PLI  0x008
#define L_MACH 0x010
#define L_PAS  0x020
#define L_JAVA 0x040
#define L_CPP  0x080
#define L_MAIL 0x100
#define L_NEWS 0x200
#define L_DIFF 0x400

/* Bit position of each flag above (L_x == 1 << P_x). */
#define P_HTML 0
#define P_C    1
#define P_MAKE 2
#define P_PLI  3
#define P_MACH 4
#define P_PAS  5
#define P_JAVA 6
#define P_CPP  7
#define P_MAIL 8
#define P_NEWS 9
#define P_DIFF 10
00249
/* One text type that keyword counting can report. */
typedef struct asc_type {
    const char *type;     /* MIME type string */
    int         kwords;   /* keyword count associated with this type */
    double      weight;   /* weighting factor for this type */
} asc_type;

/* Table of text types; entries are in the order of the P_* bit positions. */
static const asc_type types[] = {
    { "text/html",        19, 2   },
    { "text/x-c",          9, 1.3 },
    { "text/x-makefile",   4, 1.9 },
    { "text/x-pli",        1, 3   },
    { "text/x-assembler",  6, 2.1 },
    { "text/x-pascal",     1, 1   },
    { "text/x-java",      14, 1   },
    { "text/x-c++",       14, 1   },
    { "message/rfc822",    4, 1.9 },
    { "message/news",      3, 2   },
    { "text/x-diff",       4, 2   }
};

/* Number of entries in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00271
00272 static struct names {
00273 const char *name;
00274 short type;
00275 } const names[] = {
00276 {
00277 "<html", L_HTML
00278 },
00279 {
00280 "<HTML", L_HTML
00281 },
00282 {
00283 "<head", L_HTML
00284 },
00285 {
00286 "<HEAD", L_HTML
00287 },
00288 {
00289 "<body", L_HTML
00290 },
00291 {
00292 "<BODY", L_HTML
00293 },
00294 {
00295 "<title", L_HTML
00296 },
00297 {
00298 "<TITLE", L_HTML
00299 },
00300 {
00301 "<h1", L_HTML
00302 },
00303 {
00304 "<H1", L_HTML
00305 },
00306 {
00307 "<a", L_HTML
00308 },
00309 {
00310 "<A", L_HTML
00311 },
00312 {
00313 "<img", L_HTML
00314 },
00315 {
00316 "<IMG", L_HTML
00317 },
00318 {
00319 "<!--", L_HTML
00320 },
00321 {
00322 "<!doctype", L_HTML
00323 },
00324 {
00325 "<!DOCTYPE", L_HTML
00326 },
00327 {
00328 "<div", L_HTML
00329 },
00330 {
00331 "<DIV", L_HTML
00332 },
00333 {
00334 "<frame", L_HTML
00335 },
00336 {
00337 "<FRAME", L_HTML
00338 },
00339 {
00340 "<frameset", L_HTML
00341 },
00342 {
00343 "<FRAMESET", L_HTML
00344 },
00345 {
00346 "<script", L_HTML
00347 },
00348 {
00349 "<SCRIPT", L_HTML
00350 },
00351 {
00352 "/*", L_C|L_CPP|L_JAVA
00353 },
00354 {
00355 "//", L_CPP|L_JAVA
00356 },
00357 {
00358 "#include", L_C|L_CPP
00359 },
00360 {
00361 "char", L_C|L_CPP|L_JAVA
00362 },
00363 {
00364 "double", L_C|L_CPP|L_JAVA
00365 },
00366 {
00367 "extern", L_C|L_CPP
00368 },
00369 {
00370 "float", L_C|L_CPP|L_JAVA
00371 },
00372 {
00373 "real", L_C|L_CPP|L_JAVA
00374 },
00375 {
00376 "struct", L_C|L_CPP
00377 },
00378 {
00379 "union", L_C|L_CPP
00380 },
00381 {
00382 "implements", L_JAVA
00383 },
00384 {
00385 "super", L_JAVA
00386 },
00387 {
00388 "import", L_JAVA
00389 },
00390 {
00391 "class", L_CPP|L_JAVA
00392 },
00393 {
00394 "public", L_CPP|L_JAVA
00395 },
00396 {
00397 "private", L_CPP|L_JAVA
00398 },
00399 {
00400 "CFLAGS", L_MAKE
00401 },
00402 {
00403 "LDFLAGS", L_MAKE
00404 },
00405 {
00406 "all:", L_MAKE
00407 },
00408 {
00409 ".PHONY:", L_MAKE
00410 },
00411 {
00412 "srcdir", L_MAKE
00413 },
00414 {
00415 "exec_prefix", L_MAKE
00416 },
00417
00418
00419
00420
00421 {
00422 ".ascii", L_MACH
00423 },
00424 {
00425 ".asciiz", L_MACH
00426 },
00427 {
00428 ".byte", L_MACH
00429 },
00430 {
00431 ".even", L_MACH
00432 },
00433 {
00434 ".globl", L_MACH
00435 },
00436 {
00437 "clr", L_MACH
00438 },
00439 {
00440 "(input", L_PAS
00441 },
00442 {
00443 "dcl", L_PLI
00444 },
00445 {
00446 "Received:", L_MAIL
00447 },
00448
00449
00450
00451 {
00452 "Return-Path:", L_MAIL
00453 },
00454 {
00455 "Cc:", L_MAIL
00456 },
00457 {
00458 "Newsgroups:", L_NEWS
00459 },
00460 {
00461 "Path:", L_NEWS
00462 },
00463 {
00464 "Organization:", L_NEWS
00465 },
00466 {
00467 "---", L_DIFF
00468 },
00469 {
00470 "+++", L_DIFF
00471 },
00472 {
00473 "***", L_DIFF
00474 },
00475 {
00476 "@@", L_DIFF
00477 },
00478 {
00479 NULL, 0
00480 }
00481 };
00482
00493 class KMimeMagicUtimeConf
00494 {
00495 public:
00496 KMimeMagicUtimeConf()
00497 {
00498 tmpDirs << QString::fromLatin1("/tmp");
00499
00500
00501
00502 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00503 if ( !confDirs.isEmpty() )
00504 {
00505 QString globalConf = confDirs.last() + "kmimemagicrc";
00506 if ( QFile::exists( globalConf ) )
00507 {
00508 KSimpleConfig cfg( globalConf );
00509 cfg.setGroup( "Settings" );
00510 tmpDirs = cfg.readListEntry( "atimeDirs" );
00511 }
00512 if ( confDirs.count() > 1 )
00513 {
00514 QString localConf = confDirs.first() + "kmimemagicrc";
00515 if ( QFile::exists( localConf ) )
00516 {
00517 KSimpleConfig cfg( localConf );
00518 cfg.setGroup( "Settings" );
00519 tmpDirs += cfg.readListEntry( "atimeDirs" );
00520 }
00521 }
00522 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00523 {
00524 QString dir = *it;
00525 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00526 (*it) += '/';
00527 }
00528 }
00529 #if 0
00530
00531 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00532 kdDebug(7018) << " atimeDir: " << *it << endl;
00533 #endif
00534 }
00535
00536 bool restoreAccessTime( const QString & file ) const
00537 {
00538 QString dir = file.left( file.findRev( '/' ) );
00539 bool res = tmpDirs.contains( dir );
00540
00541 return res;
00542 }
00543 QStringList tmpDirs;
00544 };
00545
00546
00547 struct config_rec {
00548 bool followLinks;
00549 QString resultBuf;
00550 int accuracy;
00551
00552 struct magic *magic,
00553 *last;
00554 KMimeMagicUtimeConf * utimeConf;
00555 };
00556
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid: walks the rule list and reports the first node whose address
 * consists of four printable bytes, which suggests that a pointer has been
 * overwritten with text.
 *
 * NOTE(review): `conf` is not declared in this scope and `lineno` only
 * exists when DEBUG_LINENUMBERS is defined, so this still needs a config
 * parameter before MIME_MAGIC_DEBUG_TABLE can be enabled.  The signature
 * is left alone here to keep the existing call sites unchanged.
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        if (isprint((((unsigned long) m) >> 24) & 255) &&
            isprint((((unsigned long) m) >> 16) & 255) &&
            isprint((((unsigned long) m) >> 8) & 255) &&
            isprint(((unsigned long) m) & 255)) {
            /* the original statement had lost its call head; restored */
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) ((((unsigned long) m) >> 24) & 255)
                          << (char) ((((unsigned long) m) >> 16) & 255)
                          << (char) ((((unsigned long) m) >> 8) & 255)
                          << (char) (((unsigned long) m) & 255)
                          << "\" line=" << (prevm ? prevm->lineno : -1) << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00583
/* Advance the local pointer `l` past ASCII whitespace. */
#define EATAB do { \
        while (isascii((unsigned char) *l) && \
               isspace((unsigned char) *l)) \
            ++l; \
    } while (0)
00586
00587 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00588 {
00589 int ws_offset;
00590
00591
00592 if (line[0]) {
00593 line[strlen(line) - 1] = '\0';
00594 }
00595
00596 ws_offset = 0;
00597 while (line[ws_offset] && isspace(line[ws_offset])) {
00598 ws_offset++;
00599 }
00600
00601
00602 if (line[ws_offset] == 0) {
00603 return 0;
00604 }
00605
00606 if (line[ws_offset] == '#')
00607 return 0;
00608
00609
00610 (*rule)++;
00611
00612
00613 return (parse(line + ws_offset, lineno) != 0);
00614 }
00615
00616
00617
00618
00619 int KMimeMagic::apprentice( const QString& magicfile )
00620 {
00621 FILE *f;
00622 char line[BUFSIZ + 1];
00623 int errs = 0;
00624 int lineno;
00625 int rule = 0;
00626 QCString fname;
00627
00628 if (magicfile.isEmpty())
00629 return -1;
00630 fname = QFile::encodeName(magicfile);
00631 f = fopen(fname, "r");
00632 if (f == NULL) {
00633 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00634 return -1;
00635 }
00636
00637
00638 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00639 if (parse_line(line, &rule, lineno))
00640 errs++;
00641
00642 fclose(f);
00643
00644 #ifdef DEBUG_APPRENTICE
00645 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00646 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00647 #endif
00648
00649 #ifdef MIME_MAGIC_DEBUG_TABLE
00650 test_table();
00651 #endif
00652
00653 return (errs ? -1 : 0);
00654 }
00655
00656 int KMimeMagic::buff_apprentice(char *buff)
00657 {
00658 char line[BUFSIZ + 2];
00659 int errs = 0;
00660 int lineno = 1;
00661 char *start = buff;
00662 char *end;
00663 int count = 0;
00664 int rule = 0;
00665 int len = strlen(buff) + 1;
00666
00667
00668 do {
00669 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00670 strncpy(line, start, count);
00671 line[count] = '\0';
00672 if ((end = strchr(line, '\n'))) {
00673 *(++end) = '\0';
00674 count = strlen(line);
00675 } else
00676 strcat(line, "\n");
00677 start += count;
00678 len -= count;
00679 if (parse_line(line, &rule, lineno))
00680 errs++;
00681 lineno++;
00682 } while (len > 0);
00683
00684 #ifdef DEBUG_APPRENTICE
00685 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00686 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00687 #endif
00688
00689 #ifdef MIME_MAGIC_DEBUG_TABLE
00690 test_table();
00691 #endif
00692
00693 return (errs ? -1 : 0);
00694 }
00695
00696
00697
00698
00699 static unsigned long
00700 signextend(struct magic *m, unsigned long v)
00701 {
00702 if (!(m->flag & UNSIGNED))
00703 switch (m->type) {
00704
00705
00706
00707
00708
00709 case BYTE:
00710 v = (char) v;
00711 break;
00712 case SHORT:
00713 case BESHORT:
00714 case LESHORT:
00715 v = (short) v;
00716 break;
00717 case DATE:
00718 case BEDATE:
00719 case LEDATE:
00720 case LONG:
00721 case BELONG:
00722 case LELONG:
00723 v = (long) v;
00724 break;
00725 case STRING:
00726 break;
00727 default:
00728 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00729 return ERROR;
00730 }
00731 return v;
00732 }
00733
00734
00735
00736
00737 int KMimeMagic::parse(char *l, int
00738 #ifdef DEBUG_LINENUMBERS
00739 lineno
00740 #endif
00741 )
00742 {
00743 int i = 0;
00744 struct magic *m;
00745 char *t,
00746 *s;
00747
00748 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00749 kdError(7018) << "parse: Out of memory." << endl;
00750 return -1;
00751 }
00752
00753 m->next = NULL;
00754 if (!conf->magic || !conf->last) {
00755 conf->magic = conf->last = m;
00756 } else {
00757 conf->last->next = m;
00758 conf->last = m;
00759 }
00760
00761
00762 m->flag = 0;
00763 m->cont_level = 0;
00764 #ifdef DEBUG_LINENUMBERS
00765 m->lineno = lineno;
00766 #endif
00767
00768 while (*l == '>') {
00769 ++l;
00770 m->cont_level++;
00771 }
00772
00773 if (m->cont_level != 0 && *l == '(') {
00774 ++l;
00775 m->flag |= INDIR;
00776 }
00777
00778 m->offset = (int) strtol(l, &t, 0);
00779 if (l == t) {
00780 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00781 }
00782 l = t;
00783
00784 if (m->flag & INDIR) {
00785 m->in.type = LONG;
00786 m->in.offset = 0;
00787
00788
00789
00790 if (*l == '.') {
00791 switch (*++l) {
00792 case 'l':
00793 m->in.type = LONG;
00794 break;
00795 case 's':
00796 m->in.type = SHORT;
00797 break;
00798 case 'b':
00799 m->in.type = BYTE;
00800 break;
00801 default:
00802 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00803 break;
00804 }
00805 l++;
00806 }
00807 s = l;
00808 if (*l == '+' || *l == '-')
00809 l++;
00810 if (isdigit((unsigned char) *l)) {
00811 m->in.offset = strtol(l, &t, 0);
00812 if (*s == '-')
00813 m->in.offset = -m->in.offset;
00814 } else
00815 t = l;
00816 if (*t++ != ')') {
00817 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00818 }
00819 l = t;
00820 }
00821 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00822 ++l;
00823 EATAB;
00824
00825 #define NBYTE 4
00826 #define NSHORT 5
00827 #define NLONG 4
00828 #define NSTRING 6
00829 #define NDATE 4
00830 #define NBESHORT 7
00831 #define NBELONG 6
00832 #define NBEDATE 6
00833 #define NLESHORT 7
00834 #define NLELONG 6
00835 #define NLEDATE 6
00836
00837 if (*l == 'u') {
00838 ++l;
00839 m->flag |= UNSIGNED;
00840 }
00841
00842 if (strncmp(l, "byte", NBYTE) == 0) {
00843 m->type = BYTE;
00844 l += NBYTE;
00845 } else if (strncmp(l, "short", NSHORT) == 0) {
00846 m->type = SHORT;
00847 l += NSHORT;
00848 } else if (strncmp(l, "long", NLONG) == 0) {
00849 m->type = LONG;
00850 l += NLONG;
00851 } else if (strncmp(l, "string", NSTRING) == 0) {
00852 m->type = STRING;
00853 l += NSTRING;
00854 } else if (strncmp(l, "date", NDATE) == 0) {
00855 m->type = DATE;
00856 l += NDATE;
00857 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00858 m->type = BESHORT;
00859 l += NBESHORT;
00860 } else if (strncmp(l, "belong", NBELONG) == 0) {
00861 m->type = BELONG;
00862 l += NBELONG;
00863 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00864 m->type = BEDATE;
00865 l += NBEDATE;
00866 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00867 m->type = LESHORT;
00868 l += NLESHORT;
00869 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00870 m->type = LELONG;
00871 l += NLELONG;
00872 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00873 m->type = LEDATE;
00874 l += NLEDATE;
00875 } else {
00876 kdError(7018) << "parse: type " << l << " invalid" << endl;
00877 return -1;
00878 }
00879
00880 if (*l == '&') {
00881 ++l;
00882 m->mask = signextend(m, strtol(l, &l, 0));
00883 } else
00884 m->mask = (unsigned long) ~0L;
00885 EATAB;
00886
00887 switch (*l) {
00888 case '>':
00889 case '<':
00890
00891 case '&':
00892 case '^':
00893 case '=':
00894 m->reln = *l;
00895 ++l;
00896 break;
00897 case '!':
00898 if (m->type != STRING) {
00899 m->reln = *l;
00900 ++l;
00901 break;
00902 }
00903
00904 default:
00905 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00906 isspace((unsigned char) l[1])) {
00907 m->reln = *l;
00908 ++l;
00909 goto GetDesc;
00910 }
00911 m->reln = '=';
00912 break;
00913 }
00914 EATAB;
00915
00916 if (getvalue(m, &l))
00917 return -1;
00918
00919
00920
00921 GetDesc:
00922 EATAB;
00923 if (l[0] == '\b') {
00924 ++l;
00925 m->nospflag = 1;
00926 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00927 ++l;
00928 ++l;
00929 m->nospflag = 1;
00930 } else
00931 m->nospflag = 0;
00932
00933 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00934 m->desc[i++] = *l++;
00935 m->desc[i] = '\0';
00936
00937 while (--i>0 && isspace( m->desc[i] ))
00938 m->desc[i] = '\0';
00939
00940
00941
00942
00943 #ifdef DEBUG_APPRENTICE
00944 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00945 #endif
00946 return 0;
00947 }
00948
00949
00950
00951
00952
00953
00954 static int
00955 getvalue(struct magic *m, char **p)
00956 {
00957 int slen;
00958
00959 if (m->type == STRING) {
00960 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00961 m->vallen = slen;
00962 } else if (m->reln != 'x')
00963 m->value.l = signextend(m, strtol(*p, p, 0));
00964 return 0;
00965 }
00966
00967
00968
00969
00970
00971
00972 static char *
00973 getstr(register char *s, register char *p, int plen, int *slen)
00974 {
00975 char *origs = s,
00976 *origp = p;
00977 char *pmax = p + plen - 1;
00978 register int c;
00979 register int val;
00980
00981 while ((c = *s++) != '\0') {
00982 if (isspace((unsigned char) c))
00983 break;
00984 if (p >= pmax) {
00985 kdError(7018) << "String too long: " << origs << endl;
00986 break;
00987 }
00988 if (c == '\\') {
00989 switch (c = *s++) {
00990
00991 case '\0':
00992 goto out;
00993
00994 default:
00995 *p++ = (char) c;
00996 break;
00997
00998 case 'n':
00999 *p++ = '\n';
01000 break;
01001
01002 case 'r':
01003 *p++ = '\r';
01004 break;
01005
01006 case 'b':
01007 *p++ = '\b';
01008 break;
01009
01010 case 't':
01011 *p++ = '\t';
01012 break;
01013
01014 case 'f':
01015 *p++ = '\f';
01016 break;
01017
01018 case 'v':
01019 *p++ = '\v';
01020 break;
01021
01022
01023 case '0':
01024 case '1':
01025 case '2':
01026 case '3':
01027 case '4':
01028 case '5':
01029 case '6':
01030 case '7':
01031 val = c - '0';
01032 c = *s++;
01033 if (c >= '0' && c <= '7') {
01034 val = (val << 3) | (c - '0');
01035 c = *s++;
01036 if (c >= '0' && c <= '7')
01037 val = (val << 3) | (c - '0');
01038 else
01039 --s;
01040 } else
01041 --s;
01042 *p++ = (char) val;
01043 break;
01044
01045
01046 case 'x':
01047 val = 'x';
01048 c = hextoint(*s++);
01049 if (c >= 0) {
01050 val = c;
01051 c = hextoint(*s++);
01052 if (c >= 0) {
01053 val = (val << 4) + c;
01054 c = hextoint(*s++);
01055 if (c >= 0) {
01056 val = (val << 4) + c;
01057 } else
01058 --s;
01059 } else
01060 --s;
01061 } else
01062 --s;
01063 *p++ = (char) val;
01064 break;
01065 }
01066 } else
01067 *p++ = (char) c;
01068 }
01069 out:
01070 *p = '\0';
01071 *slen = p - origp;
01072
01073
01074 return s;
01075 }
01076
01077
01078
/*
 * Converts one hexadecimal digit character to its value.
 * Returns 0..15, or -1 if c is not a hex digit.
 */
static int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (uc > 0x7f)                 /* not ASCII */
        return -1;
    if (isdigit(uc))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
01092
01093
01094
01095
01096 static int
01097 mconvert(union VALUETYPE *p, struct magic *m)
01098 {
01099 switch (m->type) {
01100 case BYTE:
01101 return 1;
01102 case STRING:
01103
01104 p->s[sizeof(p->s) - 1] = '\0';
01105 return 1;
01106 #ifndef WORDS_BIGENDIAN
01107 case SHORT:
01108 #endif
01109 case BESHORT:
01110 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01111 return 1;
01112 #ifndef WORDS_BIGENDIAN
01113 case LONG:
01114 case DATE:
01115 #endif
01116 case BELONG:
01117 case BEDATE:
01118 p->l = (long)
01119 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01120 return 1;
01121 #ifdef WORDS_BIGENDIAN
01122 case SHORT:
01123 #endif
01124 case LESHORT:
01125 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01126 return 1;
01127 #ifdef WORDS_BIGENDIAN
01128 case LONG:
01129 case DATE:
01130 #endif
01131 case LELONG:
01132 case LEDATE:
01133 p->l = (long)
01134 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01135 return 1;
01136 default:
01137 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01138 return 0;
01139 }
01140 }
01141
01142
01143 static int
01144 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01145 int nbytes)
01146 {
01147 long offset = m->offset;
01148 switch ( m->type )
01149 {
01150 case BYTE:
01151 if ( offset + 1 > nbytes-1 )
01152 return 0;
01153 break;
01154 case SHORT:
01155 case BESHORT:
01156 case LESHORT:
01157 if ( offset + 2 > nbytes-1 )
01158 return 0;
01159 break;
01160 case LONG:
01161 case BELONG:
01162 case LELONG:
01163 case DATE:
01164 case BEDATE:
01165 case LEDATE:
01166 if ( offset + 4 > nbytes-1 )
01167 return 0;
01168 break;
01169 case STRING:
01170 break;
01171 }
01172
01173
01174
01175
01176 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01177 {
01178 int have = nbytes - offset;
01179 memset(p, 0, sizeof(union VALUETYPE));
01180 if (have > 0)
01181 memcpy(p, s + offset, have);
01182 } else
01183 memcpy(p, s + offset, sizeof(union VALUETYPE));
01184
01185 if (!mconvert(p, m))
01186 return 0;
01187
01188 if (m->flag & INDIR) {
01189
01190 switch (m->in.type) {
01191 case BYTE:
01192 offset = p->b + m->in.offset;
01193 break;
01194 case SHORT:
01195 offset = p->h + m->in.offset;
01196 break;
01197 case LONG:
01198 offset = p->l + m->in.offset;
01199 break;
01200 }
01201
01202 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01203 return 0;
01204
01205 memcpy(p, s + offset, sizeof(union VALUETYPE));
01206
01207 if (!mconvert(p, m))
01208 return 0;
01209 }
01210 return 1;
01211 }
01212
01213 static int
01214 mcheck(union VALUETYPE *p, struct magic *m)
01215 {
01216 register unsigned long l = m->value.l;
01217 register unsigned long v;
01218 int matched;
01219
01220 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01221 kdError(7018) << "BOINK" << endl;
01222 return 1;
01223 }
01224 switch (m->type) {
01225 case BYTE:
01226 v = p->b;
01227 break;
01228
01229 case SHORT:
01230 case BESHORT:
01231 case LESHORT:
01232 v = p->h;
01233 break;
01234
01235 case LONG:
01236 case BELONG:
01237 case LELONG:
01238 case DATE:
01239 case BEDATE:
01240 case LEDATE:
01241 v = p->l;
01242 break;
01243
01244 case STRING:
01245 l = 0;
01246
01247
01248
01249
01250
01251 v = 0;
01252 {
01253 register unsigned char *a = (unsigned char *) m->value.s;
01254 register unsigned char *b = (unsigned char *) p->s;
01255 register int len = m->vallen;
01256 Q_ASSERT(len);
01257
01258 while (--len >= 0)
01259 if ((v = *b++ - *a++) != 0)
01260 break;
01261 }
01262 break;
01263 default:
01264 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01265 return 0;
01266 }
01267 #if 0
01268 qDebug("Before signextend %08x", v);
01269 #endif
01270 v = signextend(m, v) & m->mask;
01271 #if 0
01272 qDebug("After signextend %08x", v);
01273 #endif
01274
01275 switch (m->reln) {
01276 case 'x':
01277 matched = 1;
01278 break;
01279
01280 case '!':
01281 matched = v != l;
01282 break;
01283
01284 case '=':
01285 matched = v == l;
01286 break;
01287
01288 case '>':
01289 if (m->flag & UNSIGNED)
01290 matched = v > l;
01291 else
01292 matched = (long) v > (long) l;
01293 break;
01294
01295 case '<':
01296 if (m->flag & UNSIGNED)
01297 matched = v < l;
01298 else
01299 matched = (long) v < (long) l;
01300 break;
01301
01302 case '&':
01303 matched = (v & l) == l;
01304 break;
01305
01306 case '^':
01307 matched = (v & l) != l;
01308 break;
01309
01310 default:
01311 matched = 0;
01312 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01313 break;
01314 }
01315
01316 return matched;
01317 }
01318
01319
01320
01321
01322
01323
01324 void process(struct config_rec* conf, const QString & fn)
01325 {
01326 int fd = 0;
01327 unsigned char buf[HOWMANY + 1];
01328 KDE_struct_stat sb;
01329 int nbytes = 0;
01330 int tagbytes = 0;
01331 QCString fileName = QFile::encodeName( fn );
01332
01333
01334
01335
01336 if (fsmagic(conf, fileName, &sb) != 0) {
01337
01338 return;
01339 }
01340 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01341
01342
01343
01344
01345
01346
01347 conf->resultBuf = MIME_BINARY_UNREADABLE;
01348 return;
01349 }
01350
01351
01352
01353 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01354 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01355 conf->resultBuf = MIME_BINARY_UNREADABLE;
01356 return;
01357 }
01358 if ((tagbytes = tagmagic(buf, nbytes))) {
01359
01360 lseek(fd, tagbytes, SEEK_SET);
01361 nbytes = read(fd, (char*)buf, HOWMANY);
01362 if (nbytes < 0) {
01363 conf->resultBuf = MIME_BINARY_UNREADABLE;
01364 return;
01365 }
01366 }
01367 if (nbytes == 0) {
01368 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01369 } else {
01370 buf[nbytes++] = '\0';
01371 tryit(conf, buf, nbytes);
01372 }
01373
01374 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01375 {
01376
01377
01378
01379
01380
01381 struct utimbuf utbuf;
01382 utbuf.actime = sb.st_atime;
01383 utbuf.modtime = sb.st_mtime;
01384 (void) utime(fileName, &utbuf);
01385 }
01386 (void) close(fd);
01387 }
01388
01389
01390 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01391 {
01392
01393 if (match(conf, buf, nb))
01394 return;
01395
01396
01397 if (ascmagic(conf, buf, nb) == 1)
01398 return;
01399
01400
01401 if (textmagic(conf, buf, nb))
01402 return;
01403
01404
01405 conf->resultBuf = MIME_BINARY_UNKNOWN;
01406 conf->accuracy = 0;
01407 }
01408
01409 static int
01410 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01411 {
01412 int ret = 0;
01413
01414
01415
01416
01417
01418 ret = KDE_lstat(fn, sb);
01419
01420 if (ret) {
01421 return 1;
01422
01423 }
01424
01425
01426
01427
01428
01429
01430 switch (sb->st_mode & S_IFMT) {
01431 case S_IFDIR:
01432 conf->resultBuf = MIME_INODE_DIR;
01433 return 1;
01434 case S_IFCHR:
01435 conf->resultBuf = MIME_INODE_CDEV;
01436 return 1;
01437 case S_IFBLK:
01438 conf->resultBuf = MIME_INODE_BDEV;
01439 return 1;
01440
01441 #ifdef S_IFIFO
01442 case S_IFIFO:
01443 conf->resultBuf = MIME_INODE_FIFO;
01444 return 1;
01445 #endif
01446 #ifdef S_IFLNK
01447 case S_IFLNK:
01448 {
01449 char buf[BUFSIZ + BUFSIZ + 4];
01450 register int nch;
01451 KDE_struct_stat tstatbuf;
01452
01453 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01454 conf->resultBuf = MIME_INODE_LINK;
01455
01456 return 1;
01457 }
01458 buf[nch] = '\0';
01459
01460 if (*buf == '/') {
01461 if (KDE_stat(buf, &tstatbuf) < 0) {
01462 conf->resultBuf = MIME_INODE_LINK;
01463
01464 return 1;
01465 }
01466 } else {
01467 char *tmp;
01468 char buf2[BUFSIZ + BUFSIZ + 4];
01469
01470 strncpy(buf2, fn, BUFSIZ);
01471 buf2[BUFSIZ] = 0;
01472
01473 if ((tmp = strrchr(buf2, '/')) == NULL) {
01474 tmp = buf;
01475 } else {
01476
01477 *++tmp = '\0';
01478 strcat(buf2, buf);
01479 tmp = buf2;
01480 }
01481 if (KDE_stat(tmp, &tstatbuf) < 0) {
01482 conf->resultBuf = MIME_INODE_LINK;
01483
01484 return 1;
01485 } else
01486 strcpy(buf, tmp);
01487 }
01488 if (conf->followLinks)
01489 process( conf, QFile::decodeName( buf ) );
01490 else
01491 conf->resultBuf = MIME_INODE_LINK;
01492 return 1;
01493 }
01494 return 1;
01495 #endif
01496 #ifdef S_IFSOCK
01497 #ifndef __COHERENT__
01498 case S_IFSOCK:
01499 conf->resultBuf = MIME_INODE_SOCK;
01500 return 1;
01501 #endif
01502 #endif
01503 case S_IFREG:
01504 break;
01505 default:
01506 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01507
01508 }
01509
01510
01511
01512
01513 if (sb->st_size == 0) {
01514 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01515 return 1;
01516 }
01517 return 0;
01518 }
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546 static int
01547 match(struct config_rec* conf, unsigned char *s, int nbytes)
01548 {
01549 int cont_level = 0;
01550 union VALUETYPE p;
01551 struct magic *m;
01552
01553 #ifdef DEBUG_MIMEMAGIC
01554 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01555 for (m = conf->magic; m; m = m->next) {
01556 if (isprint((((unsigned long) m) >> 24) & 255) &&
01557 isprint((((unsigned long) m) >> 16) & 255) &&
01558 isprint((((unsigned long) m) >> 8) & 255) &&
01559 isprint(((unsigned long) m) & 255)) {
01560 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01561 break;
01562 }
01563 }
01564 #endif
01565
01566 for (m = conf->magic; m; m = m->next) {
01567 #ifdef DEBUG_MIMEMAGIC
01568 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01569 #endif
01570 memset(&p, 0, sizeof(union VALUETYPE));
01571
01572
01573 if (!mget(&p, s, m, nbytes) ||
01574 !mcheck(&p, m)) {
01575 struct magic *m_cont;
01576
01577
01578
01579
01580 if (!m->next || (m->next->cont_level == 0)) {
01581 continue;
01582 }
01583 m_cont = m->next;
01584 while (m_cont && (m_cont->cont_level != 0)) {
01585 #ifdef DEBUG_MIMEMAGIC
01586 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01587 #endif
01588
01589
01590
01591
01592 m = m_cont;
01593 m_cont = m_cont->next;
01594 }
01595 continue;
01596 }
01597
01598
01599 #ifdef DEBUG_MIMEMAGIC
01600 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01601 #endif
01602
01603
01604 conf->resultBuf = m->desc;
01605
01606 cont_level++;
01607
01608
01609
01610
01611 m = m->next;
01612 while (m && (m->cont_level != 0)) {
01613 #ifdef DEBUG_MIMEMAGIC
01614 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01615 #endif
01616 if (cont_level >= m->cont_level) {
01617 if (cont_level > m->cont_level) {
01618
01619
01620
01621
01622 cont_level = m->cont_level;
01623 }
01624 if (mget(&p, s, m, nbytes) &&
01625 mcheck(&p, m)) {
01626
01627
01628
01629
01630
01631
01632 #ifdef DEBUG_MIMEMAGIC
01633 kdDebug(7018) << "continuation matched" << endl;
01634 #endif
01635 conf->resultBuf = m->desc;
01636 cont_level++;
01637 }
01638 }
01639
01640 m = m->next;
01641 }
01642
01643
01644 if ( !conf->resultBuf.isEmpty() )
01645 {
01646 #ifdef DEBUG_MIMEMAGIC
01647 kdDebug(7018) << "match: matched" << endl;
01648 #endif
01649 return 1;
01650 }
01651 }
01652 #ifdef DEBUG_MIMEMAGIC
01653 kdDebug(7018) << "match: failed" << endl;
01654 #endif
01655 return 0;
01656 }
01657
01658
01659
/*
 * tagmagic - look for a tag header that starts with the bytes 'I','D','3'
 * at the beginning of buf (presumably an ID3v2 tag -- confirm against the
 * caller).
 *
 * Returns the number of bytes computed from the header (a fixed part of
 * 10 or 20 bytes plus a length assembled from bytes 6..9 in 7-bit steps),
 * or 0 when fewer than 40 bytes are available or the header is rejected.
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;

    /* the signature must be present */
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    /* byte 3 above 4 is not accepted */
    if (buf[3] > 4)
        return 0;

    /* none of the low four bits of byte 5 may be set */
    if (buf[5] & 0x0F)
        return 0;

    /* bit 0x10 of byte 5 adds another 10 bytes to the fixed part */
    int total = (buf[5] & 0x10) ? 20 : 10;

    /* bytes 6..9 each contribute 7 bits, most significant first */
    total += buf[9];
    total += buf[8] << 7;
    total += buf[7] << 14;
    total += buf[6] << 21;

    return total;
}
01679
01680
01681
01682 #define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
01683
01684 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01685 {
01686 int i;
01687 double pct, maxpct, pctsum;
01688 double pcts[NTYPES];
01689 int mostaccurate, tokencount;
01690 int typeset, jonly, conly, jconly, cppcomm, ccomm;
01691 int has_escapes = 0;
01692 unsigned char *s;
01693 char nbuf[HOWMANY + 1];
01694 char *token;
01695 register const struct names *p;
01696 int typecount[NTYPES];
01697
01698
01699 conf->accuracy = 70;
01700
01701
01702
01703
01704
01705
01706 if (*buf == '.') {
01707 unsigned char *tp = buf + 1;
01708
01709 while (isascii(*tp) && isspace(*tp))
01710 ++tp;
01711 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01712 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01713 conf->resultBuf = MIME_APPL_TROFF;
01714 return 1;
01715 }
01716 }
01717 if ((*buf == 'c' || *buf == 'C') &&
01718 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01719
01720 conf->resultBuf = MIME_TEXT_FORTRAN;
01721 return 1;
01722 }
01723 assert(nbytes-1 < HOWMANY + 1);
01724
01725
01726 s = (unsigned char *) memcpy(nbuf, buf, nbytes);
01727 s[nbytes-1] = '\0';
01728 has_escapes = (memchr(s, '\033', nbytes) != NULL);
01729
01730
01731
01732
01733 memset(&typecount, 0, sizeof(typecount));
01734 typeset = 0;
01735 jonly = 0;
01736 conly = 0;
01737 jconly = 0;
01738 cppcomm = 0;
01739 ccomm = 0;
01740 tokencount = 0;
01741 bool foundClass = false;
01742
01743
01744 while ((token = strtok((char *) s, " \t\n\r\f,;>")) != NULL) {
01745 s = NULL;
01746 #ifdef DEBUG_MIMEMAGIC
01747 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01748 #endif
01749 for (p = names; p->name ; p++) {
01750 if (STREQ(p->name, token)) {
01751 #ifdef DEBUG_MIMEMAGIC
01752 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01753 #endif
01754 tokencount++;
01755 typeset |= p->type;
01756 if (p->type == L_JAVA)
01757 jonly++;
01758 if ((p->type & (L_C|L_CPP|L_JAVA))
01759 == (L_CPP|L_JAVA)) {
01760 jconly++;
01761 if ( !foundClass && STREQ("class", token) )
01762 foundClass = true;
01763 }
01764 if ((p->type & (L_C|L_CPP|L_JAVA))
01765 == (L_C|L_CPP))
01766 conly++;
01767 if (STREQ(token, "//"))
01768 cppcomm++;
01769 if (STREQ(token, "/*"))
01770 ccomm++;
01771 for (i = 0; i < (int)NTYPES; i++)
01772 if ((1 << i) & p->type)
01773 typecount[i]++;
01774 }
01775 }
01776 }
01777
01778 if (typeset & (L_C|L_CPP|L_JAVA)) {
01779 conf->accuracy = 40;
01780 if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01781 #ifdef DEBUG_MIMEMAGIC
01782 kdDebug(7018) << "C/C++/Java: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " ccomm=" << ccomm << endl;
01783 #endif
01784 if (jonly && conly)
01785
01786 if ( jonly > conly )
01787 conly = 0;
01788 else
01789 jonly = 0;
01790 if (jonly > 1 && foundClass) {
01791
01792 conf->resultBuf = QString(types[P_JAVA].type);
01793 return 1;
01794 }
01795 if (jconly > 1) {
01796
01797 if (typecount[P_JAVA] > typecount[P_CPP])
01798 conf->resultBuf = QString(types[P_JAVA].type);
01799 else
01800 conf->resultBuf = QString(types[P_CPP].type);
01801 return 1;
01802 }
01803 if (conly) {
01804
01805 if (cppcomm)
01806 conf->resultBuf = QString(types[P_CPP].type);
01807 else
01808 conf->resultBuf = QString(types[P_C].type);
01809 return 1;
01810 }
01811 if (ccomm) {
01812 conf->resultBuf = QString(types[P_C].type);
01813 return 1;
01814 }
01815 }
01816 }
01817
01818
01819
01820
01821
01822 mostaccurate = -1;
01823 maxpct = pctsum = 0.0;
01824 for (i = 0; i < (int)NTYPES; i++) {
01825 if (typecount[i] > 1) {
01826 pct = (double)typecount[i] / (double)types[i].kwords *
01827 (double)types[i].weight;
01828 pcts[i] = pct;
01829 pctsum += pct;
01830 if (pct > maxpct) {
01831 maxpct = pct;
01832 mostaccurate = i;
01833 }
01834 #ifdef DEBUG_MIMEMAGIC
01835 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01836 #endif
01837 }
01838 }
01839 if (mostaccurate >= 0) {
01840 if ( mostaccurate != P_JAVA || foundClass )
01841 {
01842 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01843 #ifdef DEBUG_MIMEMAGIC
01844 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01845 #endif
01846 conf->resultBuf = QString(types[mostaccurate].type);
01847 return 1;
01848 }
01849 }
01850
01851 switch (is_tar(buf, nbytes)) {
01852 case 1:
01853
01854 conf->resultBuf = MIME_APPL_TAR;
01855 conf->accuracy = 90;
01856 return 1;
01857 case 2:
01858
01859 conf->resultBuf = MIME_APPL_TAR;
01860 conf->accuracy = 90;
01861 return 1;
01862 }
01863
01864 for (i = 0; i < nbytes; i++) {
01865 if (!isascii(*(buf + i)))
01866 return 0;
01867 }
01868
01869
01870 conf->accuracy = 90;
01871 if (has_escapes) {
01872
01873
01874 conf->resultBuf = MIME_TEXT_UNKNOWN;
01875 } else {
01876
01877 conf->resultBuf = MIME_TEXT_PLAIN;
01878 }
01879 return 1;
01880 }
01881
01882
01883 #define TEXT_MAXLINELEN 300
01884
01885
01886
01887
01888
01889 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01890 {
01891 int i;
01892 unsigned char *cp;
01893
01894 nbytes--;
01895
01896
01897 for (i = 0, cp = buf; i < nbytes; i++, cp++)
01898 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01899 return 0;
01900
01901
01902
01903
01904 for (i = 0; i < nbytes;) {
01905 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
01906 if (cp == NULL) {
01907
01908 if (i + TEXT_MAXLINELEN >= nbytes)
01909 break;
01910 else
01911 return 0;
01912 }
01913 if (cp - buf > TEXT_MAXLINELEN)
01914 return 0;
01915 i += (cp - buf + 1);
01916 buf = cp + 1;
01917 }
01918 conf->resultBuf = MIME_TEXT_PLAIN;
01919 return 1;
01920 }
01921
01922
01923
01924
01925
01926
01927
01928
01929
01930
01931
01932
01933
01934
01935
01936 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
01937
01938
01939
01940
01941
01942
01943 static int
01944 is_tar(unsigned char *buf, int nbytes)
01945 {
01946 register union record *header = (union record *) buf;
01947 register int i;
01948 register long sum,
01949 recsum;
01950 register char *p;
01951
01952 if (nbytes < (int)sizeof(union record))
01953 return 0;
01954
01955 recsum = from_oct(8, header->header.chksum);
01956
01957 sum = 0;
01958 p = header->charptr;
01959 for (i = sizeof(union record); --i >= 0;) {
01960
01961
01962
01963
01964 sum += 0xFF & *p++;
01965 }
01966
01967
01968 for (i = sizeof(header->header.chksum); --i >= 0;)
01969 sum -= 0xFF & header->header.chksum[i];
01970 sum += ' ' * sizeof header->header.chksum;
01971
01972 if (sum != recsum)
01973 return 0;
01974
01975 if (0 == strcmp(header->header.magic, TMAGIC))
01976 return 2;
01977
01978 return 1;
01979 }
01980
01981
01982
01983
01984
01985
01986
/*
 * from_oct - parse an octal number out of a fixed-width text field.
 *
 * where points at the field and digs is its width in characters.
 * Leading whitespace is skipped; the digits that follow are read as an
 * octal number.  The number must fill the rest of the field or be
 * followed by whitespace or a NUL byte.
 *
 * Returns the value, or -1 if the field holds only whitespace or the
 * digits are followed by any other character.
 */
static long
from_oct(int digs, char *where)
{
    long value = 0;

    /*
     * The field may hold arbitrary bytes, so every character goes through
     * unsigned char before it reaches isspace(): a negative argument
     * (other than EOF) is undefined behaviour for the <ctype.h> functions.
     */
    while (isspace((unsigned char) *where)) {   /* skip leading whitespace */
        where++;
        if (--digs <= 0)
            return -1;          /* nothing but whitespace */
    }

    /* collect octal digits, never reading past the end of the field */
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    /* the number has to end at the field end, a NUL or whitespace */
    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;

    return value;
}
02008
02009 KMimeMagic::KMimeMagic()
02010 {
02011
02012 QString mimefile = locate( "mime", "magic" );
02013 init( mimefile );
02014
02015 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02016 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02017 if ( !mergeConfig( *it ) )
02018 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02019 }
02020
02021 KMimeMagic::KMimeMagic(const QString & _configfile)
02022 {
02023 init( _configfile );
02024 }
02025
02026 void KMimeMagic::init( const QString& _configfile )
02027 {
02028 int result;
02029 conf = new config_rec;
02030
02031
02032 conf->magic = conf->last = NULL;
02033 magicResult = NULL;
02034 conf->followLinks = false;
02035
02036 conf->utimeConf = 0L;
02037
02038 result = apprentice(_configfile);
02039 if (result == -1)
02040 return;
02041 #ifdef MIME_MAGIC_DEBUG_TABLE
02042 test_table();
02043 #endif
02044 }
02045
02046
02047
02048
02049
02050 KMimeMagic::~KMimeMagic()
02051 {
02052 if (conf) {
02053 struct magic *p = conf->magic;
02054 struct magic *q;
02055 while (p) {
02056 q = p;
02057 p = p->next;
02058 free(q);
02059 }
02060 delete conf->utimeConf;
02061 delete conf;
02062 }
02063 delete magicResult;
02064 }
02065
02066 bool
02067 KMimeMagic::mergeConfig(const QString & _configfile)
02068 {
02069 kdDebug(7018) << k_funcinfo << _configfile << endl;
02070 int result;
02071
02072 if (_configfile.isEmpty())
02073 return false;
02074 result = apprentice(_configfile);
02075 if (result == -1) {
02076 return false;
02077 }
02078 #ifdef MIME_MAGIC_DEBUG_TABLE
02079 test_table();
02080 #endif
02081 return true;
02082 }
02083
02084 bool
02085 KMimeMagic::mergeBufConfig(char * _configbuf)
02086 {
02087 int result;
02088
02089 if (conf) {
02090 result = buff_apprentice(_configbuf);
02091 if (result == -1)
02092 return false;
02093 #ifdef MIME_MAGIC_DEBUG_TABLE
02094 test_table();
02095 #endif
02096 return true;
02097 }
02098 return false;
02099 }
02100
02101 void
02102 KMimeMagic::setFollowLinks( bool _enable )
02103 {
02104 conf->followLinks = _enable;
02105 }
02106
02107 KMimeMagicResult *
02108 KMimeMagic::findBufferType(const QByteArray &array)
02109 {
02110 unsigned char buf[HOWMANY + 1];
02111
02112 conf->resultBuf = QString::null;
02113 if ( !magicResult )
02114 magicResult = new KMimeMagicResult();
02115 magicResult->setInvalid();
02116 conf->accuracy = 100;
02117
02118 int nbytes = array.size();
02119
02120 if (nbytes > HOWMANY)
02121 nbytes = HOWMANY;
02122 memcpy(buf, array.data(), nbytes);
02123 if (nbytes == 0) {
02124 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02125 } else {
02126 buf[nbytes++] = '\0';
02127 tryit(conf, buf, nbytes);
02128 }
02129
02130 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02131 magicResult->setAccuracy(conf->accuracy);
02132 return magicResult;
02133 }
02134
02135 static void
02136 refineResult(KMimeMagicResult *r, const QString & _filename)
02137 {
02138 QString tmp = r->mimeType();
02139 if (tmp.isEmpty())
02140 return;
02141 if ( tmp == "text/x-c" ||
02142 tmp == "text/x-c++" )
02143 {
02144 if ( _filename.right(2) == ".h" )
02145 tmp += "hdr";
02146 else
02147 tmp += "src";
02148 r->setMimeType(tmp);
02149 }
02150 }
02151
02152 KMimeMagicResult *
02153 KMimeMagic::findBufferFileType( const QByteArray &data,
02154 const QString &fn)
02155 {
02156 KMimeMagicResult * r = findBufferType( data );
02157 refineResult(r, fn);
02158 return r;
02159 }
02160
02161
02162
02163
02164 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02165 {
02166 #ifdef DEBUG_MIMEMAGIC
02167 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02168 #endif
02169 conf->resultBuf = QString::null;
02170
02171 if ( !magicResult )
02172 magicResult = new KMimeMagicResult();
02173 magicResult->setInvalid();
02174 conf->accuracy = 100;
02175
02176 if ( !conf->utimeConf )
02177 conf->utimeConf = new KMimeMagicUtimeConf();
02178
02179
02180 process(conf, fn );
02181
02182
02183
02184 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02185 magicResult->setAccuracy(conf->accuracy);
02186 refineResult(magicResult, fn);
02187 return magicResult;
02188 }