00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
#include "regexp.h"
00023
00024
#include <stdio.h>
00025
#include <stdlib.h>
00026
#include <string.h>
00027
00028
using namespace KJS;
00029
00030 RegExp::RegExp(
const UString &p,
int f)
00031 : pattern(p), flgs(f), m_notEmpty(false)
00032 {
00033
#ifdef HAVE_PCREPOSIX
00034
int pcreflags = 0;
00035
const char *perrormsg;
00036
int errorOffset;
00037
00038
if (flgs & IgnoreCase)
00039 pcreflags |= PCRE_CASELESS;
00040
00041
if (flgs & Multiline)
00042 pcreflags |= PCRE_MULTILINE;
00043
00044 pcregex = pcre_compile(p.
ascii(), pcreflags,
00045 &perrormsg, &errorOffset, NULL);
00046
#ifndef NDEBUG
00047
if (!pcregex)
00048 fprintf(stderr,
"KJS: pcre_compile() failed with '%s'\n", perrormsg);
00049
#endif
00050
00051
#ifdef PCRE_INFO_CAPTURECOUNT
00052
00053
int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
00054
if (rc != 0)
00055
#endif
00056
nrSubPatterns = 0;
00057
00058
#else
00059
00060 nrSubPatterns = 0;
00061
int regflags = 0;
00062
#ifdef REG_EXTENDED
00063
regflags |= REG_EXTENDED;
00064
#endif
00065
#ifdef REG_ICASE
00066
if ( f & IgnoreCase )
00067 regflags |= REG_ICASE;
00068
#endif
00069
00070
00071
00072
00073
00074
00075
if (regcomp(&preg, p.
ascii(), regflags) != 0) {
00076
00077 regcomp(&preg,
"", regflags);
00078 }
00079
#endif
00080
}
00081
00082 RegExp::~RegExp()
00083 {
00084
#ifdef HAVE_PCREPOSIX
00085
if (pcregex)
00086 pcre_free(pcregex);
00087
#else
00088
00089 regfree(&preg);
00090
#endif
00091
}
00092
00093
// Searches s for this regular expression, starting at index i.
//
// Parameters:
//   s        subject string.
//   i        start index; negative values are treated as 0. An index
//            greater than s.size() yields UString::null.
//   pos      optional; receives the start index of the match, or -1 when
//            the function returns UString::null.
//   ovector  optional; *ovector is reset to 0 on entry. On a match it
//            points to a new[]-allocated int array holding (start, end)
//            index pairs for the whole match followed by each sub-pattern.
//            In the PCRE build the array is allocated even when no match
//            is found. This function never frees it.
//
// Returns the matched substring, or UString::null if nothing matched.
// In the PCRE build the function always returns UString::null (and leaves
// *pos at -1) when ovector is null.
UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
{
  if (i < 0)
    i = 0;
  if (ovector)
    *ovector = 0L;
  int dummyPos;
  if (!pos)
    pos = &dummyPos;
  *pos = -1;
  if (i > s.size() || s.isNull())
    return UString::null;

#ifdef HAVE_PCREPOSIX
  CString buffer(s.cstring());
  int bufferSize = buffer.size();
  int ovecsize = (nrSubPatterns+1)*3;
  if (ovector) *ovector = new int[ovecsize];
  if (!pcregex)
    return UString::null;

  // After an empty match on a global regexp (m_notEmpty set below), first
  // look for a non-empty match anchored at the same position.
  int rc = pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i,
                     m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0,
                     ovector ? *ovector : 0L, ovecsize);

  if (rc == PCRE_ERROR_NOMATCH && (flgs & Global) && m_notEmpty && ovector)
  {
    // No non-empty match at i: advance by one character and do a normal
    // search from there.
    m_notEmpty = false;
    if (i >= bufferSize)
      return UString::null; // nothing left to search past the end
    rc = pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0,
                   *ovector, ovecsize);
  }

  // Any negative result (no match or an execution error) means the offset
  // vector was not filled in and must not be read.
  if (rc < 0)
    return UString::null;

  if (!ovector)
    return UString::null;
#else
  const uint maxMatch = 10;
  regmatch_t rmatch[maxMatch];

  char *str = strdup(s.ascii());
  if (!str)
    return UString::null; // out of memory
  const int noMatch = regexec(&preg, str + i, maxMatch, rmatch, 0);
  free(str);
  if (noMatch)
    return UString::null;

  // regexec() offsets are relative to str + i, hence the "+ i" below.
  if (!ovector) {
    *pos = rmatch[0].rm_so + i;
    return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
  }

  // Count the sub-patterns up to the first one that did not participate.
  nrSubPatterns = 0;
  for (uint j = 1; j < maxMatch && rmatch[j].rm_so >= 0; j++)
    nrSubPatterns++;

  // Same sizing as the PCRE branch; only the first two thirds are written.
  int ovecsize = (nrSubPatterns+1)*3;
  *ovector = new int[ovecsize];
  for (uint j = 0; j < maxMatch && j < nrSubPatterns + 1; j++) {
    (*ovector)[2*j] = rmatch[j].rm_so + i;
    (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
  }
#endif

  *pos = (*ovector)[0];
#ifdef HAVE_PCREPOSIX // TODO check this stuff in non-pcre mode
  // Remember an empty match on a global regexp so the next call does not
  // return the same empty match again.
  if ( *pos == (*ovector)[1] && (flgs & Global) )
  {
    m_notEmpty = true;
  }
#endif
  return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
}
00177
00178
#if 0 // unused
00179
bool RegExp::test(
const UString &s,
int)
00180 {
00181
#ifdef HAVE_PCREPOSIX
00182
int ovector[300];
00183
CString buffer(s.
cstring());
00184
00185
if (s.
isNull() ||
00186 pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0,
00187 0, ovector, 300) == PCRE_ERROR_NOMATCH)
00188
return false;
00189
else
00190
return true;
00191
00192
#else
00193
00194
char *str = strdup(s.
ascii());
00195
int r = regexec(&preg, str, 0, 0, 0);
00196 free(str);
00197
00198
return r == 0;
00199
#endif
00200
}
00201
#endif