kjs Library API Documentation

regexp.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2001 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Lesser General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Lesser General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Lesser General Public
00017  *  License along with this library; if not, write to the Free Software
00018  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #include "regexp.h"
00023 
00024 #include <stdio.h>
00025 #include <stdlib.h>
00026 #include <string.h>
00027 
00028 using namespace KJS;
00029 
00030 RegExp::RegExp(const UString &p, int f)
00031   : pattern(p), flgs(f), m_notEmpty(false)
00032 {
00033 #ifdef HAVE_PCREPOSIX
00034   int pcreflags = 0;
00035   const char *perrormsg;
00036   int errorOffset;
00037 
00038   if (flgs & IgnoreCase)
00039     pcreflags |= PCRE_CASELESS;
00040 
00041   if (flgs & Multiline)
00042     pcreflags |= PCRE_MULTILINE;
00043 
00044   pcregex = pcre_compile(p.ascii(), pcreflags,
00045              &perrormsg, &errorOffset, NULL);
00046 #ifndef NDEBUG
00047   if (!pcregex)
00048     fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg);
00049 #endif
00050 
00051 #ifdef PCRE_INFO_CAPTURECOUNT
00052   // Get number of subpatterns that will be returned
00053   int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns);
00054   if (rc != 0)
00055 #endif
00056     nrSubPatterns = 0; // fallback. We always need the first pair of offsets.
00057 
00058 #else /* HAVE_PCREPOSIX */
00059 
00060   nrSubPatterns = 0; // determined in match() with POSIX regex.
00061   int regflags = 0;
00062 #ifdef REG_EXTENDED
00063   regflags |= REG_EXTENDED;
00064 #endif
00065 #ifdef REG_ICASE
00066   if ( f & IgnoreCase )
00067     regflags |= REG_ICASE;
00068 #endif
00069 
00070   //NOTE: Multiline is not feasible with POSIX regex.
00071   //if ( f & Multiline )
00072   //    ;
00073   // Note: the Global flag is already handled by RegExpProtoFunc::execute
00074 
00075   regcomp(&preg, p.ascii(), regflags);
00076   /* TODO check for errors */
00077 #endif
00078 
00079 }
00080 
00081 RegExp::~RegExp()
00082 {
00083 #ifdef HAVE_PCREPOSIX
00084   if (pcregex)
00085     pcre_free(pcregex);
00086 #else
00087   /* TODO: is this really okay after an error ? */
00088   regfree(&preg);
00089 #endif
00090 }
00091 
00092 UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
00093 {
00094   if (i < 0)
00095     i = 0;
00096   if (ovector)
00097     *ovector = 0L;
00098   int dummyPos;
00099   if (!pos)
00100     pos = &dummyPos;
00101   *pos = -1;
00102   if (i > s.size() || s.isNull())
00103     return UString::null;
00104 
00105 #ifdef HAVE_PCREPOSIX
00106   CString buffer(s.cstring());
00107   int bufferSize = buffer.size();
00108   int ovecsize = (nrSubPatterns+1)*3; // see pcre docu
00109   if (ovector) *ovector = new int[ovecsize];
00110   if (!pcregex)
00111     return UString::null;
00112 
00113   if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i,
00114                 m_notEmpty ? (PCRE_NOTEMPTY | PCRE_ANCHORED) : 0, // see man pcretest
00115                 ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00116   {
00117     // Failed to match.
00118     if ((flgs & Global) && m_notEmpty && ovector)
00119     {
00120       // We set m_notEmpty ourselves, to look for a non-empty match
00121       // (see man pcretest or pcretest.c for details).
00122       // So we don't stop here, we want to try again at i+1.
00123       m_notEmpty = 0;
00124       if (pcre_exec(pcregex, NULL, buffer.c_str(), bufferSize, i+1, 0,
00125                     ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH)
00126         return UString::null;
00127     }
00128     else // done
00129       return UString::null;
00130   }
00131 
00132   // Got a match, proceed with it.
00133 
00134   if (!ovector)
00135     return UString::null; // don't rely on the return value if you pass ovector==0
00136 #else
00137   const uint maxMatch = 10;
00138   regmatch_t rmatch[maxMatch];
00139 
00140   char *str = strdup(s.ascii()); // TODO: why ???
00141   if (regexec(&preg, str + i, maxMatch, rmatch, 0)) {
00142     free(str);
00143     return UString::null;
00144   }
00145   free(str);
00146 
00147   if (!ovector) {
00148     *pos = rmatch[0].rm_so + i;
00149     return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
00150   }
00151 
00152   // map rmatch array to ovector used in PCRE case
00153   nrSubPatterns = 0;
00154   for(uint j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++)
00155       nrSubPatterns++;
00156   int ovecsize = (nrSubPatterns+1)*3; // see above
00157   *ovector = new int[ovecsize];
00158   for (uint j = 0; j < nrSubPatterns + 1; j++) {
00159     if (j>maxMatch)
00160       break;
00161     (*ovector)[2*j] = rmatch[j].rm_so + i;
00162     (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
00163   }
00164 #endif
00165 
00166   *pos = (*ovector)[0];
00167 #ifdef HAVE_PCREPOSIX  // TODO check this stuff in non-pcre mode
00168   if ( *pos == (*ovector)[1] && (flgs & Global) )
00169   {
00170     // empty match, next try will be with m_notEmpty=true
00171     m_notEmpty=true;
00172   }
00173 #endif
00174   return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
00175 }
00176 
#if 0 // unused
/*
 * Reports whether the pattern matches anywhere in s; the second argument
 * is ignored. Currently compiled out.
 */
bool RegExp::test(const UString &s, int)
{
#ifdef HAVE_PCREPOSIX
  int offsets[300];
  CString subject(s.cstring());

  if (s.isNull())
    return false;

  // Only an explicit "no match" counts as failure here.
  const int rc = pcre_exec(pcregex, NULL, subject.c_str(), subject.size(), 0,
                           0, offsets, sizeof(offsets) / sizeof(offsets[0]));
  return rc != PCRE_ERROR_NOMATCH;

#else

  // regexec() runs on a private copy, which is released before returning.
  char *copy = strdup(s.ascii());
  const bool matched = (regexec(&preg, copy, 0, 0, 0) == 0);
  free(copy);

  return matched;
#endif
}
#endif
KDE Logo
This file is part of the documentation for kjs Library Version 3.2.2.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Wed Apr 21 18:43:31 2004 by doxygen 1.3.6-20040222 written by Dimitri van Heesch, © 1997-2003