831819980e
* There actually is a way to count the matches, so use it instead of attempting to guess * In some cases (when using optional groups (xxx)?, for example), there may be a non-matching group (with offsets set to -1) and matching groups after it, so the binary search wasn't quite working * Instead, we always return the number of capturing groups in the given expression, which is the maximal number of matches. Some (or all) of these may not have any content. We do return 0 matches on any error, including when the regular expression didn't match anything.
372 lines
6.4 KiB
C++
372 lines
6.4 KiB
C++
/*
|
|
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
|
|
* Copyright 2013, Rene Gollent, rene@gollent.com.
|
|
* Distributed under the terms of the MIT License.
|
|
*/
|
|
|
|
|
|
#include <RegExp.h>
|
|
|
|
#include <new>
|
|
|
|
#include <regex.h>
|
|
|
|
#include <String.h>
|
|
|
|
#include <Referenceable.h>
|
|
|
|
|
|
// #pragma mark - RegExp::Data
|
|
|
|
|
|
// Reference-counted holder of a compiled POSIX regular expression.
//
// The constructor compiles the given pattern with regcomp(), always using
// REG_EXTENDED and adding REG_ICASE when caseSensitive is false. A pattern of
// type PATTERN_TYPE_WILDCARD is translated from shell wildcard syntax to an
// extended regular expression first. The outcome is stored in fError:
// IsValid() has to be checked before CompiledExpression() is used.
struct RegExp::Data : public BReferenceable {
	Data(const char* pattern, PatternType patternType, bool caseSensitive)
		:
		BReferenceable()
	{
		// A NULL pattern can neither be translated nor compiled. Report it
		// as an invalid pattern instead of dereferencing it.
		if (pattern == NULL) {
			fError = REG_BADPAT;
			return;
		}

		// convert the shell pattern to a regular expression
		BString patternString;
		if (patternType == PATTERN_TYPE_WILDCARD) {
			while (*pattern != '\0') {
				char c = *pattern++;
				switch (c) {
					case '?':
						// matches any single character
						patternString += '.';
						continue;
					case '*':
						// matches any (possibly empty) character sequence
						patternString += ".*";
						continue;
					case '[':
					{
						// find the matching ']' first
						const char* end = pattern;
						while (*end != ']') {
							if (*end++ == '\0') {
								// Unterminated bracket expression. regcomp()
								// is never called on this path, so there is
								// nothing to free later.
								fError = REG_EBRACK;
								return;
							}
						}

						if (pattern == end) {
							// Empty bracket expression. It will never match
							// anything. Strictly speaking this is not
							// considered an error, but we handle it like one.
							fError = REG_EBRACK;
							return;
						}

						patternString += '[';

						// We need to avoid "[." ... ".]", "[=" ... "=]", and
						// "[:" ... ":]" sequences, since those have special
						// meaning in regular expressions. If we encounter
						// a '[' followed by either of '.', '=', or ':', we
						// replace the '[' by "[.[.]".
						while (pattern < end) {
							c = *pattern++;
							if (c == '[' && pattern < end) {
								switch (*pattern) {
									case '.':
									case '=':
									case ':':
										// Only the '[' is replaced; the
										// following character is copied by
										// the next loop iteration.
										patternString += "[.[.]";
										continue;
								}
							}
							patternString += c;
						}

						// skip the closing ']' in the input
						pattern++;
						patternString += ']';
						break;
					}

					case '\\':
					{
						// Quotes the next character. Works the same way for
						// regular expressions.
						if (*pattern == '\0') {
							// trailing backslash with nothing to quote
							fError = REG_EESCAPE;
							return;
						}

						patternString += '\\';
						patternString += *pattern++;
						break;
					}

					case '^':
					case '.':
					case '$':
					case '(':
					case ')':
					case '|':
					case '+':
					case '{':
						// need to be quoted
						patternString += '\\';
						// fall through
					default:
						patternString += c;
						break;
				}
			}

			// From here on compile the translated expression. patternString
			// stays alive until the end of the constructor, i.e. beyond the
			// regcomp() call below.
			pattern = patternString.String();
		}

		int flags = REG_EXTENDED;
		if (!caseSensitive)
			flags |= REG_ICASE;

		fError = regcomp(&fCompiledExpression, pattern, flags);
	}

	~Data()
	{
		// fCompiledExpression is only initialized when regcomp() succeeded.
		if (fError == 0)
			regfree(&fCompiledExpression);
	}

	// Returns whether the pattern was translated and compiled successfully.
	bool IsValid() const
	{
		return fError == 0;
	}

	// Returns the compiled expression. Only meaningful if IsValid() is true.
	const regex_t* CompiledExpression() const
	{
		return &fCompiledExpression;
	}

private:
	int		fError;
		// 0 on success, otherwise a REG_* error code
	regex_t	fCompiledExpression;
};
|
|
|
|
|
|
// #pragma mark - RegExp::MatchResultData
|
|
|
|
|
|
// Reference-counted result of running a compiled expression on a string.
//
// On success MatchCount() is the number of capturing groups of the
// expression plus one (slot 0 describes the whole match). On any failure --
// no match, regexec() error, or out of memory -- MatchCount() is 0 and
// Matches() is NULL.
struct RegExp::MatchResultData : public BReferenceable {
	MatchResultData(const regex_t* compiledExpression, const char* string)
		:
		BReferenceable(),
		fMatchCount(0),
		fMatches(NULL)
	{
		// fMatchCount is always set to the number of matching groups in the
		// expression (or 0 if an error occurred). Some of the "matches" in
		// the array may still point to the (-1,-1) range if they don't
		// actually match anything.
		const size_t slotCount = compiledExpression->re_nsub + 1;

		// Allocate without throwing: the object itself is created with
		// new(std::nothrow) by RegExp::Match(), so an allocation failure
		// must not surface as an exception here either. It is reported as
		// "no match" instead.
		regmatch_t* matches = new(std::nothrow) regmatch_t[slotCount];
		if (matches == NULL)
			return;

		if (regexec(compiledExpression, string, slotCount, matches, 0) != 0) {
			delete[] matches;
			return;
		}

		fMatchCount = slotCount;
		fMatches = matches;
	}

	~MatchResultData()
	{
		delete[] fMatches;
	}

	// Number of valid entries in Matches(); 0 if nothing matched.
	size_t MatchCount() const
	{
		return fMatchCount;
	}

	// Array of MatchCount() ranges, or NULL if nothing matched.
	const regmatch_t* Matches() const
	{
		return fMatches;
	}

private:
	size_t		fMatchCount;
	regmatch_t*	fMatches;
};
|
|
|
|
|
|
// #pragma mark - RegExp
|
|
|
|
|
|
// Creates an object without a pattern; no compiled data is attached.
RegExp::RegExp()
	: fData(NULL)
{
}
|
|
|
|
|
|
// Creates an object and immediately tries to set the given pattern via
// SetPattern(). The result of SetPattern() is not reported here.
RegExp::RegExp(const char* pattern, PatternType patternType,
		bool caseSensitive)
	: fData(NULL)
{
	SetPattern(pattern, patternType, caseSensitive);
}
|
|
|
|
|
|
// Copy constructor: shares the compiled data of \a other by taking an
// additional reference to it.
RegExp::RegExp(const RegExp& other)
	: fData(other.fData)
{
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
|
|
|
|
|
|
// Gives up this object's reference to the compiled data, if any.
RegExp::~RegExp()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
|
|
|
|
|
|
bool
|
|
RegExp::SetPattern(const char* pattern, PatternType patternType,
|
|
bool caseSensitive)
|
|
{
|
|
if (fData != NULL) {
|
|
fData->ReleaseReference();
|
|
fData = NULL;
|
|
}
|
|
|
|
Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive);
|
|
if (newData == NULL)
|
|
return false;
|
|
|
|
BReference<Data> dataReference(newData, true);
|
|
if (!newData->IsValid())
|
|
return false;
|
|
|
|
fData = dataReference.Detach();
|
|
return true;
|
|
}
|
|
|
|
|
|
// Runs the compiled expression on \a string.
//
// Returns an empty MatchResult (HasMatched() is false) if no valid pattern
// is set or if \a string is NULL. Otherwise the returned result wraps a
// newly allocated MatchResultData; if that allocation fails, the result is
// empty as well.
RegExp::MatchResult
RegExp::Match(const char* string) const
{
	// regexec() must not be handed a NULL string; treat it as "no match".
	if (!IsValid() || string == NULL)
		return MatchResult();

	return MatchResult(
		new(std::nothrow) MatchResultData(fData->CompiledExpression(),
			string));
}
|
|
|
|
|
|
// Makes this object share the compiled data of \a other.
//
// The reference to the new data is acquired before the old one is released.
// With the opposite order a self-assignment could release the last
// reference -- destroying the data -- and then acquire a reference on the
// already deleted object.
RegExp&
RegExp::operator=(const RegExp& other)
{
	Data* newData = other.fData;
	if (newData != NULL)
		newData->AcquireReference();

	if (fData != NULL)
		fData->ReleaseReference();

	fData = newData;

	return *this;
}
|
|
|
|
|
|
// #pragma mark - RegExp::MatchResult
|
|
|
|
|
|
// Creates an empty result, i.e. one for which HasMatched() returns false.
RegExp::MatchResult::MatchResult()
	: fData(NULL)
{
}
|
|
|
|
|
|
// Wraps the given match data, which may be NULL. No additional reference is
// acquired here; the destructor releases one reference on \a data.
RegExp::MatchResult::MatchResult(MatchResultData* data)
	: fData(data)
{
}
|
|
|
|
|
|
// Copy constructor: shares the match data of \a other by taking an
// additional reference to it.
RegExp::MatchResult::MatchResult(const MatchResult& other)
	: fData(other.fData)
{
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
|
|
|
|
|
|
// Gives up this object's reference to the match data, if any.
RegExp::MatchResult::~MatchResult()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
|
|
|
|
|
|
bool
|
|
RegExp::MatchResult::HasMatched() const
|
|
{
|
|
return fData != NULL && fData->MatchCount() > 0;
|
|
}
|
|
|
|
|
|
size_t
|
|
RegExp::MatchResult::StartOffset() const
|
|
{
|
|
return fData != NULL && fData->MatchCount() > 0
|
|
? fData->Matches()[0].rm_so : 0;
|
|
}
|
|
|
|
|
|
size_t
|
|
RegExp::MatchResult::EndOffset() const
|
|
{
|
|
return fData != NULL && fData->MatchCount() > 0
|
|
? fData->Matches()[0].rm_eo : 0;
|
|
}
|
|
|
|
|
|
size_t
|
|
RegExp::MatchResult::GroupCount() const
|
|
{
|
|
if (fData == NULL)
|
|
return 0;
|
|
|
|
size_t matchCount = fData->MatchCount();
|
|
return matchCount > 0 ? matchCount - 1 : 0;
|
|
}
|
|
|
|
|
|
size_t
|
|
RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
|
|
{
|
|
return fData != NULL && fData->MatchCount() > index + 1
|
|
? fData->Matches()[index + 1].rm_so : 0;
|
|
}
|
|
|
|
|
|
size_t
|
|
RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
|
|
{
|
|
return fData != NULL && fData->MatchCount() > index + 1
|
|
? fData->Matches()[index + 1].rm_eo : 0;
|
|
}
|
|
|
|
|
|
// Makes this result share the match data of \a other.
//
// The reference to the new data is acquired before the old one is released.
// With the opposite order a self-assignment could release the last
// reference -- destroying the data -- and then acquire a reference on the
// already deleted object.
RegExp::MatchResult&
RegExp::MatchResult::operator=(const MatchResult& other)
{
	MatchResultData* newData = other.fData;
	if (newData != NULL)
		newData->AcquireReference();

	if (fData != NULL)
		fData->ReleaseReference();

	fData = newData;

	return *this;
}
|