2013-06-06 05:00:19 +04:00
|
|
|
/*
|
|
|
|
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
|
|
|
|
* Copyright 2013, Rene Gollent, rene@gollent.com.
|
|
|
|
* Distributed under the terms of the MIT License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#include <RegExp.h>
|
|
|
|
|
|
|
|
#include <new>
|
|
|
|
|
|
|
|
#include <regex.h>
|
|
|
|
|
|
|
|
#include <String.h>
|
|
|
|
|
|
|
|
#include <Referenceable.h>
|
|
|
|
|
|
|
|
|
|
|
|
// #pragma mark - RegExp::Data
|
|
|
|
|
|
|
|
|
|
|
|
struct RegExp::Data : public BReferenceable {
|
2013-06-06 05:57:50 +04:00
|
|
|
Data(const char* pattern, PatternType patternType, bool caseSensitive)
|
2013-06-06 05:00:19 +04:00
|
|
|
:
|
|
|
|
BReferenceable()
|
|
|
|
{
|
|
|
|
// convert the shell pattern to a regular expression
|
|
|
|
BString patternString;
|
|
|
|
if (patternType == PATTERN_TYPE_WILDCARD) {
|
|
|
|
while (*pattern != '\0') {
|
|
|
|
char c = *pattern++;
|
|
|
|
switch (c) {
|
|
|
|
case '?':
|
|
|
|
patternString += '.';
|
|
|
|
continue;
|
|
|
|
case '*':
|
|
|
|
patternString += ".*";
|
|
|
|
continue;
|
|
|
|
case '[':
|
|
|
|
{
|
|
|
|
// find the matching ']' first
|
|
|
|
const char* end = pattern;
|
|
|
|
while (*end != ']') {
|
|
|
|
if (*end++ == '\0') {
|
|
|
|
fError = REG_EBRACK;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pattern == end) {
|
|
|
|
// Empty bracket expression. It will never match
|
|
|
|
// anything. Strictly speaking this is not
|
|
|
|
// considered an error, but we handle it like one.
|
|
|
|
fError = REG_EBRACK;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
patternString += '[';
|
|
|
|
|
|
|
|
// We need to avoid "[." ... ".]", "[=" ... "=]", and
|
|
|
|
// "[:" ... ":]" sequences, since those have special
|
|
|
|
// meaning in regular expressions. If we encounter
|
|
|
|
// a '[' followed by either of '.', '=', or ':', we
|
|
|
|
// replace the '[' by "[.[.]".
|
|
|
|
while (pattern < end) {
|
|
|
|
c = *pattern++;
|
|
|
|
if (c == '[' && pattern < end) {
|
|
|
|
switch (*pattern) {
|
|
|
|
case '.':
|
|
|
|
case '=':
|
|
|
|
case ':':
|
|
|
|
patternString += "[.[.]";
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
patternString += c;
|
|
|
|
}
|
|
|
|
|
|
|
|
pattern++;
|
|
|
|
patternString += ']';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '\\':
|
|
|
|
{
|
|
|
|
// Quotes the next character. Works the same way for
|
|
|
|
// regular expressions.
|
|
|
|
if (*pattern == '\0') {
|
|
|
|
fError = REG_EESCAPE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
patternString += '\\';
|
|
|
|
patternString += *pattern++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '^':
|
|
|
|
case '.':
|
|
|
|
case '$':
|
|
|
|
case '(':
|
|
|
|
case ')':
|
|
|
|
case '|':
|
|
|
|
case '+':
|
|
|
|
case '{':
|
|
|
|
// need to be quoted
|
|
|
|
patternString += '\\';
|
|
|
|
// fall through
|
|
|
|
default:
|
|
|
|
patternString += c;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pattern = patternString.String();
|
|
|
|
}
|
|
|
|
|
2013-06-06 05:57:50 +04:00
|
|
|
int flags = REG_EXTENDED;
|
|
|
|
if (!caseSensitive)
|
|
|
|
flags |= REG_ICASE;
|
|
|
|
|
|
|
|
fError = regcomp(&fCompiledExpression, pattern, flags);
|
2013-06-06 05:00:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
~Data()
|
|
|
|
{
|
|
|
|
if (fError == 0)
|
|
|
|
regfree(&fCompiledExpression);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool IsValid() const
|
|
|
|
{
|
|
|
|
return fError == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
const regex_t* CompiledExpression() const
|
|
|
|
{
|
|
|
|
return &fCompiledExpression;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
int fError;
|
|
|
|
regex_t fCompiledExpression;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// #pragma mark - RegExp::MatchResultData
|
|
|
|
|
|
|
|
|
|
|
|
struct RegExp::MatchResultData : public BReferenceable {
|
|
|
|
MatchResultData(const regex_t* compiledExpression, const char* string)
|
|
|
|
:
|
|
|
|
BReferenceable(),
|
|
|
|
fMatchCount(0),
|
|
|
|
fMatches(NULL)
|
|
|
|
{
|
2013-10-16 22:51:47 +04:00
|
|
|
// fMatchCount is always set to the number of matching groups in the
|
|
|
|
// expression (or 0 if an error occured). Some of the "matches" in
|
|
|
|
// the array may still point to the (-1,-1) range if they don't
|
|
|
|
// actually match anything.
|
|
|
|
fMatchCount = compiledExpression->re_nsub + 1;
|
|
|
|
fMatches = new regmatch_t[fMatchCount];
|
|
|
|
if (regexec(compiledExpression, string, fMatchCount, fMatches, 0)
|
|
|
|
!= 0) {
|
2013-06-06 05:00:19 +04:00
|
|
|
delete[] fMatches;
|
|
|
|
fMatches = NULL;
|
2013-10-16 22:51:47 +04:00
|
|
|
fMatchCount = 0;
|
2013-06-06 05:00:19 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
~MatchResultData()
|
|
|
|
{
|
|
|
|
delete[] fMatches;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t MatchCount() const
|
|
|
|
{
|
|
|
|
return fMatchCount;
|
|
|
|
}
|
|
|
|
|
|
|
|
const regmatch_t* Matches() const
|
|
|
|
{
|
|
|
|
return fMatches;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
size_t fMatchCount;
|
|
|
|
regmatch_t* fMatches;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// #pragma mark - RegExp
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::RegExp()
|
|
|
|
:
|
|
|
|
fData(NULL)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-06-06 05:57:50 +04:00
|
|
|
RegExp::RegExp(const char* pattern, PatternType patternType,
|
|
|
|
bool caseSensitive)
|
2013-06-06 05:00:19 +04:00
|
|
|
:
|
|
|
|
fData(NULL)
|
|
|
|
{
|
2013-06-06 05:57:50 +04:00
|
|
|
SetPattern(pattern, patternType, caseSensitive);
|
2013-06-06 05:00:19 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::RegExp(const RegExp& other)
|
|
|
|
:
|
|
|
|
fData(other.fData)
|
|
|
|
{
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->AcquireReference();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::~RegExp()
|
|
|
|
{
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->ReleaseReference();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
2013-06-06 05:57:50 +04:00
|
|
|
RegExp::SetPattern(const char* pattern, PatternType patternType,
|
|
|
|
bool caseSensitive)
|
2013-06-06 05:00:19 +04:00
|
|
|
{
|
|
|
|
if (fData != NULL) {
|
|
|
|
fData->ReleaseReference();
|
|
|
|
fData = NULL;
|
|
|
|
}
|
|
|
|
|
2013-06-06 05:57:50 +04:00
|
|
|
Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive);
|
|
|
|
if (newData == NULL)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
BReference<Data> dataReference(newData, true);
|
|
|
|
if (!newData->IsValid())
|
2013-06-06 05:00:19 +04:00
|
|
|
return false;
|
|
|
|
|
2013-06-06 05:57:50 +04:00
|
|
|
fData = dataReference.Detach();
|
2013-06-06 05:00:19 +04:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::MatchResult
|
|
|
|
RegExp::Match(const char* string) const
|
|
|
|
{
|
|
|
|
if (!IsValid())
|
|
|
|
return MatchResult();
|
|
|
|
|
|
|
|
return MatchResult(
|
|
|
|
new(std::nothrow) MatchResultData(fData->CompiledExpression(),
|
|
|
|
string));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp&
|
|
|
|
RegExp::operator=(const RegExp& other)
|
|
|
|
{
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->ReleaseReference();
|
|
|
|
|
|
|
|
fData = other.fData;
|
|
|
|
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->AcquireReference();
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// #pragma mark - RegExp::MatchResult
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::MatchResult::MatchResult()
|
|
|
|
:
|
|
|
|
fData(NULL)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::MatchResult::MatchResult(MatchResultData* data)
|
|
|
|
:
|
|
|
|
fData(data)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::MatchResult::MatchResult(const MatchResult& other)
|
|
|
|
:
|
|
|
|
fData(other.fData)
|
|
|
|
{
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->AcquireReference();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::MatchResult::~MatchResult()
|
|
|
|
{
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->ReleaseReference();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
RegExp::MatchResult::HasMatched() const
|
|
|
|
{
|
|
|
|
return fData != NULL && fData->MatchCount() > 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t
|
|
|
|
RegExp::MatchResult::StartOffset() const
|
|
|
|
{
|
|
|
|
return fData != NULL && fData->MatchCount() > 0
|
|
|
|
? fData->Matches()[0].rm_so : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t
|
|
|
|
RegExp::MatchResult::EndOffset() const
|
|
|
|
{
|
|
|
|
return fData != NULL && fData->MatchCount() > 0
|
|
|
|
? fData->Matches()[0].rm_eo : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t
|
|
|
|
RegExp::MatchResult::GroupCount() const
|
|
|
|
{
|
|
|
|
if (fData == NULL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
size_t matchCount = fData->MatchCount();
|
|
|
|
return matchCount > 0 ? matchCount - 1 : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t
|
|
|
|
RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
|
|
|
|
{
|
|
|
|
return fData != NULL && fData->MatchCount() > index + 1
|
|
|
|
? fData->Matches()[index + 1].rm_so : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t
|
|
|
|
RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
|
|
|
|
{
|
|
|
|
return fData != NULL && fData->MatchCount() > index + 1
|
|
|
|
? fData->Matches()[index + 1].rm_eo : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
RegExp::MatchResult&
|
|
|
|
RegExp::MatchResult::operator=(const MatchResult& other)
|
|
|
|
{
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->ReleaseReference();
|
|
|
|
|
|
|
|
fData = other.fData;
|
|
|
|
|
|
|
|
if (fData != NULL)
|
|
|
|
fData->AcquireReference();
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
}
|