haiku/src/kits/shared/RegExp.cpp

372 lines
6.4 KiB
C++
Raw Normal View History

/*
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
* Copyright 2013, Rene Gollent, rene@gollent.com.
* Distributed under the terms of the MIT License.
*/
#include <RegExp.h>
#include <new>
#include <regex.h>
#include <String.h>
#include <Referenceable.h>
// #pragma mark - RegExp::Data
// Reference counted holder of one compiled POSIX extended regular expression.
// The expression is compiled once in the constructor; IsValid() tells whether
// that succeeded.
struct RegExp::Data : public BReferenceable {
	// Compiles "pattern". For PATTERN_TYPE_WILDCARD the shell-style pattern is
	// first translated into an extended regular expression; any other pattern
	// type is handed to regcomp() unchanged. When "caseSensitive" is false the
	// expression is compiled with REG_ICASE.
	// On failure fError holds a REG_* error code and nothing was compiled.
	Data(const char* pattern, PatternType patternType, bool caseSensitive)
		:
		BReferenceable()
	{
		// A NULL pattern can neither be translated nor handed to regcomp().
		if (pattern == NULL) {
			fError = REG_BADPAT;
			return;
		}

		// convert the shell pattern to a regular expression
		BString patternString;
		if (patternType == PATTERN_TYPE_WILDCARD) {
			while (*pattern != '\0') {
				char c = *pattern++;
				switch (c) {
					case '?':
						// any single character
						patternString += '.';
						continue;
					case '*':
						// any sequence of characters
						patternString += ".*";
						continue;
					case '[':
					{
						// find the matching ']' first
						const char* end = pattern;
						while (*end != ']') {
							if (*end++ == '\0') {
								fError = REG_EBRACK;
								return;
							}
						}

						if (pattern == end) {
							// Empty bracket expression. It will never match
							// anything. Strictly speaking this is not
							// considered an error, but we handle it like one.
							fError = REG_EBRACK;
							return;
						}

						patternString += '[';

						// Shell patterns negate a set with a leading '!',
						// regular expressions with a leading '^'. Without
						// this translation the '!' would become an ordinary
						// member of the set.
						if (*pattern == '!') {
							patternString += '^';
							pattern++;

							if (pattern == end) {
								// "[!]" -- a negation without any member.
								// Handled like the empty bracket expression.
								fError = REG_EBRACK;
								return;
							}
						}

						// We need to avoid "[." ... ".]", "[=" ... "=]", and
						// "[:" ... ":]" sequences, since those have special
						// meaning in regular expressions. If we encounter
						// a '[' followed by either of '.', '=', or ':', we
						// replace the '[' by "[.[.]".
						while (pattern < end) {
							c = *pattern++;
							if (c == '[' && pattern < end) {
								switch (*pattern) {
									case '.':
									case '=':
									case ':':
										patternString += "[.[.]";
										// continues the enclosing while loop;
										// the following character is copied
										// by the next iteration
										continue;
								}
							}
							patternString += c;
						}

						// skip the closing ']' in the input
						pattern++;
						patternString += ']';
						break;
					}

					case '\\':
					{
						// Quotes the next character. Works the same way for
						// regular expressions.
						if (*pattern == '\0') {
							fError = REG_EESCAPE;
							return;
						}
						patternString += '\\';
						patternString += *pattern++;
						break;
					}

					case '^':
					case '.':
					case '$':
					case '(':
					case ')':
					case '|':
					case '+':
					case '{':
						// need to be quoted
						patternString += '\\';
						// fall through
					default:
						patternString += c;
						break;
				}
			}

			// patternString stays alive until regcomp() below has returned.
			pattern = patternString.String();
		}

		int flags = REG_EXTENDED;
		if (!caseSensitive)
			flags |= REG_ICASE;

		fError = regcomp(&fCompiledExpression, pattern, flags);
	}

	// Frees the compiled expression, but only if compilation succeeded;
	// otherwise fCompiledExpression was never successfully set up.
	~Data()
	{
		if (fError == 0)
			regfree(&fCompiledExpression);
	}

	// Returns whether the pattern was compiled successfully.
	bool IsValid() const
	{
		return fError == 0;
	}

	// Returns the compiled expression. Only meaningful when IsValid().
	const regex_t* CompiledExpression() const
	{
		return &fCompiledExpression;
	}

private:
	int		fError;
		// 0 on success, otherwise a REG_* error code
	regex_t	fCompiledExpression;
};
// #pragma mark - RegExp::MatchResultData
// Reference counted holder of the result of running one compiled expression
// against one string.
struct RegExp::MatchResultData : public BReferenceable {
	// Executes "compiledExpression" on "string" and records the match ranges.
	// If the string is NULL, the match array cannot be allocated, or
	// regexec() reports no match, the object represents "no match"
	// (MatchCount() == 0, Matches() == NULL).
	MatchResultData(const regex_t* compiledExpression, const char* string)
		:
		BReferenceable(),
		fMatchCount(0),
		fMatches(NULL)
	{
		// fMatchCount is always set to the number of matching groups in the
		// expression (or 0 if an error occurred). Some of the "matches" in
		// the array may still point to the (-1,-1) range if they don't
		// actually match anything.
		if (string == NULL)
			return;

		// one slot for the whole match plus one per parenthesized group
		const size_t slotCount = compiledExpression->re_nsub + 1;

		// Allocate without throwing, like the other allocations in this file;
		// an allocation failure is reported as "no match".
		regmatch_t* slots = new(std::nothrow) regmatch_t[slotCount];
		if (slots == NULL)
			return;

		if (regexec(compiledExpression, string, slotCount, slots, 0) != 0) {
			delete[] slots;
			return;
		}

		fMatchCount = slotCount;
		fMatches = slots;
	}

	~MatchResultData()
	{
		delete[] fMatches;
	}

	// Number of entries in Matches(); 0 if nothing matched.
	size_t MatchCount() const
	{
		return fMatchCount;
	}

	// Array of MatchCount() ranges; entry 0 is the whole match. NULL if
	// nothing matched.
	const regmatch_t* Matches() const
	{
		return fMatches;
	}

private:
	size_t		fMatchCount;
	regmatch_t*	fMatches;
};
// #pragma mark - RegExp
// Creates an object without any compiled pattern.
RegExp::RegExp()
	: fData(NULL)
{
}
// Creates an object and immediately tries to compile "pattern" via
// SetPattern(). The result of SetPattern() is not reported here; fData stays
// NULL if compilation failed.
RegExp::RegExp(const char* pattern, PatternType patternType,
	bool caseSensitive)
	: fData(NULL)
{
	(void)SetPattern(pattern, patternType, caseSensitive);
}
// Copies "other" by sharing its compiled data; only the reference count of the
// shared data is increased.
RegExp::RegExp(const RegExp& other)
	: fData(other.fData)
{
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
// Gives up this object's reference to the shared compiled data, if any.
RegExp::~RegExp()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
bool
RegExp::SetPattern(const char* pattern, PatternType patternType,
bool caseSensitive)
{
if (fData != NULL) {
fData->ReleaseReference();
fData = NULL;
}
Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive);
if (newData == NULL)
return false;
BReference<Data> dataReference(newData, true);
if (!newData->IsValid())
return false;
fData = dataReference.Detach();
return true;
}
// Runs the compiled expression on "string" and returns the result.
// An object without a valid pattern yields a default constructed (not
// matched) result. A failed allocation of the result data passes NULL to the
// MatchResult constructor.
RegExp::MatchResult
RegExp::Match(const char* string) const
{
	if (IsValid()) {
		MatchResultData* resultData = new(std::nothrow) MatchResultData(
			fData->CompiledExpression(), string);
		return MatchResult(resultData);
	}

	return MatchResult();
}
// Makes this object share the compiled data of "other".
// Returns a reference to this object.
RegExp&
RegExp::operator=(const RegExp& other)
{
	// Nothing to do if both already share the same data. This also covers
	// self-assignment, where releasing first could destroy the data we are
	// about to acquire again.
	if (fData == other.fData)
		return *this;

	if (fData != NULL)
		fData->ReleaseReference();

	fData = other.fData;

	if (fData != NULL)
		fData->AcquireReference();

	return *this;
}
// #pragma mark - RegExp::MatchResult
// Creates a result that holds no match data.
RegExp::MatchResult::MatchResult()
	: fData(NULL)
{
}
// Wraps "data" (may be NULL). No additional reference is acquired here; the
// destructor releases one reference on the data.
RegExp::MatchResult::MatchResult(MatchResultData* data)
	: fData(data)
{
}
// Copies "other" by sharing its match data and acquiring a reference to it.
RegExp::MatchResult::MatchResult(const MatchResult& other)
	: fData(other.fData)
{
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
// Gives up this object's reference to the shared match data, if any.
RegExp::MatchResult::~MatchResult()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
// Returns whether the expression matched, i.e. whether there is match data
// with at least one recorded range.
bool
RegExp::MatchResult::HasMatched() const
{
	if (fData == NULL)
		return false;

	return fData->MatchCount() > 0;
}
// Returns the start offset (rm_so) of the whole match, or 0 if nothing
// matched.
size_t
RegExp::MatchResult::StartOffset() const
{
	if (fData == NULL || fData->MatchCount() == 0)
		return 0;

	// entry 0 describes the whole match
	return fData->Matches()[0].rm_so;
}
// Returns the end offset (rm_eo) of the whole match, or 0 if nothing matched.
size_t
RegExp::MatchResult::EndOffset() const
{
	if (fData == NULL || fData->MatchCount() == 0)
		return 0;

	// entry 0 describes the whole match
	return fData->Matches()[0].rm_eo;
}
// Returns the number of groups in the result, i.e. the number of recorded
// ranges not counting the entry for the whole match. 0 if nothing matched.
size_t
RegExp::MatchResult::GroupCount() const
{
	const size_t rangeCount = fData != NULL ? fData->MatchCount() : 0;
	if (rangeCount == 0)
		return 0;

	return rangeCount - 1;
}
// Returns the start offset (rm_so) of the group at "index" (0-based, not
// counting the whole match), or 0 if there is no match data or the index is
// out of range.
// NOTE(review): a group that did not take part in the match has rm_so == -1,
// which the conversion to size_t turns into a very large value -- callers
// presumably need to check for that.
size_t
RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
{
	if (fData == NULL)
		return 0;

	// Compare against matchCount - 1 rather than computing index + 1 first:
	// the latter wraps around to 0 for the largest size_t value and would
	// wrongly select the whole-match entry.
	const size_t matchCount = fData->MatchCount();
	if (matchCount == 0 || index >= matchCount - 1)
		return 0;

	return fData->Matches()[index + 1].rm_so;
}
// Returns the end offset (rm_eo) of the group at "index" (0-based, not
// counting the whole match), or 0 if there is no match data or the index is
// out of range.
// NOTE(review): a group that did not take part in the match has rm_eo == -1,
// which the conversion to size_t turns into a very large value -- callers
// presumably need to check for that.
size_t
RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
{
	if (fData == NULL)
		return 0;

	// Compare against matchCount - 1 rather than computing index + 1 first:
	// the latter wraps around to 0 for the largest size_t value and would
	// wrongly select the whole-match entry.
	const size_t matchCount = fData->MatchCount();
	if (matchCount == 0 || index >= matchCount - 1)
		return 0;

	return fData->Matches()[index + 1].rm_eo;
}
// Makes this result share the match data of "other".
// Returns a reference to this object.
RegExp::MatchResult&
RegExp::MatchResult::operator=(const MatchResult& other)
{
	// Nothing to do if both already share the same data. This also covers
	// self-assignment, where releasing first could destroy the data we are
	// about to acquire again.
	if (fData == other.fData)
		return *this;

	if (fData != NULL)
		fData->ReleaseReference();

	fData = other.fData;

	if (fData != NULL)
		fData->AcquireReference();

	return *this;
}