Import RegExp classes from Ham.

Minor adjustments were made to fit better into Haiku.
This commit is contained in:
Rene Gollent 2013-06-05 21:00:19 -04:00
parent 58535f5a9a
commit 47fedeb598
3 changed files with 465 additions and 0 deletions

View File

@ -0,0 +1,78 @@
/*
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
* Distributed under the terms of the MIT License.
*/
#ifndef REG_EXP_H
#define REG_EXP_H
#include <stddef.h>
// Wrapper around a compiled POSIX regular expression (see <regex.h>).
// The compiled data is reference counted: copying or assigning a RegExp
// shares the compiled expression instead of compiling it again.
class RegExp {
public:
// Syntax in which a pattern handed to the constructor or SetPattern() is
// written.
enum PatternType {
// The pattern is passed to the regular expression compiler unchanged.
PATTERN_TYPE_REGULAR_EXPRESSION,
// Shell-style wildcard pattern ('?', '*', '[...]', '\' quoting). It is
// translated into a regular expression before being compiled.
PATTERN_TYPE_WILDCARD
};
// Result of a Match() call; defined below.
class MatchResult;
public:
// Creates an invalid object (IsValid() returns false) without a pattern.
RegExp();
// Compiles the given pattern via SetPattern(). Check IsValid() afterwards
// to find out whether the pattern could be compiled.
RegExp(const char* pattern,
PatternType patternType
= PATTERN_TYPE_REGULAR_EXPRESSION);
// Shares the compiled expression of "other" (if any).
RegExp(const RegExp& other);
// Drops this object's reference to the compiled expression.
~RegExp();
// Returns whether a successfully compiled pattern is set.
bool IsValid() const
{ return fData != NULL; }
// Replaces the current pattern. Returns true if the new pattern was
// compiled successfully; on failure the object is left invalid.
bool SetPattern(const char* pattern,
PatternType patternType
= PATTERN_TYPE_REGULAR_EXPRESSION);
// Runs the compiled expression against "string". If the object is invalid
// or nothing matches, the returned result's HasMatched() is false.
MatchResult Match(const char* string) const;
// Shares the compiled expression of "other" (if any).
RegExp& operator=(const RegExp& other);
private:
// Reference-counted holder of the compiled expression.
struct Data;
// Reference-counted holder of the match offsets of one Match() call.
struct MatchResultData;
private:
// Shared compiled expression; NULL while no valid pattern is set.
Data* fData;
};
// Outcome of RegExp::Match(). The underlying offset data is reference
// counted, so copies of a MatchResult share it.
class RegExp::MatchResult {
public:
// Creates a result that represents "no match".
MatchResult();
// Shares the match data of "other" (if any).
MatchResult(const MatchResult& other);
// Drops this object's reference to the match data.
~MatchResult();
// Returns whether the expression matched the string.
bool HasMatched() const;
// Start offset of the whole match in the matched string; 0 if there was no
// match.
size_t StartOffset() const;
// End offset of the whole match in the matched string; 0 if there was no
// match.
size_t EndOffset() const;
// Number of reported sub-expression groups (the whole match is not
// counted); 0 if there was no match.
size_t GroupCount() const;
// Start offset of the group with the given 0-based index; 0 if the index is
// out of range.
size_t GroupStartOffsetAt(size_t index) const;
// End offset of the group with the given 0-based index; 0 if the index is
// out of range.
size_t GroupEndOffsetAt(size_t index) const;
// Shares the match data of "other" (if any).
MatchResult& operator=(const MatchResult& other);
private:
// RegExp::Match() uses the private constructor below.
friend class RegExp;
private:
MatchResult(MatchResultData* data);
// takes over the data reference
private:
// Shared match data; NULL if no match data is available.
MatchResultData* fData;
};
#endif // REG_EXP_H

View File

@ -28,6 +28,7 @@ StaticLibrary libshared.a :
NaturalCompare.cpp
PromptWindow.cpp
QueryFile.cpp
RegExp.cpp
RWLockManager.cpp
SHA256.cpp
ShakeTrackingFilter.cpp

386
src/kits/shared/RegExp.cpp Normal file
View File

@ -0,0 +1,386 @@
/*
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
* Copyright 2013, Rene Gollent, rene@gollent.com.
* Distributed under the terms of the MIT License.
*/
#include <RegExp.h>
#include <new>
#include <regex.h>
#include <String.h>
#include <Referenceable.h>
// #pragma mark - RegExp::Data
// Reference-counted holder of a compiled regular expression. The object is
// shared between all RegExp instances that were copied from one another.
struct RegExp::Data : public BReferenceable {
	// Compiles "pattern" as an extended regular expression. A pattern of type
	// PATTERN_TYPE_WILDCARD is first translated from shell wildcard syntax.
	// Use IsValid() to check whether compilation succeeded.
	Data(const char* pattern, PatternType patternType)
		:
		BReferenceable()
	{
		// A NULL pattern can neither be scanned below nor be handed to
		// regcomp(), so report it as an invalid pattern.
		if (pattern == NULL) {
			fError = REG_BADPAT;
			return;
		}

		// convert the shell pattern to a regular expression
		BString patternString;
		if (patternType == PATTERN_TYPE_WILDCARD) {
			while (*pattern != '\0') {
				char c = *pattern++;
				switch (c) {
					case '?':
						// any single character
						patternString += '.';
						continue;
					case '*':
						// any sequence of characters
						patternString += ".*";
						continue;
					case '[':
					{
						// find the matching ']' first
						const char* end = pattern;
						while (*end != ']') {
							if (*end++ == '\0') {
								fError = REG_EBRACK;
								return;
							}
						}

						if (pattern == end) {
							// Empty bracket expression. It will never match
							// anything. Strictly speaking this is not
							// considered an error, but we handle it like one.
							fError = REG_EBRACK;
							return;
						}

						patternString += '[';

						// We need to avoid "[." ... ".]", "[=" ... "=]", and
						// "[:" ... ":]" sequences, since those have special
						// meaning in regular expressions. If we encounter
						// a '[' followed by either of '.', '=', or ':', we
						// replace the '[' by "[.[.]".
						while (pattern < end) {
							c = *pattern++;
							if (c == '[' && pattern < end) {
								switch (*pattern) {
									case '.':
									case '=':
									case ':':
										patternString += "[.[.]";
										continue;
								}
							}
							patternString += c;
						}

						// skip the closing ']' of the wildcard pattern
						pattern++;
						patternString += ']';
						break;
					}

					case '\\':
					{
						// Quotes the next character. Works the same way for
						// regular expressions.
						if (*pattern == '\0') {
							fError = REG_EESCAPE;
							return;
						}

						patternString += '\\';
						patternString += *pattern++;
						break;
					}

					case '^':
					case '.':
					case '$':
					case '(':
					case ')':
					case '|':
					case '+':
					case '{':
						// need to be quoted
						patternString += '\\';
						// fall through
					default:
						patternString += c;
						break;
				}
			}

			// patternString stays alive until regcomp() below has run
			pattern = patternString.String();
		}

		fError = regcomp(&fCompiledExpression, pattern, REG_EXTENDED);
	}

	~Data()
	{
		// fCompiledExpression is only initialized if compilation succeeded
		if (fError == 0)
			regfree(&fCompiledExpression);
	}

	// Returns whether the pattern was translated and compiled successfully.
	bool IsValid() const
	{
		return fError == 0;
	}

	// Returns the compiled expression. Only meaningful if IsValid().
	const regex_t* CompiledExpression() const
	{
		return &fCompiledExpression;
	}

private:
	int		fError;
		// 0 on success, otherwise a REG_* error code
	regex_t	fCompiledExpression;
};
// #pragma mark - RegExp::MatchResultData
// Reference-counted holder of the offsets produced by one regexec() call.
struct RegExp::MatchResultData : public BReferenceable {
	// Matches "string" against "compiledExpression" and records the offsets
	// of the whole match and of the sub-expression groups. If nothing matches
	// (or memory is short), MatchCount() is 0 and Matches() is NULL.
	MatchResultData(const regex_t* compiledExpression, const char* string)
		:
		BReferenceable(),
		fMatchCount(0),
		fMatches(NULL)
	{
		// regcomp() stores the number of parenthesized sub-expressions in
		// re_nsub (the expression is compiled without REG_NOSUB), so the
		// required buffer size is known up front: one entry for the whole
		// match plus one per group.
		const size_t maxMatchCount = compiledExpression->re_nsub + 1;

		fMatches = new(std::nothrow) regmatch_t[maxMatchCount];
		if (fMatches == NULL)
			return;

		if (regexec(compiledExpression, string, maxMatchCount, fMatches, 0)
				!= 0) {
			delete[] fMatches;
			fMatches = NULL;
			return;
		}

		// Groups that did not take part in the match have rm_so == -1. Such
		// a group may be followed by groups that did match (e.g. with
		// "(a)|(b)" matching "b"), so only trailing unmatched groups are
		// dropped, and they have to be found by a linear scan from the end.
		size_t count = maxMatchCount;
		while (count > 0 && fMatches[count - 1].rm_so == -1)
			count--;

		fMatchCount = count;
	}

	~MatchResultData()
	{
		delete[] fMatches;
	}

	// Number of valid entries in Matches(); entry 0 is the whole match.
	size_t MatchCount() const
	{
		return fMatchCount;
	}

	// Offsets as filled in by regexec(); NULL if there was no match.
	const regmatch_t* Matches() const
	{
		return fMatches;
	}

private:
	size_t		fMatchCount;
	regmatch_t*	fMatches;
};
// #pragma mark - RegExp
// Creates an object without a pattern; IsValid() returns false.
RegExp::RegExp()
	: fData(NULL)
{
}
// Creates an object for the given pattern. Whether the pattern could be
// compiled is reported by IsValid(); the result of SetPattern() is not used
// here.
RegExp::RegExp(const char* pattern, PatternType patternType)
	: fData(NULL)
{
	this->SetPattern(pattern, patternType);
}
// Shares the compiled expression of "other" by taking an additional
// reference to it.
RegExp::RegExp(const RegExp& other)
	: fData(other.fData)
{
	// nothing to share if the other object has no pattern
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
// Gives up this object's reference to the shared compiled expression.
RegExp::~RegExp()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
bool
RegExp::SetPattern(const char* pattern, PatternType patternType)
{
if (fData != NULL) {
fData->ReleaseReference();
fData = NULL;
}
fData = new Data(pattern, patternType);
if (!fData->IsValid()) {
delete fData;
fData = NULL;
return false;
}
return true;
}
// Matches "string" against the compiled expression.
// Returns a result for which HasMatched() is false if this object is
// invalid, "string" is NULL, nothing matches, or memory is short.
RegExp::MatchResult
RegExp::Match(const char* string) const
{
	// regexec() must not be called with a NULL string
	if (!IsValid() || string == NULL)
		return MatchResult();

	// The MatchResult takes over the initial reference of the new data; a
	// failed allocation simply yields a "no match" result.
	return MatchResult(
		new(std::nothrow) MatchResultData(fData->CompiledExpression(),
			string));
}
// Makes this object share the compiled expression of "other".
RegExp&
RegExp::operator=(const RegExp& other)
{
	// Acquire the new reference before releasing the old one. Otherwise
	// self-assignment (or assignment between objects that already share the
	// same data) could release the last reference and then use the freed
	// data.
	Data* newData = other.fData;
	if (newData != NULL)
		newData->AcquireReference();

	if (fData != NULL)
		fData->ReleaseReference();

	fData = newData;
	return *this;
}
// #pragma mark - RegExp::MatchResult
// Creates a result without match data; HasMatched() returns false.
RegExp::MatchResult::MatchResult()
	: fData(NULL)
{
}
// Wraps the given match data (may be NULL). No additional reference is
// acquired: the reference the caller holds is taken over.
RegExp::MatchResult::MatchResult(MatchResultData* data)
	: fData(data)
{
}
// Shares the match data of "other" by taking an additional reference to it.
RegExp::MatchResult::MatchResult(const MatchResult& other)
	: fData(other.fData)
{
	// nothing to share if the other result carries no data
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
// Gives up this object's reference to the shared match data.
RegExp::MatchResult::~MatchResult()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
// Returns whether the expression matched, i.e. whether match data with at
// least one entry is available.
bool
RegExp::MatchResult::HasMatched() const
{
	if (fData == NULL)
		return false;

	return fData->MatchCount() > 0;
}
// Returns the start offset of the whole match, or 0 if there was no match.
size_t
RegExp::MatchResult::StartOffset() const
{
	if (fData == NULL || fData->MatchCount() == 0)
		return 0;

	// entry 0 describes the whole match
	return fData->Matches()[0].rm_so;
}
// Returns the end offset of the whole match, or 0 if there was no match.
size_t
RegExp::MatchResult::EndOffset() const
{
	if (fData == NULL || fData->MatchCount() == 0)
		return 0;

	// entry 0 describes the whole match
	return fData->Matches()[0].rm_eo;
}
// Returns the number of reported groups. The entry for the whole match is
// not counted; without a match the result is 0.
size_t
RegExp::MatchResult::GroupCount() const
{
	const size_t entryCount = fData != NULL ? fData->MatchCount() : 0;
	if (entryCount == 0)
		return 0;

	return entryCount - 1;
}
// Returns the start offset of the group with the given 0-based index, or 0
// if there is no such group.
size_t
RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
{
	if (fData == NULL)
		return 0;

	// Compare against "matchCount - 1" rather than computing "index + 1" in
	// the check: the latter wraps around to 0 for the largest size_t value
	// and would wrongly select the entry of the whole match.
	const size_t matchCount = fData->MatchCount();
	if (matchCount == 0 || index >= matchCount - 1)
		return 0;

	// entry 0 is the whole match, groups start at entry 1
	return fData->Matches()[index + 1].rm_so;
}
// Returns the end offset of the group with the given 0-based index, or 0 if
// there is no such group.
size_t
RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
{
	if (fData == NULL)
		return 0;

	// Compare against "matchCount - 1" rather than computing "index + 1" in
	// the check: the latter wraps around to 0 for the largest size_t value
	// and would wrongly select the entry of the whole match.
	const size_t matchCount = fData->MatchCount();
	if (matchCount == 0 || index >= matchCount - 1)
		return 0;

	// entry 0 is the whole match, groups start at entry 1
	return fData->Matches()[index + 1].rm_eo;
}
// Makes this object share the match data of "other".
RegExp::MatchResult&
RegExp::MatchResult::operator=(const MatchResult& other)
{
	// Acquire the new reference before releasing the old one. Otherwise
	// self-assignment (or assignment between results that already share the
	// same data) could release the last reference and then use the freed
	// data.
	MatchResultData* newData = other.fData;
	if (newData != NULL)
		newData->AcquireReference();

	if (fData != NULL)
		fData->ReleaseReference();

	fData = newData;
	return *this;
}