Import RegExp classes from Ham.
Minor adjustments made by myself to fit into Haiku better.
This commit is contained in:
parent
58535f5a9a
commit
47fedeb598
78
headers/private/shared/RegExp.h
Normal file
78
headers/private/shared/RegExp.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
|
||||
* Distributed under the terms of the MIT License.
|
||||
*/
|
||||
#ifndef REG_EXP_H
|
||||
#define REG_EXP_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
|
||||
/*!	Handle for a compiled pattern.

	The pattern is given either as a regular expression or as a shell-style
	wildcard pattern (see PatternType). The compiled state lives in a private
	Data object that the handle refers to through fData; a handle without
	data is reported as invalid by IsValid().
*/
class RegExp {
public:
	class MatchResult;

	// How the pattern string passed to the constructor/SetPattern() is
	// interpreted.
	enum PatternType {
		PATTERN_TYPE_REGULAR_EXPRESSION,
		PATTERN_TYPE_WILDCARD
	};

public:
								RegExp();
								RegExp(const char* pattern,
									PatternType patternType
										= PATTERN_TYPE_REGULAR_EXPRESSION);
								RegExp(const RegExp& other);
								~RegExp();

			RegExp&				operator=(const RegExp& other);

			// Replaces the current pattern; returns whether the new
			// pattern could be set.
			bool				SetPattern(const char* pattern,
									PatternType patternType
										= PATTERN_TYPE_REGULAR_EXPRESSION);

			// true, if a pattern has been set successfully
			bool				IsValid() const
									{ return fData != NULL; }

			// Matches the pattern against the given string.
			MatchResult			Match(const char* string) const;

private:
			struct Data;
			struct MatchResultData;

private:
			Data*				fData;
};
|
||||
|
||||
|
||||
/*!	Result of RegExp::Match().

	Gives access to the offsets of the overall match and of the individual
	groups. All offsets are reported as size_t values.
*/
class RegExp::MatchResult {
public:
								MatchResult();
								MatchResult(const MatchResult& other);
								~MatchResult();

			MatchResult&		operator=(const MatchResult& other);

			bool				HasMatched() const;

			// range of the overall match
			size_t				StartOffset() const;
			size_t				EndOffset() const;

			// ranges of the individual groups
			size_t				GroupCount() const;
			size_t				GroupStartOffsetAt(size_t index) const;
			size_t				GroupEndOffsetAt(size_t index) const;

private:
			friend class RegExp;

private:
								MatchResult(MatchResultData* data);
									// takes over the data reference

private:
			MatchResultData*	fData;
};
|
||||
|
||||
|
||||
#endif // REG_EXP_H
|
@ -28,6 +28,7 @@ StaticLibrary libshared.a :
|
||||
NaturalCompare.cpp
|
||||
PromptWindow.cpp
|
||||
QueryFile.cpp
|
||||
RegExp.cpp
|
||||
RWLockManager.cpp
|
||||
SHA256.cpp
|
||||
ShakeTrackingFilter.cpp
|
||||
|
386
src/kits/shared/RegExp.cpp
Normal file
386
src/kits/shared/RegExp.cpp
Normal file
@ -0,0 +1,386 @@
|
||||
/*
|
||||
* Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
|
||||
* Copyright 2013, Rene Gollent, rene@gollent.com.
|
||||
* Distributed under the terms of the MIT License.
|
||||
*/
|
||||
|
||||
|
||||
#include <RegExp.h>
|
||||
|
||||
#include <new>
|
||||
|
||||
#include <regex.h>
|
||||
|
||||
#include <String.h>
|
||||
|
||||
#include <Referenceable.h>
|
||||
|
||||
|
||||
// #pragma mark - RegExp::Data
|
||||
|
||||
|
||||
/*!	Holder of a compiled POSIX extended regular expression.

	fError stores the result of regcomp(), or an error code set while
	translating a wildcard pattern. fCompiledExpression is only initialized
	(and only freed in the destructor) when fError is 0.
*/
struct RegExp::Data : public BReferenceable {
	/*!	Compiles \a pattern with regcomp() and REG_EXTENDED.

		\param pattern The pattern string. \c NULL is rejected with
			REG_BADPAT.
		\param patternType With PATTERN_TYPE_WILDCARD the shell pattern is
			translated to a regular expression first; otherwise \a pattern is
			handed to regcomp() unchanged.
	*/
	Data(const char* pattern, PatternType patternType)
		:
		BReferenceable(),
		fError(REG_BADPAT)
	{
		// A NULL pattern would be dereferenced by the wildcard translation
		// or by regcomp(). Leave the object invalid instead.
		if (pattern == NULL)
			return;

		// convert the shell pattern to a regular expression
		BString patternString;
		if (patternType == PATTERN_TYPE_WILDCARD) {
			while (*pattern != '\0') {
				char c = *pattern++;
				switch (c) {
					case '?':
						// matches any single character
						patternString += '.';
						continue;
					case '*':
						// matches any sequence of characters
						patternString += ".*";
						continue;
					case '[':
					{
						// find the matching ']' first
						const char* end = pattern;
						while (*end != ']') {
							if (*end++ == '\0') {
								fError = REG_EBRACK;
								return;
							}
						}

						if (pattern == end) {
							// Empty bracket expression. It will never match
							// anything. Strictly speaking this is not
							// considered an error, but we handle it like one.
							fError = REG_EBRACK;
							return;
						}

						patternString += '[';

						// We need to avoid "[." ... ".]", "[=" ... "=]", and
						// "[:" ... ":]" sequences, since those have special
						// meaning in regular expressions. If we encounter
						// a '[' followed by either of '.', '=', or ':', we
						// replace the '[' by "[.[.]".
						while (pattern < end) {
							c = *pattern++;
							if (c == '[' && pattern < end) {
								switch (*pattern) {
									case '.':
									case '=':
									case ':':
										patternString += "[.[.]";
										continue;
								}
							}
							patternString += c;
						}

						// skip the closing ']' located above
						pattern++;
						patternString += ']';
						break;
					}

					case '\\':
					{
						// Quotes the next character. Works the same way for
						// regular expressions.
						if (*pattern == '\0') {
							fError = REG_EESCAPE;
							return;
						}

						patternString += '\\';
						patternString += *pattern++;
						break;
					}

					case '^':
					case '.':
					case '$':
					case '(':
					case ')':
					case '|':
					case '+':
					case '{':
						// need to be quoted
						patternString += '\\';
						// fall through
					default:
						patternString += c;
						break;
				}
			}

			// patternString outlives the regcomp() call below
			pattern = patternString.String();
		}

		fError = regcomp(&fCompiledExpression, pattern, REG_EXTENDED);
	}

	~Data()
	{
		// fCompiledExpression was only set up by a successful regcomp()
		if (fError == 0)
			regfree(&fCompiledExpression);
	}

	// true, if the pattern was compiled successfully
	bool IsValid() const
	{
		return fError == 0;
	}

	// Only meaningful while IsValid() returns true.
	const regex_t* CompiledExpression() const
	{
		return &fCompiledExpression;
	}

private:
	int		fError;
	regex_t	fCompiledExpression;
};
|
||||
|
||||
|
||||
// #pragma mark - RegExp::MatchResultData
|
||||
|
||||
|
||||
/*!	Holder of the regexec() results for a single match attempt.

	fMatches[0] describes the overall match, the following entries the
	parenthesized sub-expressions. fMatchCount is 0 (and fMatches \c NULL)
	when the string did not match or the match buffer could not be allocated.
*/
struct RegExp::MatchResultData : public BReferenceable {
	/*!	Runs regexec() for \a compiledExpression on \a string.

		\param compiledExpression An expression successfully compiled by
			regcomp().
		\param string The string to match.
	*/
	MatchResultData(const regex_t* compiledExpression, const char* string)
		:
		BReferenceable(),
		fMatchCount(0),
		fMatches(NULL)
	{
		// regcomp() records the number of parenthesized sub-expressions in
		// re_nsub, so the buffer can be sized exactly: one slot per
		// sub-expression plus one for the overall match.
		const size_t slotCount = compiledExpression->re_nsub + 1;

		fMatches = new(std::nothrow) regmatch_t[slotCount];
		if (fMatches == NULL)
			return;

		if (regexec(compiledExpression, string, slotCount, fMatches, 0)
				!= 0) {
			delete[] fMatches;
			fMatches = NULL;
			return;
		}

		// Trim unused slots at the end only. A sub-expression that did not
		// participate in the match (rm_so == -1) may be followed by one that
		// did, e.g. for "(a)?(b)", so the count must not be cut at the first
		// unused slot.
		fMatchCount = slotCount;
		while (fMatchCount > 0 && fMatches[fMatchCount - 1].rm_so == -1)
			fMatchCount--;
	}

	~MatchResultData()
	{
		delete[] fMatches;
	}

	// Number of valid entries in Matches(), including the overall match.
	size_t MatchCount() const
	{
		return fMatchCount;
	}

	// NULL, if MatchCount() is 0.
	const regmatch_t* Matches() const
	{
		return fMatches;
	}

private:
	size_t		fMatchCount;
	regmatch_t*	fMatches;
};
|
||||
|
||||
|
||||
// #pragma mark - RegExp
|
||||
|
||||
|
||||
// Creates an object without a pattern. IsValid() returns false until a
// pattern has been set.
RegExp::RegExp()
	: fData(NULL)
{
}
|
||||
|
||||
|
||||
// Creates an object for the given pattern. The result of SetPattern() is not
// reported; use IsValid() to find out whether the pattern was accepted.
RegExp::RegExp(const char* pattern, PatternType patternType)
	: fData(NULL)
{
	SetPattern(pattern, patternType);
}
|
||||
|
||||
|
||||
// Creates an object referring to the same data as \a other.
RegExp::RegExp(const RegExp& other)
	: fData(other.fData)
{
	// nothing to reference, if the other object has no pattern
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
|
||||
|
||||
|
||||
// Gives up this object's reference to the data, if any.
RegExp::~RegExp()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
|
||||
|
||||
|
||||
bool
|
||||
RegExp::SetPattern(const char* pattern, PatternType patternType)
|
||||
{
|
||||
if (fData != NULL) {
|
||||
fData->ReleaseReference();
|
||||
fData = NULL;
|
||||
}
|
||||
|
||||
fData = new Data(pattern, patternType);
|
||||
if (!fData->IsValid()) {
|
||||
delete fData;
|
||||
fData = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*!	Matches the pattern against \a string.

	\param string The string to match. May be \c NULL, in which case nothing
		matches.
	\return The match result. HasMatched() of the result is \c false, if this
		object has no valid pattern, \a string is \c NULL, the string doesn't
		match, or the result data could not be allocated.
*/
RegExp::MatchResult
RegExp::Match(const char* string) const
{
	// regexec() must not be handed a NULL string
	if (!IsValid() || string == NULL)
		return MatchResult();

	// The MatchResult takes over the reference to the new data object. A
	// failed allocation yields a result without data.
	return MatchResult(new(std::nothrow) MatchResultData(
		fData->CompiledExpression(), string));
}
|
||||
|
||||
|
||||
/*!	Makes this object refer to the same data as \a other.

	\return A reference to this object.
*/
RegExp&
RegExp::operator=(const RegExp& other)
{
	// Nothing to do when both objects already share the data. This also
	// covers self-assignment, where releasing first could free the data
	// that is re-acquired afterwards.
	if (fData == other.fData)
		return *this;

	if (fData != NULL)
		fData->ReleaseReference();

	fData = other.fData;

	if (fData != NULL)
		fData->AcquireReference();

	return *this;
}
|
||||
|
||||
|
||||
// #pragma mark - RegExp::MatchResult
|
||||
|
||||
|
||||
// Creates a result without data; HasMatched() returns false for it.
RegExp::MatchResult::MatchResult()
	: fData(NULL)
{
}
|
||||
|
||||
|
||||
// Creates a result for the given data (may be NULL). The data reference is
// taken over from the caller, i.e. no additional reference is acquired.
RegExp::MatchResult::MatchResult(MatchResultData* data)
	: fData(data)
{
}
|
||||
|
||||
|
||||
// Creates a result referring to the same data as \a other.
RegExp::MatchResult::MatchResult(const MatchResult& other)
	: fData(other.fData)
{
	// nothing to reference, if the other result has no data
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
|
||||
|
||||
|
||||
// Gives up this result's reference to the data, if any.
RegExp::MatchResult::~MatchResult()
{
	if (fData == NULL)
		return;

	fData->ReleaseReference();
}
|
||||
|
||||
|
||||
bool
|
||||
RegExp::MatchResult::HasMatched() const
|
||||
{
|
||||
return fData != NULL && fData->MatchCount() > 0;
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
RegExp::MatchResult::StartOffset() const
|
||||
{
|
||||
return fData != NULL && fData->MatchCount() > 0
|
||||
? fData->Matches()[0].rm_so : 0;
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
RegExp::MatchResult::EndOffset() const
|
||||
{
|
||||
return fData != NULL && fData->MatchCount() > 0
|
||||
? fData->Matches()[0].rm_eo : 0;
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
RegExp::MatchResult::GroupCount() const
|
||||
{
|
||||
if (fData == NULL)
|
||||
return 0;
|
||||
|
||||
size_t matchCount = fData->MatchCount();
|
||||
return matchCount > 0 ? matchCount - 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
|
||||
{
|
||||
return fData != NULL && fData->MatchCount() > index + 1
|
||||
? fData->Matches()[index + 1].rm_so : 0;
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
|
||||
{
|
||||
return fData != NULL && fData->MatchCount() > index + 1
|
||||
? fData->Matches()[index + 1].rm_eo : 0;
|
||||
}
|
||||
|
||||
|
||||
/*!	Makes this result refer to the same data as \a other.

	\return A reference to this object.
*/
RegExp::MatchResult&
RegExp::MatchResult::operator=(const MatchResult& other)
{
	// Nothing to do when both results already share the data. This also
	// covers self-assignment, where releasing first could free the data
	// that is re-acquired afterwards.
	if (fData == other.fData)
		return *this;

	if (fData != NULL)
		fData->ReleaseReference();

	fData = other.fData;

	if (fData != NULL)
		fData->AcquireReference();

	return *this;
}
|
Loading…
Reference in New Issue
Block a user