Move helper functions out of QueryParser.h

They live in the new QueryParserUtils.{h,cpp} now.
2011-07-07 15:33:05 +02:00 · 2011-07-07 15:33:05 +02:00 · 01f7f92aef
commit 01f7f92aef
parent 3877a7f4a0
4 changed files with 392 additions and 323 deletions
--- a/headers/private/file_systems/QueryParser.h
+++ b/headers/private/file_systems/QueryParser.h
@ -4,8 +4,8 @@
 * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * This file may be used under the terms of the MIT License.
 */
-#ifndef _FILE_SYSTEMS_QUERY_H
-#define _FILE_SYSTEMS_QUERY_H
+#ifndef _FILE_SYSTEMS_QUERY_PARSER_H
+#define _FILE_SYSTEMS_QUERY_PARSER_H


 /*!	Query parsing and evaluation
@ -46,6 +46,8 @@

 #include <lock.h>

+#include <file_systems/QueryParserUtils.h>
+

 //#define DEBUG_QUERY

@ -96,20 +98,6 @@ enum ops {
 	OP_LESS_THAN_OR_EQUAL,
 };

-enum match {
-	NO_MATCH = 0,
-	MATCH_OK = 1,
-
-	MATCH_BAD_PATTERN = -2,
-	MATCH_INVALID_CHARACTER
-};
-
-// return values from isValidPattern()
-enum {
-	PATTERN_INVALID_ESCAPE = -3,
-	PATTERN_INVALID_RANGE,
-	PATTERN_INVALID_SET
-};

 template<typename QueryPolicy>
 union value {
@ -385,309 +373,6 @@ private:
 //	#pragma mark -


-void
-skipWhitespace(char** expr, int32 skip = 0)
-{
-	char* string = (*expr) + skip;
-	while (*string == ' ' || *string == '\t') string++;
-	*expr = string;
-}
-
-
-void
-skipWhitespaceReverse(char** expr, char* stop)
-{
-	char* string = *expr;
-	while (string > stop && (*string == ' ' || *string == '\t'))
-		string--;
-	*expr = string;
-}
-
-
-template<typename Key>
-static inline int
-compare_integral(const Key &a, const Key &b)
-{
-	if (a < b)
-		return -1;
-	else if (a > b)
-		return 1;
-	return 0;
-}
-
-
-static inline int
-compareKeys(uint32 type, const uint8* key1, size_t length1, const uint8* key2,
-	size_t length2)
-{
-	switch (type) {
-		case B_INT32_TYPE:
-			return compare_integral(*(int32*)key1, *(int32*)key2);
-		case B_UINT32_TYPE:
-			return compare_integral(*(uint32*)key1, *(uint32*)key2);
-		case B_INT64_TYPE:
-			return compare_integral(*(int64*)key1, *(int64*)key2);
-		case B_UINT64_TYPE:
-			return compare_integral(*(uint64*)key1, *(uint64*)key2);
-		case B_FLOAT_TYPE:
-			return compare_integral(*(float*)key1, *(float*)key2);
-		case B_DOUBLE_TYPE:
-			return compare_integral(*(double*)key1, *(double*)key2);
-		case B_STRING_TYPE:
-		{
-			int result = strncmp((const char*)key1, (const char*)key2,
-				std::min(length1, length2));
-			if (result == 0) {
-				result = compare_integral(strnlen((const char*)key1, length1),
-					strnlen((const char*)key2, length2));
-			}
-			return result;
-		}
-	}
-	return -1;
-}
-
-
-//	#pragma mark -
-
-
-uint32
-utf8ToUnicode(char** string)
-{
-	uint8* bytes = (uint8*)*string;
-	int32 length;
-	uint8 mask = 0x1f;
-
-	switch (bytes[0] & 0xf0) {
-		case 0xc0:
-		case 0xd0:
-			length = 2;
-			break;
-		case 0xe0:
-			length = 3;
-			break;
-		case 0xf0:
-			mask = 0x0f;
-			length = 4;
-			break;
-		default:
-			// valid 1-byte character
-			// and invalid characters
-			(*string)++;
-			return bytes[0];
-	}
-	uint32 c = bytes[0] & mask;
-	int32 i = 1;
-	for (; i < length && (bytes[i] & 0x80) > 0; i++)
-		c = (c << 6) | (bytes[i] & 0x3f);
-
-	if (i < length) {
-		// invalid character
-		(*string)++;
-		return (uint32)bytes[0];
-	}
-	*string += length;
-	return c;
-}
-
-
-int32
-getFirstPatternSymbol(char* string)
-{
-	char c;
-
-	for (int32 index = 0; (c = *string++); index++) {
-		if (c == '*' || c == '?' || c == '[')
-			return index;
-	}
-	return -1;
-}
-
-
-bool
-isPattern(char* string)
-{
-	return getFirstPatternSymbol(string) >= 0 ? true : false;
-}
-
-
-status_t
-isValidPattern(char* pattern)
-{
-	while (*pattern) {
-		switch (*pattern++) {
-			case '\\':
-				// the escape character must not be at the end of the pattern
-				if (!*pattern++)
-					return PATTERN_INVALID_ESCAPE;
-				break;
-
-			case '[':
-				if (pattern[0] == ']' || !pattern[0])
-					return PATTERN_INVALID_SET;
-
-				while (*pattern != ']') {
-					if (*pattern == '\\' && !*++pattern)
-						return PATTERN_INVALID_ESCAPE;
-
-					if (!*pattern)
-						return PATTERN_INVALID_SET;
-
-					if (pattern[0] == '-' && pattern[1] == '-')
-						return PATTERN_INVALID_RANGE;
-
-					pattern++;
-				}
-				break;
-		}
-	}
-	return B_OK;
-}
-
-
-/*!	Matches the string against the given wildcard pattern.
-	Returns either MATCH_OK, or NO_MATCH when everything went fine, or
-	values < 0 (see enum at the top of Query.cpp) if an error occurs.
-*/
-status_t
-matchString(char* pattern, char* string)
-{
-	while (*pattern) {
-		// end of string == valid end of pattern?
-		if (!string[0]) {
-			while (pattern[0] == '*')
-				pattern++;
-			return !pattern[0] ? MATCH_OK : NO_MATCH;
-		}
-
-		switch (*pattern++) {
-			case '?':
-			{
-				// match exactly one UTF-8 character; we are
-				// not interested in the result
-				utf8ToUnicode(&string);
-				break;
-			}
-
-			case '*':
-			{
-				// compact pattern
-				while (true) {
-					if (pattern[0] == '?') {
-						if (!*++string)
-							return NO_MATCH;
-					} else if (pattern[0] != '*')
-						break;
-
-					pattern++;
-				}
-
-				// if the pattern is done, we have matched the string
-				if (!pattern[0])
-					return MATCH_OK;
-
-				while(true) {
-					// we have removed all occurences of '*' and '?'
-					if (pattern[0] == string[0]
-						|| pattern[0] == '['
-						|| pattern[0] == '\\') {
-						status_t status = matchString(pattern, string);
-						if (status < B_OK || status == MATCH_OK)
-							return status;
-					}
-
-					// we could be nice here and just jump to the next
-					// UTF-8 character - but we wouldn't gain that much
-					// and it'd be slower (since we're checking for
-					// equality before entering the recursion)
-					if (!*++string)
-						return NO_MATCH;
-				}
-				break;
-			}
-
-			case '[':
-			{
-				bool invert = false;
-				if (pattern[0] == '^' || pattern[0] == '!') {
-					invert = true;
-					pattern++;
-				}
-
-				if (!pattern[0] || pattern[0] == ']')
-					return MATCH_BAD_PATTERN;
-
-				uint32 c = utf8ToUnicode(&string);
-				bool matched = false;
-
-				while (pattern[0] != ']') {
-					if (!pattern[0])
-						return MATCH_BAD_PATTERN;
-
-					if (pattern[0] == '\\')
-						pattern++;
-
-					uint32 first = utf8ToUnicode(&pattern);
-
-					// Does this character match, or is this a range?
-					if (first == c) {
-						matched = true;
-						break;
-					} else if (pattern[0] == '-' && pattern[1] != ']'
-							&& pattern[1]) {
-						pattern++;
-
-						if (pattern[0] == '\\') {
-							pattern++;
-							if (!pattern[0])
-								return MATCH_BAD_PATTERN;
-						}
-						uint32 last = utf8ToUnicode(&pattern);
-
-						if (c >= first && c <= last) {
-							matched = true;
-							break;
-						}
-					}
-				}
-
-				if (invert)
-					matched = !matched;
-
-				if (matched) {
-					while (pattern[0] != ']') {
-						if (!pattern[0])
-							return MATCH_BAD_PATTERN;
-						pattern++;
-					}
-					pattern++;
-					break;
-				}
-				return NO_MATCH;
-			}
-
-            case '\\':
-				if (!pattern[0])
-					return MATCH_BAD_PATTERN;
-				// supposed to fall through
-			default:
-				if (pattern[-1] != string[0])
-					return NO_MATCH;
-				string++;
-				break;
-		}
-	}
-
-	if (string[0])
-		return NO_MATCH;
-
-	return MATCH_OK;
-}
-
-
-//	#pragma mark -
-
-
 template<typename QueryPolicy>
 Equation<QueryPolicy>::Equation(char** expr)
 	:
@ -1921,4 +1606,4 @@ Query<QueryPolicy>::_SendEntryNotification(Entry* entry,
 }	// namespace QueryParser


-#endif	// _FILE_SYSTEMS_QUERY_H
+#endif	// _FILE_SYSTEMS_QUERY_PARSER_H
--- a/headers/private/file_systems/QueryParserUtils.h
+++ b/headers/private/file_systems/QueryParserUtils.h
@ -0,0 +1,61 @@
+/*
+ * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de.
+ * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
+ * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de.
+ * This file may be used under the terms of the MIT License.
+ */
+#ifndef _FILE_SYSTEMS_QUERY_PARSER_UTILS_H
+#define _FILE_SYSTEMS_QUERY_PARSER_UTILS_H
+
+
+#include <sys/cdefs.h>
+
+#include <SupportDefs.h>
+
+
+namespace QueryParser {
+
+
+enum match {
+	NO_MATCH = 0,
+	MATCH_OK = 1,
+
+	MATCH_BAD_PATTERN = -2,
+	MATCH_INVALID_CHARACTER
+};
+
+// return values from isValidPattern()
+enum {
+	PATTERN_INVALID_ESCAPE = -3,
+	PATTERN_INVALID_RANGE,
+	PATTERN_INVALID_SET
+};
+
+
+__BEGIN_DECLS
+
+
+void		skipWhitespace(char** expr, int32 skip = 0);
+void		skipWhitespaceReverse(char** expr, char* stop);
+int			compareKeys(uint32 type, const void* key1, size_t length1,
+				const void* key2, size_t length2);
+uint32		utf8ToUnicode(char** string);
+int32		getFirstPatternSymbol(char* string);
+status_t	isValidPattern(char* pattern);
+status_t	matchString(char* pattern, char* string);
+
+
+__END_DECLS
+
+
+static inline bool
+isPattern(char* string)
+{
+	return getFirstPatternSymbol(string) >= 0 ? true : false;
+}
+
+
+}	// namespace QueryParser
+
+
+#endif	// _FILE_SYSTEMS_QUERY_PARSER_UTILS_H
--- a/src/add-ons/kernel/file_systems/packagefs/Jamfile
+++ b/src/add-ons/kernel/file_systems/packagefs/Jamfile
@ -49,6 +49,10 @@ HAIKU_PACKAGE_FS_SOURCES =
 	Volume.cpp
 ;

+HAIKU_PACKAGE_FS_SHARED_SOURCES =
+	QueryParserUtils.cpp
+;
+
 HAIKU_PACKAGE_FS_PACKAGE_READER_SOURCES =
 	BlockBufferCacheImpl.cpp
 	BufferCache.cpp
@ -76,12 +80,10 @@ local libSharedSources =
 ;


-SEARCH_SOURCE += [ FDirName $(HAIKU_TOP) src kits package hpkg ] ;
-
-
 KernelAddon packagefs
 	:
 	$(HAIKU_PACKAGE_FS_SOURCES)
+	$(HAIKU_PACKAGE_FS_SHARED_SOURCES)
 	$(HAIKU_PACKAGE_FS_PACKAGE_READER_SOURCES)
 	$(libSharedSources)

@ -89,6 +91,10 @@ KernelAddon packagefs
 ;


+SEARCH on [ FGristFiles $(HAIKU_PACKAGE_FS_SHARED_SOURCES) ]
+	+= [ FDirName $(HAIKU_TOP) src add-ons kernel file_systems shared ] ;
+SEARCH on [ FGristFiles $(HAIKU_PACKAGE_FS_PACKAGE_READER_SOURCES) ]
+	+= [ FDirName $(HAIKU_TOP) src kits package hpkg ] ;
 SEARCH on [ FGristFiles $(libSharedSources) ]
 	+= [ FDirName $(HAIKU_TOP) src kits shared ] ;

--- a/src/add-ons/kernel/file_systems/shared/QueryParserUtils.cpp
+++ b/src/add-ons/kernel/file_systems/shared/QueryParserUtils.cpp
@ -0,0 +1,317 @@
+/*
+ * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de.
+ * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
+ * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de.
+ * This file may be used under the terms of the MIT License.
+ */
+
+
+#include <file_systems/QueryParserUtils.h>
+
+#include <string.h>
+
+#include <algorithm>
+
+#include <TypeConstants.h>
+
+
+namespace QueryParser {
+
+
+template<typename Key>
+static inline int
+compare_integral(const Key& a, const Key& b)
+{
+	if (a < b)
+		return -1;
+	else if (a > b)
+		return 1;
+	return 0;
+}
+
+
+// #pragma mark -
+
+
+void
+skipWhitespace(char** expr, int32 skip)
+{
+	char* string = (*expr) + skip;
+	while (*string == ' ' || *string == '\t') string++;
+	*expr = string;
+}
+
+
+void
+skipWhitespaceReverse(char** expr, char* stop)
+{
+	char* string = *expr;
+	while (string > stop && (*string == ' ' || *string == '\t'))
+		string--;
+	*expr = string;
+}
+
+
+int
+compareKeys(uint32 type, const void* key1, size_t length1, const void* key2,
+	size_t length2)
+{
+	switch (type) {
+		case B_INT32_TYPE:
+			return compare_integral(*(int32*)key1, *(int32*)key2);
+		case B_UINT32_TYPE:
+			return compare_integral(*(uint32*)key1, *(uint32*)key2);
+		case B_INT64_TYPE:
+			return compare_integral(*(int64*)key1, *(int64*)key2);
+		case B_UINT64_TYPE:
+			return compare_integral(*(uint64*)key1, *(uint64*)key2);
+		case B_FLOAT_TYPE:
+			return compare_integral(*(float*)key1, *(float*)key2);
+		case B_DOUBLE_TYPE:
+			return compare_integral(*(double*)key1, *(double*)key2);
+		case B_STRING_TYPE:
+		{
+			int result = strncmp((const char*)key1, (const char*)key2,
+				std::min(length1, length2));
+			if (result == 0) {
+				result = compare_integral(strnlen((const char*)key1, length1),
+					strnlen((const char*)key2, length2));
+			}
+			return result;
+		}
+	}
+	return -1;
+}
+
+
+//	#pragma mark -
+
+
+uint32
+utf8ToUnicode(char** string)
+{
+	uint8* bytes = (uint8*)*string;
+	int32 length;
+	uint8 mask = 0x1f;
+
+	switch (bytes[0] & 0xf0) {
+		case 0xc0:
+		case 0xd0:
+			length = 2;
+			break;
+		case 0xe0:
+			length = 3;
+			break;
+		case 0xf0:
+			mask = 0x0f;
+			length = 4;
+			break;
+		default:
+			// valid 1-byte character
+			// and invalid characters
+			(*string)++;
+			return bytes[0];
+	}
+	uint32 c = bytes[0] & mask;
+	int32 i = 1;
+	for (; i < length && (bytes[i] & 0x80) > 0; i++)
+		c = (c << 6) | (bytes[i] & 0x3f);
+
+	if (i < length) {
+		// invalid character
+		(*string)++;
+		return (uint32)bytes[0];
+	}
+	*string += length;
+	return c;
+}
+
+
+int32
+getFirstPatternSymbol(char* string)
+{
+	char c;
+
+	for (int32 index = 0; (c = *string++); index++) {
+		if (c == '*' || c == '?' || c == '[')
+			return index;
+	}
+	return -1;
+}
+
+
+status_t
+isValidPattern(char* pattern)
+{
+	while (*pattern) {
+		switch (*pattern++) {
+			case '\\':
+				// the escape character must not be at the end of the pattern
+				if (!*pattern++)
+					return PATTERN_INVALID_ESCAPE;
+				break;
+
+			case '[':
+				if (pattern[0] == ']' || !pattern[0])
+					return PATTERN_INVALID_SET;
+
+				while (*pattern != ']') {
+					if (*pattern == '\\' && !*++pattern)
+						return PATTERN_INVALID_ESCAPE;
+
+					if (!*pattern)
+						return PATTERN_INVALID_SET;
+
+					if (pattern[0] == '-' && pattern[1] == '-')
+						return PATTERN_INVALID_RANGE;
+
+					pattern++;
+				}
+				break;
+		}
+	}
+	return B_OK;
+}
+
+
+/*!	Matches the string against the given wildcard pattern.
+	Returns either MATCH_OK, or NO_MATCH when everything went fine, or
+	values < 0 (see the enums in QueryParserUtils.h) if an error occurs.
+*/
+status_t
+matchString(char* pattern, char* string)
+{
+	while (*pattern) {
+		// end of string == valid end of pattern?
+		if (!string[0]) {
+			while (pattern[0] == '*')
+				pattern++;
+			return !pattern[0] ? MATCH_OK : NO_MATCH;
+		}
+
+		switch (*pattern++) {
+			case '?':
+			{
+				// match exactly one UTF-8 character; we are
+				// not interested in the result
+				utf8ToUnicode(&string);
+				break;
+			}
+
+			case '*':
+			{
+				// compact pattern
+				while (true) {
+					if (pattern[0] == '?') {
+						if (!*++string)
+							return NO_MATCH;
+					} else if (pattern[0] != '*')
+						break;
+
+					pattern++;
+				}
+
+				// if the pattern is done, we have matched the string
+				if (!pattern[0])
+					return MATCH_OK;
+
+				while(true) {
+					// we have removed all occurrences of '*' and '?'
+					if (pattern[0] == string[0]
+						|| pattern[0] == '['
+						|| pattern[0] == '\\') {
+						status_t status = matchString(pattern, string);
+						if (status < B_OK || status == MATCH_OK)
+							return status;
+					}
+
+					// we could be nice here and just jump to the next
+					// UTF-8 character - but we wouldn't gain that much
+					// and it'd be slower (since we're checking for
+					// equality before entering the recursion)
+					if (!*++string)
+						return NO_MATCH;
+				}
+				break;
+			}
+
+			case '[':
+			{
+				bool invert = false;
+				if (pattern[0] == '^' || pattern[0] == '!') {
+					invert = true;
+					pattern++;
+				}
+
+				if (!pattern[0] || pattern[0] == ']')
+					return MATCH_BAD_PATTERN;
+
+				uint32 c = utf8ToUnicode(&string);
+				bool matched = false;
+
+				while (pattern[0] != ']') {
+					if (!pattern[0])
+						return MATCH_BAD_PATTERN;
+
+					if (pattern[0] == '\\')
+						pattern++;
+
+					uint32 first = utf8ToUnicode(&pattern);
+
+					// Does this character match, or is this a range?
+					if (first == c) {
+						matched = true;
+						break;
+					} else if (pattern[0] == '-' && pattern[1] != ']'
+							&& pattern[1]) {
+						pattern++;
+
+						if (pattern[0] == '\\') {
+							pattern++;
+							if (!pattern[0])
+								return MATCH_BAD_PATTERN;
+						}
+						uint32 last = utf8ToUnicode(&pattern);
+
+						if (c >= first && c <= last) {
+							matched = true;
+							break;
+						}
+					}
+				}
+
+				if (invert)
+					matched = !matched;
+
+				if (matched) {
+					while (pattern[0] != ']') {
+						if (!pattern[0])
+							return MATCH_BAD_PATTERN;
+						pattern++;
+					}
+					pattern++;
+					break;
+				}
+				return NO_MATCH;
+			}
+
+            case '\\':
+				if (!pattern[0])
+					return MATCH_BAD_PATTERN;
+				// supposed to fall through
+			default:
+				if (pattern[-1] != string[0])
+					return NO_MATCH;
+				string++;
+				break;
+		}
+	}
+
+	if (string[0])
+		return NO_MATCH;
+
+	return MATCH_OK;
+}
+
+
+}	// namespace QueryParser