modure: Update to re1.5 v0.6, support for char sets/classes ([a-c]).
This commit is contained in:
parent
d27c0bb3aa
commit
95908b0f50
|
@ -38,7 +38,7 @@
|
||||||
|
|
||||||
#if MICROPY_PY_URE
|
#if MICROPY_PY_URE
|
||||||
|
|
||||||
#include "re1.5/regexp.h"
|
#include "re1.5/re1.5.h"
|
||||||
|
|
||||||
#define FLAG_DEBUG 0x1000
|
#define FLAG_DEBUG 0x1000
|
||||||
|
|
||||||
|
@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
|
||||||
#include "re1.5/compilecode.c"
|
#include "re1.5/compilecode.c"
|
||||||
#include "re1.5/dumpcode.c"
|
#include "re1.5/dumpcode.c"
|
||||||
#include "re1.5/recursiveloop.c"
|
#include "re1.5/recursiveloop.c"
|
||||||
|
#include "re1.5/charclass.c"
|
||||||
|
|
||||||
#endif //MICROPY_PY_URE
|
#endif //MICROPY_PY_URE
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
#include "re1.5.h"
|
||||||
|
|
||||||
|
/*
 * Test whether the character at *sp belongs to a compiled character class.
 *
 * pc points to the "cnt" byte after the opcode. That byte is followed by
 * cnt pairs of bytes; each pair holds the inclusive low and high bounds of
 * one range (a single character is stored as a range whose bounds are equal).
 *
 * Returns 1 if *sp falls inside at least one of the ranges, 0 otherwise
 * (including when the class contains no ranges at all).
 */
int _re1_5_classmatch(const char *pc, const char *sp)
{
    // Read the count as an unsigned byte so that counts above 127 are not
    // misinterpreted as negative where plain char is signed.
    int cnt = (unsigned char)*pc++;

    while (cnt--) {
        // A class is a union of ranges: the first range that contains the
        // character decides the match.
        if (*sp >= pc[0] && *sp <= pc[1]) {
            return 1;
        }
        // Step over this (low, high) pair to reach the next range.
        pc += 2;
    }

    return 0;
}
|
|
@ -2,7 +2,7 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#include "regexp.h"
|
#include "re1.5.h"
|
||||||
|
|
||||||
static void insert_code(char *code, int at, int num, int *pc)
|
static void insert_code(char *code, int at, int num, int *pc)
|
||||||
{
|
{
|
||||||
|
@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
|
||||||
break;
|
break;
|
||||||
case ')':
|
case ')':
|
||||||
break;
|
break;
|
||||||
|
case '[': {
|
||||||
|
pc += 2;
|
||||||
|
re++;
|
||||||
|
while (*re != ']') {
|
||||||
|
if (!*re) return -1;
|
||||||
|
if (re[1] == '-') {
|
||||||
|
re += 2;
|
||||||
|
}
|
||||||
|
pc += 2;
|
||||||
|
re++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
|
||||||
EMIT(pc++, Any);
|
EMIT(pc++, Any);
|
||||||
prog->len++;
|
prog->len++;
|
||||||
break;
|
break;
|
||||||
|
case '[': {
|
||||||
|
int cnt;
|
||||||
|
term = pc;
|
||||||
|
EMIT(pc++, Class);
|
||||||
|
pc++; // Skip # of pair byte
|
||||||
|
prog->len++;
|
||||||
|
re++;
|
||||||
|
for (cnt = 0; *re != ']'; re++, cnt++) {
|
||||||
|
if (!*re) return NULL;
|
||||||
|
EMIT(pc++, *re);
|
||||||
|
if (re[1] == '-') {
|
||||||
|
re += 2;
|
||||||
|
}
|
||||||
|
EMIT(pc++, *re);
|
||||||
|
}
|
||||||
|
EMIT(term + 1, cnt);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case '(':
|
case '(':
|
||||||
term = pc;
|
term = pc;
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#include "regexp.h"
|
#include "re1.5.h"
|
||||||
|
|
||||||
void re1_5_dumpcode(ByteProg *prog)
|
void re1_5_dumpcode(ByteProg *prog)
|
||||||
{
|
{
|
||||||
|
@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
|
||||||
case Any:
|
case Any:
|
||||||
printf("any\n");
|
printf("any\n");
|
||||||
break;
|
break;
|
||||||
|
case Class: {
|
||||||
|
int num = code[pc++];
|
||||||
|
printf("class %d", num);
|
||||||
|
while (num--) {
|
||||||
|
printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
|
||||||
|
pc += 2;
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
case Match:
|
case Match:
|
||||||
printf("match\n");
|
printf("match\n");
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -80,14 +80,18 @@ enum /* Inst.opcode */
|
||||||
CONSUMERS = 1,
|
CONSUMERS = 1,
|
||||||
Char = CONSUMERS,
|
Char = CONSUMERS,
|
||||||
Any,
|
Any,
|
||||||
|
Class,
|
||||||
|
|
||||||
ASSERTS = 0x50,
|
ASSERTS = 0x50,
|
||||||
Bol = ASSERTS,
|
Bol = ASSERTS,
|
||||||
Eol,
|
Eol,
|
||||||
|
|
||||||
// Instructions which take relative offset as arg
|
// Instructions which take relative offset as arg
|
||||||
JUMPS = 0x60,
|
JUMPS = 0x60,
|
||||||
Jmp = JUMPS,
|
Jmp = JUMPS,
|
||||||
Split,
|
Split,
|
||||||
RSplit,
|
RSplit,
|
||||||
|
|
||||||
// Other (special) instructions
|
// Other (special) instructions
|
||||||
Save = 0x7e,
|
Save = 0x7e,
|
||||||
Match = 0x7f,
|
Match = 0x7f,
|
||||||
|
@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
|
||||||
int re1_5_compilecode(ByteProg *prog, const char *re);
|
int re1_5_compilecode(ByteProg *prog, const char *re);
|
||||||
void re1_5_dumpcode(ByteProg *prog);
|
void re1_5_dumpcode(ByteProg *prog);
|
||||||
void cleanmarks(ByteProg *prog);
|
void cleanmarks(ByteProg *prog);
|
||||||
|
int _re1_5_classmatch(const char *pc, const char *sp);
|
||||||
|
|
||||||
#endif /*_RE1_5_REGEXP__H*/
|
#endif /*_RE1_5_REGEXP__H*/
|
|
@ -2,7 +2,7 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
#include "regexp.h"
|
#include "re1.5.h"
|
||||||
|
|
||||||
static int
|
static int
|
||||||
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
|
recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
|
||||||
|
@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
|
||||||
case Any:
|
case Any:
|
||||||
sp++;
|
sp++;
|
||||||
continue;
|
continue;
|
||||||
|
case Class:
|
||||||
|
if (!_re1_5_classmatch(pc, sp))
|
||||||
|
return 0;
|
||||||
|
pc += *(unsigned char*)pc * 2 + 1;
|
||||||
|
sp++;
|
||||||
|
continue;
|
||||||
case Match:
|
case Match:
|
||||||
return 1;
|
return 1;
|
||||||
case Jmp:
|
case Jmp:
|
||||||
|
|
|
@ -20,6 +20,13 @@ try:
|
||||||
except IndexError:
|
except IndexError:
|
||||||
print("IndexError")
|
print("IndexError")
|
||||||
|
|
||||||
|
r = re.compile("[a-c]")
|
||||||
|
m = r.match("a")
|
||||||
|
print(m.group(0))
|
||||||
|
m = r.match("d")
|
||||||
|
print(m)
|
||||||
|
m = r.match("A")
|
||||||
|
print(m)
|
||||||
|
|
||||||
r = re.compile("o+")
|
r = re.compile("o+")
|
||||||
m = r.search("foobar")
|
m = r.search("foobar")
|
||||||
|
|
Loading…
Reference in New Issue