qapi.py: Restructure lexer and parser
The parser has a rather unorthodox structure:

    Until EOF:

        Read a section:

            Generator function get_expr() yields one section after the other, as a string. An unindented, non-empty line that isn't a comment starts a new section.

        Lexing:

            Split section into a list of tokens (strings), with help of generator function tokenize().

        Parsing:

            Parse the first expression from the list of tokens, with parse(), throw away any remaining tokens.

        In parse_schema(): record value of an enum, union or struct key (if any) in the appropriate global table, append expression to the list of expressions.

    Return list of expressions.

Known issues:

(1) Indentation is significant, unlike in real JSON.

(2) Neither lexer nor parser has any idea of source positions. Error reporting is hard, let's go shopping.

(3) The one error we bother to detect, we "report" via raise.

(4) The lexer silently ignores invalid characters.

(5) If everything in a section gets ignored, the parser crashes.

(6) The lexer treats a string containing a structural character exactly like the structural character.

(7) Tokens trailing the first expression in a section are silently ignored.

(8) The parser accepts any token in place of a colon.

(9) The parser treats comma as optional.

(10) parse() crashes on unexpected EOF.

(11) parse_schema() crashes when a section's expression isn't a JSON object.

Replace this piece of original art by a thoroughly unoriginal design. Takes care of (1), (2), (5), (6) and (7), and lays the groundwork for addressing the others. Generated source files remain unchanged.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1374939721-7876-4-git-send-email-armbru@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
parent
4f193e34c6
commit
c7a3f25200
149
scripts/qapi.py
149
scripts/qapi.py
@ -2,9 +2,11 @@
|
|||||||
# QAPI helper library
|
# QAPI helper library
|
||||||
#
|
#
|
||||||
# Copyright IBM, Corp. 2011
|
# Copyright IBM, Corp. 2011
|
||||||
|
# Copyright (c) 2013 Red Hat Inc.
|
||||||
#
|
#
|
||||||
# Authors:
|
# Authors:
|
||||||
# Anthony Liguori <aliguori@us.ibm.com>
|
# Anthony Liguori <aliguori@us.ibm.com>
|
||||||
|
# Markus Armbruster <armbru@redhat.com>
|
||||||
#
|
#
|
||||||
# This work is licensed under the terms of the GNU GPLv2.
|
# This work is licensed under the terms of the GNU GPLv2.
|
||||||
# See the COPYING.LIB file in the top-level directory.
|
# See the COPYING.LIB file in the top-level directory.
|
||||||
@ -32,91 +34,92 @@ builtin_type_qtypes = {
|
|||||||
'uint64': 'QTYPE_QINT',
|
'uint64': 'QTYPE_QINT',
|
||||||
}
|
}
|
||||||
|
|
||||||
def tokenize(data):
|
class QAPISchema:
|
||||||
while len(data):
|
|
||||||
ch = data[0]
|
|
||||||
data = data[1:]
|
|
||||||
if ch in ['{', '}', ':', ',', '[', ']']:
|
|
||||||
yield ch
|
|
||||||
elif ch in ' \n':
|
|
||||||
None
|
|
||||||
elif ch == "'":
|
|
||||||
string = ''
|
|
||||||
esc = False
|
|
||||||
while True:
|
|
||||||
if (data == ''):
|
|
||||||
raise Exception("Mismatched quotes")
|
|
||||||
ch = data[0]
|
|
||||||
data = data[1:]
|
|
||||||
if esc:
|
|
||||||
string += ch
|
|
||||||
esc = False
|
|
||||||
elif ch == "\\":
|
|
||||||
esc = True
|
|
||||||
elif ch == "'":
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
string += ch
|
|
||||||
yield string
|
|
||||||
|
|
||||||
def parse(tokens):
|
def __init__(self, fp):
|
||||||
if tokens[0] == '{':
|
self.fp = fp
|
||||||
ret = OrderedDict()
|
self.src = fp.read()
|
||||||
tokens = tokens[1:]
|
if self.src == '' or self.src[-1] != '\n':
|
||||||
while tokens[0] != '}':
|
self.src += '\n'
|
||||||
key = tokens[0]
|
self.cursor = 0
|
||||||
tokens = tokens[1:]
|
self.exprs = []
|
||||||
|
self.accept()
|
||||||
|
|
||||||
tokens = tokens[1:] # :
|
while self.tok != None:
|
||||||
|
self.exprs.append(self.get_expr())
|
||||||
|
|
||||||
value, tokens = parse(tokens)
|
def accept(self):
|
||||||
|
while True:
|
||||||
|
bol = self.cursor == 0 or self.src[self.cursor-1] == '\n'
|
||||||
|
self.tok = self.src[self.cursor]
|
||||||
|
self.cursor += 1
|
||||||
|
self.val = None
|
||||||
|
|
||||||
if tokens[0] == ',':
|
if self.tok == '#' and bol:
|
||||||
tokens = tokens[1:]
|
self.cursor = self.src.find('\n', self.cursor)
|
||||||
|
elif self.tok in ['{', '}', ':', ',', '[', ']']:
|
||||||
|
return
|
||||||
|
elif self.tok == "'":
|
||||||
|
string = ''
|
||||||
|
esc = False
|
||||||
|
while True:
|
||||||
|
ch = self.src[self.cursor]
|
||||||
|
self.cursor += 1
|
||||||
|
if ch == '\n':
|
||||||
|
raise Exception("Mismatched quotes")
|
||||||
|
if esc:
|
||||||
|
string += ch
|
||||||
|
esc = False
|
||||||
|
elif ch == "\\":
|
||||||
|
esc = True
|
||||||
|
elif ch == "'":
|
||||||
|
self.val = string
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
string += ch
|
||||||
|
elif self.tok == '\n':
|
||||||
|
if self.cursor == len(self.src):
|
||||||
|
self.tok = None
|
||||||
|
return
|
||||||
|
|
||||||
ret[key] = value
|
def get_members(self):
|
||||||
tokens = tokens[1:]
|
expr = OrderedDict()
|
||||||
return ret, tokens
|
while self.tok != '}':
|
||||||
elif tokens[0] == '[':
|
key = self.val
|
||||||
ret = []
|
self.accept()
|
||||||
tokens = tokens[1:]
|
self.accept() # :
|
||||||
while tokens[0] != ']':
|
expr[key] = self.get_expr()
|
||||||
value, tokens = parse(tokens)
|
if self.tok == ',':
|
||||||
if tokens[0] == ',':
|
self.accept()
|
||||||
tokens = tokens[1:]
|
self.accept()
|
||||||
ret.append(value)
|
return expr
|
||||||
tokens = tokens[1:]
|
|
||||||
return ret, tokens
|
|
||||||
else:
|
|
||||||
return tokens[0], tokens[1:]
|
|
||||||
|
|
||||||
def evaluate(string):
|
def get_values(self):
|
||||||
return parse(map(lambda x: x, tokenize(string)))[0]
|
expr = []
|
||||||
|
while self.tok != ']':
|
||||||
|
expr.append(self.get_expr())
|
||||||
|
if self.tok == ',':
|
||||||
|
self.accept()
|
||||||
|
self.accept()
|
||||||
|
return expr
|
||||||
|
|
||||||
def get_expr(fp):
|
def get_expr(self):
|
||||||
expr = ''
|
if self.tok == '{':
|
||||||
|
self.accept()
|
||||||
for line in fp:
|
expr = self.get_members()
|
||||||
if line.startswith('#') or line == '\n':
|
elif self.tok == '[':
|
||||||
continue
|
self.accept()
|
||||||
|
expr = self.get_values()
|
||||||
if line.startswith(' '):
|
|
||||||
expr += line
|
|
||||||
elif expr:
|
|
||||||
yield expr
|
|
||||||
expr = line
|
|
||||||
else:
|
else:
|
||||||
expr += line
|
expr = self.val
|
||||||
|
self.accept()
|
||||||
if expr:
|
return expr
|
||||||
yield expr
|
|
||||||
|
|
||||||
def parse_schema(fp):
|
def parse_schema(fp):
|
||||||
|
schema = QAPISchema(fp)
|
||||||
exprs = []
|
exprs = []
|
||||||
|
|
||||||
for expr in get_expr(fp):
|
for expr_eval in schema.exprs:
|
||||||
expr_eval = evaluate(expr)
|
|
||||||
|
|
||||||
if expr_eval.has_key('enum'):
|
if expr_eval.has_key('enum'):
|
||||||
add_enum(expr_eval['enum'])
|
add_enum(expr_eval['enum'])
|
||||||
elif expr_eval.has_key('union'):
|
elif expr_eval.has_key('union'):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
[OrderedDict([('id', 'eins')])]
|
[OrderedDict([('id', 'eins')]), OrderedDict([('id', 'zwei')])]
|
||||||
[]
|
[]
|
||||||
[]
|
[]
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
[OrderedDict([('enum', ','), ('data', ['good', 'bad', 'ugly'])])]
|
[OrderedDict([('enum', None), ('data', ['good', 'bad', 'ugly'])])]
|
||||||
[',']
|
[None]
|
||||||
[]
|
[]
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
Crashed: <type 'exceptions.AttributeError'>
|
@ -1 +1 @@
|
|||||||
0
|
1
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
[OrderedDict([('key1', 'value1'), ('key2', [])])]
|
|
||||||
[]
|
|
||||||
[]
|
|
Loading…
Reference in New Issue
Block a user