qapi.py: Restructure lexer and parser

The parser has a rather unorthodox structure:

    Until EOF:

        Read a section:

            Generator function get_expr() yields the sections one at a
            time, each as a string.  An unindented, non-empty line
            that isn't a comment starts a new section.

        Lexing:

            Split section into a list of tokens (strings), with help
            of generator function tokenize().

        Parsing:

            Parse the first expression from the list of tokens with
            parse(), and throw away any remaining tokens.

            In parse_schema(): record value of an enum, union or
            struct key (if any) in the appropriate global table,
            append expression to the list of expressions.

    Return list of expressions.

Known issues:

(1) Indentation is significant, unlike in real JSON.

(2) Neither lexer nor parser has any idea of source positions.  Error
    reporting is hard, let's go shopping.

(3) The one error we bother to detect, we "report" via raise.

(4) The lexer silently ignores invalid characters.

(5) If everything in a section gets ignored, the parser crashes.

(6) The lexer treats a string containing a structural character exactly
    like the structural character.

(7) Tokens trailing the first expression in a section are silently
    ignored.

(8) The parser accepts any token in place of a colon.

(9) The parser treats comma as optional.

(10) parse() crashes on unexpected EOF.

(11) parse_schema() crashes when a section's expression isn't a JSON
    object.

Replace this piece of original art by a thoroughly unoriginal design.
Takes care of (1), (2), (5), (6) and (7), and lays the groundwork for
addressing the others.  Generated source files remain unchanged.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 1374939721-7876-4-git-send-email-armbru@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Markus Armbruster 2013-07-27 17:41:55 +02:00 committed by Anthony Liguori
parent 4f193e34c6
commit c7a3f25200
6 changed files with 81 additions and 80 deletions

View File

@ -2,9 +2,11 @@
# QAPI helper library # QAPI helper library
# #
# Copyright IBM, Corp. 2011 # Copyright IBM, Corp. 2011
# Copyright (c) 2013 Red Hat Inc.
# #
# Authors: # Authors:
# Anthony Liguori <aliguori@us.ibm.com> # Anthony Liguori <aliguori@us.ibm.com>
# Markus Armbruster <armbru@redhat.com>
# #
# This work is licensed under the terms of the GNU GPLv2. # This work is licensed under the terms of the GNU GPLv2.
# See the COPYING.LIB file in the top-level directory. # See the COPYING.LIB file in the top-level directory.
@ -32,91 +34,92 @@ builtin_type_qtypes = {
'uint64': 'QTYPE_QINT', 'uint64': 'QTYPE_QINT',
} }
def tokenize(data): class QAPISchema:
while len(data):
ch = data[0] def __init__(self, fp):
data = data[1:] self.fp = fp
if ch in ['{', '}', ':', ',', '[', ']']: self.src = fp.read()
yield ch if self.src == '' or self.src[-1] != '\n':
elif ch in ' \n': self.src += '\n'
None self.cursor = 0
elif ch == "'": self.exprs = []
self.accept()
while self.tok != None:
self.exprs.append(self.get_expr())
def accept(self):
while True:
bol = self.cursor == 0 or self.src[self.cursor-1] == '\n'
self.tok = self.src[self.cursor]
self.cursor += 1
self.val = None
if self.tok == '#' and bol:
self.cursor = self.src.find('\n', self.cursor)
elif self.tok in ['{', '}', ':', ',', '[', ']']:
return
elif self.tok == "'":
string = '' string = ''
esc = False esc = False
while True: while True:
if (data == ''): ch = self.src[self.cursor]
self.cursor += 1
if ch == '\n':
raise Exception("Mismatched quotes") raise Exception("Mismatched quotes")
ch = data[0]
data = data[1:]
if esc: if esc:
string += ch string += ch
esc = False esc = False
elif ch == "\\": elif ch == "\\":
esc = True esc = True
elif ch == "'": elif ch == "'":
break self.val = string
return
else: else:
string += ch string += ch
yield string elif self.tok == '\n':
if self.cursor == len(self.src):
self.tok = None
return
def parse(tokens): def get_members(self):
if tokens[0] == '{': expr = OrderedDict()
ret = OrderedDict() while self.tok != '}':
tokens = tokens[1:] key = self.val
while tokens[0] != '}': self.accept()
key = tokens[0] self.accept() # :
tokens = tokens[1:] expr[key] = self.get_expr()
if self.tok == ',':
self.accept()
self.accept()
return expr
tokens = tokens[1:] # : def get_values(self):
expr = []
while self.tok != ']':
expr.append(self.get_expr())
if self.tok == ',':
self.accept()
self.accept()
return expr
value, tokens = parse(tokens) def get_expr(self):
if self.tok == '{':
if tokens[0] == ',': self.accept()
tokens = tokens[1:] expr = self.get_members()
elif self.tok == '[':
ret[key] = value self.accept()
tokens = tokens[1:] expr = self.get_values()
return ret, tokens
elif tokens[0] == '[':
ret = []
tokens = tokens[1:]
while tokens[0] != ']':
value, tokens = parse(tokens)
if tokens[0] == ',':
tokens = tokens[1:]
ret.append(value)
tokens = tokens[1:]
return ret, tokens
else: else:
return tokens[0], tokens[1:] expr = self.val
self.accept()
def evaluate(string): return expr
return parse(map(lambda x: x, tokenize(string)))[0]
def get_expr(fp):
expr = ''
for line in fp:
if line.startswith('#') or line == '\n':
continue
if line.startswith(' '):
expr += line
elif expr:
yield expr
expr = line
else:
expr += line
if expr:
yield expr
def parse_schema(fp): def parse_schema(fp):
schema = QAPISchema(fp)
exprs = [] exprs = []
for expr in get_expr(fp): for expr_eval in schema.exprs:
expr_eval = evaluate(expr)
if expr_eval.has_key('enum'): if expr_eval.has_key('enum'):
add_enum(expr_eval['enum']) add_enum(expr_eval['enum'])
elif expr_eval.has_key('union'): elif expr_eval.has_key('union'):

View File

@ -1,3 +1,3 @@
[OrderedDict([('id', 'eins')])] [OrderedDict([('id', 'eins')]), OrderedDict([('id', 'zwei')])]
[] []
[] []

View File

@ -1,3 +1,3 @@
[OrderedDict([('enum', ','), ('data', ['good', 'bad', 'ugly'])])] [OrderedDict([('enum', None), ('data', ['good', 'bad', 'ugly'])])]
[','] [None]
[] []

View File

@ -0,0 +1 @@
Crashed: <type 'exceptions.AttributeError'>

View File

@ -1,3 +0,0 @@
[OrderedDict([('key1', 'value1'), ('key2', [])])]
[]
[]