qemu/target/hexagon/gen_decodetree.py

#!/usr/bin/env python3

##
##  Copyright (c) 2024 Taylor Simpson <ltaylorsimpson@gmail.com>
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, see <http://www.gnu.org/licenses/>.
##

import io
import re

import sys
import textwrap
import iset
import hex_common

# Map every tag that has an encoding to that encoding with the spaces
# stripped and the character order reversed (the last character of the
# original string ends up at index 0).
encs = {
    tag: iset.iset[tag]["enc"].replace(" ", "")[::-1]
    for tag in iset.tags
    if iset.iset[tag]["enc"] != "MISSING ENCODING"
}


# Matches one register operand in an instruction syntax string.  Groups:
#   1: the register type letter (not matched when preceded by "DUP")
#   2: one or more operand id letters
#   3: an optional "." and/or one of L, l, H, h
#   4: the size digits, optionally followed by "S"
regre = re.compile(r"((?<!DUP)[MNORCPQXSGVZA])([stuvwxyzdefg]+)([.]?[LlHh]?)(\d+S?)")
# Matches one immediate operand in an instruction syntax string.  Groups:
#   1: the immediate type letter that follows "#"
#   2: the digits that follow the type letter
#   3: the digits of an optional ":<digits>" suffix (empty when absent)
immre = re.compile(r"[#]([rRsSuUm])(\d+)(?:[:](\d+))?")


def ordered_unique(l):
    """Return the distinct items of l as a list, in first-seen order.

    The items must be hashable.  Any iterable is accepted.  Relying on
    the insertion order of dict keys makes this a single linear pass
    instead of one list.index() scan per distinct item.
    """
    return list(dict.fromkeys(l))

# Number of registers for each listed register type.  An operand of one of
# these types whose encoded size differs from this count gets a
# "!function=decode_mapped_reg_..." suffix on its field definition.
num_registers = dict(R=32, V=32)

# Encoding characters that are rewritten to "." when the instruction
# format line is produced.
operand_letters = set("PiIrstuvwxyzdefg")

#
# These instructions have unused operand letters in their encoding
# They don't correspond to actual operands in the instruction semantics
# We will mark them as ignored in QEMU decodetree
#
# For a tag listed in tags_with_unused_d_encoding every "d" in the encoding
# string is replaced with "-" before the format line is written; likewise
# every "t" for a tag listed in tags_with_unused_t_encoding.
#
tags_with_unused_d_encoding = {
    "R6_release_at_vi",
    "R6_release_st_vi",
    "S4_stored_rl_at_vi",
    "S4_stored_rl_st_vi",
    "S2_storew_rl_at_vi",
    "S2_stored_rl_at_vi",
    "S2_storew_rl_st_vi",
}

tags_with_unused_t_encoding = {
    "R6_release_at_vi",
    "R6_release_st_vi",
}

def skip_tag(tag, class_to_decode):
    """Return True when tag's "enc_class" is not class_to_decode."""
    return iset.iset[tag]["enc_class"] != class_to_decode


##
## Generate the QEMU decodetree file for each instruction in class_to_decode
##     For A2_add: Rd32=add(Rs32,Rt32)
##     We produce:
##     %A2_add_Rd   0:5
##     %A2_add_Rs   16:5
##     %A2_add_Rt   8:5
##     @A2_add  11110011000.......-.....---..... Rd=%A2_add_Rd Rs=%A2_add_Rs Rt=%A2_add_Rt %PP
##     A2_add   ..................-.....---..... @A2_add
##
def gen_decodetree_file(f, class_to_decode):
    """Write the decodetree description of one instruction class to f.

    For every tag in encs whose "enc_class" equals class_to_decode this
    writes a comment banner, one %field line for each register and each
    immediate operand found in the instruction syntax, an @format line
    and finally the instruction pattern line.

    f is a writable text stream.  class_to_decode is the encoding class
    name; a name starting with "SUBINSN_" selects sub-instruction
    handling: the encoding is left-padded with "---" and no %PP field is
    emitted.

    Raises Exception when a register operand has no field in the
    encoding, has more than one (split) field, or has a field whose
    width does not match the size given in the syntax.
    """
    is_subinsn = class_to_decode.startswith("SUBINSN_")
    f.write(f"## DO NOT MODIFY - This file is generated by {sys.argv[0]}\n\n")
    if not is_subinsn:
        f.write("%PP\t14:2\n\n")

    # Record where each tag first appears in iset.tags so that ordering the
    # tags costs one dictionary lookup each instead of a list.index() scan.
    tag_order = {}
    for position, known_tag in enumerate(iset.tags):
        tag_order.setdefault(known_tag, position)

    for tag in sorted(encs, key=tag_order.__getitem__):
        if skip_tag(tag, class_to_decode):
            continue

        # encs holds the encoding reversed, so an index into enc is used
        # directly as a field position.  enc_str is the encoding in its
        # original order and is what gets printed.
        enc = encs[tag]
        enc_str = enc[::-1]
        f.write(("#" * 80) + "\n"
                f"## {tag}:\t{enc_str}\n"
                "##\n")

        # The subinstructions come with a 13-bit encoding, but
        # decodetree.py needs 16 bits
        if is_subinsn:
            enc_str = "---" + enc_str

        syntax = iset.iset[tag]["syntax"]
        regs = ordered_unique(regre.findall(syntax))
        imms = ordered_unique(immre.findall(syntax))

        # Write the field definitions for the registers
        for reg_type, reg_id, _, reg_enc_size in regs:
            reg_num_choices = int(reg_enc_size.rstrip("S"))
            # The first letter of the operand id is the letter that marks
            # the operand's bits in the encoding.
            reg_enc_fields = re.findall(reg_id[0] + "+", enc)

            # Check for some errors
            if not reg_enc_fields:
                raise Exception(f"{tag} missing register field!")
            if len(reg_enc_fields) > 1:
                raise Exception(f"{tag} has split register field!")
            reg_enc_field = reg_enc_fields[0]
            if 2 ** len(reg_enc_field) != reg_num_choices:
                raise Exception(f"{tag} has incorrect register field width!")

            f.write(f"%{tag}_{reg_type}{reg_id}\t"
                    f"{enc.index(reg_enc_field)}:{len(reg_enc_field)}")

            # A field that cannot name every register of its type is
            # routed through a decode_mapped_reg_* function.
            if (reg_type in num_registers and
                reg_num_choices != num_registers[reg_type]):
                reg_mapping = reg_type + "_" * len(reg_id) + reg_enc_size
                f.write(f"\t!function=decode_mapped_reg_{reg_mapping}")
            f.write("\n")

        # Write the field definitions for the immediates
        for imm in imms:
            imm_type = imm[0]
            imm_letter = "i" if imm_type.islower() else "I"
            fields = []
            # Only the first sub-field written (the one at the highest
            # position) carries the "s" marker, and only for the s/S/r/R
            # immediate types.
            sign_mark = "s" if imm_type.lower() in "sr" else ""
            for m in reversed(list(re.finditer(imm_letter + "+", enc))):
                fields.append(f"{m.start()}:{sign_mark}{m.end() - m.start()}")
                sign_mark = ""
            field_str = " ".join(fields)
            f.write(f"%{tag}_{imm_type}{imm_letter}\t{field_str}\n")

        ## Handle instructions with unused encoding letters
        ## Change the unused letters to ignored
        if tag in tags_with_unused_d_encoding:
            enc_str = enc_str.replace("d", "-")
        if tag in tags_with_unused_t_encoding:
            enc_str = enc_str.replace("t", "-")

        # Replace the operand letters with .
        for x in operand_letters:
            enc_str = enc_str.replace(x, ".")

        # Write the instruction format
        f.write(f"@{tag}\t{enc_str}")
        for reg_type, reg_id, _, _ in regs:
            f.write(f" {reg_type}{reg_id}=%{tag}_{reg_type}{reg_id}")
        for imm in imms:
            imm_type = imm[0]
            imm_letter = "i" if imm_type.islower() else "I"
            f.write(f" {imm_type}{imm_letter}=%{tag}_{imm_type}{imm_letter}")

        if not is_subinsn:
            f.write(" %PP")
        f.write("\n")

        # Replace the 0s and 1s with .
        enc_str = enc_str.replace("0", ".").replace("1", ".")

        # Write the instruction pattern
        f.write(f"{tag}\t{enc_str} @{tag}\n")


# Command line: <semantics file> <class to decode> <output file>
if __name__ == "__main__":
    # argv[1] is handed to hex_common.read_semantics_file before the other
    # arguments are looked at.
    hex_common.read_semantics_file(sys.argv[1])
    # argv[2] names the encoding class whose instructions are emitted.
    class_to_decode = sys.argv[2]
    # argv[3] is the path of the generated file; it is opened for writing.
    with open(sys.argv[3], "w") as f:
        gen_decodetree_file(f, class_to_decode)
Hexagon (target/hexagon) Use QEMU decodetree (32-bit instructions) The Decodetree Specification can be found here https://www.qemu.org/docs/master/devel/decodetree.html Covers all 32-bit instructions, including HVX We generate separate decoders for each instruction class. The reason will be more apparent in the next patch in this series. We add 2 new scripts gen_decodetree.py Generate the input to decodetree.py gen_trans_funcs.py Generate the trans_* functions used by the output of decodetree.py Since the functions generated by decodetree.py take DisasContext * as an argument, we add the argument to a couple of functions that didn't need it previously. We also set the insn field in DisasContext during decode because it is used by the trans_* functions. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 32-bit instructions Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-2-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:41 +03:00			`#!/usr/bin/env python3`

			`##`
			`## Copyright (c) 2024 Taylor Simpson <ltaylorsimpson@gmail.com>`
			`##`
			`## This program is free software; you can redistribute it and/or modify`
			`## it under the terms of the GNU General Public License as published by`
			`## the Free Software Foundation; either version 2 of the License, or`
			`## (at your option) any later version.`
			`##`
			`## This program is distributed in the hope that it will be useful,`
			`## but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`## GNU General Public License for more details.`
			`##`
			`## You should have received a copy of the GNU General Public License`
			`## along with this program; if not, see <http://www.gnu.org/licenses/>.`
			`##`

			`import io`
			`import re`

			`import sys`
			`import textwrap`
			`import iset`
			`import hex_common`

			`encs = {`
			`tag: "".join(reversed(iset.iset[tag]["enc"].replace(" ", "")))`
			`for tag in iset.tags`
			`if iset.iset[tag]["enc"] != "MISSING ENCODING"`
			`}`


			`regre = re.compile(r"((?<!DUP)[MNORCPQXSGVZA])([stuvwxyzdefg]+)([.]?[LlHh]?)(\d+S?)")`
			`immre = re.compile(r"[#]([rRsSuUm])(\d+)(?:[:](\d+))?")`


			`def ordered_unique(l):`
			`return sorted(set(l), key=l.index)`

			`num_registers = {"R": 32, "V": 32}`

			`operand_letters = {`
			`"P",`
			`"i",`
			`"I",`
			`"r",`
			`"s",`
			`"t",`
			`"u",`
			`"v",`
			`"w",`
			`"x",`
			`"y",`
			`"z",`
			`"d",`
			`"e",`
			`"f",`
			`"g",`
			`}`

			`#`
			`# These instructions have unused operand letters in their encoding`
			`# They don't correspond to actual operands in the instruction semantics`
			`# We will mark them as ignored in QEMU decodetree`
			`#`
			`tags_with_unused_d_encoding = {`
			`"R6_release_at_vi",`
			`"R6_release_st_vi",`
			`"S4_stored_rl_at_vi",`
			`"S4_stored_rl_st_vi",`
			`"S2_storew_rl_at_vi",`
			`"S2_stored_rl_at_vi",`
			`"S2_storew_rl_st_vi",`
			`}`

			`tags_with_unused_t_encoding = {`
			`"R6_release_at_vi",`
			`"R6_release_st_vi",`
			`}`

			`def skip_tag(tag, class_to_decode):`
			`enc_class = iset.iset[tag]["enc_class"]`
			`return enc_class != class_to_decode`


			`##`
			`## Generate the QEMU decodetree file for each instruction in class_to_decode`
			`## For A2_add: Rd32=add(Rs32,Rt32)`
			`## We produce:`
			`## %A2_add_Rd 0:5`
			`## %A2_add_Rs 16:5`
			`## %A2_add_Rt 8:5`
			`## @A2_add 11110011000.......-.....---..... Rd=%A2_add_Rd Rs=%A2_add_Rs Rt=%A2_add_Rt %PP`
			`## A2_add ..................-.....---..... @A2_add`
			`##`
			`def gen_decodetree_file(f, class_to_decode):`
Hexagon (target/hexagon) Use QEMU decodetree (16-bit instructions) Section 10.3 of the Hexagon V73 Programmer's Reference Manual A duplex is encoded as a 32-bit instruction with bits [15:14] set to 00. The sub-instructions that comprise a duplex are encoded as 13-bit fields in the duplex. Create a decoder for each subinstruction class (a, l1, l2, s1, s2). Extend gen_trans_funcs.py to handle all instructions rather than filter by instruction class. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 16-bit instructions. Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-3-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:42 +03:00			`is_subinsn = class_to_decode.startswith("SUBINSN_")`
Hexagon (target/hexagon) Use QEMU decodetree (32-bit instructions) The Decodetree Specification can be found here https://www.qemu.org/docs/master/devel/decodetree.html Covers all 32-bit instructions, including HVX We generate separate decoders for each instruction class. The reason will be more apparent in the next patch in this series. We add 2 new scripts gen_decodetree.py Generate the input to decodetree.py gen_trans_funcs.py Generate the trans_* functions used by the output of decodetree.py Since the functions generated by decodetree.py take DisasContext * as an argument, we add the argument to a couple of functions that didn't need it previously. We also set the insn field in DisasContext during decode because it is used by the trans_* functions. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 32-bit instructions Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-2-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:41 +03:00			`f.write(f"## DO NOT MODIFY - This file is generated by {sys.argv[0]}\n\n")`
Hexagon (target/hexagon) Use QEMU decodetree (16-bit instructions) Section 10.3 of the Hexagon V73 Programmer's Reference Manual A duplex is encoded as a 32-bit instruction with bits [15:14] set to 00. The sub-instructions that comprise a duplex are encoded as 13-bit fields in the duplex. Create a decoder for each subinstruction class (a, l1, l2, s1, s2). Extend gen_trans_funcs.py to handle all instructions rather than filter by instruction class. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 16-bit instructions. Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-3-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:42 +03:00			`if not is_subinsn:`
			`f.write("%PP\t14:2\n\n")`
Hexagon (target/hexagon) Use QEMU decodetree (32-bit instructions) The Decodetree Specification can be found here https://www.qemu.org/docs/master/devel/decodetree.html Covers all 32-bit instructions, including HVX We generate separate decoders for each instruction class. The reason will be more apparent in the next patch in this series. We add 2 new scripts gen_decodetree.py Generate the input to decodetree.py gen_trans_funcs.py Generate the trans_* functions used by the output of decodetree.py Since the functions generated by decodetree.py take DisasContext * as an argument, we add the argument to a couple of functions that didn't need it previously. We also set the insn field in DisasContext during decode because it is used by the trans_* functions. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 32-bit instructions Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-2-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:41 +03:00			`for tag in sorted(encs.keys(), key=iset.tags.index):`
			`if skip_tag(tag, class_to_decode):`
			`continue`

			`enc = encs[tag]`
			`enc_str = "".join(reversed(encs[tag]))`
			`f.write(("#" * 80) + "\n"`
			`f"## {tag}:\t{enc_str}\n"`
			`"##\n")`

Hexagon (target/hexagon) Use QEMU decodetree (16-bit instructions) Section 10.3 of the Hexagon V73 Programmer's Reference Manual A duplex is encoded as a 32-bit instruction with bits [15:14] set to 00. The sub-instructions that comprise a duplex are encoded as 13-bit fields in the duplex. Create a decoder for each subinstruction class (a, l1, l2, s1, s2). Extend gen_trans_funcs.py to handle all instructions rather than filter by instruction class. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 16-bit instructions. Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-3-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:42 +03:00			`# The subinstructions come with a 13-bit encoding, but`
			`# decodetree.py needs 16 bits`
			`if is_subinsn:`
			`enc_str = "---" + enc_str`
Hexagon (target/hexagon) Use QEMU decodetree (32-bit instructions) The Decodetree Specification can be found here https://www.qemu.org/docs/master/devel/decodetree.html Covers all 32-bit instructions, including HVX We generate separate decoders for each instruction class. The reason will be more apparent in the next patch in this series. We add 2 new scripts gen_decodetree.py Generate the input to decodetree.py gen_trans_funcs.py Generate the trans_* functions used by the output of decodetree.py Since the functions generated by decodetree.py take DisasContext * as an argument, we add the argument to a couple of functions that didn't need it previously. We also set the insn field in DisasContext during decode because it is used by the trans_* functions. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 32-bit instructions Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-2-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:41 +03:00
			`regs = ordered_unique(regre.findall(iset.iset[tag]["syntax"]))`
			`imms = ordered_unique(immre.findall(iset.iset[tag]["syntax"]))`

			`# Write the field definitions for the registers`
			`for regno, reg in enumerate(regs):`
			`reg_type, reg_id, _, reg_enc_size = reg`
			`reg_letter = reg_id[0]`
			`reg_num_choices = int(reg_enc_size.rstrip("S"))`
			`reg_mapping = reg_type + "".join("_" for letter in reg_id) + \`
			`reg_enc_size`
			`reg_enc_fields = re.findall(reg_letter + "+", enc)`

			`# Check for some errors`
			`if len(reg_enc_fields) == 0:`
			`raise Exception(f"{tag} missing register field!")`
			`if len(reg_enc_fields) > 1:`
			`raise Exception(f"{tag} has split register field!")`
			`reg_enc_field = reg_enc_fields[0]`
			`if 2 ** len(reg_enc_field) != reg_num_choices:`
			`raise Exception(f"{tag} has incorrect register field width!")`

			`f.write(f"%{tag}_{reg_type}{reg_id}\t"`
			`f"{enc.index(reg_enc_field)}:{len(reg_enc_field)}")`

			`if (reg_type in num_registers and`
			`reg_num_choices != num_registers[reg_type]):`
			`f.write(f"\t!function=decode_mapped_reg_{reg_mapping}")`
			`f.write("\n")`

			`# Write the field definitions for the immediates`
			`for imm in imms:`
			`immno = 1 if imm[0].isupper() else 0`
			`imm_type = imm[0]`
			`imm_width = int(imm[1])`
			`imm_letter = "i" if imm_type.islower() else "I"`
			`fields = []`
			`sign_mark = "s" if imm_type.lower() in "sr" else ""`
			`for m in reversed(list(re.finditer(imm_letter + "+", enc))):`
			`fields.append(f"{m.start()}:{sign_mark}{m.end() - m.start()}")`
			`sign_mark = ""`
			`field_str = " ".join(fields)`
			`f.write(f"%{tag}_{imm_type}{imm_letter}\t{field_str}\n")`

			`## Handle instructions with unused encoding letters`
			`## Change the unused letters to ignored`
			`if tag in tags_with_unused_d_encoding:`
			`enc_str = enc_str.replace("d", "-")`
			`if tag in tags_with_unused_t_encoding:`
			`enc_str = enc_str.replace("t", "-")`

			`# Replace the operand letters with .`
			`for x in operand_letters:`
			`enc_str = enc_str.replace(x, ".")`

			`# Write the instruction format`
			`f.write(f"@{tag}\t{enc_str}")`
			`for reg in regs:`
			`reg_type = reg[0]`
			`reg_id = reg[1]`
			`f.write(f" {reg_type}{reg_id}=%{tag}_{reg_type}{reg_id}")`
			`for imm in imms:`
			`imm_type = imm[0]`
			`imm_letter = "i" if imm_type.islower() else "I"`
			`f.write(f" {imm_type}{imm_letter}=%{tag}_{imm_type}{imm_letter}")`

Hexagon (target/hexagon) Use QEMU decodetree (16-bit instructions) Section 10.3 of the Hexagon V73 Programmer's Reference Manual A duplex is encoded as a 32-bit instruction with bits [15:14] set to 00. The sub-instructions that comprise a duplex are encoded as 13-bit fields in the duplex. Create a decoder for each subinstruction class (a, l1, l2, s1, s2). Extend gen_trans_funcs.py to handle all instructions rather than filter by instruction class. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 16-bit instructions. Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-3-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:42 +03:00			`if not is_subinsn:`
			`f.write(" %PP")`
			`f.write("\n")`
Hexagon (target/hexagon) Use QEMU decodetree (32-bit instructions) The Decodetree Specification can be found here https://www.qemu.org/docs/master/devel/decodetree.html Covers all 32-bit instructions, including HVX We generate separate decoders for each instruction class. The reason will be more apparent in the next patch in this series. We add 2 new scripts gen_decodetree.py Generate the input to decodetree.py gen_trans_funcs.py Generate the trans_* functions used by the output of decodetree.py Since the functions generated by decodetree.py take DisasContext * as an argument, we add the argument to a couple of functions that didn't need it previously. We also set the insn field in DisasContext during decode because it is used by the trans_* functions. There is a g_assert_not_reached() in decode_insns() in decode.c to verify we never try to use the old decoder on 32-bit instructions Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20240115221443.365287-2-ltaylorsimpson@gmail.com> Signed-off-by: Brian Cain <bcain@quicinc.com> 2024-01-16 01:14:41 +03:00
			`# Replace the 0s and 1s with .`
			`enc_str = enc_str.replace("0", ".").replace("1", ".")`

			`# Write the instruction pattern`
			`f.write(f"{tag}\t{enc_str} @{tag}\n")`


			`if __name__ == "__main__":`
			`hex_common.read_semantics_file(sys.argv[1])`
			`class_to_decode = sys.argv[2]`
			`with open(sys.argv[3], "w") as f:`
			`gen_decodetree_file(f, class_to_decode)`