# kolibrios/kernel/trunk/asmxygen.py
#
# 1208 lines, 47 KiB, Python

import re
import os
import argparse
import sys
import pickle
import hashlib
import difflib
# fasm keywords: assembler directives ("align", "equ", "org", ...) followed by
# instruction mnemonics.
# The list is plain data; a few entries are repeated (e.g. "movq", "movsd",
# "vgatherdpd").
# NOTE(review): presumably each entry is registered in id2kind with
# ID_KIND_KEYWORD elsewhere in the file so parse_variable() can reject
# keywords as variable names - confirm against the code that consumes it.
keywords = [
    "align", "equ", "org", "while", "load", "store", "times", "repeat",
    "display", "err", "assert", "if", "aaa", "aad", "aam", "aas", "adc",
    "add", "addpd", "addps", "addsd", "addss", "addsubpd", "addsubps", "adox",
    "aesdeclast", "aesenc", "aesenclast", "aesimc", "aeskeygenassist", "and",
    "andnpd", "andnps", "andpd", "andps", "arpl", "bextr", "blendpd",
    "blendvpd", "blendvps", "blsi", "blsmsk", "blsr", "bndcl", "bndcn",
    "bndldx", "bndmk", "bndmov", "bndstx", "bound", "bsf", "bsr", "bswap",
    "btc", "btr", "bts", "bzhi", "call", "cbw", "cdq", "cdqe", "clac", "clc",
    "cldemote", "clflush", "clflushopt", "cli", "clts", "clwb", "cmc", "cmova",
    "cmovb", "cmovbe", "cmovc", "cmove", "cmovg", "cmovge", "cmovl", "cmovle",
    "cmovnae", "cmovnb", "cmovnbe", "cmovnc", "cmovne", "cmovng", "cmovnge",
    "cmovnle", "cmovno", "cmovnp", "cmovns", "cmovnz", "cmovo", "cmovp",
    "cmovpo", "cmovs", "cmovz", "cmp", "cmppd", "cmpps", "cmps", "cmpsb",
    "cmpsd", "cmpsq", "cmpss", "cmpsw", "cmpxchg", "cmpxchg16b", "cmpxchg8b",
    "comiss", "cpuid", "cqo", "crc32", "cvtdq2pd", "cvtdq2ps", "cvtpd2dq",
    "cvtpd2ps", "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi",
    "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si", "cvttpd2dq",
    "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde", "daa",
    "dec", "div", "divpd", "divps", "divsd", "divss", "dppd", "dpps", "emms",
    "extractps", "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs",
    "fcmova", "fcmovae", "fcmovb", "fcmovbe", "fcmovc", "fcmove", "fcmovg",
    "fcmovl", "fcmovle", "fcmovna", "fcmovnae", "fcmovnb", "fcmovnbe",
    "fcmovne", "fcmovng", "fcmovnge", "fcmovnl", "fcmovnle", "fcmovno",
    "fcmovns", "fcmovnz", "fcmovo", "fcmovp", "fcmovpe", "fcmovpo", "fcmovs",
    "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdiv",
    "fdivr", "fdivrp", "ffree", "fiadd", "ficom", "ficomp", "fidiv", "fidivr",
    "fimul", "fincstp", "finit", "fist", "fistp", "fisttp", "fisub", "fisubr",
    "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi",
    "fmul", "fmulp", "fnclex", "fninit", "fnop", "fnsave", "fnstcw", "fnstenv",
    "fpatan", "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave",
    "fsin", "fsincos", "fsqrt", "fst", "fstcw", "fstenv", "fstp", "fstsw",
    "fsubp", "fsubr", "fsubrp", "ftst", "fucom", "fucomi", "fucomip", "fucomp",
    "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x",
    "gf2p8affineinvqb", "gf2p8affineqb", "gf2p8mulb", "haddpd", "haddps",
    "hsubpd", "hsubps", "idiv", "imul", "in", "inc", "ins", "insb", "insd",
    "insw", "int", "int1", "int3", "into", "invd", "invlpg", "invpcid", "iret",
    "jmp", "ja", "jae", "jb", "jbe", "jc", "jcxz", "jecxz", "je", "jg", "jge",
    "jle", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge", "jnl",
    "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo", "js", "jz", "kaddb",
    "kaddq", "kaddw", "kandb", "kandd", "kandnb", "kandnd", "kandnq", "kandnw",
    "kandw", "kmovb", "kmovd", "kmovq", "kmovw", "knotb", "knotd", "knotq",
    "korb", "kord", "korq", "kortestb", "kortestd", "kortestq", "kortestw",
    "kshiftlb", "kshiftld", "kshiftlq", "kshiftlw", "kshiftrb", "kshiftrd",
    "kshiftrw", "ktestb", "ktestd", "ktestq", "ktestw", "kunpckbw", "kunpckdq",
    "kxnorb", "kxnord", "kxnorq", "kxnorw", "kxorb", "kxord", "kxorq", "kxorw",
    "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les", "lfence", "lfs",
    "lgs", "lidt", "lldt", "lmsw", "lock", "lods", "lodsb", "lodsd", "lodsq",
    "loop", "loopa", "loopae", "loopb", "loopbe", "loopc", "loope", "loopg",
    "loopl", "loople", "loopna", "loopnae", "loopnb", "loopnbe", "loopnc",
    "loopng", "loopnge", "loopnl", "loopnle", "loopno", "loopnp", "loopns",
    "loopo", "loopp", "looppe", "looppo", "loops", "loopz", "lsl", "lss",
    "lzcnt", "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss",
    "minpd", "minps", "minsd", "minss", "monitor", "mov", "movapd", "movaps",
    "movd", "movddup", "movdir64b", "movdiri", "movdq2q", "movdqa", "movdqu",
    "movhpd", "movhps", "movlhps", "movlpd", "movlps", "movmskpd", "movmskps",
    "movntdqa", "movnti", "movntpd", "movntps", "movntq", "movq", "movq",
    "movs", "movsb", "movsd", "movsd", "movshdup", "movsldup", "movsq",
    "movsw", "movsx", "movsxd", "movupd", "movups", "movzx", "mpsadbw", "mul",
    "mulps", "mulsd", "mulss", "mulx", "mwait", "neg", "nop", "not", "or",
    "orps", "out", "outs", "outsb", "outsd", "outsw", "pabsb", "pabsd",
    "pabsw", "packssdw", "packsswb", "packusdw", "packuswb", "paddb", "paddd",
    "paddsb", "paddsw", "paddusb", "paddusw", "paddw", "palignr", "pand",
    "pause", "pavgb", "pavgw", "pblendvb", "pblendw", "pclmulqdq", "pcmpeqb",
    "pcmpeqq", "pcmpeqw", "pcmpestri", "pcmpestrm", "pcmpgtb", "pcmpgtd",
    "pcmpgtw", "pcmpistri", "pcmpistrm", "pdep", "pext", "pextrb", "pextrd",
    "pextrw", "phaddd", "phaddsw", "phaddw", "phminposuw", "phsubd", "phsubsw",
    "pinsrb", "pinsrd", "pinsrq", "pinsrw", "pmaddubsw", "pmaddwd", "pmaxsb",
    "pmaxsq", "pmaxsw", "pmaxub", "pmaxud", "pmaxuq", "pmaxuw", "pminsb",
    "pminsq", "pminsw", "pminub", "pminud", "pminuq", "pminuw", "pmovmskb",
    "pmovzx", "pmuldq", "pmulhrsw", "pmulhuw", "pmulhw", "pmulld", "pmullq",
    "pmuludq", "pop", "popa", "popad", "popcnt", "popf", "popfd", "popfq",
    "prefetchw", "prefetchh", "psadbw", "pshufb", "pshufd", "pshufhw",
    "pshufw", "psignb", "psignd", "psignw", "pslld", "pslldq", "psllq",
    "psrad", "psraq", "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb",
    "psubq", "psubsb", "psubsw", "psubusb", "psubusw", "psubw", "ptest",
    "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw",
    "punpcklqdq", "punpcklwd", "push", "pushw", "pushd", "pusha", "pushad",
    "pushfd", "pushfq", "pxor", "rcl", "rcpps", "rcpss", "rcr", "rdfsbase",
    "rdmsr", "rdpid", "rdpkru", "rdpmc", "rdrand", "rdseed", "rdtsc", "rdtscp",
    "repe", "repne", "repnz", "repz", "ret", "rol", "ror", "rorx", "roundpd",
    "roundsd", "roundss", "rsm", "rsqrtps", "rsqrtss", "sahf", "sal", "sar",
    "sbb", "scas", "scasb", "scasd", "scasw", "seta", "setae", "setb", "setbe",
    "sete", "setg", "setge", "setl", "setle", "setna", "setnae", "setnb",
    "setnc", "setne", "setng", "setnge", "setnl", "setnle", "setno", "setnp",
    "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "sfence",
    "sha1msg1", "sha1msg2", "sha1nexte", "sha1rnds4", "sha256msg1",
    "sha256rnds2", "shl", "shld", "shlx", "shr", "shrd", "shrx", "shufpd",
    "sidt", "sldt", "smsw", "sqrtpd", "sqrtps", "sqrtsd", "sqrtss", "stac",
    "std", "sti", "stmxcsr", "stos", "stosb", "stosd", "stosq", "stosw", "str",
    "subpd", "subps", "subsd", "subss", "swapgs", "syscall", "sysenter",
    "sysret", "test", "tpause", "tzcnt", "ucomisd", "ucomiss", "ud",
    "umwait", "unpckhpd", "unpckhps", "unpcklpd", "unpcklps", "valignd",
    "vblendmpd", "vblendmps", "vbroadcast", "vcompresspd", "vcompressps",
    "vcvtpd2udq", "vcvtpd2uqq", "vcvtph2ps", "vcvtps2ph", "vcvtps2qq",
    "vcvtps2uqq", "vcvtqq2pd", "vcvtqq2ps", "vcvtsd2usi", "vcvtss2usi",
    "vcvttpd2udq", "vcvttpd2uqq", "vcvttps2qq", "vcvttps2udq", "vcvttps2uqq",
    "vcvttss2usi", "vcvtudq2pd", "vcvtudq2ps", "vcvtuqq2pd", "vcvtuqq2ps",
    "vcvtusi2ss", "vdbpsadbw", "verr", "verw", "vexpandpd", "vexpandps",
    "vextractf32x4", "vextractf32x8", "vextractf64x2", "vextractf64x4",
    "vextracti32x4", "vextracti32x8", "vextracti64x2", "vextracti64x4",
    "vfixupimmps", "vfixupimmsd", "vfixupimmss", "vfmadd132pd", "vfmadd132ps",
    "vfmadd132ss", "vfmadd213pd", "vfmadd213ps", "vfmadd213sd", "vfmadd213ss",
    "vfmadd231ps", "vfmadd231sd", "vfmadd231ss", "vfmaddsub132pd",
    "vfmaddsub213pd", "vfmaddsub213ps", "vfmaddsub231pd", "vfmaddsub231ps",
    "vfmsub132ps", "vfmsub132sd", "vfmsub132ss", "vfmsub213pd", "vfmsub213ps",
    "vfmsub213ss", "vfmsub231pd", "vfmsub231ps", "vfmsub231sd", "vfmsub231ss",
    "vfmsubadd132ps", "vfmsubadd213pd", "vfmsubadd213ps", "vfmsubadd231pd",
    "vfnmadd132pd", "vfnmadd132ps", "vfnmadd132sd", "vfnmadd132ss",
    "vfnmadd213ps", "vfnmadd213sd", "vfnmadd213ss", "vfnmadd231pd",
    "vfnmadd231sd", "vfnmadd231ss", "vfnmsub132pd", "vfnmsub132ps",
    "vfnmsub132ss", "vfnmsub213pd", "vfnmsub213ps", "vfnmsub213sd",
    "vfnmsub231pd", "vfnmsub231ps", "vfnmsub231sd", "vfnmsub231ss",
    "vfpclassps", "vfpclasssd", "vfpclassss", "vgatherdpd", "vgatherdpd",
    "vgatherdps", "vgatherqpd", "vgatherqpd", "vgatherqps", "vgatherqps",
    "vgetexpps", "vgetexpsd", "vgetexpss", "vgetmantpd", "vgetmantps",
    "vgetmantss", "vinsertf128", "vinsertf32x4", "vinsertf32x8",
    "vinsertf64x4", "vinserti128", "vinserti32x4", "vinserti32x8",
    "vinserti64x4", "vmaskmov", "vmovdqa32", "vmovdqa64", "vmovdqu16",
    "vmovdqu64", "vmovdqu8", "vpblendd", "vpblendmb", "vpblendmd", "vpblendmq",
    "vpbroadcast", "vpbroadcastb", "vpbroadcastd", "vpbroadcastm",
    "vpbroadcastw", "vpcmpb", "vpcmpd", "vpcmpq", "vpcmpub", "vpcmpud",
    "vpcmpuw", "vpcmpw", "vpcompressd", "vpcompressq", "vpconflictd",
    "vperm2f128", "vperm2i128", "vpermb", "vpermd", "vpermi2b", "vpermi2d",
    "vpermi2ps", "vpermi2q", "vpermi2w", "vpermilpd", "vpermilps", "vpermpd",
    "vpermq", "vpermt2b", "vpermt2d", "vpermt2pd", "vpermt2ps", "vpermt2q",
    "vpermw", "vpexpandd", "vpexpandq", "vpgatherdd", "vpgatherdd",
    "vpgatherdq", "vpgatherqd", "vpgatherqd", "vpgatherqq", "vpgatherqq",
    "vplzcntq", "vpmadd52huq", "vpmadd52luq", "vpmaskmov", "vpmovb2m",
    "vpmovdb", "vpmovdw", "vpmovm2b", "vpmovm2d", "vpmovm2q", "vpmovm2w",
    "vpmovqb", "vpmovqd", "vpmovqw", "vpmovsdb", "vpmovsdw", "vpmovsqb",
    "vpmovsqw", "vpmovswb", "vpmovusdb", "vpmovusdw", "vpmovusqb", "vpmovusqd",
    "vpmovuswb", "vpmovw2m", "vpmovwb", "vpmultishiftqb", "vprold", "vprolq",
    "vprolvq", "vprord", "vprorq", "vprorvd", "vprorvq", "vpscatterdd",
    "vpscatterqd", "vpscatterqq", "vpsllvd", "vpsllvq", "vpsllvw", "vpsravd",
    "vpsravw", "vpsrlvd", "vpsrlvq", "vpsrlvw", "vpternlogd", "vpternlogq",
    "vptestmd", "vptestmq", "vptestmw", "vptestnmb", "vptestnmd", "vptestnmq",
    "vrangepd", "vrangeps", "vrangesd", "vrangess", "vrcp14pd", "vrcp14ps",
    "vrcp14ss", "vreducepd", "vreduceps", "vreducesd", "vreducess",
    "vrndscaleps", "vrndscalesd", "vrndscaless", "vrsqrt14pd", "vrsqrt14ps",
    "vrsqrt14ss", "vscalefpd", "vscalefps", "vscalefsd", "vscalefss",
    "vscatterdps", "vscatterqpd", "vscatterqps", "vshuff32x4", "vshuff64x2",
    "vshufi64x2", "vtestpd", "vtestps", "vzeroall", "vzeroupper", "wait",
    "wrfsbase", "wrgsbase", "wrmsr", "wrpkru", "xabort", "xacquire", "xadd",
    "xchg", "xend", "xgetbv", "xlat", "xlatb", "xor", "xorpd", "xorps",
    "xrstor", "xrstors", "xsave", "xsavec", "xsaveopt", "xsaves", "xsetbv",
]
# fasm data directives, listed pairwise as "d<size>"/"r<size>" for every size
# letter, followed by the lone "du" directive which has no "r" counterpart
# in this list.
fasm_types = [
    prefix + size
    for size in "bwdpfqt"
    for prefix in "dr"
] + ["du"]
def id_add_kind(identifier, kind):
    """Append the flag string `kind` to the kinds recorded for `identifier`.

    The kinds of an identifier are kept in the module-level `id2kind` table
    as one concatenated string; an identifier seen for the first time starts
    from the empty string.
    """
    id2kind[identifier] = id2kind.get(identifier, '') + kind
def id_remove_kind(identifier, kind):
    """Strip every occurrence of the flag `kind` from `identifier`'s kinds.

    Identifiers that are not in `id2kind`, or that do not carry the flag,
    are left untouched.
    """
    if identifier not in id2kind:
        return
    current = id2kind[identifier]
    if kind in current:
        id2kind[identifier] = current.replace(kind, '')
def id_get_kind(identifier):
    """Return the kind flags recorded for `identifier` ('' when unknown)."""
    return id2kind[identifier] if identifier in id2kind else ''
class LegacyAsmReader:
    """Character cursor over the lines of an assembly source file.

    The whole file is read at construction time. The cursor is the pair
    (`line_idx`, `i`): index of the current line and index of the current
    character inside it.
    """

    def __init__(self, file):
        """Read all lines of `file` (UTF-8) and put the cursor at its start."""
        self.file = file
        # Use a context manager so the handle is closed as soon as the
        # lines are read, even if decoding fails
        with open(file, "r", encoding="utf-8") as source:
            self.lines = source.readlines()
        self.line_idx = 0
        self.i = 0

    def currline(self):
        """Return the whole current line (IndexError past the last line)."""
        return self.lines[self.line_idx]

    def curr(self):
        """Return the character under the cursor, or '' when the cursor is
        past the end of the line or past the last line."""
        try:
            return self.lines[self.line_idx][self.i]
        except IndexError:
            return ''

    def step(self):
        """Advance by one character and return the character left behind.

        If the cursor lands on a backslash that is followed only by
        whitespace and/or a comment, the backslash is a line wrap: the
        cursor jumps to the beginning of the next line instead.
        """
        c = self.curr()
        self.i += 1
        # Wrap the line if '\\' followed by whitespaces and/or comment
        while self.curr() == '\\':
            i_of_backslash = self.i
            self.i += 1
            while self.curr().isspace():
                self.i += 1
            if self.curr() == ';' or self.curr() == '':
                self.line_idx += 1
                self.i = 0
            else:
                # There's something other than a comment after the backslash
                # So don't interpret the backslash as a line wrap
                self.i = i_of_backslash
                break
        return c

    def nextline(self):
        """Consume the rest of the current line (following line wraps) and
        put the cursor at the beginning of the next one."""
        c = self.curr()
        while c != '':
            c = self.step()
        self.line_idx += 1
        self.i = 0

    def no_lines(self):
        """Return True when the cursor is past the last line."""
        return self.line_idx >= len(self.lines)

    def location(self):
        """Return the cursor position as "<file>:<1-based line number>"."""
        return f"{self.file}:{self.line_idx + 1}"

    def skip_spaces(self):
        """Step over whitespace characters under the cursor."""
        while self.curr().isspace():
            self.step()
class AsmReaderRecognizingStrings(LegacyAsmReader):
    """Reader that additionally tracks whether the cursor is inside a
    quoted string.

    `in_string` is None outside of a string, otherwise it holds the
    quotation mark (single or double) that opened the string.
    Recognition can be switched off with `should_recognize_strings`.
    """

    def __init__(self, file):
        super().__init__(file)
        self.in_string = None
        self.should_recognize_strings = True

    def step(self):
        """Step one character and update `in_string` for the character
        that was just left behind."""
        c = super().step()
        if not self.should_recognize_strings:
            return c
        if c != '"' and c != "'":
            return c
        if self.in_string is None:
            # A quotation mark outside of a string opens one
            self.in_string = c
        elif self.in_string == c:
            # The same quotation mark that opened the string closes it;
            # the other kind of mark inside a string changes nothing
            self.in_string = None
        return c
class AsmReaderReadingComments(AsmReaderRecognizingStrings):
    """Reader that accumulates comment text while it walks the source.

    Per-line state: `status_has_code` (something other than whitespace was
    seen outside of a comment) and `status_has_comment` (a ';' outside of
    a string was seen). `comment` holds the text collected after the ';'
    marks; it survives across lines until a line with code is left.
    """

    def __init__(self, file):
        super().__init__(file)
        self.status = {}
        self.status_reset()
        self.comment = ''

    def status_reset(self):
        """Reset the per-line state for a fresh line."""
        # No code and no comment seen on the line yet
        self.status_has_code = False
        self.status_has_comment = False
        # A new line never starts inside a comment, so quotation marks
        # should be recognized again
        self.should_recognize_strings = True

    def status_set_has_comment(self):
        """Mark that the rest of the line is a comment."""
        self.status_has_comment = True
        # Quotation marks inside a comment must not open strings
        self.should_recognize_strings = False

    def status_set_has_code(self):
        """Mark that the line contains code."""
        self.status_has_code = True

    def update_status(self):
        """Update the line state according to the character under the
        cursor."""
        c = self.curr()
        if self.status_has_comment:
            # Already inside the comment: collect its text
            self.comment += c
        elif not self.in_string and c == ';':
            # A ';' outside of a string starts the comment
            self.status_set_has_comment()
        elif not c.isspace():
            # Anything that is not whitespace outside of a comment is
            # code (this includes the '' returned at the end of a line)
            self.status_set_has_code()

    def step(self):
        """Step one character, then update the line state for the
        character the cursor landed on."""
        c = super().step()
        self.update_status()
        return c

    def nextline(self):
        """Go to the next line, deciding whether the collected comment is
        kept for the declaration that may follow."""
        leaving = self.currline()
        super().nextline()
        # A comment-only line keeps the comment so the next line can
        # complement it. A line with code drops it, unless that line set
        # an alignment: functions are often documented right before
        # "align", not before their labels.
        if self.status_has_code and not leaving.startswith("align "):
            self.comment = ''
        # From here on the state describes the new line, starting with
        # its first character
        self.status_reset()
        self.update_status()
class AsmReaderFetchingIdentifiers(AsmReaderReadingComments):
    """Reader that can pull a whole identifier off the input."""

    def __init__(self, file):
        super().__init__(file)

    def fetch_identifier(self):
        """Skip whitespace, then consume and return the run of identifier
        characters under the cursor ('' if there is none)."""
        self.skip_spaces()
        collected = []
        while is_id(self.curr()):
            collected.append(self.step())
        return ''.join(collected)
class AsmReader(AsmReaderFetchingIdentifiers):
    """The reader used by the parsers: adds nothing of its own, it only
    names the top of the reader class chain."""

    def __init__(self, file):
        super().__init__(file)
def append_file(full_path, contents):
    """Append `contents` to the output file `full_path`.

    In debug mode nothing is written to disk: the text is accumulated in
    the module-level `output_files` dict under the file's path instead.
    """
    if debug_mode:
        output_files[full_path] = output_files.get(full_path, "") + contents
    else:
        # The context manager closes the file even if the write fails
        with open(full_path, "a") as f:
            f.write(contents)
class AsmElement:
    """Base class of every documented entity found in the assembly sources.

    Keeps where the entity was declared, its name and the comment
    collected for it, and emits a doxygen-commented declaration into the
    generated file that mirrors the assembly source file.
    """

    def __init__(self, location, name, comment):
        """Remember the element and warn if it is undocumented.

        location -- "<file>:<line>" string
        name     -- identifier of the element
        comment  -- comment text collected for the element ('' if none)
        """
        global warnings
        # If the element was constructed during this execution then
        # the element is new
        self.new = True
        self.location = location
        location_parts = self.location.split(':')
        # The file part is used in output paths and links: use '/' only
        self.file = location_parts[0].replace('\\', '/')
        self.line = location_parts[1]
        self.name = name
        self.comment = comment
        if self.comment == '':
            warnings += f'{self.location}: Undocumented element\n'

    def dump(self):
        """Print the location, the name and the comment of the element."""
        print(f"\n{self.location}: {self.name}")
        print(f"{self.comment}")

    def emit(self, dest, doxycomment='', declaration=''):
        """Append the doxygen comment and the declaration of the element
        to its generated file under `dest`.

        dest        -- root directory of the generated sources
        doxycomment -- text of the doxygen comment
        declaration -- C-like declaration; `#define <name>` by default

        Nothing is emitted for elements whose comment contains
        '@dont_give_a_doxygen'.
        """
        global warnings
        # Do not emit anything if the symbol is marked as hidden in its comment
        if '@dont_give_a_doxygen' in self.comment:
            return
        # Redefine default declaration
        if declaration == '':
            declaration = f'#define {self.name}'
        # Check doxycomment
        if not doxycomment.endswith('\n'):
            doxycomment += '\n'
        # The text after the first '@brief ' is empty when the comment has
        # no '@brief ' at all; then there is simply nothing to check
        brief_text = doxycomment.partition('@brief ')[2]
        if brief_text and brief_text[0].islower():
            warnings += (f"{self.location}: Brief comment starting from " +
                         "lowercase\n")
        # Build contents to emit
        contents = ''.join([
            '/**\n',
            doxycomment,
            (f"@par Source\n" +
             f"<a href='{link_root}/{self.file}" +
             f"#line-{self.line}'>{self.file}:{self.line}</a>\n"),
            '*/\n',
            declaration,
            '\n\n',
        ])
        # Get path to file to emit this
        full_path = dest + '/' + self.file
        # Remove the file on first access if it was
        # created by previous generation
        if full_path not in created_files:
            if os.path.isfile(full_path):
                os.remove(full_path)
            created_files.append(full_path)
        # Create directories need for the file
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        # Replace every non-ASCII character with '?'
        contents = ''.join([i if ord(i) < 128 else '?' for i in contents])
        append_file(full_path, contents)
class AsmVariable(AsmElement):
    """A documented data definition: `<name> <type> <initial value>`."""

    def __init__(self, location, name, comment, type, init):
        super().__init__(location, name, comment)
        self.type = type
        self.init = init

    def dump(self):
        super().dump()
        print("(Variable)\n---")

    def emit(self, dest):
        """Emit the variable as `<type> <name>;` with its initial value
        listed in the doxygen comment."""
        # The comment becomes the brief description unless it already
        # carries its own @brief
        if '@brief' in self.comment:
            brief = self.comment
        else:
            brief = '@brief ' + self.comment
        doxycomment = brief + "@par Initial value\n" + f"{self.init}\n"
        # Dots are replaced by underscores in the emitted names
        c_name = self.name.replace(".", "_")
        c_type = self.type.replace(".", "_")
        super().emit(dest, doxycomment, f"{c_type} {c_name};")
class AsmFunction(AsmElement):
    """A documented procedure: either a `proc` declaration or a label
    whose comment describes parameters or a return value."""

    def __init__(self, location, name, comment, calling_convention,
                 args, used_regs):
        """
        calling_convention -- calling convention specifier ('' if none)
        args               -- list of (name, type) tuples
        used_regs          -- list of names of the used registers
        """
        super().__init__(location, name, comment)
        self.calling_convention = calling_convention
        self.args = args
        self.used_regs = used_regs

    def dump(self):
        super().dump()
        print("(Function)\n---")

    def _args_from_comment(self):
        """Return an (name, 'arg_t') tuple for every '@param' found in the
        comment.

        The scan never reads past the end of the comment, so a '@param'
        (or its name) sitting at the very end of the text is fine.
        """
        comment = self.comment
        end = len(comment)
        found = []
        i = comment.find('@param')
        while i != -1:
            # Skip '@param'
            i += len('@param')
            # Skip spaces after '@param'
            while i < end and comment[i].isspace():
                i += 1
            # Get the parameter name
            name_start = i
            while i < end and is_id(comment[i]):
                i += 1
            found.append((comment[name_start:i], 'arg_t'))
            i = comment.find('@param', i)
        return found

    def emit(self, dest):
        """Emit the function as `void <name>(<type> <arg>, ...);`."""
        # The comment becomes the brief description unless it already
        # carries its own @brief
        doxycomment = self.comment
        if '@brief' not in doxycomment:
            doxycomment = '@brief ' + doxycomment
        # If there was no arguments, maybe that's just a label
        # then parse parameters from its comment
        if len(self.args) == 0 and '@param' in self.comment:
            self.args.extend(self._args_from_comment())
        # Build the arg list for declaration
        arg_list = '(' + ", ".join(
            f"{arg[1]} {arg[0]}" for arg in self.args
        ) + ')'
        # Build the declaration; dots are replaced by underscores
        name = self.name.replace(".", "_")
        declaration = f"void {name}{arg_list};"
        # Emit this
        super().emit(dest, doxycomment, declaration)
class AsmLabel(AsmElement):
    """A documented non-local label that is not treated as a function."""

    def __init__(self, location, name, comment):
        super().__init__(location, name, comment)

    def dump(self):
        super().dump()
        print("(Label)\n---")

    def emit(self, dest):
        """Emit the label as `label <name>;`."""
        # The comment becomes the brief description unless it already
        # carries its own @brief
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        # Dots are replaced by underscores in the emitted name
        c_name = self.name.replace(".", "_")
        super().emit(dest, doxycomment, f"label {c_name};")
class AsmMacro(AsmElement):
    """A documented macro with the raw argument tokens of its header."""

    def __init__(self, location, name, comment, args):
        super().__init__(location, name, comment)
        self.args = args

    def dump(self):
        super().dump()
        print("(Macro)\n---")

    def emit(self, dest):
        """Emit the macro as `#define <name>(<args>)`; the parentheses are
        left out when there are no arguments."""
        # Substring test on purpose: it drops the '[', ']' and '*' tokens
        # as well as empty strings
        names = [arg for arg in self.args if arg not in "[]*"]
        # C-like arg list
        arg_list = '(' + ", ".join(names) + ')' if names else ""
        # The comment becomes the brief description unless it already
        # carries its own @brief
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        super().emit(dest, doxycomment, f"#define {self.name}{arg_list}")
class AsmStruct(AsmElement):
    """A documented structure together with its parsed members."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        super().dump()
        print("(Struct)\n---")

    def emit(self, dest):
        """Emit the structure as a C `struct`; only members that are
        AsmVariable objects are listed, each with its comment attached."""
        # The comment becomes the brief description unless it already
        # carries its own @brief
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        doxycomment += '\n'
        # One line per variable member; other member kinds are skipped
        member_lines = [
            f'\t{member.type} {member.name}; ' +
            f'/**< {member.comment} */\n'
            for member in self.members
            if type(member) is AsmVariable
        ]
        declaration = (f"struct {self.name}" + " {\n" +
                       ''.join(member_lines) + '};')
        super().emit(dest, doxycomment, declaration)
class AsmUnion(AsmElement):
    """A documented union; its members are stored but not emitted."""

    def __init__(self, location, name, comment, members):
        super().__init__(location, name, comment)
        self.members = members

    def dump(self):
        super().dump()
        print("(Union)\n---")

    def emit(self, dest):
        """Emit the union as an empty C `union <name> {};`."""
        # The comment becomes the brief description unless it already
        # carries its own @brief
        if '@brief' in self.comment:
            doxycomment = self.comment
        else:
            doxycomment = '@brief ' + self.comment
        super().emit(dest, doxycomment, f"union {self.name}" + " {};")
class VariableNameIsMacroName:
    """Result returned by parse_variable() when the word in the variable
    name position is a known macro name; carries that name."""

    def __init__(self, name):
        self.name = name
def is_id(c):
    """Tell whether the character `c` may be a part of an identifier.

    Printable characters are accepted except for the listed operators,
    brackets, punctuation, quotation marks and whitespace. The empty
    string (what the reader returns at the end of a line) is rejected.
    """
    delimiters = "+-/*=<>()[]{};:,|&~#`'\" \n\r\t\v"
    if not c.isprintable():
        return False
    return c not in delimiters
def is_starts_as_id(s):
    """Tell whether the word `s` starts the way an identifier may start,
    that is with anything but a digit.

    An empty word is not an identifier, so False is returned for it.
    """
    return bool(s) and not s[0].isdigit()
def parse_after_macro(r):
    """Parse a macro declaration; the reader stands right after the
    `macro` keyword.

    Reads the macro name and the argument tokens, takes the comment
    collected by the reader at the end of the header line and then moves
    the reader to the '}' that closes the macro body.

    Returns an AsmMacro. Raises Exception on an unexpected character in
    the argument list.
    """
    location = r.location()
    # Skip spaces after the "macro" keyword
    r.skip_spaces()
    # Read macro name ('#' is accepted inside the name as well)
    name = ""
    while is_id(r.curr()) or r.curr() == '#':
        name += r.step()
    # Skip spaces after macro name
    r.skip_spaces()
    # Collect the argument tokens up to the end of the line, a comment
    # or the beginning of the macro body
    args = []
    arg = ''
    while True:
        c = r.curr()
        if not c or c == ';' or c == '{':
            break
        if is_id(c):
            # One more character of the current identifier
            arg += r.step()
        elif c == ',':
            # The comma ends the current token
            args.append(arg)
            arg = ''
            r.step()
        elif c == '[':
            # Just push the '['
            args.append(r.step())
        elif c == ']' or c == '*':
            # Push the identifier and keep the ']' or '*' as the token
            # that will be pushed on the next comma
            args.append(arg)
            arg = r.step()
        elif c.isspace():
            r.step()
        else:
            raise Exception(f"Unexpected symbol '{r.curr()}' " +
                            f"at index #{r.i} in the macro declaration " +
                            f"at {location} " +
                            f"(line: {r.lines[r.line_idx]})\n''")
    # Append the last argument
    if arg:
        args.append(arg)
    # Skip spaces after the argument list
    r.skip_spaces()
    # Read till the end of the line so that the reader collects the
    # comment of the header line, if there is one
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Find the end of the macro: the first '}' that does not directly
    # follow a backslash.
    # NOTE(review): the search starts after the header line was consumed
    # above, so a '}' located on the header line itself is not seen here
    # - confirm that macros closed on their header line are handled.
    prev = ''
    while not (r.curr() == '}' and prev != '\\'):
        if r.curr() == '':
            prev = ''
            r.nextline()
        else:
            prev = r.step()
    # Build the output
    return AsmMacro(location, name, comment, args)
def parse_variable(r, first_word=None):
    """Try to parse `<name> <type> <value>` as a variable declaration.

    r          -- the reader; stands before the name, or right after it
                  when `first_word` is supplied
    first_word -- the name if the caller has already read it

    Returns one of:
    - AsmVariable when a declaration was parsed;
    - None when the line is not a variable declaration;
    - VariableNameIsMacroName when the name is a known macro name;
    - the name (str) when no identifier follows it, so the caller may
      interpret the name itself (e.g. as a label);
    - the tuple (name, second word) when the second word is a keyword.
    """
    location = r.location()
    # Skip spaces before variable name
    r.skip_spaces()
    # Read the name if it was not supplied
    if first_word is None:
        name = ""
        while is_id(r.curr()):
            name += r.step()
    else:
        name = first_word
    # A zero length name means there's something else than an identifier
    # at the beginning; a name starting from a digit is illegal
    if len(name) == 0 or not is_starts_as_id(name):
        return None
    # Get kind of the identifier from id2kind table
    kind = id_get_kind(name)
    # If it's a keyword, that's not a variable declaration
    if ID_KIND_KEYWORD in kind:
        return None
    # If it's a macro name, that's not a variable declaration
    if ID_KIND_MACRO_NAME in kind:
        return VariableNameIsMacroName(name)
    # If it's a datatype or a structure name that's not a variable
    # declaration: that's just a data, which is not documented for now
    if ID_KIND_STRUCT_NAME in kind or ID_KIND_FASM_TYPE in kind:
        return None
    # Skip spaces before type name
    r.skip_spaces()
    # Read type name
    var_type = ""
    while is_id(r.curr()):
        var_type += r.step()
    if len(var_type) == 0:
        # If there's no type identifier after the name maybe the name is
        # something meaningful for the next parser: return it
        return name
    # If it starts from digit it's illegal
    if not is_starts_as_id(var_type):
        return None
    # If the second word is a keyword, that's not a variable declaration:
    # return the two words of the lexical structure
    if ID_KIND_KEYWORD in id_get_kind(var_type):
        return (name, var_type)
    # Skip spaces before the value
    r.skip_spaces()
    # Read the value until the comment or the end of the line. A ';'
    # inside a quoted string is a part of the value, not a comment start,
    # so ask the reader whether it is inside a string at the moment.
    value = ""
    while True:
        c = r.curr()
        if c == '' or c == '\n':
            break
        if c == ';' and not getattr(r, 'in_string', None):
            break
        value += r.step()
    # Skip spaces after the value
    r.skip_spaces()
    # Read till end of the line to get a comment from the reader
    while r.curr() != '':
        r.step()
    # Build the result
    return AsmVariable(location, name, r.comment, var_type, value)
def parse_after_struct(r, as_union=True):
    """Parse a structure declaration; the reader stands right after the
    `struct` (or nested `union`) keyword.

    Reads the name and the comment of the header line, then parses one
    member per line until a line starting with the `ends` macro. The
    reader is left on that `ends` line.

    Returns AsmStruct when `as_union` is true and AsmUnion otherwise.
    NOTE(review): this mapping looks inverted relative to the parameter
    name; with the default value every caller in view gets an AsmStruct.
    Kept as is - confirm the intent before changing it.

    Raises Exception when a member line holds a lone identifier that is
    neither `union` nor a label.
    """
    global warnings
    location = r.location()
    # Skip spaces after "struct" keyword
    r.skip_spaces()
    # Read struct name
    name = ""
    while is_id(r.curr()):
        name += r.step()
    # Read till end of the line and get the comment from the reader
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Get to the next line to parse struct members
    r.nextline()
    # Parse struct members
    members = []
    while True:
        r.skip_spaces()
        var = parse_variable(r)
        if isinstance(var, AsmVariable):
            members.append(var)
        elif isinstance(var, str):
            if var == 'union':
                # Parse the union as a struct. The nested call leaves the
                # reader on the `ends` line of the union; the nextline()
                # at the bottom of this loop is what skips that line.
                # (Calling nextline() here as well would skip the line
                # after `ends` without parsing it.)
                members.append(parse_after_struct(r, as_union=True))
            elif r.curr() == ':':
                warnings += f"{r.location()}: Skept the label in the struct\n"
            else:
                raise Exception(f"Garbage in struct member at {location} " +
                                f" (got '{var}' identifier)")
        elif isinstance(var, VariableNameIsMacroName):
            if var.name == 'ends':
                break
        r.nextline()
    # Return the result
    if as_union:
        return AsmStruct(location, name, comment, members)
    return AsmUnion(location, name, comment, members)
def parse_after_proc(r):
    """Parse a procedure declaration; the reader stands right after the
    `proc` keyword.

    Reads the name, an optional calling convention ('stdcall' or 'c'),
    an optional `uses` register list and the arguments, each optionally
    followed by ':' and a type. Arguments without a type get 'arg_t'.

    Returns an AsmFunction whose location is the reader position after
    the rest of the line has been consumed.
    """
    # Get proc name
    name = r.fetch_identifier()
    # Next identifier after the proc name
    identifier = r.fetch_identifier()
    # A calling convention specifier is saved and the next identifier is
    # looked up instead
    calling_convention = ''
    if identifier in ('stdcall', 'c'):
        calling_convention = identifier
        # If next is a comma, just skip it
        if r.curr() == ',':
            r.step()
        identifier = r.fetch_identifier()
    # 'uses' starts the list of used registers: identifiers up to the
    # first thing that is not an identifier
    used_regs = []
    if identifier == 'uses':
        reg_name = r.fetch_identifier()
        while reg_name != '':
            used_regs.append(reg_name)
            reg_name = r.fetch_identifier()
        # If next is a comma, just skip it
        if r.curr() == ',':
            r.step()
        identifier = r.fetch_identifier()
    # Whatever identifier is at hand now is the first argument name
    args = []
    while identifier != '':
        arg_type = 'arg_t'
        # Skip spaces after argument name
        r.skip_spaces()
        # If there's a ':' after the name - the next identifier is type
        if r.curr() == ':':
            r.step()
            arg_type = r.fetch_identifier()
        args.append((identifier, arg_type))
        # A comma means there's one more argument, anything else ends
        # the argument list
        if r.curr() == ',':
            r.step()
            identifier = r.fetch_identifier()
        else:
            identifier = ''
    # Get to the end of the line and get a comment from the reader
    while r.curr() != '':
        r.step()
    comment = r.comment
    # Build the element
    return AsmFunction(r.location(), name, comment, calling_convention,
                       args, used_regs)
def _purge_macros(r):
    """Handle the rest of a `purge` line: forget the macro kind of every
    comma-separated name listed on it."""
    while True:
        # Skip spaces after the 'purge' keyword or after
        # the comma what separated the previous macro name
        r.skip_spaces()
        # Get the purged macro name
        name = ''
        while is_id(r.curr()):
            name += r.step()
        # Remove the purged macro from the macro names list
        # (names that were never registered are simply ignored)
        id_remove_kind(name, ID_KIND_MACRO_NAME)
        # Skip spaces after the name
        r.skip_spaces()
        # Anything but a comma means that was the last purged macro
        if r.curr() != ',':
            break
        r.step()


def _collect_variable_or_label(r, first_word):
    """Handle a line that starts with the plain identifier `first_word`:
    a variable, a label or a constant definition."""
    # Skip spaces after the identifier
    r.skip_spaces()
    # Match a variable
    var = parse_variable(r, first_word)
    if type(var) == AsmVariable:
        elements.append(var)
    elif type(var) == str:
        # It wasn't a variable: parse_variable gave the word back since
        # no type identifier follows it. Maybe the word is a label name.
        name = var
        if r.curr() == ':':
            # Get to the end of the line and get the comment from the
            # reader
            while r.curr() != '':
                r.step()
            comment = r.comment
            # Only handle non-local labels
            if name[0] != '.' and name != "@@" and name != "$Revision":
                # Treat the label as function if there's @return or
                # @param in its comment. Otherwise it's just a variable
                # with type `label` in generated doxygen C
                if '@return' in comment or '@param' in comment:
                    element = AsmFunction(r.location(), name, comment,
                                          '', [], [])
                else:
                    element = AsmLabel(r.location(), name, comment)
                elements.append(element)
        elif r.curr() == '=':
            # Save the identifier as a set constant
            id_add_kind(first_word, ID_KIND_SET_CONSTANT)
    elif type(var) == tuple:
        word_one, word_two = var
        if word_two == 'equ':
            # Save the identifier as an equated constant
            id_add_kind(word_one, ID_KIND_EQUATED_CONSTANT)


def get_declarations(asm_file_contents, asm_file_name):
    """Parse the file `asm_file_name` line by line and collect what it
    declares.

    Macros, structures, procedures, variables and non-local labels are
    appended to the module-level `elements` list; kinds of macro names,
    structure names and constants are recorded with id_add_kind().

    asm_file_contents -- not used here: the file is read again by the
                         reader
    """
    r = AsmReader(asm_file_name)
    # Directives whose lines are skipped as a whole
    skipped_directives = ('format', 'include', 'if', 'repeat')
    while not r.no_lines():
        # Skip leading spaces
        r.skip_spaces()
        # Skip the line if it's starting with a comment
        if r.curr() == ';':
            r.nextline()
            continue
        # Get first word
        first_word = ""
        while is_id(r.curr()):
            first_word += r.step()
        if first_word == "macro":
            # Macro declaration
            macro = parse_after_macro(r)
            elements.append(macro)
            id_add_kind(macro.name, ID_KIND_MACRO_NAME)
        elif first_word == "struct":
            # Structure declaration
            struct = parse_after_struct(r)
            elements.append(struct)
            id_add_kind(struct.name, ID_KIND_STRUCT_NAME)
        elif first_word == "proc":
            # Function definition
            elements.append(parse_after_proc(r))
        elif first_word in skipped_directives:
            pass
        elif first_word == 'purge':
            _purge_macros(r)
        elif len(first_word) != 0:
            # Label or a variable
            _collect_variable_or_label(r, first_word)
        r.nextline()
def it_neds_to_be_parsed(source_file):
    """Tell whether `source_file` has to be (re)parsed.

    True when the saved symbols file 'asmxygen.elements.pickle' does not
    exist, when the doxygen file generated for the source does not exist
    under `doxygen_src_path`, or when the source was modified later than
    that generated file.
    """
    # Without the symbols file everything has to be parsed anyway:
    # the file has to be created and used to generate proper doxygen
    if not os.path.isfile('asmxygen.elements.pickle'):
        return True
    generated = doxygen_src_path + '/' + source_file
    # Nothing was generated for this source yet
    if not os.path.isfile(generated):
        return True
    # The existing doxygen is old if the source is newer than it
    return os.path.getmtime(source_file) > os.path.getmtime(generated)
def handle_file(handled_files, asm_file_name, subdir="."):
    """Process one source file and, recursively, every file it includes.

    Parameters:
        handled_files: list of file paths already visited in this
            execution. The path of this file is appended to it in place.
        asm_file_name: path of the source file to process.
        subdir: directory the include paths found in this file are
            resolved against first.

    Nothing is done for lang.inc or for a file already in handled_files.
    If it_neds_to_be_parsed() says the file is outdated, the elements
    collected from it earlier are dropped from the module-level `elements`
    list and, unless clean_generated_stuff is set, its declarations are
    collected again with get_declarations(). Includes are followed in
    either case.
    """
    global elements
    # Canonicalize the file path and make it relative to the directory of
    # the script (the path is expected to be located inside of it)
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    asm_file_name = os.path.realpath(asm_file_name)
    asm_file_name = asm_file_name[len(cwd) + 1:]
    # If it's lang.inc - skip it
    if asm_file_name == 'lang.inc':
        return
    # If the file was handled in this execution before - skip it
    if asm_file_name in handled_files:
        return
    # Say that the file was handled in this execution
    handled_files.append(asm_file_name)
    # Check if the file should be parsed
    # (if it was modified or wasn't parsed yet)
    should_get_declarations = it_neds_to_be_parsed(asm_file_name)
    if not should_get_declarations:
        print(f"Skipping {asm_file_name} (already newest)")
    else:
        print(f"Handling {asm_file_name}")
        # Remove elements parsed from this file before if any
        # (location is '<file>:<rest>', so compare its first part)
        elements_to_keep = []
        elements_to_remove = []
        for element in elements:
            if element.location.split(':')[0] == asm_file_name:
                elements_to_remove.append(element)
            else:
                elements_to_keep.append(element)
        elements = elements_to_keep
        # Forget kinds of identifiers of names of the removed elements
        for element in elements_to_remove:
            if type(element) == AsmStruct:
                id_remove_kind(element.name, ID_KIND_STRUCT_NAME)
            elif type(element) == AsmMacro:
                id_remove_kind(element.name, ID_KIND_MACRO_NAME)
    # Read the source (the file is closed as soon as it's read)
    with open(asm_file_name, "r", encoding="utf-8") as asm_file:
        asm_file_contents = asm_file.read()
    # Find includes, fix their paths and handle them recursively
    includes = re.findall(r'^include (["\'])(.*)\1', asm_file_contents,
                          flags=re.MULTILINE)
    for include in includes:
        # The second group is the path, the first one is the quote
        include_path = include[1].replace('\\', '/')
        full_path = subdir + '/' + include_path
        # If the path isn't valid, maybe that's not a relative path
        if not os.path.isfile(full_path):
            full_path = include_path
        new_subdir = full_path.rsplit('/', 1)[0]
        handle_file(handled_files, full_path, new_subdir)
    # Only collect declarations from the file if it has to be parsed
    # and the script wasn't asked to just clean the generated files
    if should_get_declarations and not clean_generated_stuff:
        get_declarations(asm_file_contents, asm_file_name)
if __name__ == "__main__":
    link_root = "http://websvn.kolibrios.org/filedetails.php"
    link_root += "?repname=Kolibri+OS&path=/kernel/trunk"

    # Dict where an identifier is associated with a string
    # The string contains characters specifying flags
    # Available flags:
    #  k - Keyword
    #  m - Macro name
    #  t - fasm data Type name (db, rq, etc.)
    #  s - Struct type name
    #  e - equated constant (name equ value)
    #  = - set constants (name = value)
    ID_KIND_KEYWORD = 'k'
    ID_KIND_MACRO_NAME = 'm'
    ID_KIND_FASM_TYPE = 't'
    ID_KIND_STRUCT_NAME = 's'
    ID_KIND_EQUATED_CONSTANT = 'e'
    ID_KIND_SET_CONSTANT = '='
    id2kind = {}
    for keyword in keywords:
        id_add_kind(keyword, ID_KIND_KEYWORD)
    for fasm_type in fasm_types:
        id_add_kind(fasm_type, ID_KIND_FASM_TYPE)

    # Warning list
    warnings = ""

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", help="Doxygen output folder")
    parser.add_argument("--clean",
                        help="Remove generated files",
                        action="store_true")
    parser.add_argument("--dump",
                        help="Dump all defined symbols",
                        action="store_true")
    parser.add_argument("--stats",
                        help="Print symbol stats",
                        action="store_true")
    parser.add_argument("--nowarn",
                        help="Do not write warnings file",
                        action="store_true")
    parser.add_argument("--noemit",
                        help="Do not emit doxygen files (for testing)",
                        action="store_true")
    parser.add_argument("--debug",
                        help="Show hashes of files (for testing)",
                        action="store_true")
    args = parser.parse_args()

    # Parameters
    # Path to doxygen folder to make doxygen files in: -o <path>
    doxygen_src_path = args.o if args.o else 'docs/doxygen'
    # Remove generated doxygen files: --clean
    clean_generated_stuff = args.clean
    # Dump all defined symbols: --dump
    dump_symbols = args.dump
    # Print symbol stats: --stats
    print_stats = args.stats
    # Do not write warnings file: --nowarn
    enable_warnings = not args.nowarn
    # Do not emit doxygen files: --noemit
    noemit = args.noemit
    # Compare hashes of the output with the saved ones: --debug
    debug_mode = args.debug

    # Variables, functions, labels, macros, structure types
    elements = []
    created_files = []
    kernel_files = []
    output_files = {}  # If --debug then all the files are written here

    # Load remembered list of symbols
    # NOTE: pickle.load can execute arbitrary code, so the dump must only
    # ever be a file written by this script itself
    if os.path.isfile('asmxygen.elements.pickle'):
        print('Reading existing dump of symbols')
        with open('asmxygen.elements.pickle', 'rb') as pickle_file:
            (elements, id2kind) = pickle.load(pickle_file)

    handle_file(kernel_files, "./kernel.asm")

    if dump_symbols:
        # The elements dump themselves to stdout, so point it to the file
        # and always point it back, even if some dump() fails
        stdout = sys.stdout
        with open('asmxygen.dump.txt', 'w', encoding='utf-8') as dump_file:
            sys.stdout = dump_file
            try:
                for asm_element in elements:
                    asm_element.dump()
            finally:
                sys.stdout = stdout

    if clean_generated_stuff:
        for file in kernel_files:
            doxygen_file = f"{doxygen_src_path}/{file}"
            if os.path.isfile(doxygen_file):
                print(f"Removing {file}... ", end='')
                os.remove(doxygen_file)
                print("Done.")
    elif not noemit:
        print(f"Writing doumented sources to {doxygen_src_path}")

        new_elements = [x for x in elements if x.new]
        for i, element in enumerate(new_elements, 1):
            counter = f"[{i}/{len(new_elements)}]"
            print(f"{counter} Emitting {element.name} from {element.location}")
            element.emit(doxygen_src_path)

        print("Writing dump of symbols to asmxygen.elements.pickle")

        # Now that the new elements are already written, there are no
        # new elements anymore
        for element in elements:
            element.new = False
        with open('asmxygen.elements.pickle', 'wb') as pickle_file:
            pickle.dump((elements, id2kind), pickle_file)

    if print_stats:
        # Count the elements by their exact type
        count_by_type = {}
        for element in elements:
            element_type = type(element)
            count_by_type[element_type] = \
                count_by_type.get(element_type, 0) + 1
        var_count = count_by_type.get(AsmVariable, 0)
        mac_count = count_by_type.get(AsmMacro, 0)
        lab_count = count_by_type.get(AsmLabel, 0)
        fun_count = count_by_type.get(AsmFunction, 0)
        uni_count = count_by_type.get(AsmUnion, 0)
        str_count = count_by_type.get(AsmStruct, 0)

        print(f'Parsed variable count: {var_count}')
        print(f'Parsed macro count: {mac_count}')
        print(f'Parsed label count: {lab_count}')
        print(f'Parsed function count: {fun_count}')
        print(f'Parsed union type count: {uni_count}')
        print(f'Parsed structure type count: {str_count}')

    if enable_warnings:
        with open('asmxygen.txt', "w", encoding="utf-8") as warnings_file:
            warnings_file.write(warnings)

    if debug_mode:
        # One '<file>: <sha1>' line per output file. The text is encoded
        # as utf-8 so non-ASCII characters don't break hashing (for pure
        # ASCII text the hash is the same as with the ascii encoding)
        hash_per_file = ""
        for file in output_files:
            h = hashlib.sha1(output_files[file].encode("utf-8")).hexdigest()
            hash_per_file += f"{file}: {h}\n"
        if not os.path.exists("asmxygen_hash_per_file.txt"):
            # First run: remember the hashes as the reference
            with open("asmxygen_hash_per_file.txt", "w") as hash_file:
                hash_file.write(hash_per_file)
            print("NEW")
        else:
            with open("asmxygen_hash_per_file.txt") as hash_file:
                reference_hash_per_file = hash_file.read()
            if reference_hash_per_file != hash_per_file:
                # ndiff compares sequences of lines; given plain strings
                # it would compare them character by character
                diffs = difflib.ndiff(
                    reference_hash_per_file.splitlines(keepends=True),
                    hash_per_file.splitlines(keepends=True)
                )
                print(''.join(diffs))
            else:
                print("SUCCESS")