# Generator for a C implementation of wcwidth().
#
# Downloads the Unicode EastAsianWidth.txt data file, assigns a column width
# (-1, 0, 1 or 2) to every code point from 1 through 0x10FFFF, and prints a C
# function made of a chain of "wc < boundary" tests to standard output.
import fileio
import os

let eaw_txt = '/tmp/EastAsianWidth.txt'
os.system(f"wget -O '{eaw_txt}' https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt")

let lines
with fileio.open(eaw_txt,'r') as f:
    lines = f.readlines()

# Decide the display width of one code point.
#
#   cp: the code point as an integer.
#   gr: the East Asian Width property value from the data file
#       ('N', 'Na', 'A', 'H', 'W' or 'F').
#   ct: the first word of the line's trailing comment, compared below against
#       the general categories 'Cf', 'Me' and 'Mn'.
#
# Returns -1 for non-printable / invalid code points, 0 for zero-width ones,
# 1 for single-column and 2 for double-column characters. The tests are
# ordered: the first one that matches wins.
def classify(cp, gr, ct):
    # U+00AD: Soft hyphen - other things seem to want this to be 1.
    # This must come before the category test further down.
    if cp == 0xAD:
        return 1
    # Low control codes
    if cp < 0x20:
        return -1
    # Del, higher control codes
    if cp >= 0x7f and cp < 0xa0:
        return -1
    # Surrogates
    if cp >= 0xd800 and cp <= 0xdfff:
        return -1
    # Combining characters
    if ct in ['Cf','Me','Mn']:
        return 0
    # Hangul jamo
    if cp >= 0x1160 and cp <= 0x11FF:
        return 0
    # Zero-width space
    if cp == 0x200b:
        return 0
    # Mark neutral, narrow, ambiguous, and half-width as 1
    if gr in ['N','Na','A','H']:
        return 1
    # Mark wide and full-width as 2
    if gr in ['W','F']:
        return 2
    # Mark everything else as invalid
    return -1

# One slot per code point; None means "not mentioned in the data file".
let classes = [None] * 0x110000

for line in lines:
    # Strip first so that indented comments and blank lines are skipped too.
    line = line.strip()
    if not line or line.startswith('#') or ';' not in line:
        continue
    # Data lines look like "<cp or range> ; <width>  # <category> ...".
    let codepoint, rest = line.split(';',1)
    # The code point field may be padded with spaces before the ';'.
    codepoint = codepoint.strip()
    # Tolerate a data line without a trailing comment.
    let parts = rest.split('#',1)
    let group = parts[0].strip()
    let comment = parts[1].strip() if len(parts) > 1 else ''
    let ctype = comment.split(' ')[0] if comment else ''
    # Is this a range?
    if '..' in codepoint:
        let bounds = codepoint.split('..',1)
        let lo = bounds[0].strip()
        let hi = bounds[1].strip()
        let rangeStart = int(f'0x{lo}')
        let rangeEnd = int(f'0x{hi}')
        # Both ends of the range are inclusive.
        for i in range(rangeStart, rangeEnd + 1):
            classes[i] = classify(i, group, ctype)
    else:
        let cp = int(f'0x{codepoint}')
        classes[cp] = classify(cp, group, ctype)

# Anything the data file did not mention is treated as invalid.
for i in range(1,0x110000):
    if classes[i] is None:
        classes[i] = -1

# Emit the C source. Code point 0 is handled by the fixed first test; every
# later test closes a run of equal widths at the first code point that differs.
print('''/* Generated by util/gen_wcwidth.krk */
#include <wchar.h>
int wcwidth(wchar_t wc) {
\tif (wc == 0) return 0;''')

let last = None
for i in range(1,0x110000):
    if last is not None and classes[i] != last:
        print(f'\telse if (wc < {hex(i)}) return {last};')
    last = classes[i]

# Close the final run and reject anything at or above 0x110000.
print(f'\telse if (wc < 0x110000) return {last};\n\treturn -1;\n}')