2021-10-27 13:23:05 +03:00
|
|
|
import fileio
|
|
|
|
import os
|
|
|
|
|
|
|
|
# Scratch path where the downloaded Unicode data file is stored.
let eaw_txt = '/tmp/EastAsianWidth.txt'

# Fetch the current East Asian Width table from unicode.org.
# The generated C source goes to stdout, so a failed download must stop the
# script here rather than silently producing a table built from an empty or
# partial file.
let status = os.system(f"wget -O '{eaw_txt}' https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt")
if status != 0:
    raise Exception(f'wget exited with status {status}; refusing to generate wcwidth table')

# Raw lines of the data file, consumed by the parsing loop further down.
let lines
with fileio.open(eaw_txt,'r') as f:
    lines = f.readlines()
|
|
|
|
|
|
|
|
def classify(cp, gr, ct):
    '''Return the column width to report for a single codepoint.

    cp is the integer codepoint, gr is its East Asian Width class
    (for example 'N', 'Na', 'A', 'H', 'W' or 'F') and ct is its
    category code (for example 'Mn').

    The result is 2 for wide and full-width characters, 1 for neutral,
    narrow, ambiguous and half-width characters, 0 for characters that
    take up no column of their own, and -1 for anything unprintable
    or unrecognized.
    '''
    # U+00AD: Soft hyphen - other things seem to want this to be 1.
    # This has to be tested before the zero-width check below.
    if cp == 0xAD:
        return 1

    # Unprintable: low control codes, DEL and the higher control codes,
    # and the surrogate range.
    if cp < 0x20 or (cp >= 0x7f and cp < 0xa0) or (cp >= 0xd800 and cp <= 0xdfff):
        return -1

    # Zero columns: combining characters, Hangul jamo in U+1160..U+11FF,
    # and the zero-width space.
    if ct in ['Cf','Me','Mn'] or (cp >= 0x1160 and cp <= 0x11FF) or cp == 0x200b:
        return 0

    # Wide and full-width take two columns.
    if gr in ['W','F']:
        return 2

    # Neutral, narrow, ambiguous, and half-width take one column.
    if gr in ['N','Na','A','H']:
        return 1

    # Everything else is invalid.
    return -1
|
|
|
|
|
|
|
|
# Width class for every codepoint, indexed by codepoint value.
# None marks entries the data file has not (yet) assigned.
let classes = [None] * 0x110000

for line in lines:
    # Skip empty lines, comment lines, and anything that is not a
    # ';'-separated data record.
    if !line or line.startswith('#') or ';' not in line:
        continue
    line = line.strip()

    # Records have the shape '<codepoint or range> ; <width> # <category> ...'
    let codepoint, rest = line.split(';',1)
    let group, comment = rest.split('#',1)

    # Fields may be padded with spaces for column alignment; strip all of
    # them - including the codepoint field - before interpreting them, so
    # the hex conversion below never sees trailing whitespace.
    codepoint = codepoint.strip()
    group = group.strip()
    comment = comment.strip()

    # The first word of the trailing comment is the category code.
    let ctype = comment.split(' ')[0]

    # Is this a range?
    if '..' in codepoint:
        let start, end = codepoint.split('..',1)
        start = int(f'0x{start}',0)
        end = int(f'0x{end}',0)
        # The range is inclusive on both ends.
        for i = start; i <= end; i++:
            classes[i] = classify(i, group, ctype)
    else:
        codepoint = int(f'0x{codepoint}',0)
        classes[codepoint] = classify(codepoint, group, ctype)

# Anything the data file did not mention is treated as invalid.
# Index 0 is left alone here; the generated function special-cases it.
for i in range(1,0x110000):
    if classes[i] is None: classes[i] = -1
|
|
|
|
|
|
|
|
# Emit the generated C source on stdout: a wcwidth() made of one long
# if / else-if chain, with one branch per run of consecutive codepoints
# that share the same width class.
print('''/* Generated by util/gen_wcwidth.krk */
#include <wchar.h>

int wcwidth(wchar_t wc) {
\tif (wc == 0) return 0;''')

# Width class of the run currently being scanned; the scan starts at
# codepoint 1 because 0 is handled by the explicit check printed above.
let prev = classes[1]
for cp in range(2,0x110000):
    let cur = classes[cp]
    if cur != prev:
        # A run ended just before cp: everything below cp that has not been
        # matched by an earlier branch has width prev.
        print(f'\telse if (wc < {hex(cp)}) return {prev};')
        prev = cur

# Close the final run, then reject everything past the last codepoint.
print(f'\telse if (wc < 0x110000) return {prev};\n\treturn -1;\n}}')
|