kuroko/test/testCodecsModule.krk

110 lines
29 KiB
Python
Raw Permalink Normal View History

import codecs
# Encode the same mixed emoji / Latin string to Windows-1251 once per error
# handler, printing each result; the handler order fixes the output order.
for handler in ["ignore", "replace", "backslashreplace", "xmlcharrefreplace"]:
    print(codecs.encode("🏣 í Brčko", "windows-1251", errors=handler))
let data = b'\xc7\xed\xe0\xea\xee\xec\xf1\xf2\xe2\xee \xf3 \xd0\xee\xf1\xf2\xee\xe2\xfb\xf5 \xe1\xfb\xeb\xe0 \xe2\xf1\xff \xcc\xee\xf1\xea\xe2\xe0; \xe4\xe5\xed\xe5\xe3 \xe2 \xed\xfb\xed\xe5\xf8\xed\xe8\xe9 \xe3\xee\xe4 \xf3 \xf1\xf2\xe0\xf0\xee\xe3\xee \xe3\xf0\xe0\xf4\xe0 \xe1\xfb\xeb\xee \xe4\xee\xf1\xf2\xe0\xf2\xee\xf7\xed\xee, \xef\xee\xf2\xee\xec\xf3 \xf7\xf2\xee \xe1\xfb\xeb\xe8 \xef\xe5\xf0\xe5\xe7\xe0\xeb\xee\xe6\xe5\xed\xfb \xe2\xf1\xe5 \xe8\xec\xe5\xed\xe8\xff, \xe8 \xef\xee\xf2\xee\xec\xf3 \xcd\xe8\xea\xee\xeb\xf3\xf8\xea\xe0, \xe7\xe0\xe2\xe5\xe4\xff \xf1\xe2\xee\xe5\xe3\xee \xf1\xee\xe1\xf1\xf2\xe2\xe5\xed\xed\xee\xe3\xee \xf0\xfb\xf1\xe0\xea\xe0 \xe8 \xf1\xe0\xec\xfb\xe5 \xec\xee\xe4\xed\xfb\xe5 \xf0\xe5\xe9\xf2\xf3\xe7\xfb, \xee\xf1\xee\xe1\xe5\xed\xed\xfb\xe5, \xea\xe0\xea\xe8\xf5 \xed\xe8 \xf3 \xea\xee\xe3\xee \xe5\xf9\xe5 \xe2 \xcc\xee\xf1\xea\xe2\xe5 \xed\xe5 \xe1\xfb\xeb\xee, \xe8 \xf1\xe0\xef\xee\xe3\xe8 \xf1\xe0\xec\xfb\xe5 \xec\xee\xe4\xed\xfb\xe5, \xf1 \xf1\xe0\xec\xfb\xec\xe8 \xee\xf1\xf2\xf0\xfb\xec\xe8 \xed\xee\xf1\xea\xe0\xec\xe8 \xe8 \xec\xe0\xeb\xe5\xed\xfc\xea\xe8\xec\xe8 \xf1\xe5\xf0\xe5\xe1\xf0\xff\xed\xfb\xec\xe8 \xf8\xef\xee\xf0\xe0\xec\xe8, \xef\xf0\xee\xe2\xee\xe4\xe8\xeb \xe2\xf0\xe5\xec\xff \xee\xf7\xe5\xed\xfc \xe2\xe5\xf1\xe5\xeb\xee. \xd0\xee\xf1\xf2\xee\xe2, \xe2\xe5\xf0\xed\xf3\xe2\xf8\xe8\xf1\xfc \xe4\xee\xec\xee\xe9, \xe8\xf1\xef\xfb\xf2\xe0\xeb \xef\xf0\xe8\xff\xf2\xed\xee\xe5 \xf7\xf3\xe2\xf1\xf2\xe2\xee \xef\xee\xf1\xeb\xe5 \xed\xe5\xea\xee\xf2\xee\xf0\xee\xe3\xee \xef\xf0\xee\xec\xe5\xe6\xf3\xf2\xea\xe0 \xe2\xf0\xe5\xec\xe5\xed\xe8 \xef\xf0\xe8\xec\xe5\xf0\xe8\xe2\xe0\xed\xe8\xff \xf1\xe5\xe1\xff \xea \xf1\xf2\xe0\xf0\xfb\xec \xf3\xf1\xeb\xee\xe2\xe8\xff\xec \xe6\xe8\xe7\xed\xe8. \xc5\xec\xf3 \xea\xe0\xe7\xe0\xeb\xee\xf1\xfc, \xf7\xf2\xee \xee\xed \xee\xf7\xe5\xed\xfc \xe2\xee\xe7\xec\xf3\xe6\xe0\xeb \xe8 \xe2\xfb\xf0\xee\xf1. 
\xce\xf2\xf7\xe0\xff\xed\xe8\xe5 \xe7\xe0 \xed\xe5\xe2\xfb\xe4\xe5\xf0\xe6\xe0\xed\xed\xfb\xe9 \xe8\xe7 \xe7\xe0\xea\xee\xed\xe0 \xe1\xee\xe6\xfc\xe5\xe3\xee \xfd\xea\xe7\xe0\xec\xe5\xed, \xe7\xe0\xed\xe8\xec\xe0\xed\xe8\xe5 \xe4\xe5\xed\xe5\xe3 \xf3 \xc3\xe0\xe2\xf0\xe8\xeb\xfb \xed\xe0 \xe8\xe7\xe2\xee\xe7\xf7\xe8\xea\xe0, \xf2\xe0\xe9\xed\xfb\xe5 \xef\xee\xf6\xe5\xeb\xf3\xe8 \xf1 \xd1\xee\xed\xe5\xe9 \x97 \xee\xed \xef\xf0\xee \xe2\xf1\xe5 \xfd\xf2\xee \xe2\xf1\xef\xee\xec\xe8\xed\xe0\xeb, \xea\xe0\xea \xef\xf0\xee \xf0\xe5\xe1\xff\xf7\xe5\xf1\xf2\xe2\xee, \xee\xf2 \xea\xee\xf2\xee\xf0\xee\xe3\xee \xee\xed \xed\xe5\xe8\xe7\xec\xe5\xf0\xe8\xec\xee \xe1\xfb\xeb \xe4\xe0\xeb\xe5\xea \xf2\xe5\xef\xe5\xf0\xfc. \xd2\xe5\xef\xe5\xf0\xfc \xee\xed \x97 \xe3\xf3\xf1\xe0\xf0\xf1\xea\xe8\xe9 \xef\xee\xf0\xf3\xf7\xe8\xea \xe2 \xf1\xe5\xf0\xe5\xe1\xf0\xff\xed\xee\xec \xec\xe5\xed\xf2\xe8\xea\xe5, \xf1 \xf1\xee\xeb\xe4\xe0\xf2\xf1\xea\xe8\xec \xc3\xe5\xee\xf0\xe3\xe8\xe5\xec, \xe3\xee\xf2\xee\xe2\xe8\xf2 \xf1\xe2\xee\xe5\xe3\xee \xf0\xfb\xf1\xe0\xea\xe0 \xed\xe0 \xe1\xe5\xe3, \xe2\xec\xe5\xf1\xf2\xe5 \xf1 \xe8\xe7\xe2\xe5\xf1\xf2\xed\xfb\xec\xe8 \xee\xf5\xee\xf2\xed\xe8\xea\xe0\xec\xe8, \xef\xee\xe6\xe8\xeb\xfb\xec\xe8, \xef\xee\xf7\xf2\xe5\xed\xed\xfb\xec\xe8. \xd3 \xed\xe5\xe3\xee \xe7\xed\xe0\xea\xee\xec\xe0\xff \xe4\xe0\xec\xe0 \xed\xe0 \xe1\xf3\xeb\xfc\xe2\xe0\xf0\xe5, \xea \xea\xee\xf2\xee\xf0\xee\xe9 \xee\xed \xe5\xe7\xe4\xe8\xf2 \xe2\xe5\xf7\xe5\xf0\xee\xec. 
\xce\xed \xe4\xe8\xf0\xe8\xe6\xe8\xf0\xee\xe2\xe0\xeb \xec\xe0\xe7\xf3\xf0\xea\xf3 \xed\xe0 \xe1\xe0\xeb\xe5 \xf3 \xc0\xf0\xf5\xe0\xf0\xee\xe2\xfb\xf5, \xf0\xe0\xe7\xe3\xee\xe2\xe0\xf0\xe8\xe2\xe0\xeb \xee \xe2\xee\xe9\xed\xe5 \xf1 \xf4\xe5\xeb\xfc\xe4\xec\xe0\xf0\xf8\xe0\xeb\xee\xec \xca\xe0\xec\xe5\xed\xf1\xea\xe8\xec, \xe1\xfb\xe2\xe0\xeb \xe2 \xc0\xed\xe3\xeb\xe8\xe9\xf1\xea\xee\xec \xea\xeb\xf3\xe1\xe5 \xe8 \xe1\xfb\xeb \xed\xe0 \xf2\xfb \xf1 \xee\xe4\xed\xe8\xec \xf1\xee\xf0\xee\xea\xe0\xeb\xe5\xf2\xed\xe8\xec \xef\xee\xeb\xea\xee\xe2\xed\xe8\xea\xee\xec, \xf1 \xea\xee\xf2\xee\xf0\xfb\xec \xef\xee\xe7\xed\xe0\xea\xee\xec\xe8\xeb \xe5\xe3\xee \xc4\xe5\xed\xe8\xf1\xee\xe2. '
# Windows-1251: decode the sample bytes, echo the text, then re-encode and
# report if the result differs from the original bytes.
let decoded = codecs.decode(data, "windows-1251")
print(decoded)
let encoded = codecs.encode(decoded, "windows-1251")
if encoded != data:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
let data2 = b'\x81@\x93\xfa\x96{\x9a\xa0\x96\xaf\x82\xcd\x81A\x90\xb3\xe1c\x82\xc9\x91I\x9d\xa7\x82\xb3\x82\xea\x82\xbd\x9a\xa0\x98\xf0\x82\xc9\x82\xa8\x82\xaf\x82\xe9\x91\xe3\x95\\\x8e\xd2\x82\xf0\x92\xca\x82\xb6\x82\xc4\x8ds\x93\xae\x82\xb5\x81A\x82\xed\x82\xea\x82\xe7\x82\xc6\x82\xed\x82\xea\x82\xe7\x82\xcc\x8eq\x91\xb7\x82\xcc\x82\xbd\x82\xdf\x82\xc9\x81A\x8f\x94\x9a\xa0\x96\xaf\x82\xc6\x82\xcc\x8b\xa6\x98a\x82\xc9\x82\xe6\x82\xe9\x90\xac\x89\xca\x82\xc6\x81A\x82\xed\x82\xaa\x9a\xa0\x91S\x93y\x82\xc9\x82\xed\x82\xbd\x82\xc2\x82\xc4\x8e\xa9\x97R\x82\xcc\x82\xe0\x82\xbd\x82\xe7\x82\xb7\x9c\xa8\xe0V\x82\xf0\x8am\x95\xdb\x82\xb5\x81A\x90\xad\x95{\x82\xcc\x8ds\xe0\xa8\x82\xc9\x82\xe6\x82\xc2\x82\xc4\x8d\xc4\x82\xd1\x9dD\xe0\xa5\x82\xcc\x9c\xcc\x89\xd0\x82\xaa\x8bN\x82\xe9\x82\xb1\x82\xc6\x82\xcc\x82\xc8\x82\xa2\x82\xe2\x82\xa4\x82\xc9\x82\xb7\x82\xe9\x82\xb1\x82\xc6\x82\xf0\x8c\x88\x88\xd3\x82\xb5\x81A\x82\xb1\x82\xb1\x82\xc9\x8e\xe5\x9e\xdc\x82\xaa\x9a\xa0\x96\xaf\x82\xc9\x91\xb6\x82\xb7\x82\xe9\x82\xb1\x82\xc6\x82\xf0\x90\xe9\x8c\xbe\x82\xb5\x81A\x82\xb1\x82\xcc\x8c\x9b\x96@\x82\xf0\x8am\x92\xe8\x82\xb7\x82\xe9\x81B\x82\xbb\x82\xe0\x82\xbb\x82\xe0\x9a\xa0\x90\xad\x82\xcd\x81A\x9a\xa0\x96\xaf\x82\xcc\x9a\x8e\xe3\xe7\x82\xc8\x90M\x91\xf5\x82\xc9\x82\xe6\x82\xe9\x82\xe0\x82\xcc\x82\xc5\x82\xa0\x82\xc2\x82\xc4\x81A\x82\xbb\x82\xcc\x9e\xdc\x88\xd0\x82\xcd\x9a\xa0\x96\xaf\x82\xc9\x97R\x98\xd2\x82\xb5\x81A\x82\xbb\x82\xcc\x9e\xdc\x97\xcd\x82\xcd\x9a\xa0\x96\xaf\x82\xcc\x91\xe3\x95\\\x8e\xd2\x82\xaa\x82\xb1\x82\xea\x82\xf0\x8ds\x8eg\x82\xb5\x81A\x82\xbb\x82\xcc\x95\x9f\x97\x98\x82\xcd\x9a\xa0\x96\xaf\x82\xaa\x82\xb1\x82\xea\x82\xf0\x8b\x9d\x8e\xf3\x82\xb7\x82\xe9\x81B\x82\xb1\x82\xea\x82\xcd\x90l\x97\xde\x95\x81\x95\xd5\x82\xcc\x8c\xb4\x97\x9d\x82\xc5\x82\xa0\x82\xe8\x81A\x82\xb1\x82\xcc\x8c\x9b\x96@\x82\xcd\x81A\x82\xa9\x82\xa9\x82\xe9\x8c\xb4\x97\x9d\x82\xc9\x8a\xee\x82\xad\x82\xe0\x82\xcc\x82\xc5\x82\xa0\x82\xe9\x81B\x82\xed\x82\xea\x82\xe7\x82\xcd\x81A\x82\xb1\x82\xea\
x82\xc9\x94\xbd\x82\xb7\x82\xe9\x88\xea\x90\xd8\x82\xcc\x8c\x9b\x96@\x81A\x96@\x97\xdf\x8by\x82\xd1\x8f\xd9\x92\xba\x82\xf0\x94r\x8f\x9c\x82\xb7\x82\xe9\x81B \x87\x82'
# Windows-31J: decode the sample bytes, echo the text, then re-encode and
# report if the result differs from the original bytes.
let decoded2 = codecs.decode(data2, "windows-31j")
print(decoded2)
let encoded2 = codecs.encode(decoded2, "windows-31j")
if encoded2 != data2:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
let data2a = b'\xa1\xa1\xc6\xfc\xcb\xdc\xd4\xa2\xcc\xb1\xa4\xcf\xa1\xa2\xc0\xb5\xe1\xc4\xa4\xcb\xc1\xaa\xda\xa9\xa4\xb5\xa4\xec\xa4\xbf\xd4\xa2\xd0\xf2\xa4\xcb\xa4\xaa\xa4\xb1\xa4\xeb\xc2\xe5\xc9\xbd\xbc\xd4\xa4\xf2\xc4\xcc\xa4\xb8\xa4\xc6\xb9\xd4\xc6\xb0\xa4\xb7\xa1\xa2\xa4\xef\xa4\xec\xa4\xe9\xa4\xc8\xa4\xef\xa4\xec\xa4\xe9\xa4\xce\xbb\xd2\xc2\xb9\xa4\xce\xa4\xbf\xa4\xe1\xa4\xcb\xa1\xa2\xbd\xf4\xd4\xa2\xcc\xb1\xa4\xc8\xa4\xce\xb6\xa8\xcf\xc2\xa4\xcb\xa4\xe8\xa4\xeb\xc0\xae\xb2\xcc\xa4\xc8\xa1\xa2\xa4\xef\xa4\xac\xd4\xa2\xc1\xb4\xc5\xda\xa4\xcb\xa4\xef\xa4\xbf\xa4\xc4\xa4\xc6\xbc\xab\xcd\xb3\xa4\xce\xa4\xe2\xa4\xbf\xa4\xe9\xa4\xb9\xd8\xaa\xdf\xb7\xa4\xf2\xb3\xce\xca\xdd\xa4\xb7\xa1\xa2\xc0\xaf\xc9\xdc\xa4\xce\xb9\xd4\xe0\xaa\xa4\xcb\xa4\xe8\xa4\xc4\xa4\xc6\xba\xc6\xa4\xd3\xd9\xa5\xe0\xa7\xa4\xce\xd8\xce\xb2\xd2\xa4\xac\xb5\xaf\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xce\xa4\xca\xa4\xa4\xa4\xe4\xa4\xa6\xa4\xcb\xa4\xb9\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xf2\xb7\xe8\xb0\xd5\xa4\xb7\xa1\xa2\xa4\xb3\xa4\xb3\xa4\xcb\xbc\xe7\xdc\xde\xa4\xac\xd4\xa2\xcc\xb1\xa4\xcb\xc2\xb8\xa4\xb9\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xf2\xc0\xeb\xb8\xc0\xa4\xb7\xa1\xa2\xa4\xb3\xa4\xce\xb7\xfb\xcb\xa1\xa4\xf2\xb3\xce\xc4\xea\xa4\xb9\xa4\xeb\xa1\xa3\xa4\xbd\xa4\xe2\xa4\xbd\xa4\xe2\xd4\xa2\xc0\xaf\xa4\xcf\xa1\xa2\xd4\xa2\xcc\xb1\xa4\xce\xd3\xee\xe6\xe9\xa4\xca\xbf\xae\xc2\xf7\xa4\xcb\xa4\xe8\xa4\xeb\xa4\xe2\xa4\xce\xa4\xc7\xa4\xa2\xa4\xc4\xa4\xc6\xa1\xa2\xa4\xbd\xa4\xce\xdc\xde\xb0\xd2\xa4\xcf\xd4\xa2\xcc\xb1\xa4\xcb\xcd\xb3\xd0\xd4\xa4\xb7\xa1\xa2\xa4\xbd\xa4\xce\xdc\xde\xce\xcf\xa4\xcf\xd4\xa2\xcc\xb1\xa4\xce\xc2\xe5\xc9\xbd\xbc\xd4\xa4\xac\xa4\xb3\xa4\xec\xa4\xf2\xb9\xd4\xbb\xc8\xa4\xb7\xa1\xa2\xa4\xbd\xa4\xce\xca\xa1\xcd\xf8\xa4\xcf\xd4\xa2\xcc\xb1\xa4\xac\xa4\xb3\xa4\xec\xa4\xf2\xb5\xfd\xbc\xf5\xa4\xb9\xa4\xeb\xa1\xa3\xa4\xb3\xa4\xec\xa4\xcf\xbf\xcd\xce\xe0\xc9\xe1\xca\xd7\xa4\xce\xb8\xb6\xcd\xfd\xa4\xc7\xa4\xa2\xa4\xea\xa1\xa2\xa4\xb3\xa4\xce\xb7\xfb\xcb\xa1\xa4\xcf\xa1\xa2\xa4\xab\xa4\xab\xa4\xeb\xb8\xb6\xcd\xfd\
xa4\xcb\xb4\xf0\xa4\xaf\xa4\xe2\xa4\xce\xa4\xc7\xa4\xa2\xa4\xeb\xa1\xa3\xa4\xef\xa4\xec\xa4\xe9\xa4\xcf\xa1\xa2\xa4\xb3\xa4\xec\xa4\xcb\xc8\xbf\xa4\xb9\xa4\xeb\xb0\xec\xc0\xda\xa4\xce\xb7\xfb\xcb\xa1\xa1\xa2\xcb\xa1\xce\xe1\xb5\xda\xa4\xd3\xbe\xdb\xc4\xbc\xa4\xf2\xc7\xd3\xbd\xfc\xa4\xb9\xa4\xeb\xa1\xa3 \xad\xe2'
# EUC-JP: decode the sample bytes, echo the text, then re-encode and report
# if the result differs from the original bytes.
let decoded2a = codecs.decode(data2a, "euc-jp")
print(decoded2a)
let encoded2a = codecs.encode(decoded2a, "euc-jp")
if encoded2a != data2a:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
# The three-byte sequence 8F A2 F1 and the two-byte sequence AD E2 must decode
# to the same text under EUC-JP.
# NOTE(review): per the failure message these are presumably the JIS X 0212
# and NEC-extension forms of one character -- confirm against the codec tables.
if codecs.decode(b'\x8f\xa2\xf1', "euc-jp") != codecs.decode(b'\xad\xe2', "euc-jp"):
    print("NEC / 0212 equivalent didn't match")
let data2b = b'\[$B!!F|K\\T"L1$O!"@5aD$KA*Z)$5$l$?T"Pr$K$*$1$kBeI=<T$rDL$8$F9TF0$7!"$o$l$i$H$o$l$i$N;RB9$N$?$a$K!"=tT"L1$H$N6(OB$K$h$k@.2L$H!"$o$,T"A4EZ$K$o$?$D$F<+M3$N$b$?$i$9X*_7$r3NJ]$7!"@/I\\$N9T`*$K$h$D$F:F$SY%`\'$NXN2R$,5/$k$3$H$N$J$$$d$&$K$9$k$3$H$r7h0U$7!"$3$3$K<g\\^$,T"L1$KB8$9$k$3$H$r@k8@$7!"$3$N7{K!$r3NDj$9$k!#$=$b$=$bT"@/$O!"T"L1$NSnfi$J?.Bw$K$h$k$b$N$G$"$D$F!"$=$N\\^0R$OT"L1$KM3PT$7!"$=$N\\^NO$OT"L1$NBeI=<T$,$3$l$r9T;H$7!"$=$NJ!Mx$OT"L1$,$3$l$r5}<u$9$k!#$3$l$O?MN`IaJW$N86M}$G$"$j!"$3$N7{K!$O!"$+$+$k86M}$K4p$/$b$N$G$"$k!#$o$l$i$O!"$3$l$KH?$9$k0l@Z$N7{K!!"K!Na5Z$S>[D<$rGS=|$9$k!#\[(B \[$B-b\[(B'
# ISO-2022-JP: decode the sample bytes, echo the text, then re-encode and
# report if the result differs from the original bytes.
let decoded2b = codecs.decode(data2b, "iso-2022-jp")
print(decoded2b)
let encoded2b = codecs.encode(decoded2b, "iso-2022-jp")
if encoded2b != data2b:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
let data3 = b'\xc0\xaf\xb1\xb8\xc7\xd1 \xbf\xaa\xbb\xe7\xbf\xcd \xc0\xfc\xc5\xeb\xbf\xa1 \xba\xfb\xb3\xaa\xb4\xc2 \xbf\xec\xb8\xae \xb4\xeb\xc7\xd1\xb1\xb9\xb9\xce\xc0\xba 3\xa1\xa41\xbf\xee\xb5\xbf\xc0\xb8\xb7\xce \xb0\xc7\xb8\xb3\xb5\xc8 \xb4\xeb\xc7\xd1\xb9\xce\xb1\xb9\xc0\xd3\xbd\xc3\xc1\xa4\xba\xce\xc0\xc7 \xb9\xfd\xc5\xeb\xb0\xfa \xba\xd2\xc0\xc7\xbf\xa1 \xc7\xd7\xb0\xc5\xc7\xd1 4\xa1\xa419\xb9\xce\xc1\xd6\xc0\xcc\xb3\xe4\xc0\xbb \xb0\xe8\xbd\xc2\xc7\xcf\xb0\xed, \xc1\xb6\xb1\xb9\xc0\xc7 \xb9\xce\xc1\xd6\xb0\xb3\xc7\xf5\xb0\xfa \xc6\xf2\xc8\xad\xc0\xfb \xc5\xeb\xc0\xcf\xc0\xc7 \xbb\xe7\xb8\xed\xbf\xa1 \xc0\xd4\xb0\xa2\xc7\xcf\xbf\xa9 \xc1\xa4\xc0\xc7\xa1\xa4\xc0\xce\xb5\xb5\xbf\xcd \xb5\xbf\xc6\xf7\xbe\xd6\xb7\xce\xbd\xe1 \xb9\xce\xc1\xb7\xc0\xc7 \xb4\xdc\xb0\xe1\xc0\xbb \xb0\xf8\xb0\xed\xc8\xf7 \xc7\xcf\xb0\xed, \xb8\xf0\xb5\xe7 \xbb\xe7\xc8\xb8\xc0\xfb \xc6\xf3\xbd\xc0\xb0\xfa \xba\xd2\xc0\xc7\xb8\xa6 \xc5\xb8\xc6\xc4\xc7\xcf\xb8\xe7, \xc0\xda\xc0\xb2\xb0\xfa \xc1\xb6\xc8\xad\xb8\xa6 \xb9\xd9\xc5\xc1\xc0\xb8\xb7\xce \xc0\xda\xc0\xaf\xb9\xce\xc1\xd6\xc0\xfb \xb1\xe2\xba\xbb\xc1\xfa\xbc\xad\xb8\xa6 \xb4\xf5\xbf\xed \xc8\xae\xb0\xed\xc8\xf7 \xc7\xcf\xbf\xa9 \xc1\xa4\xc4\xa1\xa1\xa4\xb0\xe6\xc1\xa6\xa1\xa4\xbb\xe7\xc8\xb8\xa1\xa4\xb9\xae\xc8\xad\xc0\xc7 \xb8\xf0\xb5\xe7 \xbf\xb5\xbf\xaa\xbf\xa1 \xc0\xd6\xbe\xee\xbc\xad \xb0\xa2\xc0\xce\xc0\xc7 \xb1\xe2\xc8\xb8\xb8\xa6 \xb1\xd5\xb5\xee\xc8\xf7 \xc7\xcf\xb0\xed, \xb4\xc9\xb7\xc2\xc0\xbb \xc3\xd6\xb0\xed\xb5\xb5\xb7\xce \xb9\xdf\xc8\xd6\xc7\xcf\xb0\xd4 \xc7\xcf\xb8\xe7, \xc0\xda\xc0\xaf\xbf\xcd \xb1\xc7\xb8\xae\xbf\xa1 \xb5\xfb\xb8\xa3\xb4\xc2 \xc3\xa5\xc0\xd3\xb0\xfa \xc0\xc7\xb9\xab\xb8\xa6 \xbf\xcf\xbc\xf6\xc7\xcf\xb0\xd4 \xc7\xcf\xbf\xa9, \xbe\xc8\xc0\xb8\xb7\xce\xb4\xc2 \xb1\xb9\xb9\xce\xbb\xfd\xc8\xb0\xc0\xc7 \xb1\xd5\xb5\xee\xc7\xd1 \xc7\xe2\xbb\xf3\xc0\xbb \xb1\xe2\xc7\xcf\xb0\xed \xb9\xdb\xc0\xb8\xb7\xce\xb4\xc2 \xc7\xd7\xb1\xb8\xc0\xfb\xc0\xce \xbc\xbc\xb0\xe8\xc6\xf2\xc8\xad\xbf\xcd 
\xc0\xce\xb7\xf9\xb0\xf8\xbf\xb5\xbf\xa1 \xc0\xcc\xb9\xd9\xc1\xf6\xc7\xd4\xc0\xb8\xb7\xce\xbd\xe1 \xbf\xec\xb8\xae\xb5\xe9\xb0\xfa \xbf\xec\xb8\xae\xb5\xe9\xc0\xc7 \xc0\xda\xbc\xd5\xc0\xc7 \xbe\xc8\xc0\xfc\xb0\xfa \xc0\xda\xc0\xaf\xbf\xcd \xc7\xe0\xba\xb9\xc0\xbb \xbf\xb5\xbf\xf8\xc8\xf7 \xc8\xae\xba\xb8\xc7\xd2 \xb0\xcd\xc0\xbb \xb4\xd9\xc1\xfc\xc7\xcf\xb8\xe9\xbc\xad 1948\xb3\xe2 7\xbf\xf9 12\xc0\xcf\xbf\xa1 \xc1\xa6\xc1\xa4\xb5\xc7\xb0\xed 8\xc2\xf7\xbf\xa1 \xb0\xc9\xc3\xc4 \xb0\xb3\xc1\xa4\xb5\xc8 \xc7\xe5\xb9\xfd\xc0\xbb \xc0\xcc\xc1\xa6 \xb1\xb9\xc8\xb8\xc0\xc7 \xc0\xc7\xb0\xe1\xc0\xbb \xb0\xc5\xc3\xc4 \xb1\xb9\xb9\xce\xc5\xf5\xc7\xa5\xbf\xa1 \xc0\xc7\xc7\xcf\xbf\xa9 \xb0\xb3\xc1\xa4\xc7\xd1\xb4\xd9. \x8cc\xb9\xe6\xb0\xa2\xc7\xcf'
# Windows-949: decode the sample bytes (undecodable input is replaced rather
# than raising), echo the text, then re-encode with the default error handling
# and report if the result differs from the original bytes.
let decoded3 = codecs.decode(data3, "windows-949", errors="replace")
print(decoded3)
let encoded3 = codecs.encode(decoded3, "windows-949")
if encoded3 != data3:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
# Decoding through the "undefined" codec with errors="replace" is expected to
# produce exactly one U+FFFD for the whole input; anything else is reported.
if codecs.decode("hello", "undefined", errors="replace") != "\uFFFD":
    print("Undefined codec didn't work as expected")
let data4 = b'\xa4\xd1\xa6a\xa5\xc8\xb6\xc0\xa1@\xa6t\xa9z\xacx\xaf\xee \xa1\xfc \xa4\xe9\xa4\xeb\xac\xd5\xcc\xca\xa1@\xa8\xb0\xb1J\xa6C\xb1i \xa1\xfc \xb4H\xa8\xd3\xb4\xbb\xa9\xb9\xa1@\xac\xee\xa6\xac\xa5V\xc2\xc3 \xa1\xfc \xb6|\xbel\xa6\xa8\xb7\xb3\xa1@\xab\xdf\xa5l\xbd\xd5\xb6\xa7 \xa1\xfc \xb6\xb3\xc4\xcb\xadP\xabB\xa1@\xc5S\xb5\xb2\xac\xb0\xc1\xf7 \xa1\xfc \xaa\xf7\xa5\xcd\xc4R\xa4\xf4\xa1@\xa5\xc9\xa5X\xb1X\xa9\xa3 \xa1\xfc \xbcC\xb8\xb9\xa5\xa8\xc2\xf6\xa1@\xaf]\xba\xd9\xa9]\xa5\xfa \xa1\xfc \xaaG\xac\xc3\xa7\xf5\xcfU\xa1@\xb5\xe6\xad\xab\xaa\xe3\xc1\xa4 \xa1\xfc \xae\xfc\xc4\xd0\xaae\xb2H\xa1@\xc5\xec\xbc\xe7\xa6\xd0\xb5\xbe \xa1\xfc \xc0s\xaev\xa4\xf5\xab\xd2\xa1@\xb3\xbe\xa9x\xa4H\xac\xd3 \xa1\xfc \xa9l\xa8\xee\xa4\xe5\xa6r\xa1@\xa4D\xaaA\xa6\xe7\xbbn \xa1\xfc \xb1\xc0\xa6\xec\xc5\xfd\xb0\xea\xa1@\xa6\xb3\xb8\xb7\xb3\xb3\xad\xf0 \xa1\xfc \xa4\xdd\xa5\xc1\xa5\xef\xb8o\xa1@\xa9P\xb5o\xae\xef\xb4\xf6 \xa1\xfc \xa7\xa4\xb4\xc2\xb0\xdd\xb9D\xa1@\xab\xab\xab\xfd\xa5\xad\xb3\xb9 \xa1\xfc \xb7R\xa8|\xbe\xa4\xad\xba\xa1@\xa6\xda\xa5\xf1\xa6\xa5\xaa\xca \xa1\xfc \xb9I\xc2\xe2\xb3\xfc\xc5\xe9\xa1@\xb2v\xbb\xab\xc2k\xa4\xfd \xa1\xfc \xbb\xef\xbb\xf1\xa6b\xbe\xf0\xa1@\xa5\xd5\xbes\xad\xb9\xb3\xf5 \xa1\xfc \xa4\xc6\xb3Q\xaf\xf3\xa4\xec\xa1@\xbf\xe0\xa4\xce\xb8U\xa4\xe8 \xa1\xfc \xbb\\\xa6\xb9\xa8\xad\xbev\xa1@\xa5|\xa4j\xa4\xad\xb1` \xa1\xfc \xae\xa5\xb1\xa9\xc1\xf9\xbei\xa1@\xb0Z\xb4\xb1\xb7\xb4\xb6\xcb \xa1\xfc \xa4k\xbc}\xads\xda\xf4\xa1@\xa8k\xae\xc4\xa4~\xa8} \xa1\xfc \xaa\xbe\xb9L\xa5\xb2\xa7\xef\xa1@\xb1o\xaf\xe0\xb2\xf6\xa7\xd1 \xa1\xfc \xaa\xc9\xbd\xcd\xa9\xbc\xb5u\xa1@\xc3\xfb\xab\xee\xa4v\xaa\xf8 \xa1\xfc \xabH\xa8\xcf\xa5i\xc2\xd0\xa1@\xbe\xb9\xb1\xfd\xc3\xf8\xb6q \xa1\xfc \xbe\xa5\xb4d\xb5\xb7\xb2f\xa1@\xb8\xd6\xc6g\xaf\xcc\xa6\xcf \xa1\xfc \xb4\xba\xa6\xe6\xba\xfb\xbd\xe5\xa1@\xa7J\xa9\xc0\xa7@\xb8t \xa1\xfc \xbcw\xab\xd8\xa6W\xa5\xdf\xa1@\xa7\xce\xba\xdd\xaa\xed\xa5\xbf \xa1\xfc \xaa\xc5\xa8\xa6\xb6\xc7\xc1n\xa1@\xb5\xea\xb0\xf3\xb2\xdf\xc5\xa5 \xa1\xfc 
\xba\xd7\xa6]\xb4c\xbfn\xa1@\xba\xd6\xbdt\xb5\xbd\xbcy \xa1\xfc \xa4\xd8\xc2z\xabD\xc4_\xa1@\xa4o\xb3\xb1\xacO\xc4v \xa1\xfc \xb8\xea\xa4\xf7\xa8\xc6\xa7g\xa1@\xa4\xea\xc4Y\xbbP\xb7q \xa1\xfc \xa7\xb5\xb7\xed\xba\xdc\xa4O\xa1@\xa9\xbe\xabh\xba\xc9\xa9R \xa1\xfc \xc1{\xb2`\xbci\xc1\xa1\xa1@\xa6g\xbf\xb3\xb7\xc5\xd0\xe1 \xa1\xfc \xa6\xfc\xc4\xf5\xb4\xb5\xc4\xc9\xa1@\xa6p\xaaQ\xa4\xa7\xb2\xb1 \xa1\xfc \xa4t\xacy\xa4\xa3\xae\xa7\xa1@\xb2W\xbc\xe1\xa8\xfa\xacM \xa1\xfc \xaee\xa4\xee\xadY\xab\xe4\xa1@\xa8\xa5\xc3\xe3\xa6w\xa9w \xa1\xfc \xbfw\xaa\xec\xb8\xdb\xac\xfc\xa1@\xb7V\xb2\xd7\xa9y\xa5O \xa1\xfc \xbaa\xb7~\xa9\xd2\xb0\xf2\xa1@\xc4y\xac\xc6\xb5L\xb3\xba \xa1\xfc \xbe\xc7\xc0u\xb5n\xa5K\xa1@\xc4\xe1\xc2\xbe\xb1q\xacF \xa1\xfc \xa6s\xa5H\xa5\xcc\xb4\xc5\xa1@\xa5h\xa6\xd3\xafq\xb5\xfa \xa1\xfc \xbc\xd6\xae\xed\xb6Q\xbd\xe2\xa1@\xc2\xa7\xa7O\xb4L\xa8\xf5 \xa1\xfc \xa4W\xa9M\xa4U\xb7\xfc\xa1@\xa4\xd2\xb0\xdb\xb0\xfc\xc0H \xa1\xfc \xa5~\xa8\xfc\xb3\xc5\xb0V\xa1@\xa4J\xa9^\xa5\xc0\xbb\xf6 \xa1\xfc \xbd\xd1\xa9h\xa7B\xa8\xfb\xa1@\xb5S\xa4l\xa4\xf1\xa8\xe0 \xa1\xfc \xa4\xd5\xc3h\xa5S\xa7\xcc\xa1@\xa6P\xae\xf0\xb3s\xaaK \xa1\xfc \xa5\xe6\xa4\xcd\xa7\xeb\xa4\xc0\xa1@\xa4\xc1\xbfi\xbde\xb3W \xa1\xfc \xa4\xaf\xb7O\xc1\xf4\xb4l\xa1@\xb3y\xa6\xb8\xa5\xb1\xc2\xf7 \xa1\xfc \xb8`\xb8q\xb7G\xb0h\xa1@\xc4A\xa8K\xad\xea\xc1\xab \xa1\xfc \xa9\xca\xc0R\xb1\xa1\xb6h\xa1@\xa4\xdf\xb0\xca\xaf\xab\xafh \xa1\xfc \xa6u\xafu\xa7\xd3\xba\xa1\xa1@\xb3v\xaa\xab\xb7N\xb2\xbe \xa1\xfc \xb0\xed\xab\xf9\xb6\xae\xbe\xde\xa1@\xa6n\xc0\xef\xa6\xdb\xed\xdd \xa1\xfc \xb3\xa3\xa8\xb6\xb5\xd8\xaeL\xa1@\xaaF\xa6\xe8\xa4G\xa8\xca \xa1\xfc \xadI\xcaQ\xad\xb1\xac\xa5\xa1@\xafB\xb4\xf4\xbe\xda\xae\xf9 \xa1\xfc \xaec\xb7\xb5\xbdL\xc6{\xa1@\xbc\xd3\xc6[\xad\xb8\xc5\xe5 \xa1\xfc \xb9\xcf\xbcg\xb8V\xc3~\xa1@\xb5e\xb1m\xa5P\xc6F \xa1\xfc \xa4\xfe\xaa\xd9\xb3\xc4\xb1\xd2\xa1@\xa5\xd2\xb1b\xb9\xef\xb7\xad \xa1\xfc \xb8v\xba\xe1\xb3]\xaeu\xa1@\xb9\xaa\xb7\xe6\xa7j\xb2\xc6 \xa1\xfc 
\xa4\xc9\xb6\xa5\xaf\xc7\xb0\xa1\xa1@\xa5\xaf\xc2\xe0\xba\xc3\xacP \xa1\xfc \xa5k\xb3q\xbcs\xa4\xba\xa1@\xa5\xaa\xb9F\xa9\xd3\xa9\xfa \xa1
# Big5: decode the sample bytes, echo the text, then re-encode and report if
# the result differs from the original bytes.
let decoded4 = codecs.decode(data4, "big5")
print(decoded4)
let encoded4 = codecs.encode(decoded4, "big5")
if encoded4 != data4:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
# Five consecutive two-byte Big5 codes (88 62 .. 88 66) must decode to the
# expected text, which includes base-letter + combining-mark sequences.
if codecs.decode(b"\x88\x62\x88\x63\x88\x64\x88\x65\x88\x66", "big5", errors="replace") != "Ê̄ẾÊ̌ỀÊ":
    print("Didn't decode the combining-sequence-mapped codes")
let data5 = b'\x949\xc65 \xa8\xaa Br\x810\x8d0ko\n\xa7\xa9\xa7\xdf\xa7\xd1\xa7\xdc\xa7\xe0\xa7\xde\xa7\xe3\xa7\xe4\xa7\xd3\xa7\xe0 \xa7\xe5 \xa7\xb2\xa7\xe0\xa7\xe3\xa7\xe4\xa7\xe0\xa7\xd3\xa7\xed\xa7\xe7 \xa7\xd2\xa7\xed\xa7\xdd\xa7\xd1 \xa7\xd3\xa7\xe3\xa7\xf1 \xa7\xae\xa7\xe0\xa7\xe3\xa7\xdc\xa7\xd3\xa7\xd1; \xa7\xd5\xa7\xd6\xa7\xdf\xa7\xd6\xa7\xd4 \xa7\xd3 \xa7\xdf\xa7\xed\xa7\xdf\xa7\xd6\xa7\xea\xa7\xdf\xa7\xda\xa7\xdb \xa7\xd4\xa7\xe0\xa7\xd5 \xa7\xe5 \xa7\xe3\xa7\xe4\xa7\xd1\xa7\xe2\xa7\xe0\xa7\xd4\xa7\xe0 \xa7\xd4\xa7\xe2\xa7\xd1\xa7\xe6\xa7\xd1 \xa7\xd2\xa7\xed\xa7\xdd\xa7\xe0 \xa7\xd5\xa7\xe0\xa7\xe3\xa7\xe4\xa7\xd1\xa7\xe4\xa7\xe0\xa7\xe9\xa7\xdf\xa7\xe0, \xa7\xe1\xa7\xe0\xa7\xe4\xa7\xe0\xa7\xde\xa7\xe5 \xa7\xe9\xa7\xe4\xa7\xe0 \xa7\xd2\xa7\xed\xa7\xdd\xa7\xda \xa7\xe1\xa7\xd6\xa7\xe2\xa7\xd6\xa7\xd9\xa7\xd1\xa7\xdd\xa7\xe0\xa7\xd8\xa7\xd6\xa7\xdf\xa7\xed \xa7\xd3\xa7\xe3\xa7\xd6 \xa7\xda\xa7\xde\xa7\xd6\xa7\xdf\xa7\xda\xa7\xf1, \xa7\xda \xa7\xe1\xa7\xe0\xa7\xe4\xa7\xe0\xa7\xde\xa7\xe5 \xa7\xaf\xa7\xda\xa7\xdc\xa7\xe0\xa7\xdd\xa7\xe5\xa7\xea\xa7\xdc\xa7\xd1, \xa7\xd9\xa7\xd1\xa7\xd3\xa7\xd6\xa7\xd5\xa7\xf1 \xa7\xe3\xa7\xd3\xa7\xe0\xa7\xd6\xa7\xd4\xa7\xe0 \xa7\xe3\xa7\xe0\xa7\xd2\xa7\xe3\xa7\xe4\xa7\xd3\xa7\xd6\xa7\xdf\xa7\xdf\xa7\xe0\xa7\xd4\xa7\xe0 \xa7\xe2\xa7\xed\xa7\xe3\xa7\xd1\xa7\xdc\xa7\xd1 \xa7\xda \xa7\xe3\xa7\xd1\xa7\xde\xa7\xed\xa7\xd6 \xa7\xde\xa7\xe0\xa7\xd5\xa7\xdf\xa7\xed\xa7\xd6 \xa7\xe2\xa7\xd6\xa7\xdb\xa7\xe4\xa7\xe5\xa7\xd9\xa7\xed, 
\n\xa1\xa1\xc8\xd5\xb1\xbe\x87\xf8\xc3\xf1\xa4\xcf\xa1\xa2\xd5\xfd\xae\x94\xa4\xcb\xdfx\x94H\xa4\xb5\xa4\xec\xa4\xbf\x87\xf8\x95\xfe\xa4\xcb\xa4\xaa\xa4\xb1\xa4\xeb\xb4\xfa\xb1\xed\xd5\xdf\xa4\xf2\xcd\xa8\xa4\xb8\xa4\xc6\xd0\xd0\x84\xd3\xa4\xb7\xa1\xa2\xa4\xef\xa4\xec\xa4\xe9\xa4\xc8\xa4\xef\xa4\xec\xa4\xe9\xa4\xce\xd7\xd3\x8cO\xa4\xce\xa4\xbf\xa4\xe1\xa4\xcb\xa1\xa2\xd6T\x87\xf8\xc3\xf1\xa4\xc8\xa4\xce\x85f\xba\xcd\xa4\xcb\xa4\xe8\xa4\xeb\xb3\xc9\xb9\xfb\xa4\xc8\xa1\xa2\xa4\xef\xa4\xac\x87\xf8\xc8\xab\xcd\xc1\xa4\xcb\xa4\xef\xa4\xbf\xa4\xc4\xa4\xc6\xd7\xd4\xd3\xc9\xa4\xce\xa4\xe2\xa4\xbf\xa4\xe9\xa4\xb9\xbb\xdd\x9d\xc9\xa4\xf2\xb4_\xb1\xa3\xa4\xb7\xa1\xa2\xd5\xfe\xb8\xae\xa4\xce\xd0\xd0\xa0\x91\xa4\xcb\xa4\xe8\xa4\xc4\xa4\xc6\xd4\xd9\xa4\xd3\x91\xf0\xa0\x8e\xa4\xce\x91K\xb5\x9c\xa4\xac\xc6\xf0\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xce\xa4\xca\xa4\xa4\xa4\xe4\xa4\xa6\xa4\xcb\xa4\xb9\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xf2\x9bQ\xd2\xe2\xa4\xb7\xa1\xa2 \xa1\xed\n\x833\x919\x827\xf39\x836\x843 \x832\xf12\x831\xe87\x832\xf95 \x833\xa87\x835\xa48\x832\xee3 \x831\xad4\x828\xc71\x828\xf99 \x833\x867\x830\xb35 \x829\x867\x836\x843\x827\xf40\x830\xee7\x833\x951 3\xa1\xa41\x833\x871\x829\x9c4\x833\x947\x830\x919 \x827\xdb1\x830\xb52\x829\xa31 \x829\x867\x836\x843\x830\xee7\x827\xf40\x833\x9b9\x832\xa27\x833\xaa4\x831\x975\x833\x975 \x830\xfe0\x835\xa48\x827\xe87 \x831\x983\x833\x975\x832\xee3 \x836\x860\x827\xda7\x836\x843 4\xa1\xa419\x830\xee7\x833\xc15\x833\x9a3\x828\xd95\x833\x955 \x827\xe31\x832\x9f2\x836\x839\x827\xe59, \x833\xb35\x827\xf40\x833\x975 \x830\xee7\x833\xc15\x827\xd23\x836\x948\x827\xe87 \x835\xda0\x836\x9d1\x833\xa84 \x835\xa48\x833\x9b1\x833\x975 \x831\xe87\x830\xc92\x832\xee3 \x833\x9c0\x827\xcf6\x836\x839\x832\xf11 \x833\xaa4\x833\x975\xa1\xa4\x833\x9a7\x829\x9a3\x832\xf95 \x829\x9c4\x835\xdd5\x832\xe31\x830\x919\x832\xb07 \x830\xee7\x833\xb36\x833\x975 \x829\x843\x827\xe11\x833\x955 \x827\xe80\x827\xe59\x836\xbb9 \x836\x839\x827\xe59, 
\x829\xd67\x830\xf32\x827\xcf6\x836\x839\n\xcc\xec\xb5\xd8\xd0\xfe\xfcS\xa1\xa1\xd3\xee\xd6\xe6\xba\xe9\xbb\xc4 \xa1\xce \xc8\xd5\xd4\xc2\xd3\xaf\xea\xbe\xa1\xa1\xb3\xbd\xcb\xde\xc1\xd0\x8f\x88 \xa1\xce \xba\xae\x81\xed\xca\xee\xcd\xf9\xa1\xa1\xc7\xef\xca\xd5\xb6\xac\xb2\xd8 \xa1\xce \xe9c\xf0N\xb3\xc9\x9aq\xa1\xa1\xc2\xc9\xd5\xd9\xd5{\xea\x96 \xa1\xce \xeb\x85\xf2v\xd6\xc2\xd3\xea\xa1\xa1\xc2\xb6\xbdY\x9e\xe9\xcb\xaa \xa1\xce \xbd\xf0\xc9\xfa\xfb\x90\xcb\xae\xa1\xa1\xd3\xf1\xb3\xf6\x8d\x8b\x8c\xf9 \xa1\xce \x84\xa6\xcc\x96\xbe\xde\xeaI\xa1\xa1\xd6\xe9\xb7Q\xd2\xb9\xb9\xe2 \xa1\xce \xb9\xfb\xd5\xe4\xc0\xee\xe8\xcd\xa1\xa1\xb2\xcb\xd6\xd
# GB18030: decode the sample bytes, echo the text, then re-encode and report
# if the result differs from the original bytes.
let decoded5 = codecs.decode(data5, "gb18030")
print(decoded5)
let encoded5 = codecs.encode(decoded5, "gb18030")
if encoded5 != data5:
    # Only printed on failure; a clean run emits nothing here.
    print("Didn't roundtrip")
# Re-encode the same text as GBK, substituting XML character references for
# anything GBK cannot encode, then decode those bytes as GB18030 and print.
print(codecs.decode(codecs.encode(decoded5, "gbk", errors="xmlcharrefreplace"), "gb18030"))
# Separator line in the output before the next group of checks.
print("-")
# Sample strings shared by the ISO-2022-JP error-handler checks that follow.
let test_0 = "염盐塩鹽䝼丽/〒"
let test_1 = "令䝼むかしむかしあるところに"
let test_2 = """ foo = "Quoted string ****令䝼" """
# Round-trip test_0 through ISO-2022-JP with errors="replace" in both
# directions, then compare position by position: every output character must
# be either the original character at that index or a "?" substitute.
let out_0a = codecs.decode(codecs.encode(test_0, "iso-2022-jp", errors="replace"), "iso-2022-jp", errors="replace")
let success = True
for n, char in enumerate(out_0a):
    if char not in (test_0[n], "?"):
        print("replace in DBCS state unsuccessful:", out_0a)
        success = False
        # One mismatch is enough; stop so the failure is reported only once.
        break
if success:
    print("replace in DBCS state successful:", out_0a)
# Encode test_0 to ISO-2022-JP with errors="xmlcharrefreplace", decode it
# back, and check that the character reference "&#18300;" appears verbatim in
# the decoded text.
let out_0b = codecs.decode(codecs.encode(test_0, "iso-2022-jp", errors="xmlcharrefreplace"), "iso-2022-jp", errors="replace")
if "&#18300;" not in out_0b:
    print("xmlcharrefreplace in DBCS state unsuccessful:", out_0b)
else:
    print("xmlcharrefreplace in DBCS state successful:", out_0b)
# Round-trip test_1 through ISO-2022-JP with errors="replace" and check that
# the kana tail of the string comes back unchanged, whatever happened to the
# characters before it.
let out_1 = codecs.decode(codecs.encode(test_1, "iso-2022-jp", errors="replace"), "iso-2022-jp", errors="replace")
if not out_1.endswith("むかしむかしあるところに"):
    print("Encoding misaligned:", out_1)
else:
    print("Encoding not misaligned:", out_1)
# Round-trip test_2 through ISO-2022-JP with errors="replace" and check that
# the second-to-last character is still the closing double quote.
let out_2 = codecs.decode(codecs.encode(test_2, "iso-2022-jp", errors="replace"), "iso-2022-jp", errors="replace")
if out_2[-2] != '"':
    print("Delimiter swallowed:", out_2)
else:
    print("Delimiter not swallowed:", out_2)
# https://encoding.spec.whatwg.org/#security-background
# Decode a double-quoted string whose closing quote directly follows the byte
# 0x82, and print the result with undecodable input replaced.
# NOTE(review): presumably this guards against the trailing '"' being consumed
# as the second byte of a two-byte sequence, per the linked section -- confirm.
print(codecs.decode(b'"robert\x82"', "shift_jis", errors="replace"))
let scrutiny = b'\[$BF|K\\T"L1$O!"\[(B\[$B@5aD$KA*Z)$5$l$?\[(BNo: \[(B; Yes: \[(J\\; Yes: \\\[(B; No: \[(J; No: \[(B\[(J; Yes: \[(B~; No: \[(J\[(B; Yes: ~\[(J; No: \[(B; Yes: \[(J~; Yes: .;\[(B'
# Decode the `scrutiny` byte string with each codec in turn, replacing
# undecodable input, and print both results in this order.
for codec_name in ["iso-2022-jp", "jis_encoding"]:
    print(codecs.decode(scrutiny, codec_name, errors="replace"))
Codecs revisited (#28) * xraydict functionality and usage improvements Add a filter_function to xraydict, allowing fewer big data structures. Make uses of xraydict prefer exclusion sets to exclusion lists, to avoid repeated linear search of a list. * Make `big5_coded_forms_from_hkscs` a set, remove set trailing commas. * Remove `big5_coded_forms_from_hkscs` in favour of a filter function. * Similarly, use sets for 7-bit exclusion lists except when really short. * Revise mappings for seven 78JIS codepoints. Mappings for 25-23 and 90-22 were previously the same as those used for 97JIS; they have been swapped to correspond with how the IBM extension versus the standard code are mapped in the "old sequence" (78JIS-based) as opposed to the "new sequence". Mappings for 32-70, 34-45, 35-29, 39-77 and 54-02 in 78JIS have been changed to reflect disunifications made in 2000-JIS and 2004-JIS, assigning the 1978-edition unsimplified variants of those characters separate coded forms (where previously, only swaps and disunifications in 83JIS and disunifications in 90JIS (including JIS X 0212) had been considered). This only affects the `jis_encoding` codec (including the decoding direction for `iso-2022-jp-2`, `iso-2022-jp-3` and `iso-2022-jp-2004`), and the decoding is only affected when `ESC $ @` (not `ESC $ B`) is used. The `iso-2022-jp` codec is unaffected, and remains similar to (but more consistently pedantic than) the WHATWG specification, thus using the same table for both 78JIS and 97JIS. * Make `johab-ebcdic` decoder use many-to-one, not corporate PUA. Many-to-one decodes are not uncommon in CJK encodings (e.g. Windows-31J), and mapping to the IBM Corporate PUA (code page 1449) would probably make it render as completely the wrong character if at all in practice. * Switch `cp950_no_eudc_encoding_map` away from a hardcoded exclusion list. * Codec support for `x-mac-korean`. * Add a test bit for the UTF-8 wrapper. 
* Document the unique error-condition definition of the ISO-2022-JP codec. * Update docs now there is an actual implementation for `x-mac-korean`. * Further explanations of the hazards of `jis_encoding`. * Sanitised → Sanitised or escaped. * Further clarify the status with not verifying Shift In. * Corrected description of End State 2. * Changes to MacKorean to avoid mapping non-ASCII using ASCII punctuation. * Extraneous word "still". * Fix omitting MacKorean single-byte codes.
2022-07-23 02:32:54 +03:00
# Sample containing the two-byte sequence C3 B6, decoded with the default
# error handling.
let valid_utf8 = b'co\xc3\xb6rdination'
print(codecs.decode(valid_utf8, "utf-8"))
# Same word with the single byte 9A in place of that sequence, decoded with
# errors="replace".
let invalid_utf8 = b'co\x9ardination'
print(codecs.decode(invalid_utf8, "utf-8", errors="replace"))