a580a835b8
* xraydict functionality and usage improvements Add a filter_function to xraydict, allowing fewer big data structures. Make uses of xraydict prefer exclusion sets to exclusion lists, to avoid repeated linear search of a list. * Make `big5_coded_forms_from_hkscs` a set, remove set trailing commas. * Remove `big5_coded_forms_from_hkscs` in favour of a filter function. * Similarly, use sets for 7-bit exclusion lists except when really short. * Revise mappings for seven 78JIS codepoints. Mappings for 25-23 and 90-22 were previously the same as those used for 97JIS; they have been swapped to correspond with how the IBM extension versus the standard code are mapped in the "old sequence" (78JIS-based) as opposed to the "new sequence". Mappings for 32-70, 34-45, 35-29, 39-77 and 54-02 in 78JIS have been changed to reflect disunifications made in 2000-JIS and 2004-JIS, assigning the 1978-edition unsimplified variants of those characters separate coded forms (where previously, only swaps and disunifications in 83JIS and disunifications in 90JIS (including JIS X 0212) had been considered). This only affects the `jis_encoding` codec (including the decoding direction for `iso-2022-jp-2`, `iso-2022-jp-3` and `iso-2022-jp-2004`), and the decoding is only affected when `ESC $ @` (not `ESC $ B`) is used. The `iso-2022-jp` codec is unaffected, and remains similar to (but more consistently pedantic than) the WHATWG specification, thus using the same table for both 78JIS and 97JIS. * Make `johab-ebcdic` decoder use many-to-one, not corporate PUA. Many-to-one decodes are not uncommon in CJK encodings (e.g. Windows-31J), and mapping to the IBM Corporate PUA (code page 1449) would probably make it render as completely the wrong character if at all in practice. * Switch `cp950_no_eudc_encoding_map` away from a hardcoded exclusion list. * Codec support for `x-mac-korean`. * Add a test bit for the UTF-8 wrapper. * Document the unique error-condition definition of the ISO-2022-JP codec. 
* Update docs now there is an actual implementation for `x-mac-korean`. * Further explanations of the hazards of `jis_encoding`. * Sanitised → Sanitised or escaped. * Further clarify the status with not verifying Shift In. * Corrected description of End State 2. * Changes to MacKorean to avoid mapping non-ASCII using ASCII punctuation. * Extraneous word "still". * Fix omitting MacKorean single-byte codes.
110 lines
29 KiB
Python
110 lines
29 KiB
Python
import codecs
|
|
# Encode the same sample string to windows-1251 once per error handler and
# print each result, so the handlers' outputs can be compared side by side.
for handler in ("ignore", "replace", "backslashreplace", "xmlcharrefreplace"):
    print(codecs.encode("🏣 í Brčko", "windows-1251", errors=handler))
|
|
let data = b'\xc7\xed\xe0\xea\xee\xec\xf1\xf2\xe2\xee \xf3 \xd0\xee\xf1\xf2\xee\xe2\xfb\xf5 \xe1\xfb\xeb\xe0 \xe2\xf1\xff \xcc\xee\xf1\xea\xe2\xe0; \xe4\xe5\xed\xe5\xe3 \xe2 \xed\xfb\xed\xe5\xf8\xed\xe8\xe9 \xe3\xee\xe4 \xf3 \xf1\xf2\xe0\xf0\xee\xe3\xee \xe3\xf0\xe0\xf4\xe0 \xe1\xfb\xeb\xee \xe4\xee\xf1\xf2\xe0\xf2\xee\xf7\xed\xee, \xef\xee\xf2\xee\xec\xf3 \xf7\xf2\xee \xe1\xfb\xeb\xe8 \xef\xe5\xf0\xe5\xe7\xe0\xeb\xee\xe6\xe5\xed\xfb \xe2\xf1\xe5 \xe8\xec\xe5\xed\xe8\xff, \xe8 \xef\xee\xf2\xee\xec\xf3 \xcd\xe8\xea\xee\xeb\xf3\xf8\xea\xe0, \xe7\xe0\xe2\xe5\xe4\xff \xf1\xe2\xee\xe5\xe3\xee \xf1\xee\xe1\xf1\xf2\xe2\xe5\xed\xed\xee\xe3\xee \xf0\xfb\xf1\xe0\xea\xe0 \xe8 \xf1\xe0\xec\xfb\xe5 \xec\xee\xe4\xed\xfb\xe5 \xf0\xe5\xe9\xf2\xf3\xe7\xfb, \xee\xf1\xee\xe1\xe5\xed\xed\xfb\xe5, \xea\xe0\xea\xe8\xf5 \xed\xe8 \xf3 \xea\xee\xe3\xee \xe5\xf9\xe5 \xe2 \xcc\xee\xf1\xea\xe2\xe5 \xed\xe5 \xe1\xfb\xeb\xee, \xe8 \xf1\xe0\xef\xee\xe3\xe8 \xf1\xe0\xec\xfb\xe5 \xec\xee\xe4\xed\xfb\xe5, \xf1 \xf1\xe0\xec\xfb\xec\xe8 \xee\xf1\xf2\xf0\xfb\xec\xe8 \xed\xee\xf1\xea\xe0\xec\xe8 \xe8 \xec\xe0\xeb\xe5\xed\xfc\xea\xe8\xec\xe8 \xf1\xe5\xf0\xe5\xe1\xf0\xff\xed\xfb\xec\xe8 \xf8\xef\xee\xf0\xe0\xec\xe8, \xef\xf0\xee\xe2\xee\xe4\xe8\xeb \xe2\xf0\xe5\xec\xff \xee\xf7\xe5\xed\xfc \xe2\xe5\xf1\xe5\xeb\xee. \xd0\xee\xf1\xf2\xee\xe2, \xe2\xe5\xf0\xed\xf3\xe2\xf8\xe8\xf1\xfc \xe4\xee\xec\xee\xe9, \xe8\xf1\xef\xfb\xf2\xe0\xeb \xef\xf0\xe8\xff\xf2\xed\xee\xe5 \xf7\xf3\xe2\xf1\xf2\xe2\xee \xef\xee\xf1\xeb\xe5 \xed\xe5\xea\xee\xf2\xee\xf0\xee\xe3\xee \xef\xf0\xee\xec\xe5\xe6\xf3\xf2\xea\xe0 \xe2\xf0\xe5\xec\xe5\xed\xe8 \xef\xf0\xe8\xec\xe5\xf0\xe8\xe2\xe0\xed\xe8\xff \xf1\xe5\xe1\xff \xea \xf1\xf2\xe0\xf0\xfb\xec \xf3\xf1\xeb\xee\xe2\xe8\xff\xec \xe6\xe8\xe7\xed\xe8. \xc5\xec\xf3 \xea\xe0\xe7\xe0\xeb\xee\xf1\xfc, \xf7\xf2\xee \xee\xed \xee\xf7\xe5\xed\xfc \xe2\xee\xe7\xec\xf3\xe6\xe0\xeb \xe8 \xe2\xfb\xf0\xee\xf1. 
\xce\xf2\xf7\xe0\xff\xed\xe8\xe5 \xe7\xe0 \xed\xe5\xe2\xfb\xe4\xe5\xf0\xe6\xe0\xed\xed\xfb\xe9 \xe8\xe7 \xe7\xe0\xea\xee\xed\xe0 \xe1\xee\xe6\xfc\xe5\xe3\xee \xfd\xea\xe7\xe0\xec\xe5\xed, \xe7\xe0\xed\xe8\xec\xe0\xed\xe8\xe5 \xe4\xe5\xed\xe5\xe3 \xf3 \xc3\xe0\xe2\xf0\xe8\xeb\xfb \xed\xe0 \xe8\xe7\xe2\xee\xe7\xf7\xe8\xea\xe0, \xf2\xe0\xe9\xed\xfb\xe5 \xef\xee\xf6\xe5\xeb\xf3\xe8 \xf1 \xd1\xee\xed\xe5\xe9 \x97 \xee\xed \xef\xf0\xee \xe2\xf1\xe5 \xfd\xf2\xee \xe2\xf1\xef\xee\xec\xe8\xed\xe0\xeb, \xea\xe0\xea \xef\xf0\xee \xf0\xe5\xe1\xff\xf7\xe5\xf1\xf2\xe2\xee, \xee\xf2 \xea\xee\xf2\xee\xf0\xee\xe3\xee \xee\xed \xed\xe5\xe8\xe7\xec\xe5\xf0\xe8\xec\xee \xe1\xfb\xeb \xe4\xe0\xeb\xe5\xea \xf2\xe5\xef\xe5\xf0\xfc. \xd2\xe5\xef\xe5\xf0\xfc \xee\xed \x97 \xe3\xf3\xf1\xe0\xf0\xf1\xea\xe8\xe9 \xef\xee\xf0\xf3\xf7\xe8\xea \xe2 \xf1\xe5\xf0\xe5\xe1\xf0\xff\xed\xee\xec \xec\xe5\xed\xf2\xe8\xea\xe5, \xf1 \xf1\xee\xeb\xe4\xe0\xf2\xf1\xea\xe8\xec \xc3\xe5\xee\xf0\xe3\xe8\xe5\xec, \xe3\xee\xf2\xee\xe2\xe8\xf2 \xf1\xe2\xee\xe5\xe3\xee \xf0\xfb\xf1\xe0\xea\xe0 \xed\xe0 \xe1\xe5\xe3, \xe2\xec\xe5\xf1\xf2\xe5 \xf1 \xe8\xe7\xe2\xe5\xf1\xf2\xed\xfb\xec\xe8 \xee\xf5\xee\xf2\xed\xe8\xea\xe0\xec\xe8, \xef\xee\xe6\xe8\xeb\xfb\xec\xe8, \xef\xee\xf7\xf2\xe5\xed\xed\xfb\xec\xe8. \xd3 \xed\xe5\xe3\xee \xe7\xed\xe0\xea\xee\xec\xe0\xff \xe4\xe0\xec\xe0 \xed\xe0 \xe1\xf3\xeb\xfc\xe2\xe0\xf0\xe5, \xea \xea\xee\xf2\xee\xf0\xee\xe9 \xee\xed \xe5\xe7\xe4\xe8\xf2 \xe2\xe5\xf7\xe5\xf0\xee\xec. 
\xce\xed \xe4\xe8\xf0\xe8\xe6\xe8\xf0\xee\xe2\xe0\xeb \xec\xe0\xe7\xf3\xf0\xea\xf3 \xed\xe0 \xe1\xe0\xeb\xe5 \xf3 \xc0\xf0\xf5\xe0\xf0\xee\xe2\xfb\xf5, \xf0\xe0\xe7\xe3\xee\xe2\xe0\xf0\xe8\xe2\xe0\xeb \xee \xe2\xee\xe9\xed\xe5 \xf1 \xf4\xe5\xeb\xfc\xe4\xec\xe0\xf0\xf8\xe0\xeb\xee\xec \xca\xe0\xec\xe5\xed\xf1\xea\xe8\xec, \xe1\xfb\xe2\xe0\xeb \xe2 \xc0\xed\xe3\xeb\xe8\xe9\xf1\xea\xee\xec \xea\xeb\xf3\xe1\xe5 \xe8 \xe1\xfb\xeb \xed\xe0 \xf2\xfb \xf1 \xee\xe4\xed\xe8\xec \xf1\xee\xf0\xee\xea\xe0\xeb\xe5\xf2\xed\xe8\xec \xef\xee\xeb\xea\xee\xe2\xed\xe8\xea\xee\xec, \xf1 \xea\xee\xf2\xee\xf0\xfb\xec \xef\xee\xe7\xed\xe0\xea\xee\xec\xe8\xeb \xe5\xe3\xee \xc4\xe5\xed\xe8\xf1\xee\xe2. '
|
|
# Round-trip check for windows-1251: decode the sample bytes, show the text,
# re-encode it, and report if the result differs from the original bytes.
let decoded = codecs.decode(data, "windows-1251")
print(decoded)
let encoded = codecs.encode(decoded, "windows-1251")
if encoded != data:
    print("Didn't roundtrip")
|
|
|
|
let data2 = b'\x81@\x93\xfa\x96{\x9a\xa0\x96\xaf\x82\xcd\x81A\x90\xb3\xe1c\x82\xc9\x91I\x9d\xa7\x82\xb3\x82\xea\x82\xbd\x9a\xa0\x98\xf0\x82\xc9\x82\xa8\x82\xaf\x82\xe9\x91\xe3\x95\\\x8e\xd2\x82\xf0\x92\xca\x82\xb6\x82\xc4\x8ds\x93\xae\x82\xb5\x81A\x82\xed\x82\xea\x82\xe7\x82\xc6\x82\xed\x82\xea\x82\xe7\x82\xcc\x8eq\x91\xb7\x82\xcc\x82\xbd\x82\xdf\x82\xc9\x81A\x8f\x94\x9a\xa0\x96\xaf\x82\xc6\x82\xcc\x8b\xa6\x98a\x82\xc9\x82\xe6\x82\xe9\x90\xac\x89\xca\x82\xc6\x81A\x82\xed\x82\xaa\x9a\xa0\x91S\x93y\x82\xc9\x82\xed\x82\xbd\x82\xc2\x82\xc4\x8e\xa9\x97R\x82\xcc\x82\xe0\x82\xbd\x82\xe7\x82\xb7\x9c\xa8\xe0V\x82\xf0\x8am\x95\xdb\x82\xb5\x81A\x90\xad\x95{\x82\xcc\x8ds\xe0\xa8\x82\xc9\x82\xe6\x82\xc2\x82\xc4\x8d\xc4\x82\xd1\x9dD\xe0\xa5\x82\xcc\x9c\xcc\x89\xd0\x82\xaa\x8bN\x82\xe9\x82\xb1\x82\xc6\x82\xcc\x82\xc8\x82\xa2\x82\xe2\x82\xa4\x82\xc9\x82\xb7\x82\xe9\x82\xb1\x82\xc6\x82\xf0\x8c\x88\x88\xd3\x82\xb5\x81A\x82\xb1\x82\xb1\x82\xc9\x8e\xe5\x9e\xdc\x82\xaa\x9a\xa0\x96\xaf\x82\xc9\x91\xb6\x82\xb7\x82\xe9\x82\xb1\x82\xc6\x82\xf0\x90\xe9\x8c\xbe\x82\xb5\x81A\x82\xb1\x82\xcc\x8c\x9b\x96@\x82\xf0\x8am\x92\xe8\x82\xb7\x82\xe9\x81B\x82\xbb\x82\xe0\x82\xbb\x82\xe0\x9a\xa0\x90\xad\x82\xcd\x81A\x9a\xa0\x96\xaf\x82\xcc\x9a\x8e\xe3\xe7\x82\xc8\x90M\x91\xf5\x82\xc9\x82\xe6\x82\xe9\x82\xe0\x82\xcc\x82\xc5\x82\xa0\x82\xc2\x82\xc4\x81A\x82\xbb\x82\xcc\x9e\xdc\x88\xd0\x82\xcd\x9a\xa0\x96\xaf\x82\xc9\x97R\x98\xd2\x82\xb5\x81A\x82\xbb\x82\xcc\x9e\xdc\x97\xcd\x82\xcd\x9a\xa0\x96\xaf\x82\xcc\x91\xe3\x95\\\x8e\xd2\x82\xaa\x82\xb1\x82\xea\x82\xf0\x8ds\x8eg\x82\xb5\x81A\x82\xbb\x82\xcc\x95\x9f\x97\x98\x82\xcd\x9a\xa0\x96\xaf\x82\xaa\x82\xb1\x82\xea\x82\xf0\x8b\x9d\x8e\xf3\x82\xb7\x82\xe9\x81B\x82\xb1\x82\xea\x82\xcd\x90l\x97\xde\x95\x81\x95\xd5\x82\xcc\x8c\xb4\x97\x9d\x82\xc5\x82\xa0\x82\xe8\x81A\x82\xb1\x82\xcc\x8c\x9b\x96@\x82\xcd\x81A\x82\xa9\x82\xa9\x82\xe9\x8c\xb4\x97\x9d\x82\xc9\x8a\xee\x82\xad\x82\xe0\x82\xcc\x82\xc5\x82\xa0\x82\xe9\x81B\x82\xed\x82\xea\x82\xe7\x82\xcd\x81A\x82\xb1\x82\xea\
x82\xc9\x94\xbd\x82\xb7\x82\xe9\x88\xea\x90\xd8\x82\xcc\x8c\x9b\x96@\x81A\x96@\x97\xdf\x8by\x82\xd1\x8f\xd9\x92\xba\x82\xf0\x94r\x8f\x9c\x82\xb7\x82\xe9\x81B \x87\x82'
|
|
# Round-trip check for windows-31j: decode the sample bytes, show the text,
# re-encode it, and report if the result differs from the original bytes.
let decoded2 = codecs.decode(data2, "windows-31j")
print(decoded2)
let encoded2 = codecs.encode(decoded2, "windows-31j")
if encoded2 != data2:
    print("Didn't roundtrip")
|
|
|
|
let data2a = b'\xa1\xa1\xc6\xfc\xcb\xdc\xd4\xa2\xcc\xb1\xa4\xcf\xa1\xa2\xc0\xb5\xe1\xc4\xa4\xcb\xc1\xaa\xda\xa9\xa4\xb5\xa4\xec\xa4\xbf\xd4\xa2\xd0\xf2\xa4\xcb\xa4\xaa\xa4\xb1\xa4\xeb\xc2\xe5\xc9\xbd\xbc\xd4\xa4\xf2\xc4\xcc\xa4\xb8\xa4\xc6\xb9\xd4\xc6\xb0\xa4\xb7\xa1\xa2\xa4\xef\xa4\xec\xa4\xe9\xa4\xc8\xa4\xef\xa4\xec\xa4\xe9\xa4\xce\xbb\xd2\xc2\xb9\xa4\xce\xa4\xbf\xa4\xe1\xa4\xcb\xa1\xa2\xbd\xf4\xd4\xa2\xcc\xb1\xa4\xc8\xa4\xce\xb6\xa8\xcf\xc2\xa4\xcb\xa4\xe8\xa4\xeb\xc0\xae\xb2\xcc\xa4\xc8\xa1\xa2\xa4\xef\xa4\xac\xd4\xa2\xc1\xb4\xc5\xda\xa4\xcb\xa4\xef\xa4\xbf\xa4\xc4\xa4\xc6\xbc\xab\xcd\xb3\xa4\xce\xa4\xe2\xa4\xbf\xa4\xe9\xa4\xb9\xd8\xaa\xdf\xb7\xa4\xf2\xb3\xce\xca\xdd\xa4\xb7\xa1\xa2\xc0\xaf\xc9\xdc\xa4\xce\xb9\xd4\xe0\xaa\xa4\xcb\xa4\xe8\xa4\xc4\xa4\xc6\xba\xc6\xa4\xd3\xd9\xa5\xe0\xa7\xa4\xce\xd8\xce\xb2\xd2\xa4\xac\xb5\xaf\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xce\xa4\xca\xa4\xa4\xa4\xe4\xa4\xa6\xa4\xcb\xa4\xb9\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xf2\xb7\xe8\xb0\xd5\xa4\xb7\xa1\xa2\xa4\xb3\xa4\xb3\xa4\xcb\xbc\xe7\xdc\xde\xa4\xac\xd4\xa2\xcc\xb1\xa4\xcb\xc2\xb8\xa4\xb9\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xf2\xc0\xeb\xb8\xc0\xa4\xb7\xa1\xa2\xa4\xb3\xa4\xce\xb7\xfb\xcb\xa1\xa4\xf2\xb3\xce\xc4\xea\xa4\xb9\xa4\xeb\xa1\xa3\xa4\xbd\xa4\xe2\xa4\xbd\xa4\xe2\xd4\xa2\xc0\xaf\xa4\xcf\xa1\xa2\xd4\xa2\xcc\xb1\xa4\xce\xd3\xee\xe6\xe9\xa4\xca\xbf\xae\xc2\xf7\xa4\xcb\xa4\xe8\xa4\xeb\xa4\xe2\xa4\xce\xa4\xc7\xa4\xa2\xa4\xc4\xa4\xc6\xa1\xa2\xa4\xbd\xa4\xce\xdc\xde\xb0\xd2\xa4\xcf\xd4\xa2\xcc\xb1\xa4\xcb\xcd\xb3\xd0\xd4\xa4\xb7\xa1\xa2\xa4\xbd\xa4\xce\xdc\xde\xce\xcf\xa4\xcf\xd4\xa2\xcc\xb1\xa4\xce\xc2\xe5\xc9\xbd\xbc\xd4\xa4\xac\xa4\xb3\xa4\xec\xa4\xf2\xb9\xd4\xbb\xc8\xa4\xb7\xa1\xa2\xa4\xbd\xa4\xce\xca\xa1\xcd\xf8\xa4\xcf\xd4\xa2\xcc\xb1\xa4\xac\xa4\xb3\xa4\xec\xa4\xf2\xb5\xfd\xbc\xf5\xa4\xb9\xa4\xeb\xa1\xa3\xa4\xb3\xa4\xec\xa4\xcf\xbf\xcd\xce\xe0\xc9\xe1\xca\xd7\xa4\xce\xb8\xb6\xcd\xfd\xa4\xc7\xa4\xa2\xa4\xea\xa1\xa2\xa4\xb3\xa4\xce\xb7\xfb\xcb\xa1\xa4\xcf\xa1\xa2\xa4\xab\xa4\xab\xa4\xeb\xb8\xb6\xcd\xfd\
xa4\xcb\xb4\xf0\xa4\xaf\xa4\xe2\xa4\xce\xa4\xc7\xa4\xa2\xa4\xeb\xa1\xa3\xa4\xef\xa4\xec\xa4\xe9\xa4\xcf\xa1\xa2\xa4\xb3\xa4\xec\xa4\xcb\xc8\xbf\xa4\xb9\xa4\xeb\xb0\xec\xc0\xda\xa4\xce\xb7\xfb\xcb\xa1\xa1\xa2\xcb\xa1\xce\xe1\xb5\xda\xa4\xd3\xbe\xdb\xc4\xbc\xa4\xf2\xc7\xd3\xbd\xfc\xa4\xb9\xa4\xeb\xa1\xa3 \xad\xe2'
|
|
# Round-trip check for euc-jp: decode the sample bytes, show the text,
# re-encode it, and report if the result differs from the original bytes.
let decoded2a = codecs.decode(data2a, "euc-jp")
print(decoded2a)
let encoded2a = codecs.encode(decoded2a, "euc-jp")
if encoded2a != data2a:
    print("Didn't roundtrip")

# The three-byte sequence 8F A2 F1 and the two-byte sequence AD E2 must decode
# to the same text; report a mismatch otherwise.
if codecs.decode(b'\x8f\xa2\xf1', "euc-jp") != codecs.decode(b'\xad\xe2', "euc-jp"):
    print("NEC / 0212 equivalent didn't match")
|
|
|
|
let data2b = b'\[$B!!F|K\\T"L1$O!"@5aD$KA*Z)$5$l$?T"Pr$K$*$1$kBeI=<T$rDL$8$F9TF0$7!"$o$l$i$H$o$l$i$N;RB9$N$?$a$K!"=tT"L1$H$N6(OB$K$h$k@.2L$H!"$o$,T"A4EZ$K$o$?$D$F<+M3$N$b$?$i$9X*_7$r3NJ]$7!"@/I\\$N9T`*$K$h$D$F:F$SY%`\'$NXN2R$,5/$k$3$H$N$J$$$d$&$K$9$k$3$H$r7h0U$7!"$3$3$K<g\\^$,T"L1$KB8$9$k$3$H$r@k8@$7!"$3$N7{K!$r3NDj$9$k!#$=$b$=$bT"@/$O!"T"L1$NSnfi$J?.Bw$K$h$k$b$N$G$"$D$F!"$=$N\\^0R$OT"L1$KM3PT$7!"$=$N\\^NO$OT"L1$NBeI=<T$,$3$l$r9T;H$7!"$=$NJ!Mx$OT"L1$,$3$l$r5}<u$9$k!#$3$l$O?MN`IaJW$N86M}$G$"$j!"$3$N7{K!$O!"$+$+$k86M}$K4p$/$b$N$G$"$k!#$o$l$i$O!"$3$l$KH?$9$k0l@Z$N7{K!!"K!Na5Z$S>[D<$rGS=|$9$k!#\[(B \[$B-b\[(B'
|
|
# Round-trip check for iso-2022-jp: decode the sample bytes, show the text,
# re-encode it, and report if the result differs from the original bytes.
let decoded2b = codecs.decode(data2b, "iso-2022-jp")
print(decoded2b)
let encoded2b = codecs.encode(decoded2b, "iso-2022-jp")
if encoded2b != data2b:
    print("Didn't roundtrip")
|
|
|
|
let data3 = b'\xc0\xaf\xb1\xb8\xc7\xd1 \xbf\xaa\xbb\xe7\xbf\xcd \xc0\xfc\xc5\xeb\xbf\xa1 \xba\xfb\xb3\xaa\xb4\xc2 \xbf\xec\xb8\xae \xb4\xeb\xc7\xd1\xb1\xb9\xb9\xce\xc0\xba 3\xa1\xa41\xbf\xee\xb5\xbf\xc0\xb8\xb7\xce \xb0\xc7\xb8\xb3\xb5\xc8 \xb4\xeb\xc7\xd1\xb9\xce\xb1\xb9\xc0\xd3\xbd\xc3\xc1\xa4\xba\xce\xc0\xc7 \xb9\xfd\xc5\xeb\xb0\xfa \xba\xd2\xc0\xc7\xbf\xa1 \xc7\xd7\xb0\xc5\xc7\xd1 4\xa1\xa419\xb9\xce\xc1\xd6\xc0\xcc\xb3\xe4\xc0\xbb \xb0\xe8\xbd\xc2\xc7\xcf\xb0\xed, \xc1\xb6\xb1\xb9\xc0\xc7 \xb9\xce\xc1\xd6\xb0\xb3\xc7\xf5\xb0\xfa \xc6\xf2\xc8\xad\xc0\xfb \xc5\xeb\xc0\xcf\xc0\xc7 \xbb\xe7\xb8\xed\xbf\xa1 \xc0\xd4\xb0\xa2\xc7\xcf\xbf\xa9 \xc1\xa4\xc0\xc7\xa1\xa4\xc0\xce\xb5\xb5\xbf\xcd \xb5\xbf\xc6\xf7\xbe\xd6\xb7\xce\xbd\xe1 \xb9\xce\xc1\xb7\xc0\xc7 \xb4\xdc\xb0\xe1\xc0\xbb \xb0\xf8\xb0\xed\xc8\xf7 \xc7\xcf\xb0\xed, \xb8\xf0\xb5\xe7 \xbb\xe7\xc8\xb8\xc0\xfb \xc6\xf3\xbd\xc0\xb0\xfa \xba\xd2\xc0\xc7\xb8\xa6 \xc5\xb8\xc6\xc4\xc7\xcf\xb8\xe7, \xc0\xda\xc0\xb2\xb0\xfa \xc1\xb6\xc8\xad\xb8\xa6 \xb9\xd9\xc5\xc1\xc0\xb8\xb7\xce \xc0\xda\xc0\xaf\xb9\xce\xc1\xd6\xc0\xfb \xb1\xe2\xba\xbb\xc1\xfa\xbc\xad\xb8\xa6 \xb4\xf5\xbf\xed \xc8\xae\xb0\xed\xc8\xf7 \xc7\xcf\xbf\xa9 \xc1\xa4\xc4\xa1\xa1\xa4\xb0\xe6\xc1\xa6\xa1\xa4\xbb\xe7\xc8\xb8\xa1\xa4\xb9\xae\xc8\xad\xc0\xc7 \xb8\xf0\xb5\xe7 \xbf\xb5\xbf\xaa\xbf\xa1 \xc0\xd6\xbe\xee\xbc\xad \xb0\xa2\xc0\xce\xc0\xc7 \xb1\xe2\xc8\xb8\xb8\xa6 \xb1\xd5\xb5\xee\xc8\xf7 \xc7\xcf\xb0\xed, \xb4\xc9\xb7\xc2\xc0\xbb \xc3\xd6\xb0\xed\xb5\xb5\xb7\xce \xb9\xdf\xc8\xd6\xc7\xcf\xb0\xd4 \xc7\xcf\xb8\xe7, \xc0\xda\xc0\xaf\xbf\xcd \xb1\xc7\xb8\xae\xbf\xa1 \xb5\xfb\xb8\xa3\xb4\xc2 \xc3\xa5\xc0\xd3\xb0\xfa \xc0\xc7\xb9\xab\xb8\xa6 \xbf\xcf\xbc\xf6\xc7\xcf\xb0\xd4 \xc7\xcf\xbf\xa9, \xbe\xc8\xc0\xb8\xb7\xce\xb4\xc2 \xb1\xb9\xb9\xce\xbb\xfd\xc8\xb0\xc0\xc7 \xb1\xd5\xb5\xee\xc7\xd1 \xc7\xe2\xbb\xf3\xc0\xbb \xb1\xe2\xc7\xcf\xb0\xed \xb9\xdb\xc0\xb8\xb7\xce\xb4\xc2 \xc7\xd7\xb1\xb8\xc0\xfb\xc0\xce \xbc\xbc\xb0\xe8\xc6\xf2\xc8\xad\xbf\xcd 
\xc0\xce\xb7\xf9\xb0\xf8\xbf\xb5\xbf\xa1 \xc0\xcc\xb9\xd9\xc1\xf6\xc7\xd4\xc0\xb8\xb7\xce\xbd\xe1 \xbf\xec\xb8\xae\xb5\xe9\xb0\xfa \xbf\xec\xb8\xae\xb5\xe9\xc0\xc7 \xc0\xda\xbc\xd5\xc0\xc7 \xbe\xc8\xc0\xfc\xb0\xfa \xc0\xda\xc0\xaf\xbf\xcd \xc7\xe0\xba\xb9\xc0\xbb \xbf\xb5\xbf\xf8\xc8\xf7 \xc8\xae\xba\xb8\xc7\xd2 \xb0\xcd\xc0\xbb \xb4\xd9\xc1\xfc\xc7\xcf\xb8\xe9\xbc\xad 1948\xb3\xe2 7\xbf\xf9 12\xc0\xcf\xbf\xa1 \xc1\xa6\xc1\xa4\xb5\xc7\xb0\xed 8\xc2\xf7\xbf\xa1 \xb0\xc9\xc3\xc4 \xb0\xb3\xc1\xa4\xb5\xc8 \xc7\xe5\xb9\xfd\xc0\xbb \xc0\xcc\xc1\xa6 \xb1\xb9\xc8\xb8\xc0\xc7 \xc0\xc7\xb0\xe1\xc0\xbb \xb0\xc5\xc3\xc4 \xb1\xb9\xb9\xce\xc5\xf5\xc7\xa5\xbf\xa1 \xc0\xc7\xc7\xcf\xbf\xa9 \xb0\xb3\xc1\xa4\xc7\xd1\xb4\xd9. \x8cc\xb9\xe6\xb0\xa2\xc7\xcf'
|
|
# Round-trip check for windows-949. Decoding uses errors="replace", while the
# re-encode uses the default error handling; a mismatch with the original
# bytes is reported.
let decoded3 = codecs.decode(data3, "windows-949", errors="replace")
print(decoded3)
let encoded3 = codecs.encode(decoded3, "windows-949")
if encoded3 != data3:
    print("Didn't roundtrip")

# With errors="replace", the "undefined" codec is expected to yield exactly
# one U+FFFD for this input.
# NOTE(review): the input here is a str ("hello"), not a bytes literal —
# confirm that this is intended.
if codecs.decode("hello", "undefined", errors="replace") != "\uFFFD":
    print("Undefined codec didn't work as expected")
|
|
|
|
let data4 = b'\xa4\xd1\xa6a\xa5\xc8\xb6\xc0\xa1@\xa6t\xa9z\xacx\xaf\xee \xa1\xfc \xa4\xe9\xa4\xeb\xac\xd5\xcc\xca\xa1@\xa8\xb0\xb1J\xa6C\xb1i \xa1\xfc \xb4H\xa8\xd3\xb4\xbb\xa9\xb9\xa1@\xac\xee\xa6\xac\xa5V\xc2\xc3 \xa1\xfc \xb6|\xbel\xa6\xa8\xb7\xb3\xa1@\xab\xdf\xa5l\xbd\xd5\xb6\xa7 \xa1\xfc \xb6\xb3\xc4\xcb\xadP\xabB\xa1@\xc5S\xb5\xb2\xac\xb0\xc1\xf7 \xa1\xfc \xaa\xf7\xa5\xcd\xc4R\xa4\xf4\xa1@\xa5\xc9\xa5X\xb1X\xa9\xa3 \xa1\xfc \xbcC\xb8\xb9\xa5\xa8\xc2\xf6\xa1@\xaf]\xba\xd9\xa9]\xa5\xfa \xa1\xfc \xaaG\xac\xc3\xa7\xf5\xcfU\xa1@\xb5\xe6\xad\xab\xaa\xe3\xc1\xa4 \xa1\xfc \xae\xfc\xc4\xd0\xaae\xb2H\xa1@\xc5\xec\xbc\xe7\xa6\xd0\xb5\xbe \xa1\xfc \xc0s\xaev\xa4\xf5\xab\xd2\xa1@\xb3\xbe\xa9x\xa4H\xac\xd3 \xa1\xfc \xa9l\xa8\xee\xa4\xe5\xa6r\xa1@\xa4D\xaaA\xa6\xe7\xbbn \xa1\xfc \xb1\xc0\xa6\xec\xc5\xfd\xb0\xea\xa1@\xa6\xb3\xb8\xb7\xb3\xb3\xad\xf0 \xa1\xfc \xa4\xdd\xa5\xc1\xa5\xef\xb8o\xa1@\xa9P\xb5o\xae\xef\xb4\xf6 \xa1\xfc \xa7\xa4\xb4\xc2\xb0\xdd\xb9D\xa1@\xab\xab\xab\xfd\xa5\xad\xb3\xb9 \xa1\xfc \xb7R\xa8|\xbe\xa4\xad\xba\xa1@\xa6\xda\xa5\xf1\xa6\xa5\xaa\xca \xa1\xfc \xb9I\xc2\xe2\xb3\xfc\xc5\xe9\xa1@\xb2v\xbb\xab\xc2k\xa4\xfd \xa1\xfc \xbb\xef\xbb\xf1\xa6b\xbe\xf0\xa1@\xa5\xd5\xbes\xad\xb9\xb3\xf5 \xa1\xfc \xa4\xc6\xb3Q\xaf\xf3\xa4\xec\xa1@\xbf\xe0\xa4\xce\xb8U\xa4\xe8 \xa1\xfc \xbb\\\xa6\xb9\xa8\xad\xbev\xa1@\xa5|\xa4j\xa4\xad\xb1` \xa1\xfc \xae\xa5\xb1\xa9\xc1\xf9\xbei\xa1@\xb0Z\xb4\xb1\xb7\xb4\xb6\xcb \xa1\xfc \xa4k\xbc}\xads\xda\xf4\xa1@\xa8k\xae\xc4\xa4~\xa8} \xa1\xfc \xaa\xbe\xb9L\xa5\xb2\xa7\xef\xa1@\xb1o\xaf\xe0\xb2\xf6\xa7\xd1 \xa1\xfc \xaa\xc9\xbd\xcd\xa9\xbc\xb5u\xa1@\xc3\xfb\xab\xee\xa4v\xaa\xf8 \xa1\xfc \xabH\xa8\xcf\xa5i\xc2\xd0\xa1@\xbe\xb9\xb1\xfd\xc3\xf8\xb6q \xa1\xfc \xbe\xa5\xb4d\xb5\xb7\xb2f\xa1@\xb8\xd6\xc6g\xaf\xcc\xa6\xcf \xa1\xfc \xb4\xba\xa6\xe6\xba\xfb\xbd\xe5\xa1@\xa7J\xa9\xc0\xa7@\xb8t \xa1\xfc \xbcw\xab\xd8\xa6W\xa5\xdf\xa1@\xa7\xce\xba\xdd\xaa\xed\xa5\xbf \xa1\xfc \xaa\xc5\xa8\xa6\xb6\xc7\xc1n\xa1@\xb5\xea\xb0\xf3\xb2\xdf\xc5\xa5 \xa1\xfc 
\xba\xd7\xa6]\xb4c\xbfn\xa1@\xba\xd6\xbdt\xb5\xbd\xbcy \xa1\xfc \xa4\xd8\xc2z\xabD\xc4_\xa1@\xa4o\xb3\xb1\xacO\xc4v \xa1\xfc \xb8\xea\xa4\xf7\xa8\xc6\xa7g\xa1@\xa4\xea\xc4Y\xbbP\xb7q \xa1\xfc \xa7\xb5\xb7\xed\xba\xdc\xa4O\xa1@\xa9\xbe\xabh\xba\xc9\xa9R \xa1\xfc \xc1{\xb2`\xbci\xc1\xa1\xa1@\xa6g\xbf\xb3\xb7\xc5\xd0\xe1 \xa1\xfc \xa6\xfc\xc4\xf5\xb4\xb5\xc4\xc9\xa1@\xa6p\xaaQ\xa4\xa7\xb2\xb1 \xa1\xfc \xa4t\xacy\xa4\xa3\xae\xa7\xa1@\xb2W\xbc\xe1\xa8\xfa\xacM \xa1\xfc \xaee\xa4\xee\xadY\xab\xe4\xa1@\xa8\xa5\xc3\xe3\xa6w\xa9w \xa1\xfc \xbfw\xaa\xec\xb8\xdb\xac\xfc\xa1@\xb7V\xb2\xd7\xa9y\xa5O \xa1\xfc \xbaa\xb7~\xa9\xd2\xb0\xf2\xa1@\xc4y\xac\xc6\xb5L\xb3\xba \xa1\xfc \xbe\xc7\xc0u\xb5n\xa5K\xa1@\xc4\xe1\xc2\xbe\xb1q\xacF \xa1\xfc \xa6s\xa5H\xa5\xcc\xb4\xc5\xa1@\xa5h\xa6\xd3\xafq\xb5\xfa \xa1\xfc \xbc\xd6\xae\xed\xb6Q\xbd\xe2\xa1@\xc2\xa7\xa7O\xb4L\xa8\xf5 \xa1\xfc \xa4W\xa9M\xa4U\xb7\xfc\xa1@\xa4\xd2\xb0\xdb\xb0\xfc\xc0H \xa1\xfc \xa5~\xa8\xfc\xb3\xc5\xb0V\xa1@\xa4J\xa9^\xa5\xc0\xbb\xf6 \xa1\xfc \xbd\xd1\xa9h\xa7B\xa8\xfb\xa1@\xb5S\xa4l\xa4\xf1\xa8\xe0 \xa1\xfc \xa4\xd5\xc3h\xa5S\xa7\xcc\xa1@\xa6P\xae\xf0\xb3s\xaaK \xa1\xfc \xa5\xe6\xa4\xcd\xa7\xeb\xa4\xc0\xa1@\xa4\xc1\xbfi\xbde\xb3W \xa1\xfc \xa4\xaf\xb7O\xc1\xf4\xb4l\xa1@\xb3y\xa6\xb8\xa5\xb1\xc2\xf7 \xa1\xfc \xb8`\xb8q\xb7G\xb0h\xa1@\xc4A\xa8K\xad\xea\xc1\xab \xa1\xfc \xa9\xca\xc0R\xb1\xa1\xb6h\xa1@\xa4\xdf\xb0\xca\xaf\xab\xafh \xa1\xfc \xa6u\xafu\xa7\xd3\xba\xa1\xa1@\xb3v\xaa\xab\xb7N\xb2\xbe \xa1\xfc \xb0\xed\xab\xf9\xb6\xae\xbe\xde\xa1@\xa6n\xc0\xef\xa6\xdb\xed\xdd \xa1\xfc \xb3\xa3\xa8\xb6\xb5\xd8\xaeL\xa1@\xaaF\xa6\xe8\xa4G\xa8\xca \xa1\xfc \xadI\xcaQ\xad\xb1\xac\xa5\xa1@\xafB\xb4\xf4\xbe\xda\xae\xf9 \xa1\xfc \xaec\xb7\xb5\xbdL\xc6{\xa1@\xbc\xd3\xc6[\xad\xb8\xc5\xe5 \xa1\xfc \xb9\xcf\xbcg\xb8V\xc3~\xa1@\xb5e\xb1m\xa5P\xc6F \xa1\xfc \xa4\xfe\xaa\xd9\xb3\xc4\xb1\xd2\xa1@\xa5\xd2\xb1b\xb9\xef\xb7\xad \xa1\xfc \xb8v\xba\xe1\xb3]\xaeu\xa1@\xb9\xaa\xb7\xe6\xa7j\xb2\xc6 \xa1\xfc 
\xa4\xc9\xb6\xa5\xaf\xc7\xb0\xa1\xa1@\xa5\xaf\xc2\xe0\xba\xc3\xacP \xa1\xfc \xa5k\xb3q\xbcs\xa4\xba\xa1@\xa5\xaa\xb9F\xa9\xd3\xa9\xfa \xa1\xfc \xacJ\xb6\xb0\xbcX\xa8\xe5\xa1@\xa5\xe7\xbbE\xb8s\xad^ \xa1\xfc \xa7\xf9\xbdZ\xc1\xe9\xc1\xf5\xa1@\xba\xa3\xae\xd1\xbe\xc0\xb8g \xa1\xfc \xa9\xb2\xc3\xb9\xb1N\xac\xdb\xa1@\xb8\xf4\xabL\xbai\xad\xeb \xa1\xfc \xa4\xe1\xab\xca\xa4K\xbf\xa4\xa1@\xaea\xb5\xb9\xa4d\xa7L \xa1\xfc \xb0\xaa\xaba\xb3\xad\xbd\xfb\xa1@\xc5X\xc1\xd4\xae\xb6\xc5\xd5 \xa1\xfc \xa5@\xb8S\xa8\xd7\xb4I\xa1@\xa8\xae\xber\xaa\xce\xbb\xb4 \xa1\xfc \xb5\xa6\xa5\\\xadZ\xb9\xea\xa1@\xb0\xc7\xb8O\xa8\xe8\xbb\xca \xa1\xfc \xed\xa8\xb7\xcb\xa5\xec\xa4\xa8\xa1@\xa6\xf5\xae\xc9\xaa\xfc\xbf\xc5 \xa1\xfc \xa9a\xa6v\xa6\xb1\xaa\xfa\xa1@\xb7L\xa5\xb9\xb1E\xc0\xe7 \xa1\xfc \xae\xd9\xa4\xbd\xa6J\xa6X\xa1@\xc0\xd9\xaez\xa7\xdf\xb6\xc9 \xa1\xfc \xba\xf6\xb0j\xba~\xb4f\xa1@\xbb\xa1\xb7P\xaaZ\xa4B \xa1\xfc \xabT\xc9@\xb1K\xa4\xc5\xa1@\xa6h\xa4h\xd8\xd3\xb9\xe7 \xa1\xfc \xae\xca\xb7\xa1\xa7\xf3\xc5Q\xa1@\xbb\xaf\xc3Q\xa7x\xbe\xee \xa1\xfc \xb0\xb2\xb3~\xb7\xc0\xe7~\xa1@\xbd\xee\xa4g\xb7|\xb7\xf9 \xa1\xfc \xa6\xf3\xbf\xed\xac\xf9\xaak\xa1@\xc1\xfa\xb9\xfa\xb7\xd0\xa6D \xa1\xfc \xb0_\xe6\xf9\xbb\xe1\xaa\xaa\xa1@\xa5\xce\xadx\xb3\xcc\xba\xeb \xa1\xfc \xab\xc5\xab\xc2\xa8F\xbaz\xa1@\xb9\xa3\xc5A\xa4\xa6\xabC \xa1\xfc \xa4E\xa6{\xac\xea\xb8\xf1\xa1@\xa6\xca\xb0p\xaf\xb3\xa6} \xa1\xfc \xc0\xae\xa9v\xab\xed\xa9\xa7\xa1@\xc1I\xa5D\xa4\xaa\xabF \xa1\xfc \xb6\xad\xaa\xf9\xb5\xb5\xb6\xeb\xa1@\xc2\xfb\xa5\xd0\xa8\xaa\xab\xb0 \xa1\xfc \xa9\xf8\xa6\xc0\xba\xd4\xa5\xdb\xa1@\xb9d\xb3\xa5\xac}\xaex \xa1\xfc \xc3m\xbb\xb7\xba\xf8\xc2\xe4\xa1@\xc5\xc9\xa9\xa6\xaaH\xad\xdf \xa1\xfc \xaav\xa5\xbb\xa9\xf3\xb9A\xa1@\xb0\xc8\xaf\xf7\xbd[\xc2\xa8 \xa1\xfc \xd0\xd4\xb8\xfc\xabn\xafa\xa1@\xa7\xda\xc3\xc0\xb6\xc1\xbd^ \xa1\xfc \xb5|\xbc\xf4\xb0^\xb7s\xa1@\xc4U\xbd\xe0\xc2J\xd4F \xa1\xfc \xa9s\xb6a\xb4\xb0\xaf\xc0\xa1@\xa5v\xb3\xbd\xaa\xc3\xaa\xbd \xa1\xfc 
\xb1f\xb4X\xa4\xa4\xb1e\xa1@\xb3\xd2\xc1\xbe\xc2\xd4\xb1\xd5 \xa1\xfc \xb2\xe2\xad\xb5\xb9\xee\xb2z\xa1@\xc5\xb2\xbb\xaa\xbf\xeb\xa6\xe2 \xa1\xfc \xb6M\xb3\xd6\xb9\xc5\xb7\xdf\xa1@\xabj\xa8\xe4\xaf\xad\xb4\xd3 \xa1\xfc \xac\xd9\xb0`\xc3\xd5\xbb|\xa1@\xc3d\xbcW\xa7\xdc\xb7\xa5 \xa1\xfc \xacp\xb0d\xaa\xf1\xae\xa2\xa1@\xaaL\xafo\xa9\xaf\xa7Y \xa1\xfc \xa8\xe2\xb2\xa8\xa8\xa3\xbe\xf7\xa1@\xb8\xd1\xb2\xd5\xbd\xd6\xb9G \xa1\xfc \xaf\xc1\xa9~\xb6\xa2\xb3B\xa1@\xa8H\xc0q\xb1I\xb9\xe9 \xa1\xfc \xa8D\xa5j\xb4M\xbd\xd7\xa1@\xb4\xb2\xbc{\xb3p\xbb\xbb \xa1\xfc \xaaY\xab\xb5\xb2\xd6\xbb\xba\xa1@\xbc\xa1\xc1\xc2\xc5w\xa9\xdb \xa1\xfc \xb4\xeb\xb2\xfc\xaa\xba\xbe\xfa\xa1@\xb6\xe9\xb2\xf5\xa9\xe2\xb1\xf8 \xa1\xfc \xaaJ\xaaI\xb1\xdf\xbbA\xa1@\xb1\xef\xae\xe4\xa6\xad\xad\xe4 \xa1\xfc \xb3\xaf\xae\xda\xa9e\xc1k\xa1@\xb8\xa8\xb8\xad\xc4\xc6\xf3\xad \xa1\xfc \xb4\xe5\xf3\xcb\xbfW\xb9B\xa1@\xad\xe2\xbc\xaf\xb5\xbc\xbe] \xa1\xfc \xaf\xd4\xc5\xaa\xe6\xf6\xa5\xab\xa1@\xb4J\xa5\xd8\xc5n\xbdc \xa1\xfc \xa9\xf6\xeb\xc9\xa7\xf1\xac\xc8\xa1@\xc4\xdd\xa6\xd5\xab\xae\xc0\xf0 \xa1\xfc \xa8\xe3\xbf\xaf\xc0\\\xb6\xba\xa1@\xbeA\xa4f\xa5R\xb8z \xa1\xfc \xb9\xa1\xdc\xae\xb2i\xae_\xa1@\xb0\xa7\xb9\xbd\xc1V\xc1R \xa1\xfc \xbf\xcb\xb1\xad\xacG\xc2\xc2\xa1@\xa6\xd1\xa4\xd6\xb2\xa7\xc2\xb3 \xa1\xfc \xa9c\xb1s\xc1Z\xaf\xbc\xa1@\xa8\xcd\xa4y\xb1c\xa9\xd0 \xa1\xfc \xd0K\xae\xb0\xb6\xea\xbc\xe4\xa1@\xbb\xc8\xc0\xeb\xdem\xb7\xd7 \xa1\xfc \xb1\xde\xafv\xa4i\xb4K\xa1@\xc4x\xb5\xab\xb6H\xa7\xc9 \xa1\xfc \xa9\xb6\xbaq\xb0s\xf8@\xa1@\xb1\xb5\xaaM\xc1|\xc2\xd2 \xa1\xfc \xc1B\xa4\xe2\xb9y\xa8\xac\xa1@\xae\xae\xbf\xdd\xa5B\xb1d \xa1\xfc \xb9\xde\xab\xe1\xb6\xe0\xc4\xf2\xa1@\xb2\xbd\xaa\xc1\xd2m\xb9\xc1 \xa1\xfc \xbd]\xf3\xa8\xa6A\xab\xf4\xa1@\xae\xaa\xc4\xdf\xae\xa3\xb4q \xa1\xfc \xba\xe0\xb7\xde\xc2\xb2\xadn\xa1@\xc5U\xb5\xaa\xbcf\xb8\xd4 \xa1\xfc \xc0e\xab\xaf\xb7Q\xafD\xa1@\xb0\xf5\xbc\xf6\xc4@\xb2D \xa1\xfc \xc6j\xc5[\xc3}\xafS\xa1@\xc0b\xc5D\xb6W\xf9\xb3 \xa1\xfc 
\xb8\xdd\xb1\xd9\xb8\xe9\xb5s\xa1@\xae\xb7\xc0\xf2\xabq\xa4` \xa1\xfc \xa5\xac\xaeg\xbf\xf1\xa4Y\xa1@\xb4R\xb5^\xa8\xbf\xbcS \xa1\xfc \xab\xef\xb5\xa7\xad\xdb\xaf\xc8\xa1@\xb6v\xa5\xa9\xa5\xf4\xb3\xa8 \xa1\xfc \xc4\xc0\xaf\xc9\xa7Q\xabU\xa1@\xa8\xc3\xac\xd2\xa8\xce\xa7\xae \xa1\xfc \xa4\xf2\xacI\xb2Q\xab\xba\xa1@\xa4u\xc6I\xa7\xb0\xaf\xba \xa1\xfc \xa6~\xa5\xda\xa8C\xb6\xca\xa1@\xc4f\xb7u\xae\xd4\xc4\xa3 \xa1\xfc \xd6o\xbfY\xc4a\xbaW\xa1@\xb1\xe2\xbez\xc0\xf4\xb7\xd3 \xa1\xfc \xab\xfc\xc1~\xb2\xe7\xd2\xe9\xa1@\xa5\xc3\xb8k\xa6N\xcao \xa1\xfc \xafx\xa8B\xa4\xde\xbb\xe2\xa1@\xad\xc1\xa5\xf5\xb4Y\xbcq \xa1\xfc \xa7\xf4\xb1a\xac\xe1\xb2\xf8\xa1@\xb1r\xab\xde\xc2\xa4\xb2\xb7 \xa1\xfc \xa9t\xad\xae\xb9\xe8\xbbD\xa1@\xb7M\xbbX\xb5\xa5\xbb\xa6 \xa1\xfc \xbf\xd7\xbby\xa7U\xaa\xcc\xa1@\xb2j\xabv\xa5G\xa4] \xa1\xfc \xc6\xea\xc7t\xc7V\xc7R\xc7b\xc7_\xc7O\xc7H\xc7q\xc7S\xc7r\xc7y\xc7v\xc6\xf1\xc7o\xc7F\xc7s\xc7D\xc7K\xc7T\xc7Q\xc7p\xc7g\xc6\xec\xc7w\xc7U\xc6\xf0\xc6\xf5\xc7k\xc7e\xc6\xf7\xc7\\\xc6\xf9\xc6\xee\xc7M\xc6\xe8\xc6\xfb\xc6\xf3\xc7m\xc7h\xc7f\xc6\xfd\xc7x\xc7Y\xc7i\xc7B\xc7@'
|
|
# Round-trip check for big5: decode the sample bytes, show the text,
# re-encode it, and report if the result differs from the original bytes.
let decoded4 = codecs.decode(data4, "big5")
print(decoded4)
let encoded4 = codecs.encode(decoded4, "big5")
if encoded4 != data4:
    print("Didn't roundtrip")

# The five two-byte codes 88 62 .. 88 66 must decode to the expected string,
# which includes base-letter + combining-mark sequences.
if codecs.decode(b"\x88\x62\x88\x63\x88\x64\x88\x65\x88\x66", "big5", errors="replace") != "Ê̄ẾÊ̌ỀÊ":
    print("Didn't decode the combining-sequence-mapped codes")
|
|
|
|
let data5 = b'\x949\xc65 \xa8\xaa Br\x810\x8d0ko\n\xa7\xa9\xa7\xdf\xa7\xd1\xa7\xdc\xa7\xe0\xa7\xde\xa7\xe3\xa7\xe4\xa7\xd3\xa7\xe0 \xa7\xe5 \xa7\xb2\xa7\xe0\xa7\xe3\xa7\xe4\xa7\xe0\xa7\xd3\xa7\xed\xa7\xe7 \xa7\xd2\xa7\xed\xa7\xdd\xa7\xd1 \xa7\xd3\xa7\xe3\xa7\xf1 \xa7\xae\xa7\xe0\xa7\xe3\xa7\xdc\xa7\xd3\xa7\xd1; \xa7\xd5\xa7\xd6\xa7\xdf\xa7\xd6\xa7\xd4 \xa7\xd3 \xa7\xdf\xa7\xed\xa7\xdf\xa7\xd6\xa7\xea\xa7\xdf\xa7\xda\xa7\xdb \xa7\xd4\xa7\xe0\xa7\xd5 \xa7\xe5 \xa7\xe3\xa7\xe4\xa7\xd1\xa7\xe2\xa7\xe0\xa7\xd4\xa7\xe0 \xa7\xd4\xa7\xe2\xa7\xd1\xa7\xe6\xa7\xd1 \xa7\xd2\xa7\xed\xa7\xdd\xa7\xe0 \xa7\xd5\xa7\xe0\xa7\xe3\xa7\xe4\xa7\xd1\xa7\xe4\xa7\xe0\xa7\xe9\xa7\xdf\xa7\xe0, \xa7\xe1\xa7\xe0\xa7\xe4\xa7\xe0\xa7\xde\xa7\xe5 \xa7\xe9\xa7\xe4\xa7\xe0 \xa7\xd2\xa7\xed\xa7\xdd\xa7\xda \xa7\xe1\xa7\xd6\xa7\xe2\xa7\xd6\xa7\xd9\xa7\xd1\xa7\xdd\xa7\xe0\xa7\xd8\xa7\xd6\xa7\xdf\xa7\xed \xa7\xd3\xa7\xe3\xa7\xd6 \xa7\xda\xa7\xde\xa7\xd6\xa7\xdf\xa7\xda\xa7\xf1, \xa7\xda \xa7\xe1\xa7\xe0\xa7\xe4\xa7\xe0\xa7\xde\xa7\xe5 \xa7\xaf\xa7\xda\xa7\xdc\xa7\xe0\xa7\xdd\xa7\xe5\xa7\xea\xa7\xdc\xa7\xd1, \xa7\xd9\xa7\xd1\xa7\xd3\xa7\xd6\xa7\xd5\xa7\xf1 \xa7\xe3\xa7\xd3\xa7\xe0\xa7\xd6\xa7\xd4\xa7\xe0 \xa7\xe3\xa7\xe0\xa7\xd2\xa7\xe3\xa7\xe4\xa7\xd3\xa7\xd6\xa7\xdf\xa7\xdf\xa7\xe0\xa7\xd4\xa7\xe0 \xa7\xe2\xa7\xed\xa7\xe3\xa7\xd1\xa7\xdc\xa7\xd1 \xa7\xda \xa7\xe3\xa7\xd1\xa7\xde\xa7\xed\xa7\xd6 \xa7\xde\xa7\xe0\xa7\xd5\xa7\xdf\xa7\xed\xa7\xd6 \xa7\xe2\xa7\xd6\xa7\xdb\xa7\xe4\xa7\xe5\xa7\xd9\xa7\xed, 
\n\xa1\xa1\xc8\xd5\xb1\xbe\x87\xf8\xc3\xf1\xa4\xcf\xa1\xa2\xd5\xfd\xae\x94\xa4\xcb\xdfx\x94H\xa4\xb5\xa4\xec\xa4\xbf\x87\xf8\x95\xfe\xa4\xcb\xa4\xaa\xa4\xb1\xa4\xeb\xb4\xfa\xb1\xed\xd5\xdf\xa4\xf2\xcd\xa8\xa4\xb8\xa4\xc6\xd0\xd0\x84\xd3\xa4\xb7\xa1\xa2\xa4\xef\xa4\xec\xa4\xe9\xa4\xc8\xa4\xef\xa4\xec\xa4\xe9\xa4\xce\xd7\xd3\x8cO\xa4\xce\xa4\xbf\xa4\xe1\xa4\xcb\xa1\xa2\xd6T\x87\xf8\xc3\xf1\xa4\xc8\xa4\xce\x85f\xba\xcd\xa4\xcb\xa4\xe8\xa4\xeb\xb3\xc9\xb9\xfb\xa4\xc8\xa1\xa2\xa4\xef\xa4\xac\x87\xf8\xc8\xab\xcd\xc1\xa4\xcb\xa4\xef\xa4\xbf\xa4\xc4\xa4\xc6\xd7\xd4\xd3\xc9\xa4\xce\xa4\xe2\xa4\xbf\xa4\xe9\xa4\xb9\xbb\xdd\x9d\xc9\xa4\xf2\xb4_\xb1\xa3\xa4\xb7\xa1\xa2\xd5\xfe\xb8\xae\xa4\xce\xd0\xd0\xa0\x91\xa4\xcb\xa4\xe8\xa4\xc4\xa4\xc6\xd4\xd9\xa4\xd3\x91\xf0\xa0\x8e\xa4\xce\x91K\xb5\x9c\xa4\xac\xc6\xf0\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xce\xa4\xca\xa4\xa4\xa4\xe4\xa4\xa6\xa4\xcb\xa4\xb9\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xf2\x9bQ\xd2\xe2\xa4\xb7\xa1\xa2 \xa1\xed\n\x833\x919\x827\xf39\x836\x843 \x832\xf12\x831\xe87\x832\xf95 \x833\xa87\x835\xa48\x832\xee3 \x831\xad4\x828\xc71\x828\xf99 \x833\x867\x830\xb35 \x829\x867\x836\x843\x827\xf40\x830\xee7\x833\x951 3\xa1\xa41\x833\x871\x829\x9c4\x833\x947\x830\x919 \x827\xdb1\x830\xb52\x829\xa31 \x829\x867\x836\x843\x830\xee7\x827\xf40\x833\x9b9\x832\xa27\x833\xaa4\x831\x975\x833\x975 \x830\xfe0\x835\xa48\x827\xe87 \x831\x983\x833\x975\x832\xee3 \x836\x860\x827\xda7\x836\x843 4\xa1\xa419\x830\xee7\x833\xc15\x833\x9a3\x828\xd95\x833\x955 \x827\xe31\x832\x9f2\x836\x839\x827\xe59, \x833\xb35\x827\xf40\x833\x975 \x830\xee7\x833\xc15\x827\xd23\x836\x948\x827\xe87 \x835\xda0\x836\x9d1\x833\xa84 \x835\xa48\x833\x9b1\x833\x975 \x831\xe87\x830\xc92\x832\xee3 \x833\x9c0\x827\xcf6\x836\x839\x832\xf11 \x833\xaa4\x833\x975\xa1\xa4\x833\x9a7\x829\x9a3\x832\xf95 \x829\x9c4\x835\xdd5\x832\xe31\x830\x919\x832\xb07 \x830\xee7\x833\xb36\x833\x975 \x829\x843\x827\xe11\x833\x955 \x827\xe80\x827\xe59\x836\xbb9 \x836\x839\x827\xe59, 
\x829\xd67\x830\xf32\x827\xcf6\x836\x839\n\xcc\xec\xb5\xd8\xd0\xfe\xfcS\xa1\xa1\xd3\xee\xd6\xe6\xba\xe9\xbb\xc4 \xa1\xce \xc8\xd5\xd4\xc2\xd3\xaf\xea\xbe\xa1\xa1\xb3\xbd\xcb\xde\xc1\xd0\x8f\x88 \xa1\xce \xba\xae\x81\xed\xca\xee\xcd\xf9\xa1\xa1\xc7\xef\xca\xd5\xb6\xac\xb2\xd8 \xa1\xce \xe9c\xf0N\xb3\xc9\x9aq\xa1\xa1\xc2\xc9\xd5\xd9\xd5{\xea\x96 \xa1\xce \xeb\x85\xf2v\xd6\xc2\xd3\xea\xa1\xa1\xc2\xb6\xbdY\x9e\xe9\xcb\xaa \xa1\xce \xbd\xf0\xc9\xfa\xfb\x90\xcb\xae\xa1\xa1\xd3\xf1\xb3\xf6\x8d\x8b\x8c\xf9 \xa1\xce \x84\xa6\xcc\x96\xbe\xde\xeaI\xa1\xa1\xd6\xe9\xb7Q\xd2\xb9\xb9\xe2 \xa1\xce \xb9\xfb\xd5\xe4\xc0\xee\xe8\xcd\xa1\xa1\xb2\xcb\xd6\xd8\xbd\xe6\xcbK \xa1\xce \xa4\xa4\xa4\xed\xa4\xcf\xa4\xcb\xa4\xdb\xa4\xd8\xa4\xc8\xa4\xc1\xa4\xea\xa4\xcc\xa4\xeb\xa4\xf2\xa4\xef\xa4\xab\xa4\xe8\xa4\xbf\xa4\xec\xa4\xbd\xa4\xc4\xa4\xcd\xa4\xca\xa4\xe9\xa4\xe0\xa4\xa6\xa4\xf0\xa4\xce\xa4\xaa\xa4\xaf\xa4\xe4\xa4\xde\xa4\xb1\xa4\xd5\xa4\xb3\xa4\xa8\xa4\xc6\xa4\xa2\xa4\xb5\xa4\xad\xa4\xe6\xa4\xe1\xa4\xdf\xa4\xb7\xa4\xf1\xa4\xd2\xa4\xe2\xa4\xbb\xa4\xb9'
|
|
# Round-trip check for gb18030: decode the sample bytes, show the text,
# re-encode it, and report if the result differs from the original bytes.
let decoded5 = codecs.decode(data5, "gb18030")
print(decoded5)
let encoded5 = codecs.encode(decoded5, "gb18030")
if encoded5 != data5:
    print("Didn't roundtrip")

# Encode the same text as gbk, substituting numeric character references for
# anything gbk cannot represent, then decode those bytes as gb18030 and show
# the result.
# NOTE(review): indentation was not recoverable here; this statement is
# assumed to run unconditionally rather than inside the mismatch branch.
print(codecs.decode(codecs.encode(decoded5, "gbk", errors="xmlcharrefreplace"), "gb18030"))

# Separator line before the next group of output.
print("-")
|
|
|
|
let test_0 = "염盐塩鹽䝼丽/〒"
|
|
let test_1 = "令䝼むかしむかしあるところに"
|
|
let test_2 = """ foo = "Quoted string ****令䝼" """
|
|
|
|
# Encode test_0 to iso-2022-jp with errors="replace" and decode it back.
# Every output character must be either the input character at the same
# index or the "?" substitute; anything else is reported as a failure.
let out_0a = codecs.decode(codecs.encode(test_0, "iso-2022-jp", errors="replace"), "iso-2022-jp", errors="replace")
let success = True
for n, char in enumerate(out_0a):
    if char not in (test_0[n], "?"):
        print("replace in DBCS state unsuccessful:", out_0a)
        success = False
        # One mismatch is enough; stop so the failure is printed only once.
        break
if success:
    print("replace in DBCS state successful:", out_0a)
|
|
|
|
# Encode test_0 to iso-2022-jp with errors="xmlcharrefreplace" and decode it
# back. A character the codec cannot encode is emitted as a decimal numeric
# character reference, and decoding does not turn that reference back into
# the character. So the check looks for the reference text of U+477C
# (decimal 18300), not the raw character.
# NOTE(review): this assumes U+477C is not encodable by this codec and that
# the handler emits the decimal "&#18300;" form — confirm against the codec.
let out_0b = codecs.decode(codecs.encode(test_0, "iso-2022-jp", errors="xmlcharrefreplace"), "iso-2022-jp", errors="replace")
if "&#18300;" not in out_0b:
    print("xmlcharrefreplace in DBCS state unsuccessful:", out_0b)
else:
    print("xmlcharrefreplace in DBCS state successful:", out_0b)
|
|
|
|
# Round-trip test_1 through iso-2022-jp with errors="replace" on both sides.
# The kana tail of the string must come back unchanged; if it does not, the
# replacement of an earlier character has thrown the byte stream out of step.
let out_1 = codecs.decode(codecs.encode(test_1, "iso-2022-jp", errors="replace"), "iso-2022-jp", errors="replace")
if not out_1.endswith("むかしむかしあるところに"):
    print("Encoding misaligned:", out_1)
else:
    print("Encoding not misaligned:", out_1)
|
|
|
|
# Round-trip test_2 through iso-2022-jp with errors="replace" on both sides.
# test_2 ends with a closing double quote followed by one space, so the
# second-to-last character of the result must still be that quote.
let out_2 = codecs.decode(codecs.encode(test_2, "iso-2022-jp", errors="replace"), "iso-2022-jp", errors="replace")
if out_2[-2] != '"':
    print("Delimiter swallowed:", out_2)
else:
    print("Delimiter not swallowed:", out_2)
|
|
|
|
# https://encoding.spec.whatwg.org/#security-background
# Decode a quoted string whose last byte before the closing quote is 0x82,
# using errors="replace", and print the result for inspection.
# NOTE(review): presumably this shows that the stray 0x82 byte does not
# consume the ASCII quote that follows it — confirm against the linked section.
|
|
print(codecs.decode(b'"robert\x82"', "shift_jis", errors="replace"))
|
|
|
|
let scrutiny = b'\[$BF|K\\T"L1$O!"\[(B\[$B@5aD$KA*Z)$5$l$?\[(BNo: \[(B; Yes: \[(J\\; Yes: \\\[(B; No: \[(J; No: \[(B\[(J; Yes: \[(B~; No: \[(J\[(B; Yes: ~\[(J; No: \[(B; Yes: \[(J~; Yes: .;\[(B'
|
|
# Decode the same escape-sequence test bytes with both codecs, using
# errors="replace", and print each result so they can be compared.
for codec_name in ("iso-2022-jp", "jis_encoding"):
    print(codecs.decode(scrutiny, codec_name, errors="replace"))
|
|
|
|
# Well-formed UTF-8 input: decoded with the default error handling.
let valid_utf8 = b'co\xc3\xb6rdination'
print(codecs.decode(valid_utf8, "utf-8"))

# Input containing a lone 0x9A byte: decoded with errors="replace".
let invalid_utf8 = b'co\x9ardination'
print(codecs.decode(invalid_utf8, "utf-8", errors="replace"))
|
|
|
|
|