[bim] Copy UTF8 decoder into bim so it is single-file
This commit is contained in:
parent
fa4b4daca8
commit
5bdaa7b77b
43
apps/bim.c
43
apps/bim.c
@ -49,10 +49,8 @@
|
||||
|
||||
#ifdef __toaru__
|
||||
#include <sys/fswait.h>
|
||||
#include <toaru/decodeutf8.h>
|
||||
#else
|
||||
#include <poll.h>
|
||||
#include "../base/usr/include/toaru/decodeutf8.h"
|
||||
#endif
|
||||
|
||||
#define BLOCK_SIZE 4096
|
||||
@ -2056,6 +2054,47 @@ void goto_line(int line) {
|
||||
/* File-scope code point accumulator; presumably handed to decode() as its
 * `codep` argument -- confirm against callers. */
static uint32_t codepoint_r;
|
||||
/* File-scope decoder state, starting at 0 (the value of UTF8_ACCEPT);
 * presumably handed to decode() as its `state` argument -- confirm against callers. */
static uint32_t state = 0;
|
||||
|
||||
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/**
 * Incremental UTF-8 decoder: consume one input byte and advance the state.
 *
 * @param state In/out decoder state. UTF8_ACCEPT means "between code points";
 *              UTF8_REJECT means an invalid lead byte was seen; values 2..4
 *              count the bytes still expected (plus one) inside a sequence.
 * @param codep In/out code point accumulator. Holds a complete code point
 *              whenever the function returns UTF8_ACCEPT.
 * @param byte  Next input byte. Only the low eight bits are used.
 * @return The new value of *state.
 *
 * Once *state is UTF8_REJECT it stays there until the caller resets it.
 * Trailing bytes are not checked for the 10xxxxxx pattern; their low six
 * bits are appended to *codep regardless.
 */
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
	/* State entered after a lead byte, indexed by the byte's top five bits. */
	static const uint8_t state_table[32] = {
		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx */
		1,1,1,1,1,1,1,1,                 /* 10xxxxxx */
		2,2,2,2,                         /* 110xxxxx */
		3,3,                             /* 1110xxxx */
		4,                               /* 11110xxx */
		1                                /* 11111xxx */
	};

	/* Payload bits carried by a lead byte, indexed the same way. */
	static const uint8_t mask_bytes[32] = {
		0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
		0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
		0x1F,0x1F,0x1F,0x1F,
		0x0F,0x0F,
		0x07,
		0x00
	};

	/* State transition taken after each trailing byte. */
	static const uint8_t next[5] = {
		0,
		1,
		0,
		2,
		3
	};

	/* Keep table indices in range even if the caller passed a
	 * sign-extended char or some other value above 0xFF. */
	byte &= 0xFF;

	if (*state == UTF8_ACCEPT) {
		*codep = byte & mask_bytes[byte >> 3];
		*state = state_table[byte >> 3];
	} else if (*state < sizeof(next) / sizeof(next[0])) {
		*codep = (byte & 0x3F) | (*codep << 6);
		*state = next[*state];
	} else {
		/* Unknown state: refuse to index past the end of next[]. */
		*state = UTF8_REJECT;
	}

	return *state;
}
|
||||
|
||||
/**
|
||||
* Process (part of) a file and add it to a buffer.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user