Unicode is hard... thus, I'm not done.

This commit is contained in:
Kevin Lange 2012-01-28 22:27:37 -06:00
parent fd77e36013
commit 1d7c91f3b1
7 changed files with 475 additions and 25 deletions

View File

@ -1,6 +1,6 @@
CC = i686-pc-toaru-gcc
CPP = i686-pc-toaru-g++
CFLAGS = -march=core2 -std=c99 -O3 -m32 -Wa,--32
CFLAGS = -march=core2 -std=c99 -O3 -m32 -Wa,--32 -Xlinker --eh-frame-hdr
CPPFLAGS = -march=core2 -O3 -m32 -Wa,--32
EXECUTABLES = $(patsubst %.c,%.o,$(wildcard lib/*.c)) $(patsubst %.c,../hdd/bin/%,$(wildcard *.c)) $(patsubst %.cpp,../hdd/bin/%,$(wildcard *.cpp))
@ -28,9 +28,9 @@ clean:
@${CC} ${CFLAGS} -s -I ../util/toaru-toolchain/i686-pc-toaru/include/freetype2/ -o $@ $< ../util/toaru-toolchain/i686-pc-toaru/lib/libfreetype.a ${ERRORS}
@${END} "CC" "$< [freetype]"
../hdd/bin/terminal: terminal.c
../hdd/bin/terminal: terminal.c lib/utf8_decode.o
@${BEG} "CC" "$< [freetype]"
@${CC} ${CFLAGS} -s -I ../util/toaru-toolchain/i686-pc-toaru/include/freetype2/ -o $@ $< ../util/toaru-toolchain/i686-pc-toaru/lib/libfreetype.a ${ERRORS}
@${CC} ${CFLAGS} -s -I ../util/toaru-toolchain/i686-pc-toaru/include/freetype2/ -o $@ $< ../util/toaru-toolchain/i686-pc-toaru/lib/libfreetype.a lib/utf8_decode.o ${ERRORS}
@${END} "CC" "$< [freetype]"
../hdd/bin/login: login.c lib/sha2.o

View File

@ -3,6 +3,7 @@
* echo
*/
#include <stdio.h>
#include <string.h>
void usage() {
printf("echo [-n] [-e] [STRING]...\n"

186
userspace/lib/utf8_decode.c Normal file
View File

@ -0,0 +1,186 @@
/* utf8_decode.c */
/* 2009-02-13 */
/*
Copyright (c) 2005 JSON.org
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The Software shall be used for Good, not Evil.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include "utf8_decode.h"
/*
Very Strict UTF-8 Decoder
UTF-8 is a multibyte character encoding of Unicode. A character can be
represented by 1-4 bytes. The bit pattern of the first byte indicates the
number of continuation bytes.
Most UTF-8 decoders tend to be lenient, attempting to recover as much
information as possible, even from badly encoded input. This UTF-8
decoder is not lenient. It will reject input which does not include
proper continuation bytes. It will reject aliases (or suboptimal
codings). It will reject surrogates. (Surrogate encoding should only be
used with UTF-16.)
Code Continuation Minimum Maximum
0xxxxxxx 0 0 127
10xxxxxx error
110xxxxx 1 128 2047
1110xxxx 2 2048 65535 excluding 55296 - 57343
11110xxx 3 65536 1114111
11111xxx error
*/
/*
    Decoder state. It lives at file scope, so a single input buffer is
    tracked at a time; utf8_decode_init() resets all of it.
*/
static int the_index = 0;   /* offset of the next byte get() will read */
static int the_length = 0;  /* number of bytes in the input, as passed to utf8_decode_init() */
static int the_char = 0;    /* count of utf8_decode_next() calls that started decoding a character */
static int the_byte = 0;    /* offset at which the most recently decoded character began */
static char* the_input;     /* the buffer handed to utf8_decode_init() */
/*
    Fetch the next raw byte of the input as a value in 0..255 and step past
    it. When every byte has already been consumed, UTF8_END is returned and
    the read position is left where it is.
*/
static int
get()
{
    if (the_index < the_length) {
        int byte = the_input[the_index] & 0xFF;
        the_index++;
        return byte;
    }
    return UTF8_END;
}
/*
    Read one byte and treat it as a continuation byte (bit pattern 10xxxxxx).
    Returns its low six payload bits, or UTF8_ERROR when the byte does not
    carry the continuation marker (this includes running out of input).
*/
static int
cont()
{
    int byte = get();
    if ((byte & 0xC0) != 0x80) {
        return UTF8_ERROR;
    }
    return byte & 0x3F;
}
/*
    Point the decoder at a new input buffer of the given length and rewind
    every counter. The decoder keeps its state in file-scope variables, so it
    is not reentrant.
*/
void
utf8_decode_init(char p[], int length)
{
    the_input  = p;
    the_length = length;
    the_index  = 0;
    the_byte   = 0;
    the_char   = 0;
}
/*
    Report the byte offset at which the most recently decoded character
    started. Mainly useful when reporting errors.
*/
int
utf8_decode_at_byte()
{
    int offset = the_byte;
    return offset;
}
/*
    Report the zero-based index of the most recently decoded character, or 0
    if nothing has been decoded yet. Mainly useful when reporting errors. For
    pure ASCII text this equals the byte offset.
*/
int
utf8_decode_at_character()
{
    if (the_char <= 0) {
        return 0;
    }
    return the_char - 1;
}
/*
    Decode the next character from the input.

    Returns the code point (0 .. 0x10FFFF) on success,
            UTF8_END   when the input has been fully consumed,
            UTF8_ERROR for a malformed sequence, an overlong encoding,
                       a surrogate, or a value above 0x10FFFF.
*/
int
utf8_decode_next()
{
    int lead;  /* leading byte of the sequence */
    int code;  /* assembled code point */

    if (the_index >= the_length) {
        if (the_index == the_length) {
            return UTF8_END;
        }
        return UTF8_ERROR;
    }

    /* Remember where this character starts before consuming anything. */
    the_byte = the_index;
    the_char += 1;
    lead = get();

    /* 0xxxxxxx: plain ASCII, no continuation bytes. */
    if ((lead & 0x80) == 0) {
        return lead;
    }

    /* 110xxxxx: one continuation byte, 0x80 .. 0x7FF. */
    if ((lead & 0xE0) == 0xC0) {
        int t1 = cont();
        if (t1 < 0) {
            return UTF8_ERROR;
        }
        code = ((lead & 0x1F) << 6) | t1;
        if (code < 0x80) {
            return UTF8_ERROR;  /* overlong encoding */
        }
        return code;
    }

    /* 1110xxxx: two continuation bytes, 0x800 .. 0xFFFF minus surrogates. */
    if ((lead & 0xF0) == 0xE0) {
        /* Both continuation bytes are consumed before either is checked. */
        int t1 = cont();
        int t2 = cont();
        if (t1 < 0 || t2 < 0) {
            return UTF8_ERROR;
        }
        code = ((lead & 0x0F) << 12) | (t1 << 6) | t2;
        if (code < 0x800) {
            return UTF8_ERROR;  /* overlong encoding */
        }
        if (code >= 0xD800 && code <= 0xDFFF) {
            return UTF8_ERROR;  /* UTF-16 surrogate range */
        }
        return code;
    }

    /* 11110xxx: three continuation bytes, 0x10000 .. 0x10FFFF. */
    if ((lead & 0xF8) == 0xF0) {
        /* All three continuation bytes are consumed before any is checked. */
        int t1 = cont();
        int t2 = cont();
        int t3 = cont();
        if (t1 < 0 || t2 < 0 || t3 < 0) {
            return UTF8_ERROR;
        }
        code = ((lead & 0x0F) << 18) | (t1 << 12) | (t2 << 6) | t3;
        if (code < 0x10000 || code > 0x10FFFF) {
            return UTF8_ERROR;  /* overlong, or beyond the Unicode range */
        }
        return code;
    }

    /* 10xxxxxx (stray continuation) or 11111xxx (invalid lead byte). */
    return UTF8_ERROR;
}

View File

@ -0,0 +1,10 @@
/* utf8_decode.h */
#ifndef UTF8_DECODE_H
#define UTF8_DECODE_H

/*
    Sentinel results of utf8_decode_next(). Both are negative so they can
    never collide with a decoded code point. The values are parenthesized so
    the macros expand safely inside larger expressions.
*/
#define UTF8_END   (-1)   /* the input has been fully consumed */
#define UTF8_ERROR (-2)   /* the input is not valid strict UTF-8 */

/* Byte offset at which the most recently decoded character started. */
extern int utf8_decode_at_byte(void);

/* Zero-based index of the most recently decoded character (0 if none yet). */
extern int utf8_decode_at_character(void);

/* Point the decoder at `length` bytes starting at `p`; resets all state. */
extern void utf8_decode_init(char p[], int length);

/* Decode the next character: a code point, UTF8_END, or UTF8_ERROR. */
extern int utf8_decode_next(void);

#endif /* UTF8_DECODE_H */

192
userspace/readelf.c Normal file
View File

@ -0,0 +1,192 @@
/*
* ToAruOS Miniature ELF Reader
* (C) 2011 Kevin Lange
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* The Master ELF Header */
#include "../kernel/include/elf.h"
/**
 * Print a short help text for readelf and terminate the process
 * with exit status 1. This function does not return.
 * @param argc Argument count (unused)
 * @param argv Arguments to binary; argv[0] is printed as the program name
 */
void usage(int argc, char ** argv) {
	/* One call, three output lines; the text is unchanged */
	printf("%s [filename]\n"
	       "\tDisplays information on ELF binaries such as section names,\n"
	       "\tlocations, sizes, and loading positions in memory.\n",
	       argv[0]);
	exit(1);
}
/**
* Application entry point.
* @returns 0 on sucess, 1 on failure
*/
int main(int argc, char ** argv) {
/* Process arguments */
if (argc < 2) usage(argc,argv);
FILE * binary; /**< File pointer for requested executable */
size_t binary_size; /**< Size of the file */
char * binary_buf; /**< Buffer to store the binary in memory */
Elf32_Header * header; /**< ELF header */
char * string_table; /**< Room for some string tables */
/* Open the requested binary */
binary = fopen(argv[1], "r");
/* Jump to the end so we can get the size */
fseek(binary, 0, SEEK_END);
binary_size = ftell(binary);
fseek(binary, 0, SEEK_SET);
/* Some sanity checks */
if (binary_size < 4 || binary_size > 0xFFFFFFF) {
printf("Oh no! I don't quite like the size of this binary.\n");
return 1;
}
printf("Binary is %u bytes.\n", (unsigned int)binary_size);
/* Read the binary into a buffer */
binary_buf = malloc(binary_size);
fread((void *)binary_buf, binary_size, 1, binary);
/* Let's start considering this guy an elf, 'eh? */
header = (Elf32_Header *)binary_buf;
/* Verify the magic */
if ( header->e_ident[0] != ELFMAG0 ||
header->e_ident[1] != ELFMAG1 ||
header->e_ident[2] != ELFMAG2 ||
header->e_ident[3] != ELFMAG3) {
printf("Header magic is wrong!\n");
printf("Are you sure this is a 32-bit ELF binary or object file?\n");
return 1;
}
/* Let's print out some of the header information, shall we? */
printf("\033[1mELF Header\033[0m\n");
/* File type */
printf("[Type %d] ", header->e_type);
switch (header->e_type) {
case ET_NONE:
printf("No file type.\n");
break;
case ET_REL:
printf("Relocatable file.\n");
break;
case ET_EXEC:
printf("Executable file.\n");
break;
case ET_DYN:
printf("Shared object file.\n");
break;
case ET_CORE:
printf("Core file.\n");
break;
default:
printf("(Unknown file type)\n");
break;
}
/* Machine Type */
switch (header->e_machine) {
case EM_386:
printf("Intel x86\n");
break;
default:
printf("Unknown machine: %d\n", header->e_machine);
break;
}
/* Version == EV_CURRENT? */
if (header->e_version == EV_CURRENT) {
printf("ELF version is 1, as it should be.\n");
}
/* Entry point in memory */
printf("Binary entry point in virtual memory is at 0x%x\n", header->e_entry);
/* Program header table offset */
printf("Program header table is at +0x%x and one entry is 0x%x bytes.\n"
"There are %d total program headers.\n",
header->e_phoff, header->e_phentsize, header->e_phnum);
/* Section header table offset */
printf("Section header table is at +0x%x and one entry is 0x%x bytes.\n"
"There are %d total section headers.\n",
header->e_shoff, header->e_shentsize, header->e_shnum);
/* Read the program headers */
printf("\033[1mProgram Headers\033[0m\n");
for (uint32_t x = 0; x < header->e_phentsize * header->e_phnum; x += header->e_phentsize) {
if (header->e_phoff + x > binary_size) {
printf("Tried to read beyond the end of the file.\n");
return 1;
}
/* Grab the program header */
Elf32_Phdr * phdr = (Elf32_Phdr *)((uintptr_t)binary_buf + (header->e_phoff + x));
/* Print the header type */
switch (phdr->p_type) {
case PT_LOAD:
printf("[Loadable Segment]\n");
break;
case PT_DYNAMIC:
printf("[Dynamic Loading Information]\n");
break;
case PT_INTERP:
printf("[Interpreter Path]\n");
break;
default:
printf("[Unused Segement]\n");
break;
}
}
/* Find the (hopefully two) string tables */
printf("\033[1mString Tables\033[0m\n");
uint32_t i = 0;
for (uint32_t x = 0; x < header->e_shentsize * header->e_shnum; x += header->e_shentsize) {
if (header->e_shoff + x > binary_size) {
printf("Tried to read beyond the end of the file.\n");
return 1;
}
Elf32_Shdr * shdr = (Elf32_Shdr *)((uintptr_t)binary_buf + (header->e_shoff + x));
if (i == header->e_shstrndx) {
string_table = (char *)((uintptr_t)binary_buf + shdr->sh_offset);
printf("Found a string table at 0x%x\n", shdr->sh_offset);
}
++i;
}
/* Read the section headers */
printf("\033[1mSection Headers\033[0m\n");
for (uint32_t x = 0; x < header->e_shentsize * header->e_shnum; x += header->e_shentsize) {
if (header->e_shoff + x > binary_size) {
printf("Tried to read beyond the end of the file.\n");
return 1;
}
Elf32_Shdr * shdr = (Elf32_Shdr *)((uintptr_t)binary_buf + (header->e_shoff + x));
printf("[%d] %s\n", shdr->sh_type, (char *)((uintptr_t)string_table + shdr->sh_name));
printf("Section starts at 0x%x and is 0x%x bytes long.\n", shdr->sh_offset, shdr->sh_size);
if (shdr->sh_addr) {
printf("It should be loaded at 0x%x.\n", shdr->sh_addr);
}
}
return 0;
}
/*
* vim:noexpandtab
* vim:tabstop=4
* vim:shiftwidth=4
*/

View File

@ -13,6 +13,8 @@
#include FT_FREETYPE_H
#include FT_CACHE_H
#include "lib/utf8_decode.h"
#define FONT_SIZE 13
/* Binary Literals */
@ -50,16 +52,18 @@
#define ANSI_BOLD 0x01
#define ANSI_UNDERLINE 0x02
#define ANSI_ITALIC 0x04
#define ANSI_FRAKTUR 0x08 /* As if I'll ever implement that */
#define ANSI_EXTRA 0x08 /* Character should use "extra" font (Japanese) */
#define ANSI_DOUBLEU 0x10
#define ANSI_OVERLINE 0x20
#define ANSI_BLINK 0x40
#define ANSI_WIDE 0x40 /* Character is double width */
#define ANSI_CROSS 0x80 /* And that's all I'm going to support */
#define DEFAULT_FG 0x07
#define DEFAULT_BG 0x10
#define DEFAULT_FLAGS 0x00
#define ANSI_EXT_IOCTL 'z'
/* Return the smaller of two 16-bit unsigned values. */
uint16_t min(uint16_t a, uint16_t b) {
	if (b <= a) {
		return b;
	}
	return a;
}
@ -82,6 +86,7 @@ static struct _ansi_state {
uint8_t bg ; /* Current background color */
uint8_t flags ; /* Bright, etc. */
uint8_t escape; /* Escape status */
uint8_t local_echo;
uint8_t buflen; /* Buffer Length */
char buffer[100]; /* Previous buffer */
} state;
@ -96,11 +101,6 @@ void (*ansi_cls)(void) = NULL;
void (*redraw_cursor)(void) = NULL;
static struct {
uint16_t x;
uint16_t y;
} saved_state;
void
ansi_dump_buffer() {
for (int i = 0; i < state.buflen; ++i) {
@ -171,15 +171,32 @@ ansi_put(
argv[argc] = NULL;
/* Alright, let's do this */
switch (c) {
case ANSI_EXT_IOCTL:
{
if (argc > 0) {
int arg = atoi(argv[0]);
switch (argc) {
case 1001:
/* Local Echo Off */
break;
case 1002:
/* Local Echo On */
break;
default:
break;
}
}
}
break;
case ANSI_SCP:
{
saved_state.x = ansi_get_csr_x();
saved_state.y = ansi_get_csr_y();
state.save_x = ansi_get_csr_x();
state.save_y = ansi_get_csr_y();
}
break;
case ANSI_RCP:
{
ansi_set_csr(saved_state.x, saved_state.y);
ansi_set_csr(state.save_x, state.save_y);
}
break;
case ANSI_SGR:
@ -208,9 +225,6 @@ ansi_put(
} else if (arg == 39) {
/* Default Foreground */
state.fg = 7;
} else if (arg == 20) {
/* FRAKTUR: Like old German stuff */
state.flags |= ANSI_FRAKTUR;
} else if (arg == 9) {
/* X-OUT */
state.flags |= ANSI_CROSS;
@ -220,8 +234,7 @@ ansi_put(
state.fg = state.bg;
state.bg = temp;
} else if (arg == 5) {
/* BLINK: I have no idea how I'm going to make this work! */
state.flags |= ANSI_BLINK;
/* Supposed to be blink; instead, support X-term 256 colors */
if (i == 0) { break; }
if (i < argc) {
if (atoi(argv[i-1]) == 48) {
@ -381,6 +394,7 @@ ansi_init(void (*writer)(char), int w, int y, void (*setcolor)(unsigned char, un
state.flags = DEFAULT_FLAGS; /* Nothing fancy*/
state.width = w;
state.height = y;
state.local_echo = 1;
ansi_set_color(state.fg, state.bg);
}
@ -427,7 +441,7 @@ uint16_t graphics_depth = 0;
#define GFX(x,y) *((uint32_t *)&gfx_mem[(GFX_W * (y) + (x)) * GFX_B])
uint32_t rgb(uint8_t r, uint8_t g, uint8_t b) {
return (r * 0x10000) + (g * 0x100) + (b * 0x1);
return 0xFF000000 + (r * 0x10000) + (g * 0x100) + (b * 0x1);
}
uint32_t alpha_blend(uint32_t bottom, uint32_t top, uint32_t mask) {
@ -2467,6 +2481,7 @@ FT_Face face;
FT_Face face_bold;
FT_Face face_italic;
FT_Face face_bold_italic;
FT_Face face_extra;
FT_GlyphSlot slot;
FT_UInt glyph_index;
@ -2484,28 +2499,32 @@ void drawChar(FT_Bitmap * bitmap, int x, int y, uint32_t fg, uint32_t bg) {
void
term_write_char(
uint8_t val,
uint32_t val,
uint16_t x,
uint16_t y,
uint32_t fg,
uint32_t bg,
uint8_t flags
) {
if (val < 32 || val > 126) {
return;
}
if (_use_freetype) {
if (val == 0xFFFFFFFF) { return; } /* Unicode, do not redraw here */
for (uint8_t i = 0; i < char_height; ++i) {
for (uint8_t j = 0; j < char_width; ++j) {
term_set_point(x+j,y+i,bg);
}
}
if (val < 32) {
return;
}
int pen_x = x;
int pen_y = y + char_offset;
int error;
FT_Face * _font = NULL;
if (flags & ANSI_BOLD && flags & ANSI_ITALIC) {
if (flags & ANSI_EXTRA) {
_font = &face_extra;
} else if (flags & ANSI_BOLD && flags & ANSI_ITALIC) {
_font = &face_bold_italic;
} else if (flags & ANSI_ITALIC) {
_font = &face_italic;
@ -2516,7 +2535,9 @@ term_write_char(
}
glyph_index = FT_Get_Char_Index(*_font, val);
error = FT_Load_Glyph(*_font, glyph_index, FT_LOAD_DEFAULT);
if (error) return;
if (error) {
ansi_print("Error loading glyph.\n");
};
slot = (*_font)->glyph;
if (slot->format == FT_GLYPH_FORMAT_OUTLINE) {
error = FT_Render_Glyph((*_font)->glyph, FT_RENDER_MODE_NORMAL);
@ -2549,6 +2570,7 @@ term_write_char(
}
static void cell_set(uint16_t x, uint16_t y, uint8_t c, uint8_t fg, uint8_t bg, uint8_t flags) {
if (x >= term_width || y >= term_height) return;
uint8_t * cell = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
cell[0] = c;
cell[1] = fg;
@ -2557,26 +2579,31 @@ static void cell_set(uint16_t x, uint16_t y, uint8_t c, uint8_t fg, uint8_t bg,
}
/* Read byte 0 (the character) of the 4-byte cell at column x, row y.
 * Coordinates outside the terminal yield 0. */
static uint16_t cell_ch(uint16_t x, uint16_t y) {
	if (x < term_width && y < term_height) {
		uint8_t * bytes = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
		return bytes[0];
	}
	return 0;
}
/* Read byte 1 (the foreground color) of the 4-byte cell at column x, row y.
 * Coordinates outside the terminal yield 0. */
static uint16_t cell_fg(uint16_t x, uint16_t y) {
	if (x < term_width && y < term_height) {
		uint8_t * bytes = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
		return bytes[1];
	}
	return 0;
}
/* Read byte 2 (the background color) of the 4-byte cell at column x, row y.
 * Coordinates outside the terminal yield 0. */
static uint16_t cell_bg(uint16_t x, uint16_t y) {
	if (x < term_width && y < term_height) {
		uint8_t * bytes = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
		return bytes[2];
	}
	return 0;
}
/* Read byte 3 (the attribute flags) of the 4-byte cell at column x, row y.
 * Coordinates outside the terminal yield 0. */
static uint8_t cell_flags(uint16_t x, uint16_t y) {
	if (x < term_width && y < term_height) {
		uint8_t * bytes = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
		return bytes[3];
	}
	return 0;
}
static void cell_redraw(uint16_t x, uint16_t y) {
if (x >= term_width || y >= term_height) return;
uint8_t * cell = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
if (((uint32_t *)cell)[0] == 0x00000000) {
term_write_char(' ', x * char_width, y * char_height, term_colors[DEFAULT_FG], term_colors[DEFAULT_BG], DEFAULT_FLAGS);
@ -2586,6 +2613,7 @@ static void cell_redraw(uint16_t x, uint16_t y) {
}
static void cell_redraw_inverted(uint16_t x, uint16_t y) {
if (x >= term_width || y >= term_height) return;
uint8_t * cell = (uint8_t *)((uintptr_t)term_buffer + (y * term_width + x) * 4);
if (((uint32_t *)cell)[0] == 0x00000000) {
term_write_char(' ', x * char_width, y * char_height, term_colors[DEFAULT_BG], term_colors[DEFAULT_FG], DEFAULT_FLAGS);
@ -2805,6 +2833,7 @@ int main(int argc, char ** argv) {
setLoaded(1,0);
setLoaded(2,0);
setLoaded(3,0);
setLoaded(4,0);
setLoaded(0,2);
font = loadMemFont("/usr/share/fonts/DejaVuSansMono.ttf", &s);
@ -2830,6 +2859,11 @@ int main(int argc, char ** argv) {
error = FT_Set_Pixel_Sizes(face_bold_italic, FONT_SIZE, FONT_SIZE); if (error) return 1;
setLoaded(3,1);
setLoaded(4,2);
error = FT_New_Face(library, "/usr/share/fonts/VLGothic.ttf", 0, &face_extra);
error = FT_Set_Pixel_Sizes(face_extra, FONT_SIZE, FONT_SIZE); if (error) return 1;
setLoaded(4,1);
char_height = 17;
char_width = 8;
char_offset = 13;
@ -2843,6 +2877,31 @@ int main(int argc, char ** argv) {
term_term_clear();
ansi_print("\033[H\033[2J");
#if 0
ansi_print("Hello World!\n");
/* UTF 8 testing */
char * str = "Hello World~~ * とある";
utf8_decode_init(str, strlen(str));
int c = 0;
int j = 0;
char herp[1024];
while ((c = utf8_decode_next()) != -1) {
if (c > 0x3000) {
term_write_char(c, 10 + j, 50, rgb(255,255,255), rgb(0,0,0), ANSI_EXTRA);
j += 2*char_width;
} else {
term_write_char(c, 10 + j, 50, rgb(255,255,255), rgb(0,0,0), 0);
j += char_width;
}
}
ansi_print("Done.\n");
while (1) { }
#endif
int ofd = syscall_mkpipe();
//int ifd = syscall_mkpipe();

View File

@ -5,6 +5,8 @@
*/
#include <stdio.h>
#include <syscall.h>
#include <string.h>
#include <stdlib.h>
DEFN_SYSCALL0(getuid, 23);