From 45aaa72e85412f8b62075c64e3135b495265dd8b Mon Sep 17 00:00:00 2001 From: mint <36459316+mintsuki@users.noreply.github.com> Date: Sun, 6 Sep 2020 03:35:32 +0200 Subject: [PATCH] Decompressor (#23) * Add decompressor infrastructure * Fix up everything * Add a gzip decompressor (#22) Co-authored-by: Matteo Semenzato --- .gitignore | 1 + README.md | 2 + src/Makefile | 9 +- src/bootsect/bootsect.asm | 26 +- src/bootsect/gdt.inc | 20 +- src/decompressor/Makefile | 40 +++ src/decompressor/gzip/tinf.h | 80 +++++ src/decompressor/gzip/tinfgzip.c | 120 +++++++ src/decompressor/gzip/tinflate.c | 566 +++++++++++++++++++++++++++++++ src/decompressor/lib/asm.h | 17 + src/decompressor/linker.ld | 27 ++ src/decompressor/main.c | 35 ++ src/lib/blib.c | 5 +- src/linker.ld | 3 +- 14 files changed, 935 insertions(+), 16 deletions(-) create mode 100644 src/decompressor/Makefile create mode 100644 src/decompressor/gzip/tinf.h create mode 100644 src/decompressor/gzip/tinfgzip.c create mode 100644 src/decompressor/gzip/tinflate.c create mode 100644 src/decompressor/lib/asm.h create mode 100644 src/decompressor/linker.ld create mode 100644 src/decompressor/main.c diff --git a/.gitignore b/.gitignore index 2841a406..ea292fbf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /**/*.a /**/*.bc /**/*.bin +/**/*.bin.gz /**/*.elf /**/*.img /bochsout.txt diff --git a/README.md b/README.md index 8555dbac..db2e138e 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,8 @@ limine-install test.img One can get `echfs-utils` by installing https://github.com/qword-os/echfs. 
+Limine uses a stripped-down version of https://github.com/jibsen/tinf + ## Discord server We have a Discord server if you need support, info, or you just want to hang out: https://discord.gg/QEeZMz4 diff --git a/src/Makefile b/src/Makefile index 5230d543..26b9883e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -21,8 +21,8 @@ INTERNAL_LDFLAGS = \ .PHONY: all clean -C_FILES := $(shell find ./ -type f -name '*.c' | sort) -ASM_FILES := $(shell find ./ -type f -name '*.asm' | grep -v bootsect | sort) +C_FILES := $(shell find ./ -type f -name '*.c' | grep -v bootsect | grep -v decompressor | sort) +ASM_FILES := $(shell find ./ -type f -name '*.asm' | grep -v bootsect | grep -v decompressor | sort) ASM_OBJ := $(ASM_FILES:.asm=.o) BC := $(C_FILES:.c=.bc) @@ -34,6 +34,8 @@ limine.bin: libgcc.a $(BC) $(ASM_OBJ) clang --target=i386-elf -O$(OPT_LEVEL) -c optimised_bundle.bc -o optimised_bundle.o ld.lld optimised_bundle.o $(ASM_OBJ) libgcc.a $(INTERNAL_LDFLAGS) -o stage2.elf llvm-objcopy -O binary stage2.elf stage2.bin + gzip -9 -f stage2.bin # -f: replace the stage2.bin.gz left by a previous build + $(MAKE) -C decompressor cd bootsect && nasm bootsect.asm -fbin -o ../limine.bin libgcc.a: @@ -47,4 +49,5 @@ libgcc.a: nasm $< -f elf32 -o $@ clean: - rm -f limine.bin $(ASM_OBJ) $(BC) + $(MAKE) -C decompressor clean + rm -f stage2.bin.gz limine.bin $(ASM_OBJ) $(BC) diff --git a/src/bootsect/bootsect.asm b/src/bootsect/bootsect.asm index 582c428f..b7404f15 100644 --- a/src/bootsect/bootsect.asm +++ b/src/bootsect/bootsect.asm @@ -15,7 +15,7 @@ start: mov fs, ax mov gs, ax mov ss, ax - mov sp, 0x7c00 + mov sp, 0xfff0 sti ; Some BIOSes don't pass the correct boot drive number, @@ -93,20 +93,24 @@ times 0x1b8-($-$$) db 0 times 510-($-$$) db 0 dw 0xaa55 -; ********************* Stage 2 ********************* +; ********************* Stage 1.5 ********************* -stage2: +stage15: + push es + push 0x6000 + pop es mov eax, dword [stage2_sector] inc eax - mov ebx, 0x8000 + xor ebx, ebx mov ecx, 62 call read_sectors + pop es jc err_reading_disk
call enable_a20 jc err_enabling_a20 - lgdt [GDT] + call load_gdt cli @@ -127,7 +131,10 @@ stage2: and edx, 0xff push edx - call 0x8000 + push stage2.size + push (stage2 - 0x8000) + 0x60000 + + call 0x60000 bits 16 %include 'a20_enabler.inc' @@ -135,6 +142,11 @@ bits 16 times 1024-($-$$) db 0 -incbin '../stage2.bin' +incbin '../decompressor/decompressor.bin' + +align 16 +stage2: +incbin '../stage2.bin.gz' +.size: equ $ - stage2 times 32768-($-$$) db 0 diff --git a/src/bootsect/gdt.inc b/src/bootsect/gdt.inc index a3a0a632..9df929b1 100644 --- a/src/bootsect/gdt.inc +++ b/src/bootsect/gdt.inc @@ -1,7 +1,25 @@ +load_gdt: + pusha + push es + push ds + push 0x7000 + pop es + xor di, di + push 0 + pop ds + mov si, GDT.GDTStart + mov cx, GDT.GDTEnd - GDT.GDTStart + rep movsb + lgdt [GDT] + pop ds + pop es + popa + ret + GDT: dw .GDTEnd - .GDTStart - 1 ; GDT size -dd .GDTStart ; GDT start +dd 0x70000 ; GDT start .GDTStart: diff --git a/src/decompressor/Makefile b/src/decompressor/Makefile new file mode 100644 index 00000000..7115eb4c --- /dev/null +++ b/src/decompressor/Makefile @@ -0,0 +1,40 @@ +OPT_LEVEL = z +CFLAGS = -pipe -Wall -Wextra + +INTERNAL_CFLAGS = \ + -O$(OPT_LEVEL) \ + -std=gnu99 \ + -ffreestanding \ + -flto \ + -mno-80387 \ + -mno-mmx \ + -mno-sse \ + -mno-sse2 \ + -I. 
\ + -Wno-address-of-packed-member + +INTERNAL_LDFLAGS = \ + -static \ + -nostdlib \ + -Tlinker.ld \ + -no-pie + +.PHONY: all clean + +C_FILES := $(shell find ./ -type f -name '*.c' | sort) +BC := $(C_FILES:.c=.bc) + +all: decompressor.bin + +decompressor.bin: $(BC) + llvm-link $(BC) -o bundle.bc + opt --O$(OPT_LEVEL) bundle.bc -o optimised_bundle.bc + clang --target=i386-elf -O$(OPT_LEVEL) -c optimised_bundle.bc -o optimised_bundle.o + ld.lld optimised_bundle.o ../libgcc.a $(INTERNAL_LDFLAGS) -o decompressor.elf + llvm-objcopy -O binary decompressor.elf decompressor.bin + +%.bc: %.c + clang --target=i386-elf $(CFLAGS) $(INTERNAL_CFLAGS) -c $< -o $@ + +clean: + rm -f decompressor.bin decompressor.elf bundle.bc optimised_bundle.bc optimised_bundle.o $(BC) diff --git a/src/decompressor/gzip/tinf.h b/src/decompressor/gzip/tinf.h new file mode 100644 index 00000000..6e56faa3 --- /dev/null +++ b/src/decompressor/gzip/tinf.h @@ -0,0 +1,80 @@ +/* + * tinf - tiny inflate library (inflate, gzip, zlib) + * + * Copyright (c) 2003-2019 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution.
+ */ + +#ifndef TINF_H_INCLUDED +#define TINF_H_INCLUDED + +#define TINF_VER_MAJOR 1 /**< Major version number */ +#define TINF_VER_MINOR 2 /**< Minor version number */ +#define TINF_VER_PATCH 1 /**< Patch version number */ +#define TINF_VER_STRING "1.2.1" /**< Version number as a string */ + +/** + * Status codes returned. + * + * @see tinf_uncompress, tinf_gzip_uncompress + */ +typedef enum { + TINF_OK = 0, /**< Success */ + TINF_DATA_ERROR = -3, /**< Input error */ + TINF_BUF_ERROR = -5 /**< Not enough room for output */ +} tinf_error_code; + +/** + * Decompress `sourceLen` bytes of deflate data from `source` to `dest`. + * + * This stripped-down version has no `destLen` parameter: the size of `dest` + * is neither checked nor reported, so `dest` must be large enough. + * + * Reads at most `sourceLen` bytes from `source`. + * Writes to `dest` are not bounds-checked. + * + * @param dest pointer to where to place decompressed data + * (the caller must guarantee it can hold all of the output) + * @param source pointer to compressed data + * @param sourceLen size of compressed data + * @return `TINF_OK` on success, error code on error + */ +int tinf_uncompress(void *dest, + const void *source, unsigned int sourceLen); + +/** + * Decompress `sourceLen` bytes of gzip data from `source` to `dest`. + * + * This stripped-down version has no `destLen` parameter: `dest` must be large + * enough for all of the output. The CRC32/ISIZE trailer is not verified. + * + * Reads at most `sourceLen` bytes from `source`. + * Writes to `dest` are not bounds-checked.
+ * + * @param dest pointer to where to place decompressed data + * (the caller must guarantee it can hold all of the output) + * @param source pointer to compressed data + * @param sourceLen size of compressed data + * @return `TINF_OK` on success, error code on error + */ +int tinf_gzip_uncompress(void *dest, + const void *source, unsigned int sourceLen); +#endif /* TINF_H_INCLUDED */ diff --git a/src/decompressor/gzip/tinfgzip.c b/src/decompressor/gzip/tinfgzip.c new file mode 100644 index 00000000..7935a50e --- /dev/null +++ b/src/decompressor/gzip/tinfgzip.c @@ -0,0 +1,120 @@ +/* + * tinfgzip - tiny gzip decompressor + * + * Copyright (c) 2003-2019 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution.
+ */ + +#include "tinf.h" + +typedef enum { + FTEXT = 1, + FHCRC = 2, + FEXTRA = 4, + FNAME = 8, + FCOMMENT = 16 +} tinf_gzip_flag; + +int tinf_gzip_uncompress(void *dest, + const void *source, unsigned int sourceLen) { + const unsigned char *src = (const unsigned char *) source; + unsigned char *dst = (unsigned char *) dest; + const unsigned char *start; + int res; + unsigned char flg; + + /* -- Check header -- */ + + /* Check room for at least 10 byte header and 8 byte trailer */ + if (sourceLen < 18) { + return TINF_DATA_ERROR; + } + + /* Check id bytes */ + if (src[0] != 0x1F || src[1] != 0x8B) { + return TINF_DATA_ERROR; + } + + /* Check method is deflate */ + if (src[2] != 8) { + return TINF_DATA_ERROR; + } + + /* Get flag byte */ + flg = src[3]; + + /* Check that reserved bits are zero */ + if (flg & 0xE0) { + return TINF_DATA_ERROR; + } + + /* -- Find start of compressed data -- */ + + /* Skip base header of 10 bytes */ + start = src + 10; + + /* Skip extra data if present (XLEN is a 16-bit little-endian length) */ + if (flg & FEXTRA) { + unsigned int xlen = start[0] | ((unsigned int) start[1] << 8); + + if (xlen > sourceLen - 12) { + return TINF_DATA_ERROR; + } + + start += xlen + 2; + } + + /* Skip file name if present */ + if (flg & FNAME) { + do { + if (((unsigned int)(start - src)) >= sourceLen) { + return TINF_DATA_ERROR; + } + } while (*start++); + } + + /* Skip file comment if present */ + if (flg & FCOMMENT) { + do { + if (((unsigned int)(start - src)) >= sourceLen) { + return TINF_DATA_ERROR; + } + } while (*start++); + } + + if (flg & FHCRC) { + start += 2; + } + + /* -- Decompress data -- */ + + if ((src + sourceLen) - start < 8) { + return TINF_DATA_ERROR; + } + + res = tinf_uncompress(dst, start, + (src + sourceLen) - start - 8); + + if (res != TINF_OK) { + return TINF_DATA_ERROR; + } + return TINF_OK; +} diff --git a/src/decompressor/gzip/tinflate.c b/src/decompressor/gzip/tinflate.c new file mode 100644 index 00000000..67993904 --- /dev/null +++ b/src/decompressor/gzip/tinflate.c @@ -0,0 +1,566 @@ +/* + * tinflate
- tiny inflate + * + * Copyright (c) 2003-2019 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +#include "tinf.h" + +#include + +#if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL +# error "tinf requires unsigned int to be at least 32-bit" +#endif + +/* -- Internal data structures -- */ + +struct tinf_tree { + unsigned short counts[16]; /* Number of codes with a given length */ + unsigned short symbols[288]; /* Symbols sorted by code */ + int max_sym; +}; + +struct tinf_data { + const unsigned char *source; + const unsigned char *source_end; + unsigned int tag; + int bitcount; + int overflow; + + unsigned char *dest_start; + unsigned char *dest; + + struct tinf_tree ltree; /* Literal/length tree */ + struct tinf_tree dtree; /* Distance tree */ +}; + +/* Given an array of code lengths, build a tree */ +static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths, + unsigned int num) { + unsigned short offs[16]; + unsigned int i, num_codes, available; + + + for (i = 0; i < 16; ++i) { + t->counts[i] = 0; + } + + t->max_sym = -1; + + /* Count number of codes for each non-zero length */ + for (i = 0; i < num; ++i) { + 
+ if (lengths[i]) { + t->max_sym = i; + t->counts[lengths[i]]++; + } + } + + /* Compute offset table for distribution sort */ + for (available = 1, num_codes = 0, i = 0; i < 16; ++i) { + unsigned int used = t->counts[i]; + + /* Check length contains no more codes than available */ + if (used > available) { + return TINF_DATA_ERROR; + } + available = 2 * (available - used); + + offs[i] = num_codes; + num_codes += used; + } + + /* + * Check all codes were used, or for the special case of only one + * code that it has length 1 + */ + if ((num_codes > 1 && available > 0) + || (num_codes == 1 && t->counts[1] != 1)) { + return TINF_DATA_ERROR; + } + + /* Fill in symbols sorted by code */ + for (i = 0; i < num; ++i) { + if (lengths[i]) { + t->symbols[offs[lengths[i]]++] = i; + } + } + + /* + * For the special case of only one code (which will be 0) add a + * code 1 which results in a symbol that is too large + */ + if (num_codes == 1) { + t->counts[1] = 2; + t->symbols[1] = t->max_sym + 1; + } + + return TINF_OK; +} + +/* -- Decode functions -- */ + +static void tinf_refill(struct tinf_data *d, int num) { + + /* Read bytes until at least num bits available */ + while (d->bitcount < num) { + if (d->source != d->source_end) { + d->tag |= (unsigned int) *d->source++ << d->bitcount; + } + else { + d->overflow = 1; + } + d->bitcount += 8; + } + +} + +static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num) { + unsigned int bits; + + + /* Get bits from tag */ + bits = d->tag & ((1UL << num) - 1); + + /* Remove bits from tag */ + d->tag >>= num; + d->bitcount -= num; + + return bits; +} + +/* Get num bits from source stream */ +static unsigned int tinf_getbits(struct tinf_data *d, int num) { + tinf_refill(d, num); + return tinf_getbits_no_refill(d, num); +} + +/* Read a num bit value from stream and add base */ +static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base) { + return base + (num ? 
tinf_getbits(d, num) : 0); +} + +/* Given a data stream and a tree, decode a symbol */ +static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t) { + int base = 0, offs = 0; + int len; + + /* + * Get more bits while code index is above number of codes + * + * Rather than the actual code, we are computing the position of the + * code in the sorted order of codes, which is the index of the + * corresponding symbol. + * + * Conceptually, for each code length (level in the tree), there are + * counts[len] leaves on the left and internal nodes on the right. + * The index we have decoded so far is base + offs, and if that + * falls within the leaves we are done. Otherwise we adjust the range + * of offs and add one more bit to it. + */ + for (len = 1; ; ++len) { + offs = 2 * offs + tinf_getbits(d, 1); + + if (offs < t->counts[len]) { + break; + } + + base += t->counts[len]; + offs -= t->counts[len]; + } + + + return t->symbols[base + offs]; +} + +/* Given a data stream, decode dynamic trees from it */ +static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt, + struct tinf_tree *dt) { + unsigned char lengths[288 + 32]; + + /* Special ordering of code length codes */ + static const unsigned char clcidx[19] = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, + 11, 4, 12, 3, 13, 2, 14, 1, 15 + }; + + unsigned int hlit, hdist, hclen; + unsigned int i, num, length; + int res; + + /* Get 5 bits HLIT (257-286) */ + hlit = tinf_getbits_base(d, 5, 257); + + /* Get 5 bits HDIST (1-32) */ + hdist = tinf_getbits_base(d, 5, 1); + + /* Get 4 bits HCLEN (4-19) */ + hclen = tinf_getbits_base(d, 4, 4); + + /* + * The RFC limits the range of HLIT to 286, but lists HDIST as range + * 1-32, even though distance codes 30 and 31 have no meaning. While + * we could allow the full range of HLIT and HDIST to make it possible + * to decode the fixed trees with this function, we consider it an + * error here. 
+ * + * See also: https://github.com/madler/zlib/issues/82 + */ + if (hlit > 286 || hdist > 30) { + return TINF_DATA_ERROR; + } + + for (i = 0; i < 19; ++i) { + lengths[i] = 0; + } + + /* Read code lengths for code length alphabet */ + for (i = 0; i < hclen; ++i) { + /* Get 3 bits code length (0-7) */ + unsigned int clen = tinf_getbits(d, 3); + + lengths[clcidx[i]] = clen; + } + + /* Build code length tree (in literal/length tree to save space) */ + res = tinf_build_tree(lt, lengths, 19); + + if (res != TINF_OK) { + return res; + } + + /* Check code length tree is not empty */ + if (lt->max_sym == -1) { + return TINF_DATA_ERROR; + } + + /* Decode code lengths for the dynamic trees */ + for (num = 0; num < hlit + hdist; ) { + int sym = tinf_decode_symbol(d, lt); + + if (sym > lt->max_sym) { + return TINF_DATA_ERROR; + } + + switch (sym) { + case 16: + /* Copy previous code length 3-6 times (read 2 bits) */ + if (num == 0) { + return TINF_DATA_ERROR; + } + sym = lengths[num - 1]; + length = tinf_getbits_base(d, 2, 3); + break; + case 17: + /* Repeat code length 0 for 3-10 times (read 3 bits) */ + sym = 0; + length = tinf_getbits_base(d, 3, 3); + break; + case 18: + /* Repeat code length 0 for 11-138 times (read 7 bits) */ + sym = 0; + length = tinf_getbits_base(d, 7, 11); + break; + default: + /* Values 0-15 represent the actual code lengths */ + length = 1; + break; + } + + if (length > hlit + hdist - num) { + return TINF_DATA_ERROR; + } + + while (length--) { + lengths[num++] = sym; + } + } + + /* Check EOB symbol is present */ + if (lengths[256] == 0) { + return TINF_DATA_ERROR; + } + + /* Build dynamic trees */ + res = tinf_build_tree(lt, lengths, hlit); + + if (res != TINF_OK) { + return res; + } + + res = tinf_build_tree(dt, lengths + hlit, hdist); + + if (res != TINF_OK) { + return res; + } + + return TINF_OK; +} + +/* -- Block inflate functions -- */ + +/* Given a stream and two trees, inflate a block of data */ +static int tinf_inflate_block_data(struct 
tinf_data *d, struct tinf_tree *lt, + struct tinf_tree *dt) { + /* Extra bits and base tables for length codes */ + static const unsigned char length_bits[30] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 0, 127 + }; + + static const unsigned short length_base[30] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, + 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258, 0 + }; + + /* Extra bits and base tables for distance codes */ + static const unsigned char dist_bits[30] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 + }; + + static const unsigned short dist_base[30] = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, + 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, + 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 + }; + + for (;;) { + int sym = tinf_decode_symbol(d, lt); + + /* Check for overflow in bit reader */ + if (d->overflow) { + return TINF_DATA_ERROR; + } + + if (sym < 256) { + *d->dest++ = sym; + } + else { + int length, dist, offs; + int i; + + /* Check for end of block */ + if (sym == 256) { + return TINF_OK; + } + + /* Check sym is within range and distance tree is not empty */ + if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) { + return TINF_DATA_ERROR; + } + + sym -= 257; + + /* Possibly get more bits from length code */ + length = tinf_getbits_base(d, length_bits[sym], + length_base[sym]); + + dist = tinf_decode_symbol(d, dt); + + /* Check dist is within range */ + if (dist > dt->max_sym || dist > 29) { + return TINF_DATA_ERROR; + } + + /* Possibly get more bits from distance code */ + offs = tinf_getbits_base(d, dist_bits[dist], + dist_base[dist]); + + if (offs > d->dest - d->dest_start) { + return TINF_DATA_ERROR; + } + + /* Copy match */ + for (i = 0; i < length; ++i) { + d->dest[i] = d->dest[i - offs]; + } + + d->dest += length; + } + } +} + +/* Inflate an uncompressed block of data */ +static int 
tinf_inflate_uncompressed_block(struct tinf_data *d) { + unsigned int length, invlength; + + if (d->source_end - d->source < 4) { + return TINF_DATA_ERROR; + } + + /* Get length (LEN, 16-bit little-endian) */ + length = d->source[0] | ((unsigned int) d->source[1] << 8); + + /* Get one's complement of length (NLEN, 16-bit little-endian) */ + invlength = d->source[2] | ((unsigned int) d->source[3] << 8); + + /* Check length */ + if (length != (~invlength & 0x0000FFFF)) { + return TINF_DATA_ERROR; + } + + d->source += 4; + + if ((unsigned int)((d->source_end - d->source)) < length) { + return TINF_DATA_ERROR; + } + + /* Copy block */ + while (length--) { + *d->dest++ = *d->source++; + } + + /* Make sure we start next block on a byte boundary */ + d->tag = 0; + d->bitcount = 0; + + return TINF_OK; +} + +/* Build fixed Huffman trees */ +static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt) { + int i; + + /* Build fixed literal/length tree */ + for (i = 0; i < 16; ++i) { + lt->counts[i] = 0; + } + + lt->counts[7] = 24; + lt->counts[8] = 152; + lt->counts[9] = 112; + + for (i = 0; i < 24; ++i) { + lt->symbols[i] = 256 + i; + } + for (i = 0; i < 144; ++i) { + lt->symbols[24 + i] = i; + } + for (i = 0; i < 8; ++i) { + lt->symbols[24 + 144 + i] = 280 + i; + } + for (i = 0; i < 112; ++i) { + lt->symbols[24 + 144 + 8 + i] = 144 + i; + } + + lt->max_sym = 285; + + /* Build fixed distance tree */ + for (i = 0; i < 16; ++i) { + dt->counts[i] = 0; + } + + dt->counts[5] = 32; + + for (i = 0; i < 32; ++i) { + dt->symbols[i] = i; + } + + dt->max_sym = 29; +} + + +/* Inflate a block of data compressed with fixed Huffman trees */ +static int tinf_inflate_fixed_block(struct tinf_data *d) { + /* Build fixed Huffman trees */ + tinf_build_fixed_trees(&d->ltree, &d->dtree); + + /* Decode block using fixed trees */ + return tinf_inflate_block_data(d, &d->ltree, &d->dtree); +} + + +/* Inflate a block of data compressed with dynamic Huffman trees */ +static int tinf_inflate_dynamic_block(struct tinf_data *d) { + /* Decode trees from stream */ + int res = tinf_decode_trees(d, &d->ltree,
&d->dtree); + + if (res != TINF_OK) { + return res; + } + + /* Decode block using decoded trees */ + return tinf_inflate_block_data(d, &d->ltree, &d->dtree); +} + +/* Inflate stream from source to dest */ +int tinf_uncompress(void *dest, + const void *source, unsigned int sourceLen) { + struct tinf_data d; + int bfinal; + + /* Initialise data */ + d.source = (const unsigned char *) source; + d.source_end = d.source + sourceLen; + d.tag = 0; + d.bitcount = 0; + d.overflow = 0; + + d.dest = (unsigned char *) dest; + d.dest_start = d.dest; + + do { + unsigned int btype; + int res; + + /* Read final block flag */ + bfinal = tinf_getbits(&d, 1); + + /* Read block type (2 bits) */ + btype = tinf_getbits(&d, 2); + + /* Decompress block */ + switch (btype) { + case 0: + /* Decompress uncompressed block */ + res = tinf_inflate_uncompressed_block(&d); + break; + case 1: + /* Decompress block with fixed Huffman trees */ + res = tinf_inflate_fixed_block(&d); + break; + case 2: + /* Decompress block with dynamic Huffman trees */ + res = tinf_inflate_dynamic_block(&d); + break; + default: + res = TINF_DATA_ERROR; + break; + } + + if (res != TINF_OK) { + return res; + } + } while (!bfinal); + + /* Check for overflow in bit reader */ + if (d.overflow) { + return TINF_DATA_ERROR; + } + + return TINF_OK; +} + diff --git a/src/decompressor/lib/asm.h b/src/decompressor/lib/asm.h new file mode 100644 index 00000000..560057c0 --- /dev/null +++ b/src/decompressor/lib/asm.h @@ -0,0 +1,17 @@ +#ifndef __LIB__ASM_H__ +#define __LIB__ASM_H__ + +#define ASM(body, ...) 
asm volatile (".intel_syntax noprefix\n\t" body ".att_syntax prefix" : __VA_ARGS__) +#define ASM_BASIC(body) asm (".intel_syntax noprefix\n\t" body ".att_syntax prefix") + +#define FARJMP16(seg, off) \ + ".byte 0xea\n\t" \ + ".2byte " off "\n\t" \ + ".2byte " seg "\n\t" \ + +#define FARJMP32(seg, off) \ + ".byte 0xea\n\t" \ + ".4byte " off "\n\t" \ + ".2byte " seg "\n\t" \ + +#endif diff --git a/src/decompressor/linker.ld b/src/decompressor/linker.ld new file mode 100644 index 00000000..baf1f0de --- /dev/null +++ b/src/decompressor/linker.ld @@ -0,0 +1,27 @@ +OUTPUT_FORMAT(elf32-i386) +ENTRY(main) + +SECTIONS +{ + . = 0x60000; + + .text : { + KEEP(*(.entry*)) + *(.text*) + } + + .rodata : { + *(.rodata*) + } + + .data : { + *(.data*) + } + + .bss : { + bss_begin = .; + *(COMMON) + *(.bss*) + bss_end = .; + } +} diff --git a/src/decompressor/main.c b/src/decompressor/main.c new file mode 100644 index 00000000..e6bc3841 --- /dev/null +++ b/src/decompressor/main.c @@ -0,0 +1,35 @@ +#include <lib/asm.h> + +ASM_BASIC( + ".section .entry\n\t" + + "cld\n\t" + + // Zero out .bss + "xor al, al\n\t" + "mov edi, OFFSET bss_begin\n\t" + "mov ecx, OFFSET bss_end\n\t" + "sub ecx, OFFSET bss_begin\n\t" + "rep stosb\n\t" + + "mov ebx, OFFSET main\n\t" + "jmp ebx\n\t" +); + +#include <stdint.h> +#include <stddef.h> +#include <gzip/tinf.h> + +__attribute__((noreturn)) +void main(uint8_t *compressed_stage2, size_t stage2_size, uint8_t boot_drive) { + // Decompress the gzip-compressed stage2 to address 0x500, then jump to it. + volatile uint8_t *dest = (volatile uint8_t *)0x500; + + if (tinf_gzip_uncompress((void *)dest, compressed_stage2, stage2_size) != TINF_OK) + for (;;) asm volatile ("hlt"); // never jump into a corrupt image + + __attribute__((noreturn)) + void (*stage2)(uint8_t boot_drive) = (void *)dest; + + stage2(boot_drive); +} diff --git a/src/lib/blib.c b/src/lib/blib.c index 7fab85c8..98ef721a 100644 --- a/src/lib/blib.c +++ b/src/lib/blib.c @@ -45,9 +45,8 @@ __attribute__((noreturn)) void panic(const char *fmt, ...)
{ } } -extern symbol bss_end; -static size_t bump_allocator_base = (size_t)bss_end; -#define BUMP_ALLOCATOR_LIMIT ((size_t)0x80000) +static size_t bump_allocator_base = 0x10000; +#define BUMP_ALLOCATOR_LIMIT ((size_t)0x70000) void brewind(size_t count) { bump_allocator_base -= count; diff --git a/src/linker.ld b/src/linker.ld index 2c8efd1b..f9f0057f 100644 --- a/src/linker.ld +++ b/src/linker.ld @@ -3,10 +3,9 @@ ENTRY(main) SECTIONS { - . = 0x8000; + . = 0x500; .text : { - bootsect_begin = .; KEEP(*(.entry*)) *(.text*) }