diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 3046a768..372ab882 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -10,7 +10,7 @@ jobs: steps: - name: Install dependencies - run: pacman --noconfirm -Syu && pacman --needed --noconfirm -S base-devel git autoconf automake nasm curl mtools llvm clang lld + run: pacman --noconfirm -Syu && pacman --needed --noconfirm -S base-devel git autoconf automake nasm curl mtools llvm clang lld aarch64-linux-gnu-gcc - name: Checkout code uses: actions/checkout@v3 @@ -21,5 +21,8 @@ jobs: - name: Build the bootloader (LLVM) run: ./bootstrap && ./configure CROSS_TOOLCHAIN=llvm --enable-werror --enable-all && make all && make maintainer-clean - - name: Build the bootloader (GCC) - run: ./bootstrap && ./configure CROSS_CC=gcc CROSS_LD=ld CROSS_OBJCOPY=objcopy CROSS_OBJDUMP=objdump CROSS_READELF=readelf --enable-werror --enable-all && make all && make maintainer-clean + - name: Build the bootloader (GCC, x86) + run: ./bootstrap && ./configure CROSS_CC=gcc CROSS_LD=ld CROSS_OBJCOPY=objcopy CROSS_OBJDUMP=objdump CROSS_READELF=readelf --enable-werror --enable-bios --enable-uefi-ia32 --enable-uefi-x86_64 && make all && make maintainer-clean + + - name: Build the bootloader (GCC, aarch64) + run: ./bootstrap && ./configure CROSS_TOOLCHAIN=aarch64-linux-gnu --enable-werror --enable-uefi-aarch64 && make all && make maintainer-clean diff --git a/GNUmakefile.in b/GNUmakefile.in index 0ea30402..2f48a62f 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -62,7 +62,7 @@ override LIMINE_VERSION := @PACKAGE_VERSION@ override STAGE1_FILES := $(shell find '$(call SHESCAPE,$(SRCDIR))/stage1' -type f -name '*.asm') .PHONY: all -all: limine-version @BUILD_UEFI_X86_64@ @BUILD_UEFI_IA32@ @BUILD_BIOS@ +all: limine-version @BUILD_UEFI_X86_64@ @BUILD_UEFI_IA32@ @BUILD_UEFI_AARCH64@ @BUILD_BIOS@ $(MAKE) '$(call SHESCAPE,$(BINDIR))/limine-cd-efi.bin' $(call MKESCAPE,$(BINDIR))/limine-hdd.h: $(call MKESCAPE,$(BINDIR))/limine-hdd.bin @@ -90,7 +90,7 @@ limine-version: $(MAKE) '$(call SHESCAPE,$(BINDIR))/limine-version' .PHONY: clean -clean: limine-bios-clean limine-uefi-ia32-clean limine-uefi-x86-64-clean +clean: limine-bios-clean limine-uefi-ia32-clean limine-uefi-x86-64-clean limine-uefi-aarch64-clean rm -rf '$(call SHESCAPE,$(BINDIR))' '$(call SHESCAPE,$(BUILDDIR))/stage1.stamp' .PHONY: install-data @@ -101,6 +101,7 @@ install-data: all $(INSTALL) -m 644 '$(call SHESCAPE,$(BINDIR))/limine-cd.bin' '$(call SHESCAPE,$(DESTDIR)$(ACDATAROOTDIR))/limine/' || true $(INSTALL) -m 644 '$(call SHESCAPE,$(BINDIR))/limine-cd-efi.bin' '$(call SHESCAPE,$(DESTDIR)$(ACDATAROOTDIR))/limine/' || true $(INSTALL) -m 644 '$(call SHESCAPE,$(BINDIR))/limine-pxe.bin' '$(call SHESCAPE,$(DESTDIR)$(ACDATAROOTDIR))/limine/' || true + $(INSTALL) -m 644 '$(call SHESCAPE,$(BINDIR))/BOOTAA64.EFI' '$(call SHESCAPE,$(DESTDIR)$(ACDATAROOTDIR))/limine/' || true $(INSTALL) -m 644 '$(call SHESCAPE,$(BINDIR))/BOOTX64.EFI' '$(call SHESCAPE,$(DESTDIR)$(ACDATAROOTDIR))/limine/' || true $(INSTALL) -m 644 '$(call SHESCAPE,$(BINDIR))/BOOTIA32.EFI' '$(call SHESCAPE,$(DESTDIR)$(ACDATAROOTDIR))/limine/' || true $(INSTALL) -d '$(call SHESCAPE,$(DESTDIR)$(ACINCLUDEDIR))' @@ -181,6 +182,15 @@ limine-uefi-ia32: $(MAKE) common-uefi-ia32 $(MAKE) '$(call SHESCAPE,$(BINDIR))/BOOTIA32.EFI' +$(call MKESCAPE,$(BINDIR))/BOOTAA64.EFI: $(call MKESCAPE,$(BUILDDIR))/common-uefi-aarch64/BOOTAA64.EFI + $(MKDIR_P) '$(call SHESCAPE,$(BINDIR))' + cp '$(call 
SHESCAPE,$(BUILDDIR))/common-uefi-aarch64/BOOTAA64.EFI' '$(call SHESCAPE,$(BINDIR))/' + +.PHONY: limine-uefi-aarch64 +limine-uefi-aarch64: + $(MAKE) common-uefi-aarch64 + $(MAKE) '$(call SHESCAPE,$(BINDIR))/BOOTAA64.EFI' + .PHONY: limine-bios-clean limine-bios-clean: common-bios-clean decompressor-clean @@ -190,6 +200,9 @@ limine-uefi-x86-64-clean: common-uefi-x86-64-clean .PHONY: limine-uefi-ia32-clean limine-uefi-ia32-clean: common-uefi-ia32-clean +.PHONY: limine-uefi-aarch64-clean +limine-uefi-aarch64-clean: common-uefi-aarch64-clean + .PHONY: dist dist: rm -rf '$(call SHESCAPE,$(BUILDDIR))'/"limine-$(LIMINE_VERSION)" @@ -236,6 +249,17 @@ common-uefi-x86-64: common-uefi-x86-64-clean: rm -rf '$(call SHESCAPE,$(BUILDDIR))/common-uefi-x86-64' +.PHONY: common-uefi-aarch64 +common-uefi-aarch64: + $(MAKE) -C '$(call SHESCAPE,$(SRCDIR))/common' all \ + CROSS_FILE='$(call SHESCAPE,$(BUILDDIR))/cross-files/aarch64-toolchain.mk' \ + TARGET=uefi-aarch64 \ + BUILDDIR='$(call SHESCAPE,$(BUILDDIR))/common-uefi-aarch64' + +.PHONY: common-uefi-aarch64-clean +common-uefi-aarch64-clean: + rm -rf '$(call SHESCAPE,$(BUILDDIR))/common-uefi-aarch64' + .PHONY: common-uefi-ia32 common-uefi-ia32: $(MAKE) -C '$(call SHESCAPE,$(SRCDIR))/common' all \ @@ -277,6 +301,10 @@ ovmf-x64: $(MKDIR_P) ovmf-x64 cd ovmf-x64 && curl -o OVMF-X64.zip https://efi.akeo.ie/OVMF/OVMF-X64.zip && 7z x OVMF-X64.zip +ovmf-aa64: + mkdir -p ovmf-aa64 + cd ovmf-aa64 && curl -o OVMF-AA64.zip https://efi.akeo.ie/OVMF/OVMF-AA64.zip && 7z x OVMF-AA64.zip + ovmf-ia32: $(MKDIR_P) ovmf-ia32 cd ovmf-ia32 && curl -o OVMF-IA32.zip https://efi.akeo.ie/OVMF/OVMF-IA32.zip && 7z x OVMF-IA32.zip @@ -501,6 +529,30 @@ uefi-test: rm -rf test_image loopback_dev qemu-system-x86_64 -m 512M -M q35 -L ovmf -bios ovmf-x64/OVMF.fd -net none -smp 4 -hda test.hdd -debugcon stdio +.PHONY: uefi-aa64-test +uefi-aa64-test: + $(MAKE) ovmf-aa64 + $(MAKE) test-clean + $(MAKE) test.hdd + $(MAKE) limine-uefi-aarch64 + $(MAKE) -C test + rm -rf test_image/ + mkdir test_image + sudo losetup -Pf --show test.hdd > loopback_dev + sudo partprobe `cat loopback_dev` + sudo mkfs.fat -F 32 `cat loopback_dev`p1 + sudo mount `cat loopback_dev`p1 test_image + sudo mkdir test_image/boot + sudo cp -rv $(BINDIR)/* test_image/boot/ + sudo cp -rv test/* test_image/boot/ + sudo mkdir -p test_image/EFI/BOOT + sudo cp $(BINDIR)/BOOTAA64.EFI test_image/EFI/BOOT/ + sync + sudo umount test_image/ + sudo losetup -d `cat loopback_dev` + rm -rf test_image loopback_dev + qemu-system-aarch64 -m 512M -M virt -cpu cortex-a72 -L ovmf -bios ovmf-aa64/OVMF.fd -net none -smp 4 -device ramfb -device qemu-xhci -device usb-kbd -hda test.hdd -serial stdio + .PHONY: uefi-ia32-test uefi-ia32-test: $(MAKE) ovmf-ia32 diff --git a/PROTOCOL.md b/PROTOCOL.md index 804b0cdb..c7a9a00b 100644 --- a/PROTOCOL.md +++ b/PROTOCOL.md @@ -19,7 +19,8 @@ languages. All pointers are 64-bit wide. All pointers point to the object with the higher half direct map offset already added to them, unless otherwise noted. -The calling convention matches the SysV C ABI for the specific architecture. +The calling convention matches the C ABI for the specific architecture +(SysV for x86, AAPCS for aarch64). ## Features @@ -155,6 +156,46 @@ to the stack before jumping to the kernel. All other general purpose registers are set to 0. 
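To make the machine-state guarantees above concrete, the sketch below shows a freestanding kernel entry point that relies only on what the protocol promises (a bootloader-provided stack, masked interrupts, and zeroed general purpose registers). The `kmain` name and the halt loop are illustrative assumptions, not part of the protocol:

```c
// Minimal sketch of an entry point under the documented machine state:
// no assembly stub is strictly needed before C code runs, because the
// bootloader has already set up a stack and masked interrupts.
void kmain(void) {
    // Load the kernel's own descriptor tables / exception vectors here,
    // then continue with normal initialization.
    for (;;) {
#if defined (__x86_64__)
        asm volatile ("hlt");
#elif defined (__aarch64__)
        asm volatile ("wfi");
#endif
    }
}
```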
+### aarch64 + +`PC` will be the entry point as defined as part of the executable file format, +unless an Entry Point feature is requested (see below), in which case +the value of `PC` is taken from there. + +The contents of the `VBAR_EL1` register are undefined, and the kernel must load its own. + +The `MAIR_EL1` register will contain at least these entries, in an unspecified order: + - Normal, Write-back RW-Allocate non-transient (`0b11111111`), + - Unspecified, correct for use with the framebuffer. + +The kernel and the lower-half identity mapping will be mapped with Normal write-back memory, +while the framebuffer is mapped with the correct caching mode. The kernel must ensure that +MMIO it wants to access is mapped with the correct caching mode. + +All interrupts are masked (`PSTATE.{D, A, I, F}` are set to 1). + +The kernel is entered in little-endian AArch64 EL1t (EL1 with `PSTATE.SP` set to 0, `PSTATE.E` set to 0, and `PSTATE.nRW` set to 0). + +Other fields of `PSTATE` are undefined. + +At entry: the MMU (`SCTLR_EL1.M`) is enabled, the I-Cache and D-Cache (`SCTLR_EL1.{I, C}`) are enabled, +data alignment checking (`SCTLR_EL1.A`) is disabled. SP alignment checking (`SCTLR_EL1.{SA, SA0}`) is enabled. +Other fields of `SCTLR_EL1` are reset to 0 or to their reserved value. + +Higher ELs do not interfere with accesses to vector or floating point instructions or registers. + +Higher ELs do not interfere with accesses to the generic timer and counter. + +The used translation granule size is undefined. + +If booted by EFI/UEFI, boot services are exited. + +`SP` is set to point to a stack, in bootloader-reserved memory, which is +at least 64KiB (65536 bytes) in size, or the size specified in the Stack +Size Request (see below). + +All other general purpose registers (including `X29` and `X30`) are set to 0. Vector registers are in an undefined state. + ## Feature List Request IDs are composed of 4 64-bit unsigned integers, but the first 2 are @@ -463,6 +504,29 @@ No registers other than the segment registers and general purpose registers are going to be used. Especially, this means that there is no need to save and restore FPU, SSE, or AVX state when calling the terminal write function. +#### aarch64 + +Additionally, the kernel must ensure, when calling `write()`, that: + +* The currently loaded virtual address space is still the one provided at +entry by the bootloader, or a custom virtual address space is loaded which +identity maps the framebuffer memory region associated with the terminal, and +all the bootloader reclaimable memory regions, with read, write, and execute +permissions. + +* The routine is called *by its physical address* (the value of the function +pointer is already physical), which should be identity mapped. + +* Bootloader-reclaimable memory entries are left untouched until after the +kernel is done utilising bootloader-provided facilities (this terminal being +one of them). + +Notes regarding the usage of registers: + +No registers other than the general purpose registers are going to be used. +Especially, this means that there is no need to save and restore SIMD state +when calling the terminal write function. + #### Terminal characteristics The terminal should strive for Linux console compatibility. @@ -563,9 +627,12 @@ struct limine_smp_request { }; ``` -* `flags` - Bit 0: Enable X2APIC, if possible. +* `flags` - Bit 0: Enable X2APIC, if possible. 
(x86_64-only) + +#### x86_64: Response: + ```c struct limine_smp_response { uint64_t revision; @@ -610,6 +677,55 @@ processor. For all CPUs, this field is guaranteed to be NULL when control is fir to the bootstrap processor. * `extra_argument` - A free for use field. +#### aarch64: + +Response: + +```c +struct limine_smp_response { + uint64_t revision; + uint32_t flags; + uint64_t bsp_mpidr; + uint64_t cpu_count; + struct limine_smp_info **cpus; +}; +``` + +* `flags` - Always zero +* `bsp_mpidr` - MPIDR of the bootstrap processor (as read from `MPIDR_EL1`, with Res1 masked off). +* `cpu_count` - How many CPUs are present. It includes the bootstrap processor. +* `cpus` - Pointer to an array of `cpu_count` pointers to +`struct limine_smp_info` structures. + +Notes: The presence of this request will prompt the bootloader to bootstrap +the secondary processors. This will not be done if this request is not present. + +```c +struct limine_smp_info; + +typedef void (*limine_goto_address)(struct limine_smp_info *); + +struct limine_smp_info { + uint32_t processor_id; + uint32_t gic_iface_no; + uint64_t mpidr; + uint64_t reserved; + limine_goto_address goto_address; + uint64_t extra_argument; +}; +``` + +* `processor_id` - ACPI Processor UID as specified by the MADT +* `gic_iface_no` - GIC CPU Interface number of the processor as specified by the MADT (possibly always 0) +* `mpidr` - MPIDR of the processor as specified by the MADT or device tree +* `goto_address` - An atomic write to this field causes the parked CPU to +jump to the written address, on a 64KiB (or Stack Size Request size) stack. A pointer to the +`struct limine_smp_info` structure of the CPU is passed in `X0`. Other than +that, the CPU state will be the same as described for the bootstrap +processor. This field is unused for the structure describing the bootstrap +processor. +* `extra_argument` - A free for use field. + ### Memory Map Feature ID: @@ -936,3 +1052,32 @@ struct limine_kernel_address_response { * `physical_base` - The physical base address of the kernel. * `virtual_base` - The virtual base address of the kernel. + +### Device Tree Blob Feature + +ID: +```c +#define LIMINE_DTB_REQUEST { LIMINE_COMMON_MAGIC, 0xb40ddb48fb54bac7, 0x545081493f81ffb7 } +``` + +Request: +```c +struct limine_dtb_request { + uint64_t id[4]; + uint64_t revision; + struct limine_dtb_response *response; +}; +``` + +Response: +```c +struct limine_dtb_response { + uint64_t revision; + void *dtb_ptr; +}; +``` + +* `dtb_ptr` - Physical pointer to the device tree blob. + +Note: Information contained in the `/chosen` node may not reflect the information +given by bootloader tags, and as such the `/chosen` node properties should be ignored. 
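A kernel can opt into the Device Tree Blob feature described above by placing a request with this ID in its binary. The following is a minimal sketch, assuming the structures are exposed through `limine.h` in the usual way and that the bootloader leaves `response` as NULL when no DTB is available:

```c
#include <limine.h>
#include <stddef.h>

// Sketch of a kernel-side DTB request using the structures defined above.
static volatile struct limine_dtb_request dtb_request = {
    .id = LIMINE_DTB_REQUEST,
    .revision = 0
};

void *dtb_get(void) {
    if (dtb_request.response == NULL) {
        return NULL; // no DTB was provided by the bootloader
    }
    // Physical pointer to the blob; per the note above, the /chosen node
    // properties should be ignored.
    return dtb_request.response->dtb_ptr;
}
```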
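As a usage illustration for the aarch64 SMP structures earlier in this section, the hedged sketch below wakes every secondary CPU by atomically writing its `goto_address` field, comparing `mpidr` against `bsp_mpidr` to skip the bootstrap processor. The `ap_entry` function and the surrounding kernel code are assumptions made for the example, not part of the protocol:

```c
#include <limine.h>
#include <stdint.h>

// Hypothetical per-CPU entry point; a pointer to the CPU's
// struct limine_smp_info is passed in X0 (the first argument).
static void ap_entry(struct limine_smp_info *info) {
    (void)info;
    for (;;) {
        asm volatile ("wfi");
    }
}

// Start all application processors described by the aarch64 SMP response.
static void start_aps(struct limine_smp_response *smp) {
    for (uint64_t i = 0; i < smp->cpu_count; i++) {
        struct limine_smp_info *cpu = smp->cpus[i];
        if (cpu->mpidr == smp->bsp_mpidr) {
            continue; // goto_address is unused for the bootstrap processor
        }
        cpu->extra_argument = 0; // free-for-use field, e.g. a per-CPU pointer
        __atomic_store_n(&cpu->goto_address, ap_entry, __ATOMIC_SEQ_CST);
    }
}
```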
diff --git a/common/GNUmakefile b/common/GNUmakefile index ececdac0..82bc7119 100644 --- a/common/GNUmakefile +++ b/common/GNUmakefile @@ -21,6 +21,8 @@ else ifeq ($(TARGET),uefi-x86-64) override OBJCOPY_ARCH := elf64-x86-64 else ifeq ($(TARGET),uefi-ia32) override OBJCOPY_ARCH := elf32-i386 +else ifeq ($(TARGET),uefi-aarch64) + override OBJCOPY_ARCH := elf64-littleaarch64 else $(error Invalid target) endif @@ -44,10 +46,6 @@ override CROSS_CFLAGS += \ -Wno-address-of-packed-member \ -Wshadow \ -Wvla \ - -mno-80387 \ - -mno-mmx \ - -mno-sse \ - -mno-sse2 \ -MMD \ -DCOM_OUTPUT=$(COM_OUTPUT) \ -DE9_OUTPUT=$(E9_OUTPUT) \ @@ -69,6 +67,14 @@ ifeq ($(TARGET),bios) -fno-pic endif +ifeq ($(TARGET),$(filter $(TARGET),bios uefi-ia32 uefi-x86-64)) + override CROSS_CFLAGS += \ + -mno-80387 \ + -mno-mmx \ + -mno-sse \ + -mno-sse2 +endif + ifeq ($(TARGET),uefi-x86-64) override CROSS_CFLAGS += \ -Dbios=0 \ @@ -98,6 +104,18 @@ ifeq ($(TARGET),uefi-ia32) -fpie endif +ifeq ($(TARGET),uefi-aarch64) + override CROSS_CFLAGS += \ + -Dbios=0 \ + -Duefi=1 \ + -mtune=generic \ + -DGNU_EFI_USE_MS_ABI \ + -I'$(call SHESCAPE,$(BUILDDIR))/limine-efi/inc' \ + -I'$(call SHESCAPE,$(BUILDDIR))/limine-efi/inc/aarch64' \ + -fpie \ + -mgeneral-regs-only +endif + override CROSS_LDFLAGS += \ -nostdlib \ -z max-page-size=0x1000 @@ -127,6 +145,15 @@ ifeq ($(TARGET),uefi-ia32) -ztext endif +ifeq ($(TARGET),uefi-aarch64) + override CROSS_LDFLAGS += \ + -maarch64elf \ + -static \ + -pie \ + --no-dynamic-linker \ + -ztext +endif + override C_FILES := $(shell find ./ -type f -name '*.c') ifeq ($(TARGET),bios) override ASMX86_FILES := $(shell find ./ -type f -name '*.asm_x86') @@ -150,6 +177,13 @@ ifeq ($(TARGET),uefi-ia32) override OBJ := $(addprefix $(call MKESCAPE,$(BUILDDIR))/, $(C_FILES:.c=.o) $(ASM32_FILES:.asm_ia32=.o) $(ASM32U_FILES:.asm_uefi_ia32=.o) $(ASMX86_FILES:.asm_x86=.o)) endif +ifeq ($(TARGET),uefi-aarch64) + override ASM64_FILES := $(shell find ./ -type f -name '*.asm_aarch64') + override ASM64U_FILES := $(shell find ./ -type f -name '*.asm_uefi_aarch64') + + override OBJ := $(addprefix $(call MKESCAPE,$(BUILDDIR))/, $(C_FILES:.c=.o) $(ASM64_FILES:.asm_aarch64=.o) $(ASM64U_FILES:.asm_uefi_aarch64=.o)) +endif + override HEADER_DEPS := $(addprefix $(call MKESCAPE,$(BUILDDIR))/, $(C_FILES:.c=.d)) .PHONY: all @@ -160,11 +194,13 @@ else ifeq ($(TARGET),uefi-x86-64) all: $(call MKESCAPE,$(BUILDDIR))/BOOTX64.EFI else ifeq ($(TARGET),uefi-ia32) all: $(call MKESCAPE,$(BUILDDIR))/BOOTIA32.EFI +else ifeq ($(TARGET), uefi-aarch64) +all: $(call MKESCAPE,$(BUILDDIR))/BOOTAA64.EFI endif $(call MKESCAPE,$(BUILDDIR))/font.o: font.bin $(MKDIR_P) "$$(dirname '$(call SHESCAPE,$@)')" - $(CROSS_OBJCOPY) -B i8086 -I binary -O $(OBJCOPY_ARCH) font.bin '$(call SHESCAPE,$@)' + $(CROSS_OBJCOPY) -I binary -O $(OBJCOPY_ARCH) font.bin '$(call SHESCAPE,$@)' $(call MKESCAPE,$(BUILDDIR))/tinf-copied: ../tinf/* rm -rf '$(call SHESCAPE,$(BUILDDIR))/tinf' @@ -290,6 +326,46 @@ $(call MKESCAPE,$(BUILDDIR))/limine.elf: $(call MKESCAPE,$(BUILDDIR))/limine-efi endif +ifeq ($(TARGET),uefi-aarch64) + +$(call MKESCAPE,$(BUILDDIR))/full.map.o: $(call MKESCAPE,$(BUILDDIR))/limine_nomap.elf + cd '$(call SHESCAPE,$(BUILDDIR))' && \ + '$(call SHESCAPE,$(SRCDIR))/gensyms.sh' '$(call SHESCAPE,$<)' full 64 '\.text' + $(CROSS_CC) $(CROSS_CFLAGS) -c '$(call SHESCAPE,$(BUILDDIR))/full.map.S' -o '$(call SHESCAPE,$@)' + rm -f '$(call SHESCAPE,$(BUILDDIR))/full.map.S' '$(call SHESCAPE,$(BUILDDIR))/full.map.d' + +$(call MKESCAPE,$(BUILDDIR))/BOOTAA64.EFI: $(call 
MKESCAPE,$(BUILDDIR))/limine.elf + $(CROSS_OBJCOPY) -O binary '$(call SHESCAPE,$<)' '$(call SHESCAPE,$@)' + +$(call MKESCAPE,$(BUILDDIR))/limine-efi/gnuefi/crt0-efi-aarch64.o $(call MKESCAPE,$(BUILDDIR))/limine-efi/gnuefi/reloc_aarch64.o: $(call MKESCAPE,$(BUILDDIR))/limine-efi + unset ARFLAGS; unset CPPFLAGS; \ + $(MAKE) -C '$(call SHESCAPE,$(BUILDDIR))/limine-efi/gnuefi' \ + CC="$(CROSS_CC) -mtune=generic" \ + CFLAGS='-nostdinc' \ + INCDIR='-I$(call SHESCAPE,$(SRCDIR))/../freestanding-headers' \ + ARCH=aarch64 + +$(call MKESCAPE,$(BUILDDIR))/linker_nomap.ld: linker_uefi_aarch64.ld.in + $(MKDIR_P) '$(call SHESCAPE,$(BUILDDIR))' + $(CROSS_CC) -x c -E -P -undef -DLINKER_NOMAP linker_uefi_aarch64.ld.in -o '$(call SHESCAPE,$(BUILDDIR))/linker_nomap.ld' + +$(call MKESCAPE,$(BUILDDIR))/limine_nomap.elf: $(call MKESCAPE,$(BUILDDIR))/limine-efi/gnuefi/crt0-efi-aarch64.o $(call MKESCAPE,$(BUILDDIR))/limine-efi/gnuefi/reloc_aarch64.o $(OBJ) $(call MKESCAPE,$(BUILDDIR))/tinf/tinfgzip.o $(call MKESCAPE,$(BUILDDIR))/tinf/tinflate.o $(call MKESCAPE,$(BUILDDIR))/font.o + $(MAKE) '$(call SHESCAPE,$(BUILDDIR))/linker_nomap.ld' + $(CROSS_LD) \ + -T'$(call SHESCAPE,$(BUILDDIR))/linker_nomap.ld' \ + '$(call OBJESCAPE,$^)' $(CROSS_LDFLAGS) -o '$(call SHESCAPE,$@)' + +$(call MKESCAPE,$(BUILDDIR))/linker.ld: linker_uefi_aarch64.ld.in + $(MKDIR_P) '$(call SHESCAPE,$(BUILDDIR))' + $(CROSS_CC) -x c -E -P -undef linker_uefi_aarch64.ld.in -o '$(call SHESCAPE,$(BUILDDIR))/linker.ld' + +$(call MKESCAPE,$(BUILDDIR))/limine.elf: $(call MKESCAPE,$(BUILDDIR))/limine-efi/gnuefi/crt0-efi-aarch64.o $(call MKESCAPE,$(BUILDDIR))/limine-efi/gnuefi/reloc_aarch64.o $(OBJ) $(call MKESCAPE,$(BUILDDIR))/tinf/tinfgzip.o $(call MKESCAPE,$(BUILDDIR))/tinf/tinflate.o $(call MKESCAPE,$(BUILDDIR))/font.o $(call MKESCAPE,$(BUILDDIR))/full.map.o + $(MAKE) '$(call SHESCAPE,$(BUILDDIR))/linker.ld' + $(CROSS_LD) \ + -T'$(call SHESCAPE,$(BUILDDIR))/linker.ld' \ + '$(call OBJESCAPE,$^)' $(CROSS_LDFLAGS) -o '$(call SHESCAPE,$@)' +endif + ifeq ($(TARGET),uefi-ia32) $(call MKESCAPE,$(BUILDDIR))/full.map.o: $(call MKESCAPE,$(BUILDDIR))/limine_nomap.elf @@ -339,6 +415,12 @@ $(call MKESCAPE,$(BUILDDIR))/%.o: %.c $(call MKESCAPE,$(BUILDDIR))/tinf-copied $ $(CROSS_CC) $(CROSS_CFLAGS) -c '$(call SHESCAPE,$<)' -o '$(call SHESCAPE,$@)' endif +ifeq ($(TARGET), uefi-aarch64) +$(call MKESCAPE,$(BUILDDIR))/%.o: %.c $(call MKESCAPE,$(BUILDDIR))/tinf-copied $(call MKESCAPE,$(BUILDDIR))/limine-efi + $(MKDIR_P) "$$(dirname '$(call SHESCAPE,$@)')" + $(CROSS_CC) $(CROSS_CFLAGS) -c '$(call SHESCAPE,$<)' -o '$(call SHESCAPE,$@)' +endif + ifeq ($(TARGET),uefi-ia32) $(call MKESCAPE,$(BUILDDIR))/%.o: %.c $(call MKESCAPE,$(BUILDDIR))/tinf-copied $(call MKESCAPE,$(BUILDDIR))/limine-efi $(MKDIR_P) "$$(dirname '$(call SHESCAPE,$@)')" @@ -389,6 +471,16 @@ $(call MKESCAPE,$(BUILDDIR))/%.o: %.asm_x86 nasm '$(call SHESCAPE,$<)' -F dwarf -g $(WERROR) -f elf64 -o '$(call SHESCAPE,$@)' endif +ifeq ($(TARGET), uefi-aarch64) +$(call MKESCAPE,$(BUILDDIR))/%.o: %.asm_aarch64 + $(MKDIR_P) "$$(dirname '$(call SHESCAPE,$@)')" + $(CROSS_CC) $(CROSS_CFLAGS) -x assembler-with-cpp -c '$(call SHESCAPE,$<)' -o '$(call SHESCAPE,$@)' + +$(call MKESCAPE,$(BUILDDIR))/%.o: %.asm_uefi_aarch64 + $(MKDIR_P) "$$(dirname '$(call SHESCAPE,$@)')" + $(CROSS_CC) $(CROSS_CFLAGS) -x assembler-with-cpp -c '$(call SHESCAPE,$<)' -o '$(call SHESCAPE,$@)' +endif + ifeq ($(TARGET),uefi-ia32) $(call MKESCAPE,$(BUILDDIR))/%.o: %.asm_ia32 $(MKDIR_P) "$$(dirname '$(call SHESCAPE,$@)')" diff --git 
a/common/efi_thunk.asm_uefi_aarch64 b/common/efi_thunk.asm_uefi_aarch64 new file mode 100644 index 00000000..5d0559bb --- /dev/null +++ b/common/efi_thunk.asm_uefi_aarch64 @@ -0,0 +1,8 @@ +.global efi_main +.extern uefi_entry + +efi_main: + mov x30, xzr + mov x29, xzr + + b uefi_entry diff --git a/common/efi_thunk.asm_uefi_ia32 b/common/efi_thunk.asm_uefi_ia32 new file mode 100644 index 00000000..4b36b934 --- /dev/null +++ b/common/efi_thunk.asm_uefi_ia32 @@ -0,0 +1,6 @@ +global efi_main +extern uefi_entry +efi_main: + xor eax, eax + mov [esp], eax + jmp uefi_entry diff --git a/common/efi_thunk.asm_uefi_x86_64 b/common/efi_thunk.asm_uefi_x86_64 new file mode 100644 index 00000000..88e7b64f --- /dev/null +++ b/common/efi_thunk.asm_uefi_x86_64 @@ -0,0 +1,6 @@ +global efi_main +extern uefi_entry +efi_main: + xor eax, eax + mov [rsp], rax + jmp uefi_entry diff --git a/common/entry.s3.c b/common/entry.s3.c index f3e7f2c3..98b18528 100644 --- a/common/entry.s3.c +++ b/common/entry.s3.c @@ -26,26 +26,6 @@ void stage3_common(void); #if uefi == 1 -__attribute__((naked)) -EFI_STATUS efi_main( - __attribute__((unused)) EFI_HANDLE ImageHandle, - __attribute__((unused)) EFI_SYSTEM_TABLE *SystemTable) { - // Invalid return address of 0 to end stacktraces here -#if defined (__x86_64__) - asm ( - "xorl %eax, %eax\n\t" - "movq %rax, (%rsp)\n\t" - "jmp uefi_entry\n\t" - ); -#elif defined (__i386__) - asm ( - "xorl %eax, %eax\n\t" - "movl %eax, (%esp)\n\t" - "jmp uefi_entry\n\t" - ); -#endif -} - noreturn void uefi_entry(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) { gST = SystemTable; gBS = SystemTable->BootServices; @@ -65,7 +45,9 @@ noreturn void uefi_entry(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) init_memmap(); +#if defined (__x86_64__) || defined (__i386__) init_gdt(); +#endif disk_create_index(); @@ -129,8 +111,10 @@ noreturn void uefi_entry(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) noreturn void stage3_common(void) { term_notready(); +#if defined (__x86_64__) || defined (__i386__) init_flush_irqs(); init_io_apics(); +#endif menu(true); } diff --git a/common/gensyms.sh b/common/gensyms.sh index 55ec0f69..e2cca0e8 100755 --- a/common/gensyms.sh +++ b/common/gensyms.sh @@ -37,11 +37,11 @@ echo ".globl $2_map" >> "$TMP4" echo "$2_map:" >> "$TMP4" if [ "$3" = "32" ]; then - paste -d'$' "$TMP2" "$TMP3" | "$SED" 's/^/.long 0x/g;s/$/"/g;s/\$/\ + paste -d'#' "$TMP2" "$TMP3" | "$SED" 's/^/.long 0x/g;s/$/"/g;s/\#/\ .asciz "/g' >> "$TMP4" echo ".long 0xffffffff" >> "$TMP4" elif [ "$3" = "64" ]; then - paste -d'$' "$TMP2" "$TMP3" | "$SED" 's/^/.quad 0x/g;s/$/"/g;s/\$/\ + paste -d'#' "$TMP2" "$TMP3" | "$SED" 's/^/.quad 0x/g;s/$/"/g;s/\#/\ .asciz "/g' >> "$TMP4" echo ".quad 0xffffffffffffffff" >> "$TMP4" fi diff --git a/common/lib/acpi.h b/common/lib/acpi.h index 58361e72..24fe0241 100644 --- a/common/lib/acpi.h +++ b/common/lib/acpi.h @@ -7,6 +7,7 @@ #define EBDA (ebda_get()) +#if bios == 1 static inline uintptr_t ebda_get(void) { uintptr_t ebda = (uintptr_t)mminw(0x40e) << 4; @@ -17,6 +18,7 @@ static inline uintptr_t ebda_get(void) { return ebda; } +#endif struct sdt { char signature[4]; diff --git a/common/lib/blib.c b/common/lib/blib.c index 245644fa..c6d7f635 100644 --- a/common/lib/blib.c +++ b/common/lib/blib.c @@ -211,7 +211,13 @@ retry: goto retry; } +#if defined(__x86_64__) || defined(__i386__) asm volatile ("cli" ::: "memory"); +#elif defined (__aarch64__) + asm volatile ("msr daifset, #15" ::: "memory"); +#else +#error Unknown architecture +#endif 
efi_boot_services_exited = true; diff --git a/common/lib/blib.h b/common/lib/blib.h index e915a8c6..b992d6dd 100644 --- a/common/lib/blib.h +++ b/common/lib/blib.h @@ -57,8 +57,10 @@ uint64_t strtoui(const char *s, const char **end, int base); #if defined (__i386__) void memcpy32to64(uint64_t, uint64_t, uint64_t); -#elif defined (__x86_64__) +#elif defined (__x86_64__) || defined (__aarch64__) # define memcpy32to64(X, Y, Z) memcpy((void *)(uintptr_t)(X), (void *)(uintptr_t)(Y), Z) +#else +#error Unknown architecture #endif #define DIV_ROUNDUP(a, b) (((a) + ((b) - 1)) / (b)) @@ -83,7 +85,18 @@ typedef char symbol[]; noreturn void stage3_common(void); +#if defined (__x86_64__) || defined (__i386__) noreturn void common_spinup(void *fnptr, int args, ...); +#elif defined (__aarch64__) +noreturn void enter_in_current_el(uint64_t entry, uint64_t sp, uint64_t sctlr, + uint64_t target_x0); + +noreturn void enter_in_el1(uint64_t entry, uint64_t sp, uint64_t sctlr, + uint64_t mair, uint64_t tcr, uint64_t ttbr0, + uint64_t ttbr1, uint64_t target_x0); +#else +#error Unknown architecture +#endif #define no_unwind __attribute__((section(".no_unwind"))) diff --git a/common/lib/elf.c b/common/lib/elf.c index 073591ca..5030c217 100644 --- a/common/lib/elf.c +++ b/common/lib/elf.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -20,13 +21,15 @@ #define DT_RELASZ 0x00000008 #define DT_RELAENT 0x00000009 -#define ABI_SYSV 0x00 -#define ARCH_X86_64 0x3e -#define ARCH_X86_32 0x03 -#define BITS_LE 0x01 -#define ET_DYN 0x0003 -#define SHT_RELA 0x00000004 -#define R_X86_64_RELATIVE 0x00000008 +#define ABI_SYSV 0x00 +#define ARCH_X86_64 0x3e +#define ARCH_X86_32 0x03 +#define ARCH_AARCH64 0xb7 +#define BITS_LE 0x01 +#define ET_DYN 0x0003 +#define SHT_RELA 0x00000004 +#define R_X86_64_RELATIVE 0x00000008 +#define R_AARCH64_RELATIVE 0x00000403 /* Indices into identification array */ #define EI_CLASS 4 @@ -109,6 +112,7 @@ int elf_bits(uint8_t *elf) { switch (hdr.machine) { case ARCH_X86_64: + case ARCH_AARCH64: return 64; case ARCH_X86_32: return 32; @@ -209,7 +213,14 @@ static int elf64_apply_relocations(uint8_t *elf, struct elf64_hdr *hdr, void *bu memcpy(&relocation, elf + (rela_offset + offset), sizeof(struct elf64_rela)); switch (relocation.r_info) { - case R_X86_64_RELATIVE: { +#if defined (__x86_64__) || defined (__i386__) + case R_X86_64_RELATIVE: +#elif defined (__aarch64__) + case R_AARCH64_RELATIVE: +#else +#error Unknown architecture +#endif + { // Relocation is before buffer if (relocation.r_addr < vaddr) continue; @@ -251,10 +262,19 @@ int elf64_load_section(uint8_t *elf, void *buffer, const char *name, size_t limi return 1; } +#if defined (__x86_64__) || defined (__i386__) if (hdr.machine != ARCH_X86_64) { printv("elf: Not an x86_64 ELF file.\n"); return 1; } +#elif defined (__aarch64__) + if (hdr.machine != ARCH_AARCH64) { + printv("elf: Not an aarch64 ELF file.\n"); + return 1; + } +#else +#error Unknown architecture +#endif if (hdr.shdr_size < sizeof(struct elf64_shdr)) { panic(true, "elf: shdr_size < sizeof(struct elf64_shdr)"); @@ -500,9 +520,17 @@ int elf64_load(uint8_t *elf, uint64_t *entry_point, uint64_t *top, uint64_t *_sl panic(true, "elf: Not a Little-endian ELF file.\n"); } +#if defined (__x86_64__) || defined (__i386__) if (hdr.machine != ARCH_X86_64) { panic(true, "elf: Not an x86_64 ELF file.\n"); } +#elif defined (__aarch64__) + if (hdr.machine != ARCH_AARCH64) { + panic(true, "elf: Not an aarch64 ELF file.\n"); + } +#else +#error Unknown architecture 
+#endif if (is_reloc) { *is_reloc = false; @@ -671,6 +699,11 @@ final: if (elf64_apply_relocations(elf, &hdr, (void *)(uintptr_t)load_addr, phdr.p_vaddr, phdr.p_memsz, slide)) { panic(true, "elf: Failed to apply relocations"); } + +#if defined (__aarch64__) + clean_inval_dcache_poc(mem_base, mem_base + mem_size); + inval_icache_pou(mem_base, mem_base + mem_size); +#endif } if (simulation) { diff --git a/common/lib/macros.aarch64_asm.h b/common/lib/macros.aarch64_asm.h new file mode 100644 index 00000000..d59ef069 --- /dev/null +++ b/common/lib/macros.aarch64_asm.h @@ -0,0 +1,52 @@ +// Branch to \el1 if in EL1, or to \el2 if in EL2 +// Uses \reg, halts if not in EL1 or EL2 +.macro PICK_EL reg, el1, el2 + mrs \reg, currentel + and \reg, \reg, #0b1100 + + cmp \reg, #0b0100 // EL1? + b.eq \el1 + cmp \reg, #0b1000 // EL2? + b.eq \el2 + + // Halt otherwise + msr daifset, #0b1111 +99: + wfi + b 99b +.endm + + +// Zero out all general purpose registers apart from X0 +.macro ZERO_REGS_EXCEPT_X0 + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + mov x16, xzr + mov x17, xzr + mov x18, xzr + mov x19, xzr + mov x20, xzr + mov x21, xzr + mov x22, xzr + mov x23, xzr + mov x24, xzr + mov x25, xzr + mov x26, xzr + mov x27, xzr + mov x28, xzr + mov x29, xzr + mov x30, xzr +.endm diff --git a/common/lib/mem.asm_aarch64 b/common/lib/mem.asm_aarch64 new file mode 100644 index 00000000..fa632984 --- /dev/null +++ b/common/lib/mem.asm_aarch64 @@ -0,0 +1,66 @@ +.section .text + +.global memcpy +memcpy: + mov x3, x0 +0: + cbz x2, 1f + ldrb w4, [x1], #1 + strb w4, [x0], #1 + sub x2, x2, #1 + b 0b +1: + mov x0, x3 + ret + +.global memset +memset: + mov x3, x0 +0: + cbz x2, 1f + strb w1, [x0], #1 + sub x2, x2, #1 + b 0b +1: + mov x0, x3 + ret + +.global memmove +memmove: + mov x3, x0 + mov x5, x2 + + cmp x0, x1 + b.gt 1f +0: + cbz x2, 2f + ldrb w4, [x1], #1 + strb w4, [x0], #1 + sub x2, x2, #1 + b 0b +1: + sub x5, x5, #1 + cbz x2, 2f + ldrb w4, [x1, x5] + strb w4, [x0, x5] + sub x2, x2, #1 + b 1b +2: + mov x0, x3 + ret + +.global memcmp +memcmp: + mov x3, xzr +0: + cbz x2, 1f + ldrb w3, [x0], #1 + ldrb w4, [x1], #1 + sub w3, w3, w4 + cbnz w3, 1f + sub x2, x2, #1 + b 0b +1: + sxtw x0, w3 + mov x0, x3 + ret diff --git a/common/lib/panic.s2.c b/common/lib/panic.s2.c index beee9c16..47b07c93 100644 --- a/common/lib/panic.s2.c +++ b/common/lib/panic.s2.c @@ -83,7 +83,13 @@ nested: #elif uefi == 1 print("System halted."); for (;;) { +#if defined (__x86_64__) || defined (__i386__) asm ("hlt"); +#elif defined (__aarch64__) + asm ("wfi"); +#else +#error Unknown architecture +#endif } #endif } diff --git a/common/lib/print.s2.c b/common/lib/print.s2.c index ec0aec61..05d540aa 100644 --- a/common/lib/print.s2.c +++ b/common/lib/print.s2.c @@ -210,9 +210,11 @@ out: #endif for (size_t i = 0; i < print_buf_i; i++) { +#if defined (__x86_64__) || defined (__i386__) if (E9_OUTPUT) { outb(0xe9, print_buf[i]); } +#endif if ((term_backend != NOT_READY && serial) || COM_OUTPUT) { switch (print_buf[i]) { case '\n': diff --git a/common/lib/rand.c b/common/lib/rand.c index 7fa37895..4149fccf 100644 --- a/common/lib/rand.c +++ b/common/lib/rand.c @@ -26,6 +26,8 @@ static void init_rand(void) { * ((uint32_t)0xce86d624) ^ ((uint32_t)0xee0da130 * (uint32_t)rdtsc()); + // TODO(qookie): aarch64 also has an optional HW random number generator +#if defined (__x86_64__) || 
defined(__i386__) uint32_t eax, ebx, ecx, edx; // Check for rdseed @@ -34,6 +36,7 @@ } else if (cpuid(0x01, 0, &eax, &ebx, &ecx, &edx) && (ecx & (1 << 30))) { seed *= (seed ^ rdrand(uint32_t)); } +#endif status = ext_mem_alloc(n * sizeof(uint32_t)); diff --git a/common/lib/spinup.asm_aarch64 b/common/lib/spinup.asm_aarch64 new file mode 100644 index 00000000..68a02686 --- /dev/null +++ b/common/lib/spinup.asm_aarch64 @@ -0,0 +1,108 @@ +#include + +// noreturn void enter_in_current_el(uint64_t entry, uint64_t sp, uint64_t sctlr, +// uint64_t target_x0) +// Configure current EL state and jump to kernel. Used for Linux hence +// no paging register configuration (which requires SCTLR.M = 0). + +.global enter_in_current_el +enter_in_current_el: + msr sp_el0, x1 + + // Sanity check that SCTLR.M = 0, halt otherwise + and x8, x2, #0b1 + cbz x8, 98f +99: + wfi + b 99b + +98: + PICK_EL x8, 0f, 1f +0: + msr sctlr_el1, x2 + dsb sy + isb + + // Enter kernel in EL1 + mov x8, #0x3c4 + msr spsr_el1, x8 + msr elr_el1, x0 + + mov x0, x3 + ZERO_REGS_EXCEPT_X0 + + eret + +1: + msr sctlr_el2, x2 + dsb sy + isb + + // Enter kernel in EL2 + mov x8, #0x3c8 + msr spsr_el2, x8 + msr elr_el2, x0 + + mov x0, x3 + ZERO_REGS_EXCEPT_X0 + + eret + +// noreturn void enter_in_el1(uint64_t entry, uint64_t sp, uint64_t sctlr, +// uint64_t mair, uint64_t tcr, uint64_t ttbr0, +// uint64_t ttbr1, uint64_t target_x0) +// Potentially drop to EL1 from EL2 (and also disable trapping to EL2), then +// configure EL1 state and jump to kernel. + +.global enter_in_el1 +enter_in_el1: + msr spsel, #0 + mov sp, x1 + + // Configure EL1 state + msr mair_el1, x3 + msr tcr_el1, x4 + msr ttbr0_el1, x5 + msr ttbr1_el1, x6 + msr sctlr_el1, x2 + dsb sy + isb + + PICK_EL x8, 0f, 1f +0: + // Enter kernel in EL1 + mov x8, #0x3c4 + msr spsr_el1, x8 + msr elr_el1, x0 + + mov x0, x7 + ZERO_REGS_EXCEPT_X0 + + eret + +1: + // Configure EL2-specific state for EL1 + + // Don't trap counters to EL2 + mrs x8, cnthctl_el2 + orr x8, x8, #3 + msr cnthctl_el2, x8 + msr cntvoff_el2, xzr + + // Enable AArch64 in EL1 + ldr x8, =0x80000002 + msr hcr_el2, x8 + + // Don't trap FP/SIMD to EL2 + mov x8, #0x33FF + msr cptr_el2, x8 + msr hstr_el2, xzr + + // Enter kernel in EL1 + mov x8, #0x3c4 + msr spsr_el2, x8 + msr elr_el2, x0 + + mov x0, x7 + ZERO_REGS_EXCEPT_X0 + + eret diff --git a/common/lib/term.c b/common/lib/term.c index b4c1bc4b..b9a62c36 100644 --- a/common/lib/term.c +++ b/common/lib/term.c @@ -243,8 +243,10 @@ void term_write(uint64_t buf, uint64_t count) { } bool native = false; -#if defined (__x86_64__) +#if defined (__x86_64__) || defined (__aarch64__) native = true; +#elif !defined (__i386__) +#error Unknown architecture #endif if (!term_runtime || native) { diff --git a/common/lib/trace.s2.c b/common/lib/trace.s2.c index dd60615a..5fdac159 100644 --- a/common/lib/trace.s2.c +++ b/common/lib/trace.s2.c @@ -52,8 +52,10 @@ void print_stacktrace(size_t *base_ptr) { "movl %%ebp, %0" #elif defined (__x86_64__) "movq %%rbp, %0" +#elif defined (__aarch64__) + "mov %0, x29" #endif - : "=g"(base_ptr) + : "=r"(base_ptr) :: "memory" ); } diff --git a/common/linker_uefi_aarch64.ld.in b/common/linker_uefi_aarch64.ld.in new file mode 100644 index 00000000..4781788e --- /dev/null +++ b/common/linker_uefi_aarch64.ld.in @@ -0,0 +1,96 @@ +OUTPUT_FORMAT(elf64-littleaarch64) +OUTPUT_ARCH(aarch64) +ENTRY(_start) + +PHDRS +{ + text PT_LOAD FLAGS((1 << 0) | (1 << 2)) ; + data PT_LOAD FLAGS((1 << 1) | (1 << 2)) ; + dynamic PT_DYNAMIC FLAGS((1 << 1) | (1 << 2)) ; +} + 
+SECTIONS +{ + . = 0; + __image_base = .; + __image_size = __image_end - __image_base; + + .text : { + *(.pe_header) + . = ALIGN(0x1000); + + *(.text .text.*) + . = ALIGN(0x1000); + } :text + + __text_start = __image_base + 0x1000; + __text_size = SIZEOF(.text) - 0x1000; + __text_end = __text_start + __text_size; + + .data.sbat : { + *(.data.sbat) + . = ALIGN(0x1000); + } :data + + PROVIDE(__sbat_sizev = 1); + + __sbat_start = __text_end; + __sbat_size = SIZEOF(.data.sbat); + __sbat_end = __sbat_start + __sbat_size; + + .data.reloc : { + *(.data.reloc) + . = ALIGN(0x1000); + } :data + + __reloc_start = __sbat_end; + __reloc_size = SIZEOF(.data.reloc); + __reloc_end = __reloc_start + __reloc_size; + + .data : { + *(.rodata .rodata.*) + +#ifdef LINKER_NOMAP + full_map = .; +#else + *(.full_map) +#endif + + *(.no_unwind) + + data_begin = .; + *(.data .data.*) + *(.bss .bss.*) + *(COMMON) + data_end = .; + } :data + + .rela : { + *(.rela .rela.*) + } :data + + .got : { + *(.got .got.*) + } :data + + .dynamic : { + *(.dynamic) + . = ALIGN(0x1000); + } :data :dynamic + + __data_start = __reloc_end; + __data_size = SIZEOF(.data) + SIZEOF(.rela) + SIZEOF(.got) + SIZEOF(.dynamic); + __data_end = __data_start + __data_size; + + // for GNU-EFI crt0: + _data = __data_start; + _data_size = __data_size; + _edata = __data_end; + + __image_end = __data_end; + + /DISCARD/ : { + *(.eh_frame) + *(.note .note.*) + } +} diff --git a/common/menu.c b/common/menu.c index d4c19f2c..feba27da 100644 --- a/common/menu.c +++ b/common/menu.c @@ -534,52 +534,6 @@ static size_t print_tree(const char *shift, size_t level, size_t base_index, siz return max_entries; } -#if defined (__x86_64__) -__attribute__((used)) -static uintptr_t stack_at_first_entry = 0; -#endif - -__attribute__((naked)) -noreturn void menu(__attribute__((unused)) bool timeout_enabled) { -#if defined (__i386__) - asm volatile ( - "pop %eax\n\t" - "call 1f\n\t" - "1:\n\t" - "pop %eax\n\t" - "add $(2f - 1b), %eax\n\t" - "cmpl $0, (%eax)\n\t" - "jne 1f\n\t" - "mov %esp, (%eax)\n\t" - "jmp 3f\n\t" - "1:\n\t" - "mov (%esp), %edi\n\t" - "mov (%eax), %esp\n\t" - "push %edi\n\t" - "jmp 3f\n\t" - "2:\n\t" - ".long 0\n\t" - "3:\n\t" - "push $0\n\t" - "jmp _menu" - ); -#elif defined (__x86_64__) - asm volatile ( - "xor %eax, %eax\n\t" - "cmp %rax, stack_at_first_entry(%rip)\n\t" - "jne 1f\n\t" - "mov %rsp, stack_at_first_entry(%rip)\n\t" - "jmp 2f\n\t" - "1:\n\t" - "mov stack_at_first_entry(%rip), %rsp\n\t" - "2:\n\t" - "push $0\n\t" - "push $0\n\t" - "jmp _menu" - ); -#endif -} - static struct memmap_entry *rewound_memmap = NULL; static size_t rewound_memmap_entries = 0; static uint8_t *rewound_data; @@ -594,8 +548,7 @@ extern symbol s2_data_begin; extern symbol s2_data_end; #endif -__attribute__((used)) -static noreturn void _menu(bool timeout_enabled) { +noreturn void _menu(bool timeout_enabled) { size_t data_size = (uintptr_t)data_end - (uintptr_t)data_begin; #if bios == 1 size_t s2_data_size = (uintptr_t)s2_data_end - (uintptr_t)s2_data_begin; @@ -926,11 +879,23 @@ noreturn void boot(char *config) { } else if (!strcmp(proto, "limine")) { limine_load(config, cmdline); } else if (!strcmp(proto, "linux")) { +#if defined (__x86_64__) || defined (__i386__) linux_load(config, cmdline); +#else + print("TODO: Linux is not available on aarch64.\n\n"); +#endif } else if (!strcmp(proto, "multiboot1") || !strcmp(proto, "multiboot")) { +#if defined (__x86_64__) || defined (__i386__) multiboot1_load(config, cmdline); +#else + print("Multiboot 1 is not available on 
aarch64.\n\n"); +#endif } else if (!strcmp(proto, "multiboot2")) { +#if defined (__x86_64__) || defined (__i386__) multiboot2_load(config, cmdline); +#else + print("Multiboot 2 is not available on aarch64.\n\n"); +#endif } else if (!strcmp(proto, "chainload_next")) { chainload_next(config); } else if (!strcmp(proto, "chainload")) { diff --git a/common/menu_thunk.asm_aarch64 b/common/menu_thunk.asm_aarch64 new file mode 100644 index 00000000..2e6ae59c --- /dev/null +++ b/common/menu_thunk.asm_aarch64 @@ -0,0 +1,24 @@ +.section .data + +stack_at_first_entry: + .quad 0 + +.section .text + +.global menu +.extern _menu + +menu: + adrp x8, stack_at_first_entry + ldr x9, [x8, :lo12:stack_at_first_entry] + cbz x9, 1f + mov sp, x9 + b 2f +1: + mov x9, sp + str x9, [x8, :lo12:stack_at_first_entry] +2: + mov x30, xzr + mov x29, xzr + + b _menu diff --git a/common/menu_thunk.asm_ia32 b/common/menu_thunk.asm_ia32 new file mode 100644 index 00000000..54e160fa --- /dev/null +++ b/common/menu_thunk.asm_ia32 @@ -0,0 +1,26 @@ +global menu +extern _menu +menu: + pop eax + call .L1 +.L1: + pop eax + add eax, .L3 - .L1 + cmp dword [eax], 0 + jne .L2 + mov [eax], esp + jmp .L4 + +.L2: + mov edi, [esp] + mov esp, [eax] + push edi + jmp .L4 + +.L3: + dq 0 + +.L4: + push 0 + jmp _menu + diff --git a/common/menu_thunk.asm_x86_64 b/common/menu_thunk.asm_x86_64 new file mode 100644 index 00000000..0e086fdc --- /dev/null +++ b/common/menu_thunk.asm_x86_64 @@ -0,0 +1,21 @@ +section .data + +stack_at_first_entry: + dq 0 + +section .text + +global menu +extern _menu +menu: + xor eax, eax + cmp [rel stack_at_first_entry], rax + jne .L1 + mov [rel stack_at_first_entry], rsp + jmp .L2 +.L1: + mov rsp, [rel stack_at_first_entry] +.L2: + push 0 + push 0 + jmp _menu diff --git a/common/mm/vmm.c b/common/mm/vmm.c index 1355abd9..a3d787ba 100644 --- a/common/mm/vmm.c +++ b/common/mm/vmm.c @@ -10,6 +10,24 @@ typedef uint64_t pt_entry_t; +static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level, + uint64_t virt, enum page_size desired_sz, + size_t level_idx, size_t entry); + +#if defined (__x86_64__) || defined (__i386__) + +#define PT_FLAG_VALID ((uint64_t)1 << 0) +#define PT_FLAG_WRITE ((uint64_t)1 << 1) +#define PT_FLAG_USER ((uint64_t)1 << 2) +#define PT_FLAG_LARGE ((uint64_t)1 << 7) +#define PT_FLAG_NX ((uint64_t)1 << 63) +#define PT_PADDR_MASK ((uint64_t)0x0000FFFFFFFFF000) + +#define PT_TABLE_FLAGS (PT_FLAG_VALID | PT_FLAG_WRITE | PT_FLAG_USER) +#define PT_IS_TABLE(x) (((x) & (PT_FLAG_VALID | PT_FLAG_LARGE)) == PT_FLAG_VALID) +#define PT_IS_LARGE(x) (((x) & (PT_FLAG_VALID | PT_FLAG_LARGE)) == (PT_FLAG_VALID | PT_FLAG_LARGE)) +#define PT_TO_VMM_FLAGS(x) ((x) & (PT_FLAG_WRITE | PT_FLAG_NX)) + void vmm_assert_nx(void) { uint32_t a, b, c, d; if (!cpuid(0x80000001, 0, &a, &b, &c, &d) || !(d & (1 << 20))) { @@ -17,22 +35,6 @@ void vmm_assert_nx(void) { } } -static pt_entry_t *get_next_level(pt_entry_t *current_level, size_t entry) { - pt_entry_t *ret; - - if (current_level[entry] & 0x1) { - // Present flag set - ret = (pt_entry_t *)(size_t)(current_level[entry] & ~((pt_entry_t)0xfff)); - } else { - // Allocate a table for the next level - ret = ext_mem_alloc(PT_SIZE); - // Present + writable + user (0b111) - current_level[entry] = (pt_entry_t)(size_t)ret | 0b111; - } - - return ret; -} - pagemap_t new_pagemap(int lv) { pagemap_t pagemap; pagemap.levels = lv; @@ -68,6 +70,8 @@ void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_ pt_entry_t *pml5, *pml4, *pml3, *pml2, *pml1; + flags 
|= PT_FLAG_VALID; // Always present + // Paging levels switch (pagemap.levels) { case 5: @@ -81,15 +85,14 @@ void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_ } level5: - pml4 = get_next_level(pml5, pml5_entry); + pml4 = get_next_level(pagemap, pml5, virt_addr, pg_size, 4, pml5_entry); level4: - pml3 = get_next_level(pml4, pml4_entry); + pml3 = get_next_level(pagemap, pml4, virt_addr, pg_size, 3, pml4_entry); if (pg_size == Size1GiB) { // Check if 1GiB pages are avaliable. if (is_1gib_page_supported()) { - pml3[pml3_entry] = (pt_entry_t)(phys_addr | flags | (1 << 7)); - return; + pml3[pml3_entry] = (pt_entry_t)(phys_addr | flags | PT_FLAG_LARGE); } else { // If 1GiB pages are not supported then emulate it by splitting them into // 2MiB pages. @@ -97,16 +100,206 @@ level4: map_page(pagemap, virt_addr + i, phys_addr + i, flags, Size2MiB); } } - } - pml2 = get_next_level(pml3, pml3_entry); - - if (pg_size == Size2MiB) { - pml2[pml2_entry] = (pt_entry_t)(phys_addr | flags | (1 << 7)); return; } - pml1 = get_next_level(pml2, pml2_entry); + pml2 = get_next_level(pagemap, pml3, virt_addr, pg_size, 2, pml3_entry); + + if (pg_size == Size2MiB) { + pml2[pml2_entry] = (pt_entry_t)(phys_addr | flags | PT_FLAG_LARGE); + return; + } + + pml1 = get_next_level(pagemap, pml2, virt_addr, pg_size, 1, pml2_entry); pml1[pml1_entry] = (pt_entry_t)(phys_addr | flags); } + +#elif defined (__aarch64__) + +// Here we operate under the assumption that 4K pages are supported by the CPU. +// This appears to be guaranteed by UEFI, as section 2.3.6 "AArch64 Platforms" +// states that the primary processor core configuration includes 4K translation +// granules (TCR_EL1.TG0 = 0). +// Support for 4K pages also implies 2M, 1G and 512G blocks. + +// Sanity check that 4K pages are supported. 
+void vmm_assert_4k_pages(void) { + uint64_t aa64mmfr0; + asm volatile ("mrs %0, id_aa64mmfr0_el1" : "=r"(aa64mmfr0)); + + if (((aa64mmfr0 >> 28) & 0b1111) == 0b1111) { + panic(false, "vmm: CPU does not support 4K pages, please make a bug report about this."); + } +} + +#define PT_FLAG_VALID ((uint64_t)1 << 0) +#define PT_FLAG_TABLE ((uint64_t)1 << 1) +#define PT_FLAG_4K_PAGE ((uint64_t)1 << 1) +#define PT_FLAG_BLOCK ((uint64_t)0 << 1) +#define PT_FLAG_USER ((uint64_t)1 << 6) +#define PT_FLAG_READONLY ((uint64_t)1 << 7) +#define PT_FLAG_INNER_SH ((uint64_t)3 << 8) +#define PT_FLAG_ACCESS ((uint64_t)1 << 10) +#define PT_FLAG_XN ((uint64_t)1 << 54) +#define PT_FLAG_WB ((uint64_t)0 << 2) +#define PT_FLAG_FB ((uint64_t)1 << 2) +#define PT_PADDR_MASK ((uint64_t)0x0000FFFFFFFFF000) + +#define PT_TABLE_FLAGS (PT_FLAG_VALID | PT_FLAG_TABLE) + +#define PT_IS_TABLE(x) (((x) & (PT_FLAG_VALID | PT_FLAG_TABLE)) == (PT_FLAG_VALID | PT_FLAG_TABLE)) +#define PT_IS_LARGE(x) (((x) & (PT_FLAG_VALID | PT_FLAG_TABLE)) == PT_FLAG_VALID) +#define PT_TO_VMM_FLAGS(x) (pt_to_vmm_flags_internal(x)) + +static uint64_t pt_to_vmm_flags_internal(pt_entry_t entry) { + uint64_t flags = 0; + + if (!(entry & PT_FLAG_READONLY)) + flags |= VMM_FLAG_WRITE; + if (entry & PT_FLAG_XN) + flags |= VMM_FLAG_NOEXEC; + if (entry & PT_FLAG_FB) + flags |= VMM_FLAG_FB; + + return flags; +} + +pagemap_t new_pagemap(int lv) { + pagemap_t pagemap; + pagemap.levels = lv; + pagemap.top_level[0] = ext_mem_alloc(PT_SIZE); + pagemap.top_level[1] = ext_mem_alloc(PT_SIZE); + return pagemap; +} + +void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_t flags, enum page_size pg_size) { + // Calculate the indices in the various tables using the virtual address + size_t pml5_entry = (virt_addr & ((uint64_t)0xf << 48)) >> 48; + size_t pml4_entry = (virt_addr & ((uint64_t)0x1ff << 39)) >> 39; + size_t pml3_entry = (virt_addr & ((uint64_t)0x1ff << 30)) >> 30; + size_t pml2_entry = (virt_addr & ((uint64_t)0x1ff << 21)) >> 21; + size_t pml1_entry = (virt_addr & ((uint64_t)0x1ff << 12)) >> 12; + + pt_entry_t *pml5, *pml4, *pml3, *pml2, *pml1; + + bool is_higher_half = virt_addr & ((uint64_t)1 << 63); + + uint64_t real_flags = PT_FLAG_VALID | PT_FLAG_INNER_SH | PT_FLAG_ACCESS | PT_FLAG_WB; + if (!(flags & VMM_FLAG_WRITE)) + real_flags |= PT_FLAG_READONLY; + if (flags & VMM_FLAG_NOEXEC) + real_flags |= PT_FLAG_XN; + if (flags & VMM_FLAG_FB) + real_flags |= PT_FLAG_FB; + + // Paging levels + switch (pagemap.levels) { + case 5: + pml5 = pagemap.top_level[is_higher_half]; + goto level5; + case 4: + pml4 = pagemap.top_level[is_higher_half]; + goto level4; + default: + __builtin_unreachable(); + } + +level5: + pml4 = get_next_level(pagemap, pml5, virt_addr, pg_size, 4, pml5_entry); +level4: + pml3 = get_next_level(pagemap, pml4, virt_addr, pg_size, 3, pml4_entry); + + if (pg_size == Size1GiB) { + pml3[pml3_entry] = (pt_entry_t)(phys_addr | real_flags | PT_FLAG_BLOCK); + return; + } + + pml2 = get_next_level(pagemap, pml3, virt_addr, pg_size, 2, pml3_entry); + + if (pg_size == Size2MiB) { + pml2[pml2_entry] = (pt_entry_t)(phys_addr | real_flags | PT_FLAG_BLOCK); + return; + } + + pml1 = get_next_level(pagemap, pml2, virt_addr, pg_size, 1, pml2_entry); + + pml1[pml1_entry] = (pt_entry_t)(phys_addr | real_flags | PT_FLAG_4K_PAGE); +} + +#else +#error Unknown architecture +#endif + +static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level, + uint64_t virt, enum page_size desired_sz, + size_t level_idx, size_t entry) { + 
pt_entry_t *ret; + + if (PT_IS_TABLE(current_level[entry])) { + ret = (pt_entry_t *)(size_t)(current_level[entry] & PT_PADDR_MASK); + } else { + if (PT_IS_LARGE(current_level[entry])) { + // We are replacing an existing large page with a smaller page. + // Split the previous mapping into mappings of the newly requested size + // before performing the requested map operation. + + uint64_t old_page_size, new_page_size; + switch (level_idx) { + case 2: + old_page_size = 0x40000000; + break; + + case 1: + old_page_size = 0x200000; + break; + + default: + panic(false, "Unexpected level in get_next_level"); + } + + switch (desired_sz) { + case Size1GiB: + new_page_size = 0x40000000; + break; + + case Size2MiB: + new_page_size = 0x200000; + break; + + case Size4KiB: + new_page_size = 0x1000; + break; + + default: + panic(false, "Unexpected page size in get_next_level"); + } + + // Save all the information from the old entry at this level + uint64_t old_flags = PT_TO_VMM_FLAGS(current_level[entry]); + uint64_t old_phys = current_level[entry] & PT_PADDR_MASK; + uint64_t old_virt = virt & ~(old_page_size - 1); + + if (old_phys & (old_page_size - 1)) + panic(false, "Unexpected page table entry address in get_next_level"); + + // Allocate a table for the next level + ret = ext_mem_alloc(PT_SIZE); + current_level[entry] = (pt_entry_t)(size_t)ret | PT_TABLE_FLAGS; + + // Recreate the old mapping with smaller pages + for (uint64_t i = 0; i < old_page_size; i += new_page_size) { + map_page(pagemap, old_virt + i, old_phys + i, old_flags, desired_sz); + } + } else { + // Allocate a table for the next level + ret = ext_mem_alloc(PT_SIZE); + current_level[entry] = (pt_entry_t)(size_t)ret | PT_TABLE_FLAGS; + } + } + + return ret; +} + + diff --git a/common/mm/vmm.h b/common/mm/vmm.h index 12e70e75..3031da3b 100644 --- a/common/mm/vmm.h +++ b/common/mm/vmm.h @@ -4,9 +4,11 @@ #include #include -#define VMM_FLAG_PRESENT ((uint64_t)1 << 0) +#if defined (__x86_64__) || defined (__i386__) + #define VMM_FLAG_WRITE ((uint64_t)1 << 1) #define VMM_FLAG_NOEXEC ((uint64_t)1 << 63) +#define VMM_FLAG_FB ((uint64_t)0) typedef struct { int levels; @@ -23,4 +25,31 @@ void vmm_assert_nx(void); pagemap_t new_pagemap(int lv); void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_t flags, enum page_size page_size); +#elif defined (__aarch64__) + +// We use fake flags here because these don't properly map onto the +// aarch64 flags. 
+#define VMM_FLAG_WRITE ((uint64_t)1 << 0) +#define VMM_FLAG_NOEXEC ((uint64_t)1 << 1) +#define VMM_FLAG_FB ((uint64_t)1 << 2) + +typedef struct { + int levels; + void *top_level[2]; +} pagemap_t; + +enum page_size { + Size4KiB, + Size2MiB, + Size1GiB +}; + +void vmm_assert_4k_pages(void); +pagemap_t new_pagemap(int lv); +void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_t flags, enum page_size page_size); + +#else +#error Unknown architecture +#endif + #endif diff --git a/common/protos/limine.c b/common/protos/limine.c index 52a7c88d..32655b00 100644 --- a/common/protos/limine.c +++ b/common/protos/limine.c @@ -42,7 +42,7 @@ static pagemap_t build_pagemap(bool level5pg, struct elf_range *ranges, size_t r if (ranges_count == 0) { // Map 0 to 2GiB at 0xffffffff80000000 for (uint64_t i = 0; i < 0x80000000; i += 0x40000000) { - map_page(pagemap, 0xffffffff80000000 + i, i, 0x03, Size1GiB); + map_page(pagemap, 0xffffffff80000000 + i, i, VMM_FLAG_WRITE, Size1GiB); } } else { for (size_t i = 0; i < ranges_count; i++) { @@ -55,7 +55,7 @@ static pagemap_t build_pagemap(bool level5pg, struct elf_range *ranges, size_t r panic(false, "limine: Protected memory ranges are only supported for higher half kernels"); } - uint64_t pf = VMM_FLAG_PRESENT | + uint64_t pf = (ranges[i].permissions & ELF_PF_X ? 0 : VMM_FLAG_NOEXEC) | (ranges[i].permissions & ELF_PF_W ? VMM_FLAG_WRITE : 0); @@ -68,9 +68,9 @@ static pagemap_t build_pagemap(bool level5pg, struct elf_range *ranges, size_t r // Sub 2MiB mappings for (uint64_t i = 0; i < 0x200000; i += 0x1000) { if (i != 0) { - map_page(pagemap, i, i, 0x03, Size4KiB); + map_page(pagemap, i, i, VMM_FLAG_WRITE, Size4KiB); } - map_page(pagemap, direct_map_offset + i, i, 0x03, Size4KiB); + map_page(pagemap, direct_map_offset + i, i, VMM_FLAG_WRITE, Size4KiB); } // Map 2MiB to 4GiB at higher half base and 0 @@ -86,13 +86,13 @@ static pagemap_t build_pagemap(bool level5pg, struct elf_range *ranges, size_t r // So we map 2MiB to 1GiB with 2MiB pages and then map the rest // with 1GiB pages :^) for (uint64_t i = 0x200000; i < 0x40000000; i += 0x200000) { - map_page(pagemap, i, i, 0x03, Size2MiB); - map_page(pagemap, direct_map_offset + i, i, 0x03, Size2MiB); + map_page(pagemap, i, i, VMM_FLAG_WRITE, Size2MiB); + map_page(pagemap, direct_map_offset + i, i, VMM_FLAG_WRITE, Size2MiB); } for (uint64_t i = 0x40000000; i < 0x100000000; i += 0x40000000) { - map_page(pagemap, i, i, 0x03, Size1GiB); - map_page(pagemap, direct_map_offset + i, i, 0x03, Size1GiB); + map_page(pagemap, i, i, VMM_FLAG_WRITE, Size1GiB); + map_page(pagemap, direct_map_offset + i, i, VMM_FLAG_WRITE, Size1GiB); } size_t _memmap_entries = memmap_entries; @@ -119,15 +119,72 @@ static pagemap_t build_pagemap(bool level5pg, struct elf_range *ranges, size_t r for (uint64_t j = 0; j < aligned_length; j += 0x40000000) { uint64_t page = aligned_base + j; - map_page(pagemap, page, page, 0x03, Size1GiB); - map_page(pagemap, direct_map_offset + page, page, 0x03, Size1GiB); + map_page(pagemap, page, page, VMM_FLAG_WRITE, Size1GiB); + map_page(pagemap, direct_map_offset + page, page, VMM_FLAG_WRITE, Size1GiB); } } + // Map the framebuffer as uncacheable +#if defined (__aarch64__) + for (size_t i = 0; i < _memmap_entries; i++) { + uint64_t base = _memmap[i].base; + uint64_t length = _memmap[i].length; + uint64_t top = base + length; + + if (_memmap[i].type != MEMMAP_FRAMEBUFFER) + continue; + + uint64_t aligned_base = ALIGN_DOWN(base, 0x1000); + uint64_t aligned_top = ALIGN_UP(top, 0x1000); + 
uint64_t aligned_length = aligned_top - aligned_base; + + for (uint64_t j = 0; j < aligned_length; j += 0x1000) { + uint64_t page = aligned_base + j; + map_page(pagemap, page, page, VMM_FLAG_WRITE | VMM_FLAG_FB, Size4KiB); + map_page(pagemap, direct_map_offset + page, page, VMM_FLAG_WRITE | VMM_FLAG_FB, Size4KiB); + } + } +#endif + return pagemap; } +#if defined (__x86_64__) || defined (__i386__) extern symbol limine_spinup_32; +#elif defined (__aarch64__) + +#define LIMINE_SCTLR ((1 << 29) /* Res1 */ \ + | (1 << 28) /* Res1 */ \ + | (1 << 23) /* Res1 */ \ + | (1 << 22) /* Res1 */ \ + | (1 << 20) /* Res1 */ \ + | (1 << 12) /* I-Cache */ \ + | (1 << 11) /* Res1 */ \ + | (1 << 8) /* Res1 */ \ + | (1 << 7) /* Res1 */ \ + | (1 << 4) /* SP0 Alignment check */ \ + | (1 << 3) /* SP Alignment check */ \ + | (1 << 2) /* D-Cache */ \ + | (1 << 0)) /* MMU */ \ + +#define LIMINE_MAIR(fb) ( ((uint64_t)0b11111111 << 0) /* Normal WB RW-allocate non-transient */ \ + | ((uint64_t)(fb) << 8) ) /* Framebuffer type */ + +#define LIMINE_TCR(tsz, pa) ( ((uint64_t)(pa) << 32) /* Intermediate address size */ \ + | ((uint64_t)2 << 30) /* TTBR1 4K granule */ \ + | ((uint64_t)2 << 28) /* TTBR1 Inner shareable */ \ + | ((uint64_t)1 << 26) /* TTBR1 Outer WB RW-Allocate */ \ + | ((uint64_t)1 << 24) /* TTBR1 Inner WB RW-Allocate */ \ + | ((uint64_t)(tsz) << 16) /* Address bits in TTBR1 */ \ + /* TTBR0 4K granule */ \ + | ((uint64_t)2 << 12) /* TTBR0 Inner shareable */ \ + | ((uint64_t)1 << 10) /* TTBR0 Outer WB RW-Allocate */ \ + | ((uint64_t)1 << 8) /* TTBR0 Inner WB RW-Allocate */ \ + | ((uint64_t)(tsz) << 0)) /* Address bits in TTBR0 */ + +#else +#error Unknown architecture +#endif static uint64_t physical_base, virtual_base, slide, direct_map_offset; static size_t requests_count; @@ -216,7 +273,9 @@ static void term_write_shim(uint64_t context, uint64_t buf, uint64_t count) { } noreturn void limine_load(char *config, char *cmdline) { +#if defined (__x86_64__) || defined (__i386__) uint32_t eax, ebx, ecx, edx; +#endif char *kernel_path = config_get_value(config, 0, "KERNEL_PATH"); if (kernel_path == NULL) @@ -296,10 +355,12 @@ noreturn void limine_load(char *config, char *cmdline) { } } +#if defined (__x86_64__) || defined (__i386__) // Check if 64 bit CPU if (!cpuid(0x80000001, 0, &eax, &ebx, &ecx, &edx) || !(edx & (1 << 29))) { panic(true, "limine: This CPU does not support 64-bit mode."); } +#endif printv("limine: Physical base: %X\n", physical_base); printv("limine: Virtual base: %X\n", virtual_base); @@ -312,10 +373,13 @@ noreturn void limine_load(char *config, char *cmdline) { FEAT_START // Check if 5-level paging is available bool level5pg = false; + // TODO(qookie): aarch64 also has optional 5 level paging when using 4K pages +#if defined (__x86_64__) || defined (__i386__) if (cpuid(0x00000007, 0, &eax, &ebx, &ecx, &edx) && (ecx & (1 << 16))) { printv("limine: CPU has 5-level paging support\n"); level5pg = true; } +#endif struct limine_5_level_paging_request *lv5pg_request = get_request(LIMINE_5_LEVEL_PAGING_REQUEST); want_5lv = lv5pg_request != NULL && level5pg; @@ -332,6 +396,15 @@ FEAT_START } FEAT_END +#if defined (__aarch64__) + uint64_t aa64mmfr0; + asm volatile ("mrs %0, id_aa64mmfr0_el1" : "=r" (aa64mmfr0)); + + uint64_t pa = aa64mmfr0 & 0xF; + + uint64_t tsz = 64 - (want_5lv ? 
57 : 48); +#endif + struct limine_file *kf = ext_mem_alloc(sizeof(struct limine_file)); *kf = get_file(kernel_file, cmdline); fclose(kernel_file); @@ -459,6 +532,38 @@ FEAT_START FEAT_END #endif + // Device tree blob feature +FEAT_START + struct limine_dtb_request *dtb_request = get_request(LIMINE_DTB_REQUEST); + if (dtb_request == NULL) { + break; // next feature + } + +#if uefi == 1 + struct limine_dtb_response *dtb_response = + ext_mem_alloc(sizeof(struct limine_dtb_response)); + + // TODO: Looking for the DTB should be moved out of here and into lib/, because: + // 1. We will need it for core bring-up for the SMP request. + // 2. We will need to patch it for the Linux boot protocol to set the initramfs + // and boot arguments. + // 3. If Limine is ported to platforms that use a DTB but do not use UEFI, it will + // need to be found in a different way. + const EFI_GUID dtb_guid = EFI_DTB_TABLE_GUID; + + // Look for the DTB in the configuration tables + for (size_t i = 0; i < gST->NumberOfTableEntries; i++) { + EFI_CONFIGURATION_TABLE *cur_table = &gST->ConfigurationTable[i]; + + if (memcmp(&cur_table->VendorGuid, &dtb_guid, sizeof(EFI_GUID)) == 0) + dtb_response->dtb_ptr = (uint64_t)(uintptr_t)cur_table->VendorTable; + } + + dtb_request->response = reported_addr(dtb_response); +#endif + +FEAT_END + // Stack size uint64_t stack_size = 65536; FEAT_START @@ -593,8 +698,10 @@ FEAT_START #if defined (__i386__) term_callback = limine_term_callback; limine_term_callback_ptr = terminal_request->callback; -#elif defined (__x86_64__) +#elif defined (__x86_64__) || defined (__aarch64__) term_callback = (void *)terminal_request->callback; +#else +#error Unknown architecture #endif } @@ -607,8 +714,10 @@ FEAT_START limine_term_write_ptr = (uintptr_t)term_write_shim; terminal_response->write = (uintptr_t)(void *)limine_term_write_entry; -#elif defined (__x86_64__) +#elif defined (__x86_64__) || defined (__aarch64__) terminal_response->write = (uintptr_t)term_write_shim; +#else +#error Unknown architecture #endif term_fb_ptr = &terminal->framebuffer; @@ -699,6 +808,7 @@ FEAT_START FEAT_END // Wrap-up stuff before memmap close +#if defined (__x86_64__) || defined (__i386__) struct gdtr *local_gdt = ext_mem_alloc(sizeof(struct gdtr)); local_gdt->limit = gdt.limit; uint64_t local_gdt_base = (uint64_t)gdt.ptr; @@ -707,6 +817,40 @@ FEAT_END #if defined (__i386__) local_gdt->ptr_hi = local_gdt_base >> 32; #endif +#endif + +#if defined (__aarch64__) + uint64_t fb_attr = 0x00; + if (fb.framebuffer_addr) { + int el = current_el(); + uint64_t res; + + if (el == 1) { + asm volatile ( + "at s1e1w, %1\n\t" + "isb\n\t" + "mrs %0, par_el1" + : "=r"(res) + : "r"(fb.framebuffer_addr) + : "memory"); + } else if (el == 2) { + asm volatile ( + "at s1e2w, %1\n\t" + "isb\n\t" + "mrs %0, par_el1" + : "=r"(res) + : "r"(fb.framebuffer_addr) + : "memory"); + } else { + panic(false, "Unexpected EL in limine_load"); + } + + if (res & 1) + panic(false, "Address translation for framebuffer failed"); + + fb_attr = res >> 56; + } +#endif void *stack = ext_mem_alloc(stack_size) + stack_size; @@ -728,12 +872,22 @@ FEAT_START struct limine_smp_info *smp_array; struct smp_information *smp_info; size_t cpu_count; +#if defined (__x86_64__) || defined (__i386__) uint32_t bsp_lapic_id; smp_info = init_smp(0, (void **)&smp_array, &cpu_count, &bsp_lapic_id, true, want_5lv, pagemap, smp_request->flags & LIMINE_SMP_X2APIC, true, direct_map_offset, true); +#elif defined (__aarch64__) + uint64_t bsp_mpidr; + + smp_info = init_smp(0, (void 
**)&smp_array, + &cpu_count, &bsp_mpidr, + pagemap, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa), LIMINE_SCTLR); +#else +#error Unknown architecture +#endif if (smp_info == NULL) { break; @@ -747,8 +901,14 @@ FEAT_START struct limine_smp_response *smp_response = ext_mem_alloc(sizeof(struct limine_smp_response)); +#if defined (__x86_64__) || defined (__i386__) smp_response->flags |= (smp_request->flags & LIMINE_SMP_X2APIC) && x2apic_check(); smp_response->bsp_lapic_id = bsp_lapic_id; +#elif defined (__aarch64__) + smp_response->bsp_mpidr = bsp_mpidr; +#else +#error Unknown architecture +#endif uint64_t *smp_list = ext_mem_alloc(cpu_count * sizeof(uint64_t)); for (size_t i = 0; i < cpu_count; i++) { @@ -833,6 +993,7 @@ FEAT_END term_runtime = true; +#if defined (__x86_64__) || defined (__i386__) #if bios == 1 // If we're going 64, we might as well call this BIOS interrupt // to tell the BIOS that we are entering Long Mode, since it is in @@ -857,4 +1018,13 @@ FEAT_END (uint32_t)entry_point, (uint32_t)(entry_point >> 32), (uint32_t)reported_stack, (uint32_t)(reported_stack >> 32), (uint32_t)(uintptr_t)local_gdt); +#elif defined (__aarch64__) + vmm_assert_4k_pages(); + + enter_in_el1(entry_point, (uint64_t)stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa), + (uint64_t)pagemap.top_level[0], + (uint64_t)pagemap.top_level[1], 0); +#else +#error Unknown architecture +#endif } diff --git a/common/protos/linux.c b/common/protos/linux.c index 1d31586f..cdd2180b 100644 --- a/common/protos/linux.c +++ b/common/protos/linux.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -629,3 +631,5 @@ set_textmode:; common_spinup(linux_spinup, 2, (uint32_t)kernel_load_addr, (uint32_t)(uintptr_t)boot_params); } + +#endif diff --git a/common/protos/multiboot1.c b/common/protos/multiboot1.c index a8b73530..1e137d42 100644 --- a/common/protos/multiboot1.c +++ b/common/protos/multiboot1.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -421,3 +423,5 @@ noreturn void multiboot1_load(char *config, char *cmdline) { (uint32_t)mb1_info_final_loc, (uint32_t)entry_point, (uint32_t)(uintptr_t)ranges, (uint32_t)ranges_count); } + +#endif diff --git a/common/protos/multiboot2.c b/common/protos/multiboot2.c index 28985fd7..4f20978d 100644 --- a/common/protos/multiboot2.c +++ b/common/protos/multiboot2.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -773,3 +775,5 @@ noreturn void multiboot2_load(char *config, char* cmdline) { (uint32_t)mb2_info_final_loc, (uint32_t)entry_point, (uint32_t)(uintptr_t)ranges, (uint32_t)ranges_count); } + +#endif diff --git a/common/sys/cpu.c b/common/sys/cpu.c index a75ea88d..7aabbbdc 100644 --- a/common/sys/cpu.c +++ b/common/sys/cpu.c @@ -20,3 +20,9 @@ extern uint64_t rdmsr(uint32_t msr); extern void wrmsr(uint32_t msr, uint64_t value); extern uint64_t rdtsc(void); extern void delay(uint64_t cycles); + +extern size_t icache_line_size(void); +extern size_t dcache_line_size(void); +extern void clean_inval_dcache_poc(uintptr_t start, uintptr_t end); +extern void inval_icache_pou(uintptr_t start, uintptr_t end); +extern int current_el(void); diff --git a/common/sys/cpu.h b/common/sys/cpu.h index d36487b1..62847b4d 100644 --- a/common/sys/cpu.h +++ b/common/sys/cpu.h @@ -2,8 +2,11 @@ #define __SYS__CPU_H__ #include +#include #include +#if defined(__x86_64__) || defined(__i386__) + inline bool cpuid(uint32_t leaf, uint32_t subleaf, uint32_t *eax, 
uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { uint32_t cpuid_max; @@ -217,4 +220,76 @@ inline void delay(uint64_t cycles) { ret; \ }) +#elif defined (__aarch64__) + +inline uint64_t rdtsc(void) { + uint64_t v; + asm volatile ("mrs %0, cntpct_el0" : "=r" (v)); + return v; +} + +#define locked_read(var) ({ \ + typeof(*var) ret = 0; \ + asm volatile ( \ + "ldar %0, %1" \ + : "=r" (ret) \ + : "m" (*(var)) \ + : "memory" \ + ); \ + ret; \ +}) + +inline size_t icache_line_size(void) { + uint64_t ctr; + asm volatile ("mrs %0, ctr_el0" : "=r"(ctr)); + + return 4 << (ctr & 0b1111); // CTR_EL0.IminLine is log2 of the line size in words +} + +inline size_t dcache_line_size(void) { + uint64_t ctr; + asm volatile ("mrs %0, ctr_el0" : "=r"(ctr)); + + return 4 << ((ctr >> 16) & 0b1111); // CTR_EL0.DminLine is log2 of the line size in words +} + +// Clean and invalidate D-Cache to Point of Coherency +inline void clean_inval_dcache_poc(uintptr_t start, uintptr_t end) { + size_t dsz = dcache_line_size(); + + uintptr_t addr = start & ~(dsz - 1); + while (addr < end) { + asm volatile ("dc civac, %0" :: "r"(addr) : "memory"); + addr += dsz; + } + + asm volatile ("dsb sy\n\tisb"); +} + +// Invalidate I-Cache to Point of Unification +inline void inval_icache_pou(uintptr_t start, uintptr_t end) { + size_t isz = icache_line_size(); + + uintptr_t addr = start & ~(isz - 1); + while (addr < end) { + asm volatile ("ic ivau, %0" :: "r"(addr) : "memory"); + addr += isz; + } + + asm volatile ("dsb sy\n\tisb"); +} + +inline int current_el(void) { + uint64_t v; + + asm volatile ("mrs %0, currentel" : "=r"(v)); + v = (v >> 2) & 0b11; + + return v; +} + +#else +#error Unknown architecture +#endif + #endif diff --git a/common/sys/gdt.s2.c b/common/sys/gdt.s2.c index 64d8bf3e..8f48db4d 100644 --- a/common/sys/gdt.s2.c +++ b/common/sys/gdt.s2.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -79,3 +81,5 @@ void init_gdt(void) { gdt.ptr = (uintptr_t)gdt_copy; } #endif + +#endif diff --git a/common/sys/idt.c b/common/sys/idt.c index e27d7fd7..7804dc5b 100644 --- a/common/sys/idt.c +++ b/common/sys/idt.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -67,3 +69,5 @@ void flush_irqs(void) { asm volatile ("lidt %0" :: "m"(old_idt) : "memory"); } + +#endif diff --git a/common/sys/lapic.c b/common/sys/lapic.c index d52a1634..3d63d464 100644 --- a/common/sys/lapic.c +++ b/common/sys/lapic.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -162,7 +164,7 @@ void io_apic_write(size_t io_apic, uint32_t reg, uint32_t value) { } uint32_t io_apic_gsi_count(size_t io_apic) { - return ((io_apic_read(io_apic, 1) & 0xff0000) >> 16) + 1; + return ((io_apic_read(io_apic, 1) & 0xff0000) >> 16) + 1; } void io_apic_mask_all(void) { @@ -175,3 +177,5 @@ void io_apic_mask_all(void) { } } } + +#endif diff --git a/common/sys/pic.c b/common/sys/pic.c index 8d821816..36b59750 100644 --- a/common/sys/pic.c +++ b/common/sys/pic.c @@ -1,3 +1,5 @@ +#if defined (__x86_64__) || defined (__i386__) + #include #include #include @@ -42,3 +44,5 @@ void pic_mask_all(void) { outb(0xa1, 0xff); outb(0x21, 0xff); } + +#endif diff --git a/common/sys/smp.c b/common/sys/smp.c index 46c7e4fb..6dace9b6 100644 --- a/common/sys/smp.c +++ b/common/sys/smp.c @@ -42,6 +42,28 @@ struct madt_x2apic { extern symbol smp_trampoline_start; extern size_t smp_trampoline_size; +struct madt_gicc { + struct madt_header header; + uint8_t reserved1[2]; + uint32_t iface_no; + uint32_t acpi_uid; + uint32_t flags; + uint32_t parking_ver; + uint32_t perf_gsiv; +
uint64_t parking_addr; + uint64_t gicc_base_addr; + uint64_t gicv_base_addr; + uint64_t gich_base_addr; + uint32_t vgic_maint_gsiv; + uint64_t gicr_base_addr; + uint64_t mpidr; + uint8_t power_eff_class; + uint8_t reserved2; + uint16_t spe_overflow_gsiv; +} __attribute__((packed)); + +#if defined (__x86_64__) || defined (__i386__) + struct trampoline_passed_info { uint8_t smp_tpl_booted_flag; uint8_t smp_tpl_target_mode; @@ -78,7 +100,6 @@ static bool smp_start_ap(uint32_t lapic_id, struct gdtr *gdtr, | ((uint32_t)wp << 4) | (uint32_t)longmode; passed_info->smp_tpl_gdt = *gdtr; - passed_info->smp_tpl_booted_flag = 0; passed_info->smp_tpl_hhdm = hhdm; asm volatile ("" ::: "memory"); @@ -278,3 +299,271 @@ struct smp_information *init_smp(size_t header_hack_size, return ret; } + +#elif defined (__aarch64__) + +struct trampoline_passed_info { + uint64_t smp_tpl_booted_flag; + + uint64_t smp_tpl_ttbr0; + uint64_t smp_tpl_ttbr1; + + uint64_t smp_tpl_mair; + uint64_t smp_tpl_tcr; + uint64_t smp_tpl_sctlr; + + uint64_t smp_tpl_info_struct; +}; + +enum { + BOOT_WITH_SPIN_TBL, + BOOT_WITH_PSCI_SMC, + BOOT_WITH_PSCI_HVC, + BOOT_WITH_ACPI_PARK +}; + +static uint32_t psci_cpu_on = 0xC4000003; + +static bool try_start_ap(int boot_method, uint64_t method_ptr, + struct smp_information *info_struct, + uint64_t ttbr0, uint64_t ttbr1, uint64_t mair, + uint64_t tcr, uint64_t sctlr) { + // Prepare the trampoline + static void *trampoline = NULL; + if (trampoline == NULL) { + trampoline = ext_mem_alloc(0x1000); + + memcpy(trampoline, smp_trampoline_start, smp_trampoline_size); + } + + static struct trampoline_passed_info *passed_info = NULL; + if (passed_info == NULL) { + passed_info = (void *)(((uintptr_t)trampoline + 0x1000) + - sizeof(struct trampoline_passed_info)); + } + + passed_info->smp_tpl_info_struct = (uint64_t)(uintptr_t)info_struct; + passed_info->smp_tpl_booted_flag = 0; + passed_info->smp_tpl_ttbr0 = ttbr0; + passed_info->smp_tpl_ttbr1 = ttbr1; + passed_info->smp_tpl_mair = mair; + passed_info->smp_tpl_tcr = tcr; + passed_info->smp_tpl_sctlr = sctlr; + + // Cache coherency between the I-Cache and D-Cache is not guaranteed by the + // architecture and as such we must perform I-Cache invalidation. + // Additionally, the newly-booted AP may have caches disabled which implies + // it possibly does not see our cache contents either. 
+ + clean_inval_dcache_poc((uintptr_t)trampoline, (uintptr_t)trampoline + 0x1000); + inval_icache_pou((uintptr_t)trampoline, (uintptr_t)trampoline + 0x1000); + + asm volatile ("" ::: "memory"); + + switch (boot_method) { + case BOOT_WITH_SPIN_TBL: + *(volatile uint64_t *)method_ptr = (uint64_t)(uintptr_t)trampoline; + clean_inval_dcache_poc(method_ptr, method_ptr + 8); + asm ("sev"); + break; + + case BOOT_WITH_PSCI_SMC: + case BOOT_WITH_PSCI_HVC: { + register int32_t result asm("w0"); + register uint32_t cmd asm("w0") = psci_cpu_on; + register uint64_t cpu asm("x1") = info_struct->mpidr; + register uint64_t addr asm("x2") = (uint64_t)(uintptr_t)trampoline; + register uint64_t ctx asm("x3") = 0; + + if (boot_method == BOOT_WITH_PSCI_SMC) + asm volatile ("smc #0" : "=r"(result) : "r"(cmd), "r"(cpu), "r"(addr), "r"(ctx)); + else + asm volatile ("hvc #0" : "=r"(result) : "r"(cmd), "r"(cpu), "r"(addr), "r"(ctx)); + + switch (result) { + case 0: // Success + break; + case -2: + printv("smp: PSCI says CPU_ON was given invalid arguments\n"); + return false; + case -4: + printv("smp: PSCI says AP is already on\n"); + return false; + case -5: + printv("smp: PSCI says CPU_ON is already pending for this AP\n"); + return false; + case -6: + printv("smp: PSCI reports internal failure\n"); + return false; + case -9: + printv("smp: PSCI says CPU_ON was given an invalid address\n"); + return false; + default: + printv("smp: PSCI reports an unexpected error (%d)\n", result); + return false; + } + + break; + } + + case BOOT_WITH_ACPI_PARK: + panic(false, "ACPI parking protocol is unsupported, please report this!"); + break; + + default: + panic(false, "Invalid boot method specified"); + } + + for (int i = 0; i < 1000000; i++) { + // We do not need cache invalidation here because, by the time the AP + // gets to set this flag, it has already enabled its caches + + if (locked_read(&passed_info->smp_tpl_booted_flag) == 1) { + return true; + } + //delay(10000000); + } + + return false; +} + +static struct smp_information *try_acpi_smp(size_t header_hack_size, + void **header_ptr, + size_t *cpu_count, + uint64_t *_bsp_mpidr, + pagemap_t pagemap, + uint64_t mair, + uint64_t tcr, + uint64_t sctlr) { + int boot_method = BOOT_WITH_ACPI_PARK; + + // Search for FADT table + uint8_t *fadt = acpi_get_table("FACP", 0); + + if (fadt == NULL) + return NULL; + + // Read the single field from the FADT without defining a struct for the whole table + uint16_t arm_boot_args; + memcpy(&arm_boot_args, fadt + 129, 2); + + if (arm_boot_args & 1) // PSCI compliant? + boot_method = arm_boot_args & 2 ?
BOOT_WITH_PSCI_HVC : BOOT_WITH_PSCI_SMC; + + // Search for MADT table + struct madt *madt = acpi_get_table("APIC", 0); + + if (madt == NULL) + return NULL; + + uint64_t bsp_mpidr; + asm volatile ("mrs %0, mpidr_el1" : "=r"(bsp_mpidr)); + + // This bit is Res1 in the system reg, but not included in the MPIDR from MADT + bsp_mpidr &= ~((uint64_t)1 << 31); + + *_bsp_mpidr = bsp_mpidr; + + printv("smp: BSP MPIDR is %X\n", bsp_mpidr); + + *cpu_count = 0; + + // Count the MAX of startable APs and allocate accordingly + size_t max_cpus = 0; + + for (uint8_t *madt_ptr = (uint8_t *)madt->madt_entries_begin; + (uintptr_t)madt_ptr < (uintptr_t)madt + madt->header.length; + madt_ptr += *(madt_ptr + 1)) { + switch (*madt_ptr) { + case 11: { + // GIC CPU Interface + struct madt_gicc *gicc = (void *)madt_ptr; + + // Check if we can actually try to start the AP + if (gicc->flags & 1) + max_cpus++; + + continue; + } + } + } + + *header_ptr = ext_mem_alloc( + header_hack_size + max_cpus * sizeof(struct smp_information)); + struct smp_information *ret = *header_ptr + header_hack_size; + *cpu_count = 0; + + // Try to start all APs + for (uint8_t *madt_ptr = (uint8_t *)madt->madt_entries_begin; + (uintptr_t)madt_ptr < (uintptr_t)madt + madt->header.length; + madt_ptr += *(madt_ptr + 1)) { + switch (*madt_ptr) { + case 11: { + // GIC CPU Interface + struct madt_gicc *gicc = (void *)madt_ptr; + + // Check if we can actually try to start the AP + if (!(gicc->flags & 1)) + continue; + + struct smp_information *info_struct = &ret[*cpu_count]; + + info_struct->acpi_processor_uid = gicc->acpi_uid; + info_struct->gic_iface_no = gicc->iface_no; + info_struct->mpidr = gicc->mpidr; + + // Do not try to restart the BSP + if (gicc->mpidr == bsp_mpidr) { + (*cpu_count)++; + continue; + } + + printv("smp: Found candidate AP for bring-up. 
Interface no.: %x, MPIDR: %X\n", gicc->iface_no, gicc->mpidr); + + // Try to start the AP + if (!try_start_ap(boot_method, gicc->parking_addr, info_struct, + (uint64_t)(uintptr_t)pagemap.top_level[0], + (uint64_t)(uintptr_t)pagemap.top_level[1], + mair, tcr, sctlr)) { + print("smp: FAILED to bring-up AP\n"); + continue; + } + + printv("smp: Successfully brought up AP\n"); + + (*cpu_count)++; + continue; + } + } + } + + return ret; +} + +struct smp_information *init_smp(size_t header_hack_size, + void **header_ptr, + size_t *cpu_count, + uint64_t *bsp_mpidr, + pagemap_t pagemap, + uint64_t mair, + uint64_t tcr, + uint64_t sctlr) { + struct smp_information *info = NULL; + + //if (dtb_is_present() && (info = try_dtb_smp(header_hack_size, header_ptr, cpu_count, + // _bsp_iface_no, pagemap, mair, tcr, sctlr))) + // return info; + + // No RSDP means no ACPI + if (acpi_get_rsdp() && (info = try_acpi_smp(header_hack_size, header_ptr, cpu_count, + bsp_mpidr, pagemap, mair, tcr, sctlr))) + return info; + + printv("Failed to figure out how to start APs."); + + return NULL; +} + +#else +#error Unknown architecture +#endif diff --git a/common/sys/smp.h b/common/sys/smp.h index 6ac759e0..72bbac21 100644 --- a/common/sys/smp.h +++ b/common/sys/smp.h @@ -6,6 +6,8 @@ #include #include +#if defined (__x86_64__) || defined (__i386__) + struct smp_information { uint32_t acpi_processor_uid; uint32_t lapic_id; @@ -26,4 +28,27 @@ struct smp_information *init_smp(size_t header_hack_size, uint64_t hhdm, bool wp); +#elif defined (__aarch64__) + +struct smp_information { + uint32_t acpi_processor_uid; + uint32_t gic_iface_no; + uint64_t mpidr; + uint64_t stack_addr; + uint64_t goto_address; + uint64_t extra_argument; +} __attribute__((packed)); + +struct smp_information *init_smp(size_t header_hack_size, + void **header_ptr, + size_t *cpu_count, + uint64_t *bsp_mpidr, + pagemap_t pagemap, + uint64_t mair, + uint64_t tcr, + uint64_t sctlr); +#else +#error Unknown architecture +#endif + #endif diff --git a/common/sys/smp_trampoline.asm_aarch64 b/common/sys/smp_trampoline.asm_aarch64 new file mode 100644 index 00000000..0c631ec8 --- /dev/null +++ b/common/sys/smp_trampoline.asm_aarch64 @@ -0,0 +1,99 @@ +#include + +.set tpl_booted_flag, -56 +.set tpl_ttbr0, -48 +.set tpl_ttbr1, -40 +.set tpl_mair, -32 +.set tpl_tcr, -24 +.set tpl_sctlr, -16 +.set tpl_info_struct, -8 + +.global smp_trampoline_start +smp_trampoline_start: + bl .L_entry +.L_entry: + // Mask IRQs + msr daifset, #0b1111 + + // Address to next page (since our offsets into the boot data are negative) + add x1, x30, #0xFFC + + ldr x0, [x1, tpl_info_struct] + ldr x2, [x1, tpl_sctlr] + ldr x3, [x1, tpl_mair] + ldr x4, [x1, tpl_tcr] + ldr x5, [x1, tpl_ttbr0] + ldr x6, [x1, tpl_ttbr1] + + // Configure EL1 state + msr mair_el1, x3 + msr tcr_el1, x4 + msr ttbr0_el1, x5 + msr ttbr1_el1, x6 + msr sctlr_el1, x2 + dsb sy + isb + + PICK_EL x8, 1f, 0f +0: + // Configure EL2-specific state for EL1 + + // Don't trap counters to EL2 + mrs x8, cnthctl_el2 + orr x8, x8, #3 + msr cnthctl_el2, x8 + msr cntvoff_el2, xzr + + // Enable AArch64 in EL1 + mov x8, xzr + orr x8, x8, #(1 << 31) + orr x8, x8, #(1 << 1) + msr hcr_el2, x8 + + // Don't trap FP/SIMD to EL2 + mov x8, #0x33FF + msr cptr_el2, x8 + msr hstr_el2, xzr + + // Run rest of trampoline in EL1 + mov x8, #0x3c4 + msr spsr_el2, x8 + adr x8, 1f + msr elr_el2, x8 + + eret + +1: + // Notify BSP we are alive + mov x8, #1 + add x9, x1, tpl_booted_flag + stlr x8, [x9] + + // Wait for BSP to tell us where to go + add x9, x0, #24 
+2: + ldar x8, [x9] + cbnz x8, 3f + yield + b 2b + +3: + msr elr_el1, x8 + + msr spsel, #0 + ldr x8, [x0, #16] + mov sp, x8 + + // Enter kernel + mov x8, #0x3c4 + msr spsr_el1, x8 + + ZERO_REGS_EXCEPT_X0 + + eret + +smp_trampoline_end: + +.global smp_trampoline_size +smp_trampoline_size: + .quad smp_trampoline_end - smp_trampoline_start diff --git a/configure.ac b/configure.ac index 71999533..e0b4b789 100644 --- a/configure.ac +++ b/configure.ac @@ -134,6 +134,25 @@ fi AC_SUBST(BUILD_UEFI_X86_64, [$BUILD_UEFI_X86_64]) +BUILD_UEFI_AARCH64="$BUILD_ALL" + +AC_ARG_ENABLE([uefi-aarch64], + AS_HELP_STRING([--enable-uefi-aarch64], [enable building the aarch64 UEFI port]), + BUILD_UEFI_AARCH64="$enableval") + +if test "x$BUILD_UEFI_AARCH64" = "xno"; then + BUILD_UEFI_AARCH64="" +else + ( + mkdir -p "$BUILDDIR/cross-files" + cd "$BUILDDIR/cross-files" + TRIPLET=aarch64-elf-none "$SRCDIR/cross-detect/configure" || exit 1 + ) + BUILD_UEFI_AARCH64="limine-uefi-aarch64" +fi + +AC_SUBST(BUILD_UEFI_AARCH64, [$BUILD_UEFI_AARCH64]) + BUILD_CD_EFI="$BUILD_ALL" AC_ARG_ENABLE([cd-efi], diff --git a/limine.h b/limine.h index 8abb0fd3..774fb0b5 100644 --- a/limine.h +++ b/limine.h @@ -188,12 +188,14 @@ struct limine_5_level_paging_request { #define LIMINE_SMP_REQUEST { LIMINE_COMMON_MAGIC, 0x95a67b819a1b857e, 0xa0b61b723b6a73e0 } -#define LIMINE_SMP_X2APIC (1 << 0) - struct limine_smp_info; typedef void (*limine_goto_address)(struct limine_smp_info *); +#if defined (__x86_64__) || defined (__i386__) + +#define LIMINE_SMP_X2APIC (1 << 0) + struct limine_smp_info { uint32_t processor_id; uint32_t lapic_id; @@ -210,6 +212,29 @@ struct limine_smp_response { LIMINE_PTR(struct limine_smp_info **) cpus; }; +#elif defined (__aarch64__) + +struct limine_smp_info { + uint32_t processor_id; + uint32_t gic_iface_no; + uint64_t mpidr; + uint64_t reserved; + LIMINE_PTR(limine_goto_address) goto_address; + uint64_t extra_argument; +}; + +struct limine_smp_response { + uint64_t revision; + uint32_t flags; + uint64_t bsp_mpidr; + uint64_t cpu_count; + LIMINE_PTR(struct limine_smp_info **) cpus; +}; + +#else +#error Unknown architecture +#endif + struct limine_smp_request { uint64_t id[4]; uint64_t revision; @@ -373,6 +398,21 @@ struct limine_kernel_address_request { LIMINE_PTR(struct limine_kernel_address_response *) response; }; +/* Device Tree Blob */ + +#define LIMINE_DTB_REQUEST { LIMINE_COMMON_MAGIC, 0xb40ddb48fb54bac7, 0x545081493f81ffb7 } + +struct limine_dtb_response { + uint64_t revision; + LIMINE_PTR(void *) dtb_ptr; +}; + +struct limine_dtb_request { + uint64_t id[4]; + uint64_t revision; + LIMINE_PTR(struct limine_dtb_response *) response; +}; + #ifdef __cplusplus } #endif diff --git a/test/e9print.c b/test/e9print.c index bab06a5e..7e425a04 100644 --- a/test/e9print.c +++ b/test/e9print.c @@ -1,5 +1,6 @@ #include #include +#include void (*limine_print)(const char *buf, size_t size) = NULL; diff --git a/test/limine.c b/test/limine.c index c314ad47..ded2abaa 100644 --- a/test/limine.c +++ b/test/limine.c @@ -119,6 +119,14 @@ struct limine_terminal_request _terminal_request = { __attribute__((section(".limine_reqs"))) void *terminal_req = &_terminal_request; +struct limine_dtb_request _dtb_request = { + .id = LIMINE_DTB_REQUEST, + .revision = 0, .response = NULL +}; + +__attribute__((section(".limine_reqs"))) +void *dtb_req = &_dtb_request; + static char *get_memmap_type(uint64_t type) { switch (type) { case LIMINE_MEMMAP_USABLE: @@ -174,6 +182,23 @@ static void print_file(struct limine_file *file) { *(uint64_t 
*)file->part_uuid.d); } +uint32_t ctr = 0; + +void ap_entry(struct limine_smp_info *info) { + e9_printf("Hello from AP!"); + +#if defined (__x86_64__) + e9_printf("My LAPIC ID: %x", info->lapic_id); +#elif defined (__aarch64__) + e9_printf("My GIC CPU Interface no.: %x", info->gic_iface_no); + e9_printf("My MPIDR: %x", info->mpidr); +#endif + + __atomic_fetch_add(&ctr, 1, __ATOMIC_SEQ_CST); + + while (1); +} + #define FEAT_START do { #define FEAT_END } while (0); @@ -355,12 +380,35 @@ FEAT_START struct limine_smp_response *smp_response = _smp_request.response; e9_printf("SMP feature, revision %d", smp_response->revision); e9_printf("Flags: %x", smp_response->flags); +#if defined (__x86_64__) e9_printf("BSP LAPIC ID: %x", smp_response->bsp_lapic_id); +#elif defined (__aarch64__) + e9_printf("BSP MPIDR: %x", smp_response->bsp_mpidr); +#endif e9_printf("CPU count: %d", smp_response->cpu_count); for (size_t i = 0; i < smp_response->cpu_count; i++) { struct limine_smp_info *cpu = smp_response->cpus[i]; e9_printf("Processor ID: %x", cpu->processor_id); +#if defined (__x86_64__) e9_printf("LAPIC ID: %x", cpu->lapic_id); +#elif defined (__aarch64__) + e9_printf("GIC CPU Interface no.: %x", cpu->gic_iface_no); + e9_printf("MPIDR: %x", cpu->mpidr); +#endif + + +#if defined (__x86_64__) + if (cpu->lapic_id != smp_response->bsp_lapic_id) { +#elif defined (__aarch64__) + if (cpu->mpidr != smp_response->bsp_mpidr) { +#endif + uint32_t old_ctr = __atomic_load_n(&ctr, __ATOMIC_SEQ_CST); + + __atomic_store_n(&cpu->goto_address, ap_entry, __ATOMIC_SEQ_CST); + + while (__atomic_load_n(&ctr, __ATOMIC_SEQ_CST) == old_ctr) + ; + } } FEAT_END @@ -382,5 +430,16 @@ FEAT_START e9_printf("Write function at: %x", term_response->write); FEAT_END +FEAT_START + e9_printf(""); + if (_dtb_request.response == NULL) { + e9_printf("Device tree blob not passed"); + break; + } + struct limine_dtb_response *dtb_response = _dtb_request.response; + e9_printf("Device tree blob feature, revision %d", dtb_response->revision); + e9_printf("Device tree blob pointer: %x", dtb_response->dtb_ptr); +FEAT_END + for (;;); }
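
For kernel-side consumers, the new DTB feature is used like any other Limine request. The sketch below is hypothetical kernel code, closely modeled on the test/limine.c additions above; the dtb_example() name and the FDT magic check are illustrative and not part of this patch, and the test additionally registers the request through the .limine_reqs section, which ordinary kernels do not need to do.

    #include <stdint.h>
    #include <limine.h>

    // The bootloader fills in .response only if a DTB was found; with this
    // patch that currently happens only on UEFI, via the EFI_DTB_TABLE_GUID
    // configuration table.
    static volatile struct limine_dtb_request dtb_request = {
        .id = LIMINE_DTB_REQUEST,
        .revision = 0
    };

    void dtb_example(void) {
        if (dtb_request.response == NULL || dtb_request.response->dtb_ptr == NULL) {
            return; // no device tree was passed
        }

        // dtb_ptr points at the flattened device tree (FDT) as handed over by
        // the firmware; assuming the address is mapped, a valid FDT begins
        // with the big-endian magic 0xd00dfeed.
        const uint8_t *fdt = dtb_request.response->dtb_ptr;
        uint32_t magic = ((uint32_t)fdt[0] << 24) | ((uint32_t)fdt[1] << 16)
                       | ((uint32_t)fdt[2] << 8)  |  (uint32_t)fdt[3];
        if (magic != 0xd00dfeed) {
            return; // not a valid FDT
        }

        // ... hand fdt to a device tree parser from here ...
    }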
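
As a quick sanity check on the translation-control encoding added in this patch, the following standalone snippet (hypothetical, not part of the patch) repeats the LIMINE_TCR macro from the hunk above and verifies the value programmed for the common configuration: tsz = 16 (64 - 48, i.e. 48-bit virtual addresses on both TTBRs) and, as an example, pa = 5 (a 48-bit physical address range as reported by ID_AA64MMFR0_EL1.PARange). That works out to 0x5A5102510: 4 KiB granules, inner-shareable, write-back read/write-allocate table walks, and T0SZ = T1SZ = 16.

    #include <stdint.h>

    // Repeated from the hunk above.
    #define LIMINE_TCR(tsz, pa) ( ((uint64_t)(pa) << 32)   /* Intermediate address size */ \
        | ((uint64_t)2 << 30)     /* TTBR1 4K granule */ \
        | ((uint64_t)2 << 28)     /* TTBR1 Inner shareable */ \
        | ((uint64_t)1 << 26)     /* TTBR1 Outer WB RW-Allocate */ \
        | ((uint64_t)1 << 24)     /* TTBR1 Inner WB RW-Allocate */ \
        | ((uint64_t)(tsz) << 16) /* Address bits in TTBR1 */ \
        /* TTBR0 4K granule */ \
        | ((uint64_t)2 << 12)     /* TTBR0 Inner shareable */ \
        | ((uint64_t)1 << 10)     /* TTBR0 Outer WB RW-Allocate */ \
        | ((uint64_t)1 << 8)      /* TTBR0 Inner WB RW-Allocate */ \
        | ((uint64_t)(tsz) << 0)) /* Address bits in TTBR0 */

    // 48-bit VA (T0SZ = T1SZ = 16), 48-bit PA range (PARange value 5).
    _Static_assert(LIMINE_TCR(16, 5) == 0x5A5102510ULL,
                   "unexpected TCR_EL1 encoding");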