pseries: Move hash page table allocation to reset time

At the moment the size of the hash page table (HPT) is fixed based on the
maximum memory allowed to the guest.  As such, we allocate the table during
machine construction, and just clear it at reset.

However, we're planning to implement a PAPR extension allowing the hash
page table to be resized at runtime.  This will mean that on reset we want
to revert it to the default size.  It also means that when migrating, we
need to make sure the destination allocates an HPT of size matching the
source, since the guest could have changed it before the migration.

This patch replaces the spapr_alloc_htab() and spapr_reset_htab() functions
with a new spapr_reallocate_hpt() function.  This is called at reset and
on inbound migration only, and no longer during machine init.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
This commit is contained in:
David Gibson 2016-02-09 10:21:56 +10:00
parent 8dfe8e7f4f
commit c5f54f3e31

View File

@ -1063,80 +1063,54 @@ static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
return shift; return shift;
} }
static void spapr_alloc_htab(sPAPRMachineState *spapr) static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
Error **errp)
{ {
long shift; long rc;
int index;
/* allocate hash page table. For now we always make this 16mb, /* Clean up any HPT info from a previous boot */
* later we should probably make it scale to the size of guest g_free(spapr->htab);
* RAM */ spapr->htab = NULL;
spapr->htab_shift = 0;
close_htab_fd(spapr);
shift = kvmppc_reset_htab(spapr->htab_shift); rc = kvmppc_reset_htab(shift);
if (shift < 0) { if (rc < 0) {
/* /* kernel-side HPT needed, but couldn't allocate one */
* For HV KVM, host kernel will return -ENOMEM when requested error_setg_errno(errp, errno,
* HTAB size can't be allocated. "Failed to allocate KVM HPT of order %d (try smaller maxmem?)",
*/ shift);
error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem"); /* This is almost certainly fatal, but if the caller really
} else if (shift > 0) { * wants to carry on with shift == 0, it's welcome to try */
/* } else if (rc > 0) {
* Kernel handles htab, we don't need to allocate one /* kernel-side HPT allocated */
* if (rc != shift) {
* Older kernels can fall back to lower HTAB shift values, error_setg(errp,
* but we don't allow booting of such guests. "Requested order %d HPT, but kernel allocated order %ld (try smaller maxmem?)",
*/ shift, rc);
if (shift != spapr->htab_shift) {
error_setg(&error_abort, "Failed to allocate HTAB of requested size, try with smaller maxmem");
} }
spapr->htab_shift = shift; spapr->htab_shift = shift;
kvmppc_kern_htab = true; kvmppc_kern_htab = true;
} else { } else {
/* Allocate htab */ /* kernel-side HPT not needed, allocate in userspace instead */
spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr)); size_t size = 1ULL << shift;
int i;
/* And clear it */ spapr->htab = qemu_memalign(size, size);
memset(spapr->htab, 0, HTAB_SIZE(spapr)); if (!spapr->htab) {
error_setg_errno(errp, errno,
for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) { "Could not allocate HPT of order %d", shift);
DIRTY_HPTE(HPTE(spapr->htab, index)); return;
}
}
} }
/* memset(spapr->htab, 0, size);
* Clear HTAB entries during reset. spapr->htab_shift = shift;
* kvmppc_kern_htab = false;
* If host kernel has allocated HTAB, KVM_PPC_ALLOCATE_HTAB ioctl is
* used to clear HTAB. Otherwise QEMU-allocated HTAB is cleared manually.
*/
static void spapr_reset_htab(sPAPRMachineState *spapr)
{
long shift;
int index;
shift = kvmppc_reset_htab(spapr->htab_shift); for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
if (shift < 0) { DIRTY_HPTE(HPTE(spapr->htab, i));
error_setg(&error_abort, "Failed to reset HTAB");
} else if (shift > 0) {
if (shift != spapr->htab_shift) {
error_setg(&error_abort, "Requested HTAB allocation failed during reset");
} }
close_htab_fd(spapr);
} else {
memset(spapr->htab, 0, HTAB_SIZE(spapr));
for (index = 0; index < HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; index++) {
DIRTY_HPTE(HPTE(spapr->htab, index));
}
}
/* Update the RMA size if necessary */
if (spapr->vrma_adjust) {
spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
spapr->htab_shift);
} }
} }
@ -1159,15 +1133,24 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
static void ppc_spapr_reset(void) static void ppc_spapr_reset(void)
{ {
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); MachineState *machine = MACHINE(qdev_get_machine());
sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
PowerPCCPU *first_ppc_cpu; PowerPCCPU *first_ppc_cpu;
uint32_t rtas_limit; uint32_t rtas_limit;
/* Check for unknown sysbus devices */ /* Check for unknown sysbus devices */
foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL); foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);
/* Reset the hash table & recalc the RMA */ /* Allocate and/or reset the hash page table */
spapr_reset_htab(spapr); spapr_reallocate_hpt(spapr,
spapr_hpt_shift_for_ramsize(machine->maxram_size),
&error_fatal);
/* Update the RMA size if necessary */
if (spapr->vrma_adjust) {
spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
spapr->htab_shift);
}
qemu_devices_reset(); qemu_devices_reset();
@ -1547,10 +1530,12 @@ static int htab_load(QEMUFile *f, void *opaque, int version_id)
section_hdr = qemu_get_be32(f); section_hdr = qemu_get_be32(f);
if (section_hdr) { if (section_hdr) {
/* First section, just the hash shift */ Error *local_err;
if (spapr->htab_shift != section_hdr) {
error_report("htab_shift mismatch: source %d target %d", /* First section gives the htab size */
section_hdr, spapr->htab_shift); spapr_reallocate_hpt(spapr, section_hdr, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL; return -EINVAL;
} }
return 0; return 0;
@ -1803,9 +1788,6 @@ static void ppc_spapr_init(MachineState *machine)
/* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD; load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
spapr->htab_shift = spapr_hpt_shift_for_ramsize(machine->maxram_size);
spapr_alloc_htab(spapr);
/* Set up Interrupt Controller before we create the VCPUs */ /* Set up Interrupt Controller before we create the VCPUs */
spapr->icp = xics_system_init(machine, spapr->icp = xics_system_init(machine,
DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),