qemu/include/hw/pci-host/spapr.h
Greg Kurz 30499fdd98 spapr: Fix buffer overflow in spapr_numa_associativity_init()
Running a guest with 128 NUMA nodes crashes QEMU:

../../util/error.c:59: error_setv: Assertion `*errp == NULL' failed.

The crash happens when setting the FWNMI migration blocker:

2861	    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) {
2862	        /* Create the error string for live migration blocker */
2863	        error_setg(&spapr->fwnmi_migration_blocker,
2864	            "A machine check is being handled during migration. The handler"
2865	            "may run and log hardware error on the destination");
2866	    }

Inspection reveals that papr->fwnmi_migration_blocker isn't NULL:

(gdb) p spapr->fwnmi_migration_blocker
$1 = (Error *) 0x8000000004000000

Since this is the only place where papr->fwnmi_migration_blocker is
set, this means someone wrote there in our back. Further analysis
points to spapr_numa_associativity_init(), especially the part
that initializes the associative arrays for NVLink GPUs:

    max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;

ie. max_nodes_with_gpus = 128 + 6, but the array isn't sized to
accommodate the 6 extra nodes:

struct SpaprMachineState {
    .
    .
    .
    uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE];

    Error *fwnmi_migration_blocker;
};

and the following loops happily overwrite spapr->fwnmi_migration_blocker,
and probably more:

    for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
        spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS);

        for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) {
            uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
                                 SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
            spapr->numa_assoc_array[i][j] = gpu_assoc;
        }

        spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
    }

Fix the size of the array. This requires "hw/ppc/spapr.h" to see
NVGPU_MAX_NUM. Including "hw/pci-host/spapr.h" introduces a
circular dependency that breaks the build, so this moves the
definition of NVGPU_MAX_NUM to "hw/ppc/spapr.h" instead.

Reported-by: Min Deng <mdeng@redhat.com>
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1908693
Fixes: dd7e1d7ae4 ("spapr_numa: move NVLink2 associativity handling to spapr_numa.c")
Cc: danielhb413@gmail.com
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <160829960428.734871.12634150161215429514.stgit@bahia.lan>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-01-06 11:09:59 +11:00

214 lines
6.7 KiB
C

/*
* QEMU SPAPR PCI BUS definitions
*
* Copyright (c) 2011 Alexey Kardashevskiy <aik@au1.ibm.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef PCI_HOST_SPAPR_H
#define PCI_HOST_SPAPR_H
#include "hw/ppc/spapr.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_host.h"
#include "hw/ppc/xics.h"
#include "qom/object.h"
#define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
OBJECT_DECLARE_SIMPLE_TYPE(SpaprPhbState, SPAPR_PCI_HOST_BRIDGE)
#define SPAPR_PCI_DMA_MAX_WINDOWS 2
typedef struct SpaprPciMsi {
uint32_t first_irq;
uint32_t num;
} SpaprPciMsi;
typedef struct SpaprPciMsiMig {
uint32_t key;
SpaprPciMsi value;
} SpaprPciMsiMig;
typedef struct SpaprPciLsi {
uint32_t irq;
} SpaprPciLsi;
typedef struct SpaprPhbPciNvGpuConfig SpaprPhbPciNvGpuConfig;
struct SpaprPhbState {
PCIHostState parent_obj;
uint32_t index;
uint64_t buid;
char *dtbusname;
bool dr_enabled;
MemoryRegion memspace, iospace;
hwaddr mem_win_addr, mem_win_size, mem64_win_addr, mem64_win_size;
uint64_t mem64_win_pciaddr;
hwaddr io_win_addr, io_win_size;
MemoryRegion mem32window, mem64window, iowindow, msiwindow;
uint32_t dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS];
hwaddr dma_win_addr, dma_win_size;
AddressSpace iommu_as;
MemoryRegion iommu_root;
SpaprPciLsi lsi_table[PCI_NUM_PINS];
GHashTable *msi;
/* Temporary cache for migration purposes */
int32_t msi_devs_num;
SpaprPciMsiMig *msi_devs;
QLIST_ENTRY(SpaprPhbState) list;
bool ddw_enabled;
uint64_t page_size_mask;
uint64_t dma64_win_addr;
uint32_t numa_node;
bool pcie_ecs; /* Allow access to PCIe extended config space? */
/* Fields for migration compatibility hacks */
bool pre_2_8_migration;
uint32_t mig_liobn;
hwaddr mig_mem_win_addr, mig_mem_win_size;
hwaddr mig_io_win_addr, mig_io_win_size;
hwaddr nv2_gpa_win_addr;
hwaddr nv2_atsd_win_addr;
SpaprPhbPciNvGpuConfig *nvgpus;
bool pre_5_1_assoc;
};
#define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
#define SPAPR_PCI_MEM32_WIN_SIZE \
((1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET)
#define SPAPR_PCI_MEM64_WIN_SIZE 0x10000000000ULL /* 1 TiB */
/* All PCI outbound windows will be within this range */
#define SPAPR_PCI_BASE (1ULL << 45) /* 32 TiB */
#define SPAPR_PCI_LIMIT (1ULL << 46) /* 64 TiB */
#define SPAPR_MAX_PHBS ((SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / \
SPAPR_PCI_MEM64_WIN_SIZE - 1)
#define SPAPR_PCI_IO_WIN_SIZE 0x10000
#define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL
#define SPAPR_PCI_NV2RAM64_WIN_BASE SPAPR_PCI_LIMIT
#define SPAPR_PCI_NV2RAM64_WIN_SIZE (2 * TiB) /* For up to 6 GPUs 256GB each */
/* Max number of NVLinks per GPU in any physical box */
#define NVGPU_MAX_LINKS 3
/*
* GPU RAM starts at 64TiB so huge DMA window to cover it all ends at 128TiB
* which is enough. We do not need DMA for ATSD so we put them at 128TiB.
*/
#define SPAPR_PCI_NV2ATSD_WIN_BASE (128 * TiB)
#define SPAPR_PCI_NV2ATSD_WIN_SIZE (NVGPU_MAX_NUM * NVGPU_MAX_LINKS * \
64 * KiB)
int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
uint32_t intc_phandle, void *fdt, int *node_offset);
void spapr_pci_rtas_init(void);
SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid);
PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
uint32_t config_addr);
/* DRC callbacks */
void spapr_phb_remove_pci_device_cb(DeviceState *dev);
int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
void *fdt, int *fdt_start_offset, Error **errp);
/* VFIO EEH hooks */
#ifdef CONFIG_LINUX
bool spapr_phb_eeh_available(SpaprPhbState *sphb);
int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
unsigned int addr, int option);
int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state);
int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option);
int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb);
void spapr_phb_vfio_reset(DeviceState *qdev);
void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp);
void spapr_phb_nvgpu_free(SpaprPhbState *sphb);
void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
Error **errp);
void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt);
void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
SpaprPhbState *sphb);
#else
static inline bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
return false;
}
static inline int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
unsigned int addr, int option)
{
return RTAS_OUT_HW_ERROR;
}
static inline int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb,
int *state)
{
return RTAS_OUT_HW_ERROR;
}
static inline int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
{
return RTAS_OUT_HW_ERROR;
}
static inline int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
{
return RTAS_OUT_HW_ERROR;
}
static inline void spapr_phb_vfio_reset(DeviceState *qdev)
{
}
static inline void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
{
}
static inline void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
{
}
static inline void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt,
int bus_off, Error **errp)
{
}
static inline void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb,
void *fdt)
{
}
static inline void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt,
int offset,
SpaprPhbState *sphb)
{
}
#endif
void spapr_phb_dma_reset(SpaprPhbState *sphb);
static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb)
{
return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
}
#endif /* PCI_HOST_SPAPR_H */