Update version for 8.1.5 release

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
target/arm: Fix A64 scalar SQSHRN and SQRSHRN
2024-01-29 14:20:00 +03:00 · 2024-01-27 18:10:41 +03:00 · 2024-01-27 18:05:25 +03:00 · 2024-01-26 19:54:36 +03:00 · 2024-01-26 19:31:37 +03:00 · 2024-01-26 19:31:37 +03:00
320 changed files with 3437 additions and 1654 deletions
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@ -584,7 +584,10 @@ pages:
    - mkdir -p public
    # HTML-ised source tree
    - make gtags
-    - htags -anT --tree-view=filetree -m qemu_init
+    # We unset variables to work around a bug in some htags versions
+    # which causes it to fail when the environment is large
+    - CI_COMMIT_MESSAGE= CI_COMMIT_TAG_MESSAGE= htags
+        -anT --tree-view=filetree -m qemu_init
        -t "Welcome to the QEMU sourcecode"
    - mv HTML public/src
    # Project documentation
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@ -5,16 +5,21 @@
 # Required
 version: 2

+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
  configuration: docs/conf.py

+# We recommend specifying your dependencies to enable reproducible builds:
+# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+  install:
+    - requirements: docs/requirements.txt
+
 # We want all the document formats
 formats: all
-
-# For consistency, we require that QEMU's Sphinx extensions
-# run with at least the same minimum version of Python that
-# we require for other Python in our codebase (our conf.py
-# enforces this, and some code needs it.)
-python:
-  version: 3.6
--- a/2
+++ b/2
@ -1 +1 @@
-8.1.0
+8.1.5
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@ -2458,7 +2458,7 @@ static int kvm_init(MachineState *ms)
    KVMState *s;
    const KVMCapabilityInfo *missing_cap;
    int ret;
-    int type = 0;
+    int type;
    uint64_t dirty_log_manual_caps;

    qemu_mutex_init(&kml_slots_lock);
@ -2523,6 +2523,8 @@ static int kvm_init(MachineState *ms)
        type = mc->kvm_type(ms, kvm_type);
    } else if (mc->kvm_type) {
        type = mc->kvm_type(ms, NULL);
+    } else {
+        type = kvm_arch_get_default_type(ms);
    }

    do {
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@ -33,36 +33,6 @@ void cpu_loop_exit_noexc(CPUState *cpu)
    cpu_loop_exit(cpu);
 }

-#if defined(CONFIG_SOFTMMU)
-void cpu_reloading_memory_map(void)
-{
-    if (qemu_in_vcpu_thread() && current_cpu->running) {
-        /* The guest can in theory prolong the RCU critical section as long
-         * as it feels like. The major problem with this is that because it
-         * can do multiple reconfigurations of the memory map within the
-         * critical section, we could potentially accumulate an unbounded
-         * collection of memory data structures awaiting reclamation.
-         *
-         * Because the only thing we're currently protecting with RCU is the
-         * memory data structures, it's sufficient to break the critical section
-         * in this callback, which we know will get called every time the
-         * memory map is rearranged.
-         *
-         * (If we add anything else in the system that uses RCU to protect
-         * its data structures, we will need to implement some other mechanism
-         * to force TCG CPUs to exit the critical section, at which point this
-         * part of this callback might become unnecessary.)
-         *
-         * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
-         * only protects cpu->as->dispatch. Since we know our caller is about
-         * to reload it, it's safe to split the critical section.
-         */
-        rcu_read_unlock();
-        rcu_read_lock();
-    }
-}
-#endif
-
 void cpu_loop_exit(CPUState *cpu)
 {
    /* Undo the setting in cpu_tb_exec.  */
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@ -182,7 +182,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

-    if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->pc) &&
+    if (tb->pc == desc->pc &&
        tb_page_addr0(tb) == desc->page_addr0 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
@ -232,7 +232,7 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
        return NULL;
    }
    desc.page_addr0 = phys_pc;
-    h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
+    h = tb_hash_func(phys_pc, pc,
                     flags, cs_base, cflags);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
@ -720,7 +720,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
            && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
-                | CF_NOIRQ | 1;
+                | CF_LAST_IO | CF_NOIRQ | 1;
        }
 #endif
        return false;
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@ -46,7 +46,7 @@ static bool tb_cmp(const void *ap, const void *bp)
    const TranslationBlock *a = ap;
    const TranslationBlock *b = bp;

-    return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
+    return (a->pc == b->pc &&
            a->cs_base == b->cs_base &&
            a->flags == b->flags &&
            (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
@ -916,7 +916,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)

    /* remove the TB from the hash list */
    phys_pc = tb_page_addr0(tb);
-    h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
+    h = tb_hash_func(phys_pc, tb->pc,
                     tb->flags, tb->cs_base, orig_cflags);
    if (!qht_remove(&tb_ctx.htable, tb, h)) {
        return;
@ -983,7 +983,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb)
    tb_record(tb);

    /* add in the hash table */
-    h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
+    h = tb_hash_func(tb_page_addr0(tb), tb->pc,
                     tb->flags, tb->cs_base, tb->cflags);
    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);

@ -1083,7 +1083,8 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
    if (current_tb_modified) {
        /* Force execution of one insn next time.  */
        CPUState *cpu = current_cpu;
-        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
+        cpu->cflags_next_tb =
+            1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu);
        return true;
    }
    return false;
@ -1153,7 +1154,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
    if (current_tb_modified) {
        page_collection_unlock(pages);
        /* Force execution of one insn next time.  */
-        current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
+        current_cpu->cflags_next_tb =
+            1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu);
        mmap_unlock();
        cpu_loop_exit_noexc(current_cpu);
    }
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@ -100,14 +100,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
                break;
            case EXCP_HALTED:
                /*
-                 * during start-up the vCPU is reset and the thread is
-                 * kicked several times. If we don't ensure we go back
-                 * to sleep in the halted state we won't cleanly
-                 * start-up when the vCPU is enabled.
-                 *
-                 * cpu->halted should ensure we sleep in wait_io_event
+                 * Usually cpu->halted is set, but may have already been
+                 * reset by another thread by the time we arrive here.
                 */
-                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@ -326,9 +326,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
-    if (!(cflags & CF_PCREL)) {
-        tb->pc = pc;
-    }
+    tb->pc = pc;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@ -16,26 +16,19 @@
 #include "tcg/tcg-op-common.h"
 #include "internal.h"

-static void gen_io_start(void)
+static void set_can_do_io(DisasContextBase *db, bool val)
 {
-    tcg_gen_st_i32(tcg_constant_i32(1), cpu_env,
-                   offsetof(ArchCPU, parent_obj.can_do_io) -
-                   offsetof(ArchCPU, env));
+    if (db->saved_can_do_io != val) {
+        db->saved_can_do_io = val;
+        tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
+                       offsetof(ArchCPU, parent_obj.can_do_io) -
+                       offsetof(ArchCPU, env));
+    }
 }

 bool translator_io_start(DisasContextBase *db)
 {
-    uint32_t cflags = tb_cflags(db->tb);
-
-    if (!(cflags & CF_USE_ICOUNT)) {
-        return false;
-    }
-    if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) {
-        /* Already started in translator_loop. */
-        return true;
-    }
-
-    gen_io_start();
+    set_can_do_io(db, true);

    /*
     * Ensure that this instruction will be the last in the TB.
@ -47,14 +40,17 @@ bool translator_io_start(DisasContextBase *db)
    return true;
 }

-static TCGOp *gen_tb_start(uint32_t cflags)
+static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
 {
-    TCGv_i32 count = tcg_temp_new_i32();
+    TCGv_i32 count = NULL;
    TCGOp *icount_start_insn = NULL;

-    tcg_gen_ld_i32(count, cpu_env,
-                   offsetof(ArchCPU, neg.icount_decr.u32) -
-                   offsetof(ArchCPU, env));
+    if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) {
+        count = tcg_temp_new_i32();
+        tcg_gen_ld_i32(count, cpu_env,
+                       offsetof(ArchCPU, neg.icount_decr.u32) -
+                       offsetof(ArchCPU, env));
+    }

    if (cflags & CF_USE_ICOUNT) {
        /*
@ -84,18 +80,15 @@ static TCGOp *gen_tb_start(uint32_t cflags)
        tcg_gen_st16_i32(count, cpu_env,
                         offsetof(ArchCPU, neg.icount_decr.u16.low) -
                         offsetof(ArchCPU, env));
-        /*
-         * cpu->can_do_io is cleared automatically here at the beginning of
-         * each translation block.  The cost is minimal and only paid for
-         * -icount, plus it would be very easy to forget doing it in the
-         * translator. Doing it here means we don't need a gen_io_end() to
-         * go with gen_io_start().
-         */
-        tcg_gen_st_i32(tcg_constant_i32(0), cpu_env,
-                       offsetof(ArchCPU, parent_obj.can_do_io) -
-                       offsetof(ArchCPU, env));
    }

+    /*
+     * cpu->can_do_io is set automatically here at the beginning of
+     * each translation block.  The cost is minimal, plus it would be
+     * very easy to forget doing it in the translator.
+     */
+    set_can_do_io(db, db->max_insns == 1 && (cflags & CF_LAST_IO));
+
    return icount_start_insn;
 }

@ -144,6 +137,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
    db->num_insns = 0;
    db->max_insns = *max_insns;
    db->singlestep_enabled = cflags & CF_SINGLE_STEP;
+    db->saved_can_do_io = -1;
    db->host_addr[0] = host_pc;
    db->host_addr[1] = NULL;

@ -151,11 +145,17 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */

    /* Start translating.  */
-    icount_start_insn = gen_tb_start(cflags);
+    icount_start_insn = gen_tb_start(db, cflags);
    ops->tb_start(db, cpu);
    tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */

-    plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
+    if (cflags & CF_MEMI_ONLY) {
+        /* We should only see CF_MEMI_ONLY for io_recompile. */
+        assert(cflags & CF_LAST_IO);
+        plugin_enabled = plugin_gen_tb_start(cpu, db, true);
+    } else {
+        plugin_enabled = plugin_gen_tb_start(cpu, db, false);
+    }

    while (true) {
        *max_insns = ++db->num_insns;
@ -172,13 +172,9 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
           the next instruction.  */
        if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) {
            /* Accept I/O on the last instruction.  */
-            gen_io_start();
-            ops->translate_insn(db, cpu);
-        } else {
-            /* we should only see CF_MEMI_ONLY for io_recompile */
-            tcg_debug_assert(!(cflags & CF_MEMI_ONLY));
-            ops->translate_insn(db, cpu);
+            set_can_do_io(db, true);
        }
+        ops->translate_insn(db, cpu);

        /*
         * We can't instrument after instructions that change control
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@ -398,6 +398,7 @@ static void cryptodev_backend_set_ops(Object *obj, Visitor *v,
 static void
 cryptodev_backend_complete(UserCreatable *uc, Error **errp)
 {
+    ERRP_GUARD();
    CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc);
    CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc);
    uint32_t services;
@ -406,11 +407,20 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp)
    QTAILQ_INIT(&backend->opinfos);
    value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg;
    cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp);
+    if (*errp) {
+        return;
+    }
    value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg;
    cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp);
+    if (*errp) {
+        return;
+    }

    if (bc->init) {
        bc->init(backend, errp);
+        if (*errp) {
+            return;
+        }
    }

    services = backend->conf.crypto_services;
--- a/backends/tpm/tpm_util.c
+++ b/backends/tpm/tpm_util.c
@ -112,12 +112,8 @@ static int tpm_util_request(int fd,
                            void *response,
                            size_t responselen)
 {
-    fd_set readfds;
+    GPollFD fds[1] = { {.fd = fd, .events = G_IO_IN } };
    int n;
-    struct timeval tv = {
-        .tv_sec = 1,
-        .tv_usec = 0,
-    };

    n = write(fd, request, requestlen);
    if (n < 0) {
@ -127,11 +123,8 @@ static int tpm_util_request(int fd,
        return -EFAULT;
    }

-    FD_ZERO(&readfds);
-    FD_SET(fd, &readfds);
-
    /* wait for a second */
-    n = select(fd + 1, &readfds, NULL, NULL, &tv);
+    n = RETRY_ON_EINTR(g_poll(fds, 1, 1000));
    if (n != 1) {
        return -errno;
    }
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@ -324,22 +324,39 @@ static void coroutine_fn GRAPH_RDLOCK
 blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
 {
    BDRVBlkLogWritesState *s = lr->bs->opaque;
-    uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
+
+    /*
+     * Determine the offsets and sizes of different parts of the entry, and
+     * update the state of the driver.
+     *
+     * This needs to be done in one go, before any actual I/O is done, as the
+     * log entry may have to be written in two parts, and the state of the
+     * driver may be modified by other driver operations while waiting for the
+     * I/O to complete.
+     */
+    const uint64_t entry_start_sector = s->cur_log_sector;
+    const uint64_t entry_offset = entry_start_sector << s->sectorbits;
+    const uint64_t qiov_aligned_size = ROUND_UP(lr->qiov->size, s->sectorsize);
+    const uint64_t entry_aligned_size = qiov_aligned_size +
+        ROUND_UP(lr->zero_size, s->sectorsize);
+    const uint64_t entry_nr_sectors = entry_aligned_size >> s->sectorbits;

    s->nr_entries++;
-    s->cur_log_sector +=
-            ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
+    s->cur_log_sector += entry_nr_sectors;

-    lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
+    /*
+     * Write the log entry. Note that if this is a "write zeroes" operation,
+     * only the entry header is written here, with the zeroing being done
+     * separately below.
+     */
+    lr->log_ret = bdrv_co_pwritev(s->log_file, entry_offset, lr->qiov->size,
                                  lr->qiov, 0);

    /* Logging for the "write zeroes" operation */
    if (lr->log_ret == 0 && lr->zero_size) {
-        cur_log_offset = s->cur_log_sector << s->sectorbits;
-        s->cur_log_sector +=
-                ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
+        const uint64_t zeroes_offset = entry_offset + qiov_aligned_size;

-        lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
+        lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, zeroes_offset,
                                            lr->zero_size, 0);
    }

--- a/block/file-posix.c
+++ b/block/file-posix.c
@ -160,7 +160,6 @@ typedef struct BDRVRawState {
    bool has_write_zeroes:1;
    bool use_linux_aio:1;
    bool use_linux_io_uring:1;
-    int64_t *offset; /* offset of zone append operation */
    int page_cache_inconsistent; /* errno from fdatasync failure */
    bool has_fallocate;
    bool needs_alignment;
@ -1412,11 +1411,9 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
    BlockZoneModel zoned;
    int ret;

-    bs->bl.zoned = BLK_Z_NONE;
-
    ret = get_sysfs_zoned_model(st, &zoned);
    if (ret < 0 || zoned == BLK_Z_NONE) {
-        return;
+        goto no_zoned;
    }
    bs->bl.zoned = zoned;

@ -1437,10 +1434,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Unable to read chunk_sectors "
                                     "sysfs attribute");
-        return;
+        goto no_zoned;
    } else if (!ret) {
        error_setg(errp, "Read 0 from chunk_sectors sysfs attribute");
-        return;
+        goto no_zoned;
    }
    bs->bl.zone_size = ret << BDRV_SECTOR_BITS;

@ -1448,10 +1445,10 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Unable to read nr_zones "
                                     "sysfs attribute");
-        return;
+        goto no_zoned;
    } else if (!ret) {
        error_setg(errp, "Read 0 from nr_zones sysfs attribute");
-        return;
+        goto no_zoned;
    }
    bs->bl.nr_zones = ret;

@ -1472,10 +1469,15 @@ static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
    ret = get_zones_wp(bs, s->fd, 0, bs->bl.nr_zones, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "report wps failed");
-        bs->wps = NULL;
-        return;
+        goto no_zoned;
    }
    qemu_co_mutex_init(&bs->wps->colock);
+    return;
+
+no_zoned:
+    bs->bl.zoned = BLK_Z_NONE;
+    g_free(bs->wps);
+    bs->wps = NULL;
 }
 #else /* !defined(CONFIG_BLKZONED) */
 static void raw_refresh_zoned_limits(BlockDriverState *bs, struct stat *st,
@ -2442,19 +2444,21 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
    return true;
 }

-static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
                                   uint64_t bytes, QEMUIOVector *qiov, int type)
 {
    BDRVRawState *s = bs->opaque;
    RawPosixAIOData acb;
    int ret;
+    uint64_t offset = *offset_ptr;

    if (fd_open(bs) < 0)
        return -EIO;
 #if defined(CONFIG_BLKZONED)
-    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && bs->wps) {
+    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
+        bs->bl.zoned != BLK_Z_NONE) {
        qemu_co_mutex_lock(&bs->wps->colock);
-        if (type & QEMU_AIO_ZONE_APPEND && bs->bl.zone_size) {
+        if (type & QEMU_AIO_ZONE_APPEND) {
            int index = offset / bs->bl.zone_size;
            offset = bs->wps->wp[index];
        }
@ -2502,16 +2506,15 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,

 out:
 #if defined(CONFIG_BLKZONED)
-{
-    BlockZoneWps *wps = bs->wps;
-    if (ret == 0) {
-        if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND))
-            && wps && bs->bl.zone_size) {
+    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) &&
+        bs->bl.zoned != BLK_Z_NONE) {
+        BlockZoneWps *wps = bs->wps;
+        if (ret == 0) {
            uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
            if (!BDRV_ZT_IS_CONV(*wp)) {
                if (type & QEMU_AIO_ZONE_APPEND) {
-                    *s->offset = *wp;
-                    trace_zbd_zone_append_complete(bs, *s->offset
+                    *offset_ptr = *wp;
+                    trace_zbd_zone_append_complete(bs, *offset_ptr
                        >> BDRV_SECTOR_BITS);
                }
                /* Advance the wp if needed */
@ -2519,17 +2522,15 @@ out:
                    *wp = offset + bytes;
                }
            }
+        } else {
+            /*
+             * write and append write are not allowed to cross zone boundaries
+             */
+            update_zones_wp(bs, s->fd, offset, 1);
        }
-    } else {
-        if (type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
-            update_zones_wp(bs, s->fd, 0, 1);
-        }
-    }

-    if ((type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) && wps) {
        qemu_co_mutex_unlock(&wps->colock);
    }
-}
 #endif
    return ret;
 }
@ -2538,14 +2539,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes, QEMUIOVector *qiov,
                                      BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
 }

 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
 }

 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@ -3472,7 +3473,7 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
                        len >> BDRV_SECTOR_BITS);
    ret = raw_thread_pool_submit(handle_aiocb_zone_mgmt, &acb);
    if (ret != 0) {
-        update_zones_wp(bs, s->fd, offset, i);
+        update_zones_wp(bs, s->fd, offset, nrz);
        error_report("ioctl %s failed %d", op_name, ret);
        return ret;
    }
@ -3508,8 +3509,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
    int64_t zone_size_mask = bs->bl.zone_size - 1;
    int64_t iov_len = 0;
    int64_t len = 0;
-    BDRVRawState *s = bs->opaque;
-    s->offset = offset;

    if (*offset & zone_size_mask) {
        error_report("sector offset %" PRId64 " is not aligned to zone size "
@ -3530,7 +3529,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
    }

    trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
 }
 #endif

--- a/block/io.c
+++ b/block/io.c
@ -2585,6 +2585,16 @@ bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
                ret |= (ret2 & BDRV_BLOCK_ZERO);
            }
        }
+
+        /*
+         * Now that the recursive search was done, clear the flag. Otherwise,
+         * with more complicated block graphs like snapshot-access ->
+         * copy-before-write -> qcow2, where the return value will be propagated
+         * further up to a parent bdrv_co_do_block_status() call, both the
+         * BDRV_BLOCK_RECURSE and BDRV_BLOCK_ZERO flags would be set, which is
+         * not allowed.
+         */
+        ret &= ~BDRV_BLOCK_RECURSE;
    }

 out:
--- a/block/nvme.c
+++ b/block/nvme.c
@ -416,9 +416,10 @@ static bool nvme_process_completion(NVMeQueuePair *q)
            q->cq_phase = !q->cq_phase;
        }
        cid = le16_to_cpu(c->cid);
-        if (cid == 0 || cid > NVME_QUEUE_SIZE) {
-            warn_report("NVMe: Unexpected CID in completion queue: %"PRIu32", "
-                        "queue size: %u", cid, NVME_QUEUE_SIZE);
+        if (cid == 0 || cid > NVME_NUM_REQS) {
+            warn_report("NVMe: Unexpected CID in completion queue: %" PRIu32
+                        ", should be within: 1..%u inclusively", cid,
+                        NVME_NUM_REQS);
            continue;
        }
        trace_nvme_complete_command(s, q->index, cid);
--- a/block/parallels-ext.c
+++ b/block/parallels-ext.c
@ -130,7 +130,7 @@ static BdrvDirtyBitmap *parallels_load_bitmap(BlockDriverState *bs,
    g_autofree uint64_t *l1_table = NULL;
    BdrvDirtyBitmap *bitmap;
    QemuUUID uuid;
-    char uuidstr[UUID_FMT_LEN + 1];
+    char uuidstr[UUID_STR_LEN];
    int i;

    if (data_size < sizeof(bf)) {
--- a/block/qapi-sysemu.c
+++ b/block/qapi-sysemu.c
@ -232,6 +232,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
                                            BlockDriverState *bs, Error **errp)
 {
    Error *local_err = NULL;
+    AioContext *ctx;
    bool has_device;
    int ret;

@ -253,7 +254,11 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
        return;
    }

+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
    ret = blk_insert_bs(blk, bs, errp);
+    aio_context_release(ctx);
+
    if (ret < 0) {
        return;
    }
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@ -1984,7 +1984,7 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
            /* If we keep the reference, pass on the discard still */
            bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
                          s->cluster_size);
-       }
+        }
    }

    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
@ -2062,9 +2062,15 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
        QCow2ClusterType type = qcow2_get_cluster_type(bs, old_l2_entry);
        bool unmap = (type == QCOW2_CLUSTER_COMPRESSED) ||
            ((flags & BDRV_REQ_MAY_UNMAP) && qcow2_cluster_is_allocated(type));
-        uint64_t new_l2_entry = unmap ? 0 : old_l2_entry;
+        bool keep_reference =
+            (s->discard_no_unref && type != QCOW2_CLUSTER_COMPRESSED);
+        uint64_t new_l2_entry = old_l2_entry;
        uint64_t new_l2_bitmap = old_l2_bitmap;

+        if (unmap && !keep_reference) {
+            new_l2_entry = 0;
+        }
+
        if (has_subclusters(s)) {
            new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
        } else {
@ -2082,9 +2088,17 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
            set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
        }

-        /* Then decrease the refcount */
        if (unmap) {
-            qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+            if (!keep_reference) {
+                /* Then decrease the refcount */
+                qcow2_free_any_cluster(bs, old_l2_entry, QCOW2_DISCARD_REQUEST);
+            } else if (s->discard_passthrough[QCOW2_DISCARD_REQUEST] &&
+                       (type == QCOW2_CLUSTER_NORMAL ||
+                        type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+                /* If we keep the reference, pass on the discard still */
+                bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+                            s->cluster_size);
+            }
        }
    }

--- a/block/snapshot.c
+++ b/block/snapshot.c
@ -190,8 +190,10 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
 int bdrv_can_snapshot(BlockDriverState *bs)
 {
    BlockDriver *drv = bs->drv;
+
    GLOBAL_STATE_CODE();
-    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+
+    if (!drv || !bdrv_is_inserted(bs) || !bdrv_is_writable(bs)) {
        return 0;
    }

--- a/block/vdi.c
+++ b/block/vdi.c
@ -239,7 +239,7 @@ static void vdi_header_to_le(VdiHeader *header)

 static void vdi_header_print(VdiHeader *header)
 {
-    char uuidstr[37];
+    char uuidstr[UUID_STR_LEN];
    QemuUUID uuid;
    logout("text        %s", header->text);
    logout("signature   0x%08x\n", header->signature);
--- a/block/vmdk.c
+++ b/block/vmdk.c
@ -347,29 +347,41 @@ vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
    BDRVVmdkState *s = bs->opaque;
    int ret = 0;

-    desc = g_malloc0(DESC_SIZE);
-    tmp_desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_co_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
+    size_t desc_buf_size;
+
+    if (s->desc_offset == 0) {
+        desc_buf_size = bdrv_getlength(bs->file->bs);
+        if (desc_buf_size > 16ULL << 20) {
+            error_report("VMDK description file too big");
+            return -EFBIG;
+        }
+    } else {
+        desc_buf_size = DESC_SIZE;
+    }
+
+    desc = g_malloc0(desc_buf_size);
+    tmp_desc = g_malloc0(desc_buf_size);
+    ret = bdrv_co_pread(bs->file, s->desc_offset, desc_buf_size, desc, 0);
    if (ret < 0) {
        goto out;
    }

-    desc[DESC_SIZE - 1] = '\0';
+    desc[desc_buf_size - 1] = '\0';
    tmp_str = strstr(desc, "parentCID");
    if (tmp_str == NULL) {
        ret = -EINVAL;
        goto out;
    }

-    pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
+    pstrcpy(tmp_desc, desc_buf_size, tmp_str);
    p_name = strstr(desc, "CID");
    if (p_name != NULL) {
        p_name += sizeof("CID");
-        snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
-        pstrcat(desc, DESC_SIZE, tmp_desc);
+        snprintf(p_name, desc_buf_size - (p_name - desc), "%" PRIx32 "\n", cid);
+        pstrcat(desc, desc_buf_size, tmp_desc);
    }

-    ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
+    ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, desc_buf_size, desc, 0);

 out:
    g_free(desc);
--- a/blockdev.c
+++ b/blockdev.c
@ -2361,8 +2361,9 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,

    bdrv_co_lock(bs);
    bdrv_drained_end(bs);
-    blk_co_unref(blk);
    bdrv_co_unlock(bs);
+
+    blk_co_unref(blk);
 }

 void qmp_block_stream(const char *job_id, const char *device,
--- a/chardev/char-pty.c
+++ b/chardev/char-pty.c
@ -106,11 +106,27 @@ static void pty_chr_update_read_handler(Chardev *chr)
 static int char_pty_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
    PtyChardev *s = PTY_CHARDEV(chr);
+    GPollFD pfd;
+    int rc;

-    if (!s->connected) {
-        return len;
+    if (s->connected) {
+        return io_channel_send(s->ioc, buf, len);
    }
-    return io_channel_send(s->ioc, buf, len);
+
+    /*
+     * The other side might already be re-connected, but the timer might
+     * not have fired yet. So let's check here whether we can write again:
+     */
+    pfd.fd = QIO_CHANNEL_FILE(s->ioc)->fd;
+    pfd.events = G_IO_OUT;
+    pfd.revents = 0;
+    rc = RETRY_ON_EINTR(g_poll(&pfd, 1, 0));
+    g_assert(rc >= 0);
+    if (!(pfd.revents & G_IO_HUP) && (pfd.revents & G_IO_OUT)) {
+        io_channel_send(s->ioc, buf, len);
+    }
+
+    return len;
 }

 static GSource *pty_chr_add_watch(Chardev *chr, GIOCondition cond)
--- a/chardev/char.c
+++ b/chardev/char.c
@ -518,7 +518,7 @@ static const ChardevClass *char_get_class(const char *driver, Error **errp)

    if (object_class_is_abstract(oc)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "driver",
-                   "an abstract device type");
+                   "a non-abstract device type");
        return NULL;
    }

--- a/disas/riscv.c
+++ b/disas/riscv.c
@ -2116,8 +2116,8 @@ static const char *csr_name(int csrno)
    case 0x03ba: return "pmpaddr10";
    case 0x03bb: return "pmpaddr11";
    case 0x03bc: return "pmpaddr12";
-    case 0x03bd: return "pmpaddr14";
-    case 0x03be: return "pmpaddr13";
+    case 0x03bd: return "pmpaddr13";
+    case 0x03be: return "pmpaddr14";
    case 0x03bf: return "pmpaddr15";
    case 0x0780: return "mtohost";
    case 0x0781: return "mfromhost";
--- a/docs/about/license.rst
+++ b/docs/about/license.rst
@ -8,4 +8,4 @@ QEMU is a trademark of Fabrice Bellard.
 QEMU is released under the `GNU General Public
 License <https://www.gnu.org/licenses/gpl-2.0.txt>`__, version 2. Parts
 of QEMU have specific licenses, see file
-`LICENSE <https://git.qemu.org/?p=qemu.git;a=blob_plain;f=LICENSE>`__.
+`LICENSE <https://gitlab.com/qemu-project/qemu/-/raw/master/LICENSE>`__.
--- a/docs/devel/testing.rst
+++ b/docs/devel/testing.rst
@ -1016,7 +1016,7 @@ class.  Here's a simple usage example:
          self.vm.launch()
          res = self.vm.command('human-monitor-command',
                                command_line='info version')
-          self.assertRegexpMatches(res, r'^(\d+\.\d+\.\d)')
+          self.assertRegex(res, r'^(\d+\.\d+\.\d)')

 To execute your test, run:

@ -1077,7 +1077,7 @@ and hypothetical example follows:
              'human-monitor-command',
              command_line='info version')

-          self.assertEquals(first_res, second_res, third_res)
+          self.assertEqual(first_res, second_res, third_res)

 At test "tear down", ``avocado_qemu.Test`` handles all the QEMUMachines
 shutdown.
--- a/docs/multi-thread-compression.txt
+++ b/docs/multi-thread-compression.txt
@ -117,13 +117,13 @@ to support the multiple thread compression migration:
    {qemu} migrate_set_capability compress on

 3. Set the compression thread count on source:
-    {qemu} migrate_set_parameter compress_threads 12
+    {qemu} migrate_set_parameter compress-threads 12

 4. Set the compression level on the source:
-    {qemu} migrate_set_parameter compress_level 1
+    {qemu} migrate_set_parameter compress-level 1

 5. Set the decompression thread count on destination:
-    {qemu} migrate_set_parameter decompress_threads 3
+    {qemu} migrate_set_parameter decompress-threads 3

 6. Start outgoing migration:
    {qemu} migrate -d tcp:destination.host:4444
@ -133,9 +133,9 @@ to support the multiple thread compression migration:

 The following are the default settings:
    compress: off
-    compress_threads: 8
-    decompress_threads: 2
-    compress_level: 1 (which means best speed)
+    compress-threads: 8
+    decompress-threads: 2
+    compress-level: 1 (which means best speed)

 So, only the first two steps are required to use the multiple
 thread compression in migration. You can do more if the default
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@ -89,7 +89,7 @@ RUNNING:
 First, set the migration speed to match your hardware's capabilities:

 QEMU Monitor Command:
-$ migrate_set_parameter max_bandwidth 40g # or whatever is the MAX of your RDMA device
+$ migrate_set_parameter max-bandwidth 40g # or whatever is the MAX of your RDMA device

 Next, on the destination machine, add the following to the QEMU command line:

--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@ -0,0 +1,2 @@
+sphinx==5.3.0
+sphinx_rtd_theme==1.1.1
--- a/docs/sphinx/hxtool.py
+++ b/docs/sphinx/hxtool.py
@ -49,7 +49,7 @@ def serror(file, lnum, errtext):

 def parse_directive(line):
    """Return first word of line, if any"""
-    return re.split('\W', line)[0]
+    return re.split(r'\W', line)[0]

 def parse_defheading(file, lnum, line):
    """Handle a DEFHEADING directive"""
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@ -14,6 +14,7 @@ the following architecture extensions:
 - FEAT_BBM at level 2 (Translation table break-before-make levels)
 - FEAT_BF16 (AArch64 BFloat16 instructions)
 - FEAT_BTI (Branch Target Identification)
+- FEAT_CRC32 (CRC32 instructions)
 - FEAT_CSV2 (Cache speculation variant 2)
 - FEAT_CSV2_1p1 (Cache speculation variant 2, version 1.1)
 - FEAT_CSV2_1p2 (Cache speculation variant 2, version 1.2)
--- a/hw/acpi/erst.c
+++ b/hw/acpi/erst.c
@ -947,6 +947,7 @@ static const VMStateDescription erst_vmstate  = {

 static void erst_realizefn(PCIDevice *pci_dev, Error **errp)
 {
+    ERRP_GUARD();
    ERSTDeviceState *s = ACPIERST(pci_dev);

    trace_acpi_erst_realizefn_in();
@ -964,9 +965,15 @@ static void erst_realizefn(PCIDevice *pci_dev, Error **errp)

    /* HostMemoryBackend size will be multiple of PAGE_SIZE */
    s->storage_size = object_property_get_int(OBJECT(s->hostmem), "size", errp);
+    if (*errp) {
+        return;
+    }

    /* Initialize backend storage and record_count */
    check_erst_backend_storage(s, errp);
+    if (*errp) {
+        return;
+    }

    /* BAR 0: Programming registers */
    memory_region_init_io(&s->iomem_mr, OBJECT(pci_dev), &erst_reg_ops, s,
@ -977,6 +984,9 @@ static void erst_realizefn(PCIDevice *pci_dev, Error **errp)
    memory_region_init_ram(&s->exchange_mr, OBJECT(pci_dev),
                            "erst.exchange",
                            le32_to_cpu(s->header->record_size), errp);
+    if (*errp) {
+        return;
+    }
    pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
                        &s->exchange_mr);

--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@ -761,6 +761,10 @@ static void do_cpu_reset(void *opaque)
                    if (cpu_isar_feature(aa64_hcx, cpu)) {
                        env->cp15.scr_el3 |= SCR_HXEN;
                    }
+                    if (cpu_isar_feature(aa64_fgt, cpu)) {
+                        env->cp15.scr_el3 |= SCR_FGTEN;
+                    }
+
                    /* AArch64 kernels never boot in secure mode */
                    assert(!info->secure_boot);
                    /* This hook is only supported for AArch32 currently:
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@ -502,7 +502,7 @@ static void es1370_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
    case ES1370_REG_DAC2_SCOUNT:
    case ES1370_REG_ADC_SCOUNT:
        d += (addr - ES1370_REG_DAC1_SCOUNT) >> 2;
-        d->scount = (val & 0xffff) | (d->scount & ~0xffff);
+        d->scount = (val & 0xffff) << 16 | (val & 0xffff);
        ldebug ("chan %td CURR_SAMP_CT %d, SAMP_CT %d\n",
                d - &s->chan[0], val >> 16, (val & 0xffff));
        break;
--- a/hw/audio/hda-codec.c
+++ b/hw/audio/hda-codec.c
@ -22,6 +22,7 @@
 #include "hw/qdev-properties.h"
 #include "intel-hda.h"
 #include "migration/vmstate.h"
+#include "qemu/host-utils.h"
 #include "qemu/module.h"
 #include "intel-hda-defs.h"
 #include "audio/audio.h"
@ -189,9 +190,9 @@ struct HDAAudioState {
    bool     use_timer;
 };

-static inline int64_t hda_bytes_per_second(HDAAudioStream *st)
+static inline uint32_t hda_bytes_per_second(HDAAudioStream *st)
 {
-    return 2LL * st->as.nchannels * st->as.freq;
+    return 2 * (uint32_t)st->as.nchannels * (uint32_t)st->as.freq;
 }

 static inline void hda_timer_sync_adjust(HDAAudioStream *st, int64_t target_pos)
@ -222,12 +223,18 @@ static void hda_audio_input_timer(void *opaque)

    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

-    int64_t buft_start = st->buft_start;
+    int64_t uptime = now - st->buft_start;
    int64_t wpos = st->wpos;
    int64_t rpos = st->rpos;
+    int64_t wanted_rpos;

-    int64_t wanted_rpos = hda_bytes_per_second(st) * (now - buft_start)
-                          / NANOSECONDS_PER_SECOND;
+    if (uptime <= 0) {
+        /* wanted_rpos <= 0 */
+        goto out_timer;
+    }
+
+    wanted_rpos = muldiv64(uptime, hda_bytes_per_second(st),
+                           NANOSECONDS_PER_SECOND);
    wanted_rpos &= -4; /* IMPORTANT! clip to frames */

    if (wanted_rpos <= rpos) {
@ -286,12 +293,18 @@ static void hda_audio_output_timer(void *opaque)

    int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);

-    int64_t buft_start = st->buft_start;
+    int64_t uptime = now - st->buft_start;
    int64_t wpos = st->wpos;
    int64_t rpos = st->rpos;
+    int64_t wanted_wpos;

-    int64_t wanted_wpos = hda_bytes_per_second(st) * (now - buft_start)
-                          / NANOSECONDS_PER_SECOND;
+    if (uptime <= 0) {
+        /* wanted_wpos <= 0 */
+        goto out_timer;
+    }
+
+    wanted_wpos = muldiv64(uptime, hda_bytes_per_second(st),
+                           NANOSECONDS_PER_SECOND);
    wanted_wpos &= -4; /* IMPORTANT! clip to frames */

    if (wanted_wpos <= wpos) {
--- a/hw/avr/atmega.c
+++ b/hw/avr/atmega.c
@ -233,6 +233,10 @@ static void atmega_realize(DeviceState *dev, Error **errp)

    /* CPU */
    object_initialize_child(OBJECT(dev), "cpu", &s->cpu, mc->cpu_type);
+
+    object_property_set_uint(OBJECT(&s->cpu), "init-sp",
+                             mc->io_size + mc->sram_size - 1, &error_abort);
+
    qdev_realize(DEVICE(&s->cpu), NULL, &error_abort);
    cpudev = DEVICE(&s->cpu);

--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@ -80,16 +80,39 @@ struct PFlashCFI01 {
    uint16_t ident3;
    uint8_t cfi_table[0x52];
    uint64_t counter;
-    unsigned int writeblock_size;
+    uint32_t writeblock_size;
    MemoryRegion mem;
    char *name;
    void *storage;
    VMChangeStateEntry *vmstate;
    bool old_multiple_chip_handling;
+
+    /* block update buffer */
+    unsigned char *blk_bytes;
+    uint32_t blk_offset;
 };

 static int pflash_post_load(void *opaque, int version_id);

+static bool pflash_blk_write_state_needed(void *opaque)
+{
+    PFlashCFI01 *pfl = opaque;
+
+    return (pfl->blk_offset != -1);
+}
+
+static const VMStateDescription vmstate_pflash_blk_write = {
+    .name = "pflash_cfi01_blk_write",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = pflash_blk_write_state_needed,
+    .fields = (const VMStateField[]) {
+        VMSTATE_VBUFFER_UINT32(blk_bytes, PFlashCFI01, 0, NULL, writeblock_size),
+        VMSTATE_UINT32(blk_offset, PFlashCFI01),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_pflash = {
    .name = "pflash_cfi01",
    .version_id = 1,
@ -101,6 +124,10 @@ static const VMStateDescription vmstate_pflash = {
        VMSTATE_UINT8(status, PFlashCFI01),
        VMSTATE_UINT64(counter, PFlashCFI01),
        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_pflash_blk_write,
+        NULL
    }
 };

@ -225,34 +252,10 @@ static uint32_t pflash_data_read(PFlashCFI01 *pfl, hwaddr offset,
    uint32_t ret;

    p = pfl->storage;
-    switch (width) {
-    case 1:
-        ret = p[offset];
-        break;
-    case 2:
-        if (be) {
-            ret = p[offset] << 8;
-            ret |= p[offset + 1];
-        } else {
-            ret = p[offset];
-            ret |= p[offset + 1] << 8;
-        }
-        break;
-    case 4:
-        if (be) {
-            ret = p[offset] << 24;
-            ret |= p[offset + 1] << 16;
-            ret |= p[offset + 2] << 8;
-            ret |= p[offset + 3];
-        } else {
-            ret = p[offset];
-            ret |= p[offset + 1] << 8;
-            ret |= p[offset + 2] << 16;
-            ret |= p[offset + 3] << 24;
-        }
-        break;
-    default:
-        abort();
+    if (be) {
+        ret = ldn_be_p(p + offset, width);
+    } else {
+        ret = ldn_le_p(p + offset, width);
    }
    trace_pflash_data_read(pfl->name, offset, width, ret);
    return ret;
@ -400,40 +403,61 @@ static void pflash_update(PFlashCFI01 *pfl, int offset,
    }
 }

+/* copy current flash content to block update buffer */
+static void pflash_blk_write_start(PFlashCFI01 *pfl, hwaddr offset)
+{
+    hwaddr mask = ~(pfl->writeblock_size - 1);
+
+    trace_pflash_write_block_start(pfl->name, pfl->counter);
+    pfl->blk_offset = offset & mask;
+    memcpy(pfl->blk_bytes, pfl->storage + pfl->blk_offset,
+           pfl->writeblock_size);
+}
+
+/* commit block update buffer changes */
+static void pflash_blk_write_flush(PFlashCFI01 *pfl)
+{
+    g_assert(pfl->blk_offset != -1);
+    trace_pflash_write_block_flush(pfl->name);
+    memcpy(pfl->storage + pfl->blk_offset, pfl->blk_bytes,
+           pfl->writeblock_size);
+    pflash_update(pfl, pfl->blk_offset, pfl->writeblock_size);
+    pfl->blk_offset = -1;
+}
+
+/* discard block update buffer changes */
+static void pflash_blk_write_abort(PFlashCFI01 *pfl)
+{
+    trace_pflash_write_block_abort(pfl->name);
+    pfl->blk_offset = -1;
+}
+
 static inline void pflash_data_write(PFlashCFI01 *pfl, hwaddr offset,
                                     uint32_t value, int width, int be)
 {
-    uint8_t *p = pfl->storage;
+    uint8_t *p;

-    trace_pflash_data_write(pfl->name, offset, width, value, pfl->counter);
-    switch (width) {
-    case 1:
-        p[offset] = value;
-        break;
-    case 2:
-        if (be) {
-            p[offset] = value >> 8;
-            p[offset + 1] = value;
-        } else {
-            p[offset] = value;
-            p[offset + 1] = value >> 8;
+    if (pfl->blk_offset != -1) {
+        /* block write: redirect writes to block update buffer */
+        if ((offset < pfl->blk_offset) ||
+            (offset + width > pfl->blk_offset + pfl->writeblock_size)) {
+            pfl->status |= 0x10; /* Programming error */
+            return;
        }
-        break;
-    case 4:
-        if (be) {
-            p[offset] = value >> 24;
-            p[offset + 1] = value >> 16;
-            p[offset + 2] = value >> 8;
-            p[offset + 3] = value;
-        } else {
-            p[offset] = value;
-            p[offset + 1] = value >> 8;
-            p[offset + 2] = value >> 16;
-            p[offset + 3] = value >> 24;
-        }
-        break;
+        trace_pflash_data_write_block(pfl->name, offset, width, value,
+                                      pfl->counter);
+        p = pfl->blk_bytes + (offset - pfl->blk_offset);
+    } else {
+        /* write directly to storage */
+        trace_pflash_data_write(pfl->name, offset, width, value);
+        p = pfl->storage + offset;
    }

+    if (be) {
+        stn_be_p(p, width, value);
+    } else {
+        stn_le_p(p, width, value);
+    }
 }

 static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
@ -548,9 +572,9 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
            } else {
                value = extract32(value, 0, pfl->bank_width * 8);
            }
-            trace_pflash_write_block(pfl->name, value);
            pfl->counter = value;
            pfl->wcycle++;
+            pflash_blk_write_start(pfl, offset);
            break;
        case 0x60:
            if (cmd == 0xd0) {
@ -581,12 +605,7 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
        switch (pfl->cmd) {
        case 0xe8: /* Block write */
            /* FIXME check @offset, @width */
-            if (!pfl->ro) {
-                /*
-                 * FIXME writing straight to memory is *wrong*.  We
-                 * should write to a buffer, and flush it to memory
-                 * only on confirm command (see below).
-                 */
+            if (!pfl->ro && (pfl->blk_offset != -1)) {
                pflash_data_write(pfl, offset, value, width, be);
            } else {
                pfl->status |= 0x10; /* Programming error */
@ -595,18 +614,8 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
            pfl->status |= 0x80;

            if (!pfl->counter) {
-                hwaddr mask = pfl->writeblock_size - 1;
-                mask = ~mask;
-
                trace_pflash_write(pfl->name, "block write finished");
                pfl->wcycle++;
-                if (!pfl->ro) {
-                    /* Flush the entire write buffer onto backing storage.  */
-                    /* FIXME premature! */
-                    pflash_update(pfl, offset & mask, pfl->writeblock_size);
-                } else {
-                    pfl->status |= 0x10; /* Programming error */
-                }
            }

            pfl->counter--;
@ -618,20 +627,17 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr offset,
    case 3: /* Confirm mode */
        switch (pfl->cmd) {
        case 0xe8: /* Block write */
-            if (cmd == 0xd0) {
-                /* FIXME this is where we should write out the buffer */
+            if ((cmd == 0xd0) && !(pfl->status & 0x10)) {
+                pflash_blk_write_flush(pfl);
                pfl->wcycle = 0;
                pfl->status |= 0x80;
            } else {
-                qemu_log_mask(LOG_UNIMP,
-                    "%s: Aborting write to buffer not implemented,"
-                    " the data is already written to storage!\n"
-                    "Flash device reset into READ mode.\n",
-                    __func__);
+                pflash_blk_write_abort(pfl);
                goto mode_read_array;
            }
            break;
        default:
+            pflash_blk_write_abort(pfl);
            goto error_flash;
        }
        break;
@ -865,6 +871,9 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
    pfl->cmd = 0x00;
    pfl->status = 0x80; /* WSM ready */
    pflash_cfi01_fill_cfi_table(pfl);
+
+    pfl->blk_bytes = g_malloc(pfl->writeblock_size);
+    pfl->blk_offset = -1;
 }

 static void pflash_cfi01_system_reset(DeviceState *dev)
@ -884,6 +893,8 @@ static void pflash_cfi01_system_reset(DeviceState *dev)
     * This model deliberately ignores this delay.
     */
    pfl->status = 0x80;
+
+    pfl->blk_offset = -1;
 }

 static Property pflash_cfi01_properties[] = {
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@ -546,7 +546,7 @@ static void pflash_write(void *opaque, hwaddr offset, uint64_t value,
                }
                goto reset_flash;
            }
-            trace_pflash_data_write(pfl->name, offset, width, value, 0);
+            trace_pflash_data_write(pfl->name, offset, width, value);
            if (!pfl->ro) {
                p = (uint8_t *)pfl->storage + offset;
                if (pfl->be) {
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@ -12,7 +12,8 @@ fdctrl_tc_pulse(int level) "TC pulse: %u"
 pflash_chip_erase_invalid(const char *name, uint64_t offset) "%s: chip erase: invalid address 0x%" PRIx64
 pflash_chip_erase_start(const char *name) "%s: start chip erase"
 pflash_data_read(const char *name, uint64_t offset, unsigned size, uint32_t value) "%s: data offset:0x%04"PRIx64" size:%u value:0x%04x"
-pflash_data_write(const char *name, uint64_t offset, unsigned size, uint32_t value, uint64_t counter) "%s: data offset:0x%04"PRIx64" size:%u value:0x%04x counter:0x%016"PRIx64
+pflash_data_write(const char *name, uint64_t offset, unsigned size, uint32_t value) "%s: data offset:0x%04"PRIx64" size:%u value:0x%04x"
+pflash_data_write_block(const char *name, uint64_t offset, unsigned size, uint32_t value, uint64_t counter) "%s: data offset:0x%04"PRIx64" size:%u value:0x%04x counter:0x%016"PRIx64
 pflash_device_id(const char *name, uint16_t id) "%s: read device ID: 0x%04x"
 pflash_device_info(const char *name, uint64_t offset) "%s: read device information offset:0x%04" PRIx64
 pflash_erase_complete(const char *name) "%s: sector erase complete"
@ -32,7 +33,9 @@ pflash_unlock0_failed(const char *name, uint64_t offset, uint8_t cmd, uint16_t a
 pflash_unlock1_failed(const char *name, uint64_t offset, uint8_t cmd) "%s: unlock0 failed 0x%" PRIx64 " 0x%02x"
 pflash_unsupported_device_configuration(const char *name, uint8_t width, uint8_t max) "%s: unsupported device configuration: device_width:%d max_device_width:%d"
 pflash_write(const char *name, const char *str) "%s: %s"
-pflash_write_block(const char *name, uint32_t value) "%s: block write: bytes:0x%x"
+pflash_write_block_start(const char *name, uint32_t value) "%s: block write start: bytes:0x%x"
+pflash_write_block_flush(const char *name) "%s: block write flush"
+pflash_write_block_abort(const char *name) "%s: block write abort"
 pflash_write_block_erase(const char *name, uint64_t offset, uint64_t len) "%s: block erase offset:0x%" PRIx64 " bytes:0x%" PRIx64
 pflash_write_failed(const char *name, uint64_t offset, uint8_t cmd) "%s: command failed 0x%" PRIx64 " 0x%02x"
 pflash_write_invalid(const char *name, uint8_t cmd) "%s: invalid write for command 0x%02x"
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@ -115,9 +115,13 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
        return;
    }

-    if (xen_device_frontend_scanf(xendev, "protocol", "%ms",
-                                  &str) != 1) {
-        protocol = BLKIF_PROTOCOL_NATIVE;
+    if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) {
+        /* x86 defaults to the 32-bit protocol even for 64-bit guests. */
+        if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) {
+            protocol = BLKIF_PROTOCOL_X86_32;
+        } else {
+            protocol = BLKIF_PROTOCOL_NATIVE;
+        }
    } else {
        if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) {
            protocol = BLKIF_PROTOCOL_X86_32;
--- a/hw/char/riscv_htif.c
+++ b/hw/char/riscv_htif.c
@ -30,6 +30,7 @@
 #include "qemu/timer.h"
 #include "qemu/error-report.h"
 #include "exec/address-spaces.h"
+#include "exec/tswap.h"
 #include "sysemu/dma.h"

 #define RISCV_DEBUG_HTIF 0
@ -209,11 +210,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
            } else {
                uint64_t syscall[8];
                cpu_physical_memory_read(payload, syscall, sizeof(syscall));
-                if (syscall[0] == PK_SYS_WRITE &&
-                    syscall[1] == HTIF_DEV_CONSOLE &&
-                    syscall[3] == HTIF_CONSOLE_CMD_PUTC) {
+                if (tswap64(syscall[0]) == PK_SYS_WRITE &&
+                    tswap64(syscall[1]) == HTIF_DEV_CONSOLE &&
+                    tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) {
                    uint8_t ch;
-                    cpu_physical_memory_read(syscall[2], &ch, 1);
+                    cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1);
                    qemu_chr_fe_write(&s->chr, &ch, 1);
                    resp = 0x100 | (uint8_t)payload;
                } else {
@ -232,7 +233,8 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written)
            s->tohost = 0; /* clear to indicate we read */
            return;
        } else if (cmd == HTIF_CONSOLE_CMD_PUTC) {
-            qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1);
+            uint8_t ch = (uint8_t)payload;
+            qemu_chr_fe_write(&s->chr, &ch, 1);
            resp = 0x100 | (uint8_t)payload;
        } else {
            qemu_log("HTIF device %d: unknown command\n", device);
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@ -1105,7 +1105,7 @@ static void get_uuid(Object *obj, Visitor *v, const char *name, void *opaque,
 {
    Property *prop = opaque;
    QemuUUID *uuid = object_field_prop_ptr(obj, prop);
-    char buffer[UUID_FMT_LEN + 1];
+    char buffer[UUID_STR_LEN];
    char *p = buffer;

    qemu_uuid_unparse(uuid, buffer);
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@ -39,12 +39,6 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
        return;
    }

-    fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
-    for (i = 0, target = object->targets; target; i++, target = target->next) {
-        /* This link cannot be resolved yet, so stash the name for now */
-        fw->targets[i] = g_strdup(target->value);
-    }
-
    if (object->size % (256 * MiB)) {
        error_setg(errp,
                   "Size of a CXL fixed memory window must be a multiple of 256MiB");
@ -64,6 +58,12 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
        fw->enc_int_gran = 0;
    }

+    fw->targets = g_malloc0_n(fw->num_targets, sizeof(*fw->targets));
+    for (i = 0, target = object->targets; target; i++, target = target->next) {
+        /* This link cannot be resolved yet, so stash the name for now */
+        fw->targets[i] = g_strdup(target->value);
+    }
+
    cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows,
                                             g_steal_pointer(&fw));

--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@ -1014,6 +1014,7 @@ static Property ati_vga_properties[] = {
    DEFINE_PROP_UINT16("x-device-id", ATIVGAState, dev_id,
                       PCI_DEVICE_ID_ATI_RAGE128_PF),
    DEFINE_PROP_BOOL("guest_hwcursor", ATIVGAState, cursor_guest_mode, false),
+    DEFINE_PROP_UINT8("x-pixman", ATIVGAState, use_pixman, 3),
    DEFINE_PROP_END_OF_LIST()
 };

@ -1035,11 +1036,18 @@ static void ati_vga_class_init(ObjectClass *klass, void *data)
    k->exit = ati_vga_exit;
 }

+static void ati_vga_init(Object *o)
+{
+    object_property_set_description(o, "x-pixman", "Use pixman for: "
+                                    "1: fill, 2: blit");
+}
+
 static const TypeInfo ati_vga_info = {
    .name = TYPE_ATI_VGA,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(ATIVGAState),
    .class_init = ati_vga_class_init,
+    .instance_init = ati_vga_init,
    .interfaces = (InterfaceInfo[]) {
          { INTERFACE_CONVENTIONAL_PCI_DEVICE },
          { },
--- a/hw/display/ati_2d.c
+++ b/hw/display/ati_2d.c
@ -92,6 +92,7 @@ void ati_2d_blt(ATIVGAState *s)
    switch (s->regs.dp_mix & GMC_ROP3_MASK) {
    case ROP3_SRCCOPY:
    {
+        bool fallback = false;
        unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ?
                       s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width);
        unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ?
@ -122,27 +123,50 @@ void ati_2d_blt(ATIVGAState *s)
                src_bits, dst_bits, src_stride, dst_stride, bpp, bpp,
                src_x, src_y, dst_x, dst_y,
                s->regs.dst_width, s->regs.dst_height);
-        if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
+        if ((s->use_pixman & BIT(1)) &&
+            s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT &&
            s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
-            pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
-                       src_stride, dst_stride, bpp, bpp,
-                       src_x, src_y, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
-        } else {
+            fallback = !pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits,
+                                   src_stride, dst_stride, bpp, bpp,
+                                   src_x, src_y, dst_x, dst_y,
+                                   s->regs.dst_width, s->regs.dst_height);
+        } else if (s->use_pixman & BIT(1)) {
            /* FIXME: We only really need a temporary if src and dst overlap */
            int llb = s->regs.dst_width * (bpp / 8);
            int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t));
            uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) *
                                     s->regs.dst_height);
-            pixman_blt((uint32_t *)src_bits, tmp,
-                       src_stride, tmp_stride, bpp, bpp,
-                       src_x, src_y, 0, 0,
-                       s->regs.dst_width, s->regs.dst_height);
-            pixman_blt(tmp, (uint32_t *)dst_bits,
-                       tmp_stride, dst_stride, bpp, bpp,
-                       0, 0, dst_x, dst_y,
-                       s->regs.dst_width, s->regs.dst_height);
+            fallback = !pixman_blt((uint32_t *)src_bits, tmp,
+                                   src_stride, tmp_stride, bpp, bpp,
+                                   src_x, src_y, 0, 0,
+                                   s->regs.dst_width, s->regs.dst_height);
+            if (!fallback) {
+                fallback = !pixman_blt(tmp, (uint32_t *)dst_bits,
+                                       tmp_stride, dst_stride, bpp, bpp,
+                                       0, 0, dst_x, dst_y,
+                                       s->regs.dst_width, s->regs.dst_height);
+            }
            g_free(tmp);
+        } else {
+            fallback = true;
+        }
+        if (fallback) {
+            unsigned int y, i, j, bypp = bpp / 8;
+            unsigned int src_pitch = src_stride * sizeof(uint32_t);
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp;
+                j = src_x * bypp;
+                if (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) {
+                    i += (dst_y + y) * dst_pitch;
+                    j += (src_y + y) * src_pitch;
+                } else {
+                    i += (dst_y + s->regs.dst_height - 1 - y) * dst_pitch;
+                    j += (src_y + s->regs.dst_height - 1 - y) * src_pitch;
+                }
+                memmove(&dst_bits[i], &src_bits[j], s->regs.dst_width * bypp);
+            }
        }
        if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
            dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
@ -180,14 +204,21 @@ void ati_2d_blt(ATIVGAState *s)

        dst_stride /= sizeof(uint32_t);
        DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n",
-                dst_bits, dst_stride, bpp,
-                dst_x, dst_y,
-                s->regs.dst_width, s->regs.dst_height,
-                filler);
-        pixman_fill((uint32_t *)dst_bits, dst_stride, bpp,
-                    dst_x, dst_y,
-                    s->regs.dst_width, s->regs.dst_height,
-                    filler);
+                dst_bits, dst_stride, bpp, dst_x, dst_y,
+                s->regs.dst_width, s->regs.dst_height, filler);
+        if (!(s->use_pixman & BIT(0)) ||
+            !pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, dst_x, dst_y,
+                    s->regs.dst_width, s->regs.dst_height, filler)) {
+            /* fallback when pixman failed or we don't want to call it */
+            unsigned int x, y, i, bypp = bpp / 8;
+            unsigned int dst_pitch = dst_stride * sizeof(uint32_t);
+            for (y = 0; y < s->regs.dst_height; y++) {
+                i = dst_x * bypp + (dst_y + y) * dst_pitch;
+                for (x = 0; x < s->regs.dst_width; x++, i += bypp) {
+                    stn_he_p(&dst_bits[i], bypp, filler);
+                }
+            }
+        }
        if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr &&
            dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr +
            s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) {
--- a/hw/display/ati_int.h
+++ b/hw/display/ati_int.h
@ -89,6 +89,7 @@ struct ATIVGAState {
    char *model;
    uint16_t dev_id;
    uint8_t mode;
+    uint8_t use_pixman;
    bool cursor_guest_mode;
    uint16_t cursor_size;
    uint32_t cursor_offset;
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@ -1591,7 +1591,10 @@ static void qxl_set_mode(PCIQXLDevice *d, unsigned int modenr, int loadvm)
    }

    d->guest_slots[0].slot = slot;
-    assert(qxl_add_memslot(d, 0, devmem, QXL_SYNC) == 0);
+    if (qxl_add_memslot(d, 0, devmem, QXL_SYNC) != 0) {
+        qxl_set_guest_bug(d, "device isn't initialized yet");
+        return;
+    }

    d->guest_primary.surface = surface;
    qxl_create_guest_primary(d, 0, QXL_SYNC);
--- a/hw/display/ramfb.c
+++ b/hw/display/ramfb.c
@ -97,6 +97,7 @@ static void ramfb_fw_cfg_write(void *dev, off_t offset, size_t len)

    s->width = width;
    s->height = height;
+    qemu_free_displaysurface(s->ds);
    s->ds = surface;
 }

--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@ -26,6 +26,7 @@
 #include "hw/virtio/virtio-gpu-pixman.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/qdev-properties.h"
+#include "migration/blocker.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
@ -44,6 +45,8 @@ static void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
                                       struct virtio_gpu_simple_resource *res);
 static void virtio_gpu_reset_bh(void *opaque);

+static Error *blob_mig_blocker;
+
 void virtio_gpu_update_cursor_data(VirtIOGPU *g,
                                   struct virtio_gpu_scanout *s,
                                   uint32_t resource_id)
@ -1283,7 +1286,9 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
            g_free(res);
            return -EINVAL;
        }
-
+#ifdef WIN32
+        pixman_image_set_destroy_function(res->image, win32_pixman_image_destroy, res->handle);
+#endif

        res->addrs = g_new(uint64_t, res->iov_cnt);
        res->iov = g_new(struct iovec, res->iov_cnt);
@ -1374,6 +1379,14 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
            error_setg(errp, "blobs and virgl are not compatible (yet)");
            return;
        }
+
+        if (!blob_mig_blocker) {
+            error_setg(&blob_mig_blocker,
+                       "virtio-gpu blob VMs are currently not migratable.");
+        }
+        if (migrate_add_blocker(blob_mig_blocker, errp)) {
+            return;
+        }
    }

    if (!virtio_gpu_base_device_realize(qdev,
@ -1400,6 +1413,9 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev)
 {
    VirtIOGPU *g = VIRTIO_GPU(qdev);

+    if (virtio_gpu_blob_enabled(g->parent_obj.conf)) {
+        migrate_del_blocker(blob_mig_blocker);
+    }
    g_clear_pointer(&g->ctrl_bh, qemu_bh_delete);
    g_clear_pointer(&g->cursor_bh, qemu_bh_delete);
    g_clear_pointer(&g->reset_bh, qemu_bh_delete);
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@ -2271,7 +2271,7 @@ static void vmbus_dev_realize(DeviceState *dev, Error **errp)
    VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
    BusChild *child;
    Error *err = NULL;
-    char idstr[UUID_FMT_LEN + 1];
+    char idstr[UUID_STR_LEN];

    assert(!qemu_uuid_is_null(&vdev->instanceid));

@ -2467,7 +2467,7 @@ static char *vmbus_get_dev_path(DeviceState *dev)
 static char *vmbus_get_fw_dev_path(DeviceState *dev)
 {
    VMBusDevice *vdev = VMBUS_DEVICE(dev);
-    char uuid[UUID_FMT_LEN + 1];
+    char uuid[UUID_STR_LEN];

    qemu_uuid_unparse(&vdev->instanceid, uuid);
    return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
--- a/hw/i2c/aspeed_i2c.c
+++ b/hw/i2c/aspeed_i2c.c
@ -226,7 +226,7 @@ static int aspeed_i2c_dma_read(AspeedI2CBus *bus, uint8_t *data)
    return 0;
 }

-static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
+static int aspeed_i2c_bus_send(AspeedI2CBus *bus)
 {
    AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller);
    int ret = -1;
@ -236,10 +236,10 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
    uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus);
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
    int pool_tx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
-                                                TX_COUNT);
+                                                TX_COUNT) + 1;

    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-        for (i = pool_start; i < pool_tx_count; i++) {
+        for (i = 0; i < pool_tx_count; i++) {
            uint8_t *pool_base = aic->bus_pool_base(bus);

            trace_aspeed_i2c_bus_send("BUF", i + 1, pool_tx_count,
@ -273,7 +273,7 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start)
        }
        SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_DMA_EN, 0);
    } else {
-        trace_aspeed_i2c_bus_send("BYTE", pool_start, 1,
+        trace_aspeed_i2c_bus_send("BYTE", 0, 1,
                                  bus->regs[reg_byte_buf]);
        ret = i2c_send(bus->bus, bus->regs[reg_byte_buf]);
    }
@ -293,7 +293,7 @@ static void aspeed_i2c_bus_recv(AspeedI2CBus *bus)
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
    uint32_t reg_dma_addr = aspeed_i2c_bus_dma_addr_offset(bus);
    int pool_rx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl,
-                                                RX_COUNT);
+                                                RX_SIZE) + 1;

    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) {
        uint8_t *pool_base = aic->bus_pool_base(bus);
@ -418,7 +418,7 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus)
    uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);
    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) {
-        count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT);
+        count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT) + 1;
    } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN)) {
        count = bus->regs[reg_dma_len];
    } else { /* BYTE mode */
@ -446,10 +446,8 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus)
 */
 static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)
 {
-    uint8_t pool_start = 0;
    uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus);
    uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus);
-    uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus);
    uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus);

    if (!aspeed_i2c_check_sram(bus)) {
@ -483,27 +481,11 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)

        SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_START_CMD, 0);

-        /*
-         * The START command is also a TX command, as the slave
-         * address is sent on the bus. Drop the TX flag if nothing
-         * else needs to be sent in this sequence.
-         */
-        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
-            if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT)
-                == 1) {
-                SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
-            } else {
-                /*
-                 * Increase the start index in the TX pool buffer to
-                 * skip the address byte.
-                 */
-                pool_start++;
-            }
-        } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
+        if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) {
            if (bus->regs[reg_dma_len] == 0) {
                SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
            }
-        } else {
+        } else if (!SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) {
            SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0);
        }

@ -520,7 +502,7 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value)

    if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_TX_CMD)) {
        aspeed_i2c_set_state(bus, I2CD_MTXD);
-        if (aspeed_i2c_bus_send(bus, pool_start)) {
+        if (aspeed_i2c_bus_send(bus)) {
            SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_NAK, 1);
            i2c_end_transfer(bus->bus);
        } else {
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@ -1246,13 +1246,8 @@ static int amdvi_int_remap_msi(AMDVIState *iommu,
        return -AMDVI_IR_ERR;
    }

-    if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
-        trace_amdvi_err("MSI address high 32 bits non-zero when "
-                        "Interrupt Remapping enabled.");
-        return -AMDVI_IR_ERR;
-    }
-
-    if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
+    if (origin->address < AMDVI_INT_ADDR_FIRST ||
+        origin->address + sizeof(origin->data) > AMDVI_INT_ADDR_LAST + 1) {
        trace_amdvi_err("MSI is not from IOAPIC.");
        return -AMDVI_IR_ERR;
    }
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@ -210,8 +210,6 @@
 #define AMDVI_INT_ADDR_FIRST    0xfee00000
 #define AMDVI_INT_ADDR_LAST     0xfeefffff
 #define AMDVI_INT_ADDR_SIZE     (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST + 1)
-#define AMDVI_MSI_ADDR_HI_MASK  (0xffffffff00000000ULL)
-#define AMDVI_MSI_ADDR_LO_MASK  (0x00000000ffffffffULL)

 /* SB IOAPIC is always on this device in AMD systems */
 #define AMDVI_IOAPIC_SB_DEVID   PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@ -490,6 +490,12 @@ int xen_evtchn_set_callback_param(uint64_t param)
        break;
    }

+    /* If the guest has set a per-vCPU callback vector, prefer that. */
+    if (gsi && kvm_xen_has_vcpu_callback_vector()) {
+        in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
+        gsi = 0;
+    }
+
    if (!ret) {
        /* If vector delivery was turned *off* then tell the kernel */
        if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
@ -1129,6 +1135,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset)
        return -ESRCH;
    }

+    QEMU_IOTHREAD_LOCK_GUARD();
    return xen_evtchn_soft_reset();
 }

--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@ -541,7 +541,5 @@ int xen_gnttab_reset(void)
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

-    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
-
    return 0;
 }
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@ -1357,10 +1357,12 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token)
    } else {
        deliver_watch(s, path, token);
        /*
-         * If the message was queued because there was already ring activity,
-         * no need to wake the guest. But if not, we need to send the evtchn.
+         * Attempt to queue the message into the actual ring, and send
+         * the event channel notification if any bytes are copied.
         */
-        xen_be_evtchn_notify(s->eh, s->be_port);
+        if (s->rsp_pending && put_rsp(s) > 0) {
+            xen_be_evtchn_notify(s->eh, s->be_port);
+        }
    }
 }

--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@ -41,9 +41,10 @@
 #include "trace.h"

 static void check_cmd(AHCIState *s, int port);
-static int handle_cmd(AHCIState *s, int port, uint8_t slot);
+static void handle_cmd(AHCIState *s, int port, uint8_t slot);
 static void ahci_reset_port(AHCIState *s, int port);
-static bool ahci_write_fis_d2h(AHCIDevice *ad);
+static bool ahci_write_fis_d2h(AHCIDevice *ad, bool d2h_fis_i);
+static void ahci_clear_cmd_issue(AHCIDevice *ad, uint8_t slot);
 static void ahci_init_d2h(AHCIDevice *ad);
 static int ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit);
 static bool ahci_map_clb_address(AHCIDevice *ad);
@ -328,6 +329,11 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
        ahci_check_irq(s);
        break;
    case AHCI_PORT_REG_CMD:
+        if ((pr->cmd & PORT_CMD_START) && !(val & PORT_CMD_START)) {
+            pr->scr_act = 0;
+            pr->cmd_issue = 0;
+        }
+
        /* Block any Read-only fields from being set;
         * including LIST_ON and FIS_ON.
         * The spec requires to set ICC bits to zero after the ICC change
@ -591,9 +597,8 @@ static void check_cmd(AHCIState *s, int port)

    if ((pr->cmd & PORT_CMD_START) && pr->cmd_issue) {
        for (slot = 0; (slot < 32) && pr->cmd_issue; slot++) {
-            if ((pr->cmd_issue & (1U << slot)) &&
-                !handle_cmd(s, port, slot)) {
-                pr->cmd_issue &= ~(1U << slot);
+            if (pr->cmd_issue & (1U << slot)) {
+                handle_cmd(s, port, slot);
            }
        }
    }
@ -618,9 +623,13 @@ static void ahci_init_d2h(AHCIDevice *ad)
        return;
    }

-    if (ahci_write_fis_d2h(ad)) {
+    /*
+     * For simplicity, do not call ahci_clear_cmd_issue() for this
+     * ahci_write_fis_d2h(). (The reset value for PxCI is 0.)
+     */
+    if (ahci_write_fis_d2h(ad, true)) {
        ad->init_d2h_sent = true;
-        /* We're emulating receiving the first Reg H2D Fis from the device;
+        /* We're emulating receiving the first Reg D2H FIS from the device;
         * Update the SIG register, but otherwise proceed as normal. */
        pr->sig = ((uint32_t)ide_state->hcyl << 24) |
            (ide_state->lcyl << 16) |
@ -658,6 +667,7 @@ static void ahci_reset_port(AHCIState *s, int port)
    pr->scr_act = 0;
    pr->tfdata = 0x7F;
    pr->sig = 0xFFFFFFFF;
+    pr->cmd_issue = 0;
    d->busy_slot = -1;
    d->init_d2h_sent = false;

@ -801,8 +811,14 @@ static void ahci_write_fis_sdb(AHCIState *s, NCQTransferState *ncq_tfs)
    pr->scr_act &= ~ad->finished;
    ad->finished = 0;

-    /* Trigger IRQ if interrupt bit is set (which currently, it always is) */
-    if (sdb_fis->flags & 0x40) {
+    /*
+     * TFES IRQ is always raised if ERR_STAT is set, regardless of I bit.
+     * If ERR_STAT is not set, trigger SDBS IRQ if interrupt bit is set
+     * (which currently, it always is).
+     */
+    if (sdb_fis->status & ERR_STAT) {
+        ahci_trigger_irq(s, ad, AHCI_PORT_IRQ_BIT_TFES);
+    } else if (sdb_fis->flags & 0x40) {
        ahci_trigger_irq(s, ad, AHCI_PORT_IRQ_BIT_SDBS);
    }
 }
@ -850,7 +866,7 @@ static void ahci_write_fis_pio(AHCIDevice *ad, uint16_t len, bool pio_fis_i)
    }
 }

-static bool ahci_write_fis_d2h(AHCIDevice *ad)
+static bool ahci_write_fis_d2h(AHCIDevice *ad, bool d2h_fis_i)
 {
    AHCIPortRegs *pr = &ad->port_regs;
    uint8_t *d2h_fis;
@ -864,7 +880,7 @@ static bool ahci_write_fis_d2h(AHCIDevice *ad)
    d2h_fis = &ad->res_fis[RES_FIS_RFIS];

    d2h_fis[0] = SATA_FIS_TYPE_REGISTER_D2H;
-    d2h_fis[1] = (1 << 6); /* interrupt bit */
+    d2h_fis[1] = d2h_fis_i ? (1 << 6) : 0; /* interrupt bit */
    d2h_fis[2] = s->status;
    d2h_fis[3] = s->error;

@ -890,7 +906,10 @@ static bool ahci_write_fis_d2h(AHCIDevice *ad)
        ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_TFES);
    }

-    ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_DHRS);
+    if (d2h_fis_i) {
+        ahci_trigger_irq(ad->hba, ad, AHCI_PORT_IRQ_BIT_DHRS);
+    }
+
    return true;
 }

@ -998,7 +1017,6 @@ static void ncq_err(NCQTransferState *ncq_tfs)

    ide_state->error = ABRT_ERR;
    ide_state->status = READY_STAT | ERR_STAT;
-    ncq_tfs->drive->port_regs.scr_err |= (1 << ncq_tfs->tag);
    qemu_sglist_destroy(&ncq_tfs->sglist);
    ncq_tfs->used = 0;
 }
@ -1008,7 +1026,7 @@ static void ncq_finish(NCQTransferState *ncq_tfs)
    /* If we didn't error out, set our finished bit. Errored commands
     * do not get a bit set for the SDB FIS ACT register, nor do they
     * clear the outstanding bit in scr_act (PxSACT). */
-    if (!(ncq_tfs->drive->port_regs.scr_err & (1 << ncq_tfs->tag))) {
+    if (ncq_tfs->used) {
        ncq_tfs->drive->finished |= (1 << ncq_tfs->tag);
    }

@ -1120,6 +1138,24 @@ static void process_ncq_command(AHCIState *s, int port, const uint8_t *cmd_fis,
        return;
    }

+    /*
+     * A NCQ command clears the bit in PxCI after the command has been QUEUED
+     * successfully (ERROR not set, BUSY and DRQ cleared).
+     *
+     * For NCQ commands, PxCI will always be cleared here.
+     *
+     * (Once the NCQ command is COMPLETED, the device will send a SDB FIS with
+     * the interrupt bit set, which will clear PxSACT and raise an interrupt.)
+     */
+    ahci_clear_cmd_issue(ad, slot);
+
+    /*
+     * In reality, for NCQ commands, PxCI is cleared after receiving a D2H FIS
+     * without the interrupt bit set, but since ahci_write_fis_d2h() can raise
+     * an IRQ on error, we need to call them in reverse order.
+     */
+    ahci_write_fis_d2h(ad, false);
+
    ncq_tfs->used = 1;
    ncq_tfs->drive = ad;
    ncq_tfs->slot = slot;
@ -1192,6 +1228,7 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
 {
    IDEState *ide_state = &s->dev[port].port.ifs[0];
    AHCICmdHdr *cmd = get_cmd_header(s, port, slot);
+    AHCIDevice *ad = &s->dev[port];
    uint16_t opts = le16_to_cpu(cmd->opts);

    if (cmd_fis[1] & 0x0F) {
@ -1211,10 +1248,30 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
        case STATE_RUN:
            if (cmd_fis[15] & ATA_SRST) {
                s->dev[port].port_state = STATE_RESET;
+                /*
+                 * When setting SRST in the first H2D FIS in the reset sequence,
+                 * the device does not send a D2H FIS. Host software thus has to
+                 * set the "Clear Busy upon R_OK" bit such that PxCI (and BUSY)
+                 * gets cleared. See AHCI 1.3.1, section 10.4.1 Software Reset.
+                 */
+                if (opts & AHCI_CMD_CLR_BUSY) {
+                    ahci_clear_cmd_issue(ad, slot);
+                }
            }
            break;
        case STATE_RESET:
            if (!(cmd_fis[15] & ATA_SRST)) {
+                /*
+                 * When clearing SRST in the second H2D FIS in the reset
+                 * sequence, the device will execute diagnostics. When this is
+                 * done, the device will send a D2H FIS with the good status.
+                 * See SATA 3.5a Gold, section 11.4 Software reset protocol.
+                 *
+                 * This D2H FIS is the first D2H FIS received from the device,
+                 * and is received regardless if the reset was performed by a
+                 * COMRESET or by setting and clearing the SRST bit. Therefore,
+                 * the logic for this is found in ahci_init_d2h() and not here.
+                 */
                ahci_reset_port(s, port);
            }
            break;
@ -1268,11 +1325,19 @@ static void handle_reg_h2d_fis(AHCIState *s, int port,
    /* Reset transferred byte counter */
    cmd->status = 0;

+    /*
+     * A non-NCQ command clears the bit in PxCI after the command has COMPLETED
+     * successfully (ERROR not set, BUSY and DRQ cleared).
+     *
+     * For non-NCQ commands, PxCI will always be cleared by ahci_cmd_done().
+     */
+    ad->busy_slot = slot;
+
    /* We're ready to process the command in FIS byte 2. */
    ide_bus_exec_cmd(&s->dev[port].port, cmd_fis[2]);
 }

-static int handle_cmd(AHCIState *s, int port, uint8_t slot)
+static void handle_cmd(AHCIState *s, int port, uint8_t slot)
 {
    IDEState *ide_state;
    uint64_t tbl_addr;
@ -1283,12 +1348,12 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
    if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
        /* Engine currently busy, try again later */
        trace_handle_cmd_busy(s, port);
-        return -1;
+        return;
    }

    if (!s->dev[port].lst) {
        trace_handle_cmd_nolist(s, port);
-        return -1;
+        return;
    }
    cmd = get_cmd_header(s, port, slot);
    /* remember current slot handle for later */
@ -1298,7 +1363,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
    ide_state = &s->dev[port].port.ifs[0];
    if (!ide_state->blk) {
        trace_handle_cmd_badport(s, port);
-        return -1;
+        return;
    }

    tbl_addr = le64_to_cpu(cmd->tbl_addr);
@ -1307,7 +1372,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
                             DMA_DIRECTION_TO_DEVICE, MEMTXATTRS_UNSPECIFIED);
    if (!cmd_fis) {
        trace_handle_cmd_badfis(s, port);
-        return -1;
+        return;
    } else if (cmd_len != 0x80) {
        ahci_trigger_irq(s, &s->dev[port], AHCI_PORT_IRQ_BIT_HBFS);
        trace_handle_cmd_badmap(s, port, cmd_len);
@ -1331,15 +1396,6 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot)
 out:
    dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_TO_DEVICE,
                     cmd_len);
-
-    if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
-        /* async command, complete later */
-        s->dev[port].busy_slot = slot;
-        return -1;
-    }
-
-    /* done handling the command */
-    return 0;
 }

 /* Transfer PIO data between RAM and device */
@ -1493,22 +1549,39 @@ static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write)
    return 1;
 }

+static void ahci_clear_cmd_issue(AHCIDevice *ad, uint8_t slot)
+{
+    IDEState *ide_state = &ad->port.ifs[0];
+
+    if (!(ide_state->status & ERR_STAT) &&
+        !(ide_state->status & (BUSY_STAT | DRQ_STAT))) {
+        ad->port_regs.cmd_issue &= ~(1 << slot);
+    }
+}
+
+/* Non-NCQ command is done - This function is never called for NCQ commands. */
 static void ahci_cmd_done(const IDEDMA *dma)
 {
    AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
+    IDEState *ide_state = &ad->port.ifs[0];

    trace_ahci_cmd_done(ad->hba, ad->port_no);

    /* no longer busy */
    if (ad->busy_slot != -1) {
-        ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
+        ahci_clear_cmd_issue(ad, ad->busy_slot);
        ad->busy_slot = -1;
    }

-    /* update d2h status */
-    ahci_write_fis_d2h(ad);
+    /*
+     * In reality, for non-NCQ commands, PxCI is cleared after receiving a D2H
+     * FIS with the interrupt bit set, but since ahci_write_fis_d2h() will raise
+     * an IRQ, we need to call them in reverse order.
+     */
+    ahci_write_fis_d2h(ad, true);

-    if (ad->port_regs.cmd_issue && !ad->check_bh) {
+    if (!(ide_state->status & ERR_STAT) &&
+        ad->port_regs.cmd_issue && !ad->check_bh) {
        ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad,
                                           &ad->mem_reentrancy_guard);
        qemu_bh_schedule(ad->check_bh);
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@ -533,9 +533,9 @@ BlockAIOCB *ide_issue_trim(

 void ide_abort_command(IDEState *s)
 {
-    ide_transfer_stop(s);
    s->status = READY_STAT | ERR_STAT;
    s->error = ABRT_ERR;
+    ide_transfer_stop(s);
 }

 static void ide_set_retry(IDEState *s)
@ -2515,19 +2515,19 @@ static void ide_dummy_transfer_stop(IDEState *s)

 void ide_bus_reset(IDEBus *bus)
 {
-    bus->unit = 0;
-    bus->cmd = 0;
-    ide_reset(&bus->ifs[0]);
-    ide_reset(&bus->ifs[1]);
-    ide_clear_hob(bus);
-
-    /* pending async DMA */
+    /* pending async DMA - needs the IDEState before it is reset */
    if (bus->dma->aiocb) {
        trace_ide_bus_reset_aio();
        blk_aio_cancel(bus->dma->aiocb);
        bus->dma->aiocb = NULL;
    }

+    bus->unit = 0;
+    bus->cmd = 0;
+    ide_reset(&bus->ifs[0]);
+    ide_reset(&bus->ifs[1]);
+    ide_clear_hob(bus);
+
    /* reset dma provider too */
    if (bus->dma->ops->reset) {
        bus->dma->ops->reset(bus->dma);
--- a/hw/input/lasips2.c
+++ b/hw/input/lasips2.c
@ -351,6 +351,11 @@ static void lasips2_port_class_init(ObjectClass *klass, void *data)
 {
    DeviceClass *dc = DEVICE_CLASS(klass);

+    /*
+     * The PS/2 mouse port is integreal part of LASI and can not be
+     * created by users without LASI.
+     */
+    dc->user_creatable = false;
    dc->realize = lasips2_port_realize;
 }

@ -397,6 +402,11 @@ static void lasips2_kbd_port_class_init(ObjectClass *klass, void *data)
    DeviceClass *dc = DEVICE_CLASS(klass);
    LASIPS2PortDeviceClass *lpdc = LASIPS2_PORT_CLASS(klass);

+    /*
+     * The PS/2 keyboard port is integreal part of LASI and can not be
+     * created by users without LASI.
+     */
+    dc->user_creatable = false;
    device_class_set_parent_realize(dc, lasips2_kbd_port_realize,
                                    &lpdc->parent_realize);
 }
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@ -1434,16 +1434,25 @@ static void icv_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri,
    idx = icv_find_active(cs, irq);

    if (idx < 0) {
-        /* No valid list register corresponding to EOI ID */
-        icv_increment_eoicount(cs);
+        /*
+         * No valid list register corresponding to EOI ID; if this is a vLPI
+         * not in the list regs then do nothing; otherwise increment EOI count
+         */
+        if (irq < GICV3_LPI_INTID_START) {
+            icv_increment_eoicount(cs);
+        }
    } else {
        uint64_t lr = cs->ich_lr_el2[idx];
        int thisgrp = (lr & ICH_LR_EL2_GROUP) ? GICV3_G1NS : GICV3_G0;
        int lr_gprio = ich_lr_prio(lr) & icv_gprio_mask(cs, grp);

        if (thisgrp == grp && lr_gprio == dropprio) {
-            if (!icv_eoi_split(env, cs)) {
-                /* Priority drop and deactivate not split: deactivate irq now */
+            if (!icv_eoi_split(env, cs) || irq >= GICV3_LPI_INTID_START) {
+                /*
+                 * Priority drop and deactivate not split: deactivate irq now.
+                 * LPIs always get their active state cleared immediately
+                 * because no separate deactivate is expected.
+                 */
                icv_deactivate_irq(cs, idx);
            }
        }
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@ -64,13 +64,13 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,
    uint64_t next;
    uint64_t diff;

-    uint64_t rtc_r = cpu_riscv_read_rtc(mtimer);
+    uint64_t rtc = cpu_riscv_read_rtc(mtimer);

    /* Compute the relative hartid w.r.t the socket */
    hartid = hartid - mtimer->hartid_base;

    mtimer->timecmp[hartid] = value;
-    if (mtimer->timecmp[hartid] <= rtc_r) {
+    if (mtimer->timecmp[hartid] <= rtc) {
        /*
         * If we're setting an MTIMECMP value in the "past",
         * immediately raise the timer interrupt
@ -81,7 +81,7 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer,

    /* otherwise, set up the future timer interrupt */
    qemu_irq_lower(mtimer->timer_irqs[hartid]);
-    diff = mtimer->timecmp[hartid] - rtc_r;
+    diff = mtimer->timecmp[hartid] - rtc;
    /* back to ns (note args switched in muldiv64) */
    uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);

@ -208,11 +208,12 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
        return;
    } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) {
        uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq);
+        uint64_t rtc = cpu_riscv_read_rtc(mtimer);

        if (addr == mtimer->time_base) {
            if (size == 4) {
                /* time_lo for RV32/RV64 */
-                mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r;
+                mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r;
            } else {
                /* time for RV64 */
                mtimer->time_delta = value - rtc_r;
@ -220,7 +221,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
        } else {
            if (size == 4) {
                /* time_hi for RV32/RV64 */
-                mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r;
+                mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r;
            } else {
                qemu_log_mask(LOG_GUEST_ERROR,
                              "aclint-mtimer: invalid time_hi write: %08x",
--- a/hw/mips/Kconfig
+++ b/hw/mips/Kconfig
@ -45,6 +45,7 @@ config LOONGSON3V
    select PCI_EXPRESS_GENERIC_BRIDGE
    select MSI_NONBROKEN
    select FW_CFG_MIPS
+    select UNIMP

 config MIPS_CPS
    bool
--- a/hw/mips/loongson3_virt.c
+++ b/hw/mips/loongson3_virt.c
@ -29,7 +29,6 @@
 #include "qemu/datadir.h"
 #include "qapi/error.h"
 #include "elf.h"
-#include "kvm_mips.h"
 #include "hw/char/serial.h"
 #include "hw/intc/loongson_liointc.h"
 #include "hw/mips/mips.h"
@ -612,7 +611,6 @@ static void loongson3v_machine_class_init(ObjectClass *oc, void *data)
    mc->max_cpus = LOONGSON_MAX_VCPUS;
    mc->default_ram_id = "loongson3.highram";
    mc->default_ram_size = 1600 * MiB;
-    mc->kvm_type = mips_kvm_type;
    mc->minimum_page_bits = 14;
    mc->default_nic = "virtio-net-pci";
 }
--- a/hw/misc/led.c
+++ b/hw/misc/led.c
@ -63,7 +63,7 @@ static void led_set_state_gpio_handler(void *opaque, int line, int new_state)
    LEDState *s = LED(opaque);

    assert(line == 0);
-    led_set_state(s, !!new_state != s->gpio_active_high);
+    led_set_state(s, !!new_state == s->gpio_active_high);
 }

 static void led_reset(DeviceState *dev)
--- a/hw/misc/mps2-scc.c
+++ b/hw/misc/mps2-scc.c
@ -329,6 +329,13 @@ static void mps2_scc_realize(DeviceState *dev, Error **errp)
    s->oscclk = g_new0(uint32_t, s->num_oscclk);
 }

+static void mps2_scc_finalize(Object *obj)
+{
+    MPS2SCC *s = MPS2_SCC(obj);
+
+    g_free(s->oscclk_reset);
+}
+
 static const VMStateDescription mps2_scc_vmstate = {
    .name = "mps2-scc",
    .version_id = 3,
@ -385,6 +392,7 @@ static const TypeInfo mps2_scc_info = {
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(MPS2SCC),
    .instance_init = mps2_scc_init,
+    .instance_finalize = mps2_scc_finalize,
    .class_init = mps2_scc_class_init,
 };

--- a/hw/net/allwinner-sun8i-emac.c
+++ b/hw/net/allwinner-sun8i-emac.c
@ -824,7 +824,8 @@ static void allwinner_sun8i_emac_realize(DeviceState *dev, Error **errp)

    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_allwinner_sun8i_emac_info, &s->conf,
-                           object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/allwinner_emac.c
+++ b/hw/net/allwinner_emac.c
@ -453,7 +453,8 @@ static void aw_emac_realize(DeviceState *dev, Error **errp)

    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_aw_emac_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

    fifo8_create(&s->rx_fifo, RX_FIFO_SIZE);
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@ -1633,7 +1633,8 @@ static void gem_realize(DeviceState *dev, Error **errp)
    qemu_macaddr_default_if_unset(&s->conf.macaddr);

    s->nic = qemu_new_nic(&net_gem_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);

    if (s->jumbo_max_len > MAX_FRAME_SIZE) {
        error_setg(errp, "jumbo-max-len is greater than %d",
--- a/hw/net/can/can_sja1000.c
+++ b/hw/net/can/can_sja1000.c
@ -108,7 +108,7 @@ void can_sja_single_filter(struct qemu_can_filter *filter,
        }

        filter->can_mask = (uint32_t)amr[0] << 3;
-        filter->can_mask |= (uint32_t)amr[1] << 5;
+        filter->can_mask |= (uint32_t)amr[1] >> 5;
        filter->can_mask = ~filter->can_mask & QEMU_CAN_SFF_MASK;
        if (!(amr[1] & 0x10)) {
            filter->can_mask |= QEMU_CAN_RTR_FLAG;
--- a/hw/net/dp8393x.c
+++ b/hw/net/dp8393x.c
@ -913,7 +913,8 @@ static void dp8393x_realize(DeviceState *dev, Error **errp)
                          "dp8393x-regs", SONIC_REG_COUNT << s->it_shift);

    s->nic = qemu_new_nic(&net_dp83932_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

    s->watchdog = timer_new_ns(QEMU_CLOCK_VIRTUAL, dp8393x_watchdog, s);
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@ -1687,7 +1687,8 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
                               macaddr);

    d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
-                          object_get_typename(OBJECT(d)), dev->id, d);
+                          object_get_typename(OBJECT(d)), dev->id,
+                          &dev->mem_reentrancy_guard, d);

    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);

--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@ -320,7 +320,7 @@ e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr)
    int i;

    s->nic = qemu_new_nic(&net_e1000e_info, &s->conf,
-        object_get_typename(OBJECT(s)), dev->id, s);
+        object_get_typename(OBJECT(s)), dev->id, &dev->mem_reentrancy_guard, s);

    s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0;

--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@ -1874,7 +1874,9 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp)
    nic_reset(s);

    s->nic = qemu_new_nic(&net_eepro100_info, &s->conf,
-                          object_get_typename(OBJECT(pci_dev)), pci_dev->qdev.id, s);
+                          object_get_typename(OBJECT(pci_dev)),
+                          pci_dev->qdev.id,
+                          &pci_dev->qdev.mem_reentrancy_guard, s);

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
    TRACE(OTHER, logout("%s\n", qemu_get_queue(s->nic)->info_str));
--- a/hw/net/etraxfs_eth.c
+++ b/hw/net/etraxfs_eth.c
@ -618,7 +618,8 @@ static void etraxfs_eth_realize(DeviceState *dev, Error **errp)

    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_etraxfs_info, &s->conf,
-                          object_get_typename(OBJECT(s)), dev->id, s);
+                          object_get_typename(OBJECT(s)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

    s->phy.read = tdk_read;
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c
@ -391,7 +391,8 @@ static void etsec_realize(DeviceState *dev, Error **errp)
    eTSEC        *etsec = ETSEC_COMMON(dev);

    etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf,
-                              object_get_typename(OBJECT(dev)), dev->id, etsec);
+                              object_get_typename(OBJECT(dev)), dev->id,
+                              &dev->mem_reentrancy_guard, etsec);
    qemu_format_nic_info_str(qemu_get_queue(etsec->nic), etsec->conf.macaddr.a);

    etsec->ptimer = ptimer_init(etsec_timer_hit, etsec, PTIMER_POLICY_LEGACY);
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@ -1110,7 +1110,8 @@ static void ftgmac100_realize(DeviceState *dev, Error **errp)
    qemu_macaddr_default_if_unset(&s->conf.macaddr);

    s->nic = qemu_new_nic(&net_ftgmac100_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/i82596.c
+++ b/hw/net/i82596.c
@ -725,7 +725,7 @@ void i82596_common_init(DeviceState *dev, I82596State *s, NetClientInfo *info)
        qemu_macaddr_default_if_unset(&s->conf.macaddr);
    }
    s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)),
-                dev->id, s);
+                dev->id, &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

    if (USE_TIMER) {
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@ -315,7 +315,7 @@ igb_init_net_peer(IGBState *s, PCIDevice *pci_dev, uint8_t *macaddr)
    int i;

    s->nic = qemu_new_nic(&net_igb_info, &s->conf,
-        object_get_typename(OBJECT(s)), dev->id, s);
+        object_get_typename(OBJECT(s)), dev->id, &dev->mem_reentrancy_guard, s);

    s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0;

--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@ -1334,7 +1334,7 @@ static void imx_eth_realize(DeviceState *dev, Error **errp)

    s->nic = qemu_new_nic(&imx_eth_net_info, &s->conf,
                          object_get_typename(OBJECT(dev)),
-                          dev->id, s);
+                          dev->id, &dev->mem_reentrancy_guard, s);

    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@ -1361,7 +1361,8 @@ static void lan9118_realize(DeviceState *dev, Error **errp)
    qemu_macaddr_default_if_unset(&s->conf.macaddr);

    s->nic = qemu_new_nic(&net_lan9118_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
    s->eeprom[0] = 0xa5;
    for (i = 0; i < 6; i++) {
--- a/hw/net/mcf_fec.c
+++ b/hw/net/mcf_fec.c
@ -643,7 +643,8 @@ static void mcf_fec_realize(DeviceState *dev, Error **errp)
    mcf_fec_state *s = MCF_FEC_NET(dev);

    s->nic = qemu_new_nic(&net_mcf_fec_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/mipsnet.c
+++ b/hw/net/mipsnet.c
@ -255,7 +255,8 @@ static void mipsnet_realize(DeviceState *dev, Error **errp)
    sysbus_init_irq(sbd, &s->irq);

    s->nic = qemu_new_nic(&net_mipsnet_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/msf2-emac.c
+++ b/hw/net/msf2-emac.c
@ -530,7 +530,8 @@ static void msf2_emac_realize(DeviceState *dev, Error **errp)

    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_msf2_emac_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/mv88w8618_eth.c
+++ b/hw/net/mv88w8618_eth.c
@ -350,7 +350,8 @@ static void mv88w8618_eth_realize(DeviceState *dev, Error **errp)

    address_space_init(&s->dma_as, s->dma_mr, "emac-dma");
    s->nic = qemu_new_nic(&net_mv88w8618_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
 }

 static const VMStateDescription mv88w8618_eth_vmsd = {
--- a/hw/net/ne2000-isa.c
+++ b/hw/net/ne2000-isa.c
@ -74,7 +74,8 @@ static void isa_ne2000_realizefn(DeviceState *dev, Error **errp)
    ne2000_reset(s);

    s->nic = qemu_new_nic(&net_ne2000_isa_info, &s->c,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a);
 }

--- a/hw/net/ne2000-pci.c
+++ b/hw/net/ne2000-pci.c
@ -71,7 +71,8 @@ static void pci_ne2000_realize(PCIDevice *pci_dev, Error **errp)

    s->nic = qemu_new_nic(&net_ne2000_info, &s->c,
                          object_get_typename(OBJECT(pci_dev)),
-                          pci_dev->qdev.id, s);
+                          pci_dev->qdev.id,
+                          &pci_dev->qdev.mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a);
 }

--- a/hw/net/npcm7xx_emc.c
+++ b/hw/net/npcm7xx_emc.c
@ -821,7 +821,8 @@ static void npcm7xx_emc_realize(DeviceState *dev, Error **errp)

    qemu_macaddr_default_if_unset(&emc->conf.macaddr);
    emc->nic = qemu_new_nic(&net_npcm7xx_emc_info, &emc->conf,
-                            object_get_typename(OBJECT(dev)), dev->id, emc);
+                            object_get_typename(OBJECT(dev)), dev->id,
+                            &dev->mem_reentrancy_guard, emc);
    qemu_format_nic_info_str(qemu_get_queue(emc->nic), emc->conf.macaddr.a);
 }

--- a/hw/net/opencores_eth.c
+++ b/hw/net/opencores_eth.c
@ -732,7 +732,8 @@ static void sysbus_open_eth_realize(DeviceState *dev, Error **errp)
    sysbus_init_irq(sbd, &s->irq);

    s->nic = qemu_new_nic(&net_open_eth_info, &s->conf,
-                          object_get_typename(OBJECT(s)), dev->id, s);
+                          object_get_typename(OBJECT(s)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
 }

 static void qdev_open_eth_reset(DeviceState *dev)
--- a/hw/net/pcnet.c
+++ b/hw/net/pcnet.c
@ -1709,7 +1709,8 @@ void pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info)
    s->poll_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pcnet_poll_timer, s);

    qemu_macaddr_default_if_unset(&s->conf.macaddr);
-    s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s);
+    s->nic = qemu_new_nic(info, &s->conf, object_get_typename(OBJECT(dev)),
+                          dev->id, &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

    /* Initialize the PROM */
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@ -241,8 +241,8 @@ FpPort *fp_port_alloc(Rocker *r, char *sw_name,
    port->conf.bootindex = -1;
    port->conf.peers = *peers;

-    port->nic = qemu_new_nic(&fp_port_info, &port->conf,
-                             sw_name, NULL, port);
+    port->nic = qemu_new_nic(&fp_port_info, &port->conf, sw_name, NULL,
+                             &DEVICE(r)->mem_reentrancy_guard, port);
    qemu_format_nic_info_str(qemu_get_queue(port->nic),
                             port->conf.macaddr.a);

--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@ -3388,7 +3388,8 @@ static void pci_rtl8139_realize(PCIDevice *dev, Error **errp)
    s->eeprom.contents[9] = s->conf.macaddr.a[4] | s->conf.macaddr.a[5] << 8;

    s->nic = qemu_new_nic(&net_rtl8139_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), d->id, s);
+                          object_get_typename(OBJECT(dev)), d->id,
+                          &d->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);

    s->cplus_txbuffer = NULL;
--- a/hw/net/smc91c111.c
+++ b/hw/net/smc91c111.c
@ -783,7 +783,8 @@ static void smc91c111_realize(DeviceState *dev, Error **errp)
    sysbus_init_irq(sbd, &s->irq);
    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_smc91c111_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
    /* ??? Save/restore.  */
 }
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@ -325,7 +325,8 @@ static void spapr_vlan_realize(SpaprVioDevice *sdev, Error **errp)
    memcpy(&dev->perm_mac.a, &dev->nicconf.macaddr.a, sizeof(dev->perm_mac.a));

    dev->nic = qemu_new_nic(&net_spapr_vlan_info, &dev->nicconf,
-                            object_get_typename(OBJECT(sdev)), sdev->qdev.id, dev);
+                            object_get_typename(OBJECT(sdev)), sdev->qdev.id,
+                            &sdev->qdev.mem_reentrancy_guard, dev);
    qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a);

    dev->rxp_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, spapr_vlan_flush_rx_queue,
--- a/hw/net/stellaris_enet.c
+++ b/hw/net/stellaris_enet.c
@ -492,7 +492,8 @@ static void stellaris_enet_realize(DeviceState *dev, Error **errp)
    qemu_macaddr_default_if_unset(&s->conf.macaddr);

    s->nic = qemu_new_nic(&net_stellaris_enet_info, &s->conf,
-                          object_get_typename(OBJECT(dev)), dev->id, s);
+                          object_get_typename(OBJECT(dev)), dev->id,
+                          &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/sungem.c
+++ b/hw/net/sungem.c
@ -1399,7 +1399,7 @@ static void sungem_realize(PCIDevice *pci_dev, Error **errp)
    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_sungem_info, &s->conf,
                          object_get_typename(OBJECT(dev)),
-                          dev->id, s);
+                          dev->id, &dev->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic),
                             s->conf.macaddr.a);
 }
--- a/hw/net/sunhme.c
+++ b/hw/net/sunhme.c
@ -881,7 +881,8 @@ static void sunhme_realize(PCIDevice *pci_dev, Error **errp)

    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    s->nic = qemu_new_nic(&net_sunhme_info, &s->conf,
-                          object_get_typename(OBJECT(d)), d->id, s);
+                          object_get_typename(OBJECT(d)), d->id,
+                          &d->mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
 }

--- a/hw/net/tulip.c
+++ b/hw/net/tulip.c
@ -983,7 +983,8 @@ static void pci_tulip_realize(PCIDevice *pci_dev, Error **errp)

    s->nic = qemu_new_nic(&net_tulip_info, &s->c,
                          object_get_typename(OBJECT(pci_dev)),
-                          pci_dev->qdev.id, s);
+                          pci_dev->qdev.id,
+                          &pci_dev->qdev.mem_reentrancy_guard, s);
    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->c.macaddr.a);
 }

--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@ -667,6 +667,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,

    n->mergeable_rx_bufs = mergeable_rx_bufs;

+    /*
+     * Note: when extending the vnet header, please make sure to
+     * change the vnet header copying logic in virtio_net_flush_tx()
+     * as well.
+     */
    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
@ -2674,7 +2679,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
-        struct virtio_net_hdr_mrg_rxbuf mhdr;
+        struct virtio_net_hdr_v1_hash vhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
@ -2691,7 +2696,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
        }

        if (n->has_vnet_hdr) {
-            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
+            if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
@ -2699,8 +2704,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
-                virtio_net_hdr_swap(vdev, (void *) &mhdr);
-                sg2[0].iov_base = &mhdr;
+                virtio_net_hdr_swap(vdev, (void *) &vhdr);
+                sg2[0].iov_base = &vhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
@ -3695,10 +3700,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
-                              n->netclient_type, n->netclient_name, n);
+                              n->netclient_type, n->netclient_name,
+                              &dev->mem_reentrancy_guard, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
-                              object_get_typename(OBJECT(dev)), dev->id, n);
+                              object_get_typename(OBJECT(dev)), dev->id,
+                              &dev->mem_reentrancy_guard, n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .1.0
 .1.5