From 319de6fe6ea4b68288267bacc4574a605341f8fe Mon Sep 17 00:00:00 2001 From: David Gibson Date: Tue, 29 Mar 2016 17:27:10 +1100 Subject: [PATCH 01/13] target-ppc: Correct KVM synchronization for ppc_hash64_set_external_hpt() ppc_hash64_set_external_hpt() was added in e5c0d3c "target-ppc: Add helpers for updating a CPU's SDR1 and external HPT". This helper contains a cpu_synchronize_state() since it may need to push state back to KVM afterwards. This turns out to break things when it is used in the reset path, which is the only current user. It appears that kvm_vcpu_dirty is not being set early in the reset path, so the cpu_synchronize_state() is clobbering state set up by the early part of the cpu reset path with stale state from KVM. This may require some changes to the generic cpu reset path to fix properly, but as a short term fix we can just remove the cpu_synchronize_state() from ppc_hash64_set_external_hpt(), and require any non-reset path callers to do that manually. Reported-by: Laurent Vivier Signed-off-by: David Gibson --- target-ppc/mmu-hash64.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 04e6932fa0..17e24800cb 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -284,8 +284,6 @@ void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, CPUPPCState *env = &cpu->env; Error *local_err = NULL; - cpu_synchronize_state(CPU(cpu)); - if (hpt) { env->external_htab = hpt; } else { From 24f9cd951d1a8eda1ce1e2cf97a26ca169040331 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 23 Feb 2016 17:18:34 -0800 Subject: [PATCH 02/13] target-ppc: Use movcond in isel Signed-off-by: Richard Henderson Signed-off-by: David Gibson --- target-ppc/translate.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 745f4de98f..3ea6625d26 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -756,27 +756,20 @@ static void gen_cmpli(DisasContext *ctx) /* isel (PowerPC 2.03 specification) */ static void gen_isel(DisasContext *ctx) { - TCGLabel *l1, *l2; uint32_t bi = rC(ctx->opcode); - uint32_t mask; - TCGv_i32 t0; + uint32_t mask = 0x08 >> (bi & 0x03); + TCGv t0 = tcg_temp_new(); + TCGv zr; - l1 = gen_new_label(); - l2 = gen_new_label(); + tcg_gen_extu_i32_tl(t0, cpu_crf[bi >> 2]); + tcg_gen_andi_tl(t0, t0, mask); - mask = 0x08 >> (bi & 0x03); - t0 = tcg_temp_new_i32(); - tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask); - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1); - if (rA(ctx->opcode) == 0) - tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], 0); - else - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); - gen_set_label(l2); - tcg_temp_free_i32(t0); + zr = tcg_const_tl(0); + tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t0, zr, + rA(ctx->opcode) ? cpu_gpr[rA(ctx->opcode)] : zr, + cpu_gpr[rB(ctx->opcode)]); + tcg_temp_free(zr); + tcg_temp_free(t0); } /* cmpb: PowerPC 2.05 specification */ From 63ae0915f8ec9746d17a25c27f64cf65550627ab Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 23 Feb 2016 17:18:35 -0800 Subject: [PATCH 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate A 32-bit rotate insn is more common on hosts than a deposit insn, and if the host has neither the result is truely horrific. At the same time, tidy up the temporaries within these functions, drop the over-use of "likely", drop some checks for identity that will also be checked by tcg-op.c functions, and special case mask without rotate within rlwinm. Signed-off-by: Richard Henderson Signed-off-by: David Gibson --- target-ppc/translate.c | 170 +++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 101 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 3ea6625d26..b392ecc944 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -1610,141 +1610,109 @@ static void gen_cntlzd(DisasContext *ctx) /* rlwimi & rlwimi. */ static void gen_rlwimi(DisasContext *ctx) { - uint32_t mb, me, sh; + TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; + TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + uint32_t sh = SH(ctx->opcode); + uint32_t mb = MB(ctx->opcode); + uint32_t me = ME(ctx->opcode); - mb = MB(ctx->opcode); - me = ME(ctx->opcode); - sh = SH(ctx->opcode); - if (likely(sh == (31-me) && mb <= me)) { - tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1); + if (sh == (31-me) && mb <= me) { + tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1); } else { target_ulong mask; + TCGv_i32 t0; TCGv t1; - TCGv t0 = tcg_temp_new(); -#if defined(TARGET_PPC64) - tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)], - cpu_gpr[rS(ctx->opcode)], 32, 32); - tcg_gen_rotli_i64(t0, t0, sh); -#else - tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh); -#endif + #if defined(TARGET_PPC64) mb += 32; me += 32; #endif mask = MASK(mb, me); + + t0 = tcg_temp_new_i32(); t1 = tcg_temp_new(); - tcg_gen_andi_tl(t0, t0, mask); - tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask); - tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1); - tcg_temp_free(t0); + tcg_gen_trunc_tl_i32(t0, t_rs); + tcg_gen_rotli_i32(t0, t0, sh); + tcg_gen_extu_i32_tl(t1, t0); + tcg_temp_free_i32(t0); + + tcg_gen_andi_tl(t1, t1, mask); + tcg_gen_andi_tl(t_ra, t_ra, ~mask); + tcg_gen_or_tl(t_ra, t_ra, t1); tcg_temp_free(t1); } - if (unlikely(Rc(ctx->opcode) != 0)) - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, t_ra); + } } /* rlwinm & rlwinm. */ static void gen_rlwinm(DisasContext *ctx) { - uint32_t mb, me, sh; + TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; + TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + uint32_t sh = SH(ctx->opcode); + uint32_t mb = MB(ctx->opcode); + uint32_t me = ME(ctx->opcode); - sh = SH(ctx->opcode); - mb = MB(ctx->opcode); - me = ME(ctx->opcode); - - if (likely(mb == 0 && me == (31 - sh))) { - if (likely(sh == 0)) { - tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); - } else { - TCGv t0 = tcg_temp_new(); - tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]); - tcg_gen_shli_tl(t0, t0, sh); - tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0); - tcg_temp_free(t0); - } - } else if (likely(sh != 0 && me == 31 && sh == (32 - mb))) { - TCGv t0 = tcg_temp_new(); - tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]); - tcg_gen_shri_tl(t0, t0, mb); - tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0); - tcg_temp_free(t0); - } else if (likely(mb == 0 && me == 31)) { - TCGv_i32 t0 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]); - tcg_gen_rotli_i32(t0, t0, sh); - tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0); - tcg_temp_free_i32(t0); + if (mb == 0 && me == (31 - sh)) { + tcg_gen_shli_tl(t_ra, t_rs, sh); + tcg_gen_ext32u_tl(t_ra, t_ra); + } else if (sh != 0 && me == 31 && sh == (32 - mb)) { + tcg_gen_ext32u_tl(t_ra, t_rs); + tcg_gen_shri_tl(t_ra, t_ra, mb); } else { - TCGv t0 = tcg_temp_new(); -#if defined(TARGET_PPC64) - tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)], - cpu_gpr[rS(ctx->opcode)], 32, 32); - tcg_gen_rotli_i64(t0, t0, sh); -#else - tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh); -#endif #if defined(TARGET_PPC64) mb += 32; me += 32; #endif - tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me)); - tcg_temp_free(t0); + if (sh == 0) { + tcg_gen_andi_tl(t_ra, t_rs, MASK(mb, me)); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, t_rs); + tcg_gen_rotli_i32(t0, t0, sh); + tcg_gen_andi_i32(t0, t0, MASK(mb, me)); + tcg_gen_extu_i32_tl(t_ra, t0); + tcg_temp_free_i32(t0); + } + } + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, t_ra); } - if (unlikely(Rc(ctx->opcode) != 0)) - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } /* rlwnm & rlwnm. */ static void gen_rlwnm(DisasContext *ctx) { - uint32_t mb, me; - mb = MB(ctx->opcode); - me = ME(ctx->opcode); + TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; + TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + TCGv t_rb = cpu_gpr[rB(ctx->opcode)]; + uint32_t mb = MB(ctx->opcode); + uint32_t me = ME(ctx->opcode); + TCGv_i32 t0, t1; - if (likely(mb == 0 && me == 31)) { - TCGv_i32 t0, t1; - t0 = tcg_temp_new_i32(); - t1 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx->opcode)]); - tcg_gen_andi_i32(t0, t0, 0x1f); - tcg_gen_rotl_i32(t1, t1, t0); - tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t1); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); - } else { - TCGv t0; #if defined(TARGET_PPC64) - TCGv t1; + mb += 32; + me += 32; #endif - t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f); -#if defined(TARGET_PPC64) - t1 = tcg_temp_new_i64(); - tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)], - cpu_gpr[rS(ctx->opcode)], 32, 32); - tcg_gen_rotl_i64(t0, t1, t0); - tcg_temp_free_i64(t1); -#else - tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0); -#endif - if (unlikely(mb != 0 || me != 31)) { -#if defined(TARGET_PPC64) - mb += 32; - me += 32; -#endif - tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me)); - } else { - tcg_gen_andi_tl(t0, t0, MASK(32, 63)); - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0); - } - tcg_temp_free(t0); + t0 = tcg_temp_new_i32(); + t1 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t0, t_rb); + tcg_gen_trunc_tl_i32(t1, t_rs); + tcg_gen_andi_i32(t0, t0, 0x1f); + tcg_gen_rotl_i32(t1, t1, t0); + tcg_temp_free_i32(t0); + + tcg_gen_andi_i32(t1, t1, MASK(mb, me)); + tcg_gen_extu_i32_tl(t_ra, t1); + tcg_temp_free_i32(t1); + + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, t_ra); } - if (unlikely(Rc(ctx->opcode) != 0)) - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } #if defined(TARGET_PPC64) From a7b2c8b90ad4b26b0152ff254f3f091ca198a972 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 23 Feb 2016 17:18:36 -0800 Subject: [PATCH 04/13] target-ppc: Cleanups to rldinm, rldnm, rldimi Mirror the cleanups just done to rlwinm, rlwnm and rlwimi. This adds use of deposit to rldimi. Signed-off-by: Richard Henderson Signed-off-by: David Gibson --- target-ppc/translate.c | 91 +++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index b392ecc944..f5ceae5900 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -1747,26 +1747,24 @@ static void glue(gen_, name##3)(DisasContext *ctx) \ gen_##name(ctx, 1, 1); \ } -static inline void gen_rldinm(DisasContext *ctx, uint32_t mb, uint32_t me, - uint32_t sh) +static void gen_rldinm(DisasContext *ctx, int mb, int me, int sh) { - if (likely(sh != 0 && mb == 0 && me == (63 - sh))) { - tcg_gen_shli_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], sh); - } else if (likely(sh != 0 && me == 63 && sh == (64 - mb))) { - tcg_gen_shri_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], mb); + TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; + TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + + if (sh != 0 && mb == 0 && me == (63 - sh)) { + tcg_gen_shli_tl(t_ra, t_rs, sh); + } else if (sh != 0 && me == 63 && sh == (64 - mb)) { + tcg_gen_shri_tl(t_ra, t_rs, mb); } else { - TCGv t0 = tcg_temp_new(); - tcg_gen_rotli_tl(t0, cpu_gpr[rS(ctx->opcode)], sh); - if (likely(mb == 0 && me == 63)) { - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0); - } else { - tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me)); - } - tcg_temp_free(t0); + tcg_gen_rotli_tl(t_ra, t_rs, sh); + tcg_gen_andi_tl(t_ra, t_ra, MASK(mb, me)); + } + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, t_ra); } - if (unlikely(Rc(ctx->opcode) != 0)) - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } + /* rldicl - rldicl. */ static inline void gen_rldicl(DisasContext *ctx, int mbn, int shn) { @@ -1777,6 +1775,7 @@ static inline void gen_rldicl(DisasContext *ctx, int mbn, int shn) gen_rldinm(ctx, mb, 63, sh); } GEN_PPC64_R4(rldicl, 0x1E, 0x00); + /* rldicr - rldicr. */ static inline void gen_rldicr(DisasContext *ctx, int men, int shn) { @@ -1787,6 +1786,7 @@ static inline void gen_rldicr(DisasContext *ctx, int men, int shn) gen_rldinm(ctx, 0, me, sh); } GEN_PPC64_R4(rldicr, 0x1E, 0x02); + /* rldic - rldic. */ static inline void gen_rldic(DisasContext *ctx, int mbn, int shn) { @@ -1798,21 +1798,22 @@ static inline void gen_rldic(DisasContext *ctx, int mbn, int shn) } GEN_PPC64_R4(rldic, 0x1E, 0x04); -static inline void gen_rldnm(DisasContext *ctx, uint32_t mb, uint32_t me) +static void gen_rldnm(DisasContext *ctx, int mb, int me) { + TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; + TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + TCGv t_rb = cpu_gpr[rB(ctx->opcode)]; TCGv t0; t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3f); - tcg_gen_rotl_tl(t0, cpu_gpr[rS(ctx->opcode)], t0); - if (unlikely(mb != 0 || me != 63)) { - tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me)); - } else { - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0); - } + tcg_gen_andi_tl(t0, t_rb, 0x3f); + tcg_gen_rotl_tl(t_ra, t_rs, t0); tcg_temp_free(t0); - if (unlikely(Rc(ctx->opcode) != 0)) - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); + + tcg_gen_andi_tl(t_ra, t_ra, MASK(mb, me)); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, t_ra); + } } /* rldcl - rldcl. */ @@ -1824,6 +1825,7 @@ static inline void gen_rldcl(DisasContext *ctx, int mbn) gen_rldnm(ctx, mb, 63); } GEN_PPC64_R2(rldcl, 0x1E, 0x08); + /* rldcr - rldcr. */ static inline void gen_rldcr(DisasContext *ctx, int men) { @@ -1833,32 +1835,31 @@ static inline void gen_rldcr(DisasContext *ctx, int men) gen_rldnm(ctx, 0, me); } GEN_PPC64_R2(rldcr, 0x1E, 0x09); + /* rldimi - rldimi. */ -static inline void gen_rldimi(DisasContext *ctx, int mbn, int shn) +static void gen_rldimi(DisasContext *ctx, int mbn, int shn) { - uint32_t sh, mb, me; + TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; + TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; + uint32_t sh = SH(ctx->opcode) | (shn << 5); + uint32_t mb = MB(ctx->opcode) | (mbn << 5); + uint32_t me = 63 - sh; - sh = SH(ctx->opcode) | (shn << 5); - mb = MB(ctx->opcode) | (mbn << 5); - me = 63 - sh; - if (unlikely(sh == 0 && mb == 0)) { - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + if (mb <= me) { + tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1); } else { - TCGv t0, t1; - target_ulong mask; + target_ulong mask = MASK(mb, me); + TCGv t1 = tcg_temp_new(); - t0 = tcg_temp_new(); - tcg_gen_rotli_tl(t0, cpu_gpr[rS(ctx->opcode)], sh); - t1 = tcg_temp_new(); - mask = MASK(mb, me); - tcg_gen_andi_tl(t0, t0, mask); - tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask); - tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1); - tcg_temp_free(t0); + tcg_gen_rotli_tl(t1, t_rs, sh); + tcg_gen_andi_tl(t1, t1, mask); + tcg_gen_andi_tl(t_ra, t_ra, ~mask); + tcg_gen_or_tl(t_ra, t_ra, t1); tcg_temp_free(t1); } - if (unlikely(Rc(ctx->opcode) != 0)) - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, t_ra); + } } GEN_PPC64_R4(rldimi, 0x1E, 0x06); #endif From 8836630f5d2df198dca2da9a1f1fb2d2a0c310a0 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 31 Mar 2016 13:47:05 +0200 Subject: [PATCH 05/13] hw/net/spapr_llan: Delay flushing of the RX queue while adding new RX buffers Currently, the spapr-vlan device is trying to flush the RX queue after each RX buffer that has been added by the guest via the H_ADD_LOGICAL_LAN_BUFFER hypercall. In case the receive buffer pool was empty before, we only pass single packets to the guest this way. This can cause very bad performance if a sender is trying to stream fragmented UDP packets to the guest. For example when using the UDP_STREAM test from netperf with UDP packets that are much bigger than the MTU size, almost all UDP packets are dropped in the guest since the chances are quite high that at least one of the fragments got lost on the way. When flushing the receive queue, it's much better if we'd have a bunch of receive buffers available already, so that fragmented packets can be passed to the guest in one go. To do this, the spapr_vlan_receive() function should return 0 instead of -1 if there are no more receive buffers available, so that receive_disabled = 1 gets temporarily set for the receive queue, and we have to delay the queue flushing at the end of h_add_logical_lan_buffer() a little bit by using a timer, so that the guest gets a chance to add multiple RX buffers before we flush the queue again. This improves the UDP_STREAM test with the spapr-vlan device a lot: Running netserver -p 44444 -L -f -D -4 in the guest, and netperf -p 44444 -L -H -t UDP_STREAM -l 60 -- -m 16384 in the host, I get the following values _without_ this patch: Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 229376 16384 60.00 1738970 0 3798.83 229376 60.00 23 0.05 That "0.05" means that almost all UDP packets got lost/discarded at the receiving side. With this patch applied, the value look much better: Socket Message Elapsed Messages Size Size Time Okay Errors Throughput bytes bytes secs # # 10^6bits/sec 229376 16384 60.00 1789104 0 3908.35 229376 60.00 22818 49.85 Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- hw/net/spapr_llan.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index a8266f8ec7..36a9214128 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -110,6 +110,7 @@ typedef struct VIOsPAPRVLANDevice { hwaddr buf_list; uint32_t add_buf_ptr, use_buf_ptr, rx_bufs; hwaddr rxq_ptr; + QEMUTimer *rxp_timer; uint32_t compat_flags; /* Compatability flags for migration */ RxBufPool *rx_pool[RX_MAX_POOLS]; /* Receive buffer descriptor pools */ } VIOsPAPRVLANDevice; @@ -206,7 +207,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, } if (!dev->rx_bufs) { - return -1; + return 0; } if (dev->compat_flags & SPAPRVLAN_FLAG_RX_BUF_POOLS) { @@ -215,7 +216,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, bd = spapr_vlan_get_rx_bd_from_page(dev, size); } if (!bd) { - return -1; + return 0; } dev->rx_bufs--; @@ -266,6 +267,13 @@ static NetClientInfo net_spapr_vlan_info = { .receive = spapr_vlan_receive, }; +static void spapr_vlan_flush_rx_queue(void *opaque) +{ + VIOsPAPRVLANDevice *dev = opaque; + + qemu_flush_queued_packets(qemu_get_queue(dev->nic)); +} + static void spapr_vlan_reset_rx_pool(RxBufPool *rxp) { /* @@ -302,6 +310,9 @@ static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp) dev->nic = qemu_new_nic(&net_spapr_vlan_info, &dev->nicconf, object_get_typename(OBJECT(sdev)), sdev->qdev.id, dev); qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a); + + dev->rxp_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, spapr_vlan_flush_rx_queue, + dev); } static void spapr_vlan_instance_init(Object *obj) @@ -332,6 +343,11 @@ static void spapr_vlan_instance_finalize(Object *obj) dev->rx_pool[i] = NULL; } } + + if (dev->rxp_timer) { + timer_del(dev->rxp_timer); + timer_free(dev->rxp_timer); + } } void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd) @@ -629,7 +645,13 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, dev->rx_bufs++; - qemu_flush_queued_packets(qemu_get_queue(dev->nic)); + /* + * Give guest some more time to add additional RX buffers before we + * flush the receive queue, so that e.g. fragmented IP packets can + * be passed to the guest in one go later (instead of passing single + * fragments if there is only one receive buffer available). + */ + timer_mod(dev->rxp_timer, qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + 500); return H_SUCCESS; } From 5c29dd8c28c3c7718cc37b38c5171b3caf629928 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 4 Apr 2016 12:13:10 +0200 Subject: [PATCH 06/13] hw/net/spapr_llan: Provide counter with dropped rx frames to the guest The last 8 bytes of the receive buffer list page (that has been supplied by the guest with the H_REGISTER_LOGICAL_LAN call) contain a counter for frames that have been dropped because there was no suitable receive buffer available. This patch introduces code to use this field to provide the information about dropped rx packets to the guest. There it can be queried with "ethtool -S eth0 | grep rx_no_buffer". Signed-off-by: Thomas Huth Signed-off-by: David Gibson --- hw/net/spapr_llan.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 36a9214128..8b2eebd4e3 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -122,6 +122,21 @@ static int spapr_vlan_can_receive(NetClientState *nc) return (dev->isopen && dev->rx_bufs > 0); } +/** + * The last 8 bytes of the receive buffer list page (that has been + * supplied by the guest with the H_REGISTER_LOGICAL_LAN call) contain + * a counter for frames that have been dropped because there was no + * suitable receive buffer available. This function is used to increase + * this counter by one. + */ +static void spapr_vlan_record_dropped_rx_frame(VIOsPAPRVLANDevice *dev) +{ + uint64_t cnt; + + cnt = vio_ldq(&dev->sdev, dev->buf_list + 4096 - 8); + vio_stq(&dev->sdev, dev->buf_list + 4096 - 8, cnt + 1); +} + /** * Get buffer descriptor from one of our receive buffer pools */ @@ -207,6 +222,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, } if (!dev->rx_bufs) { + spapr_vlan_record_dropped_rx_frame(dev); return 0; } @@ -216,6 +232,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, bd = spapr_vlan_get_rx_bd_from_page(dev, size); } if (!bd) { + spapr_vlan_record_dropped_rx_frame(dev); return 0; } From 8afc22a20fba6aa078e4e7a625d16d78da768323 Mon Sep 17 00:00:00 2001 From: Zhou Jie Date: Mon, 25 Apr 2016 11:36:06 -0400 Subject: [PATCH 07/13] Added negative check for get_image_size() This patch adds check for negative return value from get_image_size(), where it is missing. It avoids unnecessary two function calls. Signed-off-by: Zhou Jie Signed-off-by: David Gibson --- hw/ppc/spapr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index add68acfef..3b0845f158 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1842,6 +1842,10 @@ static void ppc_spapr_init(MachineState *machine) exit(1); } spapr->rtas_size = get_image_size(filename); + if (spapr->rtas_size < 0) { + error_report("Could not get size of LPAR rtas '%s'", filename); + exit(1); + } spapr->rtas_blob = g_malloc(spapr->rtas_size); if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) { error_report("Could not load LPAR rtas '%s'", filename); From 41264b385c2b324fea026204e5de9bef980733b1 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 26 Apr 2016 15:41:04 +0200 Subject: [PATCH 08/13] PPC/KVM: early validation of vcpu id The KVM API restricts vcpu ids to be < KVM_CAP_MAX_VCPUS. On PowerPC targets, depending on the number of threads per core in the host and in the guest, some topologies do generate higher vcpu ids actually. When this happens, QEMU bails out with the following error: kvm_init_vcpu failed: Invalid argument The KVM_CREATE_VCPU ioctl has several EINVAL return paths, so it is not possible to fully disambiguate. This patch adds a check in the code that computes vcpu ids, so that we can detect the error earlier, and print a friendlier message instead of calling KVM_CREATE_VCPU with an obviously bogus vcpu id. Signed-off-by: Greg Kurz Signed-off-by: David Gibson --- include/sysemu/kvm.h | 2 ++ kvm-all.c | 6 ++++++ target-ppc/translate_init.c | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index f9f00e2e56..f357ccde91 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -345,6 +345,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s); int kvm_arch_init_vcpu(CPUState *cpu); +bool kvm_vcpu_id_is_valid(int vcpu_id); + /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ unsigned long kvm_arch_vcpu_id(CPUState *cpu); diff --git a/kvm-all.c b/kvm-all.c index f9ae8f9bf8..e56f385278 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1459,6 +1459,12 @@ static int kvm_max_vcpus(KVMState *s) return (ret) ? ret : kvm_recommended_vcpus(s); } +bool kvm_vcpu_id_is_valid(int vcpu_id) +{ + KVMState *s = KVM_STATE(current_machine->accelerator); + return vcpu_id >= 0 && vcpu_id < kvm_max_vcpus(s); +} + static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 954195f5e4..a003c1029d 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -9231,6 +9231,14 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) #if !defined(CONFIG_USER_ONLY) cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt + (cs->cpu_index % smp_threads); + + if (kvm_enabled() && !kvm_vcpu_id_is_valid(cpu->cpu_dt_id)) { + error_setg(errp, "Can't create CPU with id %d in KVM", cpu->cpu_dt_id); + error_append_hint(errp, "Adjust the number of cpus to %d " + "or try to raise the number of threads per core\n", + cpu->cpu_dt_id * smp_threads / max_smt); + return; + } #endif if (tcg_enabled()) { From 5dd5238c0b5cc42d347128227f124fe31b05832d Mon Sep 17 00:00:00 2001 From: Jianjun Duan Date: Tue, 24 May 2016 10:55:04 -0700 Subject: [PATCH 09/13] spapr: ensure device trees are always associated with DRC There are possible racing situations involving hotplug events and guest migration. For cases where a hotplug event is migrated, or the guest is in the process of fetching device tree at the time of migration, we need to ensure the device tree is created and associated with the corresponding DRC for devices that were hotplugged on the source, but 'coldplugged' on the target. Signed-off-by: Jianjun Duan Signed-off-by: David Gibson --- hw/ppc/spapr.c | 16 ++++++---------- hw/ppc/spapr_pci.c | 12 +++++------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 3b0845f158..44e401ae99 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2136,15 +2136,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, int i, fdt_offset, fdt_size; void *fdt; - /* - * Check for DRC connectors and send hotplug notification to the - * guest only in case of hotplugged memory. This allows cold plugged - * memory to be specified at boot time. - */ - if (!dev->hotplugged) { - return; - } - for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, addr/SPAPR_MEMORY_BLOCK_SIZE); @@ -2158,7 +2149,12 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp); addr += SPAPR_MEMORY_BLOCK_SIZE; } - spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); + /* send hotplug notification to the + * guest only in case of hotplugged memory + */ + if (dev->hotplugged) { + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); + } } static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index e55b505c96..91a356f851 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1093,13 +1093,11 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc, spapr_tce_set_need_vfio(tcet, true); } - if (dev->hotplugged) { - fdt = create_device_tree(&fdt_size); - fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); - if (!fdt_start_offset) { - error_setg(errp, "Failed to create pci child device tree node"); - goto out; - } + fdt = create_device_tree(&fdt_size); + fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); + if (!fdt_start_offset) { + error_setg(errp, "Failed to create pci child device tree node"); + goto out; } drck->attach(drc, DEVICE(pdev), From eded5bac3b2fa51f6165030b0e8ed718247af188 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 May 2016 16:52:18 +1000 Subject: [PATCH 10/13] spapr_pci: Use correct DMA LIOBN when composing the device tree The user could have picked LIOBN via the CLI but the device tree rendering code would still use the value derived from the PHB index (which is the default fallback if LIOBN is not set in the CLI). This replaces SPAPR_PCI_LIOBN() with the actual DMA LIOBN value. Signed-off-by: Alexey Kardashevskiy Signed-off-by: David Gibson --- hw/ppc/spapr_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 91a356f851..856aec7f51 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1814,7 +1814,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map, sizeof(interrupt_map))); - tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0)); + tcet = spapr_tce_find_by_liobn(phb->dma_liobn); if (!tcet) { return -1; } From f94819d6012fbc2a6022eeacd65608e58dacbada Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 May 2016 16:52:21 +1000 Subject: [PATCH 11/13] spapr_iommu: Finish renaming vfio_accel to need_vfio 6a81dd17 "spapr_iommu: Rename vfio_accel parameter" renamed vfio_accel flag everywhere but one spot was missed. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson --- target-ppc/kvm_ppc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index fc7931227d..3b2090e42e 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -163,7 +163,7 @@ static inline bool kvmppc_spapr_use_multitce(void) static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *fd, - bool vfio_accel) + bool need_vfio) { return NULL; } From fec5d3a1cd8fea7d70778f8b408589d75c88c3e7 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 May 2016 16:52:19 +1000 Subject: [PATCH 12/13] spapr_iommu: Move table allocation to helpers At the moment presence of vfio-pci devices on a bus affect the way the guest view table is allocated. If there is no vfio-pci on a PHB and the host kernel supports KVM acceleration of H_PUT_TCE, a table is allocated in KVM. However, if there is vfio-pci and we do yet not KVM acceleration for these, the table has to be allocated by the userspace. At the moment the table is allocated once at boot time but next patches will reallocate it. This moves kvmppc_create_spapr_tce/g_malloc0 and their counterparts to helpers. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: David Gibson --- hw/ppc/spapr_iommu.c | 58 +++++++++++++++++++++++++++++--------------- trace-events | 2 +- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index b6fe1dd986..96bb0181a7 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -76,6 +76,37 @@ static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) } } +static uint64_t *spapr_tce_alloc_table(uint32_t liobn, + uint32_t page_shift, + uint32_t nb_table, + int *fd, + bool need_vfio) +{ + uint64_t *table = NULL; + uint64_t window_size = (uint64_t)nb_table << page_shift; + + if (kvm_enabled() && !(window_size >> 32)) { + table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio); + } + + if (!table) { + *fd = -1; + table = g_malloc0(nb_table * sizeof(uint64_t)); + } + + trace_spapr_iommu_new_table(liobn, table, *fd); + + return table; +} + +static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) +{ + if (!kvm_enabled() || + (kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) { + g_free(table); + } +} + /* Called from RCU critical section */ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, bool is_write) @@ -142,21 +173,13 @@ static MemoryRegionIOMMUOps spapr_iommu_ops = { static int spapr_tce_table_realize(DeviceState *dev) { sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev); - uint64_t window_size = (uint64_t)tcet->nb_table << tcet->page_shift; - if (kvm_enabled() && !(window_size >> 32)) { - tcet->table = kvmppc_create_spapr_tce(tcet->liobn, - window_size, - &tcet->fd, - tcet->need_vfio); - } - - if (!tcet->table) { - size_t table_size = tcet->nb_table * sizeof(uint64_t); - tcet->table = g_malloc0(table_size); - } - - trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd); + tcet->fd = -1; + tcet->table = spapr_tce_alloc_table(tcet->liobn, + tcet->page_shift, + tcet->nb_table, + &tcet->fd, + tcet->need_vfio); memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops, "iommu-spapr", @@ -242,11 +265,8 @@ static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp) QLIST_REMOVE(tcet, list); - if (!kvm_enabled() || - (kvmppc_remove_spapr_tce(tcet->table, tcet->fd, - tcet->nb_table) != 0)) { - g_free(tcet->table); - } + spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table); + tcet->fd = -1; } MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet) diff --git a/trace-events b/trace-events index 1211c20ed1..b27d1dab65 100644 --- a/trace-events +++ b/trace-events @@ -1430,7 +1430,7 @@ spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) " spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x" -spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) "liobn=%"PRIx64" tcet=%p table=%p fd=%d" +spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" table=%p fd=%d" # hw/ppc/ppc.c ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" From b4daafbd13826dfa9d2596fb0f31f1453611189f Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 26 May 2016 16:14:57 +1000 Subject: [PATCH 13/13] MAINTAINERS: Add David Gibson as ppc maintainer I've been de facto co-maintainer of all ppc target related code for some time. Alex Graf isworking on other things and doesn't have a whole lot of time for qemu ppc maintainership. So, update the MAINTAINERS file to reflect this. Signed-off-by: David Gibson Reviewed-by: Alexander Graf Acked-by: Mark Cave-Ayland --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3700d51c0b..3c949d5a97 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -165,6 +165,7 @@ F: hw/openrisc/ F: tests/tcg/openrisc/ PowerPC +M: David Gibson M: Alexander Graf L: qemu-ppc@nongnu.org S: Maintained