migration/xbzrle: Shuffle function order
Place the CONFIG_AVX512BW_OPT block at the top, which will aid function selection in the next patch. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Reviewed-by: Juan Quintela <quintela@redhat.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
51f4d916b5
commit
1b48d0abdf
@ -15,6 +15,128 @@
|
|||||||
#include "qemu/host-utils.h"
|
#include "qemu/host-utils.h"
|
||||||
#include "xbzrle.h"
|
#include "xbzrle.h"
|
||||||
|
|
||||||
|
#if defined(CONFIG_AVX512BW_OPT)
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
int __attribute__((target("avx512bw")))
|
||||||
|
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
||||||
|
uint8_t *dst, int dlen)
|
||||||
|
{
|
||||||
|
uint32_t zrun_len = 0, nzrun_len = 0;
|
||||||
|
int d = 0, i = 0, num = 0;
|
||||||
|
uint8_t *nzrun_start = NULL;
|
||||||
|
/* add 1 to include residual part in main loop */
|
||||||
|
uint32_t count512s = (slen >> 6) + 1;
|
||||||
|
/* countResidual is tail of data, i.e., countResidual = slen % 64 */
|
||||||
|
uint32_t count_residual = slen & 0b111111;
|
||||||
|
bool never_same = true;
|
||||||
|
uint64_t mask_residual = 1;
|
||||||
|
mask_residual <<= count_residual;
|
||||||
|
mask_residual -= 1;
|
||||||
|
__m512i r = _mm512_set1_epi32(0);
|
||||||
|
|
||||||
|
while (count512s) {
|
||||||
|
int bytes_to_check = 64;
|
||||||
|
uint64_t mask = 0xffffffffffffffff;
|
||||||
|
if (count512s == 1) {
|
||||||
|
bytes_to_check = count_residual;
|
||||||
|
mask = mask_residual;
|
||||||
|
}
|
||||||
|
__m512i old_data = _mm512_mask_loadu_epi8(r,
|
||||||
|
mask, old_buf + i);
|
||||||
|
__m512i new_data = _mm512_mask_loadu_epi8(r,
|
||||||
|
mask, new_buf + i);
|
||||||
|
uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
|
||||||
|
count512s--;
|
||||||
|
|
||||||
|
bool is_same = (comp & 0x1);
|
||||||
|
while (bytes_to_check) {
|
||||||
|
if (d + 2 > dlen) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (is_same) {
|
||||||
|
if (nzrun_len) {
|
||||||
|
d += uleb128_encode_small(dst + d, nzrun_len);
|
||||||
|
if (d + nzrun_len > dlen) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
nzrun_start = new_buf + i - nzrun_len;
|
||||||
|
memcpy(dst + d, nzrun_start, nzrun_len);
|
||||||
|
d += nzrun_len;
|
||||||
|
nzrun_len = 0;
|
||||||
|
}
|
||||||
|
/* 64 data at a time for speed */
|
||||||
|
if (count512s && (comp == 0xffffffffffffffff)) {
|
||||||
|
i += 64;
|
||||||
|
zrun_len += 64;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
never_same = false;
|
||||||
|
num = ctz64(~comp);
|
||||||
|
num = (num < bytes_to_check) ? num : bytes_to_check;
|
||||||
|
zrun_len += num;
|
||||||
|
bytes_to_check -= num;
|
||||||
|
comp >>= num;
|
||||||
|
i += num;
|
||||||
|
if (bytes_to_check) {
|
||||||
|
/* still has different data after same data */
|
||||||
|
d += uleb128_encode_small(dst + d, zrun_len);
|
||||||
|
zrun_len = 0;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (never_same || zrun_len) {
|
||||||
|
/*
|
||||||
|
* never_same only acts if
|
||||||
|
* data begins with diff in first count512s
|
||||||
|
*/
|
||||||
|
d += uleb128_encode_small(dst + d, zrun_len);
|
||||||
|
zrun_len = 0;
|
||||||
|
never_same = false;
|
||||||
|
}
|
||||||
|
/* has diff, 64 data at a time for speed */
|
||||||
|
if ((bytes_to_check == 64) && (comp == 0x0)) {
|
||||||
|
i += 64;
|
||||||
|
nzrun_len += 64;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
num = ctz64(comp);
|
||||||
|
num = (num < bytes_to_check) ? num : bytes_to_check;
|
||||||
|
nzrun_len += num;
|
||||||
|
bytes_to_check -= num;
|
||||||
|
comp >>= num;
|
||||||
|
i += num;
|
||||||
|
if (bytes_to_check) {
|
||||||
|
/* mask like 111000 */
|
||||||
|
d += uleb128_encode_small(dst + d, nzrun_len);
|
||||||
|
/* overflow */
|
||||||
|
if (d + nzrun_len > dlen) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
nzrun_start = new_buf + i - nzrun_len;
|
||||||
|
memcpy(dst + d, nzrun_start, nzrun_len);
|
||||||
|
d += nzrun_len;
|
||||||
|
nzrun_len = 0;
|
||||||
|
is_same = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nzrun_len != 0) {
|
||||||
|
d += uleb128_encode_small(dst + d, nzrun_len);
|
||||||
|
/* overflow */
|
||||||
|
if (d + nzrun_len > dlen) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
nzrun_start = new_buf + i - nzrun_len;
|
||||||
|
memcpy(dst + d, nzrun_start, nzrun_len);
|
||||||
|
d += nzrun_len;
|
||||||
|
}
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
page = zrun nzrun
|
page = zrun nzrun
|
||||||
| zrun nzrun page
|
| zrun nzrun page
|
||||||
@ -175,125 +297,3 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
|
|||||||
|
|
||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(CONFIG_AVX512BW_OPT)
|
|
||||||
#include <immintrin.h>
|
|
||||||
|
|
||||||
int __attribute__((target("avx512bw")))
|
|
||||||
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
|
|
||||||
uint8_t *dst, int dlen)
|
|
||||||
{
|
|
||||||
uint32_t zrun_len = 0, nzrun_len = 0;
|
|
||||||
int d = 0, i = 0, num = 0;
|
|
||||||
uint8_t *nzrun_start = NULL;
|
|
||||||
/* add 1 to include residual part in main loop */
|
|
||||||
uint32_t count512s = (slen >> 6) + 1;
|
|
||||||
/* countResidual is tail of data, i.e., countResidual = slen % 64 */
|
|
||||||
uint32_t count_residual = slen & 0b111111;
|
|
||||||
bool never_same = true;
|
|
||||||
uint64_t mask_residual = 1;
|
|
||||||
mask_residual <<= count_residual;
|
|
||||||
mask_residual -= 1;
|
|
||||||
__m512i r = _mm512_set1_epi32(0);
|
|
||||||
|
|
||||||
while (count512s) {
|
|
||||||
int bytes_to_check = 64;
|
|
||||||
uint64_t mask = 0xffffffffffffffff;
|
|
||||||
if (count512s == 1) {
|
|
||||||
bytes_to_check = count_residual;
|
|
||||||
mask = mask_residual;
|
|
||||||
}
|
|
||||||
__m512i old_data = _mm512_mask_loadu_epi8(r,
|
|
||||||
mask, old_buf + i);
|
|
||||||
__m512i new_data = _mm512_mask_loadu_epi8(r,
|
|
||||||
mask, new_buf + i);
|
|
||||||
uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
|
|
||||||
count512s--;
|
|
||||||
|
|
||||||
bool is_same = (comp & 0x1);
|
|
||||||
while (bytes_to_check) {
|
|
||||||
if (d + 2 > dlen) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (is_same) {
|
|
||||||
if (nzrun_len) {
|
|
||||||
d += uleb128_encode_small(dst + d, nzrun_len);
|
|
||||||
if (d + nzrun_len > dlen) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
nzrun_start = new_buf + i - nzrun_len;
|
|
||||||
memcpy(dst + d, nzrun_start, nzrun_len);
|
|
||||||
d += nzrun_len;
|
|
||||||
nzrun_len = 0;
|
|
||||||
}
|
|
||||||
/* 64 data at a time for speed */
|
|
||||||
if (count512s && (comp == 0xffffffffffffffff)) {
|
|
||||||
i += 64;
|
|
||||||
zrun_len += 64;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
never_same = false;
|
|
||||||
num = ctz64(~comp);
|
|
||||||
num = (num < bytes_to_check) ? num : bytes_to_check;
|
|
||||||
zrun_len += num;
|
|
||||||
bytes_to_check -= num;
|
|
||||||
comp >>= num;
|
|
||||||
i += num;
|
|
||||||
if (bytes_to_check) {
|
|
||||||
/* still has different data after same data */
|
|
||||||
d += uleb128_encode_small(dst + d, zrun_len);
|
|
||||||
zrun_len = 0;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (never_same || zrun_len) {
|
|
||||||
/*
|
|
||||||
* never_same only acts if
|
|
||||||
* data begins with diff in first count512s
|
|
||||||
*/
|
|
||||||
d += uleb128_encode_small(dst + d, zrun_len);
|
|
||||||
zrun_len = 0;
|
|
||||||
never_same = false;
|
|
||||||
}
|
|
||||||
/* has diff, 64 data at a time for speed */
|
|
||||||
if ((bytes_to_check == 64) && (comp == 0x0)) {
|
|
||||||
i += 64;
|
|
||||||
nzrun_len += 64;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
num = ctz64(comp);
|
|
||||||
num = (num < bytes_to_check) ? num : bytes_to_check;
|
|
||||||
nzrun_len += num;
|
|
||||||
bytes_to_check -= num;
|
|
||||||
comp >>= num;
|
|
||||||
i += num;
|
|
||||||
if (bytes_to_check) {
|
|
||||||
/* mask like 111000 */
|
|
||||||
d += uleb128_encode_small(dst + d, nzrun_len);
|
|
||||||
/* overflow */
|
|
||||||
if (d + nzrun_len > dlen) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
nzrun_start = new_buf + i - nzrun_len;
|
|
||||||
memcpy(dst + d, nzrun_start, nzrun_len);
|
|
||||||
d += nzrun_len;
|
|
||||||
nzrun_len = 0;
|
|
||||||
is_same = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nzrun_len != 0) {
|
|
||||||
d += uleb128_encode_small(dst + d, nzrun_len);
|
|
||||||
/* overflow */
|
|
||||||
if (d + nzrun_len > dlen) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
nzrun_start = new_buf + i - nzrun_len;
|
|
||||||
memcpy(dst + d, nzrun_start, nzrun_len);
|
|
||||||
d += nzrun_len;
|
|
||||||
}
|
|
||||||
return d;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user