diff --git a/configure b/configure
index 3c7470096f..eb49bb6680 100755
--- a/configure
+++ b/configure
@@ -2475,7 +2475,8 @@ fi
 # zstd check
 
 if test "$zstd" != "no" ; then
-    if $pkg_config --exist libzstd ; then
+    libzstd_minver="1.4.0"
+    if $pkg_config --atleast-version=$libzstd_minver libzstd ; then
         zstd_cflags="$($pkg_config --cflags libzstd)"
         zstd_libs="$($pkg_config --libs libzstd)"
         LIBS="$zstd_libs $LIBS"
diff --git a/migration/colo.c b/migration/colo.c
index 93c5a452fb..44942c4e23 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -26,6 +26,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/rcu.h"
 #include "migration/failover.h"
+#include "migration/ram.h"
 #ifdef CONFIG_REPLICATION
 #include "replication.h"
 #endif
@@ -845,6 +846,8 @@ void *colo_process_incoming_thread(void *opaque)
      */
     qemu_file_set_blocking(mis->from_src_file, true);
 
+    colo_incoming_start_dirty_log();
+
     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
     fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
     object_unref(OBJECT(bioc));
diff --git a/migration/migration.c b/migration/migration.c
index 0b2045ccbd..c1d88ace7f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -78,6 +78,7 @@
 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */
 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
 /* Define default autoconverge cpu throttle migration parameters */
+#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
 #define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99
@@ -778,6 +779,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     params->compress_wait_thread = s->parameters.compress_wait_thread;
     params->has_decompress_threads = true;
     params->decompress_threads = s->parameters.decompress_threads;
+    params->has_throttle_trigger_threshold = true;
+    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
     params->has_cpu_throttle_initial = true;
     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
     params->has_cpu_throttle_increment = true;
@@ -851,6 +854,7 @@ bool migration_is_setup_or_active(int state)
     case MIGRATION_STATUS_PRE_SWITCHOVER:
     case MIGRATION_STATUS_DEVICE:
     case MIGRATION_STATUS_WAIT_UNPLUG:
+    case MIGRATION_STATUS_COLO:
         return true;
 
     default:
@@ -1169,6 +1173,15 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
         return false;
     }
 
+    if (params->has_throttle_trigger_threshold &&
+        (params->throttle_trigger_threshold < 1 ||
+         params->throttle_trigger_threshold > 100)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   "throttle_trigger_threshold",
+                   "an integer in the range of 1 to 100");
+        return false;
+    }
+
     if (params->has_cpu_throttle_initial &&
         (params->cpu_throttle_initial < 1 ||
          params->cpu_throttle_initial > 99)) {
@@ -1298,6 +1311,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
         dest->decompress_threads = params->decompress_threads;
     }
 
+    if (params->has_throttle_trigger_threshold) {
+        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
+    }
+
     if (params->has_cpu_throttle_initial) {
         dest->cpu_throttle_initial = params->cpu_throttle_initial;
     }
@@ -1382,6 +1399,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
         s->parameters.decompress_threads = params->decompress_threads;
     }
 
+    if (params->has_throttle_trigger_threshold) {
+        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
+    }
+
     if (params->has_cpu_throttle_initial) {
         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
     }
@@ -3558,6 +3579,9 @@ static Property migration_properties[] = {
     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                       parameters.decompress_threads,
                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
+    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
+                      parameters.throttle_trigger_threshold,
+                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
     DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                       parameters.cpu_throttle_initial,
                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
@@ -3667,6 +3691,7 @@ static void migration_instance_init(Object *obj)
     params->has_compress_level = true;
     params->has_compress_threads = true;
     params->has_decompress_threads = true;
+    params->has_throttle_trigger_threshold = true;
    params->has_cpu_throttle_initial = true;
     params->has_cpu_throttle_increment = true;
     params->has_max_bandwidth = true;
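Note: the following usage sketch is not part of the patch; it shows how the new
parameter would be tuned at runtime once the series is applied, like the other
auto-converge parameters. A hedged QMP example, with an illustrative value of 60:

    { "execute": "migrate-set-parameters",
      "arguments": { "throttle-trigger-threshold": 60 } }

The HMP equivalent is "migrate_set_parameter throttle-trigger-threshold 60",
and "info migrate_parameters" reports the current value through the
monitor/hmp-cmds.c hunks further below.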
diff --git a/migration/ram.c b/migration/ram.c
index 0ef68798d2..c12cfdbe26 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -896,11 +896,38 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
     }
 }
 
+static void migration_trigger_throttle(RAMState *rs)
+{
+    MigrationState *s = migrate_get_current();
+    uint64_t threshold = s->parameters.throttle_trigger_threshold;
+
+    uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
+    uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
+    uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
+
+    /* During block migration the auto-converge logic incorrectly detects
+     * that ram migration makes no progress. Avoid this by disabling the
+     * throttling logic during the bulk phase of block migration. */
+    if (migrate_auto_converge() && !blk_mig_bulk_active()) {
+        /* The following detection logic can be refined later. For now:
+           check whether the bytes dirtied in this period exceed the
+           threshold percentage of the bytes transferred in the same
+           period. If that happens twice in a row, start or increase
+           throttling. */
+
+        if ((bytes_dirty_period > bytes_dirty_threshold) &&
+            (++rs->dirty_rate_high_cnt >= 2)) {
+            trace_migration_throttle();
+            rs->dirty_rate_high_cnt = 0;
+            mig_throttle_guest_down();
+        }
+    }
+}
+
 static void migration_bitmap_sync(RAMState *rs)
 {
     RAMBlock *block;
     int64_t end_time;
-    uint64_t bytes_xfer_now;
 
     ram_counters.dirty_sync_count++;
 
@@ -927,26 +954,7 @@ static void migration_bitmap_sync(RAMState *rs)
 
     /* more than 1 second = 1000 millisecons */
     if (end_time > rs->time_last_bitmap_sync + 1000) {
-        bytes_xfer_now = ram_counters.transferred;
-
-        /* During block migration the auto-converge logic incorrectly detects
-         * that ram migration makes no progress. Avoid this by disabling the
-         * throttling logic during the bulk phase of block migration. */
-        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
-            /* The following detection logic can be refined later. For now:
-               Check to see if the dirtied bytes is 50% more than the approx.
-               amount of bytes that just got transferred since the last time we
-               were in this routine. If that happens twice, start or increase
-               throttling */
-
-            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
-                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
-                (++rs->dirty_rate_high_cnt >= 2)) {
-                trace_migration_throttle();
-                rs->dirty_rate_high_cnt = 0;
-                mig_throttle_guest_down();
-            }
-        }
+        migration_trigger_throttle(rs);
 
         migration_update_rates(rs, end_time);
 
@@ -955,7 +963,7 @@ static void migration_bitmap_sync(RAMState *rs)
         /* reset period counters */
         rs->time_last_bitmap_sync = end_time;
         rs->num_dirty_pages_period = 0;
-        rs->bytes_xfer_prev = bytes_xfer_now;
+        rs->bytes_xfer_prev = ram_counters.transferred;
     }
     if (migrate_use_events()) {
         qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
@@ -2734,7 +2742,7 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
 }
 
 static inline void *colo_cache_from_block_offset(RAMBlock *block,
-                                                 ram_addr_t offset)
+                             ram_addr_t offset, bool record_bitmap)
 {
     if (!offset_in_ramblock(block, offset)) {
         return NULL;
@@ -2750,7 +2758,8 @@ static inline void *colo_cache_from_block_offset(RAMBlock *block,
      * It help us to decide which pages in ram cache should be flushed
      * into VM's RAM later.
      */
-    if (!test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
+    if (record_bitmap &&
+        !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
         ram_state->migration_dirty_pages++;
     }
     return block->colo_cache + offset;
@@ -2986,7 +2995,6 @@ int colo_init_ram_cache(void)
                 }
                 return -errno;
             }
-            memcpy(block->colo_cache, block->host, block->used_length);
         }
     }
 
@@ -3000,19 +3008,36 @@ int colo_init_ram_cache(void)
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
-
             block->bmap = bitmap_new(pages);
-            bitmap_set(block->bmap, 0, pages);
         }
     }
-    ram_state = g_new0(RAMState, 1);
-    ram_state->migration_dirty_pages = 0;
-    qemu_mutex_init(&ram_state->bitmap_mutex);
-    memory_global_dirty_log_start();
 
+    ram_state_init(&ram_state);
     return 0;
 }
 
+/* TODO: duplicated with ram_init_bitmaps */
+void colo_incoming_start_dirty_log(void)
+{
+    RAMBlock *block = NULL;
+    /* For memory_global_dirty_log_start below. */
+    qemu_mutex_lock_iothread();
+    qemu_mutex_lock_ramlist();
+
+    memory_global_dirty_log_sync();
+    WITH_RCU_READ_LOCK_GUARD() {
+        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+            ramblock_sync_dirty_bitmap(ram_state, block);
+            /* Discard this dirty bitmap record */
+            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
+        }
+        memory_global_dirty_log_start();
+    }
+    ram_state->migration_dirty_pages = 0;
+    qemu_mutex_unlock_ramlist();
+    qemu_mutex_unlock_iothread();
+}
+
 /* It is need to hold the global lock to call this helper */
 void colo_release_ram_cache(void)
 {
@@ -3032,9 +3057,7 @@ void colo_release_ram_cache(void)
             }
         }
     }
-    qemu_mutex_destroy(&ram_state->bitmap_mutex);
-    g_free(ram_state);
-    ram_state = NULL;
+    ram_state_cleanup(&ram_state);
 }
 
 /**
@@ -3348,7 +3371,7 @@ static int ram_load_precopy(QEMUFile *f)
 
     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
         ram_addr_t addr, total_ram_bytes;
-        void *host = NULL;
+        void *host = NULL, *host_bak = NULL;
         uint8_t ch;
 
         /*
@@ -3379,20 +3402,35 @@ static int ram_load_precopy(QEMUFile *f)
                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
             RAMBlock *block = ram_block_from_stream(f, flags);
 
+            host = host_from_ram_block_offset(block, addr);
             /*
-             * After going into COLO, we should load the Page into colo_cache.
+             * After going into COLO stage, we should not load the page
+             * into SVM's memory directly, but put it into colo_cache first.
+             * NOTE: We need to keep a copy of SVM's RAM in colo_cache.
+             * Previously, we copied all of this memory in the COLO
+             * preparation stage while the VM was stopped, which was
+             * time-consuming. Here we optimize it by backing up every
+             * page during the migration process while COLO is enabled.
+             * Although this slows the migration down a little, it clearly
+             * reduces the downtime caused by backing up all of SVM's
+             * memory in the COLO preparation stage.
              */
-            if (migration_incoming_in_colo_state()) {
-                host = colo_cache_from_block_offset(block, addr);
-            } else {
-                host = host_from_ram_block_offset(block, addr);
+            if (migration_incoming_colo_enabled()) {
+                if (migration_incoming_in_colo_state()) {
+                    /* In COLO stage, put all pages into cache temporarily */
+                    host = colo_cache_from_block_offset(block, addr, true);
+                } else {
+                    /*
+                     * In migration stage but before COLO stage,
+                     * put all pages into both cache and SVM's memory.
+                     */
+                    host_bak = colo_cache_from_block_offset(block, addr, false);
+                }
             }
             if (!host) {
                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                 ret = -EINVAL;
                 break;
             }
-            if (!migration_incoming_in_colo_state()) {
-                ramblock_recv_bitmap_set(block, host);
-            }
+            ramblock_recv_bitmap_set(block, host);
@@ -3506,6 +3544,9 @@ static int ram_load_precopy(QEMUFile *f)
         if (!ret) {
             ret = qemu_file_get_error(f);
         }
+        if (!ret && host_bak) {
+            memcpy(host_bak, host, TARGET_PAGE_SIZE);
+        }
     }
 
     ret |= wait_for_decompress_done();
diff --git a/migration/ram.h b/migration/ram.h
index a553d40751..5ceaff7cb4 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -66,5 +66,6 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
 /* ram cache */
 int colo_init_ram_cache(void);
 void colo_release_ram_cache(void);
+void colo_incoming_start_dirty_log(void);
 
 #endif
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 1c69d51b97..58724031ea 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -407,6 +407,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS),
             params->decompress_threads);
+        assert(params->has_throttle_trigger_threshold);
+        monitor_printf(mon, "%s: %u\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD),
+            params->throttle_trigger_threshold);
         assert(params->has_cpu_throttle_initial);
         monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL),
@@ -1254,6 +1258,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
         p->has_decompress_threads = true;
         visit_type_int(v, param, &p->decompress_threads, &err);
         break;
+    case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD:
+        p->has_throttle_trigger_threshold = true;
+        visit_type_int(v, param, &p->throttle_trigger_threshold, &err);
+        break;
     case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL:
         p->has_cpu_throttle_initial = true;
         visit_type_int(v, param, &p->cpu_throttle_initial, &err);
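To make the new trigger arithmetic concrete, here is a minimal standalone C
sketch (not part of the patch; the function name is invented for illustration)
of the condition that migration_trigger_throttle() evaluates in the ram.c hunk
above:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * True when the guest dirtied more bytes in the last period than
     * `threshold` percent of the bytes migration transferred in that same
     * period. In the patch, two consecutive hits of this condition start
     * or increase CPU throttling via mig_throttle_guest_down().
     */
    static bool dirty_rate_exceeds_threshold(uint64_t bytes_dirty_period,
                                             uint64_t bytes_xfer_period,
                                             uint64_t threshold /* 1..100 */)
    {
        return bytes_dirty_period > bytes_xfer_period * threshold / 100;
    }

With the default threshold of 50, "bytes_xfer_period * 50 / 100" equals
"bytes_xfer_period / 2", so the refactored code preserves the old hard-coded
behavior unless the parameter is changed.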
diff --git a/qapi/migration.json b/qapi/migration.json
index d44d99cd78..0d1c0712ca 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -540,6 +540,10 @@
 #                      compression, so set the decompress-threads to the number about 1/4
 #                      of compress-threads is adequate.
 #
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+#                              to trigger throttling. It is expressed as a percentage.
+#                              The default value is 50. (Since 5.0)
+#
 # @cpu-throttle-initial: Initial percentage of time guest cpus are throttled
 #                        when migration auto-converge is activated. The
 #                        default value is 20. (Since 2.7)
@@ -625,7 +629,7 @@
   'data': ['announce-initial', 'announce-max',
            'announce-rounds', 'announce-step',
            'compress-level', 'compress-threads', 'decompress-threads',
-           'compress-wait-thread',
+           'compress-wait-thread', 'throttle-trigger-threshold',
            'cpu-throttle-initial', 'cpu-throttle-increment',
            'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
            'downtime-limit', 'x-checkpoint-delay', 'block-incremental',
@@ -660,6 +664,10 @@
 #
 # @decompress-threads: decompression thread count
 #
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+#                              to trigger throttling. It is expressed as a percentage.
+#                              The default value is 50. (Since 5.0)
+#
 # @cpu-throttle-initial: Initial percentage of time guest cpus are
 #                        throttled when migration auto-converge is activated.
 #                        The default value is 20. (Since 2.7)
@@ -752,6 +760,7 @@
             '*compress-threads': 'int',
             '*compress-wait-thread': 'bool',
             '*decompress-threads': 'int',
+            '*throttle-trigger-threshold': 'int',
             '*cpu-throttle-initial': 'int',
             '*cpu-throttle-increment': 'int',
             '*tls-creds': 'StrOrNull',
@@ -813,6 +822,10 @@
 #
 # @decompress-threads: decompression thread count
 #
+# @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
+#                              to trigger throttling. It is expressed as a percentage.
+#                              The default value is 50. (Since 5.0)
+#
 # @cpu-throttle-initial: Initial percentage of time guest cpus are
 #                        throttled when migration auto-converge is activated.
 #                        (Since 2.7)
@@ -905,6 +918,7 @@
             '*compress-threads': 'uint8',
             '*compress-wait-thread': 'bool',
             '*decompress-threads': 'uint8',
+            '*throttle-trigger-threshold': 'uint8',
             '*cpu-throttle-initial': 'uint8',
             '*cpu-throttle-increment': 'uint8',
             '*tls-creds': 'str',
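Finally, the COLO-side changes in ram_load_precopy() amount to a three-way
page-placement policy. A hedged C sketch follows (the helper and enum are
invented for illustration; the real code inlines this logic in the loop):

    #include <stdbool.h>

    typedef enum {
        DEST_SVM_RAM,            /* normal live migration, no COLO */
        DEST_SVM_RAM_AND_CACHE,  /* pre-COLO: load into SVM RAM as usual and
                                    back the page up into colo_cache via the
                                    host_bak memcpy at the end of the loop */
        DEST_COLO_CACHE_ONLY,    /* COLO stage: cache only, recorded in the
                                    dirty bitmap and flushed to SVM RAM at
                                    the next checkpoint */
    } PageDest;

    static PageDest colo_page_dest(bool colo_enabled, bool in_colo_state)
    {
        if (!colo_enabled) {
            return DEST_SVM_RAM;
        }
        return in_colo_state ? DEST_COLO_CACHE_ONLY : DEST_SVM_RAM_AND_CACHE;
    }

Backing pages up as they arrive is what lets colo_init_ram_cache() drop its
bulk memcpy of every RAMBlock, trading a little migration speed for a much
shorter downtime in the COLO preparation stage, as the patch's own comment
explains.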