+ trace_migrate_handle_rp_req_pages(rbname, start, len);
+
+ /*
+ * Since we currently insist on matching page sizes, just sanity check
+ * we're being asked for whole host pages.
+ */
+ if (start & (our_host_ps-1) ||
+ (len & (our_host_ps-1))) {
+ error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
+ " len: %zd", __func__, start, len);
+ mark_source_rp_bad(ms);
+ return;
+ }
+
+ if (ram_save_queue_pages(ms, rbname, start, len)) {
+ mark_source_rp_bad(ms);
+ }
+}
+
+/*
+ * Handles messages sent on the return path towards the source VM
+ *
+ */
+static void *source_return_path_thread(void *opaque)
+{
+ MigrationState *ms = opaque;
+ QEMUFile *rp = ms->rp_state.from_dst_file;
+ uint16_t header_len, header_type;
+ uint8_t buf[512];
+ uint32_t tmp32, sibling_error;
+ ram_addr_t start = 0; /* =0 to silence warning */
+ size_t len = 0, expected_len;
+ int res;
+
+ trace_source_return_path_thread_entry();
+ while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
+ migration_is_setup_or_active(ms->state)) {
+ trace_source_return_path_thread_loop_top();
+ header_type = qemu_get_be16(rp);
+ header_len = qemu_get_be16(rp);
+
+ if (header_type >= MIG_RP_MSG_MAX ||
+ header_type == MIG_RP_MSG_INVALID) {
+ error_report("RP: Received invalid message 0x%04x length 0x%04x",
+ header_type, header_len);
+ mark_source_rp_bad(ms);
+ goto out;
+ }
+
+ if ((rp_cmd_args[header_type].len != -1 &&
+ header_len != rp_cmd_args[header_type].len) ||
+ header_len > sizeof(buf)) {
+ error_report("RP: Received '%s' message (0x%04x) with"
+ "incorrect length %d expecting %zu",
+ rp_cmd_args[header_type].name, header_type, header_len,
+ (size_t)rp_cmd_args[header_type].len);
+ mark_source_rp_bad(ms);
+ goto out;
+ }
+
+ /* We know we've got a valid header by this point */
+ res = qemu_get_buffer(rp, buf, header_len);
+ if (res != header_len) {
+ error_report("RP: Failed reading data for message 0x%04x"
+ " read %d expected %d",
+ header_type, res, header_len);
+ mark_source_rp_bad(ms);
+ goto out;
+ }
+
+ /* OK, we have the message and the data */
+ switch (header_type) {
+ case MIG_RP_MSG_SHUT:
+ sibling_error = be32_to_cpup((uint32_t *)buf);
+ trace_source_return_path_thread_shut(sibling_error);
+ if (sibling_error) {
+ error_report("RP: Sibling indicated error %d", sibling_error);
+ mark_source_rp_bad(ms);
+ }
+ /*
+ * We'll let the main thread deal with closing the RP
+ * we could do a shutdown(2) on it, but we're the only user
+ * anyway, so there's nothing gained.
+ */
+ goto out;
+
+ case MIG_RP_MSG_PONG:
+ tmp32 = be32_to_cpup((uint32_t *)buf);
+ trace_source_return_path_thread_pong(tmp32);
+ break;
+
+ case MIG_RP_MSG_REQ_PAGES:
+ start = be64_to_cpup((uint64_t *)buf);
+ len = be32_to_cpup((uint32_t *)(buf + 8));
+ migrate_handle_rp_req_pages(ms, NULL, start, len);
+ break;
+
+ case MIG_RP_MSG_REQ_PAGES_ID:
+ expected_len = 12 + 1; /* header + termination */
+
+ if (header_len >= expected_len) {
+ start = be64_to_cpup((uint64_t *)buf);
+ len = be32_to_cpup((uint32_t *)(buf + 8));
+ /* Now we expect an idstr */
+ tmp32 = buf[12]; /* Length of the following idstr */
+ buf[13 + tmp32] = '\0';
+ expected_len += tmp32;
+ }
+ if (header_len != expected_len) {
+ error_report("RP: Req_Page_id with length %d expecting %zd",
+ header_len, expected_len);
+ mark_source_rp_bad(ms);
+ goto out;
+ }
+ migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (qemu_file_get_error(rp)) {
+ trace_source_return_path_thread_bad_end();
+ mark_source_rp_bad(ms);
+ }
+
+ trace_source_return_path_thread_end();
+out:
+ ms->rp_state.from_dst_file = NULL;
+ qemu_fclose(rp);
+ return NULL;
+}
+
+static int open_return_path_on_source(MigrationState *ms)
+{
+
+ ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
+ if (!ms->rp_state.from_dst_file) {
+ return -1;
+ }
+
+ trace_open_return_path_on_source();
+ qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+ source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
+
+ trace_open_return_path_on_source_continue();
+
+ return 0;
+}
+
+/* Returns 0 if the RP was ok, otherwise there was an error on the RP */
+static int await_return_path_close_on_source(MigrationState *ms)
+{
+ /*
+ * If this is a normal exit then the destination will send a SHUT and the
+ * rp_thread will exit, however if there's an error we need to cause
+ * it to exit.
+ */
+ if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
+ /*
+ * shutdown(2), if we have it, will cause it to unblock if it's stuck
+ * waiting for the destination.
+ */
+ qemu_file_shutdown(ms->rp_state.from_dst_file);
+ mark_source_rp_bad(ms);
+ }
+ trace_await_return_path_close_on_source_joining();
+ qemu_thread_join(&ms->rp_state.rp_thread);
+ trace_await_return_path_close_on_source_close();
+ return ms->rp_state.error;
+}
+
+/*
+ * Switch from normal iteration to postcopy
+ * Returns non-0 on error
+ */
+static int postcopy_start(MigrationState *ms, bool *old_vm_running)
+{
+ int ret;
+ const QEMUSizedBuffer *qsb;
+ int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
+ trace_postcopy_start();
+ qemu_mutex_lock_iothread();
+ trace_postcopy_start_set_run();
+
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+ *old_vm_running = runstate_is_running();
+ global_state_store();
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_inactivate_all();
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /*
+ * Cause any non-postcopiable, but iterative devices to
+ * send out their final data.
+ */
+ qemu_savevm_state_complete_precopy(ms->to_dst_file, true);
+
+ /*
+ * in Finish migrate and with the io-lock held everything should
+ * be quiet, but we've potentially still got dirty pages and we
+ * need to tell the destination to throw any pages it's already received
+ * that are dirty
+ */
+ if (ram_postcopy_send_discard_bitmap(ms)) {
+ error_report("postcopy send discard bitmap failed");
+ goto fail;
+ }
+
+ /*
+ * send rest of state - note things that are doing postcopy
+ * will notice we're in POSTCOPY_ACTIVE and not actually
+ * wrap their state up here
+ */
+ qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
+ /* Ping just for debugging, helps line traces up */
+ qemu_savevm_send_ping(ms->to_dst_file, 2);
+
+ /*
+ * While loading the device state we may trigger page transfer
+ * requests and the fd must be free to process those, and thus
+ * the destination must read the whole device state off the fd before
+ * it starts processing it. Unfortunately the ad-hoc migration format
+ * doesn't allow the destination to know the size to read without fully
+ * parsing it through each devices load-state code (especially the open
+ * coded devices that use get/put).
+ * So we wrap the device state up in a package with a length at the start;
+ * to do this we use a qemu_buf to hold the whole of the device state.
+ */
+ QEMUFile *fb = qemu_bufopen("w", NULL);
+ if (!fb) {
+ error_report("Failed to create buffered file");
+ goto fail;
+ }
+
+ /*
+ * Make sure the receiver can get incoming pages before we send the rest
+ * of the state
+ */
+ qemu_savevm_send_postcopy_listen(fb);
+
+ qemu_savevm_state_complete_precopy(fb, false);
+ qemu_savevm_send_ping(fb, 3);
+
+ qemu_savevm_send_postcopy_run(fb);
+
+ /* <><> end of stuff going into the package */
+ qsb = qemu_buf_get(fb);
+
+ /* Now send that blob */
+ if (qemu_savevm_send_packaged(ms->to_dst_file, qsb)) {
+ goto fail_closefb;
+ }
+ qemu_fclose(fb);
+
+ /* Send a notify to give a chance for anything that needs to happen
+ * at the transition to postcopy and after the device state; in particular
+ * spice needs to trigger a transition now
+ */
+ ms->postcopy_after_devices = true;
+ notifier_list_notify(&migration_state_notifiers, ms);
+
+ ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
+
+ qemu_mutex_unlock_iothread();
+
+ /*
+ * Although this ping is just for debug, it could potentially be
+ * used for getting a better measurement of downtime at the source.
+ */
+ qemu_savevm_send_ping(ms->to_dst_file, 4);
+
+ ret = qemu_file_get_error(ms->to_dst_file);
+ if (ret) {
+ error_report("postcopy_start: Migration stream errored");
+ migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ }
+
+ return ret;
+
+fail_closefb:
+ qemu_fclose(fb);
+fail:
+ migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ qemu_mutex_unlock_iothread();
+ return -1;
+}
+
+/**
+ * migration_completion: Used by migration_thread when there's not much left.
+ * The caller 'breaks' the loop when this returns.
+ *
+ * @s: Current migration state
+ * @current_active_state: The migration state we expect to be in
+ * @*old_vm_running: Pointer to old_vm_running flag
+ * @*start_time: Pointer to time to update
+ */
+static void migration_completion(MigrationState *s, int current_active_state,
+ bool *old_vm_running,
+ int64_t *start_time)
+{
+ int ret;
+
+ if (s->state == MIGRATION_STATUS_ACTIVE) {
+ qemu_mutex_lock_iothread();
+ *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+ *old_vm_running = runstate_is_running();
+ ret = global_state_store();
+
+ if (!ret) {
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret >= 0) {
+ ret = bdrv_inactivate_all();
+ }
+ if (ret >= 0) {
+ qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
+ qemu_savevm_state_complete_precopy(s->to_dst_file, false);
+ }
+ }
+ qemu_mutex_unlock_iothread();
+
+ if (ret < 0) {
+ goto fail;
+ }
+ } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+ trace_migration_completion_postcopy_end();
+
+ qemu_savevm_state_complete_postcopy(s->to_dst_file);
+ trace_migration_completion_postcopy_end_after_complete();
+ }
+
+ /*
+ * If rp was opened we must clean up the thread before
+ * cleaning everything else up (since if there are no failures
+ * it will wait for the destination to send it's status in
+ * a SHUT command).
+ * Postcopy opens rp if enabled (even if it's not avtivated)
+ */
+ if (migrate_postcopy_ram()) {
+ int rp_error;
+ trace_migration_completion_postcopy_end_before_rp();
+ rp_error = await_return_path_close_on_source(s);
+ trace_migration_completion_postcopy_end_after_rp(rp_error);
+ if (rp_error) {
+ goto fail;
+ }
+ }
+
+ if (qemu_file_get_error(s->to_dst_file)) {
+ trace_migration_completion_file_err();
+ goto fail;
+ }
+
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_COMPLETED);
+ return;
+
+fail:
+ migrate_set_state(&s->state, current_active_state,
+ MIGRATION_STATUS_FAILED);
+}
+
+/*
+ * Master migration thread on the source VM.
+ * It drives the migration and pumps the data down the outgoing channel.
+ */