diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py new file mode 100755 index 0000000000..b8b9968e00 --- /dev/null +++ b/scripts/analyze-migration.py @@ -0,0 +1,592 @@ +#!/usr/bin/env python +# +# Migration Stream Analyzer +# +# Copyright (c) 2015 Alexander Graf +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, see . + +import numpy as np +import json +import os +import argparse +import collections +import pprint + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError: + pass + +class MigrationFile(object): + def __init__(self, filename): + self.filename = filename + self.file = open(self.filename, "rb") + + def read64(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0]) + + def read32(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0]) + + def read16(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0]) + + def read8(self): + return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0]) + + def readstr(self, len = None): + if len is None: + len = self.read8() + if len == 0: + return "" + return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0] + + def readvar(self, size = None): + if size is None: + size = self.read8() + if size == 0: + return "" + value = self.file.read(size) + if len(value) != size: + raise Exception("Unexpected end of %s at 0x%x" % (self.filename, self.file.tell())) + return value + + def tell(self): + return self.file.tell() + + # The VMSD description is at the end of the file, after EOF. Look for + # the last NULL byte, then for the beginning brace of JSON. + def read_migration_debug_json(self): + QEMU_VM_VMDESCRIPTION = 0x06 + + # Remember the offset in the file when we started + entrypos = self.file.tell() + + # Read the last 10MB + self.file.seek(0, os.SEEK_END) + endpos = self.file.tell() + self.file.seek(max(-endpos, -10 * 1024 * 1024), os.SEEK_END) + datapos = self.file.tell() + data = self.file.read() + # The full file read closed the file as well, reopen it + self.file = open(self.filename, "rb") + + # Find the last NULL byte, then the first brace after that. This should + # be the beginning of our JSON data. + nulpos = data.rfind("\0") + jsonpos = data.find("{", nulpos) + + # Check backwards from there and see whether we guessed right + self.file.seek(datapos + jsonpos - 5, 0) + if self.read8() != QEMU_VM_VMDESCRIPTION: + raise Exception("No Debug Migration device found") + + jsonlen = self.read32() + + # Seek back to where we were at the beginning + self.file.seek(entrypos, 0) + + return data[jsonpos:jsonpos + jsonlen] + + def close(self): + self.file.close() + +class RamSection(object): + RAM_SAVE_FLAG_COMPRESS = 0x02 + RAM_SAVE_FLAG_MEM_SIZE = 0x04 + RAM_SAVE_FLAG_PAGE = 0x08 + RAM_SAVE_FLAG_EOS = 0x10 + RAM_SAVE_FLAG_CONTINUE = 0x20 + RAM_SAVE_FLAG_XBZRLE = 0x40 + RAM_SAVE_FLAG_HOOK = 0x80 + + def __init__(self, file, version_id, ramargs, section_key): + if version_id != 4: + raise Exception("Unknown RAM version %d" % version_id) + + self.file = file + self.section_key = section_key + self.TARGET_PAGE_SIZE = ramargs['page_size'] + self.dump_memory = ramargs['dump_memory'] + self.write_memory = ramargs['write_memory'] + self.sizeinfo = collections.OrderedDict() + self.data = collections.OrderedDict() + self.data['section sizes'] = self.sizeinfo + self.name = '' + if self.write_memory: + self.files = { } + if self.dump_memory: + self.memory = collections.OrderedDict() + self.data['memory'] = self.memory + + def __repr__(self): + return self.data.__repr__() + + def __str__(self): + return self.data.__str__() + + def getDict(self): + return self.data + + def read(self): + # Read all RAM sections + while True: + addr = self.file.read64() + flags = addr & (self.TARGET_PAGE_SIZE - 1) + addr &= ~(self.TARGET_PAGE_SIZE - 1) + + if flags & self.RAM_SAVE_FLAG_MEM_SIZE: + while True: + namelen = self.file.read8() + # We assume that no RAM chunk is big enough to ever + # hit the first byte of the address, so when we see + # a zero here we know it has to be an address, not the + # length of the next block. + if namelen == 0: + self.file.file.seek(-1, 1) + break + self.name = self.file.readstr(len = namelen) + len = self.file.read64() + self.sizeinfo[self.name] = '0x%016x' % len + if self.write_memory: + print self.name + mkdir_p('./' + os.path.dirname(self.name)) + f = open('./' + self.name, "wb") + f.truncate(0) + f.truncate(len) + self.files[self.name] = f + flags &= ~self.RAM_SAVE_FLAG_MEM_SIZE + + if flags & self.RAM_SAVE_FLAG_COMPRESS: + if flags & self.RAM_SAVE_FLAG_CONTINUE: + flags &= ~self.RAM_SAVE_FLAG_CONTINUE + else: + self.name = self.file.readstr() + fill_char = self.file.read8() + # The page in question is filled with fill_char now + if self.write_memory and fill_char != 0: + self.files[self.name].seek(addr, os.SEEK_SET) + self.files[self.name].write(chr(fill_char) * self.TARGET_PAGE_SIZE) + if self.dump_memory: + self.memory['%s (0x%016x)' % (self.name, addr)] = 'Filled with 0x%02x' % fill_char + flags &= ~self.RAM_SAVE_FLAG_COMPRESS + elif flags & self.RAM_SAVE_FLAG_PAGE: + if flags & self.RAM_SAVE_FLAG_CONTINUE: + flags &= ~self.RAM_SAVE_FLAG_CONTINUE + else: + self.name = self.file.readstr() + + if self.write_memory or self.dump_memory: + data = self.file.readvar(size = self.TARGET_PAGE_SIZE) + else: # Just skip RAM data + self.file.file.seek(self.TARGET_PAGE_SIZE, 1) + + if self.write_memory: + self.files[self.name].seek(addr, os.SEEK_SET) + self.files[self.name].write(data) + if self.dump_memory: + hexdata = " ".join("{0:02x}".format(ord(c)) for c in data) + self.memory['%s (0x%016x)' % (self.name, addr)] = hexdata + + flags &= ~self.RAM_SAVE_FLAG_PAGE + elif flags & self.RAM_SAVE_FLAG_XBZRLE: + raise Exception("XBZRLE RAM compression is not supported yet") + elif flags & self.RAM_SAVE_FLAG_HOOK: + raise Exception("RAM hooks don't make sense with files") + + # End of RAM section + if flags & self.RAM_SAVE_FLAG_EOS: + break + + if flags != 0: + raise Exception("Unknown RAM flags: %x" % flags) + + def __del__(self): + if self.write_memory: + for key in self.files: + self.files[key].close() + + +class HTABSection(object): + HASH_PTE_SIZE_64 = 16 + + def __init__(self, file, version_id, device, section_key): + if version_id != 1: + raise Exception("Unknown HTAB version %d" % version_id) + + self.file = file + self.section_key = section_key + + def read(self): + + header = self.file.read32() + + if (header > 0): + # First section, just the hash shift + return + + # Read until end marker + while True: + index = self.file.read32() + n_valid = self.file.read16() + n_invalid = self.file.read16() + + if index == 0 and n_valid == 0 and n_invalid == 0: + break + + self.file.readvar(n_valid * HASH_PTE_SIZE_64) + + def getDict(self): + return "" + +class VMSDFieldGeneric(object): + def __init__(self, desc, file): + self.file = file + self.desc = desc + self.data = "" + + def __repr__(self): + return str(self.__str__()) + + def __str__(self): + return " ".join("{0:02x}".format(ord(c)) for c in self.data) + + def getDict(self): + return self.__str__() + + def read(self): + size = int(self.desc['size']) + self.data = self.file.readvar(size) + return self.data + +class VMSDFieldInt(VMSDFieldGeneric): + def __init__(self, desc, file): + super(VMSDFieldInt, self).__init__(desc, file) + self.size = int(desc['size']) + self.format = '0x%%0%dx' % (self.size * 2) + self.sdtype = '>i%d' % self.size + self.udtype = '>u%d' % self.size + + def __repr__(self): + if self.data < 0: + return ('%s (%d)' % ((self.format % self.udata), self.data)) + else: + return self.format % self.data + + def __str__(self): + return self.__repr__() + + def getDict(self): + return self.__str__() + + def read(self): + super(VMSDFieldInt, self).read() + self.sdata = np.fromstring(self.data, count=1, dtype=(self.sdtype))[0] + self.udata = np.fromstring(self.data, count=1, dtype=(self.udtype))[0] + self.data = self.sdata + return self.data + +class VMSDFieldUInt(VMSDFieldInt): + def __init__(self, desc, file): + super(VMSDFieldUInt, self).__init__(desc, file) + + def read(self): + super(VMSDFieldUInt, self).read() + self.data = self.udata + return self.data + +class VMSDFieldIntLE(VMSDFieldInt): + def __init__(self, desc, file): + super(VMSDFieldIntLE, self).__init__(desc, file) + self.dtype = '