* docs/atomics fixes and atomic_rcu_* optimization (Emilio)
* NBD bugfix (Eric)
* Memory fixes and cleanups (Paolo, Paul)
* scsi-block support for SCSI status, including persistent
  reservations (Paolo)
* kvm_stat moves to the Linux repository
* SCSI bug fixes (Peter, Prasad)
* Killing qemu_char_get_next_serial, non-ARM parts (Xiaoqiang)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABCAAGBQJXSpYYAAoJEL/70l94x66DZ5UIAJiFDjWdeFw5eS+MgjflcjN9
 zibrg+z2QFqFP7Z3FlQR5EjiML0g1S1eceUPBCyZazx6bQS3zvHkbhIWVx2RtHeI
 Rtt5SYRF+R1cvSEJrQGPr3ysCLAThBFuQRayJRQHi/emiUNzfTSW1z0YGaaErimP
 o4vF6Sjt1RQcnBkOPw6Qcoe4gSIbh9W4vQFbaZdB2KoWM9fOYBrjFgEFm1lo01vS
 Da77vlr1dcYCjohNgo9BOZIOllTQK7WG1XWRE6HPbqa8w6oePXikxJ1lc4PQxXyT
 clZy5uGJicp7QmEHf+yG8Evtm2mZG2iOEF4NbbFIm/GghOdFzwtHavxU/ahpAig=
 =DaAU
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* docs/atomics fixes and atomic_rcu_* optimization (Emilio)
* NBD bugfix (Eric)
* Memory fixes and cleanups (Paolo, Paul)
* scsi-block support for SCSI status, including persistent
  reservations (Paolo)
* kvm_stat moves to the Linux repository
* SCSI bug fixes (Peter, Prasad)
* Killing qemu_char_get_next_serial, non-ARM parts (Xiaoqiang)

# gpg: Signature made Sun 29 May 2016 08:11:20 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"

* remotes/bonzini/tags/for-upstream: (30 commits)
  exec: hide mr->ram_addr from qemu_get_ram_ptr users
  memory: split memory_region_from_host from qemu_ram_addr_from_host
  exec: remove ram_addr argument from qemu_ram_block_from_host
  memory: remove qemu_get_ram_fd, qemu_set_ram_fd, qemu_ram_block_host_ptr
  scsi-generic: Merge block max xfer len in INQUIRY response
  scsi-block: always use SG_IO
  scsi-disk: introduce scsi_disk_req_check_error
  scsi-disk: add need_fua_emulation to SCSIDiskClass
  scsi-disk: introduce dma_readv and dma_writev
  scsi-disk: introduce a common base class
  xen-hvm: ignore background I/O sections
  docs/atomics: update comparison with Linux
  atomics: do not emit consume barrier for atomic_rcu_read
  atomics: emit an smp_read_barrier_depends() barrier only for Alpha and Thread Sanitizer
  docs/atomics: update atomic_read/set comparison with Linux
  bt: rewrite csrhci_write to avoid out-of-bounds writes
  block/iscsi: avoid potential overflow of acb->task->cdb
  scsi: megasas: check 'read_queue_head' index value
  scsi: megasas: initialise local configuration data buffer
  scsi: megasas: use appropriate property buffer size
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2016-05-31 09:29:23 +01:00
commit 07e070aac4
36 changed files with 715 additions and 1248 deletions

View File

@ -92,9 +92,6 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
DOCS+=qmp-commands.txt
ifdef CONFIG_LINUX
DOCS+=kvm_stat.1
endif
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@ -571,12 +568,6 @@ qemu-ga.8: qemu-ga.texi
$(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
" GEN $@")
kvm_stat.1: scripts/kvm/kvm_stat.texi
$(call quiet-command, \
perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< kvm_stat.pod && \
$(POD2MAN) --section=1 --center=" " --release=" " kvm_stat.pod > $@, \
" GEN $@")
dvi: qemu-doc.dvi qemu-tech.dvi
html: qemu-doc.html qemu-tech.html
info: qemu-doc.info qemu-tech.info

View File

@ -833,6 +833,13 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return &acb->common;
}
if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
qemu_aio_unref(acb);
return NULL;
}
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",

View File

@ -246,7 +246,8 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
ram_addr_t ram_addr;
if (qemu_ram_addr_from_host(ptr, &ram_addr) == NULL) {
ram_addr = qemu_ram_addr_from_host(ptr);
if (ram_addr == RAM_ADDR_INVALID) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
}

View File

@ -326,21 +326,41 @@ and memory barriers, and the equivalents in QEMU:
use a boxed atomic_t type; atomic operations in QEMU are polymorphic
and use normal C types.
- atomic_read and atomic_set in Linux give no guarantee at all;
atomic_read and atomic_set in QEMU include a compiler barrier
(similar to the READ_ONCE/WRITE_ONCE macros in Linux).
- Originally, atomic_read and atomic_set in Linux gave no guarantee
at all. Linux 4.1 updated them to implement volatile
semantics via ACCESS_ONCE (or the more recent READ/WRITE_ONCE).
- most atomic read-modify-write operations in Linux return void;
in QEMU, all of them return the old value of the variable.
QEMU's atomic_read/set implement, if the compiler supports it, C11
atomic relaxed semantics, and volatile semantics otherwise.
Both semantics prevent the compiler from doing certain transformations;
the difference is that atomic accesses are guaranteed to be atomic,
while volatile accesses aren't. Thus, in the volatile case we just cross
our fingers hoping that the compiler will generate atomic accesses,
since we assume the variables passed are machine-word sized and
properly aligned.
No barriers are implied by atomic_read/set in either Linux or QEMU.
- atomic read-modify-write operations in Linux are of three kinds:
atomic_OP returns void
atomic_OP_return returns new value of the variable
atomic_fetch_OP returns the old value of the variable
atomic_cmpxchg returns the old value of the variable
In QEMU, the second kind does not exist. Currently Linux has
atomic_fetch_or only. QEMU provides and, or, inc, dec, add, sub.
- different atomic read-modify-write operations in Linux imply
a different set of memory barriers; in QEMU, all of them enforce
sequential consistency, which means they imply full memory barriers
before and after the operation.
- Linux does not have an equivalent of atomic_mb_read() and
atomic_mb_set(). In particular, note that set_mb() is a little
weaker than atomic_mb_set().
- Linux does not have an equivalent of atomic_mb_set(). In particular,
note that smp_store_mb() is a little weaker than atomic_mb_set().
atomic_mb_read() compiles to the same instructions as Linux's
smp_load_acquire(), but this should be treated as an implementation
detail. If required, QEMU might later add atomic_load_acquire() and
atomic_store_release() macros.
SOURCES

110
exec.c
View File

@ -1815,40 +1815,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
}
#endif /* !_WIN32 */
/* Return the file descriptor recorded in the RAMBlock that
 * qemu_get_ram_block() finds for @addr.
 *
 * The block lookup and the read of block->fd are both performed inside
 * the rcu_read_lock()/rcu_read_unlock() pair taken here; the fd value is
 * copied to a local before the lock is dropped.
 */
int qemu_get_ram_fd(ram_addr_t addr)
{
RAMBlock *block;
int fd;
rcu_read_lock();
block = qemu_get_ram_block(addr);
fd = block->fd;
rcu_read_unlock();
return fd;
}
/* Store @fd in the RAMBlock that qemu_get_ram_block() finds for @addr.
 *
 * The lookup and the assignment to block->fd happen inside the
 * rcu_read_lock()/rcu_read_unlock() pair taken here.
 */
void qemu_set_ram_fd(ram_addr_t addr, int fd)
{
RAMBlock *block;
rcu_read_lock();
block = qemu_get_ram_block(addr);
block->fd = fd;
rcu_read_unlock();
}
/* Return ramblock_ptr(block, 0) for the RAMBlock that qemu_get_ram_block()
 * finds for @addr, i.e. the pointer at offset 0 of that block rather than
 * the pointer corresponding to @addr itself.
 *
 * The lookup is done inside the rcu_read_lock()/rcu_read_unlock() pair
 * taken here; the returned pointer is used by the caller after the lock
 * has been released.
 */
void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
RAMBlock *block;
void *ptr;
rcu_read_lock();
block = qemu_get_ram_block(addr);
ptr = ramblock_ptr(block, 0);
rcu_read_unlock();
return ptr;
}
/* Return a host pointer to ram allocated with qemu_ram_alloc.
* This should not be used for general purpose DMA. Use address_space_map
* or address_space_rw instead. For local memory (e.g. video ram) that the
@ -1856,12 +1822,13 @@ void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
*
* Called within RCU critical section.
*/
void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
{
RAMBlock *block = ram_block;
if (block == NULL) {
block = qemu_get_ram_block(addr);
addr -= block->offset;
}
if (xen_enabled() && block->host == NULL) {
@ -1875,10 +1842,10 @@ void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
return ramblock_ptr(block, addr - block->offset);
return ramblock_ptr(block, addr);
}
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
/* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
* but takes a size argument.
*
* Called within RCU critical section.
@ -1887,16 +1854,15 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
hwaddr *size)
{
RAMBlock *block = ram_block;
ram_addr_t offset_inside_block;
if (*size == 0) {
return NULL;
}
if (block == NULL) {
block = qemu_get_ram_block(addr);
addr -= block->offset;
}
offset_inside_block = addr - block->offset;
*size = MIN(*size, block->max_length - offset_inside_block);
*size = MIN(*size, block->max_length - addr);
if (xen_enabled() && block->host == NULL) {
/* We need to check if the requested address is in the RAM
@ -1910,7 +1876,7 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
block->host = xen_map_cache(block->offset, block->max_length, 1);
}
return ramblock_ptr(block, offset_inside_block);
return ramblock_ptr(block, addr);
}
/*
@ -1931,16 +1897,16 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
* ram_addr_t.
*/
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *ram_addr,
ram_addr_t *offset)
{
RAMBlock *block;
uint8_t *host = ptr;
if (xen_enabled()) {
ram_addr_t ram_addr;
rcu_read_lock();
*ram_addr = xen_ram_addr_from_mapcache(ptr);
block = qemu_get_ram_block(*ram_addr);
ram_addr = xen_ram_addr_from_mapcache(ptr);
block = qemu_get_ram_block(ram_addr);
if (block) {
*offset = (host - block->host);
}
@ -1972,7 +1938,6 @@ found:
if (round_offset) {
*offset &= TARGET_PAGE_MASK;
}
*ram_addr = block->offset + *offset;
rcu_read_unlock();
return block;
}
@ -1999,18 +1964,17 @@ RAMBlock *qemu_ram_block_by_name(const char *name)
/* Some of the softmmu routines need to translate from a host pointer
(typically a TLB entry) back to a ram offset. */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
ram_addr_t qemu_ram_addr_from_host(void *ptr)
{
RAMBlock *block;
ram_addr_t offset; /* Not used */
block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
ram_addr_t offset;
block = qemu_ram_block_from_host(ptr, false, &offset);
if (!block) {
return NULL;
return RAM_ADDR_INVALID;
}
return block->mr;
return block->offset + offset;
}
/* Called within RCU critical section. */
@ -2022,13 +1986,13 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
}
switch (size) {
case 1:
stb_p(qemu_get_ram_ptr(NULL, ram_addr), val);
stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
case 2:
stw_p(qemu_get_ram_ptr(NULL, ram_addr), val);
stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
case 4:
stl_p(qemu_get_ram_ptr(NULL, ram_addr), val);
stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
break;
default:
abort();
@ -2490,6 +2454,8 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr length)
{
uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
addr += memory_region_get_ram_addr(mr);
/* No early return if dirty_log_mask is or becomes 0, because
* cpu_physical_memory_set_dirty_range will still call
* xen_modified_memory.
@ -2602,9 +2568,8 @@ static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
abort();
}
} else {
addr1 += memory_region_get_ram_addr(mr);
/* RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(ptr, buf, l);
invalidate_and_set_dirty(mr, addr1, l);
}
@ -2695,8 +2660,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
}
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block,
memory_region_get_ram_addr(mr) + addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, l);
}
@ -2779,9 +2743,8 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
memory_region_is_romd(mr))) {
l = memory_access_size(mr, l, addr1);
} else {
addr1 += memory_region_get_ram_addr(mr);
/* ROM/RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (type) {
case WRITE_DATA:
memcpy(ptr, buf, l);
@ -2939,7 +2902,6 @@ void *address_space_map(AddressSpace *as,
hwaddr done = 0;
hwaddr l, xlat, base;
MemoryRegion *mr, *this_mr;
ram_addr_t raddr;
void *ptr;
if (len == 0) {
@ -2974,7 +2936,6 @@ void *address_space_map(AddressSpace *as,
}
base = xlat;
raddr = memory_region_get_ram_addr(mr);
for (;;) {
len -= l;
@ -2993,7 +2954,7 @@ void *address_space_map(AddressSpace *as,
memory_region_ref(mr);
*plen = done;
ptr = qemu_ram_ptr_length(mr->ram_block, raddr + base, plen);
ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
rcu_read_unlock();
return ptr;
@ -3010,7 +2971,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
MemoryRegion *mr;
ram_addr_t addr1;
mr = qemu_ram_addr_from_host(buffer, &addr1);
mr = memory_region_from_host(buffer, &addr1);
assert(mr != NULL);
if (is_write) {
invalidate_and_set_dirty(mr, addr1, access_len);
@ -3077,8 +3038,7 @@ static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
#endif
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block,
memory_region_get_ram_addr(mr) + addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = ldl_le_p(ptr);
@ -3171,8 +3131,7 @@ static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
#endif
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block,
memory_region_get_ram_addr(mr) + addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = ldq_le_p(ptr);
@ -3285,8 +3244,7 @@ static inline uint32_t address_space_lduw_internal(AddressSpace *as,
#endif
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(mr->ram_block,
memory_region_get_ram_addr(mr) + addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
val = lduw_le_p(ptr);
@ -3368,13 +3326,13 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
} else {
addr1 += memory_region_get_ram_addr(mr);
ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
stl_p(ptr, val);
dirty_log_mask = memory_region_get_dirty_log_mask(mr);
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
4, dirty_log_mask);
r = MEMTX_OK;
}
if (result) {
@ -3423,8 +3381,7 @@ static inline void address_space_stl_internal(AddressSpace *as,
r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
} else {
/* RAM case */
addr1 += memory_region_get_ram_addr(mr);
ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
stl_le_p(ptr, val);
@ -3533,8 +3490,7 @@ static inline void address_space_stw_internal(AddressSpace *as,
r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
} else {
/* RAM case */
addr1 += memory_region_get_ram_addr(mr);
ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
switch (endian) {
case DEVICE_LITTLE_ENDIAN:
stw_le_p(ptr, val);

View File

@ -39,9 +39,14 @@ struct csrhci_s {
int out_size;
uint8_t outfifo[FIFO_LEN * 2];
uint8_t inpkt[FIFO_LEN];
enum {
CSR_HDR_LEN,
CSR_DATA_LEN,
CSR_DATA
} in_state;
int in_len;
int in_hdr;
int in_data;
int in_needed;
QEMUTimer *out_tm;
int64_t baud_delay;
@ -296,38 +301,60 @@ static int csrhci_data_len(const uint8_t *pkt)
exit(-1);
}
/* Reset the input-side parser state so that the next bytes received are
 * treated as the beginning of a new packet:
 *  - in_state goes back to CSR_HDR_LEN, the first step handled by
 *    csrhci_write();
 *  - in_len = 0: nothing is buffered in inpkt;
 *  - in_needed = 2: csrhci_write() waits for two bytes before it calls
 *    csrhci_header_len();
 *  - in_hdr is parked at INT_MAX until csrhci_write() computes it.
 */
static void csrhci_ready_for_next_inpkt(struct csrhci_s *s)
{
s->in_state = CSR_HDR_LEN;
s->in_len = 0;
s->in_needed = 2;
s->in_hdr = INT_MAX;
}
static int csrhci_write(struct CharDriverState *chr,
const uint8_t *buf, int len)
{
struct csrhci_s *s = (struct csrhci_s *) chr->opaque;
int plen = s->in_len;
int total = 0;
if (!s->enable)
return 0;
s->in_len += len;
memcpy(s->inpkt + plen, buf, len);
for (;;) {
int cnt = MIN(len, s->in_needed - s->in_len);
if (cnt) {
memcpy(s->inpkt + s->in_len, buf, cnt);
s->in_len += cnt;
buf += cnt;
len -= cnt;
total += cnt;
}
while (1) {
if (s->in_len >= 2 && plen < 2)
s->in_hdr = csrhci_header_len(s->inpkt) + 1;
if (s->in_len >= s->in_hdr && plen < s->in_hdr)
s->in_data = csrhci_data_len(s->inpkt) + s->in_hdr;
if (s->in_len >= s->in_data) {
csrhci_in_packet(s, s->inpkt);
memmove(s->inpkt, s->inpkt + s->in_len, s->in_len - s->in_data);
s->in_len -= s->in_data;
s->in_hdr = INT_MAX;
s->in_data = INT_MAX;
plen = 0;
} else
if (s->in_len < s->in_needed) {
break;
}
if (s->in_state == CSR_HDR_LEN) {
s->in_hdr = csrhci_header_len(s->inpkt) + 1;
assert(s->in_hdr >= s->in_needed);
s->in_needed = s->in_hdr;
s->in_state = CSR_DATA_LEN;
continue;
}
if (s->in_state == CSR_DATA_LEN) {
s->in_needed += csrhci_data_len(s->inpkt);
/* hci_acl_hdr could specify more than 4096 bytes, so assert. */
assert(s->in_needed <= sizeof(s->inpkt));
s->in_state = CSR_DATA;
continue;
}
if (s->in_state == CSR_DATA) {
csrhci_in_packet(s, s->inpkt);
csrhci_ready_for_next_inpkt(s);
}
}
return len;
return total;
}
static void csrhci_out_hci_packet_event(void *opaque,
@ -389,11 +416,9 @@ static void csrhci_reset(struct csrhci_s *s)
{
s->out_len = 0;
s->out_size = FIFO_LEN;
s->in_len = 0;
csrhci_ready_for_next_inpkt(s);
s->baud_delay = NANOSECONDS_PER_SECOND;
s->enable = 0;
s->in_hdr = INT_MAX;
s->in_data = INT_MAX;
s->modem_state = 0;
/* After a while... (but sooner than 10ms) */

View File

@ -983,9 +983,10 @@ void slavio_serial_ms_kbd_init(hwaddr base, qemu_irq irq,
sysbus_mmio_map(s, 0, base);
}
static int escc_init1(SysBusDevice *dev)
static void escc_init1(Object *obj)
{
ESCCState *s = ESCC(dev);
ESCCState *s = ESCC(obj);
SysBusDevice *dev = SYS_BUS_DEVICE(obj);
unsigned int i;
s->chn[0].disabled = s->disabled;
@ -994,17 +995,26 @@ static int escc_init1(SysBusDevice *dev)
sysbus_init_irq(dev, &s->chn[i].irq);
s->chn[i].chn = 1 - i;
s->chn[i].clock = s->frequency / 2;
}
s->chn[0].otherchn = &s->chn[1];
s->chn[1].otherchn = &s->chn[0];
memory_region_init_io(&s->mmio, obj, &escc_mem_ops, s, "escc",
ESCC_SIZE << s->it_shift);
sysbus_init_mmio(dev, &s->mmio);
}
static void escc_realize(DeviceState *dev, Error **errp)
{
ESCCState *s = ESCC(dev);
unsigned int i;
for (i = 0; i < 2; i++) {
if (s->chn[i].chr) {
qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
serial_receive1, serial_event, &s->chn[i]);
}
}
s->chn[0].otherchn = &s->chn[1];
s->chn[1].otherchn = &s->chn[0];
memory_region_init_io(&s->mmio, OBJECT(s), &escc_mem_ops, s, "escc",
ESCC_SIZE << s->it_shift);
sysbus_init_mmio(dev, &s->mmio);
if (s->chn[0].type == mouse) {
qemu_add_mouse_event_handler(sunmouse_event, &s->chn[0], 0,
@ -1014,8 +1024,6 @@ static int escc_init1(SysBusDevice *dev)
s->chn[1].hs = qemu_input_handler_register((DeviceState *)(&s->chn[1]),
&sunkbd_handler);
}
return 0;
}
static Property escc_properties[] = {
@ -1032,10 +1040,9 @@ static Property escc_properties[] = {
static void escc_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
k->init = escc_init1;
dc->reset = escc_reset;
dc->realize = escc_realize;
dc->vmsd = &vmstate_escc;
dc->props = escc_properties;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
@ -1045,6 +1052,7 @@ static const TypeInfo escc_info = {
.name = TYPE_ESCC,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ESCCState),
.instance_init = escc_init1,
.class_init = escc_class_init,
};

View File

@ -159,6 +159,11 @@ static const MemoryRegionOps ser_ops = {
}
};
/* Property list for the ETRAX serial device: a single "chardev" property
 * backed by the ETRAXSerial.chr field, followed by the list terminator.
 */
static Property etraxfs_ser_properties[] = {
DEFINE_PROP_CHR("chardev", ETRAXSerial, chr),
DEFINE_PROP_END_OF_LIST(),
};
static void serial_receive(void *opaque, const uint8_t *buf, int size)
{
ETRAXSerial *s = opaque;
@ -209,40 +214,42 @@ static void etraxfs_ser_reset(DeviceState *d)
}
static int etraxfs_ser_init(SysBusDevice *dev)
static void etraxfs_ser_init(Object *obj)
{
ETRAXSerial *s = ETRAX_SERIAL(obj);
SysBusDevice *dev = SYS_BUS_DEVICE(obj);
sysbus_init_irq(dev, &s->irq);
memory_region_init_io(&s->mmio, obj, &ser_ops, s,
"etraxfs-serial", R_MAX * 4);
sysbus_init_mmio(dev, &s->mmio);
}
static void etraxfs_ser_realize(DeviceState *dev, Error **errp)
{
ETRAXSerial *s = ETRAX_SERIAL(dev);
sysbus_init_irq(dev, &s->irq);
memory_region_init_io(&s->mmio, OBJECT(s), &ser_ops, s,
"etraxfs-serial", R_MAX * 4);
sysbus_init_mmio(dev, &s->mmio);
/* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr,
serial_can_receive, serial_receive,
serial_event, s);
}
return 0;
}
static void etraxfs_ser_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
k->init = etraxfs_ser_init;
dc->reset = etraxfs_ser_reset;
/* Reason: init() method uses qemu_char_get_next_serial() */
dc->cannot_instantiate_with_device_add_yet = true;
dc->props = etraxfs_ser_properties;
dc->realize = etraxfs_ser_realize;
}
/* Type description for TYPE_ETRAX_FS_SERIAL: a TYPE_SYS_BUS_DEVICE child
 * whose instances are ETRAXSerial structures, with etraxfs_ser_init as the
 * per-instance init hook and etraxfs_ser_class_init as the class init hook.
 */
static const TypeInfo etraxfs_ser_info = {
.name = TYPE_ETRAX_FS_SERIAL,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ETRAXSerial),
.instance_init = etraxfs_ser_init,
.class_init = etraxfs_ser_class_init,
};

View File

@ -114,17 +114,13 @@ static void juart_reset(DeviceState *d)
s->jrx = 0;
}
static int lm32_juart_init(SysBusDevice *dev)
static void lm32_juart_realize(DeviceState *dev, Error **errp)
{
LM32JuartState *s = LM32_JUART(dev);
/* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr, juart_can_rx, juart_rx, juart_event, s);
}
return 0;
}
static const VMStateDescription vmstate_lm32_juart = {
@ -138,16 +134,19 @@ static const VMStateDescription vmstate_lm32_juart = {
}
};
/* Property list for the LM32 JTAG UART: a single "chardev" property backed
 * by the LM32JuartState.chr field, followed by the list terminator.
 */
static Property lm32_juart_properties[] = {
DEFINE_PROP_CHR("chardev", LM32JuartState, chr),
DEFINE_PROP_END_OF_LIST(),
};
static void lm32_juart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
k->init = lm32_juart_init;
dc->reset = juart_reset;
dc->vmsd = &vmstate_lm32_juart;
/* Reason: init() method uses qemu_char_get_next_serial() */
dc->cannot_instantiate_with_device_add_yet = true;
dc->props = lm32_juart_properties;
dc->realize = lm32_juart_realize;
}
static const TypeInfo lm32_juart_info = {

View File

@ -249,23 +249,25 @@ static void uart_reset(DeviceState *d)
s->regs[R_LSR] = LSR_THRE | LSR_TEMT;
}
static int lm32_uart_init(SysBusDevice *dev)
static void lm32_uart_init(Object *obj)
{
LM32UartState *s = LM32_UART(dev);
LM32UartState *s = LM32_UART(obj);
SysBusDevice *dev = SYS_BUS_DEVICE(obj);
sysbus_init_irq(dev, &s->irq);
memory_region_init_io(&s->iomem, OBJECT(s), &uart_ops, s,
memory_region_init_io(&s->iomem, obj, &uart_ops, s,
"uart", R_MAX * 4);
sysbus_init_mmio(dev, &s->iomem);
}
static void lm32_uart_realize(DeviceState *dev, Error **errp)
{
LM32UartState *s = LM32_UART(dev);
/* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s);
}
return 0;
}
static const VMStateDescription vmstate_lm32_uart = {
@ -278,22 +280,26 @@ static const VMStateDescription vmstate_lm32_uart = {
}
};
/* Property list for the LM32 UART: a single "chardev" property backed by
 * the LM32UartState.chr field, followed by the list terminator.
 */
static Property lm32_uart_properties[] = {
DEFINE_PROP_CHR("chardev", LM32UartState, chr),
DEFINE_PROP_END_OF_LIST(),
};
static void lm32_uart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
k->init = lm32_uart_init;
dc->reset = uart_reset;
dc->vmsd = &vmstate_lm32_uart;
/* Reason: init() method uses qemu_char_get_next_serial() */
dc->cannot_instantiate_with_device_add_yet = true;
dc->props = lm32_uart_properties;
dc->realize = lm32_uart_realize;
}
/* Type description for TYPE_LM32_UART: a TYPE_SYS_BUS_DEVICE child whose
 * instances are LM32UartState structures, with lm32_uart_init as the
 * per-instance init hook and lm32_uart_class_init as the class init hook.
 */
static const TypeInfo lm32_uart_info = {
.name = TYPE_LM32_UART,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(LM32UartState),
.instance_init = lm32_uart_init,
.class_init = lm32_uart_class_init,
};

View File

@ -200,8 +200,6 @@ static void milkymist_uart_realize(DeviceState *dev, Error **errp)
{
MilkymistUartState *s = MILKYMIST_UART(dev);
/* FIXME use a qdev chardev prop instead of qemu_char_get_next_serial() */
s->chr = qemu_char_get_next_serial();
if (s->chr) {
qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s);
}
@ -229,6 +227,11 @@ static const VMStateDescription vmstate_milkymist_uart = {
}
};
/* Property list for the Milkymist UART: a single "chardev" property backed
 * by the MilkymistUartState.chr field, followed by the list terminator.
 */
static Property milkymist_uart_properties[] = {
DEFINE_PROP_CHR("chardev", MilkymistUartState, chr),
DEFINE_PROP_END_OF_LIST(),
};
static void milkymist_uart_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@ -236,8 +239,7 @@ static void milkymist_uart_class_init(ObjectClass *klass, void *data)
dc->realize = milkymist_uart_realize;
dc->reset = milkymist_uart_reset;
dc->vmsd = &vmstate_milkymist_uart;
/* Reason: realize() method uses qemu_char_get_next_serial() */
dc->cannot_instantiate_with_device_add_yet = true;
dc->props = milkymist_uart_properties;
}
static const TypeInfo milkymist_uart_info = {

View File

@ -37,6 +37,7 @@
#include "sysemu/block-backend.h"
#include "exec/address-spaces.h"
#include "sysemu/qtest.h"
#include "sysemu/sysemu.h"
#define D(x)
#define DNAND(x)
@ -341,8 +342,7 @@ void axisdev88_init(MachineState *machine)
sysbus_create_varargs("etraxfs,timer", 0x3005e000, irq[0x1b], nmi[1], NULL);
for (i = 0; i < 4; i++) {
sysbus_create_simple("etraxfs,serial", 0x30026000 + i * 0x2000,
irq[0x14 + i]);
etraxfs_ser_create(0x30026000 + i * 0x2000, irq[0x14 + i], serial_hds[i]);
}
if (kernel_filename) {

View File

@ -16,14 +16,31 @@ static inline DeviceState *lm32_pic_init(qemu_irq cpu_irq)
return dev;
}
static inline DeviceState *lm32_juart_init(void)
static inline DeviceState *lm32_juart_init(CharDriverState *chr)
{
DeviceState *dev;
dev = qdev_create(NULL, TYPE_LM32_JUART);
qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
return dev;
}
/* Create and wire up an "lm32-uart" device.
 *
 * @addr: address at which MMIO region 0 of the device is mapped
 * @irq:  line connected to IRQ 0 of the device
 * @chr:  character backend assigned to the device's "chardev" property
 *
 * The "chardev" property is set before qdev_init_nofail() is called; the
 * MMIO mapping and IRQ connection are done afterwards.
 * Returns the newly created device.
 */
static inline DeviceState *lm32_uart_create(hwaddr addr,
qemu_irq irq,
CharDriverState *chr)
{
DeviceState *dev;
SysBusDevice *s;
dev = qdev_create(NULL, "lm32-uart");
s = SYS_BUS_DEVICE(dev);
qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
sysbus_mmio_map(s, 0, addr);
sysbus_connect_irq(s, 0, irq);
return dev;
}
#endif

View File

@ -31,6 +31,7 @@
#include "lm32_hwsetup.h"
#include "lm32.h"
#include "exec/address-spaces.h"
#include "sysemu/sysemu.h"
typedef struct {
LM32CPU *cpu;
@ -131,12 +132,12 @@ static void lm32_evr_init(MachineState *machine)
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}
sysbus_create_simple("lm32-uart", uart0_base, irq[uart0_irq]);
lm32_uart_create(uart0_base, irq[uart0_irq], serial_hds[0]);
sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]);
sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]);
/* make sure juart isn't the first chardev */
env->juart_state = lm32_juart_init();
env->juart_state = lm32_juart_init(serial_hds[1]);
reset_info->bootstrap_pc = flash_base;
@ -232,13 +233,13 @@ static void lm32_uclinux_init(MachineState *machine)
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}
sysbus_create_simple("lm32-uart", uart0_base, irq[uart0_irq]);
lm32_uart_create(uart0_base, irq[uart0_irq], serial_hds[0]);
sysbus_create_simple("lm32-timer", timer0_base, irq[timer0_irq]);
sysbus_create_simple("lm32-timer", timer1_base, irq[timer1_irq]);
sysbus_create_simple("lm32-timer", timer2_base, irq[timer2_irq]);
/* make sure juart isn't the first chardev */
env->juart_state = lm32_juart_init();
env->juart_state = lm32_juart_init(serial_hds[1]);
reset_info->bootstrap_pc = flash_base;

View File

@ -5,11 +5,13 @@
#include "net/net.h"
static inline DeviceState *milkymist_uart_create(hwaddr base,
qemu_irq irq)
qemu_irq irq,
CharDriverState *chr)
{
DeviceState *dev;
dev = qdev_create(NULL, "milkymist-uart");
qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq);

View File

@ -159,7 +159,7 @@ milkymist_init(MachineState *machine)
}
g_free(bios_filename);
milkymist_uart_create(0x60000000, irq[0]);
milkymist_uart_create(0x60000000, irq[0], serial_hds[0]);
milkymist_sysctl_create(0x60001000, irq[1], irq[2], irq[3],
80000000, 0x10014d31, 0x0000041f, 0x00000001);
milkymist_hpdmc_create(0x60002000);
@ -175,7 +175,7 @@ milkymist_init(MachineState *machine)
0x20000000, 0x1000, 0x20020000, 0x2000);
/* make sure juart isn't the first chardev */
env->juart_state = lm32_juart_init();
env->juart_state = lm32_juart_init(serial_hds[1]);
if (kernel_filename) {
uint64_t entry;

View File

@ -33,7 +33,6 @@
#include "sysemu/hostmem.h"
#include "sysemu/qtest.h"
#include "qapi/visitor.h"
#include "exec/ram_addr.h"
#include "hw/misc/ivshmem.h"
@ -533,7 +532,7 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
}
memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s),
"ivshmem.bar2", size, ptr);
qemu_set_ram_fd(memory_region_get_ram_addr(&s->server_bar2), fd);
memory_region_set_fd(&s->server_bar2, fd);
s->ivshmem_bar2 = &s->server_bar2;
}
@ -940,7 +939,7 @@ static void ivshmem_exit(PCIDevice *dev)
strerror(errno));
}
fd = qemu_get_ram_fd(memory_region_get_ram_addr(s->ivshmem_bar2));
fd = memory_region_get_fd(s->ivshmem_bar2);
close(fd);
}

View File

@ -650,7 +650,9 @@ static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd)
pa_hi = le32_to_cpu(initq->pi_addr_hi);
s->producer_pa = ((uint64_t) pa_hi << 32) | pa_lo;
s->reply_queue_head = ldl_le_pci_dma(pcid, s->producer_pa);
s->reply_queue_head %= MEGASAS_MAX_FRAMES;
s->reply_queue_tail = ldl_le_pci_dma(pcid, s->consumer_pa);
s->reply_queue_tail %= MEGASAS_MAX_FRAMES;
flags = le32_to_cpu(initq->flags);
if (flags & MFI_QUEUE_FLAG_CONTEXT64) {
s->flags |= MEGASAS_MASK_USE_QUEUE64;
@ -1293,7 +1295,7 @@ static int megasas_dcmd_ld_get_info(MegasasState *s, MegasasCmd *cmd)
static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd)
{
uint8_t data[4096];
uint8_t data[4096] = { 0 };
struct mfi_config_data *info;
int num_pd_disks = 0, array_offset, ld_offset;
BusChild *kid;
@ -1446,7 +1448,7 @@ static int megasas_dcmd_set_properties(MegasasState *s, MegasasCmd *cmd)
dcmd_size);
return MFI_STAT_INVALID_PARAMETER;
}
dma_buf_write((uint8_t *)&info, cmd->iov_size, &cmd->qsg);
dma_buf_write((uint8_t *)&info, dcmd_size, &cmd->qsg);
trace_megasas_dcmd_unsupported(cmd->index, cmd->iov_size);
return MFI_STAT_OK;
}

View File

@ -754,11 +754,6 @@ static void mptsas_fetch_request(MPTSASState *s)
hwaddr addr;
int size;
if (s->state != MPI_IOC_STATE_OPERATIONAL) {
mptsas_set_fault(s, MPI_IOCSTATUS_INVALID_STATE);
return;
}
/* Read the message header from the guest first. */
addr = s->host_mfa_high_addr | MPTSAS_FIFO_GET(s, request_post);
pci_dma_read(pci, addr, req, sizeof(hdr));
@ -789,6 +784,10 @@ static void mptsas_fetch_requests(void *opaque)
{
MPTSASState *s = opaque;
if (s->state != MPI_IOC_STATE_OPERATIONAL) {
mptsas_set_fault(s, MPI_IOCSTATUS_INVALID_STATE);
return;
}
while (!MPTSAS_FIFO_EMPTY(s, request_post)) {
mptsas_fetch_request(s);
}

View File

@ -53,7 +53,21 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
#define DEFAULT_MAX_UNMAP_SIZE (1 << 30) /* 1 GB */
#define DEFAULT_MAX_IO_SIZE INT_MAX /* 2 GB - 1 block */
typedef struct SCSIDiskState SCSIDiskState;
#define TYPE_SCSI_DISK_BASE "scsi-disk-base"
#define SCSI_DISK_BASE(obj) \
OBJECT_CHECK(SCSIDiskState, (obj), TYPE_SCSI_DISK_BASE)
#define SCSI_DISK_BASE_CLASS(klass) \
OBJECT_CLASS_CHECK(SCSIDiskClass, (klass), TYPE_SCSI_DISK_BASE)
#define SCSI_DISK_BASE_GET_CLASS(obj) \
OBJECT_GET_CLASS(SCSIDiskClass, (obj), TYPE_SCSI_DISK_BASE)
/*
 * Class structure shared by the scsi-disk device types.  Subclasses may
 * override the hooks below from their class_init function.
 */
typedef struct SCSIDiskClass {
    SCSIDeviceClass parent_class;
    /* I/O function used for reads: handed to dma_blk_io() for scatter/gather
     * requests and called directly for requests using the bounce buffer. */
    DMAIOFunc *dma_readv;
    /* Same as dma_readv, for writes. */
    DMAIOFunc *dma_writev;
    /* Queried when a DMA command is set up; the result is stored in the
     * request's need_fua_emulation flag. */
    bool (*need_fua_emulation)(SCSICommand *cmd);
} SCSIDiskClass;
typedef struct SCSIDiskReq {
SCSIRequest req;
@ -62,16 +76,18 @@ typedef struct SCSIDiskReq {
uint32_t sector_count;
uint32_t buflen;
bool started;
bool need_fua_emulation;
struct iovec iov;
QEMUIOVector qiov;
BlockAcctCookie acct;
unsigned char *status;
} SCSIDiskReq;
#define SCSI_DISK_F_REMOVABLE 0
#define SCSI_DISK_F_DPOFUA 1
#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS 2
struct SCSIDiskState
typedef struct SCSIDiskState
{
SCSIDevice qdev;
uint32_t features;
@ -88,7 +104,7 @@ struct SCSIDiskState
char *product;
bool tray_open;
bool tray_locked;
};
} SCSIDiskState;
static int scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed);
@ -161,6 +177,29 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
qemu_iovec_init_external(&r->qiov, &r->iov, 1);
}
/*
 * Common completion check for disk requests.
 *
 * Returns true when the request has been dealt with here (it was
 * canceled, scsi_handle_rw_error() handled the negative @ret, or a
 * non-zero status byte was reported through r->status), in which case the
 * caller must not continue processing it.  Returns false when the caller
 * should carry on.
 *
 * @acct_failed is forwarded to scsi_handle_rw_error() and also decides
 * whether a non-zero status is accounted as a failed block operation.
 */
static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
{
    if (r->req.io_canceled) {
        scsi_req_cancel_complete(&r->req);
        return true;
    }

    if (ret < 0) {
        return scsi_handle_rw_error(r, -ret, acct_failed);
    }

    /* No status pointer, or a zero status byte: nothing to report. */
    if (!r->status || !*r->status) {
        return false;
    }

    if (acct_failed) {
        SCSIDiskState *disk = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);

        block_acct_failed(blk_get_stats(disk->qdev.conf.blk), &r->acct);
    }
    scsi_req_complete(&r->req, *r->status);
    return true;
}
static void scsi_aio_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@ -168,17 +207,10 @@ static void scsi_aio_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, true)) {
goto done;
}
}
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
scsi_req_complete(&r->req, GOOD);
@ -217,13 +249,9 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
assert(r->req.aiocb == NULL);
assert(!r->req.io_canceled);
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
goto done;
}
if (scsi_is_cmd_fua(&r->req.cmd)) {
if (r->need_fua_emulation) {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
@ -231,26 +259,16 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
}
scsi_req_complete(&r->req, GOOD);
done:
scsi_req_unref(&r->req);
}
static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
{
assert(r->req.aiocb == NULL);
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, false)) {
goto done;
}
}
r->sector += r->sector_count;
r->sector_count = 0;
if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
@ -288,17 +306,10 @@ static void scsi_read_complete(void * opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, true)) {
goto done;
}
}
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
DPRINTF("Data ready tag=0x%x len=%zd\n", r->req.tag, r->qiov.size);
@ -315,36 +326,29 @@ done:
static void scsi_do_read(SCSIDiskReq *r, int ret)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
assert (r->req.aiocb == NULL);
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, false)) {
goto done;
}
}
/* The request is used as the AIO opaque value, so add a ref. */
scsi_req_ref(&r->req);
if (r->req.sg) {
dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
r->req.resid -= r->req.sg->size;
r->req.aiocb = dma_blk_read(s->qdev.conf.blk, r->req.sg,
r->sector << BDRV_SECTOR_BITS,
scsi_dma_complete, r);
r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
r->req.sg, r->sector << BDRV_SECTOR_BITS,
sdc->dma_readv, r, scsi_dma_complete, r,
DMA_DIRECTION_FROM_DEVICE);
} else {
scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
r->qiov.size, BLOCK_ACCT_READ);
r->req.aiocb = blk_aio_preadv(s->qdev.conf.blk,
r->sector << BDRV_SECTOR_BITS, &r->qiov,
0, scsi_read_complete, r);
r->req.aiocb = sdc->dma_readv(r->sector, &r->qiov,
scsi_read_complete, r, r);
}
done:
@ -399,7 +403,7 @@ static void scsi_read_data(SCSIRequest *req)
first = !r->started;
r->started = true;
if (first && scsi_is_cmd_fua(&r->req.cmd)) {
if (first && r->need_fua_emulation) {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
@ -456,18 +460,10 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
uint32_t n;
assert (r->req.aiocb == NULL);
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, false)) {
goto done;
}
}
n = r->qiov.size / 512;
r->sector += n;
r->sector_count -= n;
@ -504,6 +500,7 @@ static void scsi_write_data(SCSIRequest *req)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
/* No data transfer may already be in progress */
assert(r->req.aiocb == NULL);
@ -540,15 +537,15 @@ static void scsi_write_data(SCSIRequest *req)
if (r->req.sg) {
dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
r->req.resid -= r->req.sg->size;
r->req.aiocb = dma_blk_write(s->qdev.conf.blk, r->req.sg,
r->sector << BDRV_SECTOR_BITS,
scsi_dma_complete, r);
r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
r->req.sg, r->sector << BDRV_SECTOR_BITS,
sdc->dma_writev, r, scsi_dma_complete, r,
DMA_DIRECTION_TO_DEVICE);
} else {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct,
r->qiov.size, BLOCK_ACCT_WRITE);
r->req.aiocb = blk_aio_pwritev(s->qdev.conf.blk,
r->sector << BDRV_SECTOR_BITS, &r->qiov,
0, scsi_write_complete, r);
r->req.aiocb = sdc->dma_writev(r->sector << BDRV_SECTOR_BITS, &r->qiov,
scsi_write_complete, r, r);
}
}
@ -1600,18 +1597,10 @@ static void scsi_unmap_complete_noio(UnmapCBData *data, int ret)
uint32_t nb_sectors;
assert(r->req.aiocb == NULL);
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, false)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, false)) {
goto done;
}
}
if (data->count > 0) {
sector_num = ldq_be_p(&data->inbuf[0]);
nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
@ -1711,17 +1700,10 @@ static void scsi_write_same_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
if (r->req.io_canceled) {
scsi_req_cancel_complete(&r->req);
if (scsi_disk_req_check_error(r, ret, true)) {
goto done;
}
if (ret < 0) {
if (scsi_handle_rw_error(r, -ret, true)) {
goto done;
}
}
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
data->nb_sectors -= data->iov.iov_len / 512;
@ -2138,6 +2120,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
{
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
uint32_t len;
uint8_t command;
@ -2196,6 +2179,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
return 0;
}
r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
if (r->sector_count == 0) {
scsi_req_complete(&r->req, GOOD);
}
@ -2578,16 +2562,145 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp)
scsi_generic_read_device_identification(&s->qdev);
}
/*
 * Per-request state for scsi-block DMA requests: a SCSIDiskReq extended
 * with the SG_IO header and the CDB that is submitted with it.
 */
typedef struct SCSIBlockReq {
    SCSIDiskReq req;
    sg_io_hdr_t io_header;
    /* Selected bytes of the original CDB, copied into our own CDB. */
    uint8_t cmd, cdb1, group_number;
    /* CDB passed to SG_IO. */
    uint8_t cdb[16];
} SCSIBlockReq;
/*
 * Issue one SG_IO ioctl for (part of) a scsi-block read or write.
 *
 * @req:       request whose saved opcode, CDB byte 1 and group number are
 *             used to build the CDB
 * @offset:    byte offset of this segment; must be a multiple of the
 *             device block size
 * @iov:       data buffers for this segment; the total size must be a
 *             multiple of the device block size
 * @direction: value stored in the SG_IO header's dxfer_direction
 * @cb:        completion callback passed to blk_aio_ioctl()
 * @opaque:    argument for @cb
 *
 * Returns the AIOCB from blk_aio_ioctl(), which is asserted to be non-NULL.
 */
static BlockAIOCB *scsi_block_do_sgio(SCSIBlockReq *req,
                                      int64_t offset, QEMUIOVector *iov,
                                      int direction,
                                      BlockCompletionFunc *cb, void *opaque)
{
    sg_io_hdr_t *io_header = &req->io_header;
    SCSIDiskReq *r = &req->req;
    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
    int nb_logical_blocks;
    uint64_t lba;
    BlockAIOCB *aiocb;
    /* This is not supported yet. It can only happen if the guest does
     * reads and writes that are not aligned to one logical sector
     * _and_ cover multiple MemoryRegions.
     */
    assert(offset % s->qdev.blocksize == 0);
    assert(iov->size % s->qdev.blocksize == 0);
    io_header->interface_id = 'S';
    /* The data transfer comes from the QEMUIOVector. */
    io_header->dxfer_direction = direction;
    io_header->dxfer_len = iov->size;
    io_header->dxferp = (void *)iov->iov;
    io_header->iovec_count = iov->niov;
    assert(io_header->iovec_count == iov->niov); /* no overflow! */
    /* Build a new CDB with the LBA and length patched in, in case
     * DMA helpers split the transfer in multiple segments. Do not
     * build a CDB smaller than what the guest wanted, and only build
     * a larger one if strictly necessary.
     */
    io_header->cmdp = req->cdb;
    lba = offset / s->qdev.blocksize;
    nb_logical_blocks = io_header->dxfer_len / s->qdev.blocksize;
    if ((req->cmd >> 5) == 0 && lba <= 0x1ffff) {
        /* 6-byte CDB */
        /* The opcode lands in byte 0 and the LBA in the bytes after it. */
        stl_be_p(&req->cdb[0], lba | (req->cmd << 24));
        req->cdb[4] = nb_logical_blocks;
        req->cdb[5] = 0;
        io_header->cmd_len = 6;
    } else if ((req->cmd >> 5) <= 1 && lba <= 0xffffffffULL) {
        /* 10-byte CDB */
        /* Keep the low five opcode bits and force the group-code bits. */
        req->cdb[0] = (req->cmd & 0x1f) | 0x20;
        req->cdb[1] = req->cdb1;
        stl_be_p(&req->cdb[2], lba);
        req->cdb[6] = req->group_number;
        stw_be_p(&req->cdb[7], nb_logical_blocks);
        req->cdb[9] = 0;
        io_header->cmd_len = 10;
    } else if ((req->cmd >> 5) != 4 && lba <= 0xffffffffULL) {
        /* 12-byte CDB */
        req->cdb[0] = (req->cmd & 0x1f) | 0xA0;
        req->cdb[1] = req->cdb1;
        stl_be_p(&req->cdb[2], lba);
        stl_be_p(&req->cdb[6], nb_logical_blocks);
        req->cdb[10] = req->group_number;
        req->cdb[11] = 0;
        io_header->cmd_len = 12;
    } else {
        /* 16-byte CDB */
        req->cdb[0] = (req->cmd & 0x1f) | 0x80;
        req->cdb[1] = req->cdb1;
        stq_be_p(&req->cdb[2], lba);
        stl_be_p(&req->cdb[10], nb_logical_blocks);
        req->cdb[14] = req->group_number;
        req->cdb[15] = 0;
        io_header->cmd_len = 16;
    }
    /* The rest is as in scsi-generic.c. */
    io_header->mx_sb_len = sizeof(r->req.sense);
    io_header->sbp = r->req.sense;
    io_header->timeout = UINT_MAX;
    io_header->usr_ptr = r;
    io_header->flags |= SG_FLAG_DIRECT_IO;
    aiocb = blk_aio_ioctl(s->qdev.conf.blk, SG_IO, io_header, cb, opaque);
    assert(aiocb != NULL);
    return aiocb;
}
/*
 * need_fua_emulation hook that never requests FUA emulation, whatever
 * @cmd contains.
 * NOTE(review): presumably safe for scsi-block because CDB byte 1 is
 * copied into the CDB sent to the device -- confirm.
 */
static bool scsi_block_no_fua(SCSICommand *cmd)
{
    return false;
}
/*
 * Read hook for scsi-block: forwards the segment to scsi_block_do_sgio()
 * with the SG_DXFER_FROM_DEV direction.  @opaque is the SCSIBlockReq.
 */
static BlockAIOCB *scsi_block_dma_readv(int64_t offset,
                                        QEMUIOVector *iov,
                                        BlockCompletionFunc *cb, void *cb_opaque,
                                        void *opaque)
{
    SCSIBlockReq *block_req = opaque;
    BlockAIOCB *acb;

    acb = scsi_block_do_sgio(block_req, offset, iov,
                             SG_DXFER_FROM_DEV, cb, cb_opaque);
    return acb;
}
/*
 * Write hook for scsi-block: forwards the segment to scsi_block_do_sgio()
 * with the SG_DXFER_TO_DEV direction.  @opaque is the SCSIBlockReq.
 */
static BlockAIOCB *scsi_block_dma_writev(int64_t offset,
                                         QEMUIOVector *iov,
                                         BlockCompletionFunc *cb, void *cb_opaque,
                                         void *opaque)
{
    SCSIBlockReq *block_req = opaque;
    BlockAIOCB *acb;

    acb = scsi_block_do_sgio(block_req, offset, iov,
                             SG_DXFER_TO_DEV, cb, cb_opaque);
    return acb;
}
static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
{
switch (buf[0]) {
case VERIFY_10:
case VERIFY_12:
case VERIFY_16:
/* Check if BYTCHK == 0x01 (data-out buffer contains data
* for the number of logical blocks specified in the length
* field). For other modes, do not use scatter/gather operation.
*/
if ((buf[1] & 6) != 2) {
return false;
}
break;
case READ_6:
case READ_10:
case READ_12:
case READ_16:
case VERIFY_10:
case VERIFY_12:
case VERIFY_16:
case WRITE_6:
case WRITE_10:
case WRITE_12:
@ -2595,21 +2708,8 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
case WRITE_VERIFY_10:
case WRITE_VERIFY_12:
case WRITE_VERIFY_16:
/* If we are not using O_DIRECT, we might read stale data from the
* host cache if writes were made using other commands than these
* ones (such as WRITE SAME or EXTENDED COPY, etc.). So, without
* O_DIRECT everything must go through SG_IO.
*/
if (!(blk_get_flags(s->qdev.conf.blk) & BDRV_O_NOCACHE)) {
break;
}
/* MMC writing cannot be done via pread/pwrite, because it sometimes
/* MMC writing cannot be done via DMA helpers, because it sometimes
* involves writing beyond the maximum LBA or to negative LBA (lead-in).
* And once you do these writes, reading from the block device is
* unreliable, too. It is even possible that reads deliver random data
* from the host page cache (this is probably a Linux bug).
*
* We might use scsi_disk_dma_reqops as long as no writing commands are
* seen, but performance usually isn't paramount on optical media. So,
* just make scsi-block operate the same as scsi-generic for them.
@ -2627,6 +2727,54 @@ static bool scsi_block_is_passthrough(SCSIDiskState *s, uint8_t *buf)
}
/*
 * send_command hook for scsi-block DMA requests.
 *
 * Saves the opcode, CDB byte 1 and the group-number byte of the guest's
 * CDB in the SCSIBlockReq so that scsi_block_do_sgio() can rebuild a CDB
 * for every SG_IO segment.  CDBs with any of the top three bits of byte 1
 * set are rejected with INVALID_FIELD, since protection information is
 * not supported.  Otherwise the request's status pointer is aimed at the
 * SG_IO header's status byte and processing continues in
 * scsi_disk_dma_command().
 *
 * Returns 0 after raising the check condition, otherwise the return value
 * of scsi_disk_dma_command().
 */
static int32_t scsi_block_dma_command(SCSIRequest *req, uint8_t *buf)
{
    SCSIBlockReq *r = (SCSIBlockReq *)req;
    r->cmd = req->cmd.buf[0];
    switch (r->cmd >> 5) {
    case 0:
        /* 6-byte CDB. */
        r->cdb1 = r->group_number = 0;
        break;
    case 1:
        /* 10-byte CDB. */
        r->cdb1 = req->cmd.buf[1];
        r->group_number = req->cmd.buf[6];
        /* Must not fall through: the next case would replace the group
         * number with byte 10, which is not part of a 10-byte CDB.
         */
        break;
    case 4:
        /* 12-byte CDB. */
        r->cdb1 = req->cmd.buf[1];
        r->group_number = req->cmd.buf[10];
        break;
    case 5:
        /* 16-byte CDB. */
        r->cdb1 = req->cmd.buf[1];
        r->group_number = req->cmd.buf[14];
        break;
    default:
        abort();
    }
    if (r->cdb1 & 0xe0) {
        /* Protection information is not supported. */
        scsi_check_condition(&r->req, SENSE_CODE(INVALID_FIELD));
        return 0;
    }
    r->req.status = &r->io_header.status;
    return scsi_disk_dma_command(req, buf);
}
/*
 * Request operations for scsi-block requests that use the DMA helpers.
 * Requests are allocated with room for a SCSIBlockReq, and commands enter
 * through scsi_block_dma_command(); the remaining callbacks are the
 * generic scsi-disk ones.
 */
static const SCSIReqOps scsi_block_dma_reqops = {
    .size = sizeof(SCSIBlockReq),
    .free_req = scsi_free_request,
    .send_command = scsi_block_dma_command,
    .read_data = scsi_read_data,
    .write_data = scsi_write_data,
    .get_buf = scsi_get_buf,
    .load_request = scsi_disk_load_request,
    .save_request = scsi_disk_save_request,
};
static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
uint32_t lun, uint8_t *buf,
void *hba_private)
@ -2637,7 +2785,7 @@ static SCSIRequest *scsi_block_new_request(SCSIDevice *d, uint32_t tag,
return scsi_req_alloc(&scsi_generic_req_ops, &s->qdev, tag, lun,
hba_private);
} else {
return scsi_req_alloc(&scsi_disk_dma_reqops, &s->qdev, tag, lun,
return scsi_req_alloc(&scsi_block_dma_reqops, &s->qdev, tag, lun,
hba_private);
}
}
@ -2656,6 +2804,46 @@ static int scsi_block_parse_cdb(SCSIDevice *d, SCSICommand *cmd,
#endif
/*
 * Default read hook: submits the read to the request's block backend via
 * blk_aio_preadv().  @opaque is the SCSIDiskReq.
 */
static
BlockAIOCB *scsi_dma_readv(int64_t offset, QEMUIOVector *iov,
                           BlockCompletionFunc *cb, void *cb_opaque,
                           void *opaque)
{
    SCSIDiskReq *disk_req = opaque;
    SCSIDiskState *disk = DO_UPCAST(SCSIDiskState, qdev, disk_req->req.dev);
    BlockAIOCB *acb;

    acb = blk_aio_preadv(disk->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
    return acb;
}
/*
 * Default write hook: submits the write to the request's block backend
 * via blk_aio_pwritev().  @opaque is the SCSIDiskReq.
 */
static
BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
                            BlockCompletionFunc *cb, void *cb_opaque,
                            void *opaque)
{
    SCSIDiskReq *disk_req = opaque;
    SCSIDiskState *disk = DO_UPCAST(SCSIDiskState, qdev, disk_req->req.dev);
    BlockAIOCB *acb;

    acb = blk_aio_pwritev(disk->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
    return acb;
}
/*
 * Class initializer for the scsi-disk base type: installs the firmware
 * name, the reset handler and the default I/O and FUA hooks.
 */
static void scsi_disk_base_class_initfn(ObjectClass *klass, void *data)
{
    DeviceClass *dev_class = DEVICE_CLASS(klass);
    SCSIDiskClass *disk_class = SCSI_DISK_BASE_CLASS(klass);

    /* Default hooks; subclasses may replace them in their own class_init. */
    disk_class->dma_readv = scsi_dma_readv;
    disk_class->dma_writev = scsi_dma_writev;
    disk_class->need_fua_emulation = scsi_is_cmd_fua;

    dev_class->fw_name = "disk";
    dev_class->reset = scsi_disk_reset;
}
/*
 * Type description of the scsi-disk base type.  It carries the instance
 * and class sizes and the class initializer for SCSIDiskState and
 * SCSIDiskClass.
 */
static const TypeInfo scsi_disk_base_info = {
    .name = TYPE_SCSI_DISK_BASE,
    .parent = TYPE_SCSI_DEVICE,
    .class_init = scsi_disk_base_class_initfn,
    .instance_size = sizeof(SCSIDiskState),
    .class_size = sizeof(SCSIDiskClass),
};
#define DEFINE_SCSI_DISK_PROPERTIES() \
DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \
DEFINE_PROP_STRING("ver", SCSIDiskState, version), \
@ -2703,17 +2891,14 @@ static void scsi_hd_class_initfn(ObjectClass *klass, void *data)
sc->realize = scsi_hd_realize;
sc->alloc_req = scsi_new_request;
sc->unit_attention_reported = scsi_disk_unit_attention_reported;
dc->fw_name = "disk";
dc->desc = "virtual SCSI disk";
dc->reset = scsi_disk_reset;
dc->props = scsi_hd_properties;
dc->vmsd = &vmstate_scsi_disk_state;
}
static const TypeInfo scsi_hd_info = {
.name = "scsi-hd",
.parent = TYPE_SCSI_DEVICE,
.instance_size = sizeof(SCSIDiskState),
.parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_hd_class_initfn,
};
@ -2735,17 +2920,14 @@ static void scsi_cd_class_initfn(ObjectClass *klass, void *data)
sc->realize = scsi_cd_realize;
sc->alloc_req = scsi_new_request;
sc->unit_attention_reported = scsi_disk_unit_attention_reported;
dc->fw_name = "disk";
dc->desc = "virtual SCSI CD-ROM";
dc->reset = scsi_disk_reset;
dc->props = scsi_cd_properties;
dc->vmsd = &vmstate_scsi_disk_state;
}
static const TypeInfo scsi_cd_info = {
.name = "scsi-cd",
.parent = TYPE_SCSI_DEVICE,
.instance_size = sizeof(SCSIDiskState),
.parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_cd_class_initfn,
};
@ -2759,21 +2941,22 @@ static void scsi_block_class_initfn(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
SCSIDiskClass *sdc = SCSI_DISK_BASE_CLASS(klass);
sc->realize = scsi_block_realize;
sc->alloc_req = scsi_block_new_request;
sc->parse_cdb = scsi_block_parse_cdb;
dc->fw_name = "disk";
sdc->dma_readv = scsi_block_dma_readv;
sdc->dma_writev = scsi_block_dma_writev;
sdc->need_fua_emulation = scsi_block_no_fua;
dc->desc = "SCSI block device passthrough";
dc->reset = scsi_disk_reset;
dc->props = scsi_block_properties;
dc->vmsd = &vmstate_scsi_disk_state;
}
static const TypeInfo scsi_block_info = {
.name = "scsi-block",
.parent = TYPE_SCSI_DEVICE,
.instance_size = sizeof(SCSIDiskState),
.parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_block_class_initfn,
};
#endif
@ -2811,13 +2994,13 @@ static void scsi_disk_class_initfn(ObjectClass *klass, void *data)
static const TypeInfo scsi_disk_info = {
.name = "scsi-disk",
.parent = TYPE_SCSI_DEVICE,
.instance_size = sizeof(SCSIDiskState),
.parent = TYPE_SCSI_DISK_BASE,
.class_init = scsi_disk_class_initfn,
};
static void scsi_disk_register_types(void)
{
type_register_static(&scsi_disk_base_info);
type_register_static(&scsi_hd_info);
type_register_static(&scsi_cd_info);
#ifdef __linux__

View File

@ -222,6 +222,18 @@ static void scsi_read_complete(void * opaque, int ret)
r->buf[3] |= 0x80;
}
}
if (s->type == TYPE_DISK &&
r->req.cmd.buf[0] == INQUIRY &&
r->req.cmd.buf[2] == 0xb0) {
uint32_t max_xfer_len = blk_get_max_transfer_length(s->conf.blk);
if (max_xfer_len) {
stl_be_p(&r->buf[8], max_xfer_len);
/* Also take care of the opt xfer len. */
if (ldl_be_p(&r->buf[12]) > max_xfer_len) {
stl_be_p(&r->buf[12], max_xfer_len);
}
}
}
scsi_req_data(&r->req, len);
scsi_req_unref(&r->req);
}

View File

@ -153,7 +153,7 @@ pvscsi_log2(uint32_t input)
return log;
}
static void
static int
pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
{
int i;
@ -161,6 +161,10 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
uint32_t req_ring_size, cmp_ring_size;
m->rs_pa = ri->ringsStatePPN << VMW_PAGE_SHIFT;
if ((ri->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
|| (ri->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)) {
return -1;
}
req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE;
txr_len_log2 = pvscsi_log2(req_ring_size - 1);
@ -192,15 +196,20 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri)
/* Flush ring state page changes */
smp_wmb();
return 0;
}
static void
static int
pvscsi_ring_init_msg(PVSCSIRingInfo *m, PVSCSICmdDescSetupMsgRing *ri)
{
int i;
uint32_t len_log2;
uint32_t ring_size;
if (ri->numPages > PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES) {
return -1;
}
ring_size = ri->numPages * PVSCSI_MAX_NUM_MSG_ENTRIES_PER_PAGE;
len_log2 = pvscsi_log2(ring_size - 1);
@ -220,6 +229,8 @@ pvscsi_ring_init_msg(PVSCSIRingInfo *m, PVSCSICmdDescSetupMsgRing *ri)
/* Flush ring state page changes */
smp_wmb();
return 0;
}
static void
@ -770,7 +781,10 @@ pvscsi_on_cmd_setup_rings(PVSCSIState *s)
trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_RINGS");
pvscsi_dbg_dump_tx_rings_config(rc);
pvscsi_ring_init_data(&s->rings, rc);
if (pvscsi_ring_init_data(&s->rings, rc) < 0) {
return PVSCSI_COMMAND_PROCESSING_FAILED;
}
s->rings_info_valid = TRUE;
return PVSCSI_COMMAND_PROCESSING_SUCCEEDED;
}
@ -850,7 +864,9 @@ pvscsi_on_cmd_setup_msg_ring(PVSCSIState *s)
}
if (s->rings_info_valid) {
pvscsi_ring_init_msg(&s->rings, rc);
if (pvscsi_ring_init_msg(&s->rings, rc) < 0) {
return PVSCSI_COMMAND_PROCESSING_FAILED;
}
s->msg_ring_info_valid = TRUE;
}
return sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(uint32_t);

View File

@ -17,7 +17,6 @@
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "exec/ram_addr.h"
#include "migration/migration.h"
#include <sys/ioctl.h>
@ -247,18 +246,18 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
ram_addr_t ram_addr;
ram_addr_t offset;
MemoryRegion *mr;
assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
&ram_addr);
fd = qemu_get_ram_fd(ram_addr);
mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
&offset);
fd = memory_region_get_fd(mr);
if (fd > 0) {
msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
(uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
msg.payload.memory.regions[fd_num].mmap_offset = offset;
assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
fds[fd_num++] = fd;
}
@ -616,17 +615,15 @@ static bool vhost_user_can_merge(struct vhost_dev *dev,
uint64_t start1, uint64_t size1,
uint64_t start2, uint64_t size2)
{
ram_addr_t ram_addr;
ram_addr_t offset;
int mfd, rfd;
MemoryRegion *mr;
mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr);
assert(mr);
mfd = qemu_get_ram_fd(ram_addr);
mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
mfd = memory_region_get_fd(mr);
mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr);
assert(mr);
rfd = qemu_get_ram_fd(ram_addr);
mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
rfd = memory_region_get_fd(mr);
return mfd == rfd;
}

View File

@ -57,10 +57,10 @@ typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
/* This should not be used by devices. */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
ram_addr_t qemu_ram_addr_from_host(void *ptr);
RAMBlock *qemu_ram_block_by_name(const char *name);
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
ram_addr_t *ram_addr, ram_addr_t *offset);
ram_addr_t *offset);
void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
void qemu_ram_unset_idstr(RAMBlock *block);
const char *qemu_ram_get_idstr(RAMBlock *rb);

View File

@ -32,6 +32,8 @@
#include "qom/object.h"
#include "qemu/rcu.h"
#define RAM_ADDR_INVALID (~(ram_addr_t)0)
#define MAX_PHYS_ADDR_SPACE_BITS 62
#define MAX_PHYS_ADDR (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
@ -666,6 +668,35 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
*/
int memory_region_get_fd(MemoryRegion *mr);
/**
* memory_region_set_fd: Mark a RAM memory region as backed by a
* file descriptor.
*
* This function is typically used after memory_region_init_ram_ptr().
*
* @mr: the memory region to mark as backed by @fd.
* @fd: the file descriptor that backs @mr.
*/
void memory_region_set_fd(MemoryRegion *mr, int fd);
/**
* memory_region_from_host: Convert a pointer into a RAM memory region
* and an offset within it.
*
* Given a host pointer inside a RAM memory region (created with
* memory_region_init_ram() or memory_region_init_ram_ptr()), return
* the MemoryRegion and the offset within it.
*
* Use with care; by the time this function returns, the returned pointer is
* not protected by RCU anymore. If the caller is not within an RCU critical
* section and does not hold the iothread lock, it must have other means of
* protecting the pointer, such as a reference to the region that includes
* the incoming ram_addr_t.
*
* @ptr: the host pointer to be converted.
* @offset: filled in with the offset of @ptr within the returned region.
*/
MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset);
/**
* memory_region_get_ram_ptr: Get a pointer into a RAM memory region.
*
@ -1362,7 +1393,7 @@ MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
MemoryRegion *mr);
MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
MemTxAttrs attrs, uint8_t *buf, int len);
void *qemu_get_ram_ptr(RAMBlock *ram_block, ram_addr_t addr);
void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr);
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
@ -1400,8 +1431,7 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
l = len;
mr = address_space_translate(as, addr, &addr1, &l, false);
if (len == l && memory_access_is_direct(mr, false)) {
addr1 += memory_region_get_ram_addr(mr);
ptr = qemu_get_ram_ptr(mr->ram_block, addr1);
ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
memcpy(buf, ptr, len);
} else {
result = address_space_read_continue(as, addr, attrs, buf, len,

View File

@ -105,9 +105,6 @@ RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
uint64_t length,
void *host),
MemoryRegion *mr, Error **errp);
int qemu_get_ram_fd(ram_addr_t addr);
void qemu_set_ram_fd(ram_addr_t addr, int fd);
void *qemu_get_ram_block_host_ptr(ram_addr_t addr);
void qemu_ram_free(RAMBlock *block);
int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

View File

@ -46,4 +46,20 @@ etraxfs_eth_init(NICInfo *nd, hwaddr base, int phyaddr,
return dev;
}
/*
 * Create and initialize an "etraxfs,serial" device attached to @chr, map
 * its first MMIO region at @addr and connect its first IRQ line to @irq.
 * Returns the new device.
 */
static inline DeviceState *etraxfs_ser_create(hwaddr addr,
                                              qemu_irq irq,
                                              CharDriverState *chr)
{
    DeviceState *serial = qdev_create(NULL, "etraxfs,serial");
    SysBusDevice *sbd = SYS_BUS_DEVICE(serial);

    /* The chardev property has to be set before the device is initialized. */
    qdev_prop_set_chr(serial, "chardev", chr);
    qdev_init_nofail(serial);

    sysbus_mmio_map(sbd, 0, addr);
    sysbus_connect_irq(sbd, 0, irq);
    return serial;
}
#endif

View File

@ -36,7 +36,18 @@
#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })
/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here. Leave it in if
 * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
 *
 * Three variants, selected at compile time:
 *  - Thread Sanitizer builds: a consume fence between compiler barriers;
 *  - Alpha: an "mb" instruction;
 *  - everything else: a compiler barrier only.
 */
#if defined(__SANITIZE_THREAD__)
#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); })
#elif defined(__alpha__)
#define smp_read_barrier_depends() asm volatile("mb":::"memory")
#else
#define smp_read_barrier_depends() barrier()
#endif
/* Weak atomic operations prevent the compiler moving other
* loads/stores past the atomic operation load/store. However there is
@ -56,13 +67,23 @@
__atomic_store(ptr, &_val, __ATOMIC_RELAXED); \
} while(0)
/* Atomic RCU operations imply weak memory barriers */
/* Most compilers currently treat consume and acquire the same, but this
 * slows down atomic_rcu_read unnecessarily.  So, outside Thread Sanitizer
 * builds, do a relaxed load followed by smp_read_barrier_depends()
 * instead of a consume load.
 *
 * Both variants expand to complete statements (note the trailing
 * semicolons), so the macro is meant to be used as a statement on its own.
 */
#ifdef __SANITIZE_THREAD__
#define atomic_rcu_read__nocheck(ptr, valptr) \
    __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
#else
#define atomic_rcu_read__nocheck(ptr, valptr) \
    __atomic_load(ptr, valptr, __ATOMIC_RELAXED); \
    smp_read_barrier_depends();
#endif
#define atomic_rcu_read(ptr) \
({ \
QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
typeof(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_CONSUME); \
atomic_rcu_read__nocheck(ptr, &_val); \
_val; \
})

View File

@ -33,8 +33,6 @@
//#define DEBUG_UNASSIGNED
#define RAM_ADDR_INVALID (~(ram_addr_t)0)
static unsigned memory_region_transaction_depth;
static bool memory_region_update_pending;
static bool ioeventfd_update_pending;
@ -227,6 +225,7 @@ struct FlatRange {
hwaddr offset_in_region;
AddrRange addr;
uint8_t dirty_log_mask;
bool romd_mode;
bool readonly;
};
@ -251,6 +250,7 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
return a->mr == b->mr
&& addrrange_equal(a->addr, b->addr)
&& a->offset_in_region == b->offset_in_region
&& a->romd_mode == b->romd_mode
&& a->readonly == b->readonly;
}
@ -310,6 +310,7 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
r1->addr.size),
int128_make64(r2->offset_in_region))
&& r1->dirty_log_mask == r2->dirty_log_mask
&& r1->romd_mode == r2->romd_mode
&& r1->readonly == r2->readonly;
}
@ -663,6 +664,7 @@ static void render_memory_region(FlatView *view,
fr.mr = mr;
fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
/* Render the region itself into any gaps left by the current view. */
@ -1622,13 +1624,26 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
int memory_region_get_fd(MemoryRegion *mr)
{
if (mr->alias) {
return memory_region_get_fd(mr->alias);
int fd;
rcu_read_lock();
while (mr->alias) {
mr = mr->alias;
}
fd = mr->ram_block->fd;
rcu_read_unlock();
assert(mr->ram_block);
return fd;
}
return qemu_get_ram_fd(memory_region_get_ram_addr(mr));
/*
 * Record @fd in the RAM block of @mr.  If @mr is an alias, the alias
 * chain is followed first and the fd is stored in the final region's RAM
 * block.  The walk and the store happen inside an RCU read-side critical
 * section.
 */
void memory_region_set_fd(MemoryRegion *mr, int fd)
{
    MemoryRegion *target;

    rcu_read_lock();
    for (target = mr; target->alias; target = target->alias) {
        /* keep following aliases until the underlying region is reached */
    }
    target->ram_block->fd = fd;
    rcu_read_unlock();
}
void *memory_region_get_ram_ptr(MemoryRegion *mr)
@ -1642,10 +1657,22 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr)
mr = mr->alias;
}
assert(mr->ram_block);
ptr = qemu_get_ram_ptr(mr->ram_block, memory_region_get_ram_addr(mr));
ptr = qemu_map_ram_ptr(mr->ram_block, offset);
rcu_read_unlock();
return ptr + offset;
return ptr;
}
/*
 * Look up the RAM block containing host pointer @ptr through
 * qemu_ram_block_from_host(), which also receives @offset, and return
 * that block's MemoryRegion.  Returns NULL if no block is found.
 */
MemoryRegion *memory_region_from_host(void *ptr, ram_addr_t *offset)
{
    RAMBlock *rb = qemu_ram_block_from_host(ptr, false, offset);

    return rb ? rb->mr : NULL;
}
ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)

View File

@ -407,7 +407,6 @@ static void *postcopy_ram_fault_thread(void *opaque)
while (true) {
ram_addr_t rb_offset;
ram_addr_t in_raspace;
struct pollfd pfd[2];
/*
@ -459,7 +458,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &in_raspace, &rb_offset);
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);

View File

@ -1153,12 +1153,20 @@ static void nbd_trip(void *opaque)
break;
case NBD_CMD_TRIM:
TRACE("Request type is TRIM");
ret = blk_co_discard(exp->blk, (request.from + exp->dev_offset)
/ BDRV_SECTOR_SIZE,
request.len / BDRV_SECTOR_SIZE);
if (ret < 0) {
LOG("discard failed");
reply.error = -ret;
/* Ignore unaligned head or tail, until block layer adds byte
* interface */
if (request.len >= BDRV_SECTOR_SIZE) {
request.len -= (request.from + request.len) % BDRV_SECTOR_SIZE;
ret = blk_co_discard(exp->blk,
DIV_ROUND_UP(request.from + exp->dev_offset,
BDRV_SECTOR_SIZE),
request.len / BDRV_SECTOR_SIZE);
if (ret < 0) {
LOG("discard failed");
reply.error = -ret;
}
} else {
TRACE("trim request too small, ignoring");
}
if (nbd_co_send_reply(req, &reply, 0) < 0) {
goto out;

View File

@ -328,23 +328,10 @@ def qlist_foreach(head, field_str):
yield var
def qemu_get_ram_block(ram_addr):
    """Returns the RAMBlock struct to which the given address belongs.

    Walks ram_list.blocks and returns the first block for which
    ram_addr - offset is below used_length; raises gdb.GdbError when no
    block matches.
    """
    block_list = gdb.parse_and_eval("ram_list.blocks")
    for candidate in qlist_foreach(block_list, "next"):
        distance = ram_addr - candidate["offset"]
        if distance < candidate["used_length"]:
            return candidate
    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
def qemu_get_ram_ptr(ram_addr):
def qemu_map_ram_ptr(block, offset):
"""Returns qemu vaddr for given guest physical address."""
block = qemu_get_ram_block(ram_addr)
return block["host"] + (ram_addr - block["offset"])
return block["host"] + offset
def memory_region_get_ram_ptr(memory_region):
@ -352,7 +339,7 @@ def memory_region_get_ram_ptr(memory_region):
return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
+ memory_region["alias_offset"])
return qemu_get_ram_ptr(memory_region["ram_block"]["offset"])
return qemu_map_ram_ptr(memory_region["ram_block"], 0)
def get_guest_phys_blocks():

View File

@ -1,825 +0,0 @@
#!/usr/bin/python
#
# top-like utility for displaying kvm statistics
#
# Copyright 2006-2008 Qumranet Technologies
# Copyright 2008-2011 Red Hat, Inc.
#
# Authors:
# Avi Kivity <avi@redhat.com>
#
# This work is licensed under the terms of the GNU GPL, version 2. See
# the COPYING file in the top-level directory.
import curses
import sys
import os
import time
import optparse
import ctypes
import fcntl
import resource
import struct
import re
from collections import defaultdict
from time import sleep
VMX_EXIT_REASONS = {
'EXCEPTION_NMI': 0,
'EXTERNAL_INTERRUPT': 1,
'TRIPLE_FAULT': 2,
'PENDING_INTERRUPT': 7,
'NMI_WINDOW': 8,
'TASK_SWITCH': 9,
'CPUID': 10,
'HLT': 12,
'INVLPG': 14,
'RDPMC': 15,
'RDTSC': 16,
'VMCALL': 18,
'VMCLEAR': 19,
'VMLAUNCH': 20,
'VMPTRLD': 21,
'VMPTRST': 22,
'VMREAD': 23,
'VMRESUME': 24,
'VMWRITE': 25,
'VMOFF': 26,
'VMON': 27,
'CR_ACCESS': 28,
'DR_ACCESS': 29,
'IO_INSTRUCTION': 30,
'MSR_READ': 31,
'MSR_WRITE': 32,
'INVALID_STATE': 33,
'MWAIT_INSTRUCTION': 36,
'MONITOR_INSTRUCTION': 39,
'PAUSE_INSTRUCTION': 40,
'MCE_DURING_VMENTRY': 41,
'TPR_BELOW_THRESHOLD': 43,
'APIC_ACCESS': 44,
'EPT_VIOLATION': 48,
'EPT_MISCONFIG': 49,
'WBINVD': 54,
'XSETBV': 55,
'APIC_WRITE': 56,
'INVPCID': 58,
}
SVM_EXIT_REASONS = {
'READ_CR0': 0x000,
'READ_CR3': 0x003,
'READ_CR4': 0x004,
'READ_CR8': 0x008,
'WRITE_CR0': 0x010,
'WRITE_CR3': 0x013,
'WRITE_CR4': 0x014,
'WRITE_CR8': 0x018,
'READ_DR0': 0x020,
'READ_DR1': 0x021,
'READ_DR2': 0x022,
'READ_DR3': 0x023,
'READ_DR4': 0x024,
'READ_DR5': 0x025,
'READ_DR6': 0x026,
'READ_DR7': 0x027,
'WRITE_DR0': 0x030,
'WRITE_DR1': 0x031,
'WRITE_DR2': 0x032,
'WRITE_DR3': 0x033,
'WRITE_DR4': 0x034,
'WRITE_DR5': 0x035,
'WRITE_DR6': 0x036,
'WRITE_DR7': 0x037,
'EXCP_BASE': 0x040,
'INTR': 0x060,
'NMI': 0x061,
'SMI': 0x062,
'INIT': 0x063,
'VINTR': 0x064,
'CR0_SEL_WRITE': 0x065,
'IDTR_READ': 0x066,
'GDTR_READ': 0x067,
'LDTR_READ': 0x068,
'TR_READ': 0x069,
'IDTR_WRITE': 0x06a,
'GDTR_WRITE': 0x06b,
'LDTR_WRITE': 0x06c,
'TR_WRITE': 0x06d,
'RDTSC': 0x06e,
'RDPMC': 0x06f,
'PUSHF': 0x070,
'POPF': 0x071,
'CPUID': 0x072,
'RSM': 0x073,
'IRET': 0x074,
'SWINT': 0x075,
'INVD': 0x076,
'PAUSE': 0x077,
'HLT': 0x078,
'INVLPG': 0x079,
'INVLPGA': 0x07a,
'IOIO': 0x07b,
'MSR': 0x07c,
'TASK_SWITCH': 0x07d,
'FERR_FREEZE': 0x07e,
'SHUTDOWN': 0x07f,
'VMRUN': 0x080,
'VMMCALL': 0x081,
'VMLOAD': 0x082,
'VMSAVE': 0x083,
'STGI': 0x084,
'CLGI': 0x085,
'SKINIT': 0x086,
'RDTSCP': 0x087,
'ICEBP': 0x088,
'WBINVD': 0x089,
'MONITOR': 0x08a,
'MWAIT': 0x08b,
'MWAIT_COND': 0x08c,
'XSETBV': 0x08d,
'NPF': 0x400,
}
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
AARCH64_EXIT_REASONS = {
'UNKNOWN': 0x00,
'WFI': 0x01,
'CP15_32': 0x03,
'CP15_64': 0x04,
'CP14_MR': 0x05,
'CP14_LS': 0x06,
'FP_ASIMD': 0x07,
'CP10_ID': 0x08,
'CP14_64': 0x0C,
'ILL_ISS': 0x0E,
'SVC32': 0x11,
'HVC32': 0x12,
'SMC32': 0x13,
'SVC64': 0x15,
'HVC64': 0x16,
'SMC64': 0x17,
'SYS64': 0x18,
'IABT': 0x20,
'IABT_HYP': 0x21,
'PC_ALIGN': 0x22,
'DABT': 0x24,
'DABT_HYP': 0x25,
'SP_ALIGN': 0x26,
'FP_EXC32': 0x28,
'FP_EXC64': 0x2C,
'SERROR': 0x2F,
'BREAKPT': 0x30,
'BREAKPT_HYP': 0x31,
'SOFTSTP': 0x32,
'SOFTSTP_HYP': 0x33,
'WATCHPT': 0x34,
'WATCHPT_HYP': 0x35,
'BKPT32': 0x38,
'VECTOR32': 0x3A,
'BRK64': 0x3C,
}
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
USERSPACE_EXIT_REASONS = {
'UNKNOWN': 0,
'EXCEPTION': 1,
'IO': 2,
'HYPERCALL': 3,
'DEBUG': 4,
'HLT': 5,
'MMIO': 6,
'IRQ_WINDOW_OPEN': 7,
'SHUTDOWN': 8,
'FAIL_ENTRY': 9,
'INTR': 10,
'SET_TPR': 11,
'TPR_ACCESS': 12,
'S390_SIEIC': 13,
'S390_RESET': 14,
'DCR': 15,
'NMI': 16,
'INTERNAL_ERROR': 17,
'OSI': 18,
'PAPR_HCALL': 19,
'S390_UCONTROL': 20,
'WATCHDOG': 21,
'S390_TSCH': 22,
'EPR': 23,
'SYSTEM_EVENT': 24,
}
IOCTL_NUMBERS = {
'SET_FILTER': 0x40082406,
'ENABLE': 0x00002400,
'DISABLE': 0x00002401,
'RESET': 0x00002403,
}
class Arch(object):
    """Class that encapsulates global architecture specific data like
    syscall and ioctl numbers.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance for the running machine.

        The machine name from os.uname() selects ArchPPC, ArchA64 or
        ArchS390 by prefix.  Anything else is treated as x86: the first
        'flags' line of /proc/cpuinfo decides between the VMX and the
        SVM exit reason tables.  None is returned when that line names
        neither flag, or when there is no 'flags' line at all.
        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()
        else:
            # X86_64
            # The with block closes /proc/cpuinfo on every return path.
            with open('/proc/cpuinfo') as cpuinfo:
                for line in cpuinfo:
                    if not line.startswith('flags'):
                        continue

                    flags = line.split()
                    if 'vmx' in flags:
                        return ArchX86(VMX_EXIT_REASONS)
                    if 'svm' in flags:
                        return ArchX86(SVM_EXIT_REASONS)
                    # Only the first 'flags' line is ever inspected.
                    return
class ArchX86(Arch):
    """x86 settings; the caller supplies the exit reason table to use."""
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 298
class ArchPPC(Arch):
    """PPC settings: own perf syscall number and ioctl encodings."""
    def __init__(self):
        self.sc_perf_evt_open = 319
        # Work on a private copy so the module-level IOCTL_NUMBERS table
        # is not rewritten as a side effect of constructing this object.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        # RESET is an argument-less ioctl like ENABLE/DISABLE and needs
        # the same 0x20000000 encoding; Event.reset() relies on it.
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | (char_ptr_size << 16)

        # get_filters() reads ARCH.exit_reasons unconditionally; no exit
        # reason table is defined for PPC, so mirror ArchS390 and use None.
        self.exit_reasons = None
class ArchA64(Arch):
    """aarch64 settings, using the AARCH64_EXIT_REASONS table."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 241
class ArchS390(Arch):
    """s390 settings; no exit reason table is provided (None)."""
    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 331
ARCH = Arch.get_arch()
def walkdir(path):
    """Returns the first os.walk() entry for the specified directory.

    The result is the usual (dirpath, dirnames, filenames) 3-tuple for
    the top-level directory only; deeper levels are never visited.
    """
    walker = os.walk(path)
    return next(walker)
def parse_int_list(list_string):
    """Expands a comma separated string of integers and inclusive
    'first-last' ranges into a flat list of ints, in input order."""
    integers = []
    for member in list_string.split(','):
        if '-' in member:
            bounds = member.split('-')
            first = int(bounds[0])
            last = int(bounds[1])
            integers.extend(range(first, last + 1))
        else:
            integers.append(int(member))
    return integers
def get_online_cpus():
    """Returns the CPU numbers listed on the first line of
    /sys/devices/system/cpu/online, expanded by parse_int_list()."""
    online = open('/sys/devices/system/cpu/online')
    try:
        return parse_int_list(online.readline())
    finally:
        online.close()
def get_filters():
    """Returns a dict mapping tracepoint names to a tuple of
    (filter field name, {sub-event name: value}).

    'kvm_exit' is only included when the detected architecture provides
    a non-empty exit reason table.
    """
    filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}
    if ARCH.exit_reasons:
        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
    return filters
libc = ctypes.CDLL('libc.so.6', use_errno=True)
syscall = libc.syscall
class perf_event_attr(ctypes.Structure):
    """ctypes structure passed by pointer to perf_event_open().

    Instances start out as a tracepoint event (PERF_TYPE_TRACEPOINT)
    read in group format (PERF_FORMAT_GROUP), with 'size' set to the
    size of this structure.  Callers fill in 'config' afterwards.
    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse without end as soon as this structure is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Invokes the architecture's perf_event_open syscall through libc.

    attr is passed by pointer; pid, cpu and group_fd as C ints and flags
    as a C long.  Returns whatever libc's syscall() returns.
    """
    syscall_number = ARCH.sc_perf_evt_open
    c_args = (ctypes.pointer(attr),
              ctypes.c_int(pid),
              ctypes.c_int(cpu),
              ctypes.c_int(group_fd),
              ctypes.c_long(flags))
    return syscall(syscall_number, *c_args)
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
class Group(object):
    """Ordered collection of events that are read with a single read()
    on the file descriptor of the first event."""
    def __init__(self):
        self.events = []

    def add_event(self, event):
        self.events.append(event)

    def read(self):
        """Returns a dict of event name -> counter value.

        One 8 byte header (skipped) plus one unsigned 64 bit value per
        event is read from the first event's file descriptor.
        """
        names = [event.name for event in self.events]
        count = len(self.events)
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        counters = struct.unpack('xxxxxxxx' + 'Q' * count, raw)
        return dict(zip(names, counters))
class Event(object):
    """A single perf tracepoint event opened on one CPU.

    The event is opened in the constructor; its file descriptor is kept
    in self.fd and is used by enable(), disable() and reset().
    """
    def __init__(self, name, group, trace_cpu, trace_point, trace_filter,
                 trace_set='kvm'):
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_point, trace_filter,
                         trace_set)

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns a perf_event_attr whose config is the tracepoint id
        read from <tracing>/events/<trace_set>/<trace_point>/id."""
        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        # Close the id file right away; one is opened per event and CPU.
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_point, trace_filter,
                    trace_set):
        """Opens the perf event and stores its descriptor in self.fd.

        The first event already in 'group' acts as group leader; -1 is
        passed when the group is still empty.  Raises OSError when
        perf_event_open() returns -1.  A non-empty trace_filter is
        installed with the SET_FILTER ioctl.
        """
        event_attr = self.setup_event_attribute(trace_set, trace_point)

        group_leader = -1
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, -1, trace_cpu,
                             group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

        self.fd = fd

    def enable(self):
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
class TracepointProvider(object):
    """Statistics provider backed by perf tracepoint events.

    One Group of events is opened per online CPU; read() sums the
    counters of all groups per field name.
    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self.setup_traces()
        # Goes through the property setter, which resets and enables
        # the events that were just opened.
        self.fields = self._fields

    def get_available_fields(self):
        """Returns the kvm tracepoint names plus one 'name(sub)' entry
        for every sub-event of a tracepoint that has a filter."""
        events_dir = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(events_dir)[1]
        drilldown = []
        for field in fields:
            if field not in self.filters:
                continue
            field_name_, sub_events = self.filters[field]
            for sub in sub_events:
                drilldown.append(field + '(' + sub + ')')
        fields.extend(drilldown)
        return fields

    def _split_field(self, name):
        """Returns (tracepoint, filter string or None) for a field name;
        only names of the form 'tracepoint(sub)' get a filter."""
        match = re.match(r'(.*)\((.*)\)', name)
        if not match:
            return name, None
        tracepoint, sub = match.groups()
        tracefilter = ('%s==%d\0' %
                       (self.filters[tracepoint][0],
                        self.filters[tracepoint][1][sub]))
        return tracepoint, tracefilter

    def setup_traces(self):
        """Raises the NOFILE limit as needed and opens one event per
        field on every online CPU."""
        cpus = get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(cpus) * len(self._fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
            # Raising the hard limit needs CAP_SYS_RESOURCE; otherwise
            # raising the soft limit is sufficient.
            new_hard = newlim if hardlim < newlim else hardlim
            resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, new_hard))
        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for cpu in cpus:
            group = Group()
            for name in self._fields:
                tracepoint, tracefilter = self._split_field(name)
                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=cpu,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))
            self.group_leaders.append(group)

    def available_fields(self):
        return self.get_available_fields()

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Stores the wanted fields; selected events are reset and
        enabled, the others disabled."""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                elif index != 0:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    event.disable()

    def read(self):
        """Returns field name -> counter summed over all CPU groups,
        restricted to the currently selected fields."""
        totals = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name in self._fields:
                    totals[name] += val
        return totals
class DebugfsProvider(object):
    """Statistics provider that reads the files below the kvm debugfs
    directory (PATH_DEBUGFS_KVM)."""
    def __init__(self):
        self._fields = self.get_available_fields()

    def get_available_fields(self):
        """Returns the names of the files directly inside the kvm
        debugfs directory."""
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields

    def read(self):
        """Returns a dict of field name -> integer file content."""
        def val(key):
            # open() instead of the Python-2-only file() builtin, and a
            # with block so the handle is closed after every read.
            with open(os.path.join(PATH_DEBUGFS_KVM, key)) as stat_file:
                return int(stat_file.read())
        return dict([(key, val(key)) for key in self._fields])
class Stats(object):
    """Collects values from all providers and tracks, per field, the
    latest value together with its change since the previous get()."""
    def __init__(self, providers, fields=None):
        self.providers = providers
        self._fields_filter = fields
        self.values = {}
        self.update_provider_filters()

    def update_provider_filters(self):
        """Pushes the fields matching the current regex filter (all
        fields when no filter is set) to every provider."""
        pattern = self._fields_filter

        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            selected = []
            for key in provider.get_available_fields():
                if not pattern or re.match(pattern, key) is not None:
                    selected.append(key)
            provider.fields = selected

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        self._fields_filter = fields_filter
        self.update_provider_filters()

    def get(self):
        """Reads every provider and returns the cache, a dict of
        field -> (current value, difference to the previous value)."""
        for provider in self.providers:
            snapshot = provider.read()
            for key in provider.fields:
                previous = self.values.get(key, (0, 0))
                current = snapshot.get(key, 0)
                if previous is None:
                    delta = None
                else:
                    delta = current - previous[0]
                self.values[key] = (current, delta)
        return self.values
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
class Tui(object):
    """Curses based, top-like text interface for a Stats object.

    Intended to be used as a context manager: entering initialises
    curses, leaving restores the terminal.
    """
    def __init__(self, stats):
        self.stats = stats
        self.screen = None
        self.drilldown = False
        self.update_drilldown()

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
        implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on
        curses.wrapper implementation from the Python standard library."""
        if not self.screen:
            return
        self.screen.keypad(0)
        curses.echo()
        curses.nocbreak()
        curses.endwin()

    def update_drilldown(self):
        """Toggles between the 'no parentheses' field filter and no
        filter; any other user supplied filter is left alone."""
        current = self.stats.fields_filter
        if not current:
            self.stats.fields_filter = r'^[^\(]*$'
        elif current == r'^[^\(]*$':
            self.stats.fields_filter = None

    def refresh(self, sleeptime):
        """Redraws the table; the 'Current' column shows the change
        since the last read divided by sleeptime."""
        screen = self.screen
        number_end = 1 + LABEL_WIDTH + NUMBER_WIDTH

        screen.erase()
        screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
        screen.addstr(2, 1, 'Event')
        screen.addstr(2, number_end - len('Total'), 'Total')
        screen.addstr(2, number_end + 8 - len('Current'), 'Current')
        stats = self.stats.get()

        def sortkey(x):
            total, delta = stats[x][0], stats[x][1]
            if delta:
                return (-delta, -total)
            return (0, -total)

        row = 3
        for key in sorted(stats.keys(), key=sortkey):
            if row >= screen.getmaxyx()[0]:
                break
            values = stats[key]
            # Stop at the first row without a total and without a change.
            if not (values[0] or values[1]):
                break
            screen.addstr(row, 1, key)
            screen.addstr(row, 1 + LABEL_WIDTH, '%10d' % (values[0],))
            if values[1] is not None:
                screen.addstr(row, number_end,
                              '%8d' % (values[1] / sleeptime,))
            row += 1
        screen.refresh()

    def show_filter_selection(self):
        """Prompts for a new field regex until a valid one, or an empty
        line (keeps the current filter), is entered."""
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr()
            curses.noecho()
            if not len(regex):
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                return
            except re.error:
                # Invalid pattern: ask again.
                continue

    def show_stats(self):
        """Main loop: redraw, then wait for a key ('x' toggles the
        drilldown, 'f' opens the filter prompt, 'q' quits)."""
        sleeptime = 0.25
        while True:
            self.refresh(sleeptime)
            curses.halfdelay(int(sleeptime * 10))
            # Only the very first refresh uses the short interval.
            sleeptime = 3
            try:
                char = self.screen.getkey()
                if char == 'x':
                    self.drilldown = not self.drilldown
                    self.update_drilldown()
                elif char == 'q':
                    break
                elif char == 'f':
                    self.show_filter_selection()
            except KeyboardInterrupt:
                break
            except curses.error:
                continue
def batch(stats):
s = stats.get()
time.sleep(1)
s = stats.get()
for key in sorted(s.keys()):
values = s[key]
print '%-42s%10d%10d' % (key, values[0], values[1])
def log(stats):
keys = sorted(stats.get().iterkeys())
def banner():
for k in keys:
print '%s' % k,
print
def statline():
s = stats.get()
for k in keys:
print ' %9d' % s[k][1],
print
line = 0
banner_repeat = 20
while True:
time.sleep(1)
if line % banner_repeat == 0:
banner()
statline()
line += 1
def get_options():
    """Parses sys.argv and returns the optparse options object.

    Attributes of the result: once, log, tracepoints and debugfs
    (booleans, default False) and fields (regex string or None).
    Positional arguments are ignored.
    """
    # The tracing path below matches PATH_DEBUGFS_TRACING, the directory
    # the script actually reads its tracepoints from.
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.
The monitoring takes additional cpu cycles and might affect the VM's
performance.
Requirements:
- Access to:
/sys/kernel/debug/kvm
/sys/kernel/debug/tracing/events/*
/proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
the large number of files that are possibly opened.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that prints the description verbatim instead
        of re-wrapping it."""
        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    (options, _) = optparser.parse_args(sys.argv)
    return options
def get_providers(options):
    """Returns the provider instances selected by the options; the
    tracepoint provider is used when none was requested."""
    wanted = []
    if options.tracepoints:
        wanted.append(TracepointProvider)
    if options.debugfs:
        wanted.append(DebugfsProvider)
    if not wanted:
        wanted.append(TracepointProvider)
    return [provider_class() for provider_class in wanted]
def check_access(options):
    """Checks that the debugfs paths the script needs exist.

    Exits with status 1 when /sys/kernel/debug or the kvm debugfs
    directory is missing, or when the tracing directory is missing and
    tracepoints were requested explicitly.  When tracepoints were only
    the implicit default, options.debugfs is switched on instead, after
    a warning and a five second pause.  Returns the options object.
    """
    if not os.path.exists('/sys/kernel/debug'):
        # Newline-terminated like every other message written here.
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
                                                     or not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Leave the warning on screen for a moment before continuing.
        sleep(5)

    return options
def main():
    """Entry point: parses and checks the options, builds the Stats
    object and runs the logging, batch or interactive mode."""
    options = get_options()
    options = check_access(options)
    stats = Stats(get_providers(options), fields=options.fields)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()
if __name__ == "__main__":
main()

View File

@ -1,55 +0,0 @@
@example
@c man begin SYNOPSIS
usage: kvm_stat [OPTION]...
@c man end
@end example
@c man begin DESCRIPTION
kvm_stat prints counts of KVM kernel module trace events. These events signify
state transitions such as guest mode entry and exit.
This tool is useful for observing guest behavior from the host perspective.
Often conclusions about performance or buggy behavior can be drawn from the
output.
The set of KVM kernel module trace events may be specific to the kernel version
or architecture. It is best to check the KVM kernel module source code for the
meaning of events.
Note that trace events are counted globally across all running guests.
@c man end
@c man begin OPTIONS
@table @option
@item -1, --once, --batch
run in batch mode for one second
@item -l, --log
run in logging mode (like vmstat)
@item -t, --tracepoints
retrieve statistics from tracepoints
@item -d, --debugfs
retrieve statistics from debugfs
@item -f, --fields=@var{fields}
fields to display (regex)
@item -h, --help
show help message
@end table
@c man end
@ignore
@setfilename kvm_stat
@settitle Report KVM kernel module event counters.
@c man begin AUTHOR
Stefan Hajnoczi <stefanha@redhat.com>
@c man end
@c man begin SEEALSO
perf(1), trace-cmd(1)
@c man end
@end ignore

View File

@ -411,7 +411,8 @@ int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
if ((env->mcg_cap & MCG_SER_P) && addr
&& (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr == RAM_ADDR_INVALID ||
!kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "
"QEMU itself instead of guest system!\n");
@ -445,7 +446,8 @@ int kvm_arch_on_sigbus(int code, void *addr)
hwaddr paddr;
/* Hope we are lucky for AO MCE */
if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr == RAM_ADDR_INVALID ||
!kvm_physical_memory_addr_from_host(first_cpu->kvm_state,
addr, &paddr)) {
fprintf(stderr, "Hardware memory error for memory used by "

View File

@ -511,8 +511,13 @@ static void xen_io_add(MemoryListener *listener,
MemoryRegionSection *section)
{
XenIOState *state = container_of(listener, XenIOState, io_listener);
MemoryRegion *mr = section->mr;
memory_region_ref(section->mr);
if (mr->ops == &unassigned_io_ops) {
return;
}
memory_region_ref(mr);
xen_map_io_section(xen_xc, xen_domid, state->ioservid, section);
}
@ -521,10 +526,15 @@ static void xen_io_del(MemoryListener *listener,
MemoryRegionSection *section)
{
XenIOState *state = container_of(listener, XenIOState, io_listener);
MemoryRegion *mr = section->mr;
if (mr->ops == &unassigned_io_ops) {
return;
}
xen_unmap_io_section(xen_xc, xen_domid, state->ioservid, section);
memory_region_unref(section->mr);
memory_region_unref(mr);
}
static void xen_device_realize(DeviceListener *listener,