qemu/util/qdist.c

/*
 * qdist.c - QEMU helpers for handling frequency distributions of data.
 *
 * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/qdist.h"

#include <math.h>
#ifndef NAN
#define NAN (0.0 / 0.0)
#endif

#define QDIST_EMPTY_STR "(empty)"

/*
 * Initialise @dist as an empty distribution.
 *
 * Storage for a single entry is allocated up front, so that the capacity
 * (@dist->size) is never zero; qdist_add() grows the array by doubling it.
 * Release the storage with qdist_destroy().
 */
void qdist_init(struct qdist *dist)
{
    dist->n = 0;
    dist->size = 1;
    dist->entries = g_new(struct qdist_entry, dist->size);
}

void qdist_destroy(struct qdist *dist)
{
    g_free(dist->entries);
}

/*
 * Three-way comparison of two doubles.
 *
 * Returns 1 if @a > @b, -1 if @a < @b and 0 otherwise. Note that "otherwise"
 * includes the case where either operand is NaN, since every ordered
 * comparison against NaN is false.
 */
static inline int qdist_cmp_double(double a, double b)
{
    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (a > b) - (a < b);
}

/*
 * Comparison callback for bsearch() and qsort(): orders two
 * struct qdist_entry objects by their x value only; counts are ignored.
 */
static int qdist_cmp(const void *ap, const void *bp)
{
    const double xa = ((const struct qdist_entry *)ap)->x;
    const double xb = ((const struct qdist_entry *)bp)->x;

    return qdist_cmp_double(xa, xb);
}

void qdist_add(struct qdist *dist, double x, long count)
{
    struct qdist_entry *entry = NULL;

    if (dist->n) {
        struct qdist_entry e;

        e.x = x;
        entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
    }

    if (entry) {
        entry->count += count;
        return;
    }

    if (unlikely(dist->n == dist->size)) {
        dist->size *= 2;
        dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
    }
    dist->n++;
    entry = &dist->entries[dist->n - 1];
    entry->x = x;
    entry->count = count;
    qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
}

/*
 * Record a single sample of value @x in @dist.
 *
 * Shorthand for qdist_add(dist, x, 1).
 */
void qdist_inc(struct qdist *dist, double x)
{
    qdist_add(dist, x, 1);
}

/*
 * Unicode for block elements. See:
 *   https://en.wikipedia.org/wiki/Block_Elements
 *
 * The code points are consecutive and listed in ascending order.
 * qdist_pr_internal() picks an element by scaling a count to an index into
 * this table, and uses the last element for a full bar.
 */
static const gunichar qdist_blocks[] = {
    0x2581,
    0x2582,
    0x2583,
    0x2584,
    0x2585,
    0x2586,
    0x2587,
    0x2588
};

/* Number of elements in qdist_blocks[] */
#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)

/*
 * Print a distribution into a string.
 *
 * This function assumes that appropriate binning has been done on the input;
 * see qdist_bin__internal() and qdist_pr_plain().
 *
 * Callers must free the returned string with g_free().
 */
static char *qdist_pr_internal(const struct qdist *dist)
{
    double min, max;
    GString *s = g_string_new("");
    size_t i;

    /* if only one entry, its printout will be either full or empty */
    if (dist->n == 1) {
        if (dist->entries[0].count) {
            g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
        } else {
            g_string_append_c(s, ' ');
        }
        goto out;
    }

    /* get min and max counts */
    min = dist->entries[0].count;
    max = min;
    for (i = 0; i < dist->n; i++) {
        struct qdist_entry *e = &dist->entries[i];

        if (e->count < min) {
            min = e->count;
        }
        if (e->count > max) {
            max = e->count;
        }
    }

    for (i = 0; i < dist->n; i++) {
        struct qdist_entry *e = &dist->entries[i];
        int index;

        /* make an exception with 0; instead of using block[0], print a space */
        if (e->count) {
            /* divide first to avoid loss of precision when e->count == max */
            index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
            g_string_append_unichar(s, qdist_blocks[index]);
        } else {
            g_string_append_c(s, ' ');
        }
    }
 out:
    return g_string_free(s, FALSE);
}

/*
 * Bin the distribution in @from into @n bins of consecutive, non-overlapping
 * intervals, copying the result to @to.
 *
 * @to is initialised here with qdist_init(), so it must not own any storage
 * on entry; the caller releases it with qdist_destroy().
 *
 * Each bin is represented in @to by an entry whose x is the bin's left edge
 * and whose count is the sum of the counts of @from's entries falling in it.
 * All bins have the same width, (xmax - xmin) / @n.
 *
 * This function is internal to qdist: only this file and test code should
 * ever call it.
 *
 * Note: calling this function on an already-binned qdist is a bug.
 *
 * If @n == 0 or @from->n == 1, use @from->n.
 * If @from is empty, @to is left empty.
 */
void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
{
    double xmin, xmax;
    double step;
    size_t i, j;

    qdist_init(to);

    if (from->n == 0) {
        return;
    }
    if (n == 0 || from->n == 1) {
        n = from->n;
    }

    /* set equally-sized bins between @from's left and right */
    xmin = qdist_xmin(from);
    xmax = qdist_xmax(from);
    step = (xmax - xmin) / n;

    if (n == from->n) {
        /* if @from's entries already sit on the bin edges, no need to re-bin */
        for (i = 0; i < from->n; i++) {
            if (from->entries[i].x != xmin + i * step) {
                goto rebin;
            }
        }
        /* they match, so copy the dist and bail out */
        to->entries = g_renew(struct qdist_entry, to->entries, n);
        /*
         * Keep the recorded capacity in sync with the allocation; otherwise
         * a later qdist_add() on @to would not notice that the array is full
         * and would write past its end.
         */
        to->size = n;
        to->n = from->n;
        memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
        return;
    }

 rebin:
    j = 0;
    for (i = 0; i < n; i++) {
        double x;
        double left, right;

        left = xmin + i * step;
        right = xmin + (i + 1) * step;

        /* Add x, even if it might not get any counts later */
        x = left;
        qdist_add(to, x, 0);

        /*
         * To avoid double-counting we capture [left, right) ranges, except for
         * the rightmost bin, which captures a [left, right] range.
         * @from's entries are consumed in order, so @j never moves backwards.
         */
        while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
            struct qdist_entry *o = &from->entries[j];

            qdist_add(to, x, o->count);
            j++;
        }
    }
}

/*
 * Render @dist as a bare histogram string (no labels, no borders), after
 * re-binning it into @n bins of consecutive, non-overlapping intervals.
 *
 * If @n == 0, use @dist->n.
 * If @dist is empty, a copy of QDIST_EMPTY_STR is returned.
 *
 * Callers must free the returned string with g_free().
 */
char *qdist_pr_plain(const struct qdist *dist, size_t n)
{
    struct qdist binned;
    char *hist;

    if (!dist->n) {
        return g_strdup(QDIST_EMPTY_STR);
    }

    /* work on a temporary, binned copy; @dist itself is left untouched */
    qdist_bin__internal(&binned, dist, n);
    hist = qdist_pr_internal(&binned);
    qdist_destroy(&binned);

    return hist;
}

/*
 * Build the label printed to the left (@is_left) or to the right (!@is_left)
 * of a histogram of @dist with @n_bins bins (0 means @dist->n).
 *
 * @opt is a mask of QDIST_PR_* flags:
 * - without QDIST_PR_LABELS the label is the empty string;
 * - QDIST_PR_NODECIMAL prints no decimals instead of one;
 * - QDIST_PR_PERCENT appends a '%' sign;
 * - QDIST_PR_100X multiplies the printed values by 100;
 * - QDIST_PR_NOBINRANGE prints only the outer value (xmin or xmax) instead
 *   of the range of the first ("[lo,hi)") or last ("[lo,hi]") bin.
 *
 * Callers must free the returned string with g_free().
 */
static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
                            uint32_t opt, bool is_left)
{
    GString *s = g_string_new("");
    double bins, edge, width;
    int dec;

    if (!(opt & QDIST_PR_LABELS)) {
        return g_string_free(s, FALSE);
    }

    dec = (opt & QDIST_PR_NODECIMAL) ? 0 : 1;

    /* outer edge of the histogram on this side, and the width of one bin */
    bins = n_bins ? n_bins : dist->n;
    edge = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
    width = (qdist_xmax(dist) - qdist_xmin(dist)) / bins;

    if (opt & QDIST_PR_100X) {
        edge *= 100.0;
        width *= 100.0;
    }

    if (opt & QDIST_PR_NOBINRANGE) {
        g_string_append_printf(s, "%s%.*f", "", dec, edge);
    } else {
        /* the leftmost bin starts at the edge; the rightmost one ends there */
        double lo = is_left ? edge : edge - width;
        double hi = is_left ? edge + width : edge;

        g_string_append_printf(s, "%s%.*f", "[", dec, lo);
        g_string_append_printf(s, ",%.*f%s", dec, hi, is_left ? ")" : "]");
    }

    g_string_append(s, (opt & QDIST_PR_PERCENT) ? "%" : "");
    return g_string_free(s, FALSE);
}

/*
 * Print the distribution's histogram into a string, laid out as
 *   <left label><border><histogram><border><right label>
 *
 * @n_bins is the number of bins to use (0 means @dist->n) and @opt is a mask
 * of QDIST_PR_* flags; QDIST_PR_BORDER selects "|" as the border, the
 * remaining flags control the labels. If @dist is empty, a copy of
 * QDIST_EMPTY_STR is returned.
 *
 * See also: qdist_pr_plain().
 *
 * Callers must free the returned string with g_free().
 */
char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
{
    const char *edge;
    char *left, *right, *bars;
    GString *out;

    if (!dist->n) {
        return g_strdup(QDIST_EMPTY_STR);
    }

    edge = (opt & QDIST_PR_BORDER) ? "|" : "";
    out = g_string_new("");

    left = qdist_pr_label(dist, n_bins, opt, true);
    right = qdist_pr_label(dist, n_bins, opt, false);
    bars = qdist_pr_plain(dist, n_bins);

    g_string_append_printf(out, "%s%s%s%s%s", left, edge, bars, edge, right);

    /* the pieces were copied into @out; drop the temporaries */
    g_free(left);
    g_free(right);
    g_free(bars);

    return g_string_free(out, FALSE);
}

/*
 * Return the x value of the entry at position @index, or NAN if @dist has no
 * entries (in which case @index is not used at all).
 *
 * @index is a size_t, like the entry counts and loop indices used throughout
 * this file, so that a caller passing dist->n - 1 does not go through a
 * narrowing conversion to int.
 *
 * For a non-empty @dist the caller must pass @index < dist->n; this is not
 * checked here.
 */
static inline double qdist_x(const struct qdist *dist, size_t index)
{
    if (dist->n == 0) {
        return NAN;
    }
    return dist->entries[index].x;
}

/*
 * Return the x value of the first entry of @dist, or NAN if @dist is empty.
 *
 * qdist_add() keeps the entries sorted by x, so this is the smallest x.
 */
double qdist_xmin(const struct qdist *dist)
{
    return qdist_x(dist, 0);
}

/*
 * Return the x value of the last entry of @dist, or NAN if @dist is empty.
 *
 * qdist_add() keeps the entries sorted by x, so this is the largest x.
 */
double qdist_xmax(const struct qdist *dist)
{
    /* if dist->n == 0 the index is bogus, but qdist_x() returns NAN first */
    return qdist_x(dist, dist->n - 1);
}

/*
 * Return the number of entries in @dist, i.e. the number of distinct x
 * values that have been added to it (qdist_add() merges repeated values
 * into a single entry).
 */
size_t qdist_unique_entries(const struct qdist *dist)
{
    return dist->n;
}

/*
 * Return the total number of samples in @dist, that is, the sum of the
 * counts of all its entries. An empty @dist yields 0.
 */
unsigned long qdist_sample_count(const struct qdist *dist)
{
    unsigned long total = 0;
    size_t idx = 0;

    while (idx < dist->n) {
        total += dist->entries[idx].count;
        idx++;
    }
    return total;
}

static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
                                 size_t n, unsigned long count)
{
    /* amortize the recursion by using a base case > 2 */
    if (n <= 8) {
        size_t i;
        double ret = 0;

        for (i = 0; i < n; i++) {
            struct qdist_entry *e = &dist->entries[index + i];

            ret += e->x * e->count / count;
        }
        return ret;
    } else {
        size_t n2 = n / 2;

        return qdist_pairwise_avg(dist, index, n2, count) +
               qdist_pairwise_avg(dist, index + n2, n - n2, count);
    }
}

/*
 * Return the average of the samples in @dist, i.e. the mean of the x values
 * weighted by their counts.
 *
 * Returns NAN if @dist holds no samples (no entries, or only entries with a
 * count of 0), since the average is undefined in that case.
 */
double qdist_avg(const struct qdist *dist)
{
    const unsigned long total = qdist_sample_count(dist);

    return total ? qdist_pairwise_avg(dist, 0, dist->n, total) : NAN;
}
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`/*`
			`* qdist.c - QEMU helpers for handling frequency distributions of data.`
			`*`
			`* Copyright (C) 2016, Emilio G. Cota <cota@braap.org>`
			`*`
			`* License: GNU GPL, version 2 or later.`
			`* See the COPYING file in the top-level directory.`
			`*/`
clean-includes: run it once more Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-06-06 19:56:37 +03:00			`#include "qemu/osdep.h"`
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`#include "qemu/qdist.h"`

			`#include <math.h>`
			`#ifndef NAN`
			`#define NAN (0.0 / 0.0)`
			`#endif`

qdist: return "(empty)" instead of NULL when printing an empty dist Printf'ing a NULL string is undefined behaviour. Avoid it. Reported-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1469459025-23606-4-git-send-email-cota@braap.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-07-25 18:03:45 +03:00			`#define QDIST_EMPTY_STR "(empty)"`

qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`void qdist_init(struct qdist *dist)`
			`{`
qdist: use g_renew and g_new instead of g_realloc and g_malloc. This is safer against overflow. g_renew is available in all version of glib, while g_realloc_n is only available in 2.24. Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1469459025-23606-3-git-send-email-cota@braap.org> [Rewritten to use g_new/g_renew. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-07-25 18:03:44 +03:00			`dist->entries = g_new(struct qdist_entry, 1);`
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`dist->size = 1;`
			`dist->n = 0;`
			`}`

			`void qdist_destroy(struct qdist *dist)`
			`{`
			`g_free(dist->entries);`
			`}`

			`static inline int qdist_cmp_double(double a, double b)`
			`{`
			`if (a > b) {`
			`return 1;`
			`} else if (a < b) {`
			`return -1;`
			`}`
			`return 0;`
			`}`

			`static int qdist_cmp(const void ap, const void bp)`
			`{`
			`const struct qdist_entry *a = ap;`
			`const struct qdist_entry *b = bp;`

			`return qdist_cmp_double(a->x, b->x);`
			`}`

			`void qdist_add(struct qdist *dist, double x, long count)`
			`{`
			`struct qdist_entry *entry = NULL;`

			`if (dist->n) {`
			`struct qdist_entry e;`

			`e.x = x;`
			`entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);`
			`}`

			`if (entry) {`
			`entry->count += count;`
			`return;`
			`}`

			`if (unlikely(dist->n == dist->size)) {`
			`dist->size *= 2;`
qdist: use g_renew and g_new instead of g_realloc and g_malloc. This is safer against overflow. g_renew is available in all version of glib, while g_realloc_n is only available in 2.24. Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1469459025-23606-3-git-send-email-cota@braap.org> [Rewritten to use g_new/g_renew. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-07-25 18:03:44 +03:00			`dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);`
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`}`
			`dist->n++;`
			`entry = &dist->entries[dist->n - 1];`
			`entry->x = x;`
			`entry->count = count;`
			`qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);`
			`}`

			`void qdist_inc(struct qdist *dist, double x)`
			`{`
			`qdist_add(dist, x, 1);`
			`}`

			`/*`
			`* Unicode for block elements. See:`
			`* https://en.wikipedia.org/wiki/Block_Elements`
			`*/`
			`static const gunichar qdist_blocks[] = {`
			`0x2581,`
			`0x2582,`
			`0x2583,`
			`0x2584,`
			`0x2585,`
			`0x2586,`
			`0x2587,`
			`0x2588`
			`};`

			`#define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)`

			`/*`
			`* Print a distribution into a string.`
			`*`
			`* This function assumes that appropriate binning has been done on the input;`
			`* see qdist_bin__internal() and qdist_pr_plain().`
			`*`
			`* Callers must free the returned string with g_free().`
			`*/`
			`static char qdist_pr_internal(const struct qdist dist)`
			`{`
			`double min, max;`
			`GString *s = g_string_new("");`
			`size_t i;`

			`/* if only one entry, its printout will be either full or empty */`
			`if (dist->n == 1) {`
			`if (dist->entries[0].count) {`
			`g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);`
			`} else {`
			`g_string_append_c(s, ' ');`
			`}`
			`goto out;`
			`}`

			`/* get min and max counts */`
			`min = dist->entries[0].count;`
			`max = min;`
			`for (i = 0; i < dist->n; i++) {`
			`struct qdist_entry *e = &dist->entries[i];`

			`if (e->count < min) {`
			`min = e->count;`
			`}`
			`if (e->count > max) {`
			`max = e->count;`
			`}`
			`}`

			`for (i = 0; i < dist->n; i++) {`
			`struct qdist_entry *e = &dist->entries[i];`
			`int index;`

			`/* make an exception with 0; instead of using block[0], print a space */`
			`if (e->count) {`
			`/* divide first to avoid loss of precision when e->count == max */`
			`index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);`
			`g_string_append_unichar(s, qdist_blocks[index]);`
			`} else {`
			`g_string_append_c(s, ' ');`
			`}`
			`}`
			`out:`
			`return g_string_free(s, FALSE);`
			`}`

			`/*`
			`* Bin the distribution in @from into @n bins of consecutive, non-overlapping`
			`* intervals, copying the result to @to.`
			`*`
			`* This function is internal to qdist: only this file and test code should`
			`* ever call it.`
			`*`
			`* Note: calling this function on an already-binned qdist is a bug.`
			`*`
			`* If @n == 0 or @from->n == 1, use @from->n.`
			`*/`
			`void qdist_bin__internal(struct qdist to, const struct qdist from, size_t n)`
			`{`
			`double xmin, xmax;`
			`double step;`
			`size_t i, j;`

			`qdist_init(to);`

			`if (from->n == 0) {`
			`return;`
			`}`
			`if (n == 0 \|\| from->n == 1) {`
			`n = from->n;`
			`}`

			`/* set equally-sized bins between @from's left and right */`
			`xmin = qdist_xmin(from);`
			`xmax = qdist_xmax(from);`
			`step = (xmax - xmin) / n;`

			`if (n == from->n) {`
			`/* if @from's entries are equally spaced, no need to re-bin */`
			`for (i = 0; i < from->n; i++) {`
			`if (from->entries[i].x != xmin + i * step) {`
			`goto rebin;`
			`}`
			`}`
			`/* they're equally spaced, so copy the dist and bail out */`
qdist: use g_renew and g_new instead of g_realloc and g_malloc. This is safer against overflow. g_renew is available in all version of glib, while g_realloc_n is only available in 2.24. Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1469459025-23606-3-git-send-email-cota@braap.org> [Rewritten to use g_new/g_renew. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-07-25 18:03:44 +03:00			`to->entries = g_renew(struct qdist_entry, to->entries, n);`
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`to->n = from->n;`
			`memcpy(to->entries, from->entries, sizeof(to->entries) to->n);`
			`return;`
			`}`

			`rebin:`
			`j = 0;`
			`for (i = 0; i < n; i++) {`
			`double x;`
			`double left, right;`

			`left = xmin + i * step;`
			`right = xmin + (i + 1) * step;`

			`/* Add x, even if it might not get any counts later */`
			`x = left;`
			`qdist_add(to, x, 0);`

			`/*`
			`* To avoid double-counting we capture [left, right) ranges, except for`
			`* the righmost bin, which captures a [left, right] range.`
			`*/`
			`while (j < from->n && (from->entries[j].x < right \|\| i == n - 1)) {`
			`struct qdist_entry *o = &from->entries[j];`

			`qdist_add(to, x, o->count);`
			`j++;`
			`}`
			`}`
			`}`

			`/*`
			`* Print @dist into a string, after re-binning it into @n bins of consecutive,`
			`* non-overlapping intervals.`
			`*`
			`* If @n == 0, use @orig->n.`
			`*`
			`* Callers must free the returned string with g_free().`
			`*/`
			`char qdist_pr_plain(const struct qdist dist, size_t n)`
			`{`
			`struct qdist binned;`
			`char *ret;`

			`if (dist->n == 0) {`
qdist: return "(empty)" instead of NULL when printing an empty dist Printf'ing a NULL string is undefined behaviour. Avoid it. Reported-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1469459025-23606-4-git-send-email-cota@braap.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-07-25 18:03:45 +03:00			`return g_strdup(QDIST_EMPTY_STR);`
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`}`
			`qdist_bin__internal(&binned, dist, n);`
			`ret = qdist_pr_internal(&binned);`
			`qdist_destroy(&binned);`
			`return ret;`
			`}`

			`static char qdist_pr_label(const struct qdist dist, size_t n_bins,`
			`uint32_t opt, bool is_left)`
			`{`
			`const char *percent;`
			`const char *lparen;`
			`const char *rparen;`
			`GString *s;`
			`double x1, x2, step;`
			`double x;`
			`double n;`
			`int dec;`

			`s = g_string_new("");`
			`if (!(opt & QDIST_PR_LABELS)) {`
			`goto out;`
			`}`

			`dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;`
			`percent = opt & QDIST_PR_PERCENT ? "%" : "";`

			`n = n_bins ? n_bins : dist->n;`
			`x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);`
			`step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;`

			`if (opt & QDIST_PR_100X) {`
			`x *= 100.0;`
			`step *= 100.0;`
			`}`
			`if (opt & QDIST_PR_NOBINRANGE) {`
			`lparen = rparen = "";`
			`x1 = x;`
			`x2 = x; /* unnecessary, but a dumb compiler might not figure it out */`
			`} else {`
			`lparen = "[";`
			`rparen = is_left ? ")" : "]";`
			`if (is_left) {`
			`x1 = x;`
			`x2 = x + step;`
			`} else {`
			`x1 = x - step;`
			`x2 = x;`
			`}`
			`}`
			`g_string_append_printf(s, "%s%.*f", lparen, dec, x1);`
			`if (!(opt & QDIST_PR_NOBINRANGE)) {`
			`g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);`
			`}`
			`g_string_append(s, percent);`
			`out:`
			`return g_string_free(s, FALSE);`
			`}`

			`/*`
			`* Print the distribution's histogram into a string.`
			`*`
			`* See also: qdist_pr_plain().`
			`*`
			`* Callers must free the returned string with g_free().`
			`*/`
			`char qdist_pr(const struct qdist dist, size_t n_bins, uint32_t opt)`
			`{`
			`const char *border = opt & QDIST_PR_BORDER ? "\|" : "";`
			`char llabel, rlabel;`
			`char *hgram;`
			`GString *s;`

			`if (dist->n == 0) {`
qdist: return "(empty)" instead of NULL when printing an empty dist Printf'ing a NULL string is undefined behaviour. Avoid it. Reported-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1469459025-23606-4-git-send-email-cota@braap.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> 2016-07-25 18:03:45 +03:00			`return g_strdup(QDIST_EMPTY_STR);`
qdist: add module to represent frequency distributions of data Sometimes it is useful to have a quick histogram to represent a certain distribution -- for example, when investigating a performance regression in a hash table due to inadequate hashing. The appended allows us to easily represent a distribution using Unicode characters. Further, the data structure keeping track of the distribution is so simple that obtaining its values for off-line processing is trivial. Example, taking the last 10 commits to QEMU: Characters in commit title Count ----------------------------------- 39 1 48 1 53 1 54 2 57 1 61 1 67 1 78 1 80 1 qdist_init(&dist); qdist_inc(&dist, 39); [...] qdist_inc(&dist, 80); char str = qdist_pr(&dist, 9, QDIST_PR_LABELS); // -> [39.0,43.6)▂▂ █▂ ▂ ▄[75.4,80.0] g_free(str); char str = qdist_pr(&dist, 4, QDIST_PR_LABELS); // -> [39.0,49.2)▁█▁▁[69.8,80.0] g_free(str); Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Emilio G. Cota <cota@braap.org> Message-Id: <1465412133-3029-9-git-send-email-cota@braap.org> Signed-off-by: Richard Henderson <rth@twiddle.net> 2016-06-08 21:55:26 +03:00			`}`

			`s = g_string_new("");`

			`llabel = qdist_pr_label(dist, n_bins, opt, true);`
			`rlabel = qdist_pr_label(dist, n_bins, opt, false);`
			`hgram = qdist_pr_plain(dist, n_bins);`
			`g_string_append_printf(s, "%s%s%s%s%s",`
			`llabel, border, hgram, border, rlabel);`
			`g_free(llabel);`
			`g_free(rlabel);`
			`g_free(hgram);`

			`return g_string_free(s, FALSE);`
			`}`

			`static inline double qdist_x(const struct qdist *dist, int index)`
			`{`
			`if (dist->n == 0) {`
			`return NAN;`
			`}`
			`return dist->entries[index].x;`
			`}`

			`double qdist_xmin(const struct qdist *dist)`
			`{`
			`return qdist_x(dist, 0);`
			`}`

			`double qdist_xmax(const struct qdist *dist)`
			`{`
			`return qdist_x(dist, dist->n - 1);`
			`}`

			`size_t qdist_unique_entries(const struct qdist *dist)`
			`{`
			`return dist->n;`
			`}`

			`unsigned long qdist_sample_count(const struct qdist *dist)`
			`{`
			`unsigned long count = 0;`
			`size_t i;`

			`for (i = 0; i < dist->n; i++) {`
			`struct qdist_entry *e = &dist->entries[i];`

			`count += e->count;`
			`}`
			`return count;`
			`}`

			`static double qdist_pairwise_avg(const struct qdist *dist, size_t index,`
			`size_t n, unsigned long count)`
			`{`
			`/* amortize the recursion by using a base case > 2 */`
			`if (n <= 8) {`
			`size_t i;`
			`double ret = 0;`

			`for (i = 0; i < n; i++) {`
			`struct qdist_entry *e = &dist->entries[index + i];`

			`ret += e->x * e->count / count;`
			`}`
			`return ret;`
			`} else {`
			`size_t n2 = n / 2;`

			`return qdist_pairwise_avg(dist, index, n2, count) +`
			`qdist_pairwise_avg(dist, index + n2, n - n2, count);`
			`}`
			`}`

			`double qdist_avg(const struct qdist *dist)`
			`{`
			`unsigned long count;`

			`count = qdist_sample_count(dist);`
			`if (!count) {`
			`return NAN;`
			`}`
			`return qdist_pairwise_avg(dist, 0, dist->n, count);`
			`}`