439 lines
10 KiB
C
439 lines
10 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* uuid.c
|
|
* Functions for the built-in type "uuid".
|
|
*
|
|
* Copyright (c) 2007-2022, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/uuid.c
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "common/hashfn.h"
|
|
#include "lib/hyperloglog.h"
|
|
#include "libpq/pqformat.h"
|
|
#include "port/pg_bswap.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/guc.h"
|
|
#include "utils/sortsupport.h"
|
|
#include "utils/uuid.h"
|
|
|
|
/* sortsupport for uuid */
|
|
typedef struct
|
|
{
|
|
int64 input_count; /* number of non-null values seen */
|
|
bool estimating; /* true if estimating cardinality */
|
|
|
|
hyperLogLogState abbr_card; /* cardinality estimator */
|
|
} uuid_sortsupport_state;
|
|
|
|
static void string_to_uuid(const char *source, pg_uuid_t *uuid);
|
|
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
|
|
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
|
|
static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
|
|
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
|
|
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
|
|
|
|
Datum
|
|
uuid_in(PG_FUNCTION_ARGS)
|
|
{
|
|
char *uuid_str = PG_GETARG_CSTRING(0);
|
|
pg_uuid_t *uuid;
|
|
|
|
uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
|
|
string_to_uuid(uuid_str, uuid);
|
|
PG_RETURN_UUID_P(uuid);
|
|
}
|
|
|
|
Datum
|
|
uuid_out(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
|
|
static const char hex_chars[] = "0123456789abcdef";
|
|
StringInfoData buf;
|
|
int i;
|
|
|
|
initStringInfo(&buf);
|
|
for (i = 0; i < UUID_LEN; i++)
|
|
{
|
|
int hi;
|
|
int lo;
|
|
|
|
/*
|
|
* We print uuid values as a string of 8, 4, 4, 4, and then 12
|
|
* hexadecimal characters, with each group is separated by a hyphen
|
|
* ("-"). Therefore, add the hyphens at the appropriate places here.
|
|
*/
|
|
if (i == 4 || i == 6 || i == 8 || i == 10)
|
|
appendStringInfoChar(&buf, '-');
|
|
|
|
hi = uuid->data[i] >> 4;
|
|
lo = uuid->data[i] & 0x0F;
|
|
|
|
appendStringInfoChar(&buf, hex_chars[hi]);
|
|
appendStringInfoChar(&buf, hex_chars[lo]);
|
|
}
|
|
|
|
PG_RETURN_CSTRING(buf.data);
|
|
}
|
|
|
|
/*
|
|
* We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
|
|
* after each group of 4 hexadecimal digits, and optionally surrounded by {}.
|
|
* (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
|
|
* digits, is the only one used for output.)
|
|
*/
|
|
static void
|
|
string_to_uuid(const char *source, pg_uuid_t *uuid)
|
|
{
|
|
const char *src = source;
|
|
bool braces = false;
|
|
int i;
|
|
|
|
if (src[0] == '{')
|
|
{
|
|
src++;
|
|
braces = true;
|
|
}
|
|
|
|
for (i = 0; i < UUID_LEN; i++)
|
|
{
|
|
char str_buf[3];
|
|
|
|
if (src[0] == '\0' || src[1] == '\0')
|
|
goto syntax_error;
|
|
memcpy(str_buf, src, 2);
|
|
if (!isxdigit((unsigned char) str_buf[0]) ||
|
|
!isxdigit((unsigned char) str_buf[1]))
|
|
goto syntax_error;
|
|
|
|
str_buf[2] = '\0';
|
|
uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
|
|
src += 2;
|
|
if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
|
|
src++;
|
|
}
|
|
|
|
if (braces)
|
|
{
|
|
if (*src != '}')
|
|
goto syntax_error;
|
|
src++;
|
|
}
|
|
|
|
if (*src != '\0')
|
|
goto syntax_error;
|
|
|
|
return;
|
|
|
|
syntax_error:
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
|
errmsg("invalid input syntax for type %s: \"%s\"",
|
|
"uuid", source)));
|
|
}
|
|
|
|
Datum
|
|
uuid_recv(PG_FUNCTION_ARGS)
|
|
{
|
|
StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
|
|
pg_uuid_t *uuid;
|
|
|
|
uuid = (pg_uuid_t *) palloc(UUID_LEN);
|
|
memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
|
|
PG_RETURN_POINTER(uuid);
|
|
}
|
|
|
|
Datum
|
|
uuid_send(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
|
|
StringInfoData buffer;
|
|
|
|
pq_begintypsend(&buffer);
|
|
pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
|
|
PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
|
|
}
|
|
|
|
/* internal uuid compare function */
|
|
static int
|
|
uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
|
|
{
|
|
return memcmp(arg1->data, arg2->data, UUID_LEN);
|
|
}
|
|
|
|
Datum
|
|
uuid_lt(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_le(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_eq(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_ge(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_gt(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
|
|
}
|
|
|
|
Datum
|
|
uuid_ne(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
|
|
}
|
|
|
|
/* handler for btree index operator */
|
|
Datum
|
|
uuid_cmp(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
|
|
pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
|
|
|
|
PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
|
|
}
|
|
|
|
/*
|
|
* Sort support strategy routine
|
|
*/
|
|
Datum
|
|
uuid_sortsupport(PG_FUNCTION_ARGS)
|
|
{
|
|
SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
|
|
|
|
ssup->comparator = uuid_fast_cmp;
|
|
ssup->ssup_extra = NULL;
|
|
|
|
if (ssup->abbreviate)
|
|
{
|
|
uuid_sortsupport_state *uss;
|
|
MemoryContext oldcontext;
|
|
|
|
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
|
|
|
|
uss = palloc(sizeof(uuid_sortsupport_state));
|
|
uss->input_count = 0;
|
|
uss->estimating = true;
|
|
initHyperLogLog(&uss->abbr_card, 10);
|
|
|
|
ssup->ssup_extra = uss;
|
|
|
|
ssup->comparator = uuid_cmp_abbrev;
|
|
ssup->abbrev_converter = uuid_abbrev_convert;
|
|
ssup->abbrev_abort = uuid_abbrev_abort;
|
|
ssup->abbrev_full_comparator = uuid_fast_cmp;
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
|
}
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
/*
|
|
* SortSupport comparison func
|
|
*/
|
|
static int
|
|
uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
|
|
{
|
|
pg_uuid_t *arg1 = DatumGetUUIDP(x);
|
|
pg_uuid_t *arg2 = DatumGetUUIDP(y);
|
|
|
|
return uuid_internal_cmp(arg1, arg2);
|
|
}
|
|
|
|
/*
|
|
* Abbreviated key comparison func
|
|
*/
|
|
static int
|
|
uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
|
|
{
|
|
if (x > y)
|
|
return 1;
|
|
else if (x == y)
|
|
return 0;
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Callback for estimating effectiveness of abbreviated key optimization.
|
|
*
|
|
* We pay no attention to the cardinality of the non-abbreviated data, because
|
|
* there is no equality fast-path within authoritative uuid comparator.
|
|
*/
|
|
static bool
|
|
uuid_abbrev_abort(int memtupcount, SortSupport ssup)
|
|
{
|
|
uuid_sortsupport_state *uss = ssup->ssup_extra;
|
|
double abbr_card;
|
|
|
|
if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
|
|
return false;
|
|
|
|
abbr_card = estimateHyperLogLog(&uss->abbr_card);
|
|
|
|
/*
|
|
* If we have >100k distinct values, then even if we were sorting many
|
|
* billion rows we'd likely still break even, and the penalty of undoing
|
|
* that many rows of abbrevs would probably not be worth it. Stop even
|
|
* counting at that point.
|
|
*/
|
|
if (abbr_card > 100000.0)
|
|
{
|
|
#ifdef TRACE_SORT
|
|
if (trace_sort)
|
|
elog(LOG,
|
|
"uuid_abbrev: estimation ends at cardinality %f"
|
|
" after " INT64_FORMAT " values (%d rows)",
|
|
abbr_card, uss->input_count, memtupcount);
|
|
#endif
|
|
uss->estimating = false;
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
|
|
* fudge factor allows us to abort earlier on genuinely pathological data
|
|
* where we've had exactly one abbreviated value in the first 2k
|
|
* (non-null) rows.
|
|
*/
|
|
if (abbr_card < uss->input_count / 2000.0 + 0.5)
|
|
{
|
|
#ifdef TRACE_SORT
|
|
if (trace_sort)
|
|
elog(LOG,
|
|
"uuid_abbrev: aborting abbreviation at cardinality %f"
|
|
" below threshold %f after " INT64_FORMAT " values (%d rows)",
|
|
abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
|
|
memtupcount);
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
#ifdef TRACE_SORT
|
|
if (trace_sort)
|
|
elog(LOG,
|
|
"uuid_abbrev: cardinality %f after " INT64_FORMAT
|
|
" values (%d rows)", abbr_card, uss->input_count, memtupcount);
|
|
#endif
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Conversion routine for sortsupport. Converts original uuid representation
|
|
* to abbreviated key representation. Our encoding strategy is simple -- pack
|
|
* the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
|
|
* machines, the bytes are stored in reverse order), and treat it as an
|
|
* unsigned integer.
|
|
*/
|
|
static Datum
|
|
uuid_abbrev_convert(Datum original, SortSupport ssup)
|
|
{
|
|
uuid_sortsupport_state *uss = ssup->ssup_extra;
|
|
pg_uuid_t *authoritative = DatumGetUUIDP(original);
|
|
Datum res;
|
|
|
|
memcpy(&res, authoritative->data, sizeof(Datum));
|
|
uss->input_count += 1;
|
|
|
|
if (uss->estimating)
|
|
{
|
|
uint32 tmp;
|
|
|
|
#if SIZEOF_DATUM == 8
|
|
tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
|
|
#else /* SIZEOF_DATUM != 8 */
|
|
tmp = (uint32) res;
|
|
#endif
|
|
|
|
addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
|
|
}
|
|
|
|
/*
|
|
* Byteswap on little-endian machines.
|
|
*
|
|
* This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
|
|
* comparator) works correctly on all platforms. If we didn't do this,
|
|
* the comparator would have to call memcmp() with a pair of pointers to
|
|
* the first byte of each abbreviated key, which is slower.
|
|
*/
|
|
res = DatumBigEndianToNative(res);
|
|
|
|
return res;
|
|
}
|
|
|
|
/* hash index support */
|
|
Datum
|
|
uuid_hash(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *key = PG_GETARG_UUID_P(0);
|
|
|
|
return hash_any(key->data, UUID_LEN);
|
|
}
|
|
|
|
Datum
|
|
uuid_hash_extended(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *key = PG_GETARG_UUID_P(0);
|
|
|
|
return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
|
|
}
|
|
|
|
Datum
|
|
gen_random_uuid(PG_FUNCTION_ARGS)
|
|
{
|
|
pg_uuid_t *uuid = palloc(UUID_LEN);
|
|
|
|
if (!pg_strong_random(uuid, UUID_LEN))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INTERNAL_ERROR),
|
|
errmsg("could not generate random values")));
|
|
|
|
/*
|
|
* Set magic numbers for a "version 4" (pseudorandom) UUID, see
|
|
* http://tools.ietf.org/html/rfc4122#section-4.4
|
|
*/
|
|
uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
|
|
uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
|
|
|
|
PG_RETURN_UUID_P(uuid);
|
|
}
|