/*-------------------------------------------------------------------------
 *
 * jsonb.h
 *	  Declarations for jsonb data type support.
 *
 * Copyright (c) 1996-2018, PostgreSQL Global Development Group
 *
 * src/include/utils/jsonb.h
 *
 *-------------------------------------------------------------------------
 */
/* Include guard; the matching #endif is the last directive in this file. */
#ifndef __JSONB_H__
#define __JSONB_H__

#include "lib/stringinfo.h"
#include "utils/array.h"
#include "utils/numeric.h"

/*
 * Tokens used when sequentially processing a jsonb value.
 *
 * The enumerators carry no explicit values, so they are numbered
 * consecutively from zero in declaration order.
 */
typedef enum
{
	WJB_DONE,
	WJB_KEY,
	WJB_VALUE,
	WJB_ELEM,
	WJB_BEGIN_ARRAY,
	WJB_END_ARRAY,
	WJB_BEGIN_OBJECT,
	WJB_END_OBJECT
} JsonbIteratorToken;

/* Strategy numbers for GIN index opclasses */
#define JsonbContainsStrategyNumber		7
#define JsonbExistsStrategyNumber		9
#define JsonbExistsAnyStrategyNumber	10
#define JsonbExistsAllStrategyNumber	11

/*
 * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
 * keys and values.  The storage format is text.  The first byte of the text
 * string distinguishes whether this is a key (always a string), null value,
 * boolean value, numeric value, or string value.  However, array elements
 * that are strings are marked as though they were keys; this imprecision
 * supports the definition of the "exists" operator, which treats array
 * elements like keys.  The remainder of the text string is empty for a null
 * value, "t" or "f" for a boolean value, a normalized print representation of
 * a numeric value, or the text of a string value.  However, if the length of
 * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
 * the text representation and store an 8-hex-digit representation of the
 * uint32 hash value, marking the prefix byte with an additional bit to
 * distinguish that this has happened.  Hashing long strings saves space and
 * ensures that we won't overrun the maximum entry length for a GIN index.
 * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit.  It's chosen
 * to ensure that the on-disk text datum will have a short varlena header.)
 * Note that when any hashed item appears in a query, we must recheck index
 * matches against the heap tuple; currently, this costs nothing because we
 * must always recheck for other reasons.
 */
#define JGINFLAG_KEY	0x01	/* key (or string array element) */
#define JGINFLAG_NULL	0x02	/* null value */
#define JGINFLAG_BOOL	0x03	/* boolean value */
#define JGINFLAG_NUM	0x04	/* numeric value */
#define JGINFLAG_STR	0x05	/* string value (if not an array element) */
#define JGINFLAG_HASHED 0x10	/* OR'd into flag if value was hashed */
#define JGIN_MAXLENGTH	125		/* max length of text part before hashing */

/*
 * Convenience macros
 *
 * DatumGetJsonbP passes the datum through PG_DETOAST_DATUM and casts the
 * result to Jsonb *; JsonbPGetDatum wraps a pointer with PointerGetDatum.
 * The PG_GETARG_/PG_RETURN_ forms are the function-argument and
 * function-result counterparts built on PG_GETARG_DATUM and
 * PG_RETURN_POINTER.
 */
#define DatumGetJsonbP(d)	((Jsonb *) PG_DETOAST_DATUM(d))
#define JsonbPGetDatum(p)	PointerGetDatum(p)
#define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x))
#define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x)

/*
 * Forward declarations: JsonbValue and JsonbPair refer to each other through
 * pointers, so both typedef names must exist before either struct is defined.
 */
typedef struct JsonbPair JsonbPair;
typedef struct JsonbValue JsonbValue;

/*
 * Jsonbs are varlena objects, so must meet the varlena convention that the
 * first int32 of the object contains the total object size in bytes.  Be sure
 * to use VARSIZE() and SET_VARSIZE() to access it, though!
 *
 * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
 * representation.  Often, JsonbValues are just shims through which a Jsonb
 * buffer is accessed, but they can also be deep copied and passed around.
 *
 * Jsonb is a tree structure.  Each node in the tree consists of a JEntry
 * header and a variable-length content (possibly of zero size).  The JEntry
 * header indicates what kind of a node it is, e.g. a string or an array,
 * and provides the length of its variable-length portion.
 *
 * The JEntry and the content of a node are not stored physically together.
 * Instead, the container array or object has an array that holds the JEntrys
 * of all the child nodes, followed by their variable-length portions.
 *
 * The root node is an exception; it has no parent array or object that could
 * hold its JEntry.  Hence, no JEntry header is stored for the root node.  It
 * is implicitly known that the root node must be an array or an object,
 * so we can get away without the type indicator as long as we can distinguish
 * the two.  For that purpose, both an array and an object begin with a uint32
 * header field, which contains a JB_FOBJECT or JB_FARRAY flag.  When a naked
 * scalar value needs to be stored as a Jsonb value, what we actually store is
 * an array with one element, with the flags in the array's header field set
 * to JB_FSCALAR | JB_FARRAY.
 *
 * Overall, the Jsonb struct requires 4-bytes alignment.  Within the struct,
 * the variable-length portion of some node types is aligned to a 4-byte
 * boundary, while others are not.  When alignment is needed, the padding is
 * in the beginning of the node that requires it.  For example, if a numeric
 * node is stored after a string node, so that the numeric node begins at
 * offset 3, the variable-length portion of the numeric node will begin with
 * one padding byte so that the actual numeric data is 4-byte aligned.
 */

/*
|
|
* JEntry format.
|
|
*
|
|
* The least significant 28 bits store either the data length of the entry,
|
|
* or its end+1 offset from the start of the variable-length portion of the
|
|
* containing object. The next three bits store the type of the entry, and
|
|
* the high-order bit tells whether the least significant bits store a length
|
|
* or an offset.
|
|
*
|
|
* The reason for the offset-or-length complication is to compromise between
|
|
* access speed and data compressibility. In the initial design each JEntry
|
|
* always stored an offset, but this resulted in JEntry arrays with horrible
|
|
* compressibility properties, so that TOAST compression of a JSONB did not
|
|
* work well. Storing only lengths would greatly improve compressibility,
|
|
* but it makes random access into large arrays expensive (O(N) not O(1)).
|
|
* So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
|
|
* a length in the rest. This results in reasonably compressible data (as
|
|
* long as the stride isn't too small). We may have to examine as many as
|
|
* JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
|
|
* given item, but that's still O(1) no matter how large the container is.
|
|
*
|
|
* We could avoid eating a flag bit for this purpose if we were to store
|
|
* the stride in the container header, or if we were willing to treat the
|
|
* stride as an unchangeable constant. Neither of those options is very
|
|
* attractive though.
|
|
*/
|
|
typedef uint32 JEntry;
|
|
|
|
/*
 * Bit layout of a JEntry: low 28 bits hold a length or an offset, the next
 * three bits hold the node type, and the top bit says which of the two the
 * low bits contain.
 */
#define JENTRY_OFFLENMASK		0x0FFFFFFF
#define JENTRY_TYPEMASK			0x70000000
#define JENTRY_HAS_OFF			0x80000000

/* values stored in the type bits */
#define JENTRY_ISSTRING			0x00000000
#define JENTRY_ISNUMERIC		0x10000000
#define JENTRY_ISBOOL_FALSE		0x20000000
#define JENTRY_ISBOOL_TRUE		0x30000000
#define JENTRY_ISNULL			0x40000000
#define JENTRY_ISCONTAINER		0x50000000	/* array or object */

/*
 * Access macros.  Note possible multiple evaluations: JBE_ISBOOL expands its
 * argument twice, so do not pass an expression with side effects.
 */
#define JBE_OFFLENFLD(je_)		((je_) & JENTRY_OFFLENMASK)
#define JBE_HAS_OFF(je_)		(((je_) & JENTRY_HAS_OFF) != 0)
#define JBE_ISSTRING(je_)		(((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
#define JBE_ISNUMERIC(je_)		(((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
#define JBE_ISCONTAINER(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
#define JBE_ISNULL(je_)			(((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
#define JBE_ISBOOL_TRUE(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
#define JBE_ISBOOL_FALSE(je_)	(((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
#define JBE_ISBOOL(je_)			(JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))

/*
 * Macro for advancing an offset variable to the next JEntry.
 *
 * "je" is copied into a local first, so it is evaluated exactly once.  If the
 * entry stores an offset, that offset replaces "offset"; otherwise the
 * entry's length is added to it.
 */
#define JBE_ADVANCE_OFFSET(offset, je) \
	do { \
		JEntry	je_ = (je); \
		if (JBE_HAS_OFF(je_)) \
			(offset) = JBE_OFFLENFLD(je_); \
		else \
			(offset) += JBE_OFFLENFLD(je_); \
	} while(0)

/*
 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
 * Caution: this macro should only be referenced when creating a JSONB
 * value.  When examining an existing value, pay attention to the HAS_OFF
 * bits instead.  This allows changes in the offset-placement heuristic
 * without breaking on-disk compatibility.
 */
#define JB_OFFSET_STRIDE		32

/*
|
|
* A jsonb array or object node, within a Jsonb Datum.
|
|
*
|
|
* An array has one child for each element, stored in array order.
|
|
*
|
|
* An object has two children for each key/value pair. The keys all appear
|
|
* first, in key sort order; then the values appear, in an order matching the
|
|
* key order. This arrangement keeps the keys compact in memory, making a
|
|
* search for a particular key more cache-friendly.
|
|
*/
|
|
typedef struct JsonbContainer
|
|
{
|
|
uint32 header; /* number of elements or key/value pairs, and
|
|
* flags */
|
|
JEntry children[FLEXIBLE_ARRAY_MEMBER];
|
|
|
|
/* the data for each child node follows. */
|
|
} JsonbContainer;
|
|
|
|
/* flags for the header-field in JsonbContainer */
#define JB_CMASK				0x0FFFFFFF	/* mask for count field */
#define JB_FSCALAR				0x10000000	/* flag bits */
#define JB_FOBJECT				0x20000000
#define JB_FARRAY				0x40000000

/*
 * convenience macros for accessing a JsonbContainer struct
 *
 * Each takes a pointer and reads only its "header" member.
 */
#define JsonContainerSize(jc)		((jc)->header & JB_CMASK)
#define JsonContainerIsScalar(jc)	(((jc)->header & JB_FSCALAR) != 0)
#define JsonContainerIsObject(jc)	(((jc)->header & JB_FOBJECT) != 0)
#define JsonContainerIsArray(jc)	(((jc)->header & JB_FARRAY) != 0)

/* The top-level on-disk format for a jsonb datum. */
|
|
typedef struct
|
|
{
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
JsonbContainer root;
|
|
} Jsonb;
|
|
|
|
/*
 * convenience macros for accessing the root container in a Jsonb datum
 *
 * Each reads the uint32 found at VARDATA(jbp_) and masks it with the
 * corresponding JB_* header constant.
 */
#define JB_ROOT_COUNT(jbp_)		(*(uint32 *) VARDATA(jbp_) & JB_CMASK)
#define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
#define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
#define JB_ROOT_IS_ARRAY(jbp_)	((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)


/*
 * Type tag of a JsonbValue.  Scalar tags are numbered from 0x0 and composite
 * tags from 0x10, so the two groups occupy separate numeric ranges.
 */
enum jbvType
{
	/* Scalar types */
	jbvNull = 0x0,
	jbvString,
	jbvNumeric,
	jbvBool,
	/* Composite types */
	jbvArray = 0x10,
	jbvObject,
	/* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
	jbvBinary
};

/*
|
|
* JsonbValue: In-memory representation of Jsonb. This is a convenient
|
|
* deserialized representation, that can easily support using the "val"
|
|
* union across underlying types during manipulation. The Jsonb on-disk
|
|
* representation has various alignment considerations.
|
|
*/
|
|
struct JsonbValue
|
|
{
|
|
enum jbvType type; /* Influences sort order */
|
|
|
|
union
|
|
{
|
|
Numeric numeric;
|
|
bool boolean;
|
|
struct
|
|
{
|
|
int len;
|
|
char *val; /* Not necessarily null-terminated */
|
|
} string; /* String primitive type */
|
|
|
|
struct
|
|
{
|
|
int nElems;
|
|
JsonbValue *elems;
|
|
bool rawScalar; /* Top-level "raw scalar" array? */
|
|
} array; /* Array container type */
|
|
|
|
struct
|
|
{
|
|
int nPairs; /* 1 pair, 2 elements */
|
|
JsonbPair *pairs;
|
|
} object; /* Associative container type */
|
|
|
|
struct
|
|
{
|
|
int len;
|
|
JsonbContainer *data;
|
|
} binary; /* Array or object, in on-disk format */
|
|
} val;
|
|
};
|
|
|
|
/*
 * True when the pointed-to value's type tag lies in the scalar range
 * jbvNull..jbvBool.  Note the argument is evaluated twice.
 */
#define IsAJsonbScalar(jsonbval)	((jsonbval)->type >= jbvNull && \
									 (jsonbval)->type <= jbvBool)

/*
|
|
* Key/value pair within an Object.
|
|
*
|
|
* This struct type is only used briefly while constructing a Jsonb; it is
|
|
* *not* the on-disk representation.
|
|
*
|
|
* Pairs with duplicate keys are de-duplicated. We store the originally
|
|
* observed pair ordering for the purpose of removing duplicates in a
|
|
* well-defined way (which is "last observed wins").
|
|
*/
|
|
struct JsonbPair
|
|
{
|
|
JsonbValue key; /* Must be a jbvString */
|
|
JsonbValue value; /* May be of any type */
|
|
uint32 order; /* Pair's index in original sequence */
|
|
};
|
|
|
|
/* Conversion state used when parsing Jsonb from text, or for type coercion */
|
|
typedef struct JsonbParseState
|
|
{
|
|
JsonbValue contVal;
|
|
Size size;
|
|
struct JsonbParseState *next;
|
|
} JsonbParseState;
|
|
|
|
/*
 * JsonbIterator holds details of the type for each iteration. It also stores a
 * Jsonb varlena buffer, which can be directly accessed in some contexts.
 *
 * JsonbIterState is the iterator's private position marker; its enumerators
 * are numbered consecutively from zero.
 */
typedef enum
{
	JBI_ARRAY_START,
	JBI_ARRAY_ELEM,
	JBI_OBJECT_START,
	JBI_OBJECT_KEY,
	JBI_OBJECT_VALUE
} JsonbIterState;

typedef struct JsonbIterator
|
|
{
|
|
/* Container being iterated */
|
|
JsonbContainer *container;
|
|
uint32 nElems; /* Number of elements in children array (will
|
|
* be nPairs for objects) */
|
|
bool isScalar; /* Pseudo-array scalar value? */
|
|
JEntry *children; /* JEntrys for child nodes */
|
|
/* Data proper. This points to the beginning of the variable-length data */
|
|
char *dataProper;
|
|
|
|
/* Current item in buffer (up to nElems) */
|
|
int curIndex;
|
|
|
|
/* Data offset corresponding to current item */
|
|
uint32 curDataOffset;
|
|
|
|
/*
|
|
* If the container is an object, we want to return keys and values
|
|
* alternately; so curDataOffset points to the current key, and
|
|
* curValueOffset points to the current value.
|
|
*/
|
|
uint32 curValueOffset;
|
|
|
|
/* Private state */
|
|
JsonbIterState state;
|
|
|
|
struct JsonbIterator *parent;
|
|
} JsonbIterator;
|
|
|
|
|
|
/* Support functions */
|
|
extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
|
|
extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
|
|
extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
|
|
extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
|
|
uint32 flags,
|
|
JsonbValue *key);
|
|
extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
|
|
uint32 i);
|
|
extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
|
|
JsonbIteratorToken seq, JsonbValue *jbVal);
|
|
extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
|
|
extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
|
|
bool skipNested);
|
|
extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
|
|
extern bool JsonbDeepContains(JsonbIterator **val,
|
|
JsonbIterator **mContained);
|
|
extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
|
|
extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
|
|
uint64 *hash, uint64 seed);
|
|
|
|
/* jsonb.c support functions */
|
|
extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
|
|
int estimated_len);
|
|
extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
|
|
int estimated_len);
|
|
|
|
|
|
#endif /* __JSONB_H__ */
|