
Since 3db72eb, the query ID of utilities is generated using the Query structure, making the use of the query string in JumbleQuery() unnecessary. This commit removes the argument "querytext" from JumbleQuery(). Reported-by: Joe Conway Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/ZJlQAWE4COFqHuAV@paquier.xyz
398 lines
10 KiB
C
398 lines
10 KiB
C
/*-------------------------------------------------------------------------
 *
 * queryjumblefuncs.c
 *	  Query normalization and fingerprinting.
 *
 * Normalization is a process whereby similar queries, typically differing only
 * in their constants (though the exact rules are somewhat more subtle than
 * that) are recognized as equivalent, and are tracked as a single entry.  This
 * is particularly useful for non-prepared queries.
 *
 * Normalization is implemented by fingerprinting queries, selectively
 * serializing those fields of each query tree's nodes that are judged to be
 * essential to the query.  This is referred to as a query jumble.  This is
 * distinct from a regular serialization in that various extraneous
 * information is ignored as irrelevant or not essential to the query, such
 * as the collations of Vars and, most notably, the values of constants.
 *
 * This jumble is acquired at the end of parse analysis of each query, and
 * a 64-bit hash of it is stored into the query's Query.queryId field.
 * The server then copies this value around, making it available in plan
 * tree(s) generated from the query.  The executor can then use this value
 * to blame query costs on the proper queryId.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/nodes/queryjumblefuncs.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "common/hashfn.h"
|
|
#include "miscadmin.h"
|
|
#include "nodes/queryjumble.h"
|
|
#include "parser/scansup.h"
|
|
|
|
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
|
|
|
|
/* GUC parameters */
|
|
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
|
|
|
|
/* True when compute_query_id is ON, or AUTO and a module requests them */
|
|
bool query_id_enabled = false;
|
|
|
|
static void AppendJumble(JumbleState *jstate,
|
|
const unsigned char *item, Size size);
|
|
static void RecordConstLocation(JumbleState *jstate, int location);
|
|
static void _jumbleNode(JumbleState *jstate, Node *node);
|
|
static void _jumbleA_Const(JumbleState *jstate, Node *node);
|
|
static void _jumbleList(JumbleState *jstate, Node *node);
|
|
static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
|
|
|
|
/*
 * CleanQuerytext
 *		Narrow a possibly multi-statement source string down to the part
 *		of interest, with leading and trailing whitespace removed.
 *
 * On entry, *location is the byte offset of the statement within "query"
 * (negative if unknown) and *len is its length (zero or negative meaning
 * "up to the end of the string").  On exit, both have been updated to
 * describe the trimmed statement.
 *
 * The return value points into the caller's string at the first byte of the
 * trimmed statement.  Nothing is copied and no terminator is written, so the
 * end of the statement is given only by *len.
 */
const char *
CleanQuerytext(const char *query, int *location, int *len)
{
	int			start = *location;
	int			nbytes = *len;

	if (start < 0)
	{
		/* With no known location, the supplied length can't be trusted */
		start = 0;
		nbytes = strlen(query);
	}
	else
	{
		/* Advance to the start of the statement */
		Assert(start <= strlen(query));
		query += start;

		/* A length of 0 (or -1) stands for "the rest of the string" */
		if (nbytes > 0)
			Assert(nbytes <= strlen(query));
		else
			nbytes = strlen(query);
	}

	/*
	 * Trim whitespace at both ends.  scanner_isspace() is used rather than
	 * libc's isspace() so that the result matches the lexer's behavior.
	 */
	while (nbytes > 0 && scanner_isspace(query[0]))
	{
		query++;
		start++;
		nbytes--;
	}
	while (nbytes > 0 && scanner_isspace(query[nbytes - 1]))
		nbytes--;

	/* Report the adjusted extent back to the caller */
	*location = start;
	*len = nbytes;

	return query;
}
|
|
|
|
JumbleState *
|
|
JumbleQuery(Query *query)
|
|
{
|
|
JumbleState *jstate = NULL;
|
|
|
|
Assert(IsQueryIdEnabled());
|
|
|
|
jstate = (JumbleState *) palloc(sizeof(JumbleState));
|
|
|
|
/* Set up workspace for query jumbling */
|
|
jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
|
|
jstate->jumble_len = 0;
|
|
jstate->clocations_buf_size = 32;
|
|
jstate->clocations = (LocationLen *)
|
|
palloc(jstate->clocations_buf_size * sizeof(LocationLen));
|
|
jstate->clocations_count = 0;
|
|
jstate->highest_extern_param_id = 0;
|
|
|
|
/* Compute query ID and mark the Query node with it */
|
|
_jumbleNode(jstate, (Node *) query);
|
|
query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
|
|
jstate->jumble_len,
|
|
0));
|
|
|
|
/*
|
|
* If we are unlucky enough to get a hash of zero, use 1 instead for
|
|
* normal statements and 2 for utility queries.
|
|
*/
|
|
if (query->queryId == UINT64CONST(0))
|
|
{
|
|
if (query->utilityStmt)
|
|
query->queryId = UINT64CONST(2);
|
|
else
|
|
query->queryId = UINT64CONST(1);
|
|
}
|
|
|
|
return jstate;
|
|
}
|
|
|
|
/*
|
|
* Enables query identifier computation.
|
|
*
|
|
* Third-party plugins can use this function to inform core that they require
|
|
* a query identifier to be computed.
|
|
*/
|
|
void
|
|
EnableQueryId(void)
|
|
{
|
|
if (compute_query_id != COMPUTE_QUERY_ID_OFF)
|
|
query_id_enabled = true;
|
|
}
|
|
|
|
/*
|
|
* AppendJumble: Append a value that is substantive in a given query to
|
|
* the current jumble.
|
|
*/
|
|
static void
|
|
AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
|
|
{
|
|
unsigned char *jumble = jstate->jumble;
|
|
Size jumble_len = jstate->jumble_len;
|
|
|
|
/*
|
|
* Whenever the jumble buffer is full, we hash the current contents and
|
|
* reset the buffer to contain just that hash value, thus relying on the
|
|
* hash to summarize everything so far.
|
|
*/
|
|
while (size > 0)
|
|
{
|
|
Size part_size;
|
|
|
|
if (jumble_len >= JUMBLE_SIZE)
|
|
{
|
|
uint64 start_hash;
|
|
|
|
start_hash = DatumGetUInt64(hash_any_extended(jumble,
|
|
JUMBLE_SIZE, 0));
|
|
memcpy(jumble, &start_hash, sizeof(start_hash));
|
|
jumble_len = sizeof(start_hash);
|
|
}
|
|
part_size = Min(size, JUMBLE_SIZE - jumble_len);
|
|
memcpy(jumble + jumble_len, item, part_size);
|
|
jumble_len += part_size;
|
|
item += part_size;
|
|
size -= part_size;
|
|
}
|
|
jstate->jumble_len = jumble_len;
|
|
}
|
|
|
|
/*
|
|
* Record location of constant within query string of query tree
|
|
* that is currently being walked.
|
|
*/
|
|
static void
|
|
RecordConstLocation(JumbleState *jstate, int location)
|
|
{
|
|
/* -1 indicates unknown or undefined location */
|
|
if (location >= 0)
|
|
{
|
|
/* enlarge array if needed */
|
|
if (jstate->clocations_count >= jstate->clocations_buf_size)
|
|
{
|
|
jstate->clocations_buf_size *= 2;
|
|
jstate->clocations = (LocationLen *)
|
|
repalloc(jstate->clocations,
|
|
jstate->clocations_buf_size *
|
|
sizeof(LocationLen));
|
|
}
|
|
jstate->clocations[jstate->clocations_count].location = location;
|
|
/* initialize lengths to -1 to simplify third-party module usage */
|
|
jstate->clocations[jstate->clocations_count].length = -1;
|
|
jstate->clocations_count++;
|
|
}
|
|
}
|
|
|
|
/*
 * Shorthand used by the jumbling routines.  Every macro expects a variable
 * "jstate" (the JumbleState) to be in scope; all except JUMBLE_FIELD_SINGLE
 * also expect "expr" to point to the node being processed and take the name
 * of one of its fields.
 *
 * JUMBLE_NODE			recurse into a child node
 * JUMBLE_LOCATION		record the location stored in a field
 * JUMBLE_FIELD			append the raw bytes of a field
 * JUMBLE_FIELD_SINGLE	append the raw bytes of an arbitrary lvalue
 * JUMBLE_STRING		append a string field including its terminating
 *						zero byte; a NULL pointer appends nothing
 */
#define JUMBLE_NODE(field) \
	_jumbleNode(jstate, (Node *) expr->field)

#define JUMBLE_LOCATION(field) \
	RecordConstLocation(jstate, expr->field)

#define JUMBLE_FIELD(field) \
	AppendJumble(jstate, (const unsigned char *) &(expr->field), sizeof(expr->field))

#define JUMBLE_FIELD_SINGLE(value) \
	AppendJumble(jstate, (const unsigned char *) &(value), sizeof(value))

#define JUMBLE_STRING(field) \
do { \
	if (expr->field) \
		AppendJumble(jstate, (const unsigned char *) (expr->field), strlen(expr->field) + 1); \
} while(0)
|
|
|
|
#include "queryjumblefuncs.funcs.c"
|
|
|
|
static void
|
|
_jumbleNode(JumbleState *jstate, Node *node)
|
|
{
|
|
Node *expr = node;
|
|
|
|
if (expr == NULL)
|
|
return;
|
|
|
|
/* Guard against stack overflow due to overly complex expressions */
|
|
check_stack_depth();
|
|
|
|
/*
|
|
* We always emit the node's NodeTag, then any additional fields that are
|
|
* considered significant, and then we recurse to any child nodes.
|
|
*/
|
|
JUMBLE_FIELD(type);
|
|
|
|
switch (nodeTag(expr))
|
|
{
|
|
#include "queryjumblefuncs.switch.c"
|
|
|
|
case T_List:
|
|
case T_IntList:
|
|
case T_OidList:
|
|
case T_XidList:
|
|
_jumbleList(jstate, expr);
|
|
break;
|
|
|
|
default:
|
|
/* Only a warning, since we can stumble along anyway */
|
|
elog(WARNING, "unrecognized node type: %d",
|
|
(int) nodeTag(expr));
|
|
break;
|
|
}
|
|
|
|
/* Special cases to handle outside the automated code */
|
|
switch (nodeTag(expr))
|
|
{
|
|
case T_Param:
|
|
{
|
|
Param *p = (Param *) node;
|
|
|
|
/*
|
|
* Update the highest Param id seen, in order to start
|
|
* normalization correctly.
|
|
*/
|
|
if (p->paramkind == PARAM_EXTERN &&
|
|
p->paramid > jstate->highest_extern_param_id)
|
|
jstate->highest_extern_param_id = p->paramid;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
_jumbleList(JumbleState *jstate, Node *node)
|
|
{
|
|
List *expr = (List *) node;
|
|
ListCell *l;
|
|
|
|
switch (expr->type)
|
|
{
|
|
case T_List:
|
|
foreach(l, expr)
|
|
_jumbleNode(jstate, lfirst(l));
|
|
break;
|
|
case T_IntList:
|
|
foreach(l, expr)
|
|
JUMBLE_FIELD_SINGLE(lfirst_int(l));
|
|
break;
|
|
case T_OidList:
|
|
foreach(l, expr)
|
|
JUMBLE_FIELD_SINGLE(lfirst_oid(l));
|
|
break;
|
|
case T_XidList:
|
|
foreach(l, expr)
|
|
JUMBLE_FIELD_SINGLE(lfirst_xid(l));
|
|
break;
|
|
default:
|
|
elog(ERROR, "unrecognized list node type: %d",
|
|
(int) expr->type);
|
|
return;
|
|
}
|
|
}
|
|
|
|
static void
|
|
_jumbleA_Const(JumbleState *jstate, Node *node)
|
|
{
|
|
A_Const *expr = (A_Const *) node;
|
|
|
|
JUMBLE_FIELD(isnull);
|
|
if (!expr->isnull)
|
|
{
|
|
JUMBLE_FIELD(val.node.type);
|
|
switch (nodeTag(&expr->val))
|
|
{
|
|
case T_Integer:
|
|
JUMBLE_FIELD(val.ival.ival);
|
|
break;
|
|
case T_Float:
|
|
JUMBLE_STRING(val.fval.fval);
|
|
break;
|
|
case T_Boolean:
|
|
JUMBLE_FIELD(val.boolval.boolval);
|
|
break;
|
|
case T_String:
|
|
JUMBLE_STRING(val.sval.sval);
|
|
break;
|
|
case T_BitString:
|
|
JUMBLE_STRING(val.bsval.bsval);
|
|
break;
|
|
default:
|
|
elog(ERROR, "unrecognized node type: %d",
|
|
(int) nodeTag(&expr->val));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
_jumbleRangeTblEntry(JumbleState *jstate, Node *node)
|
|
{
|
|
RangeTblEntry *expr = (RangeTblEntry *) node;
|
|
|
|
JUMBLE_FIELD(rtekind);
|
|
switch (expr->rtekind)
|
|
{
|
|
case RTE_RELATION:
|
|
JUMBLE_FIELD(relid);
|
|
JUMBLE_NODE(tablesample);
|
|
JUMBLE_FIELD(inh);
|
|
break;
|
|
case RTE_SUBQUERY:
|
|
JUMBLE_NODE(subquery);
|
|
break;
|
|
case RTE_JOIN:
|
|
JUMBLE_FIELD(jointype);
|
|
break;
|
|
case RTE_FUNCTION:
|
|
JUMBLE_NODE(functions);
|
|
break;
|
|
case RTE_TABLEFUNC:
|
|
JUMBLE_NODE(tablefunc);
|
|
break;
|
|
case RTE_VALUES:
|
|
JUMBLE_NODE(values_lists);
|
|
break;
|
|
case RTE_CTE:
|
|
|
|
/*
|
|
* Depending on the CTE name here isn't ideal, but it's the only
|
|
* info we have to identify the referenced WITH item.
|
|
*/
|
|
JUMBLE_STRING(ctename);
|
|
JUMBLE_FIELD(ctelevelsup);
|
|
break;
|
|
case RTE_NAMEDTUPLESTORE:
|
|
JUMBLE_STRING(enrname);
|
|
break;
|
|
case RTE_RESULT:
|
|
break;
|
|
default:
|
|
elog(ERROR, "unrecognized RTE kind: %d", (int) expr->rtekind);
|
|
break;
|
|
}
|
|
}
|