diff --git a/doc/src/sgml/keywords.sgml b/doc/src/sgml/keywords.sgml index 767ed811a9..0e7b322851 100644 --- a/doc/src/sgml/keywords.sgml +++ b/doc/src/sgml/keywords.sgml @@ -2444,7 +2444,7 @@ LATERAL - + reserved reserved reserved diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index a3dadbef89..2d9531f08d 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -590,7 +590,7 @@ SELECT a.* FROM (my_table AS a JOIN your_table AS b ON ...) AS c Subqueries specifying a derived table must be enclosed in parentheses and must be assigned a table - alias name. (See .) For + alias name (as in ). For example: FROM (SELECT * FROM table1) AS alias_name @@ -697,6 +697,87 @@ SELECT * expand to. + + + <literal>LATERAL</> Subqueries + + + LATERAL + in the FROM clause + + + + Subqueries and table functions appearing in FROM can be + preceded by the key word LATERAL. This allows them to + reference columns provided by preceding FROM items. + (Without LATERAL, each FROM item is + evaluated independently and so cannot cross-reference any other + FROM item.) + A LATERAL item can appear at top level in the + FROM list, or within a JOIN tree; in the latter + case it can also refer to any items that are on the left-hand side of a + JOIN that it is on the right-hand side of. + + + + When a FROM item contains LATERAL + cross-references, evaluation proceeds as follows: for each row of the + FROM item providing the cross-referenced column(s), or + set of rows of multiple FROM items providing the + columns, the LATERAL item is evaluated using that + row or row set's values of the columns. The resulting row(s) are + joined as usual with the rows they were computed from. This is + repeated for each row or set of rows from the column source table(s). 
+ + + + A trivial example of LATERAL is + +SELECT * FROM foo, LATERAL (SELECT * FROM bar WHERE bar.id = foo.bar_id) ss; + + This is not especially useful since it has exactly the same result as + the more conventional + +SELECT * FROM foo, bar WHERE bar.id = foo.bar_id; + + LATERAL is primarily useful when the cross-referenced + column is necessary for computing the row(s) to be joined. A common + application is providing an argument value for a set-returning function. + For example, supposing that vertices(polygon) returns the + set of vertices of a polygon, we could identify close-together vertices + of polygons stored in a table with: + +SELECT p1.id, p2.id, v1, v2 +FROM polygons p1, polygons p2, + LATERAL vertices(p1.poly) v1, + LATERAL vertices(p2.poly) v2 +WHERE (v1 <-> v2) < 10 AND p1.id != p2.id; + + This query could also be written + +SELECT p1.id, p2.id, v1, v2 +FROM polygons p1 CROSS JOIN LATERAL vertices(p1.poly) v1, + polygons p2 CROSS JOIN LATERAL vertices(p2.poly) v2 +WHERE (v1 <-> v2) < 10 AND p1.id != p2.id; + + or in several other equivalent formulations. + + + + It is often particularly handy to LEFT JOIN to a + LATERAL subquery, so that source rows will appear in + the result even if the LATERAL subquery produces no + rows for them. For example, if get_product_names() returns + the names of products made by a manufacturer, but some manufacturers in + our table currently produce no products, we could find out which ones + those are like this: + +SELECT m.name +FROM manufacturers m LEFT JOIN LATERAL get_product_names(m.id) pname ON true +WHERE pname IS NULL; + + + diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 7e98924258..0ac37a394f 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -50,10 +50,10 @@ SELECT [ ALL | DISTINCT [ ON ( expressionwhere from_item can be one of: [ ONLY ] table_name [ * ] [ [ AS ] alias [ ( column_alias [, ...] 
) ] ] - ( select ) [ AS ] alias [ ( column_alias [, ...] ) ] + [ LATERAL ] ( select ) [ AS ] alias [ ( column_alias [, ...] ) ] with_query_name [ [ AS ] alias [ ( column_alias [, ...] ) ] ] - function_name ( [ argument [, ...] ] ) [ AS ] alias [ ( column_alias [, ...] | column_definition [, ...] ) ] - function_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] ) + [ LATERAL ] function_name ( [ argument [, ...] ] ) [ AS ] alias [ ( column_alias [, ...] | column_definition [, ...] ) ] + [ LATERAL ] function_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] ) from_item [ NATURAL ] join_type from_item [ ON join_condition | USING ( join_column [, ...] ) ] and with_query is: @@ -284,8 +284,8 @@ TABLE [ ONLY ] table_name [ * ] The FROM clause specifies one or more source tables for the SELECT. If multiple sources are specified, the result is the Cartesian product (cross join) of all - the sources. But usually qualification conditions - are added to restrict the returned rows to a small subset of the + the sources. But usually qualification conditions are added (via + WHERE) to restrict the returned rows to a small subset of the Cartesian product. @@ -414,17 +414,18 @@ TABLE [ ONLY ] table_name [ * ] - A JOIN clause combines two - FROM items. Use parentheses if necessary to - determine the order of nesting. In the absence of parentheses, - JOINs nest left-to-right. In any case - JOIN binds more tightly than the commas - separating FROM items. + A JOIN clause combines two FROM + items, which for convenience we will refer to as tables, + though in reality they can be any type of FROM item. + Use parentheses if necessary to determine the order of nesting. + In the absence of parentheses, JOINs nest + left-to-right. In any case JOIN binds more + tightly than the commas separating FROM-list items. 
CROSS JOIN and INNER JOIN produce a simple Cartesian product, the same result as you get from - listing the two items at the top level of FROM, + listing the two tables at the top level of FROM, but restricted by the join condition (if any). CROSS JOIN is equivalent to INNER JOIN ON (TRUE), that is, no rows are removed by qualification. @@ -449,7 +450,7 @@ TABLE [ ONLY ] table_name [ * ] joined rows, plus one row for each unmatched right-hand row (extended with nulls on the left). This is just a notational convenience, since you could convert it to a LEFT - OUTER JOIN by switching the left and right inputs. + OUTER JOIN by switching the left and right tables. FULL OUTER JOIN returns all the joined rows, plus @@ -495,6 +496,47 @@ TABLE [ ONLY ] table_name [ * ] + + + LATERAL + + The LATERAL key word can precede a + sub-SELECT or function-call FROM + item. This allows the sub-SELECT or function + expression to refer to columns of FROM items that appear + before it in the FROM list. (Without + LATERAL, each FROM item is evaluated + independently and so cannot cross-reference any other + FROM item.) A LATERAL item can + appear at top level in the FROM list, or within a + JOIN tree; in the latter case it can also refer to any + items that are on the left-hand side of a JOIN that it is + on the right-hand side of. + + + + When a FROM item contains LATERAL + cross-references, evaluation proceeds as follows: for each row of the + FROM item providing the cross-referenced column(s), or + set of rows of multiple FROM items providing the + columns, the LATERAL item is evaluated using that + row or row set's values of the columns. The resulting row(s) are + joined as usual with the rows they were computed from. This is + repeated for each row or set of rows from the column source table(s). 
+ + + + The column source table(s) must be INNER or + LEFT joined to the LATERAL item, else + there would not be a well-defined set of rows from which to compute + each set of rows for the LATERAL item. Thus, + although a construct such as X RIGHT JOIN + LATERAL Y is syntactically valid, it is + not actually allowed for Y to reference + X. + + + @@ -1532,6 +1574,26 @@ SELECT distance, employee_name FROM employee_recursive; else the query will loop indefinitely. (See for more examples.) + + + This example uses LATERAL to apply a set-returning function + get_product_names() for each row of the + manufacturers table: + + +SELECT m.name AS mname, pname +FROM manufacturers m, LATERAL get_product_names(m.id) pname; + + + Manufacturers not currently having any products would not appear in the + result, since it is an inner join. If we wished to include the names of + such manufacturers in the result, we could do: + + +SELECT m.name AS mname, pname +FROM manufacturers m LEFT JOIN LATERAL get_product_names(m.id) pname ON true; + + @@ -1611,6 +1673,20 @@ SELECT distributors.* WHERE distributors.name = 'Westward'; + + Function Calls in <literal>FROM</literal> + + + PostgreSQL allows a function call to be + written directly as a member of the FROM list. In the SQL + standard it would be necessary to wrap such a function call in a + sub-SELECT; that is, the syntax + FROM func(...) alias + is approximately equivalent to + FROM (SELECT func(...)) alias. 
+ + + Namespace Available to <literal>GROUP BY</literal> and <literal>ORDER BY</literal> diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 799930ad61..71d53234bc 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1973,6 +1973,7 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_NODE_FIELD(ctecolcollations); COPY_NODE_FIELD(alias); COPY_NODE_FIELD(eref); + COPY_SCALAR_FIELD(lateral); COPY_SCALAR_FIELD(inh); COPY_SCALAR_FIELD(inFromCl); COPY_SCALAR_FIELD(requiredPerms); @@ -2250,6 +2251,7 @@ _copyRangeSubselect(const RangeSubselect *from) { RangeSubselect *newnode = makeNode(RangeSubselect); + COPY_SCALAR_FIELD(lateral); COPY_NODE_FIELD(subquery); COPY_NODE_FIELD(alias); @@ -2261,6 +2263,7 @@ _copyRangeFunction(const RangeFunction *from) { RangeFunction *newnode = makeNode(RangeFunction); + COPY_SCALAR_FIELD(lateral); COPY_NODE_FIELD(funccallnode); COPY_NODE_FIELD(alias); COPY_NODE_FIELD(coldeflist); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 802b063671..d690ca77a5 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2161,6 +2161,7 @@ _equalWindowDef(const WindowDef *a, const WindowDef *b) static bool _equalRangeSubselect(const RangeSubselect *a, const RangeSubselect *b) { + COMPARE_SCALAR_FIELD(lateral); COMPARE_NODE_FIELD(subquery); COMPARE_NODE_FIELD(alias); @@ -2170,6 +2171,7 @@ _equalRangeSubselect(const RangeSubselect *a, const RangeSubselect *b) static bool _equalRangeFunction(const RangeFunction *a, const RangeFunction *b) { + COMPARE_SCALAR_FIELD(lateral); COMPARE_NODE_FIELD(funccallnode); COMPARE_NODE_FIELD(alias); COMPARE_NODE_FIELD(coldeflist); @@ -2287,6 +2289,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_NODE_FIELD(ctecolcollations); COMPARE_NODE_FIELD(alias); COMPARE_NODE_FIELD(eref); + COMPARE_SCALAR_FIELD(lateral); COMPARE_SCALAR_FIELD(inh); COMPARE_SCALAR_FIELD(inFromCl); 
COMPARE_SCALAR_FIELD(requiredPerms); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b83bd1c9fd..9dee0414f3 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2362,6 +2362,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) break; } + WRITE_BOOL_FIELD(lateral); WRITE_BOOL_FIELD(inh); WRITE_BOOL_FIELD(inFromCl); WRITE_UINT_FIELD(requiredPerms); @@ -2565,6 +2566,7 @@ _outRangeSubselect(StringInfo str, const RangeSubselect *node) { WRITE_NODE_TYPE("RANGESUBSELECT"); + WRITE_BOOL_FIELD(lateral); WRITE_NODE_FIELD(subquery); WRITE_NODE_FIELD(alias); } @@ -2574,6 +2576,7 @@ _outRangeFunction(StringInfo str, const RangeFunction *node) { WRITE_NODE_TYPE("RANGEFUNCTION"); + WRITE_BOOL_FIELD(lateral); WRITE_NODE_FIELD(funccallnode); WRITE_NODE_FIELD(alias); WRITE_NODE_FIELD(coldeflist); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ff77cefd07..1eb7582914 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1222,6 +1222,7 @@ _readRangeTblEntry(void) break; } + READ_BOOL_FIELD(lateral); READ_BOOL_FIELD(inh); READ_BOOL_FIELD(inFromCl); READ_UINT_FIELD(requiredPerms); diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 1e352fd3b5..57eb2c39a4 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -56,6 +56,7 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) MemoryContext mycontext; MemoryContext oldcxt; RelOptInfo *joinrel; + Path *best_path; Cost fitness; int savelength; struct HTAB *savehash; @@ -99,6 +100,14 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) /* construct the best path for the given combination of relations */ joinrel = gimme_tree(root, tour, num_gene); + best_path = joinrel->cheapest_total_path; + + /* + * If no unparameterized path, use the cheapest parameterized path for + * costing purposes. 
XXX revisit this after LATERAL dust settles + */ + if (!best_path) + best_path = linitial(joinrel->cheapest_parameterized_paths); /* * compute fitness @@ -106,7 +115,7 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) * XXX geqo does not currently support optimization for partial result * retrieval --- how to fix? */ - fitness = joinrel->cheapest_total_path->total_cost; + fitness = best_path->total_cost; /* * Restore join_rel_list to its former state, and put back original diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index f02954982a..bfda05394d 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -253,8 +253,9 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, case RTE_SUBQUERY: /* - * Subqueries don't support parameterized paths, so just go - * ahead and build their paths immediately. + * Subqueries don't support making a choice between + * parameterized and unparameterized paths, so just go ahead + * and build their paths immediately. */ set_subquery_pathlist(root, rel, rti, rte); break; @@ -698,6 +699,10 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, if (IS_DUMMY_REL(childrel)) continue; + /* XXX need to figure out what to do for LATERAL */ + if (childrel->cheapest_total_path == NULL) + elog(ERROR, "LATERAL within an append relation is not supported yet"); + /* * Child is live, so add its cheapest access path to the Append path * we are constructing for the parent. 
@@ -906,6 +911,10 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, */ if (cheapest_startup == NULL || cheapest_total == NULL) { + /* XXX need to figure out what to do for LATERAL */ + if (childrel->cheapest_total_path == NULL) + elog(ERROR, "LATERAL within an append relation is not supported yet"); + cheapest_startup = cheapest_total = childrel->cheapest_total_path; Assert(cheapest_total != NULL); @@ -1012,8 +1021,13 @@ has_multiple_baserels(PlannerInfo *root) * set_subquery_pathlist * Build the (single) access path for a subquery RTE * - * There's no need for a separate set_subquery_size phase, since we don't - * support parameterized paths for subqueries. + * We don't currently support generating parameterized paths for subqueries + * by pushing join clauses down into them; it seems too expensive to re-plan + * the subquery multiple times to consider different alternatives. So the + * subquery will have exactly one path. (The path will be parameterized + * if the subquery contains LATERAL references, otherwise not.) Since there's + * no freedom of action here, there's no need for a separate set_subquery_size + * phase: we just make the path right away. */ static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -1021,6 +1035,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, { Query *parse = root->parse; Query *subquery = rte->subquery; + Relids required_outer; bool *differentTypes; double tuple_fraction; PlannerInfo *subroot; @@ -1033,6 +1048,20 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, */ subquery = copyObject(subquery); + /* + * If it's a LATERAL subquery, it might contain some Vars of the current + * query level, requiring it to be treated as parameterized. 
+ */ + if (rte->lateral) + { + required_outer = pull_varnos_of_level((Node *) subquery, 1); + /* Enforce convention that empty required_outer is exactly NULL */ + if (bms_is_empty(required_outer)) + required_outer = NULL; + } + else + required_outer = NULL; + /* We need a workspace for keeping track of set-op type coercions */ differentTypes = (bool *) palloc0((list_length(subquery->targetList) + 1) * sizeof(bool)); @@ -1051,10 +1080,9 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, * pseudoconstant clauses; better to have the gating node above the * subquery. * - * Also, if the sub-query has "security_barrier" flag, it means the + * Also, if the sub-query has the "security_barrier" flag, it means the * sub-query originated from a view that must enforce row-level security. - * We must not push down quals in order to avoid information leaks, either - * via side-effects or error output. + * Then we must not push down quals that contain leaky functions. * * Non-pushed-down clauses will get evaluated as qpquals of the * SubqueryScan node. @@ -1134,7 +1162,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys); /* Generate appropriate path */ - add_path(rel, create_subqueryscan_path(root, rel, pathkeys, NULL)); + add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer)); /* Select cheapest path (pretty easy in this case...) */ set_cheapest(rel); @@ -1143,12 +1171,32 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, /* * set_function_pathlist * Build the (single) access path for a function RTE + * + * As with subqueries, a function RTE's path might be parameterized due to + * LATERAL references, but that's inherent in the function expression and + * not a result of pushing down join quals. 
*/ static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { + Relids required_outer; + + /* + * If it's a LATERAL function, it might contain some Vars of the current + * query level, requiring it to be treated as parameterized. + */ + if (rte->lateral) + { + required_outer = pull_varnos_of_level(rte->funcexpr, 0); + /* Enforce convention that empty required_outer is exactly NULL */ + if (bms_is_empty(required_outer)) + required_outer = NULL; + } + else + required_outer = NULL; + /* Generate appropriate path */ - add_path(rel, create_functionscan_path(root, rel)); + add_path(rel, create_functionscan_path(root, rel, required_outer)); /* Select cheapest path (pretty easy in this case...) */ set_cheapest(rel); @@ -1157,6 +1205,10 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* * set_values_pathlist * Build the (single) access path for a VALUES RTE + * + * There can be no need for a parameterized path here. (Although the SQL + * spec does allow LATERAL (VALUES (x)), the parser will transform that + * into a subquery, so it doesn't end up here.) 
*/ static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) @@ -1988,10 +2040,16 @@ debug_print_rel(PlannerInfo *root, RelOptInfo *rel) printf("\tpath list:\n"); foreach(l, rel->pathlist) print_path(root, lfirst(l), 1); - printf("\n\tcheapest startup path:\n"); - print_path(root, rel->cheapest_startup_path, 1); - printf("\n\tcheapest total path:\n"); - print_path(root, rel->cheapest_total_path, 1); + if (rel->cheapest_startup_path) + { + printf("\n\tcheapest startup path:\n"); + print_path(root, rel->cheapest_startup_path, 1); + } + if (rel->cheapest_total_path) + { + printf("\n\tcheapest total path:\n"); + print_path(root, rel->cheapest_total_path, 1); + } printf("\n"); fflush(stdout); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 875c611ab5..d3f04eea4b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -989,12 +989,17 @@ cost_subqueryscan(Path *path, PlannerInfo *root, /* * cost_functionscan * Determines and returns the cost of scanning a function RTE. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL */ void -cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) +cost_functionscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) { Cost startup_cost = 0; Cost run_cost = 0; + QualCost qpqual_cost; Cost cpu_per_tuple; RangeTblEntry *rte; QualCost exprcost; @@ -1004,8 +1009,11 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) rte = planner_rt_fetch(baserel->relid, root); Assert(rte->rtekind == RTE_FUNCTION); - /* functionscans are never parameterized */ - path->rows = baserel->rows; + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; /* * Estimate costs of executing the function expression. 
@@ -1025,8 +1033,10 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel) startup_cost += exprcost.startup + exprcost.per_tuple; /* Add scanning CPU costs */ - startup_cost += baserel->baserestrictcost.startup; - cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; run_cost += cpu_per_tuple * baserel->tuples; path->startup_cost = startup_cost; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 65f86194e1..fe0e4d7c20 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -491,12 +491,18 @@ sort_inner_and_outer(PlannerInfo *root, * explosion of mergejoin paths of dubious value. This interacts with * decisions elsewhere that also discriminate against mergejoins with * parameterized inputs; see comments in src/backend/optimizer/README. - * - * If unique-ification is requested, do it and then handle as a plain - * inner join. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; + + /* Punt if either rel has only parameterized paths */ + if (!outer_path || !inner_path) + return; + + /* + * If unique-ification is requested, do it and then handle as a plain + * inner join. 
+ */ if (jointype == JOIN_UNIQUE_OUTER) { outer_path = (Path *) create_unique_path(root, outerrel, @@ -696,6 +702,10 @@ match_unsorted_outer(PlannerInfo *root, */ if (save_jointype == JOIN_UNIQUE_INNER) { + /* XXX for the moment, don't crash on LATERAL --- rethink this */ + if (inner_cheapest_total == NULL) + return; + inner_cheapest_total = (Path *) create_unique_path(root, innerrel, inner_cheapest_total, sjinfo); Assert(inner_cheapest_total); @@ -707,7 +717,7 @@ match_unsorted_outer(PlannerInfo *root, * enable_material is off or the path in question materializes its * output anyway. */ - if (enable_material && + if (enable_material && inner_cheapest_total != NULL && !ExecMaterializesOutput(inner_cheapest_total->pathtype)) matpath = (Path *) create_material_path(innerrel, inner_cheapest_total); @@ -735,6 +745,8 @@ match_unsorted_outer(PlannerInfo *root, * If we need to unique-ify the outer path, it's pointless to consider * any but the cheapest outer. (XXX we don't consider parameterized * outers, nor inners, for unique-ified cases. Should we?) 
+ * + * XXX does nothing for LATERAL, rethink */ if (save_jointype == JOIN_UNIQUE_OUTER) { @@ -814,6 +826,10 @@ match_unsorted_outer(PlannerInfo *root, if (save_jointype == JOIN_UNIQUE_OUTER) continue; + /* Can't do anything else if inner has no unparameterized paths */ + if (!inner_cheapest_total) + continue; + /* Look for useful mergeclauses (if any) */ mergeclauses = find_mergeclauses_for_pathkeys(root, outerpath->pathkeys, @@ -1092,6 +1108,12 @@ hash_inner_and_outer(PlannerInfo *root, Path *cheapest_total_outer = outerrel->cheapest_total_path; Path *cheapest_total_inner = innerrel->cheapest_total_path; + /* Punt if either rel has only parameterized paths */ + if (!cheapest_startup_outer || + !cheapest_total_outer || + !cheapest_total_inner) + return; + /* Unique-ify if need be; we ignore parameterized possibilities */ if (jointype == JOIN_UNIQUE_OUTER) { diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 414406bb8a..6bb821fb38 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -84,6 +84,7 @@ static HashJoin *create_hashjoin_plan(PlannerInfo *root, HashPath *best_path, Plan *outer_plan, Plan *inner_plan); static Node *replace_nestloop_params(PlannerInfo *root, Node *expr); static Node *replace_nestloop_params_mutator(Node *node, PlannerInfo *root); +static void identify_nestloop_extparams(PlannerInfo *root, Plan *subplan); static List *fix_indexqual_references(PlannerInfo *root, IndexPath *index_path); static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path); static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol); @@ -1640,6 +1641,7 @@ create_subqueryscan_plan(PlannerInfo *root, Path *best_path, { scan_clauses = (List *) replace_nestloop_params(root, (Node *) scan_clauses); + identify_nestloop_extparams(root, best_path->parent->subplan); } scan_plan = make_subqueryscan(tlist, @@ -1664,11 +1666,13 @@ 
create_functionscan_plan(PlannerInfo *root, Path *best_path, FunctionScan *scan_plan; Index scan_relid = best_path->parent->relid; RangeTblEntry *rte; + Node *funcexpr; /* it should be a function base rel... */ Assert(scan_relid > 0); rte = planner_rt_fetch(scan_relid, root); Assert(rte->rtekind == RTE_FUNCTION); + funcexpr = rte->funcexpr; /* Sort clauses into best execution order */ scan_clauses = order_qual_clauses(root, scan_clauses); @@ -1676,8 +1680,17 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ scan_clauses = extract_actual_clauses(scan_clauses, false); + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + /* The func expression itself could contain nestloop params, too */ + funcexpr = replace_nestloop_params(root, funcexpr); + } + scan_plan = make_functionscan(tlist, scan_clauses, scan_relid, - rte->funcexpr, + funcexpr, rte->eref->colnames, rte->funccoltypes, rte->funccoltypmods, @@ -2559,6 +2572,102 @@ replace_nestloop_params_mutator(Node *node, PlannerInfo *root) (void *) root); } +/* + * identify_nestloop_extparams + * Identify extParams of a parameterized subquery that need to be fed + * from an outer nestloop. + * + * The subplan's references to the outer variables are already represented + * as PARAM_EXEC Params, so we need not modify the subplan here. What we + * do need to do is add entries to root->curOuterParams to signal the parent + * nestloop plan node that it must provide these values. 
+ */ +static void +identify_nestloop_extparams(PlannerInfo *root, Plan *subplan) +{ + Bitmapset *tmpset; + int paramid; + + /* Examine each extParam of the subquery's plan */ + tmpset = bms_copy(subplan->extParam); + while ((paramid = bms_first_member(tmpset)) >= 0) + { + PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid); + + /* Ignore anything coming from an upper query level */ + if (pitem->abslevel != root->query_level) + continue; + + if (IsA(pitem->item, Var)) + { + Var *var = (Var *) pitem->item; + NestLoopParam *nlp; + ListCell *lc; + + /* If not from a nestloop outer rel, nothing to do */ + if (!bms_is_member(var->varno, root->curOuterRels)) + continue; + /* Is this param already listed in root->curOuterParams? */ + foreach(lc, root->curOuterParams) + { + nlp = (NestLoopParam *) lfirst(lc); + if (nlp->paramno == paramid) + { + Assert(equal(var, nlp->paramval)); + /* Present, so nothing to do */ + break; + } + } + if (lc == NULL) + { + /* No, so add it */ + nlp = makeNode(NestLoopParam); + nlp->paramno = paramid; + nlp->paramval = copyObject(var); + root->curOuterParams = lappend(root->curOuterParams, nlp); + } + } + else if (IsA(pitem->item, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) pitem->item; + NestLoopParam *nlp; + ListCell *lc; + + /* + * If not from a nestloop outer rel, nothing to do. We use + * bms_overlap as a cheap/quick test to see if the PHV might be + * evaluated in the outer rels, and then grab its PlaceHolderInfo + * to tell for sure. + */ + if (!bms_overlap(phv->phrels, root->curOuterRels)) + continue; + if (!bms_is_subset(find_placeholder_info(root, phv, false)->ph_eval_at, + root->curOuterRels)) + continue; + /* Is this param already listed in root->curOuterParams? 
*/ + foreach(lc, root->curOuterParams) + { + nlp = (NestLoopParam *) lfirst(lc); + if (nlp->paramno == paramid) + { + Assert(equal(phv, nlp->paramval)); + /* Present, so nothing to do */ + break; + } + } + if (lc == NULL) + { + /* No, so add it */ + nlp = makeNode(NestLoopParam); + nlp->paramno = paramid; + nlp->paramval = copyObject(phv); + root->curOuterParams = lappend(root->curOuterParams, nlp); + } + } + } + bms_free(tmpset); +} + /* * fix_indexqual_references * Adjust indexqual clauses to the form the executor's indexqual diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 3c7fa632b8..4481db5c34 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -204,6 +204,64 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, } } +/* + * extract_lateral_references + * If the specified RTE is a LATERAL subquery or function, extract all its + * references to Vars of the current query level, and make sure those Vars + * will be available for evaluation of the RTE. + * + * XXX this is rather duplicative of processing that has to happen elsewhere. + * Maybe it'd be a good idea to do this type of extraction further upstream + * and save the results?
+ */ +static void +extract_lateral_references(PlannerInfo *root, int rtindex) +{ + RangeTblEntry *rte = root->simple_rte_array[rtindex]; + List *vars; + List *newvars; + Relids where_needed; + ListCell *lc; + + /* No cross-references are possible if it's not LATERAL */ + if (!rte->lateral) + return; + + /* Fetch the appropriate variables */ + if (rte->rtekind == RTE_SUBQUERY) + vars = pull_vars_of_level((Node *) rte->subquery, 1); + else if (rte->rtekind == RTE_FUNCTION) + vars = pull_vars_of_level(rte->funcexpr, 0); + else + return; + + /* Copy each Var and adjust it to match our level */ + newvars = NIL; + foreach(lc, vars) + { + Var *var = (Var *) lfirst(lc); + + var = copyObject(var); + var->varlevelsup = 0; + newvars = lappend(newvars, var); + } + + /* + * We mark the Vars as being "needed" at the LATERAL RTE. This is a bit + * of a cheat: a more formal approach would be to mark each one as needed + * at the join of the LATERAL RTE with its source RTE. But it will work, + * and it's much less tedious than computing a separate where_needed for + * each Var. 
+ */ + where_needed = bms_make_singleton(rtindex); + + /* Push the Vars into their source relations' targetlists */ + add_vars_to_targetlist(root, newvars, where_needed, false); + + list_free(newvars); + list_free(vars); +} + /***************************************************************************** * @@ -286,7 +344,9 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, { int varno = ((RangeTblRef *) jtnode)->rtindex; - /* No quals to deal with, just return correct result */ + /* No quals to deal with, but do check for LATERAL references */ + extract_lateral_references(root, varno); + /* Result qualscope is just the one Relid */ *qualscope = bms_make_singleton(varno); /* A single baserel does not create an inner join */ *inner_join_rels = NULL; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 31fe557072..26b5dbb559 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1817,7 +1817,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * * Once grouping_planner() has applied a general tlist to the topmost * scan/join plan node, any tlist eval cost for added-on nodes should be - * accounted for as we create those nodes.
Presently, of the node types we * can add on later, only Agg, WindowAgg, and Group project new tlists (the * rest just copy their input tuples) --- so make_agg(), make_windowagg() and * make_group() are responsible for calling this function to account for their @@ -3257,6 +3257,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; + rte->lateral = false; rte->inh = false; rte->inFromCl = true; query->rtable = list_make1(rte); diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 8ce6bee856..863c943f2a 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1231,6 +1231,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, rte = addRangeTableEntryForSubquery(NULL, subselect, makeAlias("ANY_subquery", NIL), + false, false); parse->rtable = lappend(parse->rtable, rte); rtindex = list_length(parse->rtable); diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index be1219eb3d..06dbe84540 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -1175,6 +1175,13 @@ is_simple_subquery(Query *subquery) subquery->cteList) return false; + /* + * Don't pull up a LATERAL subquery (hopefully, this is just a temporary + * implementation restriction). + */ + if (contain_vars_of_level((Node *) subquery, 1)) + return false; + /* * Don't pull up a subquery that has any set-returning functions in its * targetlist. Otherwise we might well wind up inserting set-returning diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 00052f5c84..11de5c70d8 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -193,7 +193,7 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) * and cheapest_total. 
The cheapest_parameterized_paths list collects paths * that are cheapest-total for their parameterization (i.e., there is no * cheaper path with the same or weaker parameterization). This list always - * includes the unparameterized cheapest-total path, too. + * includes the unparameterized cheapest-total path, too, if there is one. * * This is normally called only after we've finished constructing the path * list for the rel node. @@ -250,15 +250,18 @@ set_cheapest(RelOptInfo *parent_rel) cheapest_total_path = path; } - if (cheapest_total_path == NULL) + if (cheapest_total_path == NULL && !have_parameterized_paths) elog(ERROR, "could not devise a query plan for the given query"); parent_rel->cheapest_startup_path = cheapest_startup_path; parent_rel->cheapest_total_path = cheapest_total_path; parent_rel->cheapest_unique_path = NULL; /* computed only if needed */ - /* Seed the parameterized-paths list with the cheapest total */ - parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path); + /* Seed the parameterized-paths list with the cheapest total, if any */ + if (cheapest_total_path) + parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path); + else + parent_rel->cheapest_parameterized_paths = NIL; /* And, if there are any parameterized paths, add them in one at a time */ if (have_parameterized_paths) @@ -1131,6 +1134,13 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, int numCols; ListCell *lc; + /* XXX temporary band-aid to not crash on LATERAL queries */ + if (subpath == NULL) + { + Assert(subpath == rel->cheapest_total_path); + return NULL; + } + /* Caller made a mistake if subpath isn't cheapest_total ... */ Assert(subpath == rel->cheapest_total_path); Assert(subpath->parent == rel); @@ -1657,16 +1667,18 @@ create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, * returning the pathnode. 
*/ Path * -create_functionscan_path(PlannerInfo *root, RelOptInfo *rel) +create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) { Path *pathnode = makeNode(Path); pathnode->pathtype = T_FunctionScan; pathnode->parent = rel; - pathnode->param_info = NULL; /* never parameterized at present */ + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); pathnode->pathkeys = NIL; /* for now, assume unordered result */ - cost_functionscan(pathnode, root, rel); + cost_functionscan(pathnode, root, rel, pathnode->param_info); return pathnode; } diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 9bc90c2531..81332ff1cd 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -42,16 +42,15 @@ typedef struct typedef struct { - int var_location; + List *vars; int sublevels_up; -} locate_var_of_level_context; +} pull_vars_context; typedef struct { int var_location; - int relid; int sublevels_up; -} locate_var_of_relation_context; +} locate_var_of_level_context; typedef struct { @@ -77,12 +76,11 @@ typedef struct static bool pull_varnos_walker(Node *node, pull_varnos_context *context); static bool pull_varattnos_walker(Node *node, pull_varattnos_context *context); +static bool pull_vars_walker(Node *node, pull_vars_context *context); static bool contain_var_clause_walker(Node *node, void *context); static bool contain_vars_of_level_walker(Node *node, int *sublevels_up); static bool locate_var_of_level_walker(Node *node, locate_var_of_level_context *context); -static bool locate_var_of_relation_walker(Node *node, - locate_var_of_relation_context *context); static bool find_minimum_var_level_walker(Node *node, find_minimum_var_level_context *context); static bool pull_var_clause_walker(Node *node, @@ -122,6 +120,31 @@ pull_varnos(Node *node) return context.varnos; } +/* + * pull_varnos_of_level + * Create a set of all the distinct varnos present in a parsetree. 
+ * Only Vars of the specified level are considered. + */ +Relids +pull_varnos_of_level(Node *node, int levelsup) +{ + pull_varnos_context context; + + context.varnos = NULL; + context.sublevels_up = levelsup; + + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. + */ + query_or_expression_tree_walker(node, + pull_varnos_walker, + (void *) &context, + 0); + + return context.varnos; +} + static bool pull_varnos_walker(Node *node, pull_varnos_context *context) { @@ -230,6 +253,66 @@ pull_varattnos_walker(Node *node, pull_varattnos_context *context) } +/* + * pull_vars_of_level + * Create a list of all Vars referencing the specified query level + * in the given parsetree. + * + * This is used on unplanned parsetrees, so we don't expect to see any + * PlaceHolderVars. + * + * Caution: the Vars are not copied, only linked into the list. + */ +List * +pull_vars_of_level(Node *node, int levelsup) +{ + pull_vars_context context; + + context.vars = NIL; + context.sublevels_up = levelsup; + + /* + * Must be prepared to start with a Query or a bare expression tree; if + * it's a Query, we don't want to increment sublevels_up. 
+ */ + query_or_expression_tree_walker(node, + pull_vars_walker, + (void *) &context, + 0); + + return context.vars; +} + +static bool +pull_vars_walker(Node *node, pull_vars_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == context->sublevels_up) + context->vars = lappend(context->vars, var); + return false; + } + Assert(!IsA(node, PlaceHolderVar)); + if (IsA(node, Query)) + { + /* Recurse into RTE subquery or not-yet-planned sublink subquery */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, pull_vars_walker, + (void *) context, 0); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, pull_vars_walker, + (void *) context); +} + + /* * contain_var_clause * Recursively scan a clause to discover whether it contains any Var nodes @@ -405,76 +488,6 @@ locate_var_of_level_walker(Node *node, } -/* - * locate_var_of_relation - * Find the parse location of any Var of the specified relation. - * - * Returns -1 if no such Var is in the querytree, or if they all have - * unknown parse location. - * - * Will recurse into sublinks. Also, may be invoked directly on a Query. 
- */ -int -locate_var_of_relation(Node *node, int relid, int levelsup) -{ - locate_var_of_relation_context context; - - context.var_location = -1; /* in case we find nothing */ - context.relid = relid; - context.sublevels_up = levelsup; - - (void) query_or_expression_tree_walker(node, - locate_var_of_relation_walker, - (void *) &context, - 0); - - return context.var_location; -} - -static bool -locate_var_of_relation_walker(Node *node, - locate_var_of_relation_context *context) -{ - if (node == NULL) - return false; - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - if (var->varno == context->relid && - var->varlevelsup == context->sublevels_up && - var->location >= 0) - { - context->var_location = var->location; - return true; /* abort tree traversal and return true */ - } - return false; - } - if (IsA(node, CurrentOfExpr)) - { - /* since CurrentOfExpr doesn't carry location, nothing we can do */ - return false; - } - /* No extra code needed for PlaceHolderVar; just look in contained expr */ - if (IsA(node, Query)) - { - /* Recurse into subselects */ - bool result; - - context->sublevels_up++; - result = query_tree_walker((Query *) node, - locate_var_of_relation_walker, - (void *) context, - 0); - context->sublevels_up--; - return result; - } - return expression_tree_walker(node, - locate_var_of_relation_walker, - (void *) context); -} - - /* * find_minimum_var_level * Recursively scan a clause to find the lowest variable level it diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 263edb5a7a..1a112cd9a4 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -533,6 +533,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) rte = addRangeTableEntryForSubquery(pstate, selectQuery, makeAlias("*SELECT*", NIL), + false, false); rtr = makeNode(RangeTblRef); /* assume new rte is at end */ @@ -651,18 +652,6 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) for (i = 0; i < sublist_length; i++) 
collations = lappend_oid(collations, InvalidOid); - /* - * There mustn't have been any table references in the expressions, - * else strange things would happen, like Cartesian products of those - * tables with the VALUES list ... - */ - if (pstate->p_joinlist != NIL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("VALUES must not contain table references"), - parser_errposition(pstate, - locate_var_of_level((Node *) exprsLists, 0)))); - /* * Another thing we can't currently support is NEW/OLD references in * rules --- seems we'd need something like SQL99's LATERAL construct @@ -1067,7 +1056,7 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt) List **colexprs = NULL; int sublist_length = -1; RangeTblEntry *rte; - RangeTblRef *rtr; + int rtindex; ListCell *lc; ListCell *lc2; int i; @@ -1215,19 +1204,17 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt) */ rte = addRangeTableEntryForValues(pstate, exprsLists, collations, NULL, true); - rtr = makeNode(RangeTblRef); + addRTEtoQuery(pstate, rte, true, true, true); + /* assume new rte is at end */ - rtr->rtindex = list_length(pstate->p_rtable); - Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable)); - pstate->p_joinlist = lappend(pstate->p_joinlist, rtr); - pstate->p_relnamespace = lappend(pstate->p_relnamespace, rte); - pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte); + rtindex = list_length(pstate->p_rtable); + Assert(rte == rt_fetch(rtindex, pstate->p_rtable)); /* * Generate a targetlist as though expanding "*" */ Assert(pstate->p_next_resno == 1); - qry->targetList = expandRelAttrs(pstate, rte, rtr->rtindex, 0, -1); + qry->targetList = expandRelAttrs(pstate, rte, rtindex, 0, -1); /* * The grammar allows attaching ORDER BY, LIMIT, and FOR UPDATE to a @@ -1249,19 +1236,6 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SELECT FOR UPDATE/SHARE cannot be applied to VALUES"))); - /* - * There 
mustn't have been any table references in the expressions, else - * strange things would happen, like Cartesian products of those tables - * with the VALUES list. We have to check this after parsing ORDER BY et - * al since those could insert more junk. - */ - if (list_length(pstate->p_joinlist) != 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("VALUES must not contain table references"), - parser_errposition(pstate, - locate_var_of_level((Node *) exprsLists, 0)))); - /* * Another thing we can't currently support is NEW/OLD references in rules * --- seems we'd need something like SQL99's LATERAL construct to ensure @@ -1477,10 +1451,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) false); sv_relnamespace = pstate->p_relnamespace; - pstate->p_relnamespace = NIL; /* no qualified names allowed */ - sv_varnamespace = pstate->p_varnamespace; - pstate->p_varnamespace = list_make1(jrte); + pstate->p_relnamespace = NIL; + pstate->p_varnamespace = NIL; + + /* add jrte to varnamespace only */ + addRTEtoQuery(pstate, jrte, false, false, true); /* * For now, we don't support resjunk sort clauses on the output of a @@ -1577,7 +1553,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, /* * If an internal node of a set-op tree has ORDER BY, LIMIT, FOR UPDATE, * or WITH clauses attached, we need to treat it like a leaf node to - * generate an independent sub-Query tree. Otherwise, it can be + * generate an independent sub-Query tree. Otherwise, it can be * represented by a SetOperationStmt node underneath the parent Query. 
*/ if (stmt->op == SETOP_NONE) @@ -1652,6 +1628,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, rte = addRangeTableEntryForSubquery(pstate, selectQuery, makeAlias(selectName, NIL), + false, false); /* @@ -2074,7 +2051,6 @@ transformReturningList(ParseState *pstate, List *returningList) int save_next_resno; bool save_hasAggs; bool save_hasWindowFuncs; - int length_rtable; if (returningList == NIL) return NIL; /* nothing to do */ @@ -2092,7 +2068,6 @@ transformReturningList(ParseState *pstate, List *returningList) pstate->p_hasAggs = false; save_hasWindowFuncs = pstate->p_hasWindowFuncs; pstate->p_hasWindowFuncs = false; - length_rtable = list_length(pstate->p_rtable); /* transform RETURNING identically to a SELECT targetlist */ rlist = transformTargetList(pstate, returningList); @@ -2113,25 +2088,6 @@ transformReturningList(ParseState *pstate, List *returningList) parser_errposition(pstate, locate_windowfunc((Node *) rlist)))); - /* no new relation references please */ - if (list_length(pstate->p_rtable) != length_rtable) - { - int vlocation = -1; - int relid; - - /* try to locate such a reference to point to */ - for (relid = length_rtable + 1; relid <= list_length(pstate->p_rtable); relid++) - { - vlocation = locate_var_of_relation((Node *) rlist, relid, 0); - if (vlocation >= 0) - break; - } - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("RETURNING cannot contain references to other relations"), - parser_errposition(pstate, vlocation))); - } - /* mark column origins */ markTargetListOrigins(pstate, rlist); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 6b6901197d..90ea1f9f00 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -396,7 +396,8 @@ static void processCASbits(int cas_bits, int location, const char *constrType, %type ctext_expr %type NumericOnly %type NumericOnly_list -%type alias_clause +%type alias_clause opt_alias_clause +%type func_alias_clause %type sortby 
%type index_elem %type table_ref @@ -532,9 +533,9 @@ static void processCASbits(int cas_bits, int location, const char *constrType, KEY - LABEL LANGUAGE LARGE_P LAST_P LC_COLLATE_P LC_CTYPE_P LEADING LEAKPROOF - LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP - LOCATION LOCK_P + LABEL LANGUAGE LARGE_P LAST_P LATERAL_P LC_COLLATE_P LC_CTYPE_P + LEADING LEAKPROOF LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL + LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P MAPPING MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE @@ -9309,65 +9310,37 @@ from_list: ; /* - * table_ref is where an alias clause can be attached. Note we cannot make - * alias_clause have an empty production because that causes parse conflicts - * between table_ref := '(' joined_table ')' alias_clause - * and joined_table := '(' joined_table ')'. So, we must have the - * redundant-looking productions here instead. + * table_ref is where an alias clause can be attached. */ -table_ref: relation_expr - { - $$ = (Node *) $1; - } - | relation_expr alias_clause +table_ref: relation_expr opt_alias_clause { $1->alias = $2; $$ = (Node *) $1; } - | func_table + | func_table func_alias_clause { RangeFunction *n = makeNode(RangeFunction); + n->lateral = false; n->funccallnode = $1; - n->coldeflist = NIL; + n->alias = linitial($2); + n->coldeflist = lsecond($2); $$ = (Node *) n; } - | func_table alias_clause + | LATERAL_P func_table func_alias_clause { RangeFunction *n = makeNode(RangeFunction); - n->funccallnode = $1; + n->lateral = true; + n->funccallnode = $2; + n->alias = linitial($3); + n->coldeflist = lsecond($3); + $$ = (Node *) n; + } + | select_with_parens opt_alias_clause + { + RangeSubselect *n = makeNode(RangeSubselect); + n->lateral = false; + n->subquery = $1; n->alias = $2; - n->coldeflist = NIL; - $$ = (Node *) n; - } - | func_table AS '(' TableFuncElementList ')' - { - RangeFunction *n = makeNode(RangeFunction); - n->funccallnode = $1; - n->coldeflist = $4; - $$ = (Node *) n; - } - | 
func_table AS ColId '(' TableFuncElementList ')' - { - RangeFunction *n = makeNode(RangeFunction); - Alias *a = makeNode(Alias); - n->funccallnode = $1; - a->aliasname = $3; - n->alias = a; - n->coldeflist = $5; - $$ = (Node *) n; - } - | func_table ColId '(' TableFuncElementList ')' - { - RangeFunction *n = makeNode(RangeFunction); - Alias *a = makeNode(Alias); - n->funccallnode = $1; - a->aliasname = $2; - n->alias = a; - n->coldeflist = $4; - $$ = (Node *) n; - } - | select_with_parens - { /* * The SQL spec does not permit a subselect * () without an alias clause, @@ -9379,26 +9352,47 @@ table_ref: relation_expr * However, it does seem like a good idea to emit * an error message that's better than "syntax error". */ - if (IsA($1, SelectStmt) && - ((SelectStmt *) $1)->valuesLists) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("VALUES in FROM must have an alias"), - errhint("For example, FROM (VALUES ...) [AS] foo."), - parser_errposition(@1))); - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("subquery in FROM must have an alias"), - errhint("For example, FROM (SELECT ...) [AS] foo."), - parser_errposition(@1))); - $$ = NULL; + if ($2 == NULL) + { + if (IsA($1, SelectStmt) && + ((SelectStmt *) $1)->valuesLists) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("VALUES in FROM must have an alias"), + errhint("For example, FROM (VALUES ...) [AS] foo."), + parser_errposition(@1))); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("subquery in FROM must have an alias"), + errhint("For example, FROM (SELECT ...) 
[AS] foo."), + parser_errposition(@1))); + } + $$ = (Node *) n; } - | select_with_parens alias_clause + | LATERAL_P select_with_parens opt_alias_clause { RangeSubselect *n = makeNode(RangeSubselect); - n->subquery = $1; - n->alias = $2; + n->lateral = true; + n->subquery = $2; + n->alias = $3; + /* same comment as above */ + if ($3 == NULL) + { + if (IsA($2, SelectStmt) && + ((SelectStmt *) $2)->valuesLists) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("VALUES in FROM must have an alias"), + errhint("For example, FROM (VALUES ...) [AS] foo."), + parser_errposition(@2))); + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("subquery in FROM must have an alias"), + errhint("For example, FROM (SELECT ...) [AS] foo."), + parser_errposition(@2))); + } $$ = (Node *) n; } | joined_table @@ -9524,6 +9518,41 @@ alias_clause: } ; +opt_alias_clause: alias_clause { $$ = $1; } + | /*EMPTY*/ { $$ = NULL; } + ; + +/* + * func_alias_clause can include both an Alias and a coldeflist, so we make it + * return a 2-element list that gets disassembled by the calling production. 
+ */ +func_alias_clause: + alias_clause + { + $$ = list_make2($1, NIL); + } + | AS '(' TableFuncElementList ')' + { + $$ = list_make2(NULL, $3); + } + | AS ColId '(' TableFuncElementList ')' + { + Alias *a = makeNode(Alias); + a->aliasname = $2; + $$ = list_make2(a, $4); + } + | ColId '(' TableFuncElementList ')' + { + Alias *a = makeNode(Alias); + a->aliasname = $1; + $$ = list_make2(a, $3); + } + | /*EMPTY*/ + { + $$ = list_make2(NULL, NIL); + } + ; + join_type: FULL join_outer { $$ = JOIN_FULL; } | LEFT join_outer { $$ = JOIN_LEFT; } | RIGHT join_outer { $$ = JOIN_RIGHT; } @@ -12736,6 +12765,7 @@ reserved_keyword: | INITIALLY | INTERSECT | INTO + | LATERAL_P | LEADING | LIMIT | LOCALTIME diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 380d9d3560..5854f81005 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -181,6 +181,16 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, while (min_varlevel-- > 0) pstate = pstate->parentParseState; pstate->p_hasAggs = true; + + /* + * Complain if we are inside a LATERAL subquery of the aggregation query. + * We must be in its FROM clause, so the aggregate is misplaced. 
+ */ + if (pstate->p_lateral_active) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("aggregates not allowed in FROM clause"), + parser_errposition(pstate, agg->location))); } /* diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 97ab9d5581..f9faa11b2e 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -58,8 +58,7 @@ static Node *transformJoinUsingClause(ParseState *pstate, static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j, RangeTblEntry *l_rte, RangeTblEntry *r_rte, - List *relnamespace, - Relids containedRels); + List *relnamespace); static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r); static RangeTblEntry *transformCTEReference(ParseState *pstate, RangeVar *r, CommonTableExpr *cte, Index levelsup); @@ -69,10 +68,13 @@ static RangeTblEntry *transformRangeFunction(ParseState *pstate, RangeFunction *r); static Node *transformFromClauseItem(ParseState *pstate, Node *n, RangeTblEntry **top_rte, int *top_rti, - List **relnamespace, - Relids *containedRels); + List **relnamespace); static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype, Var *l_colvar, Var *r_colvar); +static ParseNamespaceItem *makeNamespaceItem(RangeTblEntry *rte, + bool lateral_only, bool lateral_ok); +static void setNamespaceLateralState(List *namespace, + bool lateral_only, bool lateral_ok); static void checkExprIsVarFree(ParseState *pstate, Node *n, const char *constructName); static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node, @@ -101,11 +103,6 @@ static Node *transformFrameOffset(ParseState *pstate, int frameOptions, * p_varnamespace lists were initialized to NIL when the pstate was created. * We will add onto any entries already present --- this is needed for rule * processing, as well as for UPDATE and DELETE. 
- * - * The range table may grow still further when we transform the expressions - * in the query's quals and target list. (This is possible because in - * POSTQUEL, we allowed references to relations not specified in the - * from-clause. PostgreSQL keeps this extension to standard SQL.) */ void transformFromClause(ParseState *pstate, List *frmList) @@ -117,6 +114,9 @@ transformFromClause(ParseState *pstate, List *frmList) * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding * entries to the rtable), check for duplicate refnames, and then add it * to the joinlist and namespaces. + * + * Note we must process the items left-to-right for proper handling of + * LATERAL references. */ foreach(fl, frmList) { @@ -124,20 +124,31 @@ transformFromClause(ParseState *pstate, List *frmList) RangeTblEntry *rte; int rtindex; List *relnamespace; - Relids containedRels; n = transformFromClauseItem(pstate, n, &rte, &rtindex, - &relnamespace, - &containedRels); + &relnamespace); + /* Mark the new relnamespace items as visible to LATERAL */ + setNamespaceLateralState(relnamespace, true, true); + checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace); + pstate->p_joinlist = lappend(pstate->p_joinlist, n); pstate->p_relnamespace = list_concat(pstate->p_relnamespace, relnamespace); - pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte); - bms_free(containedRels); + pstate->p_varnamespace = lappend(pstate->p_varnamespace, + makeNamespaceItem(rte, true, true)); } + + /* + * We're done parsing the FROM list, so make all namespace items + * unconditionally visible. Note that this will also reset lateral_only + * for any namespace items that were already present when we were called; + * but those should have been that way already. 
+ */ + setNamespaceLateralState(pstate->p_relnamespace, false, true); + setNamespaceLateralState(pstate->p_varnamespace, false, true); } /* @@ -375,55 +386,34 @@ static Node * transformJoinOnClause(ParseState *pstate, JoinExpr *j, RangeTblEntry *l_rte, RangeTblEntry *r_rte, - List *relnamespace, - Relids containedRels) + List *relnamespace) { Node *result; List *save_relnamespace; List *save_varnamespace; - Relids clause_varnos; - int varno; /* - * This is a tad tricky, for two reasons. First, the namespace that the - * join expression should see is just the two subtrees of the JOIN plus - * any outer references from upper pstate levels. So, temporarily set - * this pstate's namespace accordingly. (We need not check for refname - * conflicts, because transformFromClauseItem() already did.) NOTE: this - * code is OK only because the ON clause can't legally alter the namespace - * by causing implicit relation refs to be added. + * The namespace that the join expression should see is just the two + * subtrees of the JOIN plus any outer references from upper pstate + * levels. Temporarily set this pstate's namespace accordingly. (We need + * not check for refname conflicts, because transformFromClauseItem() + * already did.) All namespace items are marked visible regardless of + * LATERAL state. */ save_relnamespace = pstate->p_relnamespace; save_varnamespace = pstate->p_varnamespace; + setNamespaceLateralState(relnamespace, false, true); pstate->p_relnamespace = relnamespace; - pstate->p_varnamespace = list_make2(l_rte, r_rte); + + pstate->p_varnamespace = list_make2(makeNamespaceItem(l_rte, false, true), + makeNamespaceItem(r_rte, false, true)); result = transformWhereClause(pstate, j->quals, "JOIN/ON"); pstate->p_relnamespace = save_relnamespace; pstate->p_varnamespace = save_varnamespace; - /* - * Second, we need to check that the ON condition doesn't refer to any - * rels outside the input subtrees of the JOIN. 
It could do that despite - * our hack on the namespace if it uses fully-qualified names. So, grovel - * through the transformed clause and make sure there are no bogus - * references. (Outer references are OK, and are ignored here.) - */ - clause_varnos = pull_varnos(result); - clause_varnos = bms_del_members(clause_varnos, containedRels); - if ((varno = bms_first_member(clause_varnos)) >= 0) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN", - rt_fetch(varno, pstate->p_rtable)->eref->aliasname), - parser_errposition(pstate, - locate_var_of_relation(result, varno, 0)))); - } - bms_free(clause_varnos); - return result; } @@ -435,13 +425,7 @@ transformTableEntry(ParseState *pstate, RangeVar *r) { RangeTblEntry *rte; - /* - * mark this entry to indicate it comes from the FROM clause. In SQL, the - * target list can only refer to range variables specified in the from - * clause but we follow the more powerful POSTQUEL semantics and - * automatically generate the range variable if not specified. However - * there are times we need to know whether the entries are legitimate. - */ + /* We need only build a range table entry */ rte = addRangeTableEntry(pstate, r, r->alias, interpretInhOption(r->inhOpt), true); @@ -476,17 +460,28 @@ transformRangeSubselect(ParseState *pstate, RangeSubselect *r) * We require user to supply an alias for a subselect, per SQL92. To relax * this, we'd have to be prepared to gin up a unique alias for an * unlabeled subselect. (This is just elog, not ereport, because the - * grammar should have enforced it already.) + * grammar should have enforced it already. It'd probably be better to + * report the error here, but we don't have a good error location here.) */ if (r->alias == NULL) elog(ERROR, "subquery in FROM must have an alias"); + /* + * If the subselect is LATERAL, make lateral_only names of this level + * visible to it. 
(LATERAL can't nest within a single pstate level, so we + * don't need save/restore logic here.) + */ + Assert(!pstate->p_lateral_active); + pstate->p_lateral_active = r->lateral; + /* * Analyze and transform the subquery. */ query = parse_sub_analyze(r->subquery, pstate, NULL, isLockedRefname(pstate, r->alias->aliasname)); + pstate->p_lateral_active = false; + /* * Check that we got something reasonable. Many of these conditions are * impossible given restrictions of the grammar, but check 'em anyway. @@ -496,33 +491,14 @@ transformRangeSubselect(ParseState *pstate, RangeSubselect *r) query->utilityStmt != NULL) elog(ERROR, "unexpected non-SELECT command in subquery in FROM"); - /* - * The subquery cannot make use of any variables from FROM items created - * earlier in the current query. Per SQL92, the scope of a FROM item does - * not include other FROM items. Formerly we hacked the namespace so that - * the other variables weren't even visible, but it seems more useful to - * leave them visible and give a specific error message. - * - * XXX this will need further work to support SQL99's LATERAL() feature, - * wherein such references would indeed be legal. - * - * We can skip groveling through the subquery if there's not anything - * visible in the current query. Also note that outer references are OK. - */ - if (pstate->p_relnamespace || pstate->p_varnamespace) - { - if (contain_vars_of_level((Node *) query, 1)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("subquery in FROM cannot refer to other relations of same query level"), - parser_errposition(pstate, - locate_var_of_level((Node *) query, 1)))); - } - /* * OK, build an RTE for the subquery. 
*/ - rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true); + rte = addRangeTableEntryForSubquery(pstate, + query, + r->alias, + r->lateral, + true); return rte; } @@ -546,35 +522,26 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r) */ funcname = FigureColname(r->funccallnode); + /* + * If the function is LATERAL, make lateral_only names of this level + * visible to it. (LATERAL can't nest within a single pstate level, so we + * don't need save/restore logic here.) + */ + Assert(!pstate->p_lateral_active); + pstate->p_lateral_active = r->lateral; + /* * Transform the raw expression. */ funcexpr = transformExpr(pstate, r->funccallnode); + pstate->p_lateral_active = false; + /* * We must assign collations now so that we can fill funccolcollations. */ assign_expr_collations(pstate, funcexpr); - /* - * The function parameters cannot make use of any variables from other - * FROM items. (Compare to transformRangeSubselect(); the coding is - * different though because we didn't parse as a sub-select with its own - * level of namespace.) - * - * XXX this will need further work to support SQL99's LATERAL() feature, - * wherein such references would indeed be legal. - */ - if (pstate->p_relnamespace || pstate->p_varnamespace) - { - if (contain_vars_of_level(funcexpr, 0)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("function expression in FROM cannot refer to other relations of same query level"), - parser_errposition(pstate, - locate_var_of_level(funcexpr, 0)))); - } - /* * Disallow aggregate functions in the expression. (No reason to postpone * this check until parseCheckAggregates.) @@ -598,7 +565,7 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r) * OK, build an RTE for the function. 
*/ rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr, - r, true); + r, r->lateral, true); /* * If a coldeflist was supplied, ensure it defines a legal set of names @@ -637,12 +604,9 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r) * * *top_rti: receives the rangetable index of top_rte. (Ditto.) * - * *relnamespace: receives a List of the RTEs exposed as relation names - * by this item. - * - * *containedRels: receives a bitmap set of the rangetable indexes - * of all the base and join relations represented in this jointree item. - * This is needed for checking JOIN/ON conditions in higher levels. + * *relnamespace: receives a List of ParseNamespaceItems for the RTEs exposed + * as relation names by this item. (The lateral_only flags in these items + * are indeterminate and should be explicitly set by the caller before use.) * * We do not need to pass back an explicit varnamespace value, because * in all cases the varnamespace contribution is exactly top_rte. @@ -650,8 +614,7 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r) static Node * transformFromClauseItem(ParseState *pstate, Node *n, RangeTblEntry **top_rte, int *top_rti, - List **relnamespace, - Relids *containedRels) + List **relnamespace) { if (IsA(n, RangeVar)) { @@ -681,8 +644,7 @@ transformFromClauseItem(ParseState *pstate, Node *n, Assert(rte == rt_fetch(rtindex, pstate->p_rtable)); *top_rte = rte; *top_rti = rtindex; - *relnamespace = list_make1(rte); - *containedRels = bms_make_singleton(rtindex); + *relnamespace = list_make1(makeNamespaceItem(rte, false, true)); rtr = makeNode(RangeTblRef); rtr->rtindex = rtindex; return (Node *) rtr; @@ -700,8 +662,7 @@ transformFromClauseItem(ParseState *pstate, Node *n, Assert(rte == rt_fetch(rtindex, pstate->p_rtable)); *top_rte = rte; *top_rti = rtindex; - *relnamespace = list_make1(rte); - *containedRels = bms_make_singleton(rtindex); + *relnamespace = list_make1(makeNamespaceItem(rte, false, true)); rtr = 
makeNode(RangeTblRef); rtr->rtindex = rtindex; return (Node *) rtr; @@ -719,8 +680,7 @@ transformFromClauseItem(ParseState *pstate, Node *n, Assert(rte == rt_fetch(rtindex, pstate->p_rtable)); *top_rte = rte; *top_rti = rtindex; - *relnamespace = list_make1(rte); - *containedRels = bms_make_singleton(rtindex); + *relnamespace = list_make1(makeNamespaceItem(rte, false, true)); rtr = makeNode(RangeTblRef); rtr->rtindex = rtindex; return (Node *) rtr; @@ -733,9 +693,6 @@ transformFromClauseItem(ParseState *pstate, Node *n, RangeTblEntry *r_rte; int l_rtindex; int r_rtindex; - Relids l_containedRels, - r_containedRels, - my_containedRels; List *l_relnamespace, *r_relnamespace, *my_relnamespace, @@ -745,38 +702,66 @@ transformFromClauseItem(ParseState *pstate, Node *n, *l_colvars, *r_colvars, *res_colvars; + bool lateral_ok; + int sv_relnamespace_length, + sv_varnamespace_length; RangeTblEntry *rte; int k; /* - * Recursively process the left and right subtrees + * Recursively process the left subtree, then the right. We must do + * it in this order for correct visibility of LATERAL references. */ j->larg = transformFromClauseItem(pstate, j->larg, &l_rte, &l_rtindex, - &l_relnamespace, - &l_containedRels); + &l_relnamespace); + + /* + * Make the left-side RTEs available for LATERAL access within the + * right side, by temporarily adding them to the pstate's namespace + * lists. Per SQL:2008, if the join type is not INNER or LEFT then + * the left-side names must still be exposed, but it's an error to + * reference them. (Stupid design, but that's what it says.) Hence, + * we always push them into the namespaces, but mark them as not + * lateral_ok if the jointype is wrong. + * + * NB: this coding relies on the fact that list_concat is not + * destructive to its second argument. 
+ */ + lateral_ok = (j->jointype == JOIN_INNER || j->jointype == JOIN_LEFT); + setNamespaceLateralState(l_relnamespace, true, lateral_ok); + checkNameSpaceConflicts(pstate, pstate->p_relnamespace, l_relnamespace); + sv_relnamespace_length = list_length(pstate->p_relnamespace); + pstate->p_relnamespace = list_concat(pstate->p_relnamespace, + l_relnamespace); + sv_varnamespace_length = list_length(pstate->p_varnamespace); + pstate->p_varnamespace = lappend(pstate->p_varnamespace, + makeNamespaceItem(l_rte, true, lateral_ok)); + + /* And now we can process the RHS */ j->rarg = transformFromClauseItem(pstate, j->rarg, &r_rte, &r_rtindex, - &r_relnamespace, - &r_containedRels); + &r_relnamespace); + + /* Remove the left-side RTEs from the namespace lists again */ + pstate->p_relnamespace = list_truncate(pstate->p_relnamespace, + sv_relnamespace_length); + pstate->p_varnamespace = list_truncate(pstate->p_varnamespace, + sv_varnamespace_length); /* * Check for conflicting refnames in left and right subtrees. Must do * this because higher levels will assume I hand back a self- - * consistent namespace subtree. + * consistent namespace list. */ checkNameSpaceConflicts(pstate, l_relnamespace, r_relnamespace); /* - * Generate combined relation membership info for possible use by - * transformJoinOnClause below. + * Generate combined relnamespace info for possible use below. */ my_relnamespace = list_concat(l_relnamespace, r_relnamespace); - my_containedRels = bms_join(l_containedRels, r_containedRels); - - pfree(r_relnamespace); /* free unneeded list header */ /* * Extract column name and var lists from both subtrees @@ -941,8 +926,7 @@ transformFromClauseItem(ParseState *pstate, Node *n, /* User-written ON-condition; transform it */ j->quals = transformJoinOnClause(pstate, j, l_rte, r_rte, - my_relnamespace, - my_containedRels); + my_relnamespace); } else { @@ -1006,18 +990,10 @@ transformFromClauseItem(ParseState *pstate, Node *n, * relnamespace. 
*/ if (j->alias) - { - *relnamespace = list_make1(rte); - list_free(my_relnamespace); - } + *relnamespace = list_make1(makeNamespaceItem(rte, false, true)); else *relnamespace = my_relnamespace; - /* - * Include join RTE in returned containedRels set - */ - *containedRels = bms_add_member(my_containedRels, j->rtindex); - return (Node *) j; } else @@ -1144,6 +1120,40 @@ buildMergedJoinVar(ParseState *pstate, JoinType jointype, return res_node; } +/* + * makeNamespaceItem - + * Convenience subroutine to construct a ParseNamespaceItem. + */ +static ParseNamespaceItem * +makeNamespaceItem(RangeTblEntry *rte, bool lateral_only, bool lateral_ok) +{ + ParseNamespaceItem *nsitem; + + nsitem = (ParseNamespaceItem *) palloc(sizeof(ParseNamespaceItem)); + nsitem->p_rte = rte; + nsitem->p_lateral_only = lateral_only; + nsitem->p_lateral_ok = lateral_ok; + return nsitem; +} + +/* + * setNamespaceLateralState - + * Convenience subroutine to update LATERAL flags in a namespace list. + */ +static void +setNamespaceLateralState(List *namespace, bool lateral_only, bool lateral_ok) +{ + ListCell *lc; + + foreach(lc, namespace) + { + ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(lc); + + nsitem->p_lateral_only = lateral_only; + nsitem->p_lateral_ok = lateral_ok; + } +} + /* * transformWhereClause - diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index bb1ad9af96..385f8e767e 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -751,19 +751,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) switch (crerr) { case CRERR_NO_COLUMN: - if (relname) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column %s.%s does not exist", - relname, colname), - parser_errposition(pstate, cref->location))); - - else - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("column \"%s\" does not exist", - colname), - parser_errposition(pstate, cref->location))); + errorMissingColumn(pstate, 
relname, colname, cref->location); break; case CRERR_NO_RTE: errorMissingRTE(pstate, makeRangeVar(nspname, relname, diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 30b307b191..47686c8719 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -137,7 +137,12 @@ scanNameSpaceForRefname(ParseState *pstate, const char *refname, int location) foreach(l, pstate->p_relnamespace) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(l); + RangeTblEntry *rte = nsitem->p_rte; + + /* If not inside LATERAL, ignore lateral-only items */ + if (nsitem->p_lateral_only && !pstate->p_lateral_active) + continue; if (strcmp(rte->eref->aliasname, refname) == 0) { @@ -147,6 +152,14 @@ scanNameSpaceForRefname(ParseState *pstate, const char *refname, int location) errmsg("table reference \"%s\" is ambiguous", refname), parser_errposition(pstate, location))); + /* SQL:2008 demands this be an error, not an invisible item */ + if (nsitem->p_lateral_only && !nsitem->p_lateral_ok) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("invalid reference to FROM-clause entry for table \"%s\"", + refname), + errdetail("The combining JOIN type must be INNER or LEFT for a LATERAL reference."), + parser_errposition(pstate, location))); result = rte; } } @@ -170,7 +183,12 @@ scanNameSpaceForRelid(ParseState *pstate, Oid relid, int location) foreach(l, pstate->p_relnamespace) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(l); + RangeTblEntry *rte = nsitem->p_rte; + + /* If not inside LATERAL, ignore lateral-only items */ + if (nsitem->p_lateral_only && !pstate->p_lateral_active) + continue; /* yes, the test for alias == NULL should be there... 
*/ if (rte->rtekind == RTE_RELATION && @@ -183,6 +201,14 @@ scanNameSpaceForRelid(ParseState *pstate, Oid relid, int location) errmsg("table reference %u is ambiguous", relid), parser_errposition(pstate, location))); + /* SQL:2008 demands this be an error, not an invisible item */ + if (nsitem->p_lateral_only && !nsitem->p_lateral_ok) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("invalid reference to FROM-clause entry for table \"%s\"", + rte->eref->aliasname), + errdetail("The combining JOIN type must be INNER or LEFT for a LATERAL reference."), + parser_errposition(pstate, location))); result = rte; } } @@ -245,7 +271,7 @@ isFutureCTE(ParseState *pstate, const char *refname) } /* - * searchRangeTable + * searchRangeTableForRel * See if any RangeTblEntry could possibly match the RangeVar. * If so, return a pointer to the RangeTblEntry; else return NULL. * @@ -260,7 +286,7 @@ isFutureCTE(ParseState *pstate, const char *refname) * and matches on alias. */ static RangeTblEntry * -searchRangeTable(ParseState *pstate, RangeVar *relation) +searchRangeTableForRel(ParseState *pstate, RangeVar *relation) { const char *refname = relation->relname; Oid relId = InvalidOid; @@ -322,6 +348,9 @@ searchRangeTable(ParseState *pstate, RangeVar *relation) * Per SQL92, two alias-less plain relation RTEs do not conflict even if * they have the same eref->aliasname (ie, same relation name), if they * are for different relation OIDs (implying they are in different schemas). + * + * We ignore the lateral-only flags in the namespace items: the lists must + * not conflict, even when all items are considered visible. 
*/ void checkNameSpaceConflicts(ParseState *pstate, List *namespace1, @@ -331,13 +360,15 @@ checkNameSpaceConflicts(ParseState *pstate, List *namespace1, foreach(l1, namespace1) { - RangeTblEntry *rte1 = (RangeTblEntry *) lfirst(l1); + ParseNamespaceItem *nsitem1 = (ParseNamespaceItem *) lfirst(l1); + RangeTblEntry *rte1 = nsitem1->p_rte; const char *aliasname1 = rte1->eref->aliasname; ListCell *l2; foreach(l2, namespace2) { - RangeTblEntry *rte2 = (RangeTblEntry *) lfirst(l2); + ParseNamespaceItem *nsitem2 = (ParseNamespaceItem *) lfirst(l2); + RangeTblEntry *rte2 = nsitem2->p_rte; if (strcmp(rte2->eref->aliasname, aliasname1) != 0) continue; /* definitely no conflict */ @@ -544,9 +575,14 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly, foreach(l, pstate->p_varnamespace) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(l); + RangeTblEntry *rte = nsitem->p_rte; Node *newresult; + /* If not inside LATERAL, ignore lateral-only items */ + if (nsitem->p_lateral_only && !pstate->p_lateral_active) + continue; + /* use orig_pstate here to get the right sublevels_up */ newresult = scanRTEForColumn(orig_pstate, rte, colname, location); @@ -558,6 +594,14 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly, errmsg("column reference \"%s\" is ambiguous", colname), parser_errposition(orig_pstate, location))); + /* SQL:2008 demands this be an error, not an invisible item */ + if (nsitem->p_lateral_only && !nsitem->p_lateral_ok) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("invalid reference to FROM-clause entry for table \"%s\"", + rte->eref->aliasname), + errdetail("The combining JOIN type must be INNER or LEFT for a LATERAL reference."), + parser_errposition(orig_pstate, location))); result = newresult; } } @@ -571,6 +615,40 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly, return result; } +/* + * searchRangeTableForCol + * See if any 
RangeTblEntry could possibly provide the given column name. + * If so, return a pointer to the RangeTblEntry; else return NULL. + * + * This is different from colNameToVar in that it considers every entry in + * the ParseState's rangetable(s), not only those that are currently visible + * in the p_varnamespace lists. This behavior is invalid per the SQL spec, + * and it may give ambiguous results (there might be multiple equally valid + * matches, but only one will be returned). This must be used ONLY as a + * heuristic in giving suitable error messages. See errorMissingColumn. + */ +static RangeTblEntry * +searchRangeTableForCol(ParseState *pstate, char *colname, int location) +{ + ParseState *orig_pstate = pstate; + + while (pstate != NULL) + { + ListCell *l; + + foreach(l, pstate->p_rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + + if (scanRTEForColumn(orig_pstate, rte, colname, location)) + return rte; + } + + pstate = pstate->parentParseState; + } + return NULL; +} + /* * markRTEForSelectPriv * Mark the specified column of an RTE as requiring SELECT privilege @@ -917,16 +995,13 @@ addRangeTableEntry(ParseState *pstate, */ heap_close(rel, NoLock); - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. + /* + * Set flags and access permissions. * * The initial default on access checks is always check-for-READ-access, * which is the right thing for all except target tables. - *---------- */ + rte->lateral = false; rte->inh = inh; rte->inFromCl = inFromCl; @@ -973,16 +1048,13 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->eref = makeAlias(refname, NIL); buildRelationAliases(rel->rd_att, alias, rte->eref); - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. 
+ /* + * Set flags and access permissions. * * The initial default on access checks is always check-for-READ-access, * which is the right thing for all except target tables. - *---------- */ + rte->lateral = false; rte->inh = inh; rte->inFromCl = inFromCl; @@ -1011,6 +1083,7 @@ RangeTblEntry * addRangeTableEntryForSubquery(ParseState *pstate, Query *subquery, Alias *alias, + bool lateral, bool inFromCl) { RangeTblEntry *rte = makeNode(RangeTblEntry); @@ -1054,15 +1127,12 @@ addRangeTableEntryForSubquery(ParseState *pstate, rte->eref = eref; - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. + /* + * Set flags and access permissions. * * Subqueries are never checked for access rights. - *---------- */ + rte->lateral = lateral; rte->inh = false; /* never true for subqueries */ rte->inFromCl = inFromCl; @@ -1091,6 +1161,7 @@ addRangeTableEntryForFunction(ParseState *pstate, char *funcname, Node *funcexpr, RangeFunction *rangefunc, + bool lateral, bool inFromCl) { RangeTblEntry *rte = makeNode(RangeTblEntry); @@ -1192,16 +1263,13 @@ addRangeTableEntryForFunction(ParseState *pstate, funcname, format_type_be(funcrettype)), parser_errposition(pstate, exprLocation(funcexpr)))); - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. + /* + * Set flags and access permissions. * - * Functions are never checked for access rights (at least, not by - * the RTE permissions mechanism). - *---------- + * Functions are never checked for access rights (at least, not by the RTE + * permissions mechanism). 
*/ + rte->lateral = lateral; rte->inh = false; /* never true for functions */ rte->inFromCl = inFromCl; @@ -1267,15 +1335,12 @@ addRangeTableEntryForValues(ParseState *pstate, rte->eref = eref; - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. + /* + * Set flags and access permissions. * * Subqueries are never checked for access rights. - *---------- */ + rte->lateral = false; rte->inh = false; /* never true for values RTEs */ rte->inFromCl = inFromCl; @@ -1338,15 +1403,12 @@ addRangeTableEntryForJoin(ParseState *pstate, rte->eref = eref; - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. + /* + * Set flags and access permissions. * * Joins are never checked for access rights. - *---------- */ + rte->lateral = false; rte->inh = false; /* never true for joins */ rte->inFromCl = inFromCl; @@ -1441,15 +1503,12 @@ addRangeTableEntryForCTE(ParseState *pstate, rte->eref = eref; - /*---------- - * Flags: - * - this RTE should be expanded to include descendant tables, - * - this RTE is in the FROM clause, - * - this RTE should be checked for appropriate access rights. + /* + * Set flags and access permissions. * * Subqueries are never checked for access rights. - *---------- */ + rte->lateral = false; rte->inh = false; /* never true for subqueries */ rte->inFromCl = inFromCl; @@ -1519,7 +1578,8 @@ isLockedRefname(ParseState *pstate, const char *refname) /* * Add the given RTE as a top-level entry in the pstate's join list * and/or name space lists. (We assume caller has checked for any - * namespace conflicts.) + * namespace conflicts.) The RTE is always marked as unconditionally + * visible, that is, not LATERAL-only. 
*/ void addRTEtoQuery(ParseState *pstate, RangeTblEntry *rte, @@ -1534,10 +1594,19 @@ addRTEtoQuery(ParseState *pstate, RangeTblEntry *rte, rtr->rtindex = rtindex; pstate->p_joinlist = lappend(pstate->p_joinlist, rtr); } - if (addToRelNameSpace) - pstate->p_relnamespace = lappend(pstate->p_relnamespace, rte); - if (addToVarNameSpace) - pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte); + if (addToRelNameSpace || addToVarNameSpace) + { + ParseNamespaceItem *nsitem; + + nsitem = (ParseNamespaceItem *) palloc(sizeof(ParseNamespaceItem)); + nsitem->p_rte = rte; + nsitem->p_lateral_only = false; + nsitem->p_lateral_ok = true; + if (addToRelNameSpace) + pstate->p_relnamespace = lappend(pstate->p_relnamespace, nsitem); + if (addToVarNameSpace) + pstate->p_varnamespace = lappend(pstate->p_varnamespace, nsitem); + } } /* @@ -2453,7 +2522,7 @@ errorMissingRTE(ParseState *pstate, RangeVar *relation) * rangetable. (Note: cases involving a bad schema name in the RangeVar * will throw error immediately here. That seems OK.) */ - rte = searchRangeTable(pstate, relation); + rte = searchRangeTableForRel(pstate, relation); /* * If we found a match that has an alias and the alias is visible in the @@ -2490,3 +2559,43 @@ errorMissingRTE(ParseState *pstate, RangeVar *relation) relation->relname), parser_errposition(pstate, relation->location))); } + +/* + * Generate a suitable error about a missing column. + * + * Since this is a very common type of error, we work rather hard to + * produce a helpful message. + */ +void +errorMissingColumn(ParseState *pstate, + char *relname, char *colname, int location) +{ + RangeTblEntry *rte; + + /* + * If relname was given, just play dumb and report it. (In practice, a + * bad qualification name should end up at errorMissingRTE, not here, so + * no need to work hard on this case.) 
+ */ + if (relname) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column %s.%s does not exist", relname, colname), + parser_errposition(pstate, location))); + + /* + * Otherwise, search the entire rtable looking for possible matches. If + * we find one, emit a hint about it. + * + * TODO: improve this code (and also errorMissingRTE) to mention using + * LATERAL if appropriate. + */ + rte = searchRangeTableForCol(pstate, colname, location); + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" does not exist", colname), + rte ? errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.", + colname, rte->eref->aliasname) : 0, + parser_errposition(pstate, location))); +} diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c index 3850a3bc64..4d9e6e6106 100644 --- a/src/backend/parser/parse_target.c +++ b/src/backend/parser/parse_target.c @@ -1129,9 +1129,13 @@ ExpandAllTables(ParseState *pstate, int location) foreach(l, pstate->p_varnamespace) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); + ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(l); + RangeTblEntry *rte = nsitem->p_rte; int rtindex = RTERangeTablePosn(pstate, rte, NULL); + /* Should not have any lateral-only items when parsing targetlist */ + Assert(!nsitem->p_lateral_only); + target = list_concat(target, expandRelAttrs(pstate, rte, rtindex, 0, location)); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 412dfe6f9a..0103021961 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -676,6 +676,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) oldrte->relid = trigrec->tgrelid; oldrte->relkind = relkind; oldrte->eref = makeAlias("old", NIL); + oldrte->lateral = false; oldrte->inh = false; oldrte->inFromCl = true; @@ -684,6 +685,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) 
newrte->relid = trigrec->tgrelid; newrte->relkind = relkind; newrte->eref = makeAlias("new", NIL); + newrte->lateral = false; newrte->inh = false; newrte->inFromCl = true; @@ -2174,6 +2176,7 @@ deparse_context_for(const char *aliasname, Oid relid) rte->relid = relid; rte->relkind = RELKIND_RELATION; /* no need for exactness here */ rte->eref = makeAlias(aliasname, NIL); + rte->lateral = false; rte->inh = false; rte->inFromCl = true; @@ -6618,6 +6621,9 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context) RangeTblEntry *rte = rt_fetch(varno, query->rtable); bool gavealias = false; + if (rte->lateral) + appendStringInfoString(buf, "LATERAL "); + switch (rte->rtekind) { case RTE_RELATION: diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 4806b31334..82b6c0cfc7 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201207201 +#define CATALOG_VERSION_NO 201208071 #endif diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 119e1ed2f6..f433166cc6 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -451,6 +451,7 @@ typedef struct WindowDef typedef struct RangeSubselect { NodeTag type; + bool lateral; /* does it have LATERAL prefix? */ Node *subquery; /* the untransformed sub-select clause */ Alias *alias; /* table alias & optional column aliases */ } RangeSubselect; @@ -461,6 +462,7 @@ typedef struct RangeSubselect typedef struct RangeFunction { NodeTag type; + bool lateral; /* does it have LATERAL prefix? 
*/ Node *funccallnode; /* untransformed function call tree */ Alias *alias; /* table alias & optional column aliases */ List *coldeflist; /* list of ColumnDef nodes to describe result @@ -706,7 +708,7 @@ typedef struct RangeTblEntry * Fields valid for a subquery RTE (else NULL): */ Query *subquery; /* the sub-query */ - bool security_barrier; /* subquery from security_barrier view */ + bool security_barrier; /* is from security_barrier view? */ /* * Fields valid for a join RTE (else NULL/zero): @@ -756,6 +758,7 @@ typedef struct RangeTblEntry */ Alias *alias; /* user-written alias clause, if any */ Alias *eref; /* expanded reference names */ + bool lateral; /* subquery or function is marked LATERAL? */ bool inh; /* inheritance requested? */ bool inFromCl; /* present in FROM clause? */ AclMode requiredPerms; /* bitmask of required access permissions */ @@ -1752,7 +1755,7 @@ typedef struct AlterEventTrigStmt { NodeTag type; char *trigname; /* TRIGGER's name */ - char tgenabled; /* trigger's firing configuration WRT + char tgenabled; /* trigger's firing configuration WRT * session_replication_role */ } AlterEventTrigStmt; @@ -2046,7 +2049,7 @@ typedef struct FetchStmt * * This represents creation of an index and/or an associated constraint. * If isconstraint is true, we should create a pg_constraint entry along - * with the index. But if indexOid isn't InvalidOid, we are not creating an + * with the index. But if indexOid isn't InvalidOid, we are not creating an * index, just a UNIQUE/PKEY constraint using an existing index. isconstraint * must always be true in this case, and the fields describing the index * properties are empty. 
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index cf0bbd9f15..8238981c28 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -307,15 +307,17 @@ typedef struct PlannerInfo * ppilist - ParamPathInfo nodes for parameterized Paths, if any * cheapest_startup_path - the pathlist member with lowest startup cost * (regardless of its ordering; but must be - * unparameterized) + * unparameterized; hence will be NULL for + * a LATERAL subquery) * cheapest_total_path - the pathlist member with lowest total cost * (regardless of its ordering; but must be - * unparameterized) + * unparameterized; hence will be NULL for + * a LATERAL subquery) * cheapest_unique_path - for caching cheapest path to produce unique * (no duplicates) output from relation * cheapest_parameterized_paths - paths with cheapest total costs for * their parameterizations; always includes - * cheapest_total_path + * cheapest_total_path, if that exists * * If the relation is a base relation it will have these fields set: * diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index b2cdb3d62e..e3d33d69ba 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -81,7 +81,7 @@ extern void cost_tidscan(Path *path, PlannerInfo *root, extern void cost_subqueryscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_functionscan(Path *path, PlannerInfo *root, - RelOptInfo *baserel); + RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_valuesscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); extern void cost_ctescan(Path *path, PlannerInfo *root, RelOptInfo *baserel); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 385bae6eb8..3af1172cbe 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -69,7 +69,8 @@ extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path 
*subpath, SpecialJoinInfo *sjinfo); extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, List *pathkeys, Relids required_outer); -extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel); +extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer); extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel); diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h index f546362b68..ec21df3a7e 100644 --- a/src/include/optimizer/var.h +++ b/src/include/optimizer/var.h @@ -31,11 +31,12 @@ typedef enum } PVCPlaceHolderBehavior; extern Relids pull_varnos(Node *node); +extern Relids pull_varnos_of_level(Node *node, int levelsup); extern void pull_varattnos(Node *node, Index varno, Bitmapset **varattnos); +extern List *pull_vars_of_level(Node *node, int levelsup); extern bool contain_var_clause(Node *node); extern bool contain_vars_of_level(Node *node, int levelsup); extern int locate_var_of_level(Node *node, int levelsup); -extern int locate_var_of_relation(Node *node, int relid, int levelsup); extern int find_minimum_var_level(Node *node); extern List *pull_var_clause(Node *node, PVCAggregateBehavior aggbehavior, PVCPlaceHolderBehavior phbehavior); diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 7e55a92185..af60dac08e 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -213,6 +213,7 @@ PG_KEYWORD("label", LABEL, UNRESERVED_KEYWORD) PG_KEYWORD("language", LANGUAGE, UNRESERVED_KEYWORD) PG_KEYWORD("large", LARGE_P, UNRESERVED_KEYWORD) PG_KEYWORD("last", LAST_P, UNRESERVED_KEYWORD) +PG_KEYWORD("lateral", LATERAL_P, RESERVED_KEYWORD) PG_KEYWORD("lc_collate", LC_COLLATE_P, UNRESERVED_KEYWORD) PG_KEYWORD("lc_ctype", LC_CTYPE_P, UNRESERVED_KEYWORD) PG_KEYWORD("leading", LEADING, 
RESERVED_KEYWORD) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index 670e084993..13f745f6fa 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -54,22 +54,25 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param, * p_joinlist: list of join items (RangeTblRef and JoinExpr nodes) that * will become the fromlist of the query's top-level FromExpr node. * - * p_relnamespace: list of RTEs that represents the current namespace for - * table lookup, ie, those RTEs that are accessible by qualified names. - * This may be just a subset of the rtable + joinlist, and/or may contain - * entries that are not yet added to the main joinlist. + * p_relnamespace: list of ParseNamespaceItems that represents the current + * namespace for table lookup, ie, those RTEs that are accessible by + * qualified names. (This may be just a subset of the whole rtable.) * - * p_varnamespace: list of RTEs that represents the current namespace for - * column lookup, ie, those RTEs that are accessible by unqualified names. - * This is different from p_relnamespace because a JOIN without an alias does - * not hide the contained tables (so they must still be in p_relnamespace) - * but it does hide their columns (unqualified references to the columns must - * refer to the JOIN, not the member tables). Other special RTEs such as - * NEW/OLD for rules may also appear in just one of these lists. + * p_varnamespace: list of ParseNamespaceItems that represents the current + * namespace for column lookup, ie, those RTEs that are accessible by + * unqualified names. This is different from p_relnamespace because a JOIN + * without an alias does not hide the contained tables (so they must be in + * p_relnamespace) but it does hide their columns (unqualified references to + * the columns must refer to the JOIN, not the member tables). Other special + * RTEs such as NEW/OLD for rules may also appear in just one of these lists. 
+ * + * p_lateral_active: TRUE if we are currently parsing a LATERAL subexpression + * of this parse level. This makes p_lateral_only namespace items visible, + * whereas they are not visible when p_lateral_active is FALSE. * * p_ctenamespace: list of CommonTableExprs (WITH items) that are visible - * at the moment. This is different from p_relnamespace because you have - * to make an RTE before you can access a CTE. + * at the moment. This is entirely different from p_relnamespace because + * a CTE is not an RTE; rather, "visibility" means you could make an RTE. * * p_future_ctes: list of CommonTableExprs (WITH items) that are not yet * visible due to scope rules. This is used to help improve error messages. @@ -93,6 +96,7 @@ struct ParseState * node's fromlist) */ List *p_relnamespace; /* current namespace for relations */ List *p_varnamespace; /* current namespace for columns */ + bool p_lateral_active; /* p_lateral_only items visible? */ List *p_ctenamespace; /* current namespace for common table exprs */ List *p_future_ctes; /* common table exprs not yet in namespace */ CommonTableExpr *p_parent_cte; /* this query's containing CTE */ @@ -121,6 +125,14 @@ struct ParseState void *p_ref_hook_state; /* common passthrough link for above */ }; +/* An element of p_relnamespace or p_varnamespace */ +typedef struct ParseNamespaceItem +{ + RangeTblEntry *p_rte; /* The relation's rangetable entry */ + bool p_lateral_only; /* Is only visible to LATERAL expressions? */ + bool p_lateral_ok; /* If so, does join type allow use? 
*/ +} ParseNamespaceItem; + /* Support for parser_errposition_callback function */ typedef struct ParseCallbackState { diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h index ababd74f82..ba99fc2d8a 100644 --- a/src/include/parser/parse_relation.h +++ b/src/include/parser/parse_relation.h @@ -55,11 +55,13 @@ extern RangeTblEntry *addRangeTableEntryForRelation(ParseState *pstate, extern RangeTblEntry *addRangeTableEntryForSubquery(ParseState *pstate, Query *subquery, Alias *alias, + bool lateral, bool inFromCl); extern RangeTblEntry *addRangeTableEntryForFunction(ParseState *pstate, char *funcname, Node *funcexpr, RangeFunction *rangefunc, + bool lateral, bool inFromCl); extern RangeTblEntry *addRangeTableEntryForValues(ParseState *pstate, List *exprs, @@ -82,6 +84,8 @@ extern void addRTEtoQuery(ParseState *pstate, RangeTblEntry *rte, bool addToJoinList, bool addToRelNameSpace, bool addToVarNameSpace); extern void errorMissingRTE(ParseState *pstate, RangeVar *relation); +extern void errorMissingColumn(ParseState *pstate, + char *relname, char *colname, int location); extern void expandRTE(RangeTblEntry *rte, int rtindex, int sublevels_up, int location, bool include_dropped, List **colnames, List **colvars); diff --git a/src/interfaces/ecpg/preproc/ecpg.addons b/src/interfaces/ecpg/preproc/ecpg.addons index 5c5adf7699..aae3cc9d92 100644 --- a/src/interfaces/ecpg/preproc/ecpg.addons +++ b/src/interfaces/ecpg/preproc/ecpg.addons @@ -358,7 +358,11 @@ ECPG: into_clauseINTOOptTempTableName block $$= cat2_str(mm_strdup("into"), $2); } | ecpg_into { $$ = EMPTY; } -ECPG: table_refselect_with_parens addon +ECPG: table_refselect_with_parensopt_alias_clause addon + if ($2 == NULL) + mmerror(PARSE_ERROR, ET_ERROR, "subquery in FROM must have an alias"); +ECPG: table_refLATERAL_Pselect_with_parensopt_alias_clause addon + if ($3 == NULL) mmerror(PARSE_ERROR, ET_ERROR, "subquery in FROM must have an alias"); ECPG: 
TypenameSimpleTypenameopt_array_bounds block { $$ = cat2_str($1, $2.str); } diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index c46d35df89..c5b92582b4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2986,3 +2986,163 @@ SELECT * FROM (5 rows) rollback; +-- +-- Test LATERAL +-- +select unique2, x.* +from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x; + unique2 | f1 +---------+---- + 9998 | 0 +(1 row) + +explain (costs off) + select unique2, x.* + from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x; + QUERY PLAN +---------------------------------- + Nested Loop + -> Seq Scan on tenk1 a + -> Seq Scan on int4_tbl b + Filter: (f1 = a.unique1) +(4 rows) + +select unique2, x.* +from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss; + unique2 | f1 +---------+---- + 9998 | 0 +(1 row) + +explain (costs off) + select unique2, x.* + from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss; + QUERY PLAN +----------------------------------------------- + Nested Loop + -> Seq Scan on int4_tbl x + -> Index Scan using tenk1_unique1 on tenk1 + Index Cond: (x.f1 = unique1) +(4 rows) + +explain (costs off) + select unique2, x.* + from int4_tbl x cross join lateral (select unique2 from tenk1 where f1 = unique1) ss; + QUERY PLAN +----------------------------------------------- + Nested Loop + -> Seq Scan on int4_tbl x + -> Index Scan using tenk1_unique1 on tenk1 + Index Cond: (x.f1 = unique1) +(4 rows) + +select unique2, x.* +from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on f1 = unique1; + unique2 | f1 +---------+------------- + 9998 | 0 + | 123456 + | -123456 + | 2147483647 + | -2147483647 +(5 rows) + +explain (costs off) + select unique2, x.* + from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on f1 = unique1; + QUERY PLAN 
+----------------------------------------------------- + Nested Loop Left Join + -> Seq Scan on int4_tbl x + -> Subquery Scan on ss + Filter: (x.f1 = ss.unique1) + -> Index Scan using tenk1_unique1 on tenk1 + Index Cond: (x.f1 = unique1) +(6 rows) + +-- check scoping of lateral versus parent references +-- the first of these should return int8_tbl.q2, the second int8_tbl.q1 +select *, (select r from (select q1 as q2) x, (select q2 as r) y) from int8_tbl; + q1 | q2 | r +------------------+-------------------+------------------- + 123 | 456 | 456 + 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 123 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | -4567890123456789 +(5 rows) + +select *, (select r from (select q1 as q2) x, lateral (select q2 as r) y) from int8_tbl; + q1 | q2 | r +------------------+-------------------+------------------ + 123 | 456 | 123 + 123 | 4567890123456789 | 123 + 4567890123456789 | 123 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | -4567890123456789 | 4567890123456789 +(5 rows) + +-- lateral SRF +select count(*) from tenk1 a, lateral generate_series(1,two) g; + count +------- + 5000 +(1 row) + +explain (costs off) + select count(*) from tenk1 a, lateral generate_series(1,two) g; + QUERY PLAN +------------------------------------------------ + Aggregate + -> Nested Loop + -> Seq Scan on tenk1 a + -> Function Scan on generate_series g +(4 rows) + +explain (costs off) + select count(*) from tenk1 a cross join lateral generate_series(1,two) g; + QUERY PLAN +------------------------------------------------ + Aggregate + -> Nested Loop + -> Seq Scan on tenk1 a + -> Function Scan on generate_series g +(4 rows) + +-- test some error cases where LATERAL should have been used but wasn't +select f1,g from int4_tbl a, generate_series(0, f1) g; +ERROR: column "f1" does not exist +LINE 1: select f1,g from int4_tbl a, generate_series(0, f1) 
g; + ^ +HINT: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query. +select f1,g from int4_tbl a, generate_series(0, a.f1) g; +ERROR: invalid reference to FROM-clause entry for table "a" +LINE 1: select f1,g from int4_tbl a, generate_series(0, a.f1) g; + ^ +HINT: There is an entry for table "a", but it cannot be referenced from this part of the query. +select f1,g from int4_tbl a cross join generate_series(0, f1) g; +ERROR: column "f1" does not exist +LINE 1: ...ct f1,g from int4_tbl a cross join generate_series(0, f1) g; + ^ +HINT: There is a column named "f1" in table "a", but it cannot be referenced from this part of the query. +select f1,g from int4_tbl a cross join generate_series(0, a.f1) g; +ERROR: invalid reference to FROM-clause entry for table "a" +LINE 1: ... f1,g from int4_tbl a cross join generate_series(0, a.f1) g; + ^ +HINT: There is an entry for table "a", but it cannot be referenced from this part of the query. +-- SQL:2008 says the left table is in scope but illegal to access here +select f1,g from int4_tbl a right join lateral generate_series(0, a.f1) g on true; +ERROR: invalid reference to FROM-clause entry for table "a" +LINE 1: ... int4_tbl a right join lateral generate_series(0, a.f1) g on... + ^ +DETAIL: The combining JOIN type must be INNER or LEFT for a LATERAL reference. +select f1,g from int4_tbl a full join lateral generate_series(0, a.f1) g on true; +ERROR: invalid reference to FROM-clause entry for table "a" +LINE 1: ...m int4_tbl a full join lateral generate_series(0, a.f1) g on... + ^ +DETAIL: The combining JOIN type must be INNER or LEFT for a LATERAL reference. +-- LATERAL can be used to put an aggregate into the FROM clause of its query +select 1 from tenk1 a, lateral (select max(a.unique1) from int4_tbl b) ss; +ERROR: aggregates not allowed in FROM clause +LINE 1: select 1 from tenk1 a, lateral (select max(a.unique1) from i... 
+ ^ diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 6710b9c15e..0fe8ca4c4e 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -21,9 +21,10 @@ INSERT INTO foo2 VALUES(1, 111); CREATE FUNCTION foot(int) returns setof foo2 as 'SELECT * FROM foo2 WHERE fooid = $1;' LANGUAGE SQL; -- supposed to fail with ERROR select * from foo2, foot(foo2.fooid) z where foo2.f2 = z.f2; -ERROR: function expression in FROM cannot refer to other relations of same query level +ERROR: invalid reference to FROM-clause entry for table "foo2" LINE 1: select * from foo2, foot(foo2.fooid) z where foo2.f2 = z.f2; ^ +HINT: There is an entry for table "foo2", but it cannot be referenced from this part of the query. -- function in subselect select * from foo2 where f2 in (select f2 from foot(foo2.fooid) z where z.fooid = foo2.fooid) ORDER BY 1,2; fooid | f2 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index f07f39534a..b041550f00 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1190,6 +1190,7 @@ do instead insert into foo2 values (f1); ERROR: column "f1" does not exist LINE 2: do instead insert into foo2 values (f1); ^ +HINT: There is a column named "f1" in table "old", but it cannot be referenced from this part of the query. -- this is the correct way: create rule foorule as on insert to foo where f1 < 100 do instead insert into foo2 values (new.f1); diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index 14028314e8..f78218822f 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -412,6 +412,7 @@ SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1; ERROR: column "q2" does not exist LINE 1: ... int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1... 
^ +HINT: There is a column named "q2" in table "*SELECT* 2", but it cannot be referenced from this part of the query. -- But this should work: SELECT q1 FROM int8_tbl EXCEPT (((SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1))); q1 diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 8676e2f761..5de98dc0a7 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -840,3 +840,49 @@ SELECT * FROM ON true; rollback; + +-- +-- Test LATERAL +-- + +select unique2, x.* +from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x; +explain (costs off) + select unique2, x.* + from tenk1 a, lateral (select * from int4_tbl b where f1 = a.unique1) x; +select unique2, x.* +from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss; +explain (costs off) + select unique2, x.* + from int4_tbl x, lateral (select unique2 from tenk1 where f1 = unique1) ss; +explain (costs off) + select unique2, x.* + from int4_tbl x cross join lateral (select unique2 from tenk1 where f1 = unique1) ss; +select unique2, x.* +from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on f1 = unique1; +explain (costs off) + select unique2, x.* + from int4_tbl x left join lateral (select unique1, unique2 from tenk1 where f1 = unique1) ss on f1 = unique1; + +-- check scoping of lateral versus parent references +-- the first of these should return int8_tbl.q2, the second int8_tbl.q1 +select *, (select r from (select q1 as q2) x, (select q2 as r) y) from int8_tbl; +select *, (select r from (select q1 as q2) x, lateral (select q2 as r) y) from int8_tbl; + +-- lateral SRF +select count(*) from tenk1 a, lateral generate_series(1,two) g; +explain (costs off) + select count(*) from tenk1 a, lateral generate_series(1,two) g; +explain (costs off) + select count(*) from tenk1 a cross join lateral generate_series(1,two) g; + +-- test some error cases where LATERAL should have been used but wasn't +select f1,g 
from int4_tbl a, generate_series(0, f1) g; +select f1,g from int4_tbl a, generate_series(0, a.f1) g; +select f1,g from int4_tbl a cross join generate_series(0, f1) g; +select f1,g from int4_tbl a cross join generate_series(0, a.f1) g; +-- SQL:2008 says the left table is in scope but illegal to access here +select f1,g from int4_tbl a right join lateral generate_series(0, a.f1) g on true; +select f1,g from int4_tbl a full join lateral generate_series(0, a.f1) g on true; +-- LATERAL can be used to put an aggregate into the FROM clause of its query +select 1 from tenk1 a, lateral (select max(a.unique1) from int4_tbl b) ss;