From 099eab0d52eed5b45afdd792e9b9cd73e5860d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Thu, 22 Aug 2024 19:07:50 +0200 Subject: [PATCH] Fix issue XXX related to Indirection in UPDATE SET() ruleutils in Citus is based on PostgreSQL source code, but in PostgreSQL ruleutils is not used at the planner stage. For instance, it is assumed after the parser that targetList entries are ordered as they were read, but that is not true after the rewriter: the resulting rewritten tree is then provided to the planner (and Citus), but the ordering of the list is not guaranteed anymore. It's similar to other previously reported issues that are still open, as well as to other bugfixes/improvements over time, the most noticeable being ProcessIndirection(), which is for domains and similar. However, the implications of this bug are huge for users of `UPDATE SET (...)`: 1. if you used to order by column order, you're maybe safe: `SET (col1, col2, col3, ...)` 2. if you used not to order by column order: `SET (col2, col1, col3, ...)` then you probably found a problem, or you have one. Note about 1. that despite appearances and your QA, you are at risk: if the physical column ordering is changed (for example after DROPping/ADDing some), the same query which used to apparently work well will silently update other columns... As it is, this code is not optimized for performance; not sure it'll be needed. 
---
 .../distributed/deparser/ruleutils_16.c       | 172 ++++++++++++++++++
 1 file changed, 172 insertions(+)

diff --git a/src/backend/distributed/deparser/ruleutils_16.c b/src/backend/distributed/deparser/ruleutils_16.c
index 7f2a41d75c3..cc0da165b28 100644
--- a/src/backend/distributed/deparser/ruleutils_16.c
+++ b/src/backend/distributed/deparser/ruleutils_16.c
@@ -469,6 +469,11 @@ static void get_tablesample_def(TableSampleClause *tablesample,
 										deparse_context *context);
 static void get_opclass_name(Oid opclass, Oid actual_datatype,
 							 StringInfo buf);
+static bool is_update_set_with_multiple_columns(List *targetList);
+static List *processTargetsIndirection(List *targetList);
+static int	multiexpr_paramid_of_target(TargetEntry *tle);
+static int	compare_multiexpr_targets(const ListCell *a, const ListCell *b);
+static int	extract_paramid_from_funcexpr(FuncExpr *func);
 static Node *processIndirection(Node *node, deparse_context *context);
 static void printSubscripts(SubscriptingRef *aref, deparse_context *context);
 static char *get_relation_name(Oid relid);
@@ -3545,6 +3550,9 @@ get_update_query_targetlist_def(Query *query, List *targetList,
 			}
 		}
 	}
+	if (is_update_set_with_multiple_columns(targetList))
+		targetList = processTargetsIndirection(targetList);
+
 	next_ma_cell = list_head(ma_sublinks);
 	cur_ma_sublink = NULL;
 	remaining_ma_columns = 0;
@@ -8607,6 +8615,170 @@ get_opclass_name(Oid opclass, Oid actual_datatype,
 	ReleaseSysCache(ht_opc);
 }
 
+/*
+ * is_update_set_with_multiple_columns - detect UPDATE ... SET (...) = (...)
+ *
+ * Returns true when at least one non-junk entry of targetList takes its
+ * value from a PARAM_MULTIEXPR Param.  The caller is responsible for
+ * checking the command type (UPDATE).
+ */
+static bool
+is_update_set_with_multiple_columns(List *targetList)
+{
+	ListCell   *lc;
+
+	foreach(lc, targetList)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+		if (!tle->resjunk && multiexpr_paramid_of_target(tle) != 0)
+			return true;
+	}
+
+	/* No multi-column set expression found */
+	return false;
+}
+
+/*
+ * processTargetsIndirection - reorder targets list (from indirection)
+ *
+ * We don't change anything but the order of the target list.
+ * The purpose here is to be able to deparse a query tree as if it was
+ * provided by the PostgreSQL parser, not the rewriter (which is the one
+ * received by the planner hook).
+ *
+ * It's required only for UPDATE SET (MULTIEXPR) queries, other candidates
+ * are not supported by Citus.
+ *
+ * The returned list holds every entry of targetList exactly once: first the
+ * non-junk entries that do not depend on a PARAM_MULTIEXPR Param, in their
+ * original order, then the entries that do, sorted by paramid (that is, by
+ * sublink and then by column within the sublink), and finally the junk
+ * entries in their original order.  targetList itself is left untouched; it
+ * is returned as is when it has no PARAM_MULTIEXPR-based entry.
+ *
+ * Raises an error if a non-junk entry follows a junk one.
+ */
+static List *
+processTargetsIndirection(List *targetList)
+{
+	List	   *plainTargets = NIL;
+	List	   *multiexprTargets = NIL;
+	List	   *junkTargets = NIL;
+	bool		sawJunk = false;
+	ListCell   *lc;
+
+	/* Split the entries, ensuring non-junk columns precede junk columns */
+	foreach(lc, targetList)
+	{
+		TargetEntry *tle = lfirst_node(TargetEntry, lc);
+
+		if (tle->resjunk)
+		{
+			sawJunk = true;
+			junkTargets = lappend(junkTargets, tle);
+			continue;
+		}
+
+		if (sawJunk)
+			elog(ERROR, "Subplan target list is out of order");
+
+		if (multiexpr_paramid_of_target(tle) != 0)
+			multiexprTargets = lappend(multiexprTargets, tle);
+		else
+			plainTargets = lappend(plainTargets, tle);
+	}
+
+	/* Nothing to reorder */
+	if (multiexprTargets == NIL)
+		return targetList;
+
+	/*
+	 * The deparser expects the columns of each multi-assignment to be
+	 * adjacent and in column order, with the sublinks themselves in
+	 * subLinkId order; sorting on paramid gives exactly that.
+	 */
+	list_sort(multiexprTargets, compare_multiexpr_targets);
+
+	return list_concat(list_concat(plainTargets, multiexprTargets),
+					   junkTargets);
+}
+
+/*
+ * multiexpr_paramid_of_target - paramid of the Param feeding a SET (...) column
+ *
+ * Strips implicit coercions from the entry's expression and returns the
+ * paramid of the PARAM_MULTIEXPR Param found there, either directly or as
+ * an argument of a remaining FuncExpr.  Returns 0 when there is none; 0 is
+ * never a valid PARAM_MULTIEXPR paramid because both the sublink ID stored
+ * in the high 16 bits and the column number stored in the low 16 bits start
+ * from 1.
+ */
+static int
+multiexpr_paramid_of_target(TargetEntry *tle)
+{
+	Node	   *expr = strip_implicit_coercions((Node *) tle->expr);
+
+	if (expr == NULL)
+		return 0;
+
+	if (IsA(expr, Param))
+	{
+		Param	   *param = (Param *) expr;
+
+		return (param->paramkind == PARAM_MULTIEXPR) ? param->paramid : 0;
+	}
+
+	if (IsA(expr, FuncExpr))
+		return extract_paramid_from_funcexpr((FuncExpr *) expr);
+
+	return 0;
+}
+
+/*
+ * compare_multiexpr_targets - list_sort() comparator on MULTIEXPR paramid
+ *
+ * Orders entries by sublink ID first and by column number second, since
+ * that is how both values are packed into paramid.
+ */
+static int
+compare_multiexpr_targets(const ListCell *a, const ListCell *b)
+{
+	int			paramidA = multiexpr_paramid_of_target((TargetEntry *) lfirst(a));
+	int			paramidB = multiexpr_paramid_of_target((TargetEntry *) lfirst(b));
+
+	if (paramidA < paramidB)
+		return -1;
+	if (paramidA > paramidB)
+		return 1;
+	return 0;
+}
+
+/*
+ * extract_paramid_from_funcexpr - paramid of a MULTIEXPR argument, if any
+ *
+ * Returns the paramid of the first argument of func that is a
+ * PARAM_MULTIEXPR Param, or 0 when no argument qualifies.  Other kinds of
+ * Param (for instance PARAM_EXTERN, whose paramid is a small integer too)
+ * are ignored so that they cannot be mistaken for a SET (...) column.
+ */
+static int
+extract_paramid_from_funcexpr(FuncExpr *func)
+{
+	ListCell   *lc;
+
+	foreach(lc, func->args)
+	{
+		Node	   *arg = (Node *) lfirst(lc);
+
+		if (arg != NULL && IsA(arg, Param) &&
+			((Param *) arg)->paramkind == PARAM_MULTIEXPR)
+			return ((Param *) arg)->paramid;
+	}
+
+	return 0;
+}
+
 /*
  * processIndirection - take care of array and subfield assignment
  *