From bd0c025bb6dbc86d76699eb791196cbb5bca39fd Mon Sep 17 00:00:00 2001
From: PeterWeiWang <715533650@qq.com>
Date: Wed, 27 Apr 2022 15:18:03 +0800
Subject: [PATCH] for open source scanning.

---
 README.en.md                   |    36 -
 README.md                      |    39 -
 code-pq-for-MySQL-8.0.25.patch | 22269 +++++++++++++++++++++++++++++++
 3 files changed, 22269 insertions(+), 75 deletions(-)
 delete mode 100644 README.en.md
 delete mode 100644 README.md
 create mode 100644 code-pq-for-MySQL-8.0.25.patch

diff --git a/README.en.md b/README.en.md
deleted file mode 100644
index 320206103..000000000
--- a/README.en.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# mysql-server
-
-#### Description
-{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**}
-
-#### Software Architecture
-Software architecture description
-
-#### Installation
-
-1. xxxx
-2. xxxx
-3. xxxx
-
-#### Instructions
-
-1. xxxx
-2. xxxx
-3. xxxx
-
-#### Contribution
-
-1. Fork the repository
-2. Create Feat_xxx branch
-3. Commit your code
-4. Create Pull Request
-
-
-#### Gitee Feature
-
-1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md
-2. Gitee blog [blog.gitee.com](https://blog.gitee.com)
-3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore)
-4. The most valuable open source project [GVP](https://gitee.com/gvp)
-5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help)
-6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
diff --git a/README.md b/README.md
deleted file mode 100644
index 71cb204c0..000000000
--- a/README.md
+++ /dev/null
@@ -1,39 +0,0 @@
-# mysql-server
-
-#### 介绍
-{**以下是 Gitee 平台说明,您可以替换此简介**
-Gitee 是 OSCHINA 推出的基于 Git 的代码托管平台(同时支持 SVN)。专为开发者提供稳定、高效、安全的云端软件开发协作平台
-无论是个人、团队、或是企业,都能够用 Gitee 实现代码托管、项目管理、协作开发。企业项目请看 [https://gitee.com/enterprises](https://gitee.com/enterprises)}
-
-#### 软件架构
-软件架构说明
-
-
-#### 安装教程
-
-1. xxxx
-2. xxxx
-3. xxxx
-
-#### 使用说明
-
-1. xxxx
-2. xxxx
-3. xxxx
-
-#### 参与贡献
-
-1. Fork 本仓库
-2. 新建 Feat_xxx 分支
-3. 提交代码
-4. 新建 Pull Request
-
-
-#### 特技
-
-1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md
-2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com)
-3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目
-4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目
-5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help)
-6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)
diff --git a/code-pq-for-MySQL-8.0.25.patch b/code-pq-for-MySQL-8.0.25.patch
new file mode 100644
index 000000000..13f6b2e87
--- /dev/null
+++ b/code-pq-for-MySQL-8.0.25.patch
@@ -0,0 +1,22269 @@
+diff --git a/include/my_alloc.h b/include/my_alloc.h
+index b652bf89..5b651d44 100644
+--- a/include/my_alloc.h
++++ b/include/my_alloc.h
+@@ -1,4 +1,5 @@
+ /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
++   Copyright (c) 2022, Huawei Technologies Co., Ltd.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2.0,
+@@ -44,6 +45,9 @@
+ #include "my_pointer_arithmetic.h"
+ #include "mysql/psi/psi_memory.h"
+
++typedef void CallBackFunc(PSI_memory_key key, size_t length, unsigned int id) ;
++const int PQ_MEMORY_USED_BUCKET = 16;
++
+ /**
+  * The MEM_ROOT is a simple arena, where allocations are carved out of
+  * larger blocks.
Using an arena over plain malloc gives you two main +@@ -132,28 +136,7 @@ struct MEM_ROOT { + * + * The returned pointer will always be 8-aligned. + */ +- void *Alloc(size_t length) MY_ATTRIBUTE((malloc)) { +- length = ALIGN_SIZE(length); +- +- // Skip the straight path if simulating OOM; it should always fail. +- DBUG_EXECUTE_IF("simulate_out_of_memory", return AllocSlow(length);); +- +- // Fast path, used in the majority of cases. It would be faster here +- // (saving one register due to CSE) to instead test +- // +- // m_current_free_start + length <= m_current_free_end +- // +- // but it would invoke undefined behavior, and in particular be prone +- // to wraparound on 32-bit platforms. +- if (static_cast(m_current_free_end - m_current_free_start) >= +- length) { +- void *ret = m_current_free_start; +- m_current_free_start += length; +- return ret; +- } +- +- return AllocSlow(length); +- } ++ void *Alloc(size_t length) MY_ATTRIBUTE((malloc)); + + /** + Allocate “num” objects of type T, and default-construct them. +@@ -389,6 +372,11 @@ struct MEM_ROOT { + void (*m_error_handler)(void) = nullptr; + + PSI_memory_key m_psi_key = 0; ++ ++public: ++ CallBackFunc *allocCBFunc = nullptr; ++ ++ CallBackFunc *freeCBFunc = nullptr; + }; + + // Legacy C thunks. Do not use in new code. +diff --git a/include/my_dbug.h b/include/my_dbug.h +index 9a143735..086ab469 100644 +--- a/include/my_dbug.h ++++ b/include/my_dbug.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -60,6 +61,8 @@ extern int _db_is_pushed_(void); + extern void _db_process_(const char *name); + extern void _db_push_(const char *control); + extern void _db_pop_(void); ++extern void pq_stack_copy(CODE_STATE *leader_cs); ++extern void pq_stack_reset(); + extern void _db_set_(const char *control); + extern void _db_set_init_(const char *control); + extern void _db_enter_(const char *_func_, int func_len, const char *_file_, +diff --git a/include/priority_queue.h b/include/priority_queue.h +index ee22f0e1..af9bb452 100644 +--- a/include/priority_queue.h ++++ b/include/priority_queue.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2014, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -102,7 +103,7 @@ class Priority_queue : public Less { + + friend class priority_queue_unittest::PriorityQueueTest; + +- private: ++ public: + // Deriving from Less allows empty base-class optimization in some cases. + typedef Less Base; + +@@ -403,7 +404,7 @@ class Priority_queue : public Less { + return false; + } + +- private: ++ public: + container_type m_container; + }; + +diff --git a/include/sql_string.h b/include/sql_string.h +index b4be536d..a9cb41f2 100644 +--- a/include/sql_string.h ++++ b/include/sql_string.h +@@ -70,11 +70,10 @@ extern PSI_memory_key key_memory_String_value; + Don't add new members or virual methods into this class! + */ + class Simple_cstring { +- private: ++ public: + const char *m_str; + size_t m_length; + +- public: + /** + Initialize from a C string whose length is already known. 
+ */ +diff --git a/mysys/dbug.cc b/mysys/dbug.cc +index 0da19457..3521c954 100644 +--- a/mysys/dbug.cc ++++ b/mysys/dbug.cc +@@ -923,6 +923,19 @@ void _db_pop_() { + } + } + ++void pq_stack_copy(CODE_STATE *leader_cs) { ++ CODE_STATE *cs; ++ get_code_state_or_return; ++ assert(cs->stack == &init_settings); ++ cs->stack = leader_cs->stack; ++} ++ ++void pq_stack_reset() { ++ CODE_STATE *cs; ++ get_code_state_or_return; ++ cs->stack = &init_settings; ++} ++ + /* + * FUNCTION + * +diff --git a/mysys/my_alloc.cc b/mysys/my_alloc.cc +index f7a30c18..137a4ce1 100644 +--- a/mysys/my_alloc.cc ++++ b/mysys/my_alloc.cc +@@ -1,4 +1,6 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. +@@ -99,6 +101,39 @@ std::pair MEM_ROOT::AllocBlock( + return {new_block, length}; + } + ++void *MEM_ROOT::Alloc(size_t length) { ++ void *ret = nullptr; ++ length = ALIGN_SIZE(length); ++ ++ // Skip the straight path if simulating OOM; it should always fail. ++ DBUG_EXECUTE_IF("simulate_out_of_memory", return AllocSlow(length);); ++ ++ size_t old_alloc_size = m_allocated_size; ++ // Fast path, used in the majority of cases. It would be faster here ++ // (saving one register due to CSE) to instead test ++ // ++ // m_current_free_start + length <= m_current_free_end ++ // ++ // but it would invoke undefined behavior, and in particular be prone ++ // to wraparound on 32-bit platforms. ++ if (static_cast(m_current_free_end - m_current_free_start) >= length) { ++ ret = m_current_free_start; ++ m_current_free_start += length; ++ return ret; ++ } ++ ++ ret = AllocSlow(length); ++ assert(m_allocated_size >= old_alloc_size); ++ if (allocCBFunc && (m_allocated_size - old_alloc_size)) { ++ allocCBFunc( ++ m_psi_key, m_allocated_size - old_alloc_size, ++ ((reinterpret_cast(this) >> PQ_MEMORY_USED_BUCKET) & ++ 0xf)); ++ } ++ ++ return ret; ++} ++ + void *MEM_ROOT::AllocSlow(size_t length) { + DBUG_TRACE; + DBUG_PRINT("enter", ("root: %p", this)); +@@ -166,6 +201,12 @@ void MEM_ROOT::Clear() { + DBUG_TRACE; + DBUG_PRINT("enter", ("root: %p", this)); + ++ if (freeCBFunc && m_allocated_size) { ++ freeCBFunc( ++ m_psi_key, m_allocated_size, ++ (reinterpret_cast(this) >> PQ_MEMORY_USED_BUCKET) & 0xf); ++ } ++ + // Already cleared, or memset() to zero, so just ignore. + if (m_current_block == nullptr) return; + +@@ -188,6 +229,8 @@ void MEM_ROOT::ClearForReuse() { + return; + } + ++ size_t old_alloc_size = m_allocated_size; ++ + // Already cleared, or memset() to zero, so just ignore. + if (m_current_block == nullptr) return; + +@@ -198,6 +241,12 @@ void MEM_ROOT::ClearForReuse() { + m_current_block->prev = nullptr; + m_allocated_size = m_current_free_end - m_current_free_start; + ++ if (freeCBFunc && (old_alloc_size - m_allocated_size)) { ++ freeCBFunc( ++ m_psi_key, old_alloc_size - m_allocated_size, ++ (reinterpret_cast(this) >> PQ_MEMORY_USED_BUCKET) & 0xf); ++ } ++ + FreeBlocks(start); + } + +diff --git a/share/messages_to_clients.txt b/share/messages_to_clients.txt +index 7ff9af75..1ad31d40 100644 +--- a/share/messages_to_clients.txt ++++ b/share/messages_to_clients.txt +@@ -1,4 +1,5 @@ + # Copyright (c) 2017, 2021, Oracle and/or its affiliates. ++# Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License, version 2.0, +@@ -9555,6 +9556,22 @@ ER_CHANGE_RPL_SRC_WRONG_COMPRESSION_ALGORITHM_SIZE + + reserved-error-section 5000 5999 + ++################################################################################ ++# Error numbers starting at 7500: server-to-client messages hwsql ++# ++# Start of hwsql error messages (error log). ++# ++ ++start-error-number 7500 ++ ++ER_PARALLEL_FAIL_INIT ++ eng "Failed to parallel execute. this SQL is not supported." ++ ++ER_WARN_BAD_PARALLEL_NUM ++ eng "Incorrect number for degree of parallel" ++ ++ER_PARALLEL_EXEC_ERROR ++ eng "Parallel execution error" + + ################################################################################ + # DO NOT add messages for the error-log to this file; +diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt +index 5fd225c1..315044ef 100644 +--- a/sql/CMakeLists.txt ++++ b/sql/CMakeLists.txt +@@ -1,4 +1,5 @@ + # Copyright (c) 2006, 2021, Oracle and/or its affiliates. ++# Copyright (c) 2022, Huawei Technologies Co., Ltd. + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License, version 2.0, +@@ -459,6 +460,7 @@ SET(SQL_SHARED_SOURCES + protocol_classic.cc + psi_memory_key.cc + query_result.cc ++ msg_queue.cc + records.cc + regexp/errors.cc + regexp/regexp_engine.cc +@@ -527,6 +529,10 @@ SET(SQL_SHARED_SOURCES + sql_locale.cc + sql_manager.cc + sql_optimizer.cc ++ sql_parallel.cc ++ pq_condition.cc ++ pq_clone.cc ++ pq_clone_item.cc + sql_parse.cc + sql_partition.cc + sql_partition_admin.cc +@@ -587,6 +593,10 @@ SET(SQL_SHARED_SOURCES + uniques.cc + xa.cc + daemon_proxy_keyring/daemon_proxy_keyring.cc ++ exchange.cc ++ exchange_sort.cc ++ exchange_nosort.cc ++ msg_queue.cc + ) + + # BISON_TARGET( +diff --git a/sql/basic_row_iterators.h b/sql/basic_row_iterators.h +index bba4d07a..cd9479cb 100644 +--- a/sql/basic_row_iterators.h ++++ b/sql/basic_row_iterators.h +@@ -2,6 +2,7 @@ + #define SQL_BASIC_ROW_ITERATORS_H_ + + /* Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -41,6 +42,7 @@ + #include "sql/mem_root_array.h" + #include "sql/row_iterator.h" + #include "sql/sql_list.h" ++#include "filesort.h" + + class Filesort_info; + class Item; +@@ -50,6 +52,80 @@ class THD; + class handler; + struct IO_CACHE; + struct TABLE; ++class Gather_operator; ++class ORDER; ++class MQ_record_gather; ++class QEP_TAB; ++ ++/** ++ * Parallel scan iterator, which is used in parallel leader ++ */ ++class ParallelScanIterator final : public TableRowIterator { ++ public: ++ ParallelScanIterator(THD *thd, QEP_TAB *tab, TABLE *table, ++ ha_rows *examined_rows, JOIN *join, ++ Gather_operator *gather, bool stab_output = false, ++ uint ref_length = 0); ++ ++ ~ParallelScanIterator() override; ++ ++ bool Init() override; ++ int Read() override; ++ int End() override; ++ void UnlockRow() override {} ++ void SetNullRowFlag(bool) override {} ++ void StartPSIBatchMode() override {} ++ void EndPSIBatchModeIfStarted() override {} ++ ++ private: ++ uchar *const m_record; ++ ha_rows *const m_examined_rows; ++ uint m_dop; ++ JOIN *m_join; ++ Gather_operator *m_gather; ++ MQ_record_gather *m_record_gather; ++ ORDER *m_order; /** use for records merge sort */ ++ QEP_TAB *m_tab; ++ bool m_stable_sort; /** determine whether using stable sort */ ++ uint m_ref_length; ++ ++ /** construct filesort on leader when needing stab_output or merge_sort */ ++ bool pq_make_filesort(Filesort **sort); ++ /** init m_record_gather */ ++ bool pq_init_record_gather(); ++ /** launch worker threads to execute parallel query */ ++ bool pq_launch_worker(); ++ /** wait all workers finished */ ++ void pq_wait_workers_finished(); ++ /** outoput parallel query error code */ ++ int pq_error_code(); ++}; ++ ++class PQ_worker_manager; ++ ++/** ++ * block scan iterator, which is used is in parallel worker. ++ * a whole talbe is cut into many blocks for parallel scan ++ */ ++class PQblockScanIterator final : public TableRowIterator { ++ public: ++ PQblockScanIterator(THD *thd, TABLE *table, uchar *record, ++ ha_rows *examined_rows, Gather_operator *gather, ++ bool need_rowid = false); ++ ~PQblockScanIterator() override; ++ ++ bool Init() override; ++ int Read() override; ++ int End() override; ++ ++ private: ++ uchar *const m_record; ++ ha_rows *const m_examined_rows; ++ void *m_pq_ctx; // parallel query context ++ uint keyno; ++ Gather_operator *m_gather; ++ bool m_need_rowid; ++}; + + /** + Scan a table from beginning to end. +diff --git a/sql/binary_heap.h b/sql/binary_heap.h +new file mode 100644 +index 00000000..debdb9c1 +--- /dev/null ++++ b/sql/binary_heap.h +@@ -0,0 +1,190 @@ ++#ifndef MYSQL_BINARY_HEAP_H ++#define MYSQL_BINARY_HEAP_H ++ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. 
++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "sql/sql_class.h" ++ ++// compare based on the sort_key ++typedef bool (*binaryheap_comparator)(int a, int b, void *arg); ++class binary_heap { ++ public: ++ binary_heap(int element_size, void *arg, binaryheap_comparator cmp, THD *thd) ++ : m_queue(nullptr), ++ m_capacity(element_size), ++ m_size(0), ++ m_compare(cmp), ++ m_arg(arg), ++ m_thd(thd) {} ++ ~binary_heap() {} ++ /* @retval: false of success, and true otherwise. */ ++ bool init_binary_heap() { ++ if (m_capacity <= 0) { ++ return true; ++ } ++ m_queue = new (m_thd->pq_mem_root) int[m_capacity + 1]; ++ if (!m_queue || DBUG_EVALUATE_IF("pq_msort_error9", true, false)) { ++ my_error(ER_STD_BAD_ALLOC_ERROR, MYF(0), "", "(PQ::init)"); ++ return true; ++ } ++ return false; ++ } ++ ++ void reset() { m_size = 0; } ++ ++ void add_unorderd(int element) { ++ if (m_size >= m_capacity || ++ DBUG_EVALUATE_IF("pq_msort_error8", true, false)) { ++ my_error(ER_STD_BAD_ALLOC_ERROR, MYF(0), "", "(PQ::add_unorderd)"); ++ return; ++ } ++ m_queue[m_size++] = element; ++ } ++ ++ void build() { ++ if (m_size <= 1) { ++ return; ++ } ++ for (int i = parent(m_size - 1); i >= 0; i--) { ++ sift_down(i); ++ } ++ } ++ ++ int first() { ++ assert(!empty()); ++ return m_queue[0]; ++ } ++ ++ int remove_first() { ++ assert(!empty()); ++ if (m_size == 1) { ++ m_size--; ++ return m_queue[0]; ++ } ++ ++ swap_node(0, m_size - 1); ++ m_size--; ++ sift_down(0); ++ ++ return m_queue[m_size]; ++ } ++ ++ void replace_first(int element) { ++ assert(!empty()); ++ m_queue[0] = element; ++ if (m_size > 1) { ++ sift_down(0); ++ } ++ } ++ ++ bool empty() { return m_size == 0; } ++ ++ void cleanup() { ++ if (m_queue) { ++ destroy(m_queue); ++ } ++ } ++ ++ private: ++ /* return the index ((i - 1) / 2) of the parent node of node i */ ++ int parent(unsigned int i) { ++ assert(i != 0); ++ return (--i) >> 1; ++ } ++ ++ /* return the index (2 * i + 1) of the left child of node i */ ++ int left(unsigned int i) { return (i << 1) | 1; } ++ ++ /* return the index (2 * i + 2) of the right child of node */ ++ int right(unsigned int i) { return (++i) << 1; } ++ ++ uint size() { return m_size; } ++ ++ void add(int element) { ++ if (m_size >= m_capacity) { ++ my_error(ER_STD_BAD_ALLOC_ERROR, MYF(0), "out of binary heap space"); ++ return; ++ } ++ m_queue[m_size++] = element; ++ sift_up(m_size - 1); ++ } ++ ++ void swap_node(int a, int b) { ++ int temp; ++ temp = m_queue[a]; ++ m_queue[a] = m_queue[b]; ++ m_queue[b] = temp; ++ } ++ ++ void sift_down(int node_off) { ++ while (true) { ++ int left_off = left(node_off); ++ int right_off = right(node_off); ++ int swap_off = 0; ++ ++ if (left_off < m_size && ++ m_compare(m_queue[left_off], m_queue[node_off], m_arg)) { ++ swap_off = left_off; ++ } ++ ++ if (right_off < m_size && ++ m_compare(m_queue[right_off], m_queue[node_off], m_arg)) { ++ if (!swap_off || ++ m_compare(m_queue[right_off], m_queue[left_off], m_arg)) { ++ swap_off = right_off; ++ } ++ } ++ ++ if (!swap_off) { ++ break; ++ } ++ ++ swap_node(swap_off, node_off); ++ node_off = swap_off; ++ } ++ } ++ ++ void 
sift_up(int node_off) { ++ bool cmp = false; ++ int parent_off; ++ while (node_off != 0) { ++ parent_off = parent(node_off); ++ cmp = m_compare(m_queue[parent_off], m_queue[node_off], m_arg); ++ if (cmp) { ++ break; ++ } ++ ++ swap_node(node_off, parent_off); ++ node_off = parent_off; ++ } ++ } ++ ++ int *m_queue; ++ int m_capacity; ++ int m_size; ++ binaryheap_comparator m_compare; ++ void *m_arg; ++ THD *m_thd; ++}; ++#endif // MYSQL_BINARY_HEAP_H +diff --git a/sql/cmp_varlen_keys.h b/sql/cmp_varlen_keys.h +index 220c2949..43391ff2 100644 +--- a/sql/cmp_varlen_keys.h ++++ b/sql/cmp_varlen_keys.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2016, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -39,12 +40,14 @@ + @param s1 pointer to record 1 + @param s2 pointer to record 2 + @return true/false according to sorting order +- true : s1 < s2 +- false : s1 >= s2 ++ * return: ++ * - if s1 < s2; ++ * 0 if s1 = s2; ++ * + if s1 > s2; + */ + inline bool cmp_varlen_keys( + Bounds_checked_array sort_field_array, bool use_hash, +- const uchar *s1, const uchar *s2) { ++ const uchar *s1, const uchar *s2, int *pq_cmp_value=nullptr) { + const uchar *kp1 = s1 + Sort_param::size_of_varlength_field; + const uchar *kp2 = s2 + Sort_param::size_of_varlength_field; + int res; +@@ -53,7 +56,11 @@ inline bool cmp_varlen_keys( + if (sort_field.maybe_null) { + const int k1_nullbyte = *kp1++; + const int k2_nullbyte = *kp2++; +- if (k1_nullbyte != k2_nullbyte) return k1_nullbyte < k2_nullbyte; ++ ++ if (k1_nullbyte != k2_nullbyte) { ++ if (pq_cmp_value) *pq_cmp_value = k1_nullbyte - k2_nullbyte; ++ return k1_nullbyte < k2_nullbyte; ++ } + if (k1_nullbyte == 0 || k1_nullbyte == 0xff) { + if (!sort_field.is_varlen) { + kp1 += sort_field.length; +@@ -79,12 +86,23 @@ inline bool cmp_varlen_keys( + + res = memcmp(kp1, kp2, kp_len); + +- if (res) return res < 0; ++ if (res) { ++ if (pq_cmp_value) ++ *pq_cmp_value = res; ++ return res < 0; ++ } ++ + if (kp1_len != kp2_len) { +- if (sort_field.reverse) ++ if (sort_field.reverse) { ++ if (pq_cmp_value) ++ *pq_cmp_value = kp2_len - kp1_len; + return kp2_len < kp1_len; +- else ++ } ++ else { ++ if (pq_cmp_value) ++ *pq_cmp_value = kp1_len - kp2_len; + return kp1_len < kp2_len; ++ } + } + + kp1 += kp1_len; +@@ -93,8 +111,11 @@ inline bool cmp_varlen_keys( + + if (use_hash) { + // Compare hashes at the end of sort keys +- return memcmp(kp1, kp2, 8) < 0; ++ int cmp_value = memcmp(kp1, kp2, 8); ++ if (pq_cmp_value) *pq_cmp_value = cmp_value; ++ return cmp_value < 0; + } else { ++ if (pq_cmp_value) *pq_cmp_value = 1; + return false; + } + } +diff --git a/sql/composite_iterators.cc b/sql/composite_iterators.cc +index dff62b69..0d69b992 100644 +--- a/sql/composite_iterators.cc ++++ b/sql/composite_iterators.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -179,6 +180,10 @@ int LimitOffsetIterator::Read() { + return result; + } + ++int LimitOffsetIterator::End() { ++ return m_source->End(); ++} ++ + AggregateIterator::AggregateIterator( + THD *thd, unique_ptr_destroy_only source, JOIN *join, + TableCollection tables, bool rollup) +@@ -193,7 +198,7 @@ AggregateIterator::AggregateIterator( + } + + bool AggregateIterator::Init() { +- assert(!m_join->tmp_table_param.precomputed_group_by); ++ assert(!m_join->tmp_table_param->precomputed_group_by); + + // Disable any leftover rollup items used in children. + m_current_rollup_position = -1; +@@ -214,6 +219,10 @@ bool AggregateIterator::Init() { + return false; + } + ++int AggregateIterator::End() { ++ return m_source->End(); ++} ++ + int AggregateIterator::Read() { + switch (m_state) { + case READING_FIRST_ROW: { +@@ -1187,6 +1196,10 @@ int TemptableAggregateIterator::Read() { + return m_table_iterator->Read(); + } + ++int TemptableAggregateIterator::End() { ++ return m_subquery_iterator->End(); ++} ++ + MaterializedTableFunctionIterator::MaterializedTableFunctionIterator( + THD *thd, Table_function *table_function, TABLE *table, + unique_ptr_destroy_only table_iterator) +diff --git a/sql/composite_iterators.h b/sql/composite_iterators.h +index 7e1b31fc..9bff5e44 100644 +--- a/sql/composite_iterators.h ++++ b/sql/composite_iterators.h +@@ -2,6 +2,7 @@ + #define SQL_COMPOSITE_ITERATORS_INCLUDED + + /* Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -81,6 +82,8 @@ class FilterIterator final : public RowIterator { + + int Read() override; + ++ int End() override { return m_source->End(); } ++ + void SetNullRowFlag(bool is_null_row) override { + m_source->SetNullRowFlag(is_null_row); + } +@@ -135,6 +138,8 @@ class LimitOffsetIterator final : public RowIterator { + + int Read() override; + ++ int End() override; ++ + void SetNullRowFlag(bool is_null_row) override { + m_source->SetNullRowFlag(is_null_row); + } +@@ -205,6 +210,8 @@ class AggregateIterator final : public RowIterator { + + bool Init() override; + int Read() override; ++ int End() override; ++ + void SetNullRowFlag(bool is_null_row) override { + m_source->SetNullRowFlag(is_null_row); + } +@@ -535,6 +542,13 @@ class MaterializeIterator final : public TableRowIterator { + bool Init() override; + int Read() override; + ++ int End() override { ++ for (auto &qb : m_query_blocks_to_materialize) { ++ qb.subquery_iterator->End(); ++ } ++ return thd()->is_worker() ? 
-1 : 1; ++ } ++ + void SetNullRowFlag(bool is_null_row) override { + m_table_iterator->SetNullRowFlag(is_null_row); + } +@@ -654,6 +668,10 @@ class StreamingIterator final : public TableRowIterator { + + int Read() override; + ++ int End() override { ++ return m_subquery_iterator->End(); ++ } ++ + void StartPSIBatchMode() override { + m_subquery_iterator->StartPSIBatchMode(); + } +@@ -691,6 +709,8 @@ class TemptableAggregateIterator final : public TableRowIterator { + + bool Init() override; + int Read() override; ++ int End() override; ++ + void SetNullRowFlag(bool is_null_row) override { + m_table_iterator->SetNullRowFlag(is_null_row); + } +diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc +index 49b810f9..20db49a3 100644 +--- a/sql/debug_sync.cc ++++ b/sql/debug_sync.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2009, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -932,7 +933,7 @@ static void debug_sync_remove_action(st_debug_sync_control *ds_control, + uint dsp_idx = static_cast(action - ds_control->ds_action); + DBUG_TRACE; + assert(ds_control); +- assert(ds_control == current_thd->debug_sync_control); ++ assert(ds_control == current_thd->debug_sync_control || ds_control == current_thd->pq_leader->debug_sync_control); + assert(action); + assert(dsp_idx < ds_control->ds_active); + +diff --git a/sql/exchange.cc b/sql/exchange.cc +new file mode 100644 +index 00000000..2d1b4ecc +--- /dev/null ++++ b/sql/exchange.cc +@@ -0,0 +1,223 @@ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "exchange.h" ++#include "field.h" ++#include "table.h" ++ ++/** ++ * alloc space for mqueue_handles ++ * ++ * @return false for success, and otherwise true. ++ */ ++bool Exchange::init() { ++ uint i = 0; ++ MQueue **mqueues = nullptr; ++ /** note that: all workers share one receiver. 
*/ ++ m_receiver = new (m_thd->pq_mem_root) MQ_event(m_thd); ++ if (m_receiver == nullptr) { ++ goto err; ++ } ++ ++ mqueue_handles = ++ new (m_thd->pq_mem_root) MQueue_handle *[m_nqueues] { nullptr }; ++ if (mqueue_handles == nullptr) { ++ goto err; ++ } ++ ++ mqueues = new (m_thd->pq_mem_root) MQueue *[m_nqueues] { nullptr }; ++ if (mqueues == nullptr) { ++ goto err; ++ } ++ ++ for (i = 0; i < m_nqueues; i++) { ++ char *ring_buffer = new (m_thd->pq_mem_root) char[RING_SIZE]; ++ if (ring_buffer == nullptr) { ++ goto err; ++ } ++ ++ MQ_event *sender = new (m_thd->pq_mem_root) MQ_event(); ++ if (sender == nullptr) { ++ goto err; ++ } ++ ++ mqueues[i] = new (m_thd->pq_mem_root) ++ MQueue(sender, m_receiver, ring_buffer, RING_SIZE); ++ if (mqueues[i] == nullptr || ++ DBUG_EVALUATE_IF("pq_mq_error1", true, false)) { ++ goto err; ++ } ++ } ++ ++ for (i = 0; i < m_nqueues; i++) { ++ mqueue_handles[i] = ++ new (m_thd->pq_mem_root) MQueue_handle(mqueues[i], MQ_BUFFER_SIZE); ++ if (mqueue_handles[i] == nullptr || ++ mqueue_handles[i]->init_mqueue_handle(m_thd) || ++ DBUG_EVALUATE_IF("pq_mq_error2", true, false)) { ++ goto err; ++ } ++ } ++ ++ return false; ++ ++err: ++ sql_print_error("alloc space for exchange_record_pq error"); ++ return true; ++} ++ ++void Exchange::cleanup() { ++ destroy(m_receiver); ++ if (mqueue_handles) { ++ for (uint i = 0; i < m_nqueues; i++) { ++ if (mqueue_handles[i]) { ++ mqueue_handles[i]->cleanup(); ++ } ++ } ++ } ++} ++ ++/* ++* determine the checked value corresponding to CONST_ITEM/NULL_FIELD ++* using look-up table to accelerate this process, such as ++ t[0] = {false, false}, t[1] = {false, true}, ++ t[2] = {true, false}, t[3] = {true, true} ++*/ ++static char bool_item_field[8] = {0, 0, 0, 1, 1, 0, 1, 1}; ++ ++char *const_item_and_field_flag(uint value) { ++ assert(value < 4); ++ return bool_item_field + 2 * value; ++} ++ ++/** ++ * reconstruct table->record[0] from MQ's message ++ * @data: the message data ++ * @msg_len: the message length ++ * ++ * @return true if successful execution, and return false otherwise ++ * ++ * Note that: in this process, we can receive an error msg from worker, which ++ * may come from the several conditions: ++ * (1) some workers may produce an error during the execution; ++ * (2) the sending/receiving msg procedure occurs an error; ++ * (3) some unexpected errors; ++ * ++ */ ++bool Exchange::convert_mq_data_to_record(uchar *data, int msg_len, ++ uchar *row_id) { ++ /** there is error */ ++ if (m_thd->is_killed() || m_thd->pq_error) { ++ return false; ++ } ++ if (msg_len == 1 || DBUG_EVALUATE_IF("pq_worker_error10", true, false)) { ++ if (data[0] == EMPTY_MSG && ++ DBUG_EVALUATE_IF("pq_worker_error10", false, true)) { ++ return true; ++ } else { ++ const char *msg = (data[0] == ERROR_MSG) ? "error msg" : "unknown error"; ++ sql_print_error("[Parallel query]: error info. %s\n", msg); ++ m_thd->pq_error = true; ++ return false; ++ } ++ } ++ ++ memset(m_table->record[0], 255, m_table->s->reclength); ++ int size_field = m_table->s->fields; ++ ++ // fetch the row_id info. from MQ ++ if (m_stab_output) { ++ if (row_id) { ++ memcpy(row_id, data, m_ref_length); ++ } ++ data += m_ref_length; ++ } ++ ++ uint null_len = *(uint16 *)data; ++ data = data + sizeof(uint16); ++ uchar *null_flag = (uchar *)data; ++ ++ /** ++ * Note that: we use one more byte to store Field_varstring::length_bytes. 
++ */ ++ if (DBUG_EVALUATE_IF("pq_worker_error11", true, false) || ++ msg_len > ++ (int)(m_table->s->reclength + 6 + null_len + m_table->s->fields + ++ (m_stab_output ? m_ref_length : 0))) { ++ m_thd->pq_error = true; ++ sql_print_error( ++ "[Parallel query]: sending (or receiving) msg from MQ error"); ++ return false; ++ } ++ ++ bool null_field = false; ++ bool const_item = false; ++ uint bit_value; ++ char *status_flag = nullptr; ++ uint null_offset = 0; ++ uint ptr_offset = null_len; ++ Field *item_field = nullptr; ++ int i = 0, j; ++ for (; i < size_field; i++) { ++ item_field = m_table->field[i]; ++ /** determine whether it is a CONST_ITEM or NULL_FIELD */ ++ j = (null_offset >> 3) + 1; ++ assert((null_offset & 1) == 0); ++ bit_value = (null_flag[j] >> (6 - (null_offset & 7))) & 3; ++ status_flag = const_item_and_field_flag(bit_value); ++ const_item = *status_flag; ++ null_field = *(status_flag + 1); ++ enum_field_types field_type = item_field->type(); ++ /** we should fill data into record[0] only when NOT_CONST_ITEM & ++ * NOT_NULL_FIELD */ ++ if (!const_item && !null_field) { ++ if (field_type == MYSQL_TYPE_VARCHAR || ++ field_type == MYSQL_TYPE_VAR_STRING) { ++ Field_varstring *field_var = static_cast(item_field); ++ field_var->length_bytes = (uint)data[ptr_offset]; ++ ptr_offset++; // moving to the real value ++ uint field_length = (field_var->length_bytes == 1) ++ ? (uint)data[ptr_offset] ++ : uint2korr(&data[ptr_offset]); ++ uint pack_length = field_length + field_var->length_bytes; ++ memcpy(field_var->ptr, &data[ptr_offset], pack_length); ++ ptr_offset += pack_length; ++ } else { ++ uint pack_length = item_field->pack_length(); ++ memcpy(item_field->ptr, &data[ptr_offset], pack_length); ++ ptr_offset += pack_length; ++ } ++ } ++ /** set NULL flag of field */ ++ if (!const_item) { ++ if (null_field) { ++ item_field->set_null(); ++ } else { ++ item_field->set_notnull(); ++ } ++ } ++ null_offset += 2; ++ } ++ ++ return true; ++} +diff --git a/sql/exchange.h b/sql/exchange.h +new file mode 100644 +index 00000000..f58ceecc +--- /dev/null ++++ b/sql/exchange.h +@@ -0,0 +1,98 @@ ++#ifndef EXCHAGE_H ++#define EXCHAGE_H ++ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "msg_queue.h" ++ ++class Exchange { ++ public: ++ enum EXCHANGE_TYPE { ++ EXCHANGE_NOSORT = 0, ++ EXCHANGE_SORT, ++ }; ++ ++ Exchange() ++ : mqueue_handles(nullptr), ++ m_thd(nullptr), ++ m_table(nullptr), ++ m_nqueues(0), ++ m_receiver(nullptr), ++ m_ref_length(0), ++ m_stab_output(false) {} ++ ++ Exchange(THD *thd, TABLE *table, uint32 workers, uint ref_length, ++ bool stab_output = false) ++ : mqueue_handles(nullptr), ++ m_thd(thd), ++ m_table(table), ++ m_nqueues(workers), ++ m_receiver(nullptr), ++ m_ref_length(ref_length), ++ m_stab_output(stab_output) {} ++ ++ virtual ~Exchange() {} ++ ++ virtual bool read_mq_record() = 0; ++ virtual EXCHANGE_TYPE get_exchange_type() = 0; ++ virtual bool init(); ++ virtual void cleanup(); ++ virtual bool convert_mq_data_to_record(uchar *data, int msg_len, ++ uchar *row_id = nullptr); ++ ++ inline THD *get_thd() { return m_thd ? m_thd : current_thd; } ++ ++ inline MQueue_handle *get_mq_handle(uint32 i) { ++ assert(mqueue_handles); ++ assert(i < m_nqueues); ++ return mqueue_handles[i]; ++ } ++ ++ inline void mqueue_mmove(int mq_next_readers, int number_workers) { ++ memmove(&mqueue_handles[mq_next_readers], ++ &mqueue_handles[mq_next_readers + 1], ++ sizeof(MQueue_handle *) * (number_workers - mq_next_readers)); ++ } ++ ++ inline int lanuch_workers() { return m_nqueues; } ++ ++ inline TABLE *get_table() { return m_table; } ++ ++ inline int ref_length() { return m_ref_length; } ++ ++ inline bool is_stable() { return m_stab_output; } ++ ++ public: ++ MQueue_handle **mqueue_handles; ++ THD *m_thd; ++ TABLE *m_table; ++ ++ private: ++ uint32 m_nqueues; ++ MQ_event *m_receiver; ++ uint m_ref_length; ++ bool m_stab_output; ++}; ++ ++#endif // EXCHAGE_H +diff --git a/sql/exchange_nosort.cc b/sql/exchange_nosort.cc +new file mode 100644 +index 00000000..e04b4e65 +--- /dev/null ++++ b/sql/exchange_nosort.cc +@@ -0,0 +1,125 @@ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "exchange_nosort.h" ++ ++/** ++ * read one message from MQ[next_queue] ++ * ++ * @retval: true for success, and otherwise false ++ */ ++bool Exchange_nosort::get_next(void **datap, uint32 *m_len, bool *done) { ++ MQ_RESULT result; ++ if (done != nullptr) { ++ *done = false; ++ } ++ MQueue_handle *reader = get_mq_handle(m_next_queue); ++ result = reader->receive(datap, m_len); ++ if (result == MQ_DETACHED) { ++ if (done != nullptr) { ++ *done = true; ++ } ++ return false; ++ } ++ if (result == MQ_WOULD_BLOCK) { ++ return false; ++ } ++ ++ return true; ++} ++ ++/** ++ * read one message from MQ in a round-robin method ++ * @datap: the message data ++ * @m_len: the message length ++ * ++ * @retval: true for success, and otherwise false ++ */ ++bool Exchange_nosort::read_next(void **datap, uint32 *m_len) { ++ bool readerdone = false; ++ int nvisited = 0; ++ bool read_result = false; ++ THD *thd = get_thd(); ++ ++ /** round-robin method to acquire the data */ ++ while (!thd->is_killed() && !thd->pq_error) { ++ read_result = get_next(datap, m_len, &readerdone); ++ /** detached and its content is also read done */ ++ if (readerdone) { ++ assert(false == read_result); ++ m_active_readers--; ++ /** read done for all queues */ ++ if (m_active_readers == 0) { ++ return false; ++ } ++ mqueue_mmove(m_next_queue, m_active_readers); ++ if (m_next_queue >= m_active_readers) { ++ m_next_queue = 0; ++ } ++ continue; ++ } ++ ++ /** data has successfully read into datap */ ++ if (read_result) { ++ return true; ++ } ++ /** move to next worker */ ++ m_next_queue++; ++ if (m_next_queue >= m_active_readers) { ++ m_next_queue = 0; ++ } ++ nvisited++; ++ /** In a round-robin, we cannot read one message from MQ */ ++ if (nvisited >= m_active_readers) { ++ /** ++ * this barrier ensures that the receiver first enters into a ++ * waiting status and then is waked by one sender. ++ */ ++ memory_barrier(); ++ MQ_event *receiver = get_mq_handle(0)->get_receiver(); ++ if (receiver) { ++ receiver->wait_latch(); ++ receiver->reset_latch(); ++ } ++ nvisited = 0; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * read one message from MQ and fill it to table->record[0] ++ * ++ * @retval: true for success, and otherwise false ++ */ ++bool Exchange_nosort::read_mq_record() { ++ assert(!is_stable() && get_exchange_type() == EXCHANGE_NOSORT); ++ bool result = false; ++ uchar *data = nullptr; ++ uint32 msg_len = 0; ++ ++ /** read a message from MQ's local buffer */ ++ result = read_next((void **)&data, &msg_len); ++ return (result && convert_mq_data_to_record(data, msg_len)); ++} +diff --git a/sql/exchange_nosort.h b/sql/exchange_nosort.h +new file mode 100644 +index 00000000..e866c324 +--- /dev/null ++++ b/sql/exchange_nosort.h +@@ -0,0 +1,57 @@ ++#ifndef EXCHANGE_NOSORT_H ++#define EXCHANGE_NOSORT_H ++ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. 
++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "exchange.h" ++ ++class Exchange_nosort : public Exchange { ++ public: ++ Exchange_nosort() : Exchange(), m_next_queue(0), m_active_readers(0) {} ++ ++ Exchange_nosort(THD *thd, TABLE *table, int workers, int ref_length, ++ bool stab_output) ++ : Exchange(thd, table, workers, ref_length, stab_output), ++ m_next_queue(0), ++ m_active_readers(workers) {} ++ ++ virtual ~Exchange_nosort() {} ++ ++ /** read/convert one message from mq to table->record[0] */ ++ bool read_mq_record() override; ++ ++ inline EXCHANGE_TYPE get_exchange_type() override { return EXCHANGE_NOSORT; } ++ ++ private: ++ /** read one message from MQ[next_queue] */ ++ bool get_next(void **datap, uint32 *len, bool *done); ++ /** read one message from MQ in a round-robin manner */ ++ bool read_next(void **datap, uint32 *len); ++ ++ int m_next_queue; /** the next read queue */ ++ int m_active_readers; /** number of left queues which is sending or ++ receiving message */ ++}; ++ ++#endif // EXCHANGE_NOSORT_H +diff --git a/sql/exchange_sort.cc b/sql/exchange_sort.cc +new file mode 100644 +index 00000000..c5126f1d +--- /dev/null ++++ b/sql/exchange_sort.cc +@@ -0,0 +1,350 @@ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "exchange_sort.h" ++#include "bounded_queue.h" ++#include "sql_executor.h" ++ ++/** ++ * allocate memory for members ++ * ++ * @retval: ++ * false for success, and otherwise true. 
++ */ ++bool Exchange_sort::alloc() { ++ int i, j; ++ int readers = lanuch_workers(); ++ THD *thd = get_thd(); ++ ++ m_min_records = new (thd->pq_mem_root) mq_record_st *[readers] { NULL }; ++ if (!m_min_records) goto err; ++ ++ for (i = 0; i < readers; i++) { ++ m_min_records[i] = new (thd->pq_mem_root) mq_record_st(); ++ if (!m_min_records[i]) goto err; ++ } ++ ++ m_record_groups = new (thd->pq_mem_root) mq_records_batch_st[readers]; ++ if (!m_record_groups) goto err; ++ ++ for (i = 0; i < readers; i++) { ++ m_record_groups[i].records = ++ new (thd->pq_mem_root) mq_record_st *[MAX_RECORD_STORE] { NULL }; ++ if (!m_record_groups[i].records) goto err; ++ ++ for (j = 0; j < MAX_RECORD_STORE; j++) { ++ m_record_groups[i].records[j] = new (thd->pq_mem_root) mq_record_st(); ++ if (!m_record_groups[i].records[j]) goto err; ++ ++ m_record_groups[i].records[j]->m_data = ++ new (thd->pq_mem_root) uchar[RECORD_BUFFER_SIZE]; ++ if (!m_record_groups[i].records[j]->m_data) goto err; ++ } ++ } ++ m_heap = new (thd->pq_mem_root) ++ binary_heap(readers + 1, this, heap_compare_records, thd); ++ if (!m_heap || m_heap->init_binary_heap()) goto err; ++ ++ return false; ++ ++err: ++ sql_print_error("allocate space in Exchange_sort::alloc() error"); ++ return true; ++} ++ ++/* ++ * init sort-related structure ++ * ++ * @retval: ++ * false for success, and otherwise true. ++ */ ++bool Exchange_sort::init() { ++ if (Exchange::init()) return true; ++ ++ THD *thd = get_thd(); ++ uint row_id_length = ref_length(); ++ ++ /** init Sort_param */ ++ if (m_sort && m_sort->m_order) { ++ /** generate sort_order */ ++ int s_length = m_sort->make_sortorder(m_sort->m_order, false); ++ if (!s_length) return true; ++ m_sort_param = new (thd->pq_mem_root) Sort_param(); ++ if (!m_sort_param) return true; ++ ++ /** generate sort_param */ ++ TABLE *const table = get_table(); ++ m_sort_param->init_for_filesort( ++ m_sort, make_array(m_sort->sortorder, s_length), ++ sortlength(thd, m_sort->sortorder, s_length), {table}, lanuch_workers(), ++ false); ++ ++ m_sort_param->local_sortorder = ++ Bounds_checked_array(m_sort->sortorder, s_length); ++ ++ /** cache sort key for compare */ ++ m_tmp_key = new (thd->pq_mem_root) uchar[row_id_length]; ++ memset(m_tmp_key, 0, row_id_length); ++ if (!m_tmp_key) return true; ++ } ++ ++ assert(m_sort_param || is_stable()); ++ if (m_sort_param) { ++ int key_len = m_sort_param->max_record_length() + 1; ++ keys[0] = new (thd->pq_mem_root) uchar[key_len]; ++ keys[1] = new (thd->pq_mem_root) uchar[key_len]; ++ ++ if (keys[0] == nullptr || keys[1] == nullptr) return true; ++ ++ memset(keys[0], 0, key_len); ++ memset(keys[1], 0, key_len); ++ } ++ ++ if (is_stable()) { ++ assert(m_file->ht->db_type == DB_TYPE_INNODB); ++ assert(row_id_length == m_file->ref_length); ++ row_id[0] = new (thd->pq_mem_root) uchar[row_id_length]; ++ row_id[1] = new (thd->pq_mem_root) uchar[row_id_length]; ++ ++ if (row_id[0] == nullptr || row_id[1] == nullptr) return true; ++ ++ memset(row_id[0], 0, row_id_length); ++ memset(row_id[1], 0, row_id_length); ++ } ++ ++ /** alloc space for binary heap etc. 
*/ ++ return alloc(); ++} ++ ++/** build the binary heap */ ++void Exchange_sort::build_heap() { ++ int ngroups = lanuch_workers(); ++ for (int i = 0; i < ngroups; i++) { ++ m_record_groups[i].completed = false; ++ m_record_groups[i].n_read = 0; ++ m_record_groups[i].n_total = 0; ++ m_min_records[i]->m_data = nullptr; ++ } ++ ++ /** reset for binary heap */ ++ m_heap->reset(); ++ /** when nowait = false, ensure that each slot has one record through ++ read message from MQ in a blocking mode */ ++ bool nowait = false; ++ ++reread: ++ for (int i = 0; i < ngroups; i++) { ++ if (!m_record_groups[i].completed) { ++ if (m_min_records[i]->m_data == nullptr) { ++ if (read_group(i, nowait)) m_heap->add_unorderd(i); ++ } else { ++ load_group_records(i); ++ } ++ } ++ } ++ ++ /** recheck each slot in m_records */ ++ for (int i = 0; i < ngroups; i++) { ++ if (!m_record_groups[i].completed && !m_min_records[i]->m_data) { ++ nowait = false; ++ goto reread; ++ } ++ } ++ ++ /** build the binary heap */ ++ m_heap->build(); ++ m_init_heap = true; ++} ++ ++/**return the next minimum record */ ++mq_record_st *Exchange_sort::get_min_record() { ++ int i; ++ ++ if (!m_init_heap) { ++ build_heap(); ++ } else { ++ /** (1) obtain the top element of binary heap */ ++ i = m_heap->first(); ++ /** ++ * (2) try to read a record from MQ[i] in a blocking mode. ++ * If read_next() == true, then we push it into binary heap and ++ * adjust the binary heap; otherwise, there is no more message in ++ * MQ[i], and we should remove the i-th worker from binary heap. ++ * ++ */ ++ if (read_group(i, false)) { ++ m_heap->replace_first(i); ++ } else { ++ m_heap->remove_first(); ++ } ++ } ++ /** (3) fetch the top element of binary heap */ ++ if (m_heap->empty()) { ++ return nullptr; ++ } else { ++ i = m_heap->first(); ++ return m_min_records[i]; ++ } ++} ++ ++/** ++ * try to load a batch of messages into a record group ++ * @id: the worker to be loaded ++ */ ++void Exchange_sort::load_group_records(int id) { ++ assert(0 <= id && id < lanuch_workers()); ++ mq_records_batch_st *rec_group = &m_record_groups[id]; ++ /** try to read message from MQ with a non-blocking mode */ ++ for (int i = rec_group->n_total; i < MAX_RECORD_STORE; i++) { ++ if (!load_group_record(id, i, &rec_group->completed, true)) break; ++ /** now, read a new message */ ++ rec_group->n_total++; ++ } ++} ++ ++/** ++ * fetch the current minimum record of the id-th records group ++ * ++ * @i: the ID of group ++ * @nowait: the fetch mode, nowait = false for blocking-mode and ++ * otherwise, nowait = true ++ */ ++bool Exchange_sort::read_group(int id, bool nowait) { ++ assert(0 <= id && id < lanuch_workers()); ++ mq_records_batch_st *rec_group = &m_record_groups[id]; ++ ++ /** the record has been fetched into records_batch */ ++ if (rec_group->n_read < rec_group->n_total) { ++ m_min_records[id] = rec_group->records[rec_group->n_read++]; ++ return true; ++ } else if (rec_group->completed) { ++ return false; ++ } else { ++ if (rec_group->n_read == rec_group->n_total) ++ rec_group->n_read = rec_group->n_total = 0; ++ ++ /** fetch the record from the id-th MQ */ ++ int i = rec_group->n_read; ++ if (!load_group_record(id, i, &rec_group->completed, nowait)) return false; ++ ++ rec_group->n_total++; ++ m_min_records[id] = rec_group->records[rec_group->n_read++]; ++ ++ /** load a batch of records into the id-th record group */ ++ load_group_records(id); ++ return true; ++ } ++} ++ ++/** ++ * store the message from table->record as mq_record_struct ++ * ++ * @data: the message data 
++ * @msg_len: the message length ++ */ ++bool Exchange_sort::store_mq_record(mq_record_st *rec, uchar *data, ++ uint32 msg_len) { ++ assert(rec && data); ++ THD *thd = get_thd(); ++ /** ++ * Making a deep copy from data to rec. First, we determine whether rec has ++ * enough space to copy data. If there is not enough space, then alloc space ++ * for rec. ++ */ ++ if (msg_len > rec->m_buffer_len || ++ DBUG_EVALUATE_IF("pq_msort_error5", true, false)) { ++ if (rec->m_data) destroy(rec->m_data); ++ ++ uint32 new_buffer_len = rec->m_buffer_len; ++ while (msg_len > new_buffer_len) new_buffer_len *= 2; ++ rec->m_data = new (thd->pq_mem_root) uchar[new_buffer_len]; ++ if (!rec->m_data || DBUG_EVALUATE_IF("pq_msort_error5", true, false)) ++ goto err; ++ rec->m_buffer_len = new_buffer_len; ++ } ++ ++ assert(rec->m_data); ++ memcpy(rec->m_data, data, msg_len); ++ rec->m_length = msg_len; ++ return true; ++ ++err: ++ my_error(ER_STD_BAD_ALLOC_ERROR, MYF(0), "", "(MSort::store_mq_record)"); ++ return false; ++} ++ ++/** ++ * read one message from MQ[id] and then copy it to ++ * m_record_groups[id].records[i] ++ * ++ * @id: the ID of worker ++ * @i: the i-th cached record in m_record_groups[id] ++ * @completed: indicates whether MQ[i] has been read completed ++ * @nowait: the mode of read message from MQ ++ */ ++bool Exchange_sort::load_group_record(int id, int i, bool *completed, ++ bool nowait) { ++ assert(0 <= id && id < lanuch_workers()); ++ assert(0 <= i && i < MAX_RECORD_STORE); ++ ++ MQueue_handle *handle = get_mq_handle(id); ++ uchar *data = nullptr; ++ uint32 msg_len = 0; ++ ++ if (completed) *completed = false; ++ /** receive one message from MQ */ ++ MQ_RESULT result = handle->receive((void **)&data, &msg_len, nowait); ++ mq_records_batch_st *rec_group = &m_record_groups[id]; ++ if (result == MQ_DETACHED) { ++ rec_group->completed = true; ++ return false; ++ } ++ if (result == MQ_WOULD_BLOCK) { ++ return false; ++ } ++ ++ assert(result == MQ_SUCCESS); ++ /** copy data into m_record_groups[id].records[i] */ ++ if (store_mq_record(rec_group->records[i], data, msg_len)) return true; ++ ++ return false; ++} ++ ++/** ++ * read the minimum record among all workers and fill it into table->record[0] ++ * ++ * @retval: true for success, and otherwise false ++ */ ++bool Exchange_sort::read_mq_record() { ++ assert(get_exchange_type() == EXCHANGE_SORT); ++ mq_record_st *record = get_min_record(); ++ if (!record) return false; ++ return convert_mq_data_to_record(record->m_data, record->m_length); ++} ++ ++/** cleanup allocated space */ ++void Exchange_sort::cleanup() { ++ Exchange::cleanup(); ++ if (m_heap) m_heap->cleanup(); ++ if (m_sort_param) destroy(m_sort_param); ++} +diff --git a/sql/exchange_sort.h b/sql/exchange_sort.h +new file mode 100644 +index 00000000..70470918 +--- /dev/null ++++ b/sql/exchange_sort.h +@@ -0,0 +1,159 @@ ++#ifndef EXCHANGE_SORT_H ++#define EXCHANGE_SORT_H ++ ++/* Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. 
The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include ++#include "sql/binary_heap.h" ++#include "sql/exchange.h" ++#include "sql/filesort.h" ++#include "sql/handler.h" ++#include "sql/sql_class.h" ++#include "sql/sql_sort.h" ++ ++struct TABLE; ++class THD; ++class Filesort; ++ ++#define MAX_RECORD_STORE 10 ++#define RECORD_BUFFER_SIZE 128 ++ ++/** wrapper of message in MQ */ ++typedef struct mq_record_struct { ++ uchar *m_data; /** the message data in MQ */ ++ uint32 m_length; /** the message length */ ++ uint32 m_buffer_len; /** the length of buffer used to cache this message */ ++ mq_record_struct() ++ : m_data(nullptr), m_length(0), m_buffer_len(RECORD_BUFFER_SIZE) {} ++} mq_record_st; ++ ++/** batch of records cached */ ++typedef struct mq_records_batch_struct { ++ mq_record_st **records; /** the cached message from MQ, where records[i] ++ is the i-th worker's cached records */ ++ int n_total; /** total number of records cached */ ++ int n_read; /** number of records have been read */ ++ bool completed; /** the status of worker's MQ completed = true indicates ++ that all messages have been read from MQ, ++ otherwise, completed = false */ ++} mq_records_batch_st; ++ ++class Exchange_sort : public Exchange { ++ public: ++ Sort_param *m_sort_param; /** sort param */ ++ uchar *row_id[2]{NULL}; /** row_id (or PK) used for stable output */ ++ handler *m_file; /** innodb handler */ ++ uchar *keys[2]{NULL}; /** compared-keys of two nodes in heap*/ ++ ++ private: ++ mq_record_st **m_min_records; /** array of minimum records */ ++ mq_records_batch_st ++ *m_record_groups; /** array of minimum recors of each worker's group */ ++ binary_heap *m_heap; /** binary heap used for merge sort */ ++ Filesort *m_sort; /** sort structure */ ++ bool m_init_heap; /** indicates whether init the heap */ ++ uchar *m_tmp_key; /** tmp key for comparing */ ++ ++ public: ++ Exchange_sort() ++ : Exchange(), ++ m_sort_param(nullptr), ++ m_file(nullptr), ++ m_min_records(nullptr), ++ m_record_groups(nullptr), ++ m_heap(nullptr), ++ m_sort(nullptr), ++ m_init_heap(false), ++ m_tmp_key(nullptr) {} ++ Exchange_sort(THD *thd, TABLE *table, Filesort *sort, handler *file, ++ uint32 workers, uint ref_len, bool stab_output = false) ++ : Exchange(thd, table, workers, ref_len, stab_output), ++ m_sort_param(nullptr), ++ m_file(file), ++ m_min_records(nullptr), ++ m_record_groups(nullptr), ++ m_heap(nullptr), ++ m_sort(sort), ++ m_init_heap(false), ++ m_tmp_key(nullptr) {} ++ ++ virtual ~Exchange_sort() {} ++ ++ public: ++ /** init members */ ++ bool init() override; ++ /** read message from MQ and fill it into table->record[0] */ ++ bool read_mq_record() override; ++ /** cleanup */ ++ void cleanup() override; ++ ++ /** get the k-th record in m_min_records */ ++ mq_record_st *get_record(int k) { ++ assert(0 <= k && k < lanuch_workers()); ++ mq_record_st *record = m_min_records[k]; ++ return record; ++ } ++ ++ inline 
handler *get_file() { return m_file; } ++ ++ inline const Filesort *get_filesort() { return m_sort; } ++ ++ inline uchar *get_row_id(int i) { ++ assert(0 <= i && i < 2); ++ return row_id[i]; ++ } ++ ++ inline uchar *get_key(int i) { ++ assert(0 <= i && i < 2); ++ return keys[i]; ++ } ++ ++ inline const uchar *get_tmp_key() { return m_tmp_key; } ++ ++ inline Sort_param *get_sort_param() { return m_sort_param; } ++ ++ EXCHANGE_TYPE get_exchange_type() override { return EXCHANGE_SORT; } ++ ++ private: ++ /** alloc space for sort */ ++ bool alloc(); ++ /** build the binary heap */ ++ void build_heap(); ++ /** get minimum record */ ++ mq_record_st *get_min_record(); ++ /** read one message from MQ[id] and then copy it ++ to m_record_groups[id].records[i] */ ++ bool load_group_record(int id, int i, bool *completed, bool nowait); ++ /** fetch the current minimum record of the id-th records group ++ and store it in m_min_records */ ++ bool read_group(int id, bool nowait); ++ /** try to load a batch of messages in no-blocking mode */ ++ void load_group_records(int id); ++ /** store message from table->record as a mq_record */ ++ bool store_mq_record(mq_record_st *rec, uchar *data, uint32 msg_len); ++}; ++ ++/** compare two nodes in heap */ ++extern bool heap_compare_records(int a, int b, void *arg); ++#endif // PQ_MERGE_SORT_H +diff --git a/sql/field.cc b/sql/field.cc +index 1226d137..5436f682 100644 +--- a/sql/field.cc ++++ b/sql/field.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -1870,6 +1871,22 @@ uchar *Field::pack(uchar *to, const uchar *from, size_t max_length) const { + return to + length; + } + ++/* ++ * store extra info. (a.k.a. Aggr.->count) into Field ++ * ++ * @param extra the value of Aggr.->count ++ * @param len the length of (Aggr.->count), i.e., sizeof(longlong) ++ */ ++type_conversion_status Field::store_extra(const uchar *extra, size_t len) { ++ if(len == 0 || extra == nullptr) { ++ return TYPE_OK; ++ } ++ assert(pack_length() >= len); ++ uchar *extra_ptr = ptr + pack_length() - len; ++ memcpy(extra_ptr,extra,len); ++ return TYPE_OK; ++} ++ + /** + Unpack a field from row data. + +@@ -2750,7 +2767,9 @@ Field_new_decimal::Field_new_decimal(uint32 len_arg, bool is_nullable_arg, + bin_size = my_decimal_get_binary_size(precision, dec); + } + +-Field *Field_new_decimal::create_from_item(const Item *item) { ++Field *Field_new_decimal::create_from_item(const Item *item, MEM_ROOT *root) { ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; ++ + uint8 dec = item->decimals; + uint8 intg = item->decimal_precision() - dec; + uint32 len = item->max_char_length(); +@@ -2786,7 +2805,7 @@ Field *Field_new_decimal::create_from_item(const Item *item) { + /* Corrected value fits. */ + len = required_length; + } +- return new (*THR_MALLOC) ++ return new (pq_check_root) + Field_new_decimal(len, item->is_nullable(), item->item_name.ptr(), dec, + item->unsigned_flag); + } +diff --git a/sql/field.h b/sql/field.h +index 235ba53e..e3fd64b6 100644 +--- a/sql/field.h ++++ b/sql/field.h +@@ -2,6 +2,7 @@ + #define FIELD_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
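Exchange_sort above performs a k-way merge: it keeps one candidate record per worker (m_min_records, refilled from each worker's cached m_record_groups) in a binary heap and repeatedly hands out the minimum. A simplified model of that merge over plain integers, with std::priority_queue standing in for binary_heap and sorted vectors standing in for the worker message queues:

#include <cstddef>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

// Illustrative k-way merge: each "worker" delivers an already-sorted stream;
// the heap holds one (value, worker) pair per stream, just as Exchange_sort
// holds one candidate record per worker MQ.
std::vector<int> merge_workers(const std::vector<std::vector<int>> &workers) {
  using Node = std::pair<int, std::size_t>;  // (current value, worker id)
  std::priority_queue<Node, std::vector<Node>, std::greater<Node>> heap;
  std::vector<std::size_t> pos(workers.size(), 0);

  for (std::size_t w = 0; w < workers.size(); ++w)
    if (!workers[w].empty()) heap.push({workers[w][0], w});

  std::vector<int> out;
  while (!heap.empty()) {
    auto [val, w] = heap.top();  // global minimum across all workers
    heap.pop();
    out.push_back(val);
    if (++pos[w] < workers[w].size())  // refill from the same worker's stream
      heap.push({workers[w][pos[w]], w});
  }
  return out;
}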
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -105,9 +106,10 @@ class Send_field; + class THD; + class Time_zone; + class my_decimal; ++class Item_sum; + struct TYPELIB; + struct timeval; +- ++struct Field_raw_data; + using Mysql::Nullable; + + /* +@@ -627,7 +629,6 @@ class Field { + return (auto_flags & (GENERATED_FROM_EXPRESSION | DEFAULT_NOW)) == 0; + } + +- protected: + /// Holds the position to the field in record + uchar *ptr; + +@@ -676,6 +677,7 @@ class Field { + const char *orig_db_name{nullptr}; + /// Pointer to original table name, only non-NULL for a temporary table + const char *orig_table_name{nullptr}; ++ Item_sum *item_sum_ref{nullptr}; + const char **table_name, *field_name; + LEX_CSTRING comment; + /* Field is part of the following keys */ +@@ -732,9 +734,9 @@ class Field { + // Length of field. Never write to this member directly; instead, use + // set_field_length(). + uint32 field_length; ++ uint32 extra_length{0}; + virtual void set_field_length(uint32 length) { field_length = length; } + +- private: + uint32 flags{0}; + uint16 m_field_index; // field number in fields array + +@@ -919,6 +921,7 @@ class Field { + return store(nr, false); + } + virtual type_conversion_status store_decimal(const my_decimal *d) = 0; ++ virtual type_conversion_status store_extra(const uchar *, size_t); + /** + Store MYSQL_TIME value with the given amount of decimal digits + into a field. +@@ -2095,6 +2098,7 @@ class Field_new_decimal : public Field_num { + is. + */ + bool m_keep_precision{false}; ++ ulonglong *m_result_count_ptr{nullptr}; + int do_save_field_metadata(uchar *first_byte) const final; + + public: +@@ -2137,7 +2141,7 @@ class Field_new_decimal : public Field_num { + bool zero_pack() const final { return false; } + void sql_type(String &str) const final; + uint32 max_display_length() const final { return field_length; } +- uint32 pack_length() const final { return (uint32)bin_size; } ++ uint32 pack_length() const final { return (uint32)(bin_size + extra_length); } + uint pack_length_from_metadata(uint field_metadata) const final; + bool compatible_field_size(uint field_metadata, Relay_log_info *, uint16, + int *order_var) const final; +@@ -2147,7 +2151,7 @@ class Field_new_decimal : public Field_num { + return new (mem_root) Field_new_decimal(*this); + } + const uchar *unpack(uchar *to, const uchar *from, uint param_data) final; +- static Field *create_from_item(const Item *item); ++ static Field *create_from_item(const Item *item, MEM_ROOT *root = nullptr); + bool send_to_protocol(Protocol *protocol) const final; + void set_keep_precision(bool arg) { m_keep_precision = arg; } + }; +@@ -2472,7 +2476,7 @@ class Field_double final : public Field_real { + bool send_to_protocol(Protocol *protocol) const final; + int cmp(const uchar *, const uchar *) const final; + size_t make_sort_key(uchar *buff, size_t length) const final; +- uint32 pack_length() const final { return sizeof(double); } ++ uint32 pack_length() const final { return sizeof(double) + extra_length; } + void sql_type(String &str) const final; + Field_double *clone(MEM_ROOT *mem_root) const final { + assert(type() == MYSQL_TYPE_DOUBLE); +@@ -3517,7 +3521,6 @@ class Field_varstring : public Field_longstr { + bool is_text_key_type() const final { return binary() ? 
false : true; } + uint32 get_length_bytes() const override { return length_bytes; } + +- private: + /* Store number of bytes used to store length (1 or 2) */ + uint32 length_bytes; + +@@ -4614,6 +4617,7 @@ class Copy_field { + void set(Field *to, Field *from, bool save); // Field to field + + private: ++ void do_copy_extra(const Field *, Field *); + void (*m_do_copy)(Copy_field *, const Field *, Field *); + void (*m_do_copy2)(Copy_field *, const Field *, + Field *); // Used to handle null values +@@ -4716,4 +4720,13 @@ const char *get_field_name_or_expression(THD *thd, const Field *field); + */ + bool pre_validate_value_generator_expr(Item *expression, const char *name, + Value_generator_source source); ++ ++// build field raw data from Field ++extern uint32 pq_build_field_raw(Field *field, Field_raw_data *field_raw); ++ ++extern void pq_build_mq_fields(Field *field, Field_raw_data *field_raw, ++ bool *null_array, int &null_num, uint32 &total_bytes); ++ ++extern void pq_build_mq_item(Item *item, Field_raw_data *field_raw, ++ bool *null_array, int &null_num, uint32 &total_bytes); + #endif /* FIELD_INCLUDED */ +diff --git a/sql/field_conv.cc b/sql/field_conv.cc +index 72763102..a3f24a2c 100644 +--- a/sql/field_conv.cc ++++ b/sql/field_conv.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -56,6 +57,8 @@ + #include "sql/table.h" + #include "sql_string.h" + #include "template_utils.h" // down_cast ++#include "msg_queue.h" ++#include "sql/log.h" + + /** + Check if geometry type sub is a subtype of super. +@@ -544,6 +547,19 @@ static void do_varstring(Copy_field *, const Field *from_field, + /*************************************************************************** + ** The different functions that fills in a Copy_field class + ***************************************************************************/ ++/** copy extra info between two fields */ ++void Copy_field::do_copy_extra(const Field *from, Field *to) { ++ uint32 extra_from_len = from ? from->extra_length : 0; ++ uint32 extra_to_len = to ? to->extra_length : 0; ++ ++ /* strict constraints */ ++ if (extra_from_len == extra_to_len && extra_from_len > 0 && ++ from->pack_length() == to->pack_length()) { ++ uchar *copy_from = from->ptr + from->pack_length() - extra_from_len; ++ uchar *copy_to = to->ptr + to->pack_length() - extra_from_len; ++ memcpy(copy_to, copy_from, extra_from_len); ++ } ++} + + void Copy_field::invoke_do_copy(bool reverse) { + const Field *from = reverse ? 
m_to_field : m_from_field; +@@ -551,6 +567,10 @@ void Copy_field::invoke_do_copy(bool reverse) { + + (*(m_do_copy))(this, from, to); + ++ if (current_thd->parallel_exec) { ++ do_copy_extra(from, to); ++ } ++ + if (from->is_tmp_null() && !to->is_tmp_null()) { + to->set_tmp_nullable(); + to->set_tmp_null(); +@@ -898,3 +918,91 @@ type_conversion_status field_conv(Field *to, const Field *from) { + } else + return to->store(from->val_int(), from->is_flag_set(UNSIGNED_FLAG)); + } ++ ++/** ++ * copy field->ptr to MQ ++ * @field ++ * @field_raw: the corresponding copy structure in MQ ++ */ ++uint32 pq_build_field_raw(Field *field, Field_raw_data *field_raw) { ++ // field must not be NULL value ++ assert(field && !field->is_null()); ++ ++ uint32 copy_bytes = 0; ++ auto field_type = field->type(); ++ /* ++ * For the variable-length field, we should first extract its ++ * effective length and then only copy these effective content. ++ * Corresponding, we need to use one byte to store field->length_bytes, as ++ follows: ++ * |one byte | m_var_len | field_len | ++ \-------m_len-------/ ++ */ ++ if (field_type == MYSQL_TYPE_VARCHAR || field_type == MYSQL_TYPE_VAR_STRING) { ++ Field_varstring *from = static_cast(field); ++ uint field_length = ++ (from->length_bytes == 1) ? *from->ptr : uint2korr(from->ptr); ++ field_raw->m_ptr = from->ptr; ++ field_raw->m_var_len = from->length_bytes; // m_var_len = 1 or 2 ++ field_raw->m_len = from->length_bytes + field_length; ++ ++ // Note that: we use one more byte to store the ++ // Field_varstring::length_bytes ++ copy_bytes += 1 + field_raw->m_len; ++ } ++ /* ++ * For the other fields, they are fixed-length fields whose field length ++ * is field->pack_length(); ++ */ ++ else { ++ field_raw->m_ptr = field->ptr; ++ field_raw->m_len = field->pack_length(); ++ field_raw->m_var_len = 0; ++ copy_bytes += field_raw->m_len; ++ } ++ ++ return copy_bytes; ++} ++ ++/** ++ * build fields' raw data sending to MQ, and fill the NULL value info of field ++ * to the null_array. ++ * @field: ++ * @field_raw: ++ * @null_array: ++ * @null_num: number of maybe-NULL field ++ * @total_bytes: number of total copied bytes ++ */ ++void pq_build_mq_fields(Field *field, Field_raw_data *mq_fields, ++ bool *null_array, int &null_num, uint32 &total_bytes) { ++ /* ++ * for a clearly defined NOT NULL field, its m_null_ptr is nullptr and we ++ * should not mark it in null_array. For a maybe-NULL field, we first ++ * determine this field is NULL or not. ++ */ ++ /* (1) first, mark it as a NOT_CONST_ITEM */ ++ null_array[null_num++] = 0; ++ ++ /* (2) then, determine whether the field is NULL */ ++ null_array[null_num++] = field->is_null() ? 
1 : 0; ++ ++ // If this field is a not NULL-value, then we copy it to MQ ++ if (!null_array[null_num - 1]) { ++ /* the case of NOT_CONST_ITEM & NOT_NULL_FIELD (i.e., 00)*/ ++ mq_fields->m_need_send = true; ++ total_bytes += pq_build_field_raw(field, mq_fields); ++ } else { ++ /* the case of NOT_CONST_ITEM & NULL_FIELD (i.e., 01)*/ ++ mq_fields->m_need_send = false; ++ } ++} ++ ++void pq_build_mq_item(Item *item MY_ATTRIBUTE((unused)), ++ Field_raw_data *mq_fields, bool *null_array, ++ int &null_num, ++ uint32 &total_bytes MY_ATTRIBUTE((unused))) { ++ assert(item && (item->const_item() || item->basic_const_item())); ++ null_array[null_num++] = 1; ++ null_array[null_num++] = 0; ++ mq_fields->m_need_send = false; ++} +diff --git a/sql/filesort.cc b/sql/filesort.cc +index 725ee0fe..0c136c5e 100644 +--- a/sql/filesort.cc ++++ b/sql/filesort.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -77,6 +78,7 @@ + #include "sql/debug_sync.h" + #include "sql/derror.h" + #include "sql/error_handler.h" ++#include "sql/exchange_sort.h" + #include "sql/field.h" + #include "sql/filesort_utils.h" + #include "sql/handler.h" +@@ -601,8 +603,7 @@ bool filesort(THD *thd, Filesort *filesort, RowIterator *source_iterator, + else + sort_mode.append("fixed_sort_key"); + sort_mode.append(", "); +- sort_mode.append(param->using_packed_addons() +- ? "packed_additional_fields" ++ sort_mode.append(param->using_packed_addons() ? "packed_additional_fields" + : param->using_addon_fields() ? "additional_fields" + : "rowid"); + sort_mode.append(">"); +@@ -683,6 +684,7 @@ Filesort::Filesort(THD *thd, Mem_root_array tables_arg, + bool sort_positions, bool unwrap_rollup) + : m_thd(thd), + tables(std::move(tables_arg)), ++ m_order(order), + keep_buffers(keep_buffers_arg), + limit(limit_arg), + sortorder(nullptr), +@@ -691,7 +693,11 @@ Filesort::Filesort(THD *thd, Mem_root_array
tables_arg, + force_stable_sort), // keep relative order of equiv. elts + m_remove_duplicates(remove_duplicates), + m_force_sort_positions(sort_positions), +- m_sort_order_length(make_sortorder(order, unwrap_rollup)) {} ++ m_sort_order_length(0) { ++ if (order) { ++ m_sort_order_length = make_sortorder(order, unwrap_rollup); ++ } ++ } + + uint Filesort::make_sortorder(ORDER *order, bool unwrap_rollup) { + uint count; +@@ -2314,3 +2320,111 @@ void change_double_for_sort(double nr, uchar *to) { + swap(to[3], to[4]); + #endif + } ++ ++/** ++ * compare table->record[0] of two workers in PQ_merge_sort ++ * @a: the ID of first worker ++ * @b: the ID of second worker ++ * @arg: PQ_merge sort ++ * @return ++ * true if a's record is less than b's record; ++ * false otherwise. ++ */ ++bool heap_compare_records(int a, int b, void *arg) { ++ assert(arg); ++ bool convert_res; ++ ++ Exchange_sort *merge_sort = static_cast(arg); ++ const Filesort *filesort = merge_sort->get_filesort(); ++ THD *thd = merge_sort->get_thd(); ++ assert(filesort && current_thd == thd); ++ ++ uchar *row_id_0 = merge_sort->get_row_id(0); ++ uchar *row_id_1 = merge_sort->get_row_id(1); ++ uchar *key_0 = merge_sort->get_key(0); ++ uchar *key_1 = merge_sort->get_key(1); ++ ++ /** using previous old table when comparing row_id (or PK) */ ++ handler *file = merge_sort->get_file(); ++ assert(file->ht->db_type == DB_TYPE_INNODB); ++#if !defined(NDEBUG) ++ uint ref_len = merge_sort->ref_length(); ++ assert(ref_len == file->ref_length); ++#endif ++ bool force_stable_sort = merge_sort->is_stable(); ++ ++ Sort_param *sort_param = merge_sort->get_sort_param(); ++ int key_len = 0, compare_len = 0; ++ ++ if (sort_param) { ++ key_len = sort_param->max_record_length() + 1; ++ compare_len = sort_param->max_compare_length(); ++ } ++ ++ /** ++ * the compare process contains the following three steps: ++ * 1. copy to table->record[0] ++ * 2. add row_id info. ++ * 3. 
generate sort key ++ */ ++ mq_record_st *compare_a = merge_sort->get_record(a); ++ convert_res = merge_sort->convert_mq_data_to_record( ++ compare_a->m_data, compare_a->m_length, row_id_0); ++ ++ // there is an error during execution ++ if (!convert_res || DBUG_EVALUATE_IF("pq_msort_error6", true, false)) { ++ thd->pq_error = true; ++ return true; ++ } ++ ++ /* ++ * using row_id to achieve stable sort, i.e., ++ * record1 < record2 <=> key1 < key2 or (key1 = key2 && row_id1 < row_id2) ++ */ ++ if (sort_param) { ++ sort_param->make_sortkey(key_0, key_len, filesort->tables); ++ } ++ ++ mq_record_st *compare_b = merge_sort->get_record(b); ++ convert_res = merge_sort->convert_mq_data_to_record( ++ compare_b->m_data, compare_b->m_length, row_id_1); ++ ++ // there is an error during execution ++ if (!convert_res || DBUG_EVALUATE_IF("pq_msort_error7", true, false)) { ++ thd->pq_error = true; ++ return true; ++ } ++ ++ if (sort_param) { ++ sort_param->make_sortkey(key_1, key_len, filesort->tables); ++ } ++ ++ // c1: table scan (or index scan with optimized order = nullptr) ++ if (!filesort->sortorder) { ++ assert(sort_param == nullptr && force_stable_sort); ++ assert(row_id_0 && row_id_1); ++ return file->cmp_ref(row_id_0, row_id_1) < 0; ++ } else { ++ int cmp_key_result; ++ // c2: with order ++ if (sort_param != nullptr && sort_param->using_varlen_keys()) { ++ cmp_varlen_keys(sort_param->local_sortorder, sort_param->use_hash, key_0, ++ key_1, &cmp_key_result); ++ if (!force_stable_sort) { ++ return cmp_key_result < 0; ++ } else { ++ assert(row_id_0 && row_id_1); ++ return (cmp_key_result < 0 || ++ (cmp_key_result == 0 && file->cmp_ref(row_id_0, row_id_1) < 0)); ++ } ++ } else { ++ int cmp = memcmp(key_0, key_1, compare_len); ++ if (!force_stable_sort) { ++ return cmp < 0; ++ } else { ++ assert(row_id_0 && row_id_1); ++ return (cmp < 0 || (cmp == 0 && file->cmp_ref(row_id_0, row_id_1) < 0)); ++ } ++ } ++ } ++} +diff --git a/sql/filesort.h b/sql/filesort.h +index 7197ee87..6c7ce384 100644 +--- a/sql/filesort.h ++++ b/sql/filesort.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2006, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -53,6 +54,8 @@ class Filesort { + THD *m_thd; + /// The tables we are sorting. + Mem_root_array
tables; ++ /// list of expressions to order the table by ++ ORDER *m_order; + /// If true, do not free the filesort buffers (use if you expect to sort many + /// times, like in an uncacheable subquery). + const bool keep_buffers; +@@ -96,10 +99,10 @@ class Filesort { + /// circumstances (see NewWeedoutAccessPathForTables()). + void clear_addon_fields(); + +- private: + /* Prepare ORDER BY list for sorting. */ + uint make_sortorder(ORDER *order, bool unwrap_rollup); +- ++ ++ private: + uint m_sort_order_length; + }; + +diff --git a/sql/handler.cc b/sql/handler.cc +index 9219a872..f7f215c8 100644 +--- a/sql/handler.cc ++++ b/sql/handler.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -2942,6 +2943,30 @@ int handler::ha_rnd_init(bool scan) { + return result; + } + ++int handler::ha_pq_init(uint &dop, uint keyno) { ++ DBUG_EXECUTE_IF("ha_pq_init_fail", return HA_ERR_TABLE_DEF_CHANGED;); ++ int result; ++ DBUG_ENTER("handler::ha_pq_init"); ++ assert(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK); ++ assert(inited == NONE || inited == INDEX || (inited == PQ_LEADER)); ++ THD *cur_thd = table->in_use; ++ inited = (result = pq_leader_scan_init(keyno, cur_thd->pq_ctx, dop)) ++ ? NONE ++ : PQ_LEADER; ++ end_range = NULL; ++ pq_ref = false; ++ pq_reverse_scan = false; ++ DBUG_RETURN(result); ++} ++ ++int handler::ha_pq_signal_all() { ++ DBUG_ENTER("handler::ha_pq_signal_all"); ++ int result; ++ THD *cur_thd = table->in_use; ++ result = pq_leader_signal_all(cur_thd->pq_ctx); ++ DBUG_RETURN(result); ++} ++ + /** + End use of random access. + +@@ -2959,9 +2984,25 @@ int handler::ha_rnd_end() { + inited = NONE; + end_range = nullptr; + m_record_buffer = nullptr; ++ pq_range_type = PQ_QUICK_SELECT_NONE; + return rnd_end(); + } + ++int handler::ha_pq_end() { ++ DBUG_ENTER("handler::ha_pq_end"); ++ ++ if(pq_table_scan){ ++ inited = RND; ++ ha_rnd_end(); ++ } else { ++ inited = INDEX; ++ ha_index_end(); ++ } ++ ++ THD *thd = current_thd; ++ DBUG_RETURN(pq_leader_scan_end(thd->pq_ctx)); ++} ++ + /** + Read next row via random scan. + +@@ -2992,6 +3033,28 @@ int handler::ha_rnd_next(uchar *buf) { + return result; + } + ++int handler::ha_pq_next(uchar *buf, void *scan_ctx) { ++ int result; ++ DBUG_EXECUTE_IF("ha_pq_next_deadlock", return HA_ERR_LOCK_DEADLOCK;); ++ DBUG_ENTER("handler::ha_pq_next"); ++ assert(table_share->tmp_table != NO_TMP_TABLE || m_lock_type != F_UNLCK); ++ ++ // Set status for the need to update generated fields ++ m_update_generated_read_fields = table->has_gcol(); ++ ++ MYSQL_TABLE_IO_WAIT(PSI_TABLE_FETCH_ROW, ++ pq_table_scan ? MAX_KEY : active_index, result, ++ { result = pq_worker_scan_next(scan_ctx, buf); }) ++ if (!result && m_update_generated_read_fields) { ++ result = update_generated_read_fields( ++ buf, table, pq_table_scan ? MAX_KEY : active_index); ++ m_update_generated_read_fields = false; ++ } ++ ++ table->set_row_status_from_handler(result); ++ DBUG_RETURN(result); ++} ++ + /** + Read row via random scan from position. 
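heap_compare_records() in filesort.cc above keeps the merge stable: it compares the freshly generated sort keys first and, when they tie, falls back to comparing the InnoDB row reference (handler::cmp_ref). Reduced to its core, assuming byte-comparable keys and row ids:

#include <cstddef>
#include <cstring>

// "a < b" for the merge heap: key order first, row id as tie-break, so rows
// with equal keys still come out in a deterministic (stable) order.
bool record_less(const unsigned char *key_a, const unsigned char *key_b,
                 std::size_t key_len, const unsigned char *rowid_a,
                 const unsigned char *rowid_b, std::size_t rowid_len,
                 bool stable) {
  int cmp = std::memcmp(key_a, key_b, key_len);
  if (!stable || cmp != 0) return cmp < 0;
  return std::memcmp(rowid_a, rowid_b, rowid_len) < 0;
}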
+ +@@ -6456,6 +6519,14 @@ int DsMrr_impl::dsmrr_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, + return retval; + } + ++ if (thd->in_sp_trigger == 0 && thd->parallel_exec && ++ table->file->pq_range_type != PQ_QUICK_SELECT_NONE) { ++ use_default_impl = true; ++ retval = h->handler::multi_range_read_init(seq_funcs, seq_init_param, ++ n_ranges, mode, buf); ++ return retval; ++ } ++ + /* + This assert will hit if we have pushed an index condition to the + primary key index and then "change our mind" and use a different +diff --git a/sql/handler.h b/sql/handler.h +index 76e55abc..81d90037 100644 +--- a/sql/handler.h ++++ b/sql/handler.h +@@ -3,6 +3,7 @@ + + /* + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -69,6 +70,7 @@ + #include "sql/sql_plugin_ref.h" // plugin_ref + #include "thr_lock.h" // thr_lock_type + #include "typelib.h" ++#include "pq_range.h" + + class Alter_info; + class Create_field; +@@ -4134,6 +4136,12 @@ class handler { + ha_rows estimation_rows_to_insert; + + public: ++ uint pq_range_type{0}; ++ key_range pq_ref_key; ++ bool pq_ref{false}; ++ bool pq_table_scan{false}; ++ bool pq_reverse_scan{false}; ++ + handlerton *ht; /* storage engine of this handler */ + /** Pointer to current row */ + uchar *ref; +@@ -4206,7 +4214,7 @@ class handler { + /** Length of ref (1-8 or the clustered key length) */ + uint ref_length; + FT_INFO *ft_handler; +- enum { NONE = 0, INDEX, RND, SAMPLING } inited; ++ enum { NONE = 0, INDEX, RND, SAMPLING, PQ_LEADER, PQ_WORKER } inited; + bool implicit_emptied; /* Can be !=0 only if HEAP */ + const Item *pushed_cond; + +@@ -4442,8 +4450,12 @@ class handler { + int ha_index_init(uint idx, bool sorted); + int ha_index_end(); + int ha_rnd_init(bool scan); ++ int ha_pq_init(uint &dop, uint keyno); + int ha_rnd_end(); ++ int ha_pq_end(); ++ int ha_pq_signal_all(); + int ha_rnd_next(uchar *buf); ++ int ha_pq_next(uchar *buf, void *scan_ctx); + // See the comment on m_update_generated_read_fields. + int ha_rnd_pos(uchar *buf, uchar *pos); + int ha_index_read_map(uchar *buf, const uchar *key, key_part_map keypart_map, +@@ -4461,7 +4473,23 @@ class handler { + int ha_reset(); + /* this is necessary in many places, e.g. in HANDLER command */ + int ha_index_or_rnd_end() { +- return inited == INDEX ? ha_index_end() : inited == RND ? ha_rnd_end() : 0; ++ switch (inited) ++ { ++ case INDEX: ++ return ha_index_end(); ++ break; ++ case RND: ++ return ha_rnd_end(); ++ break; ++ case PQ_LEADER: ++ return ha_pq_end(); ++ break; ++ case PQ_WORKER: ++ return pq_worker_scan_end(nullptr); ++ break; ++ default: ++ return 0; ++ } + } + /** + The cached_table_flags is set at ha_open and ha_external_lock +@@ -4515,6 +4543,10 @@ class handler { + int ha_unload_table(const char *db_name, const char *table_name, + bool error_if_not_loaded); + ++ virtual int pq_leader_signal_all(void *scan_ctx MY_ATTRIBUTE((unused))) { ++ return (0); ++ } ++ + /** + Initializes a parallel scan. It creates a parallel_scan_ctx that has to + be used across all parallel_scan methods. 
Also, gets the number of +@@ -4535,6 +4567,17 @@ class handler { + return 0; + } + ++ virtual int pq_leader_scan_init(uint keyno MY_ATTRIBUTE((unused)), ++ void *&scan_ctx MY_ATTRIBUTE((unused)), ++ uint &n_threads MY_ATTRIBUTE((unused))) { ++ return (0); ++ } ++ ++ virtual int pq_worker_scan_init(uint keyno MY_ATTRIBUTE((unused)), ++ void *scan_ctx MY_ATTRIBUTE((unused))) { ++ return (0); ++ } ++ + /** + This callback is called by each parallel load thread at the beginning of + the parallel load for the adapter scan. +@@ -4601,6 +4644,11 @@ class handler { + return 0; + } + ++ virtual int pq_worker_scan_next(void *scan_ctx MY_ATTRIBUTE((unused)), ++ uchar* buf MY_ATTRIBUTE((unused))) { ++ return (0); ++ } ++ + /** + End of the parallel scan. + @param[in] scan_ctx A scan context created by parallel_scan_init. +@@ -4609,6 +4657,14 @@ class handler { + return; + } + ++ virtual int pq_leader_scan_end(void *parallel_scan_ctx MY_ATTRIBUTE((unused))) { ++ return (0); ++ } ++ ++ virtual int pq_worker_scan_end(void *parallel_scan_ctx MY_ATTRIBUTE((unused))) { ++ return (0); ++ } ++ + /** + Submit a dd::Table object representing a core DD table having + hardcoded data to be filled in by the DDSE. This function can be +diff --git a/sql/item.cc b/sql/item.cc +index cad56f47..cd9f2d7c 100644 +--- a/sql/item.cc ++++ b/sql/item.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -174,7 +175,11 @@ Item::Item(THD *thd, const Item *item) + derived_used(item->derived_used), + m_accum_properties(item->m_accum_properties) { + #ifndef NDEBUG +- assert(item->contextualized); ++ /* ++ PQ will build a tmp table to store result, so the origin item of tmp item ++ which is created by resolvation could be uncontextualized. ++ */ ++ assert(thd->parallel_exec || item->contextualized); + contextualized = true; + #endif // NDEBUG + +@@ -1964,45 +1969,12 @@ void Item_name_const::print(const THD *thd, String *str, + str->append(')'); + } + +-/* +- need a special class to adjust printing : references to aggregate functions +- must not be printed as refs because the aggregate functions that are added to +- the front of select list are not printed as well. +-*/ +-class Item_aggregate_ref : public Item_ref { +- public: +- Item_aggregate_ref(Name_resolution_context *context_arg, Item **item, +- const char *db_name_arg, const char *table_name_arg, +- const char *field_name_arg, Query_block *depended_from_arg) +- : Item_ref(context_arg, item, db_name_arg, table_name_arg, +- field_name_arg) { +- depended_from = depended_from_arg; +- } +- +- void print(const THD *thd, String *str, +- enum_query_type query_type) const override { +- if (ref) +- (*ref)->print(thd, str, query_type); +- else +- Item_ident::print(thd, str, query_type); +- } +- Ref_Type ref_type() const override { return AGGREGATE_REF; } +- +- /** +- Walker processor used by Query_block::transform_grouped_to_derived to +- replace an aggregate's reference to one in the new derived table's (hidden) +- select list. 
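pq_build_field_raw() in field_conv.cc above ships a VARCHAR through the MQ as one byte recording the width of the length prefix (Field_varstring::length_bytes), the prefix itself, and then only the bytes actually used by the value. A standalone sketch of packing and unpacking that layout, assuming the usual 1- or 2-byte little-endian prefix:

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// Wire layout sketched in pq_build_field_raw():
//   | 1 byte: prefix width | length prefix (1 or 2 bytes) | value bytes |
std::vector<uint8_t> pack_varchar(const std::string &value,
                                  uint8_t length_bytes) {
  std::vector<uint8_t> out;
  out.push_back(length_bytes);                        // width of the prefix
  const uint32_t len = static_cast<uint32_t>(value.size());
  out.push_back(static_cast<uint8_t>(len & 0xff));    // low byte of the length
  if (length_bytes == 2) out.push_back(static_cast<uint8_t>(len >> 8));
  out.insert(out.end(), value.begin(), value.end());  // only the used bytes
  return out;
}

std::string unpack_varchar(const std::vector<uint8_t> &buf) {
  const uint8_t length_bytes = buf[0];
  uint32_t len = buf[1];
  if (length_bytes == 2) len |= static_cast<uint32_t>(buf[2]) << 8;
  const std::size_t start = 1 + length_bytes;
  return std::string(buf.begin() + start, buf.begin() + start + len);
}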
+- +- @param arg An info object of type Item::Aggregate_ref_update +- @returns false +- */ +- bool update_aggr_refs(uchar *arg) override { +- auto *info = pointer_cast(arg); +- if (*ref != info->m_target) return false; +- ref = info->m_owner->add_hidden_item(info->m_target); +- return false; +- } +-}; ++bool Item_aggregate_ref::update_aggr_refs(uchar *arg) { ++ auto *info = pointer_cast(arg); ++ if (*ref != info->m_target) return false; ++ ref = info->m_owner->add_hidden_item(info->m_target); ++ return false; ++} + + /** + 1. Move SUM items out from item tree and replace with reference. +@@ -2948,6 +2920,16 @@ my_decimal *Item_field::val_decimal(my_decimal *decimal_value) { + return field->val_decimal(decimal_value); + } + ++const uchar *Item_field::val_extra(uint32 *len) { ++ assert(len != nullptr); ++ *len = field->extra_length; ++ if (*len == 0) { ++ return nullptr; ++ } ++ ++ return (field->ptr + field->pack_length() - *len); ++} ++ + bool Item_field::get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) { + if ((null_value = field->is_null()) || field->get_date(ltime, fuzzydate)) { + memset(ltime, 0, sizeof(*ltime)); +@@ -4696,7 +4678,7 @@ static Item **find_field_in_group_list(Item *find_item, ORDER *group_list) { + - resolved item - if the item was resolved + */ + +-static Item **resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, ++Item **resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, + Query_block *select) { + DBUG_TRACE; + Item **select_ref = nullptr; +@@ -5880,27 +5862,29 @@ bool Item::eq_by_collation(Item *item, bool binary_cmp, + @param table Table for which the field is created + */ + +-Field *Item::make_string_field(TABLE *table) const { ++Field *Item::make_string_field(TABLE *table, MEM_ROOT *root) const { + Field *field; ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; ++ + assert(collation.collation); + if (data_type() == MYSQL_TYPE_JSON) + field = +- new (*THR_MALLOC) Field_json(max_length, m_nullable, item_name.ptr()); ++ new (pq_check_root) Field_json(max_length, m_nullable, item_name.ptr()); + else if (data_type() == MYSQL_TYPE_GEOMETRY) { +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_geom(max_length, m_nullable, item_name.ptr(), + Field::GEOM_GEOMETRY, Nullable()); + } else if (max_length / collation.collation->mbmaxlen > + CONVERT_IF_BIGGER_TO_BLOB) +- field = new (*THR_MALLOC) Field_blob( ++ field = new (pq_check_root) Field_blob( + max_length, m_nullable, item_name.ptr(), collation.collation, true); + /* Item_type_holder holds the exact type, do not change it */ + else if (max_length > 0 && + (type() != Item::TYPE_HOLDER || data_type() != MYSQL_TYPE_STRING)) +- field = new (*THR_MALLOC) Field_varstring( ++ field = new (pq_check_root) Field_varstring( + max_length, m_nullable, item_name.ptr(), table->s, collation.collation); + else +- field = new (*THR_MALLOC) Field_string( ++ field = new (pq_check_root) Field_string( + max_length, m_nullable, item_name.ptr(), collation.collation); + if (field) field->init(table); + return field; +@@ -5917,67 +5901,69 @@ Field *Item::make_string_field(TABLE *table) const { + */ + + Field *Item::tmp_table_field_from_field_type(TABLE *table, +- bool fixed_length) const { ++ bool fixed_length, ++ MEM_ROOT *root) const { + /* + The field functions defines a field to be not null if null_ptr is not 0 + */ + Field *field; ++ MEM_ROOT *pq_check_root = root ? 
root : *THR_MALLOC; + + switch (data_type()) { + case MYSQL_TYPE_DECIMAL: + case MYSQL_TYPE_NEWDECIMAL: +- field = Field_new_decimal::create_from_item(this); ++ field = Field_new_decimal::create_from_item(this, root); + break; + case MYSQL_TYPE_TINY: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_tiny(max_length, m_nullable, item_name.ptr(), unsigned_flag); + break; + case MYSQL_TYPE_SHORT: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_short(max_length, m_nullable, item_name.ptr(), unsigned_flag); + break; + case MYSQL_TYPE_LONG: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_long(max_length, m_nullable, item_name.ptr(), unsigned_flag); + break; + case MYSQL_TYPE_LONGLONG: +- field = new (*THR_MALLOC) Field_longlong(max_length, m_nullable, ++ field = new (pq_check_root) Field_longlong(max_length, m_nullable, + item_name.ptr(), unsigned_flag); + break; + case MYSQL_TYPE_FLOAT: +- field = new (*THR_MALLOC) Field_float( ++ field = new (pq_check_root) Field_float( + max_length, m_nullable, item_name.ptr(), decimals, unsigned_flag); + break; + case MYSQL_TYPE_DOUBLE: +- field = new (*THR_MALLOC) Field_double( ++ field = new (pq_check_root) Field_double( + max_length, m_nullable, item_name.ptr(), decimals, unsigned_flag); + break; + case MYSQL_TYPE_INT24: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_medium(max_length, m_nullable, item_name.ptr(), unsigned_flag); + break; + case MYSQL_TYPE_DATE: + case MYSQL_TYPE_NEWDATE: +- field = new (*THR_MALLOC) Field_newdate(m_nullable, item_name.ptr()); ++ field = new (pq_check_root) Field_newdate(m_nullable, item_name.ptr()); + break; + case MYSQL_TYPE_TIME: + field = +- new (*THR_MALLOC) Field_timef(m_nullable, item_name.ptr(), decimals); ++ new (pq_check_root) Field_timef(m_nullable, item_name.ptr(), decimals); + break; + case MYSQL_TYPE_TIMESTAMP: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_timestampf(m_nullable, item_name.ptr(), decimals); + break; + case MYSQL_TYPE_DATETIME: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_datetimef(m_nullable, item_name.ptr(), decimals); + break; + case MYSQL_TYPE_YEAR: + assert(max_length == 4); // Field_year is only for length 4. 
+- field = new (*THR_MALLOC) Field_year(m_nullable, item_name.ptr()); ++ field = new (pq_check_root) Field_year(m_nullable, item_name.ptr()); + break; + case MYSQL_TYPE_BIT: +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_bit_as_char(max_length, m_nullable, item_name.ptr()); + break; + case MYSQL_TYPE_INVALID: +@@ -5990,7 +5976,7 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table, + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_NULL: + if (fixed_length && max_length <= CONVERT_IF_BIGGER_TO_BLOB) { +- field = new (*THR_MALLOC) Field_string( ++ field = new (pq_check_root) Field_string( + max_length, m_nullable, item_name.ptr(), collation.collation); + break; + } +@@ -5999,26 +5985,26 @@ Field *Item::tmp_table_field_from_field_type(TABLE *table, + case MYSQL_TYPE_SET: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_VARCHAR: +- return make_string_field(table); ++ return make_string_field(table, root); + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_BLOB: + if (this->type() == Item::TYPE_HOLDER) +- field = new (*THR_MALLOC) Field_blob( ++ field = new (pq_check_root) Field_blob( + max_length, m_nullable, item_name.ptr(), collation.collation, true); + else +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_blob(max_length, m_nullable, item_name.ptr(), + collation.collation, false); + break; // Blob handled outside of case + case MYSQL_TYPE_GEOMETRY: +- field = new (*THR_MALLOC) Field_geom( ++ field = new (pq_check_root) Field_geom( + max_length, m_nullable, item_name.ptr(), get_geometry_type(), {}); + break; + case MYSQL_TYPE_JSON: + field = +- new (*THR_MALLOC) Field_json(max_length, m_nullable, item_name.ptr()); ++ new (pq_check_root) Field_json(max_length, m_nullable, item_name.ptr()); + } + if (field) field->init(table); + return field; +@@ -6037,6 +6023,10 @@ void Item_field::make_field(Send_field *tmp_field) { + tmp_field->org_col_name = + m_orig_field_name != nullptr ? m_orig_field_name : ""; + tmp_field->field = true; ++ if (ref) { ++ tmp_field->org_col_name = ref_col_name; ++ tmp_field->org_table_name = orig_table_name(); ++ } + } + + /** +@@ -6138,6 +6128,10 @@ type_conversion_status Item::save_in_field(Field *field, bool no_conversions) { + + type_conversion_status Item::save_in_field_inner(Field *field, + bool no_conversions) { ++ uint32 extra_len; ++ const uchar *extra = val_extra(&extra_len); ++ field->store_extra(extra, extra_len); ++ + // Storing of arrays should be handled by specialized subclasses. + assert(!returns_array()); + +@@ -7427,7 +7421,8 @@ Item_ref::Item_ref(Name_resolution_context *context_arg, Item **item, + const char *db_name_arg, const char *table_name_arg, + const char *field_name_arg, bool alias_of_expr_arg) + : Item_ident(context_arg, db_name_arg, table_name_arg, field_name_arg), +- ref(item) { ++ ref(item), ++ copy_type(WITH_CONTEXT_REF) { + m_alias_of_expr = alias_of_expr_arg; + /* + This constructor used to create some internals references over fixed items +@@ -9824,16 +9819,17 @@ uint32 Item_aggregate_type::display_length(Item *item) { + created field + */ + +-Field *Item_aggregate_type::make_field_by_type(TABLE *table, bool strict) { ++Field *Item_aggregate_type::make_field_by_type(TABLE *table, bool strict, MEM_ROOT *root) { + /* + The field functions defines a field to be not null if null_ptr is not 0 + */ + Field *field; ++ MEM_ROOT *pq_check_root = root ? 
root : *THR_MALLOC; + + switch (data_type()) { + case MYSQL_TYPE_ENUM: + assert(m_typelib != nullptr); +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_enum(max_length, is_nullable(), item_name.ptr(), + get_enum_pack_length(m_typelib->count), m_typelib, + collation.collation); +@@ -9841,17 +9837,17 @@ Field *Item_aggregate_type::make_field_by_type(TABLE *table, bool strict) { + break; + case MYSQL_TYPE_SET: + assert(m_typelib != nullptr); +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_set(max_length, is_nullable(), item_name.ptr(), + get_set_pack_length(m_typelib->count), m_typelib, + collation.collation); + if (field) field->init(table); + break; + case MYSQL_TYPE_NULL: +- field = make_string_field(table); ++ field = make_string_field(table, root); + break; + default: +- field = tmp_table_field_from_field_type(table, false); ++ field = tmp_table_field_from_field_type(table, false, root); + break; + } + if (field == nullptr) return nullptr; +diff --git a/sql/item.h b/sql/item.h +index 62abaddc..92045542 100644 +--- a/sql/item.h ++++ b/sql/item.h +@@ -2,6 +2,7 @@ + #define ITEM_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -69,6 +70,7 @@ + #include "sql/trigger_def.h" // enum_trigger_variable_type + #include "sql_string.h" + #include "template_utils.h" ++#include "sql_class.h" + + class Item; + class Item_field; +@@ -86,6 +88,9 @@ typedef Bounds_checked_array Ref_item_array; + + void item_init(void); /* Init item functions */ + ++/** this item needs extra bytes for storing count info. */ ++extern bool need_extra(Item_sum *ref_item); ++ + /** + Default condition filtering (selectivity) values used by + get_filtering_effect() and friends when better estimates +@@ -779,6 +784,26 @@ class Item : public Parse_tree_node { + friend class udf_handler; + virtual bool is_expensive_processor(uchar *) { return false; } + ++ public: ++ /** ++ During resolve/optimize phase, a item maybe subsituted by a new one, for example ++ convert_constant_item()/resolve_const_item(), this point to old item for new item. ++ */ ++ Item *origin_item{nullptr}; ++ ++ /** ++ During create_tmp_table, const_item can be skipped when hidden_field_count <= 0; ++ and thus, these skipped items will not create result_field in tmp table. Here, we ++ should mark it when sending data to MQ. ++ */ ++ bool skip_create_tmp_table{false}; ++ ++ /** ++ During itemize (or new item()), some item are added to THD::m_item_list for ease of ++ releasing the space allocated at runtime. ++ */ ++ bool pq_alloc_item{false}; ++ + protected: + /** + Sets the result value of the function an empty string, using the current +@@ -1007,6 +1032,18 @@ class Item : public Parse_tree_node { + */ + Item(THD *thd, const Item *item); + ++ virtual Item *pq_clone(THD *thd, Query_block *select); ++ ++ virtual bool pq_copy_from(THD *thd, Query_block *select, Item *item); ++ ++ virtual size_t pq_extra_len(bool) { return 0; } ++ ++ virtual const uchar *val_extra(uint32 *len) { ++ assert(len != nullptr); ++ *len = 0; ++ return nullptr; ++ } ++ + /** + Parse-time context-independent constructor. 
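The pq_clone()/pq_copy_from() virtuals introduced above appear throughout the Item hierarchy so that a parallel worker can obtain its own copy of the resolved item tree, allocated from its own memory root. The underlying pattern is a classic polymorphic clone; a toy version with hypothetical names (Arena, Expr) rather than the server's classes:

#include <memory>
#include <utility>
#include <vector>

struct Arena;  // stand-in for a per-worker MEM_ROOT

// Every node can duplicate itself into a given arena, the way Item subclasses
// override pq_clone(THD *, Query_block *).
struct Expr {
  virtual ~Expr() = default;
  virtual Expr *clone(Arena &arena) const = 0;
};

struct Arena {
  std::vector<std::unique_ptr<Expr>> owned;  // everything cloned for one worker
  template <class T, class... Args>
  T *make(Args &&... args) {
    auto node = std::make_unique<T>(std::forward<Args>(args)...);
    T *raw = node.get();
    owned.push_back(std::move(node));  // the arena owns the clone's lifetime
    return raw;
  }
};

struct IntLiteral : Expr {
  long long value;
  explicit IntLiteral(long long v) : value(v) {}
  Expr *clone(Arena &arena) const override {
    return arena.make<IntLiteral>(value);  // copy node state into the arena
  }
};

struct Add : Expr {
  const Expr *lhs, *rhs;
  Add(const Expr *l, const Expr *r) : lhs(l), rhs(r) {}
  Expr *clone(Arena &arena) const override {
    return arena.make<Add>(lhs->clone(arena), rhs->clone(arena));  // deep copy
  }
};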
+ +@@ -1098,7 +1135,7 @@ class Item : public Parse_tree_node { + */ + virtual void notify_removal() {} + virtual void make_field(Send_field *field); +- virtual Field *make_string_field(TABLE *table) const; ++ virtual Field *make_string_field(TABLE *table, MEM_ROOT *root = nullptr) const; + virtual bool fix_fields(THD *, Item **); + /** + Fix after tables have been moved from one query_block level to the parent +@@ -2845,7 +2882,7 @@ class Item : public Parse_tree_node { + // used in row subselects to get value of elements + virtual void bring_value() {} + +- Field *tmp_table_field_from_field_type(TABLE *table, bool fixed_length) const; ++ Field *tmp_table_field_from_field_type(TABLE *table, bool fixed_length, MEM_ROOT *root = nullptr) const; + virtual Item_field *field_for_view_update() { return nullptr; } + /** + Informs an item that it is wrapped in a truth test, in case it wants to +@@ -3419,6 +3456,8 @@ class Item_basic_constant : public Item { + } + bool basic_const_item() const override { return true; } + void set_str_value(String *str) { str_value = *str; } ++ ++ bool pq_copy_from(THD *thd, Query_block *select, Item* item) override; + }; + + /***************************************************************************** +@@ -3632,6 +3671,9 @@ class Item_name_const final : public Item { + return false; + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item* item) override; ++ + protected: + type_conversion_status save_in_field_inner(Field *field, + bool no_conversions) override { +@@ -3696,6 +3738,8 @@ class Item_ident : public Item { + the source base table. + */ + const char *m_orig_db_name; ++ ++ public: + /** + Names the original table that is the source of the field. If field is from + - a non-aliased base table, the same as table_name. +@@ -3706,6 +3750,8 @@ class Item_ident : public Item { + - a temporary table (in optimization stage), the name of the source base tbl + */ + const char *m_orig_table_name; ++ ++ protected: + /** + Names the field in the source base table. If field is from + - an expression, a NULL pointer. +@@ -3762,6 +3808,7 @@ class Item_ident : public Item { + cached_table should be replaced by table_ref ASAP. 
+ */ + TABLE_LIST *cached_table; ++ uint m_tableno{0}; + Query_block *depended_from; + + Item_ident(Name_resolution_context *context_arg, const char *db_name_arg, +@@ -3906,6 +3953,8 @@ class Item_ident : public Item { + bool any_privileges); + bool is_strong_side_column_not_in_fd(uchar *arg) override; + bool is_column_not_in_fd(uchar *arg) override; ++ ++ bool pq_copy_from(THD *thd, Query_block *select, Item* item) override; + }; + + class Item_ident_for_show final : public Item { +@@ -3969,8 +4018,9 @@ class Item_field : public Item_ident { + TABLE_LIST *table_ref; + /// Source field + Field *field; ++ const char *ref_col_name{nullptr}; ++ bool ref{false}; + +- private: + /// Result field + Field *result_field{nullptr}; + +@@ -4043,6 +4093,8 @@ class Item_field : public Item_ident { + + bool itemize(Parse_context *pc, Item **res) override; + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item* item) override; + enum Type type() const override { return FIELD_ITEM; } + bool eq(const Item *item, bool binary_cmp) const override; + double val_real() override; +@@ -4052,6 +4104,7 @@ class Item_field : public Item_ident { + longlong val_time_temporal_at_utc() override; + longlong val_date_temporal_at_utc() override; + my_decimal *val_decimal(my_decimal *) override; ++ const uchar *val_extra(uint32 *len) override; + String *val_str(String *) override; + bool val_json(Json_wrapper *result) override; + bool send(Protocol *protocol, String *str_arg) override; +@@ -4214,6 +4267,11 @@ class Item_field : public Item_ident { + bool replace_field_processor(uchar *arg) override; + bool strip_db_table_name_processor(uchar *) override; + ++ size_t pq_extra_len(bool) override { ++ return field->item_sum_ref && need_extra(field->item_sum_ref) ++ ? 
sizeof(longlong) : 0; ++ } ++ + /** + Checks if the current object represents an asterisk select list item + +@@ -4327,6 +4385,7 @@ class Item_null : public Item_basic_constant { + + Item *safe_charset_converter(THD *thd, const CHARSET_INFO *tocs) override; + bool check_partition_func_processor(uchar *) override { return false; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /// Dynamic parameters used as placeholders ('?') inside prepared statements +@@ -4704,6 +4763,9 @@ class Item_int : public Item_num { + bool no_conversions) override; + + public: ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item* item) override; ++ + enum Type type() const override { return INT_ITEM; } + Item_result result_type() const override { return INT_RESULT; } + longlong val_int() override { +@@ -4742,6 +4804,8 @@ class Item_int_0 final : public Item_int { + public: + Item_int_0() : Item_int(NAME_STRING("0"), 0, 1) {} + explicit Item_int_0(const POS &pos) : Item_int(pos, NAME_STRING("0"), 0, 1) {} ++ ++ Item *pq_clone(THD *, Query_block *) override { return this; } + }; + + /* +@@ -4822,6 +4886,7 @@ class Item_uint : public Item_int { + enum_query_type query_type) const override; + Item_num *neg() override; + uint decimal_precision() const override { return max_length; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* decimal (fixed point) constant */ +@@ -4869,6 +4934,7 @@ class Item_decimal : public Item_num { + bool eq(const Item *, bool binary_cmp) const override; + void set_decimal_value(const my_decimal *value_par); + bool check_partition_func_processor(uchar *) override { return false; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_float : public Item_num { +@@ -4952,6 +5018,7 @@ class Item_float : public Item_num { + void print(const THD *thd, String *str, + enum_query_type query_type) const override; + bool eq(const Item *, bool binary_cmp) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_pi : public Item_float { +@@ -5074,6 +5141,8 @@ class Item_string : public Item_basic_constant { + fixed = true; + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + /* + This is used in stored procedures to avoid memory leaks and + does a deep copy of its argument. 
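Several additions above (Field::store_extra(), Field::extra_length, Item_field::val_extra(), and pq_extra_len() returning sizeof(longlong)) reserve extra trailing bytes in a packed field so that an aggregate's row count can travel alongside its partial value. Why the count has to travel with the value is easiest to see for an average combined from several workers' partial results:

#include <cstdint>
#include <vector>

// Combining per-worker partial aggregates: a correct global average needs each
// worker's (sum, count) pair; averaging the workers' own averages would weight
// every worker equally, regardless of how many rows it actually processed.
struct PartialAvg {
  double sum = 0.0;
  int64_t count = 0;  // the "extra" longlong the patch appends to the field
};

double merge_avg(const std::vector<PartialAvg> &parts) {
  double sum = 0.0;
  int64_t count = 0;
  for (const PartialAvg &p : parts) {
    sum += p.sum;
    count += p.count;
  }
  return count != 0 ? sum / count : 0.0;
}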
+@@ -5168,7 +5237,7 @@ double double_from_string_with_check(const CHARSET_INFO *cs, const char *cptr, + const char *end); + + class Item_static_string_func : public Item_string { +- const Name_string func_name; ++ Name_string func_name; + + public: + Item_static_string_func(const Name_string &name_par, const char *str, +@@ -5196,6 +5265,8 @@ class Item_static_string_func : public Item_string { + func_arg->banned_function_name = func_name.ptr(); + return true; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* for show tables */ +@@ -5300,6 +5371,7 @@ class Item_hex_string : public Item_basic_constant { + bool check_partition_func_processor(uchar *) override { return false; } + static LEX_CSTRING make_hex_str(const char *str, size_t str_length); + uint decimal_precision() const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + void hex_string_init(const char *str, uint str_length); +@@ -5318,6 +5390,8 @@ class Item_bin_string final : public Item_hex_string { + + static LEX_CSTRING make_bin_str(const char *str, size_t str_length); + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + private: + void bin_string_init(const char *str, size_t str_length); + }; +@@ -5340,9 +5414,8 @@ class Item_bin_string final : public Item_hex_string { + available. + */ + class Item_result_field : public Item { +- protected: +- Field *result_field{nullptr}; /* Save result here */ + public: ++ Field *result_field{nullptr}; /* Save result here */ + Item_result_field() = default; + explicit Item_result_field(const POS &pos) : Item(pos) {} + +@@ -5433,15 +5506,26 @@ class Item_ref : public Item_ident { + public: + Item **ref; + ++ enum PQ_copy_type { ++ WITH_CONTEXT = 0, ++ WITHOUT_CONTEXT, ++ WITH_CONTEXT_REF, ++ WITH_REF_ONLY ++ }; ++ ++ PQ_copy_type copy_type; ++ + public: + Item_ref(Name_resolution_context *context_arg, const char *db_name_arg, + const char *table_name_arg, const char *field_name_arg) + : Item_ident(context_arg, db_name_arg, table_name_arg, field_name_arg), +- ref(nullptr) {} ++ ref(nullptr), ++ copy_type(WITH_CONTEXT) {} + Item_ref(const POS &pos, const char *db_name_arg, const char *table_name_arg, + const char *field_name_arg) + : Item_ident(pos, db_name_arg, table_name_arg, field_name_arg), +- ref(nullptr) {} ++ ref(nullptr), ++ copy_type(WITHOUT_CONTEXT) {} + + /* + This constructor is used in two scenarios: +@@ -5467,7 +5551,8 @@ class Item_ref : public Item_ident { + Item_ref(THD *thd, Item_ref *item) + : Item_ident(thd, item), + result_field(item->result_field), +- ref(item->ref) {} ++ ref(item->ref), ++ copy_type(WITH_REF_ONLY) {} + enum Type type() const override { return REF_ITEM; } + bool eq(const Item *item, bool binary_cmp) const override { + const Item *it = const_cast(item)->real_item(); +@@ -5617,6 +5702,8 @@ class Item_ref : public Item_ident { + return (*ref)->check_column_in_group_by(arg); + } + bool collect_item_field_or_ref_processor(uchar *arg) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -5658,6 +5745,8 @@ class Item_view_ref final : public Item_ref { + + bool fix_fields(THD *, Item **) override; + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + /** + Takes into account whether an Item in a derived table / view is part of an + inner table of an outer join. 
+@@ -5866,6 +5955,8 @@ class Item_int_with_ref : public Item_int { + } + Item *clone_item() const override; + Item *real_item() override { return ref; } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* +@@ -5915,6 +6006,8 @@ class Item_datetime_with_ref final : public Item_temporal_with_ref { + assert(0); + return val_int(); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* +@@ -5940,6 +6033,8 @@ class Item_time_with_ref final : public Item_temporal_with_ref { + assert(0); + return val_int(); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -6125,6 +6220,7 @@ class Item_default_value final : public Item_field { + } + + Item *transform(Item_transformer transformer, uchar *args) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + /// The argument for this function +@@ -6320,8 +6416,9 @@ class Item_trigger_field final : public Item_field, + }; + + class Item_cache : public Item_basic_constant { +- protected: ++ public: + Item *example{nullptr}; ++ protected: + table_map used_table_map{0}; + /** + Field that this object will get value from. This is used by +@@ -6449,6 +6546,9 @@ class Item_cache : public Item_basic_constant { + if (!example) return INT_RESULT; + return Field::result_merge_type(example->data_type()); + } ++ ++ Item *get_example() { return example;} ++ bool pq_copy_from(THD *thd, Query_block *select, Item* item) override; + }; + + class Item_cache_int final : public Item_cache { +@@ -6477,6 +6577,7 @@ class Item_cache_int final : public Item_cache { + bool get_time(MYSQL_TIME *ltime) override { return get_time_from_int(ltime); } + Item_result result_type() const override { return INT_RESULT; } + bool cache_value() override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_cache_real final : public Item_cache { +@@ -6498,6 +6599,8 @@ class Item_cache_real final : public Item_cache { + Item_result result_type() const override { return REAL_RESULT; } + bool cache_value() override; + void store_value(Item *expr, double value); ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_cache_decimal final : public Item_cache { +@@ -6520,6 +6623,8 @@ class Item_cache_decimal final : public Item_cache { + Item_result result_type() const override { return DECIMAL_RESULT; } + bool cache_value() override; + void store_value(Item *expr, my_decimal *d); ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_cache_str final : public Item_cache { +@@ -6554,6 +6659,8 @@ class Item_cache_str final : public Item_cache { + const CHARSET_INFO *charset() const { return value->charset(); } + bool cache_value() override; + void store_value(Item *expr, String &s); ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_cache_row final : public Item_cache { +@@ -6610,6 +6717,8 @@ class Item_cache_row final : public Item_cache { + void bring_value() override; + void cleanup() override { Item_cache::cleanup(); } + bool cache_value() override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_cache_datetime : public Item_cache { +@@ -6649,6 +6758,8 @@ class Item_cache_datetime : public Item_cache { + Item_cache::clear(); + str_value_cached = false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /// An item cache for values of type JSON. 
+@@ -6706,7 +6817,7 @@ class Item_aggregate_type : public Item { + + Item_result result_type() const override; + bool join_types(THD *, Item *); +- Field *make_field_by_type(TABLE *table, bool strict); ++ Field *make_field_by_type(TABLE *table, bool strict, MEM_ROOT *root = nullptr); + static uint32 display_length(Item *item); + Field::geometry_type get_geometry_type() const override { + return geometry_type; +@@ -6852,4 +6963,42 @@ inline Item *GetNthVisibleField(const mem_root_deque &fields, + return nullptr; + } + ++/* ++ need a special class to adjust printing : references to aggregate functions ++ must not be printed as refs because the aggregate functions that are added to ++ the front of select list are not printed as well. ++*/ ++class Item_aggregate_ref : public Item_ref { ++ public: ++ Item_aggregate_ref(Name_resolution_context *context_arg, Item **item, ++ const char *db_name_arg, const char *table_name_arg, const char *field_name_arg, ++ Query_block *depended_from_arg) ++ : Item_ref(context_arg, item, db_name_arg, table_name_arg, field_name_arg) { ++ depended_from = depended_from_arg; ++ } ++ ~Item_aggregate_ref() {} ++ void print(const THD *thd, String *str, ++ enum_query_type query_type) const override { ++ if (ref != nullptr && (*ref) != nullptr) { ++ (*ref)->print(thd, str, query_type); ++ } else { ++ Item_ident::print(thd, str, query_type); ++ } ++ } ++ ++ Ref_Type ref_type() const override { return AGGREGATE_REF; } ++ ++ /** ++ Walker processor used by Query_block::transform_grouped_to_derived to replace ++ an aggregate's reference to one in the new derived table's (hidden) select ++ list. ++ ++ @param arg An info object of type Item::Aggregate_ref_update ++ @returns false ++ */ ++ bool update_aggr_refs(uchar *arg) override; ++ ++ Item *pq_clone(class THD *thd, class Query_block *select) override; ++}; ++ + #endif /* ITEM_INCLUDED */ +diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc +index 443940f6..a3721293 100644 +--- a/sql/item_cmpfunc.cc ++++ b/sql/item_cmpfunc.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -6884,6 +6885,7 @@ bool Item_equal::fix_fields(THD *thd, Item **) { + not_null_tables_cache = used_tables_cache = 0; + bool nullable = false; + while ((item = li++)) { ++ if (!item->fixed && item->fix_fields(thd, &item)) return true; + used_tables_cache |= item->used_tables(); + not_null_tables_cache |= item->not_null_tables(); + nullable |= item->is_nullable(); +diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h +index b77a6570..4044ce36 100644 +--- a/sql/item_cmpfunc.h ++++ b/sql/item_cmpfunc.h +@@ -2,6 +2,7 @@ + #define ITEM_CMPFUNC_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -370,6 +371,7 @@ class Item_func_true : public Item_func_bool_const { + str->append("true"); + } + enum Functype functype() const override { return TRUE_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /// A predicate that is "always false". 
+@@ -384,6 +386,7 @@ class Item_func_false : public Item_func_bool_const { + void print(const THD *, String *str, enum_query_type) const override { + str->append("false"); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -448,6 +451,8 @@ class Item_func_truth final : public Item_bool_func { + } + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + protected: + Bool_test truth_test; ///< The value we're testing for. + }; +@@ -717,6 +722,8 @@ class Item_func_xor final : public Item_bool_func2 { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_not : public Item_bool_func { +@@ -735,6 +742,8 @@ class Item_func_not : public Item_bool_func { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -892,6 +901,7 @@ class Item_func_trig_cond final : public Item_bool_func { + plan_idx idx() const { return m_idx; } + + bool contains_only_equi_join_condition() const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_not_all : public Item_func_not { +@@ -1046,6 +1056,8 @@ class Item_func_eq : public Item_func_comparison { + /// we save a list of all of the fields that were considered equal. + void ensure_multi_equality_fields_are_available(table_map left_side_tables, + table_map right_side_tables); ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1078,6 +1090,8 @@ class Item_func_equal final : public Item_func_comparison { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1098,6 +1112,8 @@ class Item_func_ge final : public Item_func_comparison { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1118,6 +1134,8 @@ class Item_func_gt final : public Item_func_comparison { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1138,6 +1156,8 @@ class Item_func_le final : public Item_func_comparison { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1180,6 +1200,8 @@ class Item_func_lt final : public Item_func_comparison { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1200,6 +1222,8 @@ class Item_func_ne final : public Item_func_comparison { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* +@@ -1236,6 +1260,7 @@ class Item_func_opt_neg : public Item_int_func { + } + bool eq(const Item *item, bool binary_cmp) const override; + bool subst_argument_checker(uchar **) override { return true; } ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_between final : public Item_func_opt_neg { +@@ -1288,6 +1313,8 @@ class Item_func_between final : public 
Item_func_opt_neg { + args[0]->not_null_tables() | + (args[1]->not_null_tables() & args[2]->not_null_tables()); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_strcmp final : public Item_bool_func2 { +@@ -1308,6 +1335,8 @@ class Item_func_strcmp final : public Item_bool_func2 { + fix_char_length(2); // returns "1" or "0" or "-1" + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + struct interval_range { +@@ -1332,15 +1361,27 @@ class Item_func_interval final : public Item_int_func { + allowed_arg_cols = 0; // Fetch this value from first argument + } + ++ Item_func_interval(const POS &pos, Item_row *tmprow) ++ : super(pos, tmprow), ++ row(tmprow), ++ use_decimal_comparison(false), ++ intervals(0) { ++ allowed_arg_cols = 0; ++ } ++ + bool itemize(Parse_context *pc, Item **res) override; + longlong val_int() override; + bool resolve_type(THD *) override; ++ enum Functype functype() const override { return INTERVAL_FUNC; } + const char *func_name() const override { return "interval"; } + uint decimal_precision() const override { return 2; } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; + void update_used_tables() override; + ++ Item* pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; ++ + private: + // Runs in CTOR init list, cannot access *this as Item_func_interval + static Item_row *alloc_row(const POS &pos, MEM_ROOT *mem_root, Item *expr1, +@@ -1387,6 +1428,7 @@ class Item_func_coalesce : public Item_func_numhybrid { + enum Item_result result_type() const override { return hybrid_type; } + const char *func_name() const override { return "coalesce"; } + enum Functype functype() const override { return COALESCE_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_ifnull final : public Item_func_coalesce { +@@ -1406,6 +1448,7 @@ class Item_func_ifnull final : public Item_func_coalesce { + const char *func_name() const override { return "ifnull"; } + Field *tmp_table_field(TABLE *table) override; + uint decimal_precision() const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1420,6 +1463,7 @@ class Item_func_any_value final : public Item_func_coalesce { + const char *func_name() const override { return "any_value"; } + bool aggregate_check_group(uchar *arg) override; + bool aggregate_check_distinct(uchar *arg) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_if final : public Item_func { +@@ -1462,6 +1506,8 @@ class Item_func_if final : public Item_func { + not_null_tables_cache = + (args[1]->not_null_tables() & args[2]->not_null_tables()); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_nullif final : public Item_bool_func2 { +@@ -1500,6 +1546,8 @@ class Item_func_nullif final : public Item_bool_func2 { + inherit from Item_func instead of Item_bool_func2 + */ + bool is_bool_func() const override { return false; } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* Functions to handle the optimized IN */ +@@ -1554,6 +1602,8 @@ class in_vector { + + virtual bool is_row_result() const { return false; } + ++ virtual in_vector* pq_clone(THD *thd MY_ATTRIBUTE((unused))) { return nullptr; } ++ + /** + Fill the vector by evaluating the items passed as arguments. 
+ Note that null values are skipped so the vector may end up containing +@@ -1627,6 +1677,7 @@ class in_longlong : public in_vector { + } + bool find_item(Item *item) override; + bool compare_elems(uint pos1, uint pos2) const override; ++ void resize_and_sort_ll() {resize_and_sort();} + + private: + void set(uint pos, Item *item) override { val_item(item, &base[pos]); } +@@ -1971,6 +2022,8 @@ class Item_func_case final : public Item_func { + return cmp_collation.collation; + } + enum Functype functype() const override { return CASE_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + /** +@@ -2068,6 +2121,8 @@ class Item_func_in final : public Item_func_opt_neg { + not_null_tables_cache |= args[0]->not_null_tables(); + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + private: + /** + Usable if @ is made only of constants. Returns true if one +@@ -2192,6 +2247,8 @@ class Item_func_isnull : public Item_bool_func { + return args[0]->collation.collation; + } + bool fix_fields(THD *thd, Item **ref) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + /* Functions used by HAVING for rewriting IN subquery */ +@@ -2254,6 +2311,7 @@ class Item_func_isnotnull final : public Item_bool_func { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_like final : public Item_bool_func2 { +@@ -2315,6 +2373,7 @@ class Item_func_like final : public Item_bool_func2 { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + /** +@@ -2393,6 +2452,7 @@ class Item_cond : public Item_bool_func { + /// Treat UNKNOWN result like FALSE because callers see no difference + bool ignore_unknown() const { return abort_on_null; } + bool equality_substitution_analyzer(uchar **) override { return true; } ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + /* +@@ -2487,6 +2547,7 @@ class Item_equal final : public Item_bool_func { + Item_equal(Item *c, Item_field *f); + Item_equal(Item_equal *item_equal); + ++ inline List get_fields() {return fields;} + inline Item *get_const() { return const_item; } + void set_const(Item *c) { const_item = c; } + bool compare_const(THD *thd, Item *c); +@@ -2555,6 +2616,8 @@ class Item_equal final : public Item_bool_func { + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; + Item *m_const_folding[2]; ///< temporary area used for constant folding ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + + private: + void check_covering_prefix_keys(); +@@ -2609,6 +2672,8 @@ class Item_cond_and final : public Item_cond { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + + bool contains_only_equi_join_condition() const override; + }; +@@ -2638,6 +2703,7 @@ class Item_cond_or final : public Item_cond { + table_map read_tables, + const MY_BITMAP *fields_to_ignore, + double rows_in_table) override; ++ Item *pq_clone(THD *thd, Query_block *select) 
override; + }; + + /// Builds condition: (a AND b) IS TRUE +diff --git a/sql/item_func.cc b/sql/item_func.cc +index 77e847f6..936a55ea 100644 +--- a/sql/item_func.cc ++++ b/sql/item_func.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -6555,10 +6556,13 @@ static int get_var_with_binlog(THD *thd, enum_sql_command sql_command, + Binlog_user_var_event *user_var_event; + user_var_entry *var_entry; + ++ /* obtain user variables from leader thread */ ++ THD *entry_thd = thd->is_worker() ? thd->pq_leader : thd; ++ + /* Protects thd->user_vars. */ +- mysql_mutex_lock(&thd->LOCK_thd_data); +- var_entry = get_variable(thd, name, nullptr); +- mysql_mutex_unlock(&thd->LOCK_thd_data); ++ mysql_mutex_lock(&entry_thd->LOCK_thd_data); ++ var_entry = get_variable(entry_thd, name, nullptr); ++ mysql_mutex_unlock(&entry_thd->LOCK_thd_data); + + *out_entry = var_entry; + +@@ -6845,6 +6849,29 @@ bool Item_func_get_user_var::set_value(THD *thd, sp_rcontext * /*ctx*/, + suv->update()); + } + ++bool Item_func_get_user_var::pq_copy_from(THD *thd, Query_block *select, ++ Item *item) { ++ if (Item_var_func::pq_copy_from(thd, select, item)) { ++ return true; ++ } ++ Item_func_get_user_var *orig_item = ++ dynamic_cast(item); ++ assert(orig_item); ++ ++ // obtain var_entry from leader ++#ifndef NDEBUG ++ THD *entry_thd = thd->pq_leader; ++ assert(entry_thd); ++ mysql_mutex_lock(&entry_thd->LOCK_thd_data); ++ var_entry = get_variable(entry_thd, name, nullptr); ++ mysql_mutex_unlock(&entry_thd->LOCK_thd_data); ++#endif ++ if (orig_item != nullptr) { ++ m_cached_result_type = orig_item->m_cached_result_type; ++ } ++ return false; ++} ++ + bool Item_user_var_as_out_param::fix_fields(THD *thd, Item **ref) { + assert(fixed == 0); + +diff --git a/sql/item_func.h b/sql/item_func.h +index 3dbb10a8..ed329605 100644 +--- a/sql/item_func.h ++++ b/sql/item_func.h +@@ -2,6 +2,7 @@ + #define ITEM_FUNC_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -285,7 +286,9 @@ class Item_func : public Item_result_field { + JSON_UNQUOTE_FUNC, + MEMBER_OF_FUNC, + STRCMP_FUNC, +- TRUE_FUNC ++ TRUE_FUNC, ++ JSON_FUNC, ++ XML_FUNC + }; + enum optimize_type { + OPTIMIZE_NONE, +@@ -698,6 +701,7 @@ class Item_func : public Item_result_field { + bool check_column_in_group_by(uchar *arg) override; + + longlong val_int_from_real(); ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_real_func : public Item_func { +@@ -972,6 +976,9 @@ class Item_func_connection_id final : public Item_int_func { + return ((func_arg->source == VGS_GENERATED_COLUMN) || + (func_arg->source == VGS_CHECK_CONSTRAINT)); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_typecast_signed final : public Item_int_func { +@@ -985,6 +992,7 @@ class Item_typecast_signed final : public Item_int_func { + void print(const THD *thd, String *str, + enum_query_type query_type) const override; + enum Functype functype() const override { return TYPECAST_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_typecast_unsigned final : public Item_int_func { +@@ -998,14 +1006,16 @@ class Item_typecast_unsigned final : public Item_int_func { + void print(const THD *thd, String *str, + enum_query_type query_type) const override; + enum Functype functype() const override { return TYPECAST_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_typecast_decimal final : public Item_func { + public: + Item_typecast_decimal(const POS &pos, Item *a, int len, int dec) +- : Item_func(pos, a) { ++ : Item_func(pos, a), pq_precision(len) { + set_data_type_decimal(len, dec); + } ++ int pq_precision; + String *val_str(String *str) override; + double val_real() override; + longlong val_int() override; +@@ -1026,6 +1036,7 @@ class Item_typecast_decimal final : public Item_func { + enum Functype functype() const override { return TYPECAST_FUNC; } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1055,6 +1066,7 @@ class Item_typecast_real final : public Item_func { + enum Functype functype() const override { return TYPECAST_FUNC; } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_additive_op : public Item_num_op { +@@ -1082,6 +1094,7 @@ class Item_func_plus final : public Item_func_additive_op { + double real_op() override; + my_decimal *decimal_op(my_decimal *) override; + enum Functype functype() const override { return PLUS_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_minus final : public Item_func_additive_op { +@@ -1099,6 +1112,7 @@ class Item_func_minus final : public Item_func_additive_op { + my_decimal *decimal_op(my_decimal *) override; + bool resolve_type(THD *thd) override; + enum Functype functype() const override { return MINUS_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mul final : public Item_num_op { +@@ -1114,6 +1128,7 @@ class Item_func_mul final : public Item_num_op { + bool check_partition_func_processor(uchar *) 
override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } + enum Functype functype() const override { return MUL_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_div final : public Item_num_op { +@@ -1130,6 +1145,7 @@ class Item_func_div final : public Item_num_op { + bool resolve_type(THD *thd) override; + void result_precision() override; + enum Functype functype() const override { return DIV_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_int_div final : public Item_int_func { +@@ -1148,6 +1164,7 @@ class Item_func_int_div final : public Item_int_func { + + bool check_partition_func_processor(uchar *) override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mod final : public Item_num_op { +@@ -1164,6 +1181,7 @@ class Item_func_mod final : public Item_num_op { + bool check_partition_func_processor(uchar *) override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } + enum Functype functype() const override { return MOD_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_neg final : public Item_func_num1 { +@@ -1183,6 +1201,7 @@ class Item_func_neg final : public Item_func_num1 { + } + bool check_partition_func_processor(uchar *) override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_abs final : public Item_func_num1 { +@@ -1196,6 +1215,7 @@ class Item_func_abs final : public Item_func_num1 { + bool check_partition_func_processor(uchar *) override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } + enum Functype functype() const override { return ABS_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + // A class to handle logarithmic and trigonometric functions +@@ -1215,6 +1235,7 @@ class Item_func_exp final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "exp"; } + enum Functype functype() const override { return EXP_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_ln final : public Item_dec_func { +@@ -1223,6 +1244,7 @@ class Item_func_ln final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "ln"; } + enum Functype functype() const override { return LN_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_log final : public Item_dec_func { +@@ -1232,6 +1254,7 @@ class Item_func_log final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "log"; } + enum Functype functype() const override { return LOG_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_log2 final : public Item_dec_func { +@@ -1239,6 +1262,7 @@ class Item_func_log2 final : public Item_dec_func { + Item_func_log2(const POS &pos, Item *a) : Item_dec_func(pos, a) {} + double val_real() override; + const char *func_name() const override { return "log2"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_log10 final : public Item_dec_func { +@@ -1247,6 +1271,7 @@ 
class Item_func_log10 final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "log10"; } + enum Functype functype() const override { return LOG10_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_sqrt final : public Item_dec_func { +@@ -1255,6 +1280,7 @@ class Item_func_sqrt final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "sqrt"; } + enum Functype functype() const override { return SQRT_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_pow final : public Item_dec_func { +@@ -1262,6 +1288,7 @@ class Item_func_pow final : public Item_dec_func { + Item_func_pow(const POS &pos, Item *a, Item *b) : Item_dec_func(pos, a, b) {} + double val_real() override; + const char *func_name() const override { return "pow"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_acos final : public Item_dec_func { +@@ -1270,6 +1297,7 @@ class Item_func_acos final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "acos"; } + enum Functype functype() const override { return ACOS_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_asin final : public Item_dec_func { +@@ -1278,6 +1306,7 @@ class Item_func_asin final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "asin"; } + enum Functype functype() const override { return ASIN_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_atan final : public Item_dec_func { +@@ -1287,6 +1316,7 @@ class Item_func_atan final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "atan"; } + enum Functype functype() const override { return ATAN_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_cos final : public Item_dec_func { +@@ -1295,6 +1325,7 @@ class Item_func_cos final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "cos"; } + enum Functype functype() const override { return COS_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_sin final : public Item_dec_func { +@@ -1303,6 +1334,7 @@ class Item_func_sin final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "sin"; } + enum Functype functype() const override { return SIN_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_tan final : public Item_dec_func { +@@ -1311,6 +1343,7 @@ class Item_func_tan final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "tan"; } + enum Functype functype() const override { return TAN_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_cot final : public Item_dec_func { +@@ -1319,6 +1352,7 @@ class Item_func_cot final : public Item_dec_func { + double val_real() override; + const char *func_name() const override { return "cot"; } + enum Functype functype() const override { return COT_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_int_val : public Item_func_num1 { +@@ -1339,6 +1373,7 @@ class Item_func_ceiling final : public Item_func_int_val { + bool 
check_partition_func_processor(uchar *) override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } + enum Functype functype() const override { return CEILING_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_floor final : public Item_func_int_val { +@@ -1352,6 +1387,7 @@ class Item_func_floor final : public Item_func_int_val { + bool check_partition_func_processor(uchar *) override { return false; } + bool check_function_as_value_generator(uchar *) override { return false; } + enum Functype functype() const override { return FLOOR_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* This handles round and truncate */ +@@ -1375,6 +1411,7 @@ class Item_func_round final : public Item_func_num1 { + enum Functype functype() const override { + return truncate ? TRUNCATE_FUNC : ROUND_FUNC; + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_rand final : public Item_real_func { +@@ -1413,6 +1450,8 @@ class Item_func_rand final : public Item_real_func { + (func_arg->source == VGS_CHECK_CONSTRAINT)); + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + private: + void seed_random(Item *val); + }; +@@ -1533,6 +1572,7 @@ class Item_func_min final : public Item_func_min_max { + : Item_func_min_max(pos, opt_list, true) {} + const char *func_name() const override { return "least"; } + enum Functype functype() const override { return LEAST_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_max final : public Item_func_min_max { +@@ -1541,6 +1581,7 @@ class Item_func_max final : public Item_func_min_max { + : Item_func_min_max(pos, opt_list, false) {} + const char *func_name() const override { return "greatest"; } + enum Functype functype() const override { return GREATEST_FUNC; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1639,6 +1680,7 @@ class Item_func_length : public Item_int_func { + max_length = 10; + return false; + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_bit_length final : public Item_func_length { +@@ -1649,6 +1691,7 @@ class Item_func_bit_length final : public Item_func_length { + return Item_func_length::val_int() * 8; + } + const char *func_name() const override { return "bit_length"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_char_length final : public Item_int_func { +@@ -1663,6 +1706,9 @@ class Item_func_char_length final : public Item_int_func { + max_length = 10; + return Item_int_func::resolve_type(thd); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_coercibility final : public Item_int_func { +@@ -1677,6 +1723,8 @@ class Item_func_coercibility final : public Item_int_func { + set_nullable(false); + return Item_int_func::resolve_type(thd); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_locate : public Item_int_func { +@@ -1695,6 +1743,8 @@ class Item_func_locate : public Item_int_func { + bool resolve_type(THD *thd) override; + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_instr final : public Item_func_locate 
{ +@@ -1703,6 +1753,7 @@ class Item_func_instr final : public Item_func_locate { + : Item_func_locate(pos, a, b) {} + + const char *func_name() const override { return "instr"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_validate_password_strength final : public Item_int_func { +@@ -1731,6 +1782,7 @@ class Item_func_field final : public Item_int_func { + longlong val_int() override; + const char *func_name() const override { return "field"; } + bool resolve_type(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_ascii final : public Item_int_func { +@@ -1744,6 +1796,7 @@ class Item_func_ascii final : public Item_int_func { + max_length = 3; + return Item_int_func::resolve_type(thd); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_ord final : public Item_int_func { +@@ -1753,6 +1806,7 @@ class Item_func_ord final : public Item_int_func { + Item_func_ord(const POS &pos, Item *a) : Item_int_func(pos, a) {} + longlong val_int() override; + const char *func_name() const override { return "ord"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_find_in_set final : public Item_int_func { +@@ -1770,6 +1824,7 @@ class Item_func_find_in_set final : public Item_int_func { + const CHARSET_INFO *compare_collation() const override { + return cmp_collation.collation; + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* Base class for all bit functions: '~', '|', '^', '&', '>>', '<<' */ +@@ -1815,6 +1870,8 @@ class Item_func_bit : public Item_func { + return get_time_from_string(ltime); + } + ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; ++ + private: + /** + @brief Performs the operation on integers to produce a result of type +@@ -1856,6 +1913,7 @@ class Item_func_bit_or final : public Item_func_bit_two_param { + Item_func_bit_or(const POS &pos, Item *a, Item *b) + : Item_func_bit_two_param(pos, a, b) {} + const char *func_name() const override { return "|"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + longlong int_op() override { return eval_int_op(std::bit_or()); } +@@ -1869,6 +1927,7 @@ class Item_func_bit_and final : public Item_func_bit_two_param { + Item_func_bit_and(const POS &pos, Item *a, Item *b) + : Item_func_bit_two_param(pos, a, b) {} + const char *func_name() const override { return "&"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + longlong int_op() override { return eval_int_op(std::bit_and()); } +@@ -1882,6 +1941,7 @@ class Item_func_bit_xor final : public Item_func_bit_two_param { + Item_func_bit_xor(const POS &pos, Item *a, Item *b) + : Item_func_bit_two_param(pos, a, b) {} + const char *func_name() const override { return "^"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + longlong int_op() override { return eval_int_op(std::bit_xor()); } +@@ -1904,6 +1964,7 @@ class Item_func_bit_count final : public Item_int_func { + max_length = MAX_BIGINT_WIDTH + 1; + return false; + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_shift : public Item_func_bit { +@@ -1926,6 +1987,7 @@ class Item_func_shift_left final : public Item_func_shift { + Item_func_shift_left(const POS &pos, Item *a, Item *b) + : Item_func_shift(pos, a, b) {} + const char *func_name() const override { return "<<"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + + 
private: + longlong int_op() override { return eval_int_op(); } +@@ -1937,6 +1999,7 @@ class Item_func_shift_right final : public Item_func_shift { + Item_func_shift_right(const POS &pos, Item *a, Item *b) + : Item_func_shift(pos, a, b) {} + const char *func_name() const override { return ">>"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + longlong int_op() override { return eval_int_op(); } +@@ -1956,6 +2019,7 @@ class Item_func_bit_neg final : public Item_func_bit { + enum_query_type query_type) const override { + Item_func::print(thd, str, query_type); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + + private: + longlong int_op() override; +@@ -1986,6 +2050,8 @@ class Item_func_last_insert_id final : public Item_int_func { + func_arg->banned_function_name = func_name(); + return true; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_benchmark final : public Item_int_func { +@@ -2019,6 +2085,8 @@ class Item_func_benchmark final : public Item_int_func { + func_arg->banned_function_name = func_name(); + return true; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + void item_func_sleep_init(); +@@ -2053,6 +2121,7 @@ class Item_func_sleep final : public Item_int_func { + return Item_int_func::resolve_type(thd); + } + longlong val_int() override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_udf_func : public Item_func { +@@ -2277,6 +2346,7 @@ class Item_func_get_lock final : public Item_int_func { + return true; + } + uint decimal_precision() const override { return max_length; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_release_lock final : public Item_int_func { +@@ -2461,6 +2531,7 @@ class Item_func_can_access_table : public Item_int_func { + set_nullable(true); + return false; + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_can_access_user : public Item_int_func { +@@ -2562,6 +2633,8 @@ class Item_func_is_visible_dd_object : public Item_int_func { + set_nullable(true); + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_table_rows : public Item_int_func { +@@ -2577,6 +2650,8 @@ class Item_func_internal_table_rows : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_avg_row_length : public Item_int_func { +@@ -2592,6 +2667,8 @@ class Item_func_internal_avg_row_length : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_data_length : public Item_int_func { +@@ -2607,6 +2684,8 @@ class Item_func_internal_data_length : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_max_data_length : public Item_int_func { +@@ -2622,6 +2701,8 @@ class Item_func_internal_max_data_length : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_index_length : public Item_int_func { +@@ -2637,6 +2718,8 @@ class Item_func_internal_index_length : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class 
Item_func_internal_data_free : public Item_int_func { +@@ -2652,6 +2735,8 @@ class Item_func_internal_data_free : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_auto_increment : public Item_int_func { +@@ -2667,6 +2752,8 @@ class Item_func_internal_auto_increment : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_checksum : public Item_int_func { +@@ -2681,6 +2768,8 @@ class Item_func_internal_checksum : public Item_int_func { + null_on_null = false; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_keys_disabled : public Item_int_func { +@@ -3301,6 +3390,9 @@ class Item_func_get_user_var : public Item_var_func, + Settable_routine_parameter *get_settable_routine_parameter() override { + return this; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + /* +@@ -3425,6 +3517,9 @@ class Item_func_get_system_var final : public Item_var_func { + + void cleanup() override; + bool bind(THD *thd); ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class JOIN; +@@ -3914,6 +4009,8 @@ class Item_func_found_rows final : public Item_int_func { + func_arg->banned_function_name = func_name(); + return true; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + void uuid_short_init(); +@@ -3949,6 +4046,8 @@ class Item_func_version final : public Item_static_string_func { + explicit Item_func_version(const POS &pos); + + bool itemize(Parse_context *pc, Item **res) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +diff --git a/sql/item_geofunc.h b/sql/item_geofunc.h +index 84267e17..178240ec 100644 +--- a/sql/item_geofunc.h ++++ b/sql/item_geofunc.h +@@ -2,6 +2,7 @@ + #define ITEM_GEOFUNC_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -690,6 +691,8 @@ class Item_func_latfromgeohash : public Item_func_latlongfromgeohash { + false) {} + + const char *func_name() const override { return "ST_LATFROMGEOHASH"; } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -703,6 +706,8 @@ class Item_func_longfromgeohash : public Item_func_latlongfromgeohash { + } + + const char *func_name() const override { return "ST_LONGFROMGEOHASH"; } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_centroid : public Item_geometry_func { +@@ -1085,6 +1090,7 @@ class Item_func_st_contains final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_contains"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_crosses final : public Item_func_spatial_relation { +@@ -1096,6 +1102,7 @@ class Item_func_st_crosses final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_crosses"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_disjoint final : public Item_func_spatial_relation { +@@ -1107,6 +1114,7 @@ class Item_func_st_disjoint final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_disjoint"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_equals final : public Item_func_spatial_relation { +@@ -1118,6 +1126,7 @@ class Item_func_st_equals final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_equals"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_intersects final : public Item_func_spatial_relation { +@@ -1129,6 +1138,7 @@ class Item_func_st_intersects final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_intersects"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrcontains final : public Item_func_spatial_relation { +@@ -1140,6 +1150,7 @@ class Item_func_mbrcontains final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrcontains"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrcoveredby final : public Item_func_spatial_relation { +@@ -1151,6 +1162,7 @@ class Item_func_mbrcoveredby final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrcoveredby"; } + bool eval(const 
dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrcovers final : public Item_func_spatial_relation { +@@ -1162,6 +1174,7 @@ class Item_func_mbrcovers final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrcovers"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrdisjoint final : public Item_func_spatial_relation { +@@ -1173,6 +1186,7 @@ class Item_func_mbrdisjoint final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrdisjoint"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrequals final : public Item_func_spatial_relation { +@@ -1184,6 +1198,7 @@ class Item_func_mbrequals final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrequals"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrintersects final : public Item_func_spatial_relation { +@@ -1195,6 +1210,7 @@ class Item_func_mbrintersects final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrintersects"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbroverlaps final : public Item_func_spatial_relation { +@@ -1206,6 +1222,7 @@ class Item_func_mbroverlaps final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbroverlaps"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrtouches final : public Item_func_spatial_relation { +@@ -1217,6 +1234,7 @@ class Item_func_mbrtouches final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrtouches"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_mbrwithin final : public Item_func_spatial_relation { +@@ -1228,6 +1246,7 @@ class Item_func_mbrwithin final : public Item_func_spatial_relation { + const char *func_name() const override { return "mbrwithin"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_overlaps final : public Item_func_spatial_relation { +@@ -1239,6 +1258,7 @@ class Item_func_st_overlaps final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_overlaps"; } + bool eval(const 
dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_touches final : public Item_func_spatial_relation { +@@ -1250,6 +1270,7 @@ class Item_func_st_touches final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_touches"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_st_within final : public Item_func_spatial_relation { +@@ -1261,6 +1282,7 @@ class Item_func_st_within final : public Item_func_spatial_relation { + const char *func_name() const override { return "st_within"; } + bool eval(const dd::Spatial_reference_system *srs, const gis::Geometry *g1, + const gis::Geometry *g2, bool *result, bool *null) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1787,6 +1809,8 @@ class Item_func_st_srid_mutator : public Item_geometry_func { + if (param_type_is_default(thd, 1, 2, MYSQL_TYPE_LONGLONG)) return true; + return Item_geometry_func::resolve_type(thd); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /// This class implements the one-parameter ST_SRID function which +diff --git a/sql/item_inetfunc.h b/sql/item_inetfunc.h +index bc559061..5caad1d6 100644 +--- a/sql/item_inetfunc.h ++++ b/sql/item_inetfunc.h +@@ -2,6 +2,7 @@ + #define ITEM_INETFUNC_INCLUDED + + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -54,6 +55,8 @@ class Item_func_inet_aton : public Item_int_func { + unsigned_flag = true; + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /************************************************************************* +@@ -76,6 +79,8 @@ class Item_func_inet_ntoa : public Item_str_func { + set_nullable(true); + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /************************************************************************* +@@ -133,6 +138,8 @@ class Item_func_inet6_aton : public Item_func_inet_str_base { + return false; + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + protected: + bool calc_value(String *arg, String *buffer) override; + }; +@@ -160,6 +167,8 @@ class Item_func_inet6_ntoa : public Item_func_inet_str_base { + return false; + } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + protected: + bool calc_value(String *arg, String *buffer) override; + }; +diff --git a/sql/item_json_func.h b/sql/item_json_func.h +index 3701eb4f..dd772b64 100644 +--- a/sql/item_json_func.h ++++ b/sql/item_json_func.h +@@ -2,6 +2,7 @@ + #define ITEM_JSON_FUNC_INCLUDED + + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -192,6 +193,7 @@ class Item_json_func : public Item_func { + double val_real() override; + my_decimal *val_decimal(my_decimal *decimal_value) override; + ++ enum Functype functype() const override { return JSON_FUNC; } + void cleanup() override; + + Item_result cast_to_int_type() const override { return INT_RESULT; } +@@ -343,6 +345,8 @@ class Item_func_json_schema_valid final : public Item_bool_func { + + void cleanup() override; + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + private: + // Wrap the object in a unique_ptr so that the relevant rapidjson destructors + // are called. +@@ -416,6 +420,8 @@ class Item_func_json_contains final : public Item_int_func { + enum_const_item_cache can_cache_json_arg(Item *arg) override { + return (arg == args[0] || arg == args[1]) ? CACHE_JSON_VALUE : CACHE_NONE; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -541,6 +547,7 @@ class Item_func_json_depth final : public Item_int_func { + } + + longlong val_int() override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -894,6 +901,7 @@ class Item_func_json_quote : public Item_str_func { + } + + String *val_str(String *tmpspace) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +diff --git a/sql/item_pfs_func.h b/sql/item_pfs_func.h +index 42b7734f..af701cba 100644 +--- a/sql/item_pfs_func.h ++++ b/sql/item_pfs_func.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -78,6 +79,8 @@ class Item_func_pfs_format_bytes final : public Item_str_func { + const char *func_name() const override { return "format_bytes"; } + bool resolve_type(THD *) override; + String *val_str(String *str) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + /** format_pico_time() */ +@@ -93,6 +96,7 @@ class Item_func_pfs_format_pico_time final : public Item_str_func { + const char *func_name() const override { return "format_pico_time"; } + bool resolve_type(THD *) override; + String *val_str(String *str) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + #endif /* ITEM_PFS_FUNC_INCLUDED */ +diff --git a/sql/item_regexp_func.h b/sql/item_regexp_func.h +index 4c84589c..06e69af6 100644 +--- a/sql/item_regexp_func.h ++++ b/sql/item_regexp_func.h +@@ -2,6 +2,7 @@ + #define SQL_ITEM_REGEXP_FUNC_H_ + + /* Copyright (c) 2017, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -263,6 +264,8 @@ class Item_func_regexp_instr : public Item_func_regexp { + bool get_time(MYSQL_TIME *t) override { return get_time_from_int(t); } + /// @} + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + protected: + int pos_arg_pos() const override { return 2; } + int occ_arg_pos() const override { return 3; } +@@ -307,6 +310,8 @@ class Item_func_regexp_like : public Item_func_regexp { + bool get_time(MYSQL_TIME *t) override { return get_time_from_int(t); } + /// @} + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + protected: + int pos_arg_pos() const override { return -1; } + int occ_arg_pos() const override { return -1; } +diff --git a/sql/item_row.h b/sql/item_row.h +index 6cdeed0b..59231ebd 100644 +--- a/sql/item_row.h ++++ b/sql/item_row.h +@@ -2,6 +2,7 @@ + #define ITEM_ROW_INCLUDED + + /* Copyright (c) 2002, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -152,6 +153,8 @@ class Item_row : public Item { + bool null_inside() override { return with_null; } + void bring_value() override; + bool check_function_as_value_generator(uchar *) override { return false; } ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + #endif /* ITEM_ROW_INCLUDED */ +diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h +index c1a0b07f..f2f9a03c 100644 +--- a/sql/item_strfunc.h ++++ b/sql/item_strfunc.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -185,6 +186,7 @@ class Item_func_md5 final : public Item_str_ascii_func { + String *val_str_ascii(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "md5"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_sha : public Item_str_ascii_func { +@@ -193,6 +195,7 @@ class Item_func_sha : public Item_str_ascii_func { + String *val_str_ascii(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "sha"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_sha2 : public Item_str_ascii_func { +@@ -202,6 +205,7 @@ class Item_func_sha2 : public Item_str_ascii_func { + String *val_str_ascii(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "sha2"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_to_base64 final : public Item_str_ascii_func { +@@ -212,6 +216,7 @@ class Item_func_to_base64 final : public Item_str_ascii_func { + String *val_str_ascii(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "to_base64"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_statement_digest final : public Item_str_ascii_func { +@@ -270,6 +275,7 @@ class Item_func_from_base64 final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "from_base64"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_aes_encrypt final : public Item_str_func { +@@ -286,6 +292,7 @@ class Item_func_aes_encrypt final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "aes_encrypt"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_aes_decrypt : public Item_str_func { +@@ -301,6 +308,7 @@ class Item_func_aes_decrypt : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "aes_decrypt"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_random_bytes : public Item_str_func { +@@ -317,6 +325,7 @@ class Item_func_random_bytes : public Item_str_func { + String *val_str(String *a) override; + + const char *func_name() const override { return "random_bytes"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_concat : public Item_str_func { +@@ -331,6 +340,7 @@ class Item_func_concat : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "concat"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_concat_ws : public Item_str_func { +@@ -347,6 +357,7 @@ class Item_func_concat_ws : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "concat_ws"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_reverse : public Item_str_func { +@@ -359,6 +370,7 @@ 
class Item_func_reverse : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "reverse"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_replace : public Item_str_func { +@@ -372,6 +384,7 @@ class Item_func_replace : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "replace"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_insert : public Item_str_func { +@@ -386,6 +399,7 @@ class Item_func_insert : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "insert"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_str_conv : public Item_str_func { +@@ -397,6 +411,7 @@ class Item_str_conv : public Item_str_func { + public: + Item_str_conv(const POS &pos, Item *item) : Item_str_func(pos, item) {} + String *val_str(String *) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_lower : public Item_str_conv { +@@ -404,6 +419,7 @@ class Item_func_lower : public Item_str_conv { + Item_func_lower(const POS &pos, Item *item) : Item_str_conv(pos, item) {} + const char *func_name() const override { return "lower"; } + bool resolve_type(THD *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_upper : public Item_str_conv { +@@ -411,6 +427,7 @@ class Item_func_upper : public Item_str_conv { + Item_func_upper(const POS &pos, Item *item) : Item_str_conv(pos, item) {} + const char *func_name() const override { return "upper"; } + bool resolve_type(THD *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_left : public Item_str_func { +@@ -421,6 +438,7 @@ class Item_func_left : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "left"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_right : public Item_str_func { +@@ -432,6 +450,7 @@ class Item_func_right : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "right"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_substr : public Item_str_func { +@@ -450,6 +469,7 @@ class Item_func_substr : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "substr"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_substr_index final : public Item_str_func { +@@ -461,6 +481,7 @@ class Item_func_substr_index final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "substring_index"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_trim : public Item_str_func { +@@ -542,18 +563,23 @@ class Item_func_trim : public Item_str_func { + } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class 
Item_func_ltrim final : public Item_func_trim { + public: + Item_func_ltrim(const POS &pos, Item *a) + : Item_func_trim(pos, a, TRIM_LTRIM) {} ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_rtrim final : public Item_func_trim { + public: + Item_func_rtrim(const POS &pos, Item *a) + : Item_func_trim(pos, a, TRIM_RTRIM) {} ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_sysconst : public Item_str_func { +@@ -602,6 +628,8 @@ class Item_func_database : public Item_func_sysconst { + const Name_string fully_qualified_func_name() const override { + return NAME_STRING("database()"); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_user : public Item_func_sysconst { +@@ -649,6 +677,7 @@ class Item_func_user : public Item_func_sysconst { + } + + String *val_str(String *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_current_user : public Item_func_user { +@@ -669,6 +698,7 @@ class Item_func_current_user : public Item_func_user { + } + + String *val_str(String *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_soundex : public Item_str_func { +@@ -680,6 +710,7 @@ class Item_func_soundex : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "soundex"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_elt final : public Item_str_func { +@@ -691,15 +722,16 @@ class Item_func_elt final : public Item_str_func { + String *val_str(String *str) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "elt"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_make_set final : public Item_str_func { + typedef Item_str_func super; + ++ public: + Item *item; + String tmp_str; + +- public: + Item_func_make_set(const POS &pos, Item *a, PT_item_list *opt_list) + : Item_str_func(pos, opt_list), item(a) {} + +@@ -730,6 +762,8 @@ class Item_func_make_set final : public Item_str_func { + Item *transform(Item_transformer transformer, uchar *arg) override; + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_format final : public Item_str_ascii_func { +@@ -748,6 +782,8 @@ class Item_func_format final : public Item_str_ascii_func { + const char *func_name() const override { return "format"; } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_char final : public Item_str_func { +@@ -767,6 +803,7 @@ class Item_func_char final : public Item_str_func { + return false; + } + const char *func_name() const override { return "char"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_repeat final : public Item_str_func { +@@ -778,6 +815,7 @@ class Item_func_repeat final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "repeat"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_space final : public Item_str_func { +@@ -786,6 +824,7 @@ class Item_func_space final : public Item_str_func { + String 
*val_str(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "space"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_rpad final : public Item_str_func { +@@ -797,6 +836,8 @@ class Item_func_rpad final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "rpad"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_lpad final : public Item_str_func { +@@ -808,6 +849,8 @@ class Item_func_lpad final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "lpad"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_uuid_to_bin final : public Item_str_func { +@@ -822,6 +865,7 @@ class Item_func_uuid_to_bin final : public Item_str_func { + String *val_str(String *) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "uuid_to_bin"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_bin_to_uuid final : public Item_str_ascii_func { +@@ -836,6 +880,7 @@ class Item_func_bin_to_uuid final : public Item_str_ascii_func { + String *val_str_ascii(String *) override; + bool resolve_type(THD *thd) override; + const char *func_name() const override { return "bin_to_uuid"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_is_uuid final : public Item_bool_func { +@@ -861,6 +906,7 @@ class Item_func_conv final : public Item_str_func { + const char *func_name() const override { return "conv"; } + String *val_str(String *) override; + bool resolve_type(THD *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_hex : public Item_str_ascii_func { +@@ -875,6 +921,7 @@ class Item_func_hex : public Item_str_ascii_func { + set_data_type_string(args[0]->max_length * 2U, default_charset()); + return false; + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_unhex final : public Item_str_func { +@@ -888,6 +935,7 @@ class Item_func_unhex final : public Item_str_func { + const char *func_name() const override { return "unhex"; } + String *val_str(String *) override; + bool resolve_type(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + #ifndef NDEBUG +@@ -945,6 +993,7 @@ class Item_typecast_char final : public Item_str_func { + bool resolve_type(THD *) override; + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_load_file final : public Item_str_func { +@@ -989,6 +1038,7 @@ class Item_func_export_set final : public Item_str_func { + String *val_str(String *str) override; + bool resolve_type(THD *) override; + const char *func_name() const override { return "export_set"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_quote : public Item_str_func { +@@ -999,6 +1049,7 @@ class Item_func_quote : public Item_str_func { + const char *func_name() const override { return "quote"; } + String *val_str(String *) override; + bool resolve_type(THD *thd) override; ++ 
Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_conv_charset final : public Item_str_func { +@@ -1050,6 +1101,7 @@ class Item_func_conv_charset final : public Item_str_func { + const char *func_name() const override { return "convert"; } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_set_collation final : public Item_str_func { +@@ -1074,6 +1126,9 @@ class Item_func_set_collation final : public Item_str_func { + /* this function is transparent for view updating */ + return args[0]->field_for_view_update(); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_charset final : public Item_str_func { +@@ -1088,6 +1143,8 @@ class Item_func_charset final : public Item_str_func { + set_nullable(false); + return Item_str_func::resolve_type(thd); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_collation : public Item_str_func { +@@ -1103,6 +1160,8 @@ class Item_func_collation : public Item_str_func { + set_nullable(false); + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_weight_string final : public Item_str_func { +@@ -1134,6 +1193,7 @@ class Item_func_weight_string final : public Item_str_func { + bool resolve_type(THD *) override; + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_crc32 final : public Item_int_func { +@@ -1150,6 +1210,7 @@ class Item_func_crc32 final : public Item_int_func { + return false; + } + longlong val_int() override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_uncompressed_length final : public Item_int_func { +@@ -1165,6 +1226,7 @@ class Item_func_uncompressed_length final : public Item_int_func { + return false; + } + longlong val_int() override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_compress final : public Item_str_func { +@@ -1179,6 +1241,7 @@ class Item_func_compress final : public Item_str_func { + } + const char *func_name() const override { return "compress"; } + String *val_str(String *str) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_uncompress final : public Item_str_func { +@@ -1194,6 +1257,7 @@ class Item_func_uncompress final : public Item_str_func { + } + const char *func_name() const override { return "uncompress"; } + String *val_str(String *str) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_uuid final : public Item_str_func { +@@ -1218,6 +1282,8 @@ class Item_func_uuid final : public Item_str_func { + return ((func_arg->source == VGS_GENERATED_COLUMN) || + (func_arg->source == VGS_CHECK_CONSTRAINT)); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_gtid_subtract final : public Item_str_ascii_func { +@@ -1342,6 +1408,8 @@ class Item_func_get_dd_create_options final : public Item_str_func { + const char *func_name() const override { return "get_dd_create_options"; } + + String *val_str(String *) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_get_dd_schema_options final : public Item_str_func { +@@ 
-1389,6 +1457,7 @@ class Item_func_internal_get_comment_or_error final : public Item_str_func { + } + + String *val_str(String *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_get_dd_tablespace_private_data final : public Item_str_func { +diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc +index f3d5d862..0d965d96 100644 +--- a/sql/item_subselect.cc ++++ b/sql/item_subselect.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2002, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -3453,7 +3454,7 @@ bool subselect_hash_sj_engine::exec(THD *thd) { + } + + /* Set tmp_param only if its usable, i.e. there are Copy_field's. */ +- tmp_param = &(item->unit->outer_query_block()->join->tmp_table_param); ++ tmp_param = (item->unit->outer_query_block()->join->tmp_table_param); + if (tmp_param && tmp_param->copy_fields.empty()) tmp_param = nullptr; + } // if (!is_materialized) + +diff --git a/sql/item_sum.cc b/sql/item_sum.cc +index 4882708f..67ebbdb9 100644 +--- a/sql/item_sum.cc ++++ b/sql/item_sum.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -631,22 +632,24 @@ bool Item_sum::has_aggregate_ref_in_group_by(uchar *) { + return aggr_query_block != nullptr && aggr_query_block->group_fix_field; + } + +-Field *Item_sum::create_tmp_field(bool, TABLE *table) { ++Field *Item_sum::create_tmp_field(bool, TABLE *table, MEM_ROOT *root) { + DBUG_TRACE; + Field *field; ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; ++ + switch (result_type()) { + case REAL_RESULT: +- field = new (*THR_MALLOC) Field_double( ++ field = new (pq_check_root) Field_double( + max_length, is_nullable(), item_name.ptr(), decimals, false, true); + break; + case INT_RESULT: +- field = new (*THR_MALLOC) Field_longlong(max_length, is_nullable(), ++ field = new (pq_check_root) Field_longlong(max_length, is_nullable(), + item_name.ptr(), unsigned_flag); + break; + case STRING_RESULT: + return make_string_field(table); + case DECIMAL_RESULT: +- field = Field_new_decimal::create_from_item(this); ++ field = Field_new_decimal::create_from_item(this, root); + break; + case ROW_RESULT: + default: +@@ -1732,9 +1735,11 @@ bool Item_sum_hybrid::setup_hybrid(Item *item, Item *value_arg) { + return false; + } + +-Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table) { ++Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table, MEM_ROOT *root) { + DBUG_TRACE; + Field *field; ++ MEM_ROOT *pq_check_root = root ? 
root : *THR_MALLOC;
++
+   if (args[0]->type() == Item::FIELD_ITEM) {
+     field = down_cast<Item_field *>(args[0])->field;
+
+@@ -1753,22 +1758,22 @@ Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table) {
+    */
+   switch (args[0]->data_type()) {
+     case MYSQL_TYPE_DATE:
+-      field = new (*THR_MALLOC) Field_newdate(is_nullable(), item_name.ptr());
++      field = new (pq_check_root) Field_newdate(is_nullable(), item_name.ptr());
+       break;
+     case MYSQL_TYPE_TIME:
+-      field = new (*THR_MALLOC)
++      field = new (pq_check_root)
+           Field_timef(is_nullable(), item_name.ptr(), decimals);
+       break;
+     case MYSQL_TYPE_TIMESTAMP:
+-      field = new (*THR_MALLOC)
++      field = new (pq_check_root)
+           Field_timestampf(is_nullable(), item_name.ptr(), decimals);
+       break;
+     case MYSQL_TYPE_DATETIME:
+-      field = new (*THR_MALLOC)
++      field = new (pq_check_root)
+           Field_datetimef(is_nullable(), item_name.ptr(), decimals);
+       break;
+     default:
+-      return Item_sum::create_tmp_field(group, table);
++      return Item_sum::create_tmp_field(group, table, root);
+   }
+   if (field) field->init(table);
+   return field;
+@@ -2073,6 +2078,10 @@ double Aggregator_simple::arg_val_real() {
+   return item_sum->args[0]->val_real();
+ }
+
++longlong Aggregator_simple::arg_val_int() {
++  return item_sum->args[0]->val_int();
++}
++
+ bool Aggregator_simple::arg_is_null(bool use_null_value) {
+   Item **item = item_sum->args;
+   const uint item_count = item_sum->arg_count;
+@@ -2098,6 +2107,11 @@ double Aggregator_distinct::arg_val_real() {
+                              : item_sum->args[0]->val_real();
+ }
+
++longlong Aggregator_distinct::arg_val_int() {
++  return use_distinct_values ? table->field[0]->val_int()
++                             : item_sum->args[0]->val_int();
++}
++
+ bool Aggregator_distinct::arg_is_null(bool use_null_value) {
+   if (use_distinct_values) {
+     const bool rc = table->field[0]->is_null();
+@@ -2124,7 +2138,7 @@ bool Item_sum_count::add() {
+   if (aggr->arg_is_null(false)) {
+     return current_thd->is_error();
+   }
+-  count++;
++  count += is_fake ? args[0]->val_int() : 1;
+   return current_thd->is_error();
+ }
+
+@@ -2169,6 +2183,27 @@ void Item_sum_count::cleanup() {
+ bool Item_sum_avg::resolve_type(THD *thd) {
+   if (Item_sum_sum::resolve_type(thd)) return true;
+
++  /**
++   * for the rewritten Item_sum_avg, we should keep the same precision as that
++   * of Item_sum_sum. For the case of "3.1415926", we obtain
++   * precision = len(31415926) = 8;
++   * decimals = len(1415926) = 7;
++   * max_length = len(3.1415926 + signed_flag) = 9;
++   * if scale == 2: scale(3.1415926) = 3.14
++   */
++  if (pq_avg_type == PQ_WORKER || pq_avg_type == PQ_LEADER) {
++    prec_increment = 0;
++    if (hybrid_type == DECIMAL_RESULT) {
++      f_precision = args[0]->decimal_precision() + DECIMAL_LONGLONG_DIGITS;
++      decimals = args[0]->decimals;
++      max_length = my_decimal_precision_to_length_no_truncation(
++          f_precision, decimals, unsigned_flag);
++      f_scale = args[0]->decimals;
++      dec_bin_size = my_decimal_get_binary_size(f_precision, f_scale);
++    }
++    return false;
++  }
++
+   set_nullable(true);
+   null_value = true;
+   prec_increment = thd->variables.div_precincrement;
+@@ -2194,28 +2229,32 @@ bool Item_sum_avg::resolve_type(THD *thd) {
+
+ Item *Item_sum_avg::copy_or_same(THD *thd) {
+   DBUG_TRACE;
+-  Item *result =
++  Item_sum_avg *result =
+       m_is_window_function ? 
this : new (thd->mem_root) Item_sum_avg(thd, this); ++ if (!result) return result; ++ result->pq_avg_type = pq_avg_type; + return result; + } + +-Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table) { ++Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table, MEM_ROOT *root) { + DBUG_TRACE; + Field *field; ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; ++ + if (group) { + /* + We must store both value and counter in the temporary table in one field. + The easiest way is to do this is to store both value in a string + and unpack on access. + */ +- field = new (*THR_MALLOC) Field_string( ++ field = new (pq_check_root) Field_string( + ((hybrid_type == DECIMAL_RESULT) ? dec_bin_size : sizeof(double)) + + sizeof(longlong), + false, item_name.ptr(), &my_charset_bin); + } else if (hybrid_type == DECIMAL_RESULT) + field = Field_new_decimal::create_from_item(this); + else +- field = new (*THR_MALLOC) Field_double( ++ field = new (pq_check_root) Field_double( + max_length, is_nullable(), item_name.ptr(), decimals, false, true); + if (field) field->init(table); + return field; +@@ -2226,7 +2265,17 @@ void Item_sum_avg::clear() { Item_sum_sum::clear(); } + bool Item_sum_avg::add() { + assert(!m_is_window_function); + if (Item_sum_sum::add()) return true; +- if (!aggr->arg_is_null(true)) m_count++; ++ if (!aggr->arg_is_null(true)) { ++ if (pq_avg_type == PQ_REBUILD) { ++ uint32 extra_len; ++ auto val = args[0]->val_extra((uint32 *)&extra_len); ++ if (val != nullptr) { ++ m_count += uint8korr(val); ++ } ++ } else { ++ m_count++; ++ } ++ } + return false; + } + +@@ -2251,6 +2300,10 @@ double Item_sum_avg::val_real() { + null_value = true; + return 0.0; + } ++ if (pq_avg_type == PQ_WORKER) { ++ return Item_sum_sum::val_real(); ++ } ++ + return Item_sum_sum::val_real() / ulonglong2double(m_count); + } + } +@@ -2333,6 +2386,10 @@ my_decimal *Item_sum_avg::val_decimal(my_decimal *val) { + return result; + } + ++ if (pq_avg_type == PQ_WORKER) { ++ return (dec_buffs + curr_dec_buff); ++ } ++ + sum_dec = dec_buffs + curr_dec_buff; + int2my_decimal(E_DEC_FATAL_ERROR, m_count, false, &cnt); + my_decimal_div(E_DEC_FATAL_ERROR, val, sum_dec, &cnt, prec_increment); +@@ -2340,6 +2397,11 @@ my_decimal *Item_sum_avg::val_decimal(my_decimal *val) { + } + } + ++const uchar *Item_sum_avg::val_extra(uint32 *len) { ++ *len = pq_extra_len(false); ++ return (const uchar*)(&m_count); ++} ++ + String *Item_sum_avg::val_str(String *str) { + if (aggr) aggr->endup(); + if (hybrid_type == DECIMAL_RESULT) return val_string_from_decimal(str); +@@ -2571,9 +2633,11 @@ Item *Item_sum_variance::copy_or_same(THD *thd) { + If we're grouping, then we need some space to serialize variables into, to + pass around. + */ +-Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table) { ++Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table, MEM_ROOT *root) { + DBUG_TRACE; + Field *field; ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; ++ + if (group) { + /* + We must store both value and counter in the temporary table in one field. +@@ -2581,10 +2645,10 @@ Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table) { + and unpack on access. 
+ */ + field = +- new (*THR_MALLOC) Field_string(sizeof(double) * 2 + sizeof(longlong), ++ new (pq_check_root) Field_string(sizeof(double) * 2 + sizeof(longlong), + false, item_name.ptr(), &my_charset_bin); + } else +- field = new (*THR_MALLOC) Field_double( ++ field = new (pq_check_root) Field_double( + max_length, is_nullable(), item_name.ptr(), decimals, false, true); + + if (field != nullptr) field->init(table); +@@ -3305,7 +3369,9 @@ void Item_sum_count::reset_field() { + longlong nr = 0; + assert(aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); + +- if (!args[0]->is_nullable() || !args[0]->is_null()) nr = 1; ++ if (!args[0]->is_nullable() || !args[0]->is_null()) { ++ nr = is_fake ? args[0]->val_int() : 1; ++ } + int8store(result_field->field_ptr(), nr); + } + +@@ -3318,8 +3384,14 @@ void Item_sum_avg::reset_field() { + if (args[0]->null_value) { + arg_dec = &decimal_zero; + tmp = 0; +- } else +- tmp = 1; ++ } else { ++ uint32 extra_len; ++ if (pq_avg_type == PQ_REBUILD && args[0]->val_extra(&extra_len) != nullptr) { ++ tmp = sint8korr(args[0]->val_extra(&extra_len)); ++ } else { ++ tmp = 1; ++ } ++ } + my_decimal2binary(E_DEC_FATAL_ERROR, arg_dec, res, f_precision, f_scale); + res += dec_bin_size; + int8store(res, tmp); +@@ -3329,7 +3401,13 @@ void Item_sum_avg::reset_field() { + if (args[0]->null_value) + memset(res, 0, sizeof(double) + sizeof(longlong)); + else { +- longlong tmp = 1; ++ longlong tmp; ++ uint32 extra_len; ++ if (pq_avg_type == PQ_REBUILD && args[0]->val_extra(&extra_len) != nullptr) { ++ tmp = sint8korr(args[0]->val_extra(&extra_len)); ++ } else { ++ tmp = 1; ++ } + float8store(res, nr); + res += sizeof(double); + int8store(res, tmp); +@@ -3405,13 +3483,15 @@ void Item_sum_count::update_field() { + uchar *res = result_field->field_ptr(); + + nr = sint8korr(res); +- if (!args[0]->is_nullable() || !args[0]->is_null()) nr++; ++ if (!args[0]->is_nullable() || !args[0]->is_null()) { ++ nr += is_fake ? 
args[0]->val_int() : 1; ++ } + int8store(res, nr); + } + + void Item_sum_avg::update_field() { + DBUG_TRACE; +- longlong field_count; ++ ulonglong field_count; + uchar *res = result_field->field_ptr(); + + assert(aggr->Aggrtype() != Aggregator::DISTINCT_AGGREGATOR); +@@ -3421,12 +3501,19 @@ void Item_sum_avg::update_field() { + if (!args[0]->null_value) { + binary2my_decimal(E_DEC_FATAL_ERROR, res, dec_buffs + 1, f_precision, + f_scale); +- field_count = sint8korr(res + dec_bin_size); ++ field_count = uint8korr(res + dec_bin_size); + my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, arg_val, dec_buffs + 1); + my_decimal2binary(E_DEC_FATAL_ERROR, dec_buffs, res, f_precision, + f_scale); + res += dec_bin_size; +- field_count++; ++ if (pq_avg_type == PQ_REBUILD) { ++ uint32 extra_len; ++ if (args[0]->val_extra(&extra_len) != nullptr) { ++ field_count += uint8korr(args[0]->val_extra(&extra_len)); ++ } ++ } else { ++ field_count++; ++ } + int8store(res, field_count); + } + } else { +@@ -3439,7 +3526,14 @@ void Item_sum_avg::update_field() { + old_nr += nr; + float8store(res, old_nr); + res += sizeof(double); +- field_count++; ++ if (pq_avg_type == PQ_REBUILD) { ++ uint32 extra_len; ++ if (args[0]->val_extra(&extra_len) != nullptr) { ++ field_count += sint8korr(args[0]->val_extra(&extra_len)); ++ } ++ } else { ++ field_count++; ++ } + int8store(res, field_count); + } + } +@@ -3574,6 +3668,8 @@ Item_avg_field::Item_avg_field(Item_result res_type, Item_sum_avg *item) { + field = item->get_result_field(); + set_nullable(true); + hybrid_type = res_type; ++ avg_item = item; ++ pq_avg_type = item->pq_avg_type; + set_data_type(hybrid_type == DECIMAL_RESULT ? MYSQL_TYPE_NEWDECIMAL + : MYSQL_TYPE_DOUBLE); + prec_increment = item->prec_increment; +@@ -3595,6 +3691,10 @@ double Item_avg_field::val_real() { + res = (field->field_ptr() + sizeof(double)); + count = sint8korr(res); + ++ if (pq_avg_type == PQ_WORKER) { ++ return nr; ++ } ++ + if ((null_value = !count)) return 0.0; + return nr / (double)count; + } +@@ -3606,6 +3706,13 @@ my_decimal *Item_avg_field::val_decimal(my_decimal *dec_buf) { + if ((null_value = !count)) return nullptr; + + my_decimal dec_count, dec_field; ++ ++ if (pq_avg_type == PQ_WORKER) { ++ binary2my_decimal(E_DEC_FATAL_ERROR, field->ptr, dec_buf, f_precision, ++ f_scale); ++ return dec_buf; ++ } ++ + binary2my_decimal(E_DEC_FATAL_ERROR, field->field_ptr(), &dec_field, + f_precision, f_scale); + int2my_decimal(E_DEC_FATAL_ERROR, count, false, &dec_count); +@@ -3614,6 +3721,11 @@ my_decimal *Item_avg_field::val_decimal(my_decimal *dec_buf) { + return dec_buf; + } + ++const uchar *Item_avg_field::val_extra(uint32 *len) { ++ *len = pq_extra_len(false); ++ return (field->ptr + field->pack_length() - *len); ++} ++ + String *Item_avg_field::val_str(String *str) { + // fix_fields() never calls for this Item + if (hybrid_type == DECIMAL_RESULT) return val_string_from_decimal(str); +@@ -4218,8 +4330,9 @@ void Item_func_group_concat::cleanup() { + row_count = 0; + } + +-Field *Item_func_group_concat::make_string_field(TABLE *table_arg) const { ++Field *Item_func_group_concat::make_string_field(TABLE *table_arg, MEM_ROOT *root) const { + Field *field; ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; + assert(collation.collation); + /* + Use mbminlen to determine maximum number of characters. 
+@@ -4233,11 +4346,11 @@ Field *Item_func_group_concat::make_string_field(TABLE *table_arg) const { + const uint32 max_characters = + group_concat_max_len / collation.collation->mbminlen; + if (max_characters > CONVERT_IF_BIGGER_TO_BLOB) +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_blob(max_characters * collation.collation->mbmaxlen, + is_nullable(), item_name.ptr(), collation.collation, true); + else +- field = new (*THR_MALLOC) Field_varstring( ++ field = new (pq_check_root) Field_varstring( + max_characters * collation.collation->mbmaxlen, is_nullable(), + item_name.ptr(), table_arg->s, collation.collation); + +@@ -6196,8 +6309,8 @@ void Item_rollup_sum_switcher::print(const THD *thd, String *str, + } + } + +-Field *Item_rollup_sum_switcher::create_tmp_field(bool group, TABLE *table) { +- return master()->create_tmp_field(group, table); ++Field *Item_rollup_sum_switcher::create_tmp_field(bool group, TABLE *table, MEM_ROOT *root) { ++ return master()->create_tmp_field(group, table, root); + } + + void Item_rollup_sum_switcher::clear() { +@@ -6483,3 +6596,8 @@ void Item_sum_collect::reset_field() { + add(); + store_result_field(); + } ++ ++bool need_extra(Item_sum *ref_item) ++{ ++ return ref_item->sum_func() == Item_sum::AVG_FUNC; ++} +diff --git a/sql/item_sum.h b/sql/item_sum.h +index 53c77703..89ddfab9 100644 +--- a/sql/item_sum.h ++++ b/sql/item_sum.h +@@ -2,6 +2,7 @@ + #define ITEM_SUM_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -148,6 +149,8 @@ class Aggregator { + virtual my_decimal *arg_val_decimal(my_decimal *value) = 0; + /** Floating point value of being-aggregated argument */ + virtual double arg_val_real() = 0; ++ /** Longlong point value of being-aggregated argument */ ++ virtual longlong arg_val_int() = 0; + /** + NULLness of being-aggregated argument. + +@@ -497,6 +500,7 @@ class Item_sum : public Item_func { + WF allowance status afterwards. 
+ */ + nesting_map save_deny_window_func; ++ Item_sum *orig_func {nullptr}; + + protected: + /** +@@ -606,7 +610,8 @@ class Item_sum : public Item_func { + aggregator_clear(); + } + virtual void make_unique() { force_copy_fields = true; } +- virtual Field *create_tmp_field(bool group, TABLE *table); ++ virtual Field *create_tmp_field(bool group, TABLE *table, ++ MEM_ROOT *root = nullptr); + + /// argument used by walk method collect_grouped_aggregates ("cga") + struct Collect_grouped_aggregate_info { +@@ -752,6 +757,10 @@ class Item_sum : public Item_func { + */ + bool wf_common_init(); + ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; ++ virtual Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item); ++ virtual uint32 pq_extra_size() { return 0; } ++ + protected: + /* + Raise an error (ER_NOT_SUPPORTED_YET) with the detail that this +@@ -877,6 +886,7 @@ class Aggregator_distinct : public Aggregator { + void endup() override; + my_decimal *arg_val_decimal(my_decimal *value) override; + double arg_val_real() override; ++ longlong arg_val_int() override; + bool arg_is_null(bool use_null_value) override; + + bool unique_walk_function(void *element); +@@ -899,6 +909,7 @@ class Aggregator_simple : public Aggregator { + void endup() override {} + my_decimal *arg_val_decimal(my_decimal *value) override; + double arg_val_real() override; ++ longlong arg_val_int() override; + bool arg_is_null(bool use_null_value) override; + }; + +@@ -943,6 +954,7 @@ class Item_sum_num : public Item_sum { + return get_time_from_numeric(ltime); /* Decimal or real */ + } + void reset_field() override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_sum_int : public Item_sum_num { +@@ -979,10 +991,12 @@ class Item_sum_int : public Item_sum_num { + }; + + class Item_sum_sum : public Item_sum_num { ++ public: ++ my_decimal dec_buffs[2]; ++ + protected: + Item_result hybrid_type; + double sum; +- my_decimal dec_buffs[2]; + uint curr_dec_buff; + bool resolve_type(THD *thd) override; + /** +@@ -1025,6 +1039,8 @@ class Item_sum_sum : public Item_sum_num { + void update_field() override; + const char *func_name() const override { return "sum"; } + Item *copy_or_same(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_sum_count : public Item_sum_int { +@@ -1037,8 +1053,15 @@ class Item_sum_count : public Item_sum_int { + void cleanup() override; + + public: +- Item_sum_count(const POS &pos, Item *item_par, PT_window *w) +- : Item_sum_int(pos, item_par, w), count(0) {} ++ /* ++ * mark faked item_sum_count. 
when we have read one more ++ * records from table, the faked-item_sum_sum have the same result with the original ++ * Item_sum_count, then is_fake = false; ++ */ ++ bool is_fake{false}; //mark Item_sum_count ++ ++ Item_sum_count(const POS &pos, Item *item_par, PT_window *w, bool fake = false) ++ : Item_sum_int(pos, item_par, w), count(0), is_fake(fake) {} + Item_sum_count(Item_int *number) : Item_sum_int(number), count(0) {} + /** + Constructs an instance for COUNT(DISTINCT) +@@ -1051,11 +1074,11 @@ class Item_sum_count : public Item_sum_int { + */ + + Item_sum_count(const POS &pos, PT_item_list *list, PT_window *w) +- : Item_sum_int(pos, list, w), count(0) { ++ : Item_sum_int(pos, list, w), count(0), is_fake(false) { + set_distinct(true); + } + Item_sum_count(THD *thd, Item_sum_count *item) +- : Item_sum_int(thd, item), count(item->count) {} ++ : Item_sum_int(thd, item), count(item->count), is_fake(false) {} + enum Sumfunctype sum_func() const override { + return has_with_distinct() ? COUNT_DISTINCT_FUNC : COUNT_FUNC; + } +@@ -1075,6 +1098,9 @@ class Item_sum_count : public Item_sum_int { + void update_field() override; + const char *func_name() const override { return "count"; } + Item *copy_or_same(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, ++ Item *item) override; + }; + + /* Item to get the value of a stored sum function */ +@@ -1134,14 +1160,28 @@ class Item_sum_num_field : public Item_sum_hybrid_field { + bool is_null() override { return update_null_value() || null_value; } + }; + ++enum PqAvgType{ ++ PQ_LEADER, ++ PQ_WORKER, ++ PQ_REBUILD, ++ PQ_INVALID ++}; ++ + class Item_avg_field : public Item_sum_num_field { + public: + uint f_precision, f_scale, dec_bin_size; + uint prec_increment; ++ Item_sum_avg *avg_item; ++ PqAvgType pq_avg_type; + Item_avg_field(Item_result res_type, Item_sum_avg *item); + enum Type type() const override { return FIELD_AVG_ITEM; } + double val_real() override; + my_decimal *val_decimal(my_decimal *) override; ++ size_t pq_extra_len(bool) override { ++ return ((pq_avg_type == PQ_WORKER || pq_avg_type == PQ_LEADER) ++ ? sizeof(longlong) : 0); ++ }; ++ const uchar *val_extra(uint32 *len) override; + String *val_str(String *) override; + bool resolve_type(THD *) override { return false; } + const char *func_name() const override { +@@ -1280,6 +1320,7 @@ class Item_sum_avg final : public Item_sum_sum { + typedef Item_sum_sum super; + my_decimal m_avg_dec; + double m_avg; ++ PqAvgType pq_avg_type {PQ_INVALID}; + + Item_sum_avg(const POS &pos, Item *item_par, bool distinct, PT_window *w) + : Item_sum_sum(pos, item_par, distinct, w) {} +@@ -1305,12 +1346,21 @@ class Item_sum_avg final : public Item_sum_sum { + } + const char *func_name() const override { return "avg"; } + Item *copy_or_same(THD *thd) override; +- Field *create_tmp_field(bool group, TABLE *table) override; ++ Field *create_tmp_field(bool group, TABLE *table, MEM_ROOT *root) override; + void cleanup() override { + m_count = 0; + m_frame_null_count = 0; + Item_sum_sum::cleanup(); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, ++ Query_block *select, Item *item) override; ++ size_t pq_extra_len(bool group) override { ++ return ((!group && (pq_avg_type == PQ_WORKER || pq_avg_type == PQ_LEADER)) ++ ? 
sizeof(longlong) : 0); ++ } ++ const uchar *val_extra(uint32* len) override; + }; + + class Item_sum_variance; +@@ -1439,7 +1489,8 @@ class Item_sum_variance : public Item_sum_num { + return sample ? "var_samp" : "variance"; + } + Item *copy_or_same(THD *thd) override; +- Field *create_tmp_field(bool group, TABLE *table) override; ++ Field *create_tmp_field(bool group, TABLE *table, ++ MEM_ROOT *root = nullptr) override; + enum Item_result result_type() const override { return REAL_RESULT; } + void cleanup() override { + count = 0; +@@ -1475,6 +1526,8 @@ class Item_std_field final : public Item_variance_field { + */ + + class Item_sum_std : public Item_sum_variance { ++ typedef Item_sum_variance Item_supper; ++ + public: + Item_sum_std(const POS &pos, Item *item_par, uint sample_arg, PT_window *w) + : Item_sum_variance(pos, item_par, sample_arg, w) {} +@@ -1645,12 +1698,13 @@ class Item_sum_hybrid : public Item_sum { + void cleanup() override; + bool any_value() { return was_values; } + void no_rows_in_result() override; +- Field *create_tmp_field(bool group, TABLE *table) override; ++ Field *create_tmp_field(bool group, TABLE *table, MEM_ROOT *root = nullptr) override; + bool uses_only_one_row() const override { return m_optimize; } + bool add() override; + Item *copy_or_same(THD *thd) override; + bool check_wf_semantics1(THD *thd, Query_block *select, + Window_evaluation_requirements *r) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + + private: + /* +@@ -1675,6 +1729,8 @@ class Item_sum_min final : public Item_sum_hybrid { + : Item_sum_hybrid(thd, item) {} + enum Sumfunctype sum_func() const override { return MIN_FUNC; } + const char *func_name() const override { return "min"; } ++ Item* pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item) override; + + private: + Item_sum_min *clone_hybrid(THD *thd) const override; +@@ -1689,6 +1745,8 @@ class Item_sum_max final : public Item_sum_hybrid { + : Item_sum_hybrid(thd, item) {} + enum Sumfunctype sum_func() const override { return MAX_FUNC; } + const char *func_name() const override { return "max"; } ++ Item* pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item) override; + + private: + Item_sum_max *clone_hybrid(THD *thd) const override; +@@ -1822,6 +1880,7 @@ class Item_sum_bit : public Item_sum { + bool add() override; + /// @returns true iff this is BIT_AND. 
+ inline bool is_and() const { return reset_bits != 0; } ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + + private: + /** +@@ -1866,6 +1925,8 @@ class Item_sum_or final : public Item_sum_bit { + Item_sum_or(THD *thd, Item_sum_or *item) : Item_sum_bit(thd, item) {} + const char *func_name() const override { return "bit_or"; } + Item *copy_or_same(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_sum_and final : public Item_sum_bit { +@@ -1876,6 +1937,8 @@ class Item_sum_and final : public Item_sum_bit { + Item_sum_and(THD *thd, Item_sum_and *item) : Item_sum_bit(thd, item) {} + const char *func_name() const override { return "bit_and"; } + Item *copy_or_same(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_sum_xor final : public Item_sum_bit { +@@ -1888,6 +1951,8 @@ class Item_sum_xor final : public Item_sum_bit { + Item_sum_xor(THD *thd, Item_sum_xor *item) : Item_sum_bit(thd, item) {} + const char *func_name() const override { return "bit_xor"; } + Item *copy_or_same(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ Item_sum *pq_rebuild_sum_func(THD *thd, Query_block *select, Item *item) override; + }; + + /* +@@ -2133,7 +2198,8 @@ class Item_func_group_concat final : public Item_sum { + enum Sumfunctype sum_func() const override { return GROUP_CONCAT_FUNC; } + const char *func_name() const override { return "group_concat"; } + Item_result result_type() const override { return STRING_RESULT; } +- Field *make_string_field(TABLE *table_arg) const override; ++ Field *make_string_field(TABLE *table_arg, ++ MEM_ROOT *root = nullptr) const override; + void clear() override; + bool add() override; + void reset_field() override { assert(0); } // not used +@@ -2697,7 +2763,7 @@ class Item_rollup_sum_switcher final : public Item_sum { + } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; +- Field *create_tmp_field(bool group, TABLE *table) override; ++ Field *create_tmp_field(bool group, TABLE *table, MEM_ROOT *root = nullptr) override; + + enum Sumfunctype sum_func() const override { return master()->sum_func(); } + enum Sumfunctype real_sum_func() const override { +diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h +index 21bf2eae..76984ae2 100644 +--- a/sql/item_timefunc.h ++++ b/sql/item_timefunc.h +@@ -2,6 +2,7 @@ + #define ITEM_TIMEFUNC_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -69,6 +70,7 @@ class Item_func_period_add final : public Item_int_func { + longlong val_int() override; + const char *func_name() const override { return "period_add"; } + bool resolve_type(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_period_diff final : public Item_int_func { +@@ -78,6 +80,7 @@ class Item_func_period_diff final : public Item_int_func { + longlong val_int() override; + const char *func_name() const override { return "period_diff"; } + bool resolve_type(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_to_days final : public Item_int_func { +@@ -93,6 +96,7 @@ class Item_func_to_days final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_to_seconds final : public Item_int_func { +@@ -118,6 +122,7 @@ class Item_func_to_seconds final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_dayofmonth final : public Item_int_func { +@@ -132,6 +137,7 @@ class Item_func_dayofmonth final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -166,6 +172,7 @@ class Item_func_month final : public Item_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_monthname final : public Item_str_func { +@@ -181,6 +188,7 @@ class Item_func_monthname final : public Item_str_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_dayofyear final : public Item_int_func { +@@ -194,6 +202,7 @@ class Item_func_dayofyear final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_hour final : public Item_int_func { +@@ -207,6 +216,7 @@ class Item_func_hour final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_time_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_minute final : public Item_int_func { +@@ -220,6 +230,7 @@ class Item_func_minute final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_time_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_quarter final : public Item_int_func { +@@ -233,6 +244,7 @@ class Item_func_quarter final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_second final : public Item_int_func { +@@ -246,6 +258,7 @@ class Item_func_second final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_time_args(); + } ++ Item 
*pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_week final : public Item_int_func { +@@ -261,6 +274,7 @@ class Item_func_week final : public Item_int_func { + const char *func_name() const override { return "week"; } + enum Functype functype() const override { return WEEK_FUNC; } + bool resolve_type(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_yearweek final : public Item_int_func { +@@ -274,6 +288,7 @@ class Item_func_yearweek final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_year final : public Item_int_func { +@@ -289,6 +304,7 @@ class Item_func_year final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_typecast_year final : public Item_int_func { +@@ -300,6 +316,7 @@ class Item_typecast_year final : public Item_int_func { + const char *func_name() const override { return "cast_as_year"; } + enum Functype functype() const override { return TYPECAST_FUNC; } + bool resolve_type(THD *thd) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -338,6 +355,7 @@ class Item_func_weekday : public Item_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_date_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -363,6 +381,7 @@ class Item_func_dayname final : public Item_func_weekday { + enum Item_result result_type() const override { return STRING_RESULT; } + bool resolve_type(THD *thd) override; + bool check_partition_func_processor(uchar *) override { return true; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* +@@ -446,6 +465,8 @@ class Item_func_unix_timestamp final : public Item_timeval_func { + return ((func_arg->source == VGS_GENERATED_COLUMN) || + (func_arg->source == VGS_CHECK_CONSTRAINT)); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_time_to_sec final : public Item_int_func { +@@ -460,6 +481,7 @@ class Item_func_time_to_sec final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_time_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -561,6 +583,7 @@ class Item_temporal_hybrid_func : public Item_str_func { + } + bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override; + bool get_time(MYSQL_TIME *ltime) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + /* +@@ -859,6 +882,7 @@ class Item_date_literal final : public Item_date_func { + table_map not_null_tables() const override { return used_tables(); } + void cleanup() override { assert(marker == MARKER_NONE); } + bool eq(const Item *item, bool binary_cmp) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -866,6 +890,7 @@ class Item_date_literal final : public Item_date_func { + */ + class Item_time_literal final : public Item_time_func { + MYSQL_TIME_cache cached_time; ++ uint pq_dec_arg; + + public: + /** +@@ -874,6 +899,7 @@ class Item_time_literal final : public Item_time_func { + @param dec_arg number of fractional digits in ltime. 
+ */ + Item_time_literal(MYSQL_TIME *ltime, uint dec_arg) { ++ pq_dec_arg = dec_arg; + set_data_type_time(std::min(dec_arg, uint(DATETIME_MAX_DECIMALS))); + cached_time.set_time(ltime, decimals); + fixed = true; +@@ -900,6 +926,7 @@ class Item_time_literal final : public Item_time_func { + table_map not_null_tables() const override { return used_tables(); } + void cleanup() override { assert(marker == MARKER_NONE); } + bool eq(const Item *item, bool binary_cmp) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -944,6 +971,7 @@ class Item_datetime_literal final : public Item_datetime_func { + table_map not_null_tables() const override { return used_tables(); } + void cleanup() override { assert(marker == MARKER_NONE); } + bool eq(const Item *item, bool binary_cmp) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -981,6 +1009,8 @@ class Item_func_at_time_zone final : public Item_datetime_func { + + const char *specifier_string() const { return m_specifier_string; } + ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ + protected: + bool check_type() const; + +@@ -1045,6 +1075,7 @@ class Item_func_curtime_local final : public Item_func_curtime { + Item_func_curtime_local(const POS &pos, uint8 dec_arg) + : Item_func_curtime(pos, dec_arg) {} + const char *func_name() const override { return "curtime"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_curtime_utc final : public Item_func_curtime { +@@ -1055,6 +1086,7 @@ class Item_func_curtime_utc final : public Item_func_curtime { + Item_func_curtime_utc(const POS &pos, uint8 dec_arg) + : Item_func_curtime(pos, dec_arg) {} + const char *func_name() const override { return "utc_time"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1099,6 +1131,7 @@ class Item_func_curdate_local final : public Item_func_curdate { + public: + explicit Item_func_curdate_local(const POS &pos) : Item_func_curdate(pos) {} + const char *func_name() const override { return "curdate"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_curdate_utc final : public Item_func_curdate { +@@ -1108,6 +1141,7 @@ class Item_func_curdate_utc final : public Item_func_curdate { + public: + Item_func_curdate_utc(const POS &pos) : Item_func_curdate(pos) {} + const char *func_name() const override { return "utc_date"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -1207,6 +1241,8 @@ class Item_func_sysdate_local final : public Item_datetime_func { + table_map get_initial_pseudo_tables() const override { + return RAND_TABLE_BIT; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_from_days final : public Item_date_func { +@@ -1222,6 +1258,8 @@ class Item_func_from_days final : public Item_date_func { + if (param_type_is_default(thd, 0, 1, MYSQL_TYPE_LONGLONG)) return true; + return Item_date_func::resolve_type(thd); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_date_format final : public Item_str_func { +@@ -1240,6 +1278,8 @@ class Item_func_date_format final : public Item_str_func { + bool resolve_type(THD *thd) override; + uint format_length(const String *format); + bool eq(const Item *item, bool binary_cmp) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class 
Item_func_from_unixtime final : public Item_datetime_func { +@@ -1250,6 +1290,7 @@ class Item_func_from_unixtime final : public Item_datetime_func { + enum Functype functype() const override { return FROM_UNIXTIME_FUNC; } + bool resolve_type(THD *thd) override; + bool get_date(MYSQL_TIME *res, my_time_flags_t fuzzy_date) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* +@@ -1280,6 +1321,7 @@ class Item_func_convert_tz final : public Item_datetime_func { + bool resolve_type(THD *) override; + bool get_date(MYSQL_TIME *res, my_time_flags_t fuzzy_date) override; + void cleanup() override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_sec_to_time final : public Item_time_func { +@@ -1295,6 +1337,7 @@ class Item_func_sec_to_time final : public Item_time_func { + } + const char *func_name() const override { return "sec_to_time"; } + bool get_time(MYSQL_TIME *ltime) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + extern const char *interval_names[]; +@@ -1329,6 +1372,7 @@ class Item_date_add_interval final : public Item_temporal_hybrid_func { + bool eq(const Item *item, bool binary_cmp) const override; + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_extract final : public Item_int_func { +@@ -1381,6 +1425,9 @@ class Item_extract final : public Item_int_func { + } + return true; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_typecast_date final : public Item_date_func { +@@ -1406,6 +1453,8 @@ class Item_typecast_date final : public Item_date_func { + bool is_explicit_cast() const { return m_explicit_cast; } + bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzy_date) override; + const char *cast_type() const { return "date"; } ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_typecast_time final : public Item_time_func { +@@ -1443,6 +1492,9 @@ class Item_typecast_time final : public Item_time_func { + set_nullable(true); + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_typecast_datetime final : public Item_datetime_func { +@@ -1481,6 +1533,8 @@ class Item_typecast_datetime final : public Item_datetime_func { + return false; + } + bool get_date(MYSQL_TIME *res, my_time_flags_t fuzzy_date) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_makedate final : public Item_date_func { +@@ -1495,6 +1549,8 @@ class Item_func_makedate final : public Item_date_func { + if (param_type_is_default(thd, 0, -1, MYSQL_TYPE_LONGLONG)) return true; + return Item_date_func::resolve_type(thd); + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_add_time final : public Item_temporal_hybrid_func { +@@ -1522,6 +1578,7 @@ class Item_func_add_time final : public Item_temporal_hybrid_func { + const char *func_name() const override { return "add_time"; } + enum Functype functype() const override { return ADDTIME_FUNC; } + int get_sign() const { return sign; } ++ Item *pq_clone(THD *thd, Query_block *select) override; + 
}; + + class Item_func_timediff final : public Item_time_func { +@@ -1544,6 +1601,7 @@ class Item_func_timediff final : public Item_time_func { + return false; + } + bool get_time(MYSQL_TIME *ltime) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_maketime final : public Item_time_func { +@@ -1561,6 +1619,7 @@ class Item_func_maketime final : public Item_time_func { + } + const char *func_name() const override { return "maketime"; } + bool get_time(MYSQL_TIME *ltime) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_microsecond final : public Item_int_func { +@@ -1574,6 +1633,7 @@ class Item_func_microsecond final : public Item_int_func { + bool check_valid_arguments_processor(uchar *) override { + return !has_time_args(); + } ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_timestamp_diff final : public Item_int_func { +@@ -1594,6 +1654,7 @@ class Item_func_timestamp_diff final : public Item_int_func { + } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + enum date_time_format { +@@ -1619,6 +1680,7 @@ class Item_func_get_format final : public Item_str_ascii_func { + } + void print(const THD *thd, String *str, + enum_query_type query_type) const override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_str_to_date final : public Item_temporal_hybrid_func { +@@ -1633,6 +1695,8 @@ class Item_func_str_to_date final : public Item_temporal_hybrid_func { + : Item_temporal_hybrid_func(pos, a, b) {} + const char *func_name() const override { return "str_to_date"; } + bool resolve_type(THD *) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; ++ bool pq_copy_from(THD *thd, Query_block *select, Item *item) override; + }; + + class Item_func_last_day final : public Item_date_func { +@@ -1656,6 +1720,7 @@ class Item_func_internal_update_time final : public Item_datetime_func { + const char *func_name() const override { return "internal_update_time"; } + bool resolve_type(THD *thd) override; + bool get_date(MYSQL_TIME *res, my_time_flags_t fuzzy_date) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class Item_func_internal_check_time final : public Item_datetime_func { +@@ -1666,6 +1731,7 @@ class Item_func_internal_check_time final : public Item_datetime_func { + const char *func_name() const override { return "internal_check_time"; } + bool resolve_type(THD *thd) override; + bool get_date(MYSQL_TIME *res, my_time_flags_t fuzzy_date) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /* Function prototypes */ +diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h +index ba0ba0e0..5b2957d4 100644 +--- a/sql/item_xmlfunc.h ++++ b/sql/item_xmlfunc.h +@@ -2,6 +2,7 @@ + #define ITEM_XMLFUNC_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -74,6 +75,7 @@ class Item_xml_str_func : public Item_str_func { + if (!nodeset_func_permanent) nodeset_func = nullptr; + } + bool check_function_as_value_generator(uchar *) override { return false; } ++ enum Functype functype() const override { return XML_FUNC; } + + protected: + /** +diff --git a/sql/join_optimizer/access_path.cc b/sql/join_optimizer/access_path.cc +index 2ea9c00a..6558a662 100644 +--- a/sql/join_optimizer/access_path.cc ++++ b/sql/join_optimizer/access_path.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2020, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -212,6 +213,10 @@ table_map GetUsedTables(const AccessPath *path) { + .table->pos_in_table_list->map(); + case AccessPath::CACHE_INVALIDATOR: + return GetUsedTables(path->cache_invalidator().child); ++ case AccessPath::PARALLEL_SCAN: ++ return path->parallel_scan().table->pos_in_table_list->map(); ++ case AccessPath::PQBLOCK_SCAN: ++ return path->pqblock_scan().table->pos_in_table_list->map(); + } + assert(false); + return 0; +@@ -706,6 +711,20 @@ unique_ptr_destroy_only CreateIteratorFromAccessPath( + NewIterator(thd, move(child), param.name); + break; + } ++ case AccessPath::PARALLEL_SCAN: { ++ const auto ¶m = path->parallel_scan(); ++ iterator = NewIterator( ++ thd, param.tab, param.table, nullptr, join, param.gather, ++ param.stable_sort, param.ref_length); ++ break; ++ } ++ case AccessPath::PQBLOCK_SCAN: { ++ const auto ¶m = path->pqblock_scan(); ++ iterator = NewIterator( ++ thd, param.table, param.table->record[0], &join->examined_rows, ++ param.gather, param.need_rowid); ++ break; ++ } + } + + path->iterator = iterator.get(); +diff --git a/sql/join_optimizer/access_path.h b/sql/join_optimizer/access_path.h +index 86526e82..b01e1f7d 100644 +--- a/sql/join_optimizer/access_path.h ++++ b/sql/join_optimizer/access_path.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2020, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -37,6 +38,7 @@ + + class Common_table_expr; + class Filesort; ++class Gather_operator; + class Item; + class Item_func_eq; + class JOIN; +@@ -197,7 +199,9 @@ struct AccessPath { + WEEDOUT, + REMOVE_DUPLICATES, + ALTERNATIVE, +- CACHE_INVALIDATOR ++ CACHE_INVALIDATOR, ++ PARALLEL_SCAN, ++ PQBLOCK_SCAN + } type; + + /// Whether this access path counts as one that scans a base table, +@@ -558,6 +562,22 @@ struct AccessPath { + assert(type == CACHE_INVALIDATOR); + return u.cache_invalidator; + } ++ auto ¶llel_scan() { ++ assert(type == PARALLEL_SCAN); ++ return u.parallel_scan; ++ } ++ const auto ¶llel_scan() const { ++ assert(type == PARALLEL_SCAN); ++ return u.parallel_scan; ++ } ++ auto &pqblock_scan() { ++ assert(type == PQBLOCK_SCAN); ++ return u.pqblock_scan; ++ } ++ const auto &pqblock_scan() const { ++ assert(type == PQBLOCK_SCAN); ++ return u.pqblock_scan; ++ } + + private: + // We'd prefer if this could be an std::variant, but we don't have C++17 yet. 
+@@ -762,6 +782,18 @@ struct AccessPath { + AccessPath *child; + const char *name; + } cache_invalidator; ++ struct { ++ QEP_TAB *tab; ++ TABLE *table; ++ Gather_operator *gather; ++ bool stable_sort; /** determine whether using stable sort */ ++ uint ref_length; ++ } parallel_scan; ++ struct { ++ TABLE *table; ++ Gather_operator *gather; ++ bool need_rowid; ++ } pqblock_scan; + } u; + }; + static_assert(std::is_trivially_destructible::value, +@@ -1226,6 +1258,31 @@ inline AccessPath *NewInvalidatorAccessPath(THD *thd, AccessPath *child, + return path; + } + ++inline AccessPath *NewParallelScanAccessPath(THD *thd, QEP_TAB *tab, ++ TABLE *table, ++ Gather_operator *gather, ++ bool stable_sort, uint ref_length) { ++ AccessPath *path = new (thd->mem_root) AccessPath; ++ path->type = AccessPath::PARALLEL_SCAN; ++ path->parallel_scan().tab = tab; ++ path->parallel_scan().table = table; ++ path->parallel_scan().gather = gather; ++ path->parallel_scan().stable_sort = stable_sort; ++ path->parallel_scan().ref_length = ref_length; ++ return path; ++} ++ ++inline AccessPath *NewPQBlockScanAccessPath(THD *thd, TABLE *table, ++ Gather_operator *gather, ++ bool need_rowid) { ++ AccessPath *path = new (thd->mem_root) AccessPath; ++ path->type = AccessPath::PQBLOCK_SCAN; ++ path->pqblock_scan().table = table; ++ path->pqblock_scan().gather = gather; ++ path->pqblock_scan().need_rowid = need_rowid; ++ return path; ++} ++ + void FindTablesToGetRowidFor(AccessPath *path); + + unique_ptr_destroy_only CreateIteratorFromAccessPath( +diff --git a/sql/join_optimizer/explain_access_path.cc b/sql/join_optimizer/explain_access_path.cc +index 1b7a586a..468360c5 100644 +--- a/sql/join_optimizer/explain_access_path.cc ++++ b/sql/join_optimizer/explain_access_path.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2020, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -801,6 +802,65 @@ ExplainData ExplainAccessPath(const AccessPath *path, JOIN *join) { + path->cache_invalidator().name + ")"); + children.push_back({path->cache_invalidator().child}); + break; ++ case AccessPath::PARALLEL_SCAN: { ++ description.push_back(string("Parallel scan on ") + ++ path->parallel_scan().table->alias + ++ path->parallel_scan().table->file->explain_extra()); ++ Gather_operator *gather = path->parallel_scan().gather; ++ AccessPath *root_path = gather->m_workers[0]->thd_worker->lex->unit->m_root_access_path; ++ children.push_back({root_path, "", gather->m_template_join}); ++ break; ++ } ++ case AccessPath::PQBLOCK_SCAN: { ++ auto ¶m = path->pqblock_scan(); ++ Gather_operator *gather = param.gather; ++ TABLE *table = param.table; ++ const KEY *key = &(table->key_info[gather->keyno]); ++ int tab_idx = gather->m_template_join->pq_tab_idx; ++ assert(tab_idx >= (int)gather->m_template_join->const_tables && ++ gather->m_template_join->qep_tab[tab_idx].do_parallel_scan); ++ QEP_TAB *tab = &gather->m_template_join->qep_tab[tab_idx]; ++ ++ string str; ++ switch (tab->type()) { ++ case JT_ALL: ++ str = string("PQblock scan on ") + table->alias; ++ break; ++ case JT_RANGE: ++ str = string("PQblock range scan on ") + table->alias + " using " + ++ key->name; ++ if (table->file->pushed_idx_cond != nullptr) { ++ str += string(", with index condition: ") + ++ ItemToString(table->file->pushed_idx_cond); ++ } ++ break; ++ case JT_REF: ++ str = string("PQblock lookup on ") + table->alias + ++ string(" using ") + key->name + " (" + ++ RefToString(tab->ref(), key, /*include_nulls=*/false); ++ if (tab->m_reversed_access) { ++ str += string("; iterate backwards"); ++ } ++ str += string(")"); ++ if (table->file->pushed_idx_cond != nullptr) { ++ str += string(", with index condition: ") + ++ ItemToString(table->file->pushed_idx_cond); ++ } ++ break; ++ case JT_INDEX_SCAN: ++ str = ++ string("PQblock scan on ") + table->alias + " using " + key->name; ++ if (tab->m_reversed_access) { ++ str += string(" (reverse)"); ++ } ++ break; ++ default: ++ assert(0); ++ break; ++ } ++ description.push_back(str + table->file->explain_extra()); ++ break; ++ } + } + if (path->num_output_rows >= 0.0) { + double first_row_cost; +diff --git a/sql/join_optimizer/walk_access_paths.h b/sql/join_optimizer/walk_access_paths.h +index b1b7fabd..2f1a30ee 100644 +--- a/sql/join_optimizer/walk_access_paths.h ++++ b/sql/join_optimizer/walk_access_paths.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2020, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -86,6 +87,8 @@ void WalkAccessPaths(AccessPath *path, const JOIN *join, + case AccessPath::ZERO_ROWS_AGGREGATED: + case AccessPath::MATERIALIZED_TABLE_FUNCTION: + case AccessPath::UNQUALIFIED_COUNT: ++ case AccessPath::PARALLEL_SCAN: ++ case AccessPath::PQBLOCK_SCAN: + // No children. + return; + case AccessPath::NESTED_LOOP_JOIN: +diff --git a/sql/lex.h b/sql/lex.h +index 74fb8362..e76b0abd 100644 +--- a/sql/lex.h ++++ b/sql/lex.h +@@ -2,6 +2,7 @@ + #define LEX_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -859,6 +860,8 @@ static const SYMBOL symbols[] = { + /* + Insert new optimizer hint keywords after that commentary: + */ ++ {SYM_H("PQ", PQ_HINT)}, ++ {SYM_H("NO_PQ", NO_PQ_HINT)}, + {SYM_H("BKA", BKA_HINT)}, + {SYM_H("BNL", BNL_HINT)}, + {SYM_H("DUPSWEEDOUT", DUPSWEEDOUT_HINT)}, +diff --git a/sql/mdl.cc b/sql/mdl.cc +index 7aba6f20..d2bfbc0d 100644 +--- a/sql/mdl.cc ++++ b/sql/mdl.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2007, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -52,6 +53,7 @@ + #include "prealloced_array.h" + #include "sql/debug_sync.h" + #include "sql/thr_malloc.h" ++#include "sql/sql_class.h" + + extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info; + +@@ -2635,6 +2637,8 @@ bool equivalent(const MDL_ticket *a, const MDL_ticket *b, + } + #endif /* not defined NDEBUG */ + ++THD *invalid_thd = (THD *)0x1; ++ + /** + Check whether the context already holds a compatible lock ticket + on an object. +@@ -2654,6 +2658,13 @@ MDL_ticket *MDL_context::find_ticket(MDL_request *mdl_request, + enum_mdl_duration *result_duration) { + auto h = m_ticket_store.find(*mdl_request); + *result_duration = h.m_dur; ++ // PQ worker need to judge its leader got this ticket or not ++ // during xa recover, get_thd() maybe NULL ++ if (!h.m_ticket && get_thd() && get_thd() != invalid_thd && ++ get_thd()->pq_leader) { ++ return get_thd()->pq_leader->mdl_context.find_ticket(mdl_request, ++ result_duration); ++ } + return h.m_ticket; + } + +diff --git a/sql/memory/aligned_atomic.h b/sql/memory/aligned_atomic.h +index 3fed8df6..a668bce1 100644 +--- a/sql/memory/aligned_atomic.h ++++ b/sql/memory/aligned_atomic.h +@@ -77,7 +77,9 @@ static inline size_t _cache_line_size() { + + #elif defined(__linux__) + static inline size_t _cache_line_size() { +- return sysconf(_SC_LEVEL1_DCACHE_LINESIZE); ++ long size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); ++ if (size == -1 || size == 0) return 64; ++ return static_cast(size); + } + + #else +diff --git a/sql/msg_queue.cc b/sql/msg_queue.cc +new file mode 100644 +index 00000000..79dfb740 +--- /dev/null ++++ b/sql/msg_queue.cc +@@ -0,0 +1,403 @@ ++/* Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "msg_queue.h" ++#include ++ ++#ifndef NDEBUG ++bool dbug_pq_worker_stall = 0; ++#endif ++ ++/** ++ * send data of nbytes to MQ ++ * @nbytes ++ * @data ++ * @written: number of bytes that have written into MQ ++ * @nowait: the mode of write data, i.e., blocking/non-blocking mode ++ * ++ * @return the send status ++ */ ++MQ_RESULT MQueue_handle::send_bytes(uint32 nbytes, const void *data, ++ uint32 *written, bool nowait) { ++ uint32 used; ++ uint32 ringsize = m_queue->m_ring_size; ++ uint32 sent = 0; ++ uint64 available = 0; ++ ++ /** only worker thread can send data to message queue */ ++ THD *thd = current_thd; ++ assert(!m_queue->m_sender_event->PQ_caller || ++ thd == m_queue->m_sender_event->PQ_caller); ++ ++ uint64 rb, wb; ++ while (!thd->is_pq_error() && (sent < nbytes)) { ++ /** atomically obtain the read position */ ++ rb = atomic_read_u64(&m_queue->m_bytes_read); ++ /** atomically obtain the write position */ ++ wb = atomic_read_u64(&m_queue->m_bytes_written); ++ assert(wb >= rb); ++ ++ used = wb - rb; ++ assert(used <= ringsize); ++ uint32x2_t v_a = {ringsize, nbytes}; ++ uint32x2_t v_b = {used, sent}; ++ uint64x2_t v = vsubl_u32(v_a, v_b); ++ uint64 m_a = vgetq_lane_u64(v, 0); ++ uint64 m_b = vgetq_lane_u64(v, 1); ++ available = std::min(m_a, m_b); ++ ++ compiler_barrier(); ++ if (m_queue->detached == MQ_HAVE_DETACHED) { ++ *written = sent; ++ return MQ_DETACHED; ++ } ++ ++ if (m_queue->detached == MQ_TMP_DETACHED) { ++ *written = sent; ++ return MQ_SUCCESS; ++ } ++ ++ /** ++ * Considering the case that available = 0: sender first notifies receiver ++ * to receive data from MQ. If nowait = false, then sender enters into a ++ * blocking status until receiver has received data from MQ; otherwise, ++ * sender directly returns. ++ */ ++ if (available == 0) { ++ /** notify receiver to receive data from MQ */ ++ end_wait(m_queue->m_receiver_event); ++ /** blocking mode by default, i.e, nowait = false */ ++ if (nowait) { ++ *written = sent; ++ return MQ_WOULD_BLOCK; ++ } ++ /** sender enters into the blocking wait status */ ++ set_wait(m_queue->m_sender_event); ++ /** reset the wait status */ ++ reset_wait(m_queue->m_sender_event); ++ } else { ++ uint32 offset; ++ uint32 sent_once; ++ ++ /** compute the real write position in ring array */ ++ offset = MOD(wb, ringsize); ++ sent_once = std::min((uint32)available, ringsize - offset); ++ ++ /** this barrier ensures that memcpy() is finished before end_wait() */ ++ memory_barrier(); ++ memcpy(&m_queue->m_buffer[offset], ++ reinterpret_cast(const_cast(data)) + sent, ++ sent_once); ++ sent += sent_once; ++ ++ /** atomically update the write position */ ++ atomic_inc_bytes_written(sent_once); ++ /** notify receiver to receive data */ ++ end_wait(m_queue->m_receiver_event); ++ } ++#ifndef NDEBUG ++ if (dbug_pq_worker_stall) { ++ sleep(1); // 1 second ++ } ++#endif ++ } ++ ++ if (thd->is_pq_error()) { ++ set_datched_status(MQ_HAVE_DETACHED); ++ return MQ_DETACHED; ++ } ++ ++ assert(sent == nbytes); ++ *written = sent; ++ return MQ_SUCCESS; ++} ++ ++/** ++ * sending data to message queue is divided into two steps: ++ * (s1) send the message length ++ * (s2) send the whole message ++ * ++ * Note that: the sending process is in a blocking mode. 
++ * @data: the message data ++ * @len: the message length ++ * @nowait: the sending status ++ */ ++MQ_RESULT MQueue_handle::send(const void *data, uint32 len, bool nowait) { ++ MQ_RESULT res; ++ uint32 nbytes = len; ++ uint32 written; ++ ++ /** (1) write the message length into MQ */ ++ res = send_bytes(WORD_LENGTH, (char *)&nbytes, &written, nowait); ++ if (res != MQ_SUCCESS) { ++ assert(res == MQ_DETACHED); ++ return res; ++ } ++ assert((!written && m_queue->detached == MQ_TMP_DETACHED) || ++ written == WORD_LENGTH); ++ ++ /** (2) write the message data into MQ */ ++ res = send_bytes(nbytes, data, &written, nowait); ++ if (res != MQ_SUCCESS) { ++ assert(res == MQ_DETACHED); ++ return res; ++ } ++ assert((!written && m_queue->detached == MQ_TMP_DETACHED) || ++ written == nbytes); ++ return MQ_SUCCESS; ++} ++ ++/** sending message to MQ in a Field_raw_data manner */ ++MQ_RESULT MQueue_handle::send(Field_raw_data *fm) { ++ MQ_RESULT res = MQ_SUCCESS; ++ uint32 written; ++ ++ /** (s1) sending the variable-field's length_bytes */ ++ if (fm->m_var_len) { ++ res = send_bytes(1, (void *)&fm->m_var_len, &written); ++ assert((res == MQ_SUCCESS && written == 1) || res == MQ_DETACHED || ++ (!written && m_queue->detached == MQ_TMP_DETACHED)); ++ } ++ ++ /** (s2) sending the data of field->ptr */ ++ if (MQ_SUCCESS == res) { ++ res = send_bytes(fm->m_len, fm->m_ptr, &written); ++ assert((res == MQ_SUCCESS && written == fm->m_len) || ++ res == MQ_DETACHED || ++ (!written && m_queue->detached == MQ_TMP_DETACHED)); ++ } ++ return res; ++} ++ ++/** ++ * receive data of bytes_needed from MQ ++ * @bytes_needed: number of bytes needed from the read position ++ * @nbytesp: the acutal bytes read ++ * @datap: the data read ++ * @nowait: reading mode, nowait=true by default ++ * @return ++ */ ++MQ_RESULT MQueue_handle::receive_bytes(uint32 bytes_needed, uint32 *nbytesp, ++ void *datap, bool nowait) { ++ uint64 rb, wb; ++ uint32 used, offset; ++ ++ *nbytesp = 0; ++ uint32 ringsize = m_queue->m_ring_size; ++ THD *thd = current_thd; ++ assert(!m_queue->m_receiver_event->PQ_caller || ++ thd == m_queue->m_receiver_event->PQ_caller); ++ while (!thd->is_killed() && !thd->pq_error) { ++ rb = atomic_read_u64(&m_queue->m_bytes_read) + m_consume_pending; ++ wb = atomic_read_u64(&m_queue->m_bytes_written); ++ assert(wb >= rb); ++ ++ used = wb - rb; ++ assert(used <= ringsize); ++ offset = MOD(rb, ringsize); ++ ++ /** we have enough space and then directly read bytes_needed data into datap ++ */ ++ if (used >= bytes_needed) { ++ /** (s1) read data located in [offset, ..., ringsize] */ ++ if (offset + bytes_needed <= ringsize) { ++ memcpy(datap, &m_queue->m_buffer[offset], bytes_needed); ++ } else { ++ /** (s2) read data located in [offset, ringsize], [0, bytes_needed - ++ * (ringsize - offset))] */ ++ int part_1 = ringsize - offset; ++ int part_2 = bytes_needed - part_1; ++ ++ memcpy(datap, &m_queue->m_buffer[offset], part_1); ++ memcpy((char *)datap + part_1, &m_queue->m_buffer[0], part_2); ++ } ++ ++ *nbytesp = bytes_needed; ++ memory_barrier(); ++ ++ /** notify sender that there is available space in MQ */ ++ end_wait(m_queue->m_sender_event); ++ return MQ_SUCCESS; ++ } ++ ++ /** ++ * there are not enough data for receiver, and receiver only ++ * receives the data located into [offset, ringsize]. 
++ */ ++ if (offset + used >= ringsize) { ++ memcpy(datap, &m_queue->m_buffer[offset], ringsize - offset); ++ *nbytesp = ringsize - offset; ++ ++ memory_barrier(); ++ end_wait(m_queue->m_sender_event); ++ return MQ_SUCCESS; ++ } ++ ++ /** ++ * if m_queue is detached and there are still data in m_queue, ++ * receiver can receive data until it reads all data. ++ */ ++ if (m_queue->detached == MQ_HAVE_DETACHED) { ++ read_barrier(); ++ if (wb != atomic_read_u64(&m_queue->m_bytes_written)) { ++ continue; ++ } ++ return MQ_DETACHED; ++ } ++ ++ /** ++ * updating the read position, note that ++ * { ++ * atomic_inc_bytes_read(m_consume_pending); ++ * m_consume_pending = 0; ++ * } ++ * should be a group of atomic operation. ++ */ ++ if (m_consume_pending > 0) { ++ offset = m_consume_pending; ++ m_consume_pending = 0; ++ ++ /** ensure that: consume_pending has written into memory */ ++ memory_barrier(); ++ atomic_inc_bytes_read(offset); ++ } ++ ++ /** the blocking-read mode */ ++ if (nowait) { ++ return MQ_WOULD_BLOCK; ++ } ++ ++ set_wait(m_queue->m_receiver_event); ++ reset_wait(m_queue->m_receiver_event); ++ } ++ return MQ_DETACHED; ++} ++ ++/** ++ * receive message from MQ ++ * @datap ++ * @nbytesp ++ * @nowait ++ */ ++MQ_RESULT MQueue_handle::receive(void **datap, uint32 *nbytesp, bool nowait) { ++ MQ_RESULT res; ++ uint32 nbytes, offset; ++ uint32 rb = 0; ++ /** ++ * only when m_consume_pending is greater than 1/4 * m_ring_size, we update ++ * the read position m_read_bytes using m_consume_pending; otherwise, the ++ * number of read bytes is firstly accumulated to m_consume_pending and then ++ * using (m_read_bytes + m_consume_pending) as the real read position in ring ++ * array. ++ * ++ */ ++ if (m_consume_pending > m_queue->m_ring_size / 4) { ++ offset = m_consume_pending; ++ m_consume_pending = 0; ++ ++ memory_barrier(); ++ atomic_inc_bytes_read(offset); ++ } ++ ++ /** ++ * for the receive process: ++ * (1) first, we read the message length from MQ; ++ * (2) then, we read the message data. ++ * As in the non-blocking mode, we read bytes from MQ as many as possible ++ * when there is available data in MQ. Thus, we should remember the read ++ * info. of last receive process. 
++ * ++ */ ++ ++ /** (1) read the message length */ ++ while (!m_length_word_complete) { ++ assert(m_partial_bytes < WORD_LENGTH); ++ res = receive_bytes(WORD_LENGTH - m_partial_bytes, &rb, ++ &m_buffer[m_partial_bytes], nowait); ++ if (res != MQ_SUCCESS) { ++ return res; ++ } ++ ++ uint32x2_t v_a = {m_partial_bytes, m_consume_pending}; ++ uint32x2_t v_b = {rb, rb}; ++ ++ v_a = vadd_u32(v_a, v_b); ++ m_partial_bytes = vget_lane_u32(v_a, 0); ++ m_consume_pending = vget_lane_u32(v_a, 1); ++ ++ if (m_partial_bytes >= WORD_LENGTH) { ++ assert(m_partial_bytes == WORD_LENGTH); ++ m_expected_bytes = *(uint32 *)m_buffer; ++ m_length_word_complete = true; ++ m_partial_bytes = 0; ++ } ++ } ++ nbytes = m_expected_bytes; ++ ++ /** re-allocing local buffer when m_buffer_len is smaller than nbytes */ ++ if (m_buffer_len < nbytes || DBUG_EVALUATE_IF("pq_mq_error3", true, false)) { ++ while (m_buffer_len < nbytes) { ++ m_buffer_len *= 2; ++ } ++ if (m_buffer) { ++ destroy(m_buffer); ++ } ++ THD *thd = current_thd; ++ assert(!m_queue->m_receiver_event->PQ_caller || ++ thd == m_queue->m_receiver_event->PQ_caller); ++ m_buffer = new (thd->pq_mem_root) char[m_buffer_len]; ++ /** if m_buffer allocates fail, then directly return my_error */ ++ if (m_buffer == nullptr || DBUG_EVALUATE_IF("pq_mq_error3", true, false)) { ++ my_error(ER_STD_BAD_ALLOC_ERROR, MYF(0), "", "(MQ::receive)"); ++ return MQ_DETACHED; ++ } ++ } ++ ++ /** (2) read data of nbytes **/ ++ for (;;) { ++ size_t still_needed; ++ assert(m_partial_bytes <= nbytes); ++ ++ still_needed = nbytes - m_partial_bytes; ++ res = receive_bytes(still_needed, &rb, &m_buffer[m_partial_bytes], nowait); ++ if (res != MQ_SUCCESS) { ++ return res; ++ } ++ uint32x2_t v_a = {m_partial_bytes, m_consume_pending}; ++ uint32x2_t v_b = {rb, rb}; ++ v_a = vadd_u32(v_a, v_b); ++ m_partial_bytes = vget_lane_u32(v_a, 0); ++ m_consume_pending = vget_lane_u32(v_a, 1); ++ if (m_partial_bytes >= nbytes) { ++ break; ++ } ++ } ++ ++ /** reset for next read */ ++ m_length_word_complete = false; ++ m_partial_bytes = 0; ++ ++ *nbytesp = nbytes; ++ *datap = m_buffer; ++ return MQ_SUCCESS; ++} +diff --git a/sql/msg_queue.h b/sql/msg_queue.h +new file mode 100644 +index 00000000..92d75f8c +--- /dev/null ++++ b/sql/msg_queue.h +@@ -0,0 +1,322 @@ ++#ifndef _MESSAGE_QUEUE_PQ ++#define _MESSAGE_QUEUE_PQ ++ ++/* Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "log.h" ++#include "my_dbug.h" ++#include "my_sys.h" ++#include "pq_global.h" ++#include "sql_base.h" ++#include "sql_class.h" ++#include "sql_error.h" ++ ++#define MQ_BUFFER_SIZE 1024 ++#define RING_SIZE 1048576 ++ ++// one message cannot exceed 2^32 bytes ++const size_t WORD_LENGTH = sizeof(uint32); ++enum MQ_RESULT { ++ MQ_SUCCESS, /** Sent or received a message */ ++ MQ_WOULD_BLOCK, /** Not completed; retry later */ ++ MQ_DETACHED /** worker has detached queue */ ++}; ++ ++enum MQ_DETACHED_STATUS { ++ MQ_NOT_DETACHED = 0, /** not detached from MQ */ ++ MQ_HAVE_DETACHED, /** have detached from MQ */ ++ MQ_TMP_DETACHED /** tmp detached from MQ, and reset MQ_NOT_DETACHED later */ ++}; ++ ++enum MESSAGE_TYPE { EMPTY_MSG, ERROR_MSG }; ++ ++/** ++ * field raw data, used to store in MQ ++ * ++ * */ ++struct Field_raw_data { ++ uchar *m_ptr; /** point to raw data buffer */ ++ uint32 m_len; /** length of message */ ++ uchar m_var_len; /** length of varstring */ ++ ++ /** ++ * m_need_send = true indicates message should be sent to MQ; ++ * otherwise, m_need_send = false. ++ */ ++ bool m_need_send; ++ Field_raw_data() { ++ m_ptr = nullptr; ++ m_len = 0; ++ m_var_len = 0; ++ m_need_send = true; ++ } ++}; ++ ++const uint RING_SIZE_MOD = RING_SIZE - 1; ++#define MOD(x, y) ((x)&RING_SIZE_MOD) ++ ++/** lock-free operation */ ++#define write_barrier() __atomic_thread_fence(__ATOMIC_RELEASE) ++#define read_barrier() __atomic_thread_fence(__ATOMIC_ACQUIRE) ++#define compiler_barrier() __asm__ __volatile__("" ::: "memory") ++#define memory_barrier() __sync_synchronize() ++ ++/** atomic CAS operation */ ++static inline bool atomic_compare_exchange_u64(volatile uint64 *ptr, ++ uint64 *expected, ++ uint64 newval) { ++ bool ret; ++ uint64 current; ++ current = __sync_val_compare_and_swap(ptr, *expected, newval); ++ ret = current == *expected; ++ *expected = current; ++ return ret; ++} ++ ++/** atomic read uint64 value */ ++static inline uint64 atomic_read_u64(volatile uint64 *ptr) { ++ uint64 old = 0; ++ atomic_compare_exchange_u64(ptr, &old, 0); ++ return old; ++} ++ ++/** atomic write uint64 value */ ++static inline uint64 atomic_write_u64(volatile uint64 *ptr, uint64 val) { ++ uint64 old = *ptr; ++ while (!atomic_compare_exchange_u64(ptr, &old, val)) ++ ; ++ return old; ++} ++ ++class MQ_event { ++ public: ++ THD *PQ_caller; ++ std::atomic latch; ++ ++ public: ++ MQ_event(THD *thd) : PQ_caller(thd), latch(false) {} ++ MQ_event() { ++ PQ_caller = nullptr; ++ latch = false; ++ } ++ ~MQ_event() {} ++ ++ /** set waiting status */ ++ void set_latch() { latch.store(true); } ++ ++ /** ++ * Check whether the partner has set latch's status in a loop manner until ++ * reaching the maximal timeout time (100ms, by default). ++ */ ++ void wait_latch(uint64 wait_max_time = 100) { ++ struct timespec start_ts; ++ struct timespec end_ts; ++ ulong difftime; ++ ++ set_timespec(&start_ts, 0); ++ latch.load(); ++ THD *thd = PQ_caller ? 
PQ_caller : current_thd; ++ while (!latch && (thd && !thd->is_killed())) { ++ set_timespec(&end_ts, 0); ++ difftime = (end_ts.tv_sec - start_ts.tv_sec) * TIME_THOUSAND + ++ (end_ts.tv_nsec - start_ts.tv_nsec) / TIME_MILLION; ++ ++ if (difftime >= wait_max_time) break; ++ latch.load(); ++ } ++ } ++ ++ /** reset waiting status */ ++ void reset_latch() { latch.store(false); } ++}; ++ ++class MQueue { ++ public: ++ MQ_event *m_sender_event; /** sender's event */ ++ MQ_event *m_receiver_event; /** receiver's event */ ++ uint64 m_bytes_written; /** number of written bytes */ ++ uint64 m_bytes_read; /** number of read bytes */ ++ char *m_buffer; /** ring array that stores the message */ ++ uint32 m_ring_size; /** the size of ring array */ ++ MQ_DETACHED_STATUS detached; /** the status of message queue */ ++ ++ public: ++ MQueue() ++ : m_sender_event(nullptr), ++ m_receiver_event(nullptr), ++ m_bytes_written(0), ++ m_bytes_read(0), ++ m_buffer(nullptr), ++ m_ring_size(0), ++ detached(MQ_NOT_DETACHED) {} ++ ++ MQueue(MQ_event *sender_event, MQ_event *receiver_event, char *ring, ++ size_t ring_size) ++ : m_sender_event(sender_event), ++ m_receiver_event(receiver_event), ++ m_bytes_written(0), ++ m_bytes_read(0), ++ m_buffer(ring), ++ m_ring_size(ring_size), ++ detached(MQ_NOT_DETACHED) {} ++ ~MQueue() {} ++}; ++ ++class MQueue_handle { ++ public: ++ MQueue *m_queue; /** message queue */ ++ ++ private: ++ char *m_buffer; /** local buffer for cache reading */ ++ uint32 m_buffer_len; /** the length of local buffer */ ++ uint32 m_consume_pending; /** accumulated read bytes for updating read number ++ of bytes in batch manner */ ++ uint32 m_partial_bytes; /** partial bytes that has been read in one message */ ++ uint32 m_expected_bytes; /** expected bytes for one complete message */ ++ bool m_length_word_complete; /** indicates whether message's length has been ++ completely read */ ++ ++ public: ++ public: ++ MQueue_handle() ++ : m_queue(nullptr), ++ m_buffer(nullptr), ++ m_buffer_len(0), ++ m_consume_pending(0), ++ m_partial_bytes(0), ++ m_expected_bytes(0), ++ m_length_word_complete(false) {} ++ ++ MQueue_handle(MQueue *queue, size_t buffer_len) ++ : m_queue(queue), ++ m_buffer(nullptr), ++ m_buffer_len(buffer_len), ++ m_consume_pending(0), ++ m_partial_bytes(0), ++ m_expected_bytes(0), ++ m_length_word_complete(false) {} ++ ++ ~MQueue_handle() {} ++ ++ public: ++ MQ_RESULT send(Field_raw_data *fm); ++ MQ_RESULT send(const void *datap, uint32 len, bool nowait = false); ++ MQ_RESULT receive(void **datap, uint32 *, bool nowait = true); ++ ++ inline MQueue *get_mqueue() { return m_queue; } ++ inline MQ_event *get_receiver() { return m_queue->m_receiver_event; } ++ ++ inline MQ_event *get_sender() { return m_queue->m_sender_event; } ++ /** set detached status */ ++ inline void set_datched_status(MQ_DETACHED_STATUS status) { ++ if (m_queue) m_queue->detached = status; ++ } ++ ++ inline bool send_exception_msg(MESSAGE_TYPE msg) { ++ MQ_RESULT result = send((void *)&msg, 1); ++ if ((result != MQ_SUCCESS && result != MQ_DETACHED) || ++ DBUG_EVALUATE_IF("pq_mq_error6", true, false)) { ++ /* ++ * Note that: if we can not send error msg to MQ, then the only ++ * solution is to generate my_error info. ++ */ ++ my_error(ER_PARALLEL_EXEC_ERROR, MYF(0)); ++ return true; ++ } ++ return false; ++ } ++ ++ /** ++ * init the handle structure ++ * @retval: ++ * false for success, and otherwise true. 
++ */ ++ bool init_mqueue_handle(THD *thd) { ++ if (!m_buffer_len) return true; ++ m_buffer = new (thd->pq_mem_root) char[m_buffer_len]; ++ if (!m_buffer) return true; ++ ++ return false; ++ } ++ ++ /** cleanup the allocated buffer */ ++ void cleanup() { ++ destroy(m_buffer); ++ if (m_queue) { ++ destroy(m_queue->m_sender_event); ++ destroy(m_queue->m_buffer); ++ } ++ destroy(m_queue); ++ } ++ ++ private: ++ /**increase #read bytes in atomic manner */ ++ void atomic_inc_bytes_read(int n) { ++ read_barrier(); ++ atomic_write_u64(&m_queue->m_bytes_read, ++ atomic_read_u64(&m_queue->m_bytes_read) + n); ++ } ++ ++ /** increase #written bytes in atomic manner */ ++ void atomic_inc_bytes_written(int n) { ++ write_barrier(); ++ atomic_write_u64(&m_queue->m_bytes_written, ++ atomic_read_u64(&m_queue->m_bytes_written) + n); ++ } ++ ++ /** let event into wait status */ ++ void set_wait(MQ_event *event) { ++ assert(event != nullptr); ++ event->wait_latch(); ++ } ++ ++ /** end event's wait status */ ++ void end_wait(MQ_event *event) { ++ assert(event != nullptr); ++ event->set_latch(); ++ } ++ ++ /** reset event's wait status */ ++ void reset_wait(MQ_event *event) { ++ assert(event != nullptr); ++ event->reset_latch(); ++ } ++ ++ MQ_RESULT send_bytes(uint32 nbytes, const void *data, uint32 *written, ++ bool nowait = false); ++ MQ_RESULT receive_bytes(uint32 bytes_needed, uint32 *nbytesp, void *datap, ++ bool nowait = true); ++}; ++ ++#endif +diff --git a/sql/mysqld.cc b/sql/mysqld.cc +index 83643f76..1fa395ff 100644 +--- a/sql/mysqld.cc ++++ b/sql/mysqld.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -857,6 +858,7 @@ MySQL clients support the protocol: + #include "thr_mutex.h" + #include "typelib.h" + #include "violite.h" ++#include "sql/sql_parallel.h" + + #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE + #include "storage/perfschema/pfs_server.h" +@@ -1096,6 +1098,8 @@ static PSI_mutex_key key_LOCK_admin_tls_ctx_options; + static PSI_mutex_key key_LOCK_rotate_binlog_master_key; + static PSI_mutex_key key_LOCK_partial_revokes; + #endif /* HAVE_PSI_INTERFACE */ ++static PSI_mutex_key key_LOCK_pq_threads_running; ++static PSI_cond_key key_COND_pq_threads_running; + + /** + Statement instrumentation key for replication. 
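Note on msg_queue.h/msg_queue.cc above: they implement a single-producer/single-consumer byte ring. The sender owns m_bytes_written, the receiver owns m_bytes_read, free space is derived from the difference of the two counters, positions wrap via MOD() (a single AND, since RING_SIZE is a power of two and RING_SIZE_MOD = RING_SIZE - 1), and each message is framed by a 4-byte length word. The sketch below is a deliberately simplified, self-contained version of just the counter/offset arithmetic: it busy-waits where the real queue blocks on MQ_event latches, and it omits detach/error handling, length framing, and the batched m_consume_pending updates. RingQueue and kRingSize are illustrative names, not server types.

#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <thread>

constexpr uint32_t kRingSize = 1024;  // power of two, like RING_SIZE

struct RingQueue {
  std::atomic<uint64_t> bytes_written{0};  // only the sender advances this
  std::atomic<uint64_t> bytes_read{0};     // only the receiver advances this
  char buffer[kRingSize];

  // Sender side: copy nbytes into the ring, spinning while it is full.
  void send(const void *data, uint32_t nbytes) {
    uint32_t sent = 0;
    while (sent < nbytes) {
      uint64_t rb = bytes_read.load(std::memory_order_acquire);
      uint64_t wb = bytes_written.load(std::memory_order_relaxed);
      uint32_t used = static_cast<uint32_t>(wb - rb);
      uint32_t offset = static_cast<uint32_t>(wb % kRingSize);
      // Write at most the free space, the space up to the array end, and what is left.
      uint32_t chunk = std::min({kRingSize - used, kRingSize - offset, nbytes - sent});
      if (chunk == 0) { std::this_thread::yield(); continue; }  // ring full
      std::memcpy(&buffer[offset], static_cast<const char *>(data) + sent, chunk);
      sent += chunk;
      bytes_written.store(wb + chunk, std::memory_order_release);
    }
  }

  // Receiver side: copy nbytes out of the ring, spinning while it is empty.
  void receive(void *out, uint32_t nbytes) {
    uint32_t got = 0;
    while (got < nbytes) {
      uint64_t wb = bytes_written.load(std::memory_order_acquire);
      uint64_t rb = bytes_read.load(std::memory_order_relaxed);
      uint32_t used = static_cast<uint32_t>(wb - rb);
      uint32_t offset = static_cast<uint32_t>(rb % kRingSize);
      uint32_t chunk = std::min({used, kRingSize - offset, nbytes - got});
      if (chunk == 0) { std::this_thread::yield(); continue; }  // ring empty
      std::memcpy(static_cast<char *>(out) + got, &buffer[offset], chunk);
      got += chunk;
      bytes_read.store(rb + chunk, std::memory_order_release);
    }
  }
};

int main() {
  RingQueue q;
  const char msg[] = "hello from the PQ worker";
  std::thread sender([&] { q.send(msg, sizeof(msg)); });
  char out[sizeof(msg)];
  q.receive(out, sizeof(out));
  sender.join();
  std::printf("%s\n", out);  // prints once the receiver has drained the ring
}

Because each side only writes its own counter and publishes buffer contents with a release store that the peer reads with an acquire load, no mutex is needed for the data path; the real queue adds latch-based wake-ups on top of the same arithmetic.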
+@@ -2645,6 +2649,8 @@ static void clean_up_mutexes() { + mysql_mutex_destroy(&LOCK_rotate_binlog_master_key); + mysql_mutex_destroy(&LOCK_admin_tls_ctx_options); + mysql_mutex_destroy(&LOCK_partial_revokes); ++ mysql_mutex_destroy(&LOCK_pq_threads_running); ++ mysql_cond_destroy(&COND_pq_threads_running); + } + + /**************************************************************************** +@@ -5092,6 +5098,8 @@ static int init_thread_environment() { + MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_partial_revokes, &LOCK_partial_revokes, + MY_MUTEX_INIT_FAST); ++ mysql_mutex_init(key_LOCK_pq_threads_running, &LOCK_pq_threads_running, MY_MUTEX_INIT_FAST); ++ mysql_cond_init(key_COND_pq_threads_running, &COND_pq_threads_running); + return 0; + } + +@@ -8598,6 +8606,14 @@ static int show_net_compression(THD *thd, SHOW_VAR *var, char *buff) { + return 0; + } + ++static int show_pq_memory(THD *, SHOW_VAR *var, char *buff) { ++ var->type = SHOW_INT; ++ var->value = buff; ++ unsigned int *value = reinterpret_cast(buff); ++ *value = get_pq_memory_total(); ++ return 0; ++} ++ + static int show_net_compression_algorithm(THD *thd, SHOW_VAR *var, char *buff) { + const char *s = thd->get_protocol()->get_compression_algorithm(); + var->type = SHOW_CHAR; +@@ -9180,6 +9196,13 @@ SHOW_VAR status_vars[] = { + SHOW_SCOPE_ALL}, + {"Prepared_stmt_count", (char *)&show_prepared_stmt_count, SHOW_FUNC, + SHOW_SCOPE_GLOBAL}, ++ {"PQ_threads_refused", (char*)¶llel_threads_refused, SHOW_INT, ++ SHOW_SCOPE_GLOBAL}, ++ {"PQ_memory_refused", (char*)¶llel_memory_refused, SHOW_INT, ++ SHOW_SCOPE_GLOBAL}, ++ {"PQ_threads_running", (char*)¶llel_threads_running, SHOW_INT, ++ SHOW_SCOPE_GLOBAL}, ++ {"PQ_memory_used", (char*)&show_pq_memory, SHOW_FUNC, SHOW_SCOPE_GLOBAL}, + {"Queries", (char *)&show_queries, SHOW_FUNC, SHOW_SCOPE_ALL}, + {"Questions", (char *)offsetof(System_status_var, questions), + SHOW_LONGLONG_STATUS, SHOW_SCOPE_ALL}, +@@ -11107,7 +11130,8 @@ static PSI_mutex_info all_server_mutexes[]= + { &key_LOCK_tls_ctx_options, "LOCK_tls_ctx_options", 0, 0, "A lock to control all of the --ssl-* CTX related command line options for client server connection port"}, + { &key_LOCK_admin_tls_ctx_options, "LOCK_admin_tls_ctx_options", 0, 0, "A lock to control all of the --ssl-* CTX related command line options for administrative connection port"}, + { &key_LOCK_rotate_binlog_master_key, "LOCK_rotate_binlog_master_key", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, +- { &key_monitor_info_run_lock, "Source_IO_monitor::run_lock", 0, 0, PSI_DOCUMENT_ME} ++ { &key_monitor_info_run_lock, "Source_IO_monitor::run_lock", 0, 0, PSI_DOCUMENT_ME}, ++ { &key_LOCK_pq_threads_running, "LOCK_pq_threads_running", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME} + }; + /* clang-format on */ + +@@ -11217,7 +11241,8 @@ static PSI_cond_info all_server_conds[]= + { &key_COND_compress_gtid_table, "COND_compress_gtid_table", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, + { &key_commit_order_manager_cond, "Commit_order_manager::m_workers.cond", 0, 0, PSI_DOCUMENT_ME}, + { &key_cond_slave_worker_hash, "Relay_log_info::slave_worker_hash_lock", 0, 0, PSI_DOCUMENT_ME}, +- { &key_monitor_info_run_cond, "Source_IO_monitor::run_cond", 0, 0, PSI_DOCUMENT_ME} ++ { &key_monitor_info_run_cond, "Source_IO_monitor::run_cond", 0, 0, PSI_DOCUMENT_ME}, ++ { &key_COND_pq_threads_running, "COND_pq_threads_running", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME} + }; + /* clang-format on */ + +@@ -11227,6 +11252,7 @@ PSI_thread_key key_thread_one_connection; + PSI_thread_key 
key_thread_compress_gtid_table; + PSI_thread_key key_thread_parser_service; + PSI_thread_key key_thread_handle_con_admin_sockets; ++PSI_thread_key key_thread_parallel_query; + + /* clang-format off */ + static PSI_thread_info all_server_threads[]= +@@ -11245,6 +11271,7 @@ static PSI_thread_info all_server_threads[]= + { &key_thread_compress_gtid_table, "compress_gtid_table", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, + { &key_thread_parser_service, "parser_service", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, + { &key_thread_handle_con_admin_sockets, "admin_interface", PSI_FLAG_USER, 0, PSI_DOCUMENT_ME}, ++ { &key_thread_parallel_query, "parallel_query", PSI_FLAG_SINGLETON, 0, PSI_DOCUMENT_ME}, + }; + /* clang-format on */ + +diff --git a/sql/mysqld.h b/sql/mysqld.h +index a7a80a22..f778a5a0 100644 +--- a/sql/mysqld.h ++++ b/sql/mysqld.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2010, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -148,6 +149,9 @@ enum enum_server_operational_state { + }; + enum_server_operational_state get_server_state(); + ++extern uint parallel_threads_running; ++extern uint parallel_threads_refused; ++extern uint parallel_memory_refused; + extern bool opt_large_files, server_id_supplied; + extern bool opt_bin_log; + extern bool opt_log_slave_updates; +@@ -481,6 +485,7 @@ extern PSI_thread_key key_thread_compress_gtid_table; + extern PSI_thread_key key_thread_parser_service; + extern PSI_thread_key key_thread_handle_con_admin_sockets; + extern PSI_cond_key key_monitor_info_run_cond; ++extern PSI_thread_key key_thread_parallel_query; + + extern PSI_file_key key_file_binlog; + extern PSI_file_key key_file_binlog_index; +diff --git a/sql/opt_explain.cc b/sql/opt_explain.cc +index aabeb29f..7eb6d786 100644 +--- a/sql/opt_explain.cc ++++ b/sql/opt_explain.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -72,6 +73,7 @@ + #include "sql/mysqld_thd_manager.h" // Global_THD_manager + #include "sql/opt_costmodel.h" + #include "sql/opt_explain_format.h" ++#include "sql/opt_explain_json.h" + #include "sql/opt_range.h" // QUICK_SELECT_I + #include "sql/opt_trace.h" // Opt_trace_* + #include "sql/protocol.h" +@@ -94,6 +96,7 @@ + #include "sql/temp_table_param.h" // Func_ptr + #include "sql_string.h" + #include "template_utils.h" ++#include "sql/sql_parallel.h" + + class Opt_trace_context; + +@@ -428,7 +431,7 @@ class Explain_join : public Explain_table_base { + Query_block *query_block_arg, bool need_tmp_table_arg, + bool need_order_arg, bool distinct_arg) + : Explain_table_base(CTX_JOIN, explain_thd_arg, query_thd_arg, +- query_block_arg), ++ query_block_arg, nullptr), + need_tmp_table(need_tmp_table_arg), + need_order(need_order_arg), + distinct(distinct_arg), +@@ -448,6 +451,7 @@ class Explain_join : public Explain_table_base { + bool end_simple_sort_context(Explain_sort_clause clause, + enum_parsing_context ctx); + bool explain_qep_tab(size_t tab_num); ++ bool explain_pq_gather(QEP_TAB *tab); + + protected: + bool shallow_explain() override; +@@ -1135,6 +1139,12 @@ bool Explain_join::explain_modify_flags() { + break; + default:; + }; ++ ++ if (query_thd->parallel_exec && ++ (const_cast(query_thd))->is_worker() == false) { ++ fmt->entry()->mod_type = MT_GATHER; ++ } ++ + return false; + } + +@@ -1275,6 +1285,10 @@ bool Explain_join::shallow_explain() { + if (begin_sort_context(ESC_BUFFER_RESULT, CTX_BUFFER_RESULT)) + return true; /* purecov: inspected */ + ++ if (join->thd->parallel_exec && join->primary_tables == 0) { ++ join->primary_tables = 1; ++ } ++ + for (size_t t = 0, cnt = fmt->is_hierarchical() ? join->primary_tables + : join->tables; + t < cnt; t++) { +@@ -1293,6 +1307,45 @@ bool Explain_join::shallow_explain() { + return false; + } + ++bool Explain_join::explain_pq_gather(QEP_TAB *tab) { ++ assert(tab->gather); ++ ++ JOIN *gather_join = tab->gather->m_template_join; ++ Query_block *query_block = gather_join->query_block; ++ const Explain_format_flags *flags = &gather_join->explain_flags; ++ const bool need_tmp_table = flags->any(ESP_USING_TMPTABLE); ++ const bool need_order = flags->any(ESP_USING_FILESORT); ++ const bool distinct = flags->get(ESC_DISTINCT, ESP_EXISTS); ++ query_block->join->best_read = this->join->best_read; ++ bool ret = true; ++ ++ gather_join->thd->lock_query_plan(); ++ bool explain_other = explain_thd != query_thd; ++ Explain_join *ej = new (gather_join->thd->pq_mem_root) ++ Explain_join(explain_thd, explain_other ? gather_join->thd : explain_thd, ++ query_block, need_tmp_table, need_order, distinct); ++ if (ej == nullptr) { ++ goto END; ++ } ++ ++ if (!explain_other) { ++ ej->query_thd = gather_join->thd; ++ } ++ ++ if (ej->fmt->begin_context(CTX_GATHER, nullptr)) { ++ goto END; ++ } ++ ++ ret = ej->shallow_explain() || ej->explain_subqueries(); ++ if (!ret) { ++ ret = ej->fmt->end_context(CTX_GATHER); ++ } ++ ++END: ++ gather_join->thd->unlock_query_plan(); ++ return ret; ++} ++ + bool Explain_join::explain_qep_tab(size_t tabnum) { + tab = join->qep_tab + tabnum; + if (!tab->position()) return false; +@@ -1322,6 +1375,11 @@ bool Explain_join::explain_qep_tab(size_t tabnum) { + Semijoin_mat_exec *const sjm = tab->sj_mat_exec(); + const enum_parsing_context c = sjm ? 
CTX_MATERIALIZATION : CTX_QEP_TAB; + ++ if (tab->gather) { ++ need_tmp_table = false; ++ need_order = false; ++ } ++ + if (fmt->begin_context(c) || prepare_columns()) return true; + + fmt->entry()->query_block_id = table->pos_in_table_list->query_block_id(); +@@ -1348,6 +1406,9 @@ bool Explain_join::explain_qep_tab(size_t tabnum) { + + if (fmt->end_context(c)) return true; + ++ // explain parallel query execute plan ++ if (tab->gather) explain_pq_gather(tab); ++ + if (first_non_const) { + if (end_simple_sort_context(ESC_GROUP_BY, CTX_SIMPLE_GROUP_BY)) return true; + if (end_simple_sort_context(ESC_DISTINCT, CTX_SIMPLE_DISTINCT)) return true; +@@ -1613,6 +1674,13 @@ bool Explain_join::explain_extra() { + push_extra(ET_USING_SECONDARY_ENGINE, table->file->table_type())) + return true; + ++ if (tab->gather) { ++ StringBuffer<64> buff(cs); ++ buff.append_ulonglong(tab->gather->m_dop); ++ buff.append(" workers"); ++ if (push_extra(ET_PARALLEL_EXE, buff)) return true; ++ } ++ + return false; + } + +@@ -2066,9 +2134,13 @@ static bool ExplainIterator(THD *ethd, const THD *query_thd, + default: + break; + } +- explain += PrintQueryPlan(base_level, unit->root_access_path(), ++ if (ethd->parallel_exec && ethd->lex->is_explain_analyze) { ++ explain += ethd->pq_explain; ++ } else { ++ explain += PrintQueryPlan(base_level, unit->root_access_path(), + unit->is_union() ? nullptr : join, + /*is_root_of_join=*/!unit->is_union()); ++ } + } else { + explain += PrintQueryPlan(0, /*path=*/nullptr, /*join=*/nullptr, + /*is_root_of_join=*/false); +diff --git a/sql/opt_explain_format.h b/sql/opt_explain_format.h +index 947197cc..1439c921 100644 +--- a/sql/opt_explain_format.h ++++ b/sql/opt_explain_format.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -91,6 +92,7 @@ enum Extra_tag { + ET_SKIP_RECORDS_IN_RANGE, + ET_USING_SECONDARY_ENGINE, + ET_REMATERIALIZE, ++ ET_PARALLEL_EXE, + //------------------------------------ + ET_total + }; +@@ -131,7 +133,7 @@ class context; + } + + // Table modification type +-enum enum_mod_type { MT_NONE, MT_INSERT, MT_UPDATE, MT_DELETE, MT_REPLACE }; ++enum enum_mod_type { MT_NONE, MT_INSERT, MT_UPDATE, MT_DELETE, MT_REPLACE, MT_GATHER }; + + /** + Helper class for table property buffering +@@ -523,6 +525,8 @@ class Explain_format { + + virtual bool is_tree() const { return false; } + ++ virtual bool is_json() const { return false; } ++ + /** + Send EXPLAIN header item(s) to output stream + +diff --git a/sql/opt_explain_json.cc b/sql/opt_explain_json.cc +index 4fe19821..f5f7dcd3 100644 +--- a/sql/opt_explain_json.cc ++++ b/sql/opt_explain_json.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -94,7 +95,8 @@ static const char *json_extra_tags[ET_total] = { + "table_function", // ET_TABLE_FUNCTION + "skip_records_in_range_due_to_force", // ET_SKIP_RECORDS_IN_RANGE + "using_secondary_engine", // ET_USING_SECONDARY_ENGINE +- "rematerialize" // ET_REMATERIALIZE ++ "rematerialize", // ET_REMATERIALIZE ++ "parallel_execute" // ET_PARALLEL_EXE + }; + + // JSON key names +@@ -172,6 +174,7 @@ class sort_ctx; + class subquery_ctx; + class union_result_ctx; + class window_ctx; ++class join_ctx; + + /** + @note Keep in sync with the @c list_names array. +@@ -266,6 +269,14 @@ class context : public Explain_context { + */ + virtual size_t id(bool hide = false) = 0; + ++ virtual size_t join_tabs_count() { ++ assert(0); ++ return 0; ++ } ++ ++ virtual sort_ctx* get_sort() { return nullptr; } ++ virtual window_ctx* get_window() { return nullptr; } ++ + virtual bool cacheable() { + assert(0); + return true; +@@ -394,6 +405,13 @@ class context : public Explain_context { + return false; + } + ++ virtual int add_gather(join_ctx *ctx MY_ATTRIBUTE((unused)), ++ Query_expression *subquery ++ MY_ATTRIBUTE((unused))) { ++ assert(0); ++ return false; ++ } ++ + /// Helper function to format output for derived subquery if any + virtual bool format_derived(Opt_trace_context *) { return false; } + +@@ -402,6 +420,9 @@ class context : public Explain_context { + + /// Helper function to format output for HAVING, ORDER/GROUP BY subqueries + virtual bool format_query_expression(Opt_trace_context *) { return false; } ++ ++ /// Helper function to format output for parallel gather if any ++ virtual bool format_gather(Opt_trace_context *) { return false; } + }; + + /** +@@ -629,7 +650,7 @@ bool table_base_ctx::format_body(Opt_trace_context *json, + Opt_trace_object *obj) { + StringBuffer<64> buff; + +- if (mod_type != MT_NONE) obj->add(mod_type_name[mod_type], true); ++ if (mod_type != MT_NONE && mod_type != MT_GATHER) obj->add(mod_type_name[mod_type], true); + + if (!col_id.is_empty() && !is_hidden_id) obj->add(K_SELECT_ID, col_id.value); + +@@ -701,7 +722,7 @@ bool table_base_ctx::format_body(Opt_trace_context *json, + if (format_where(json)) return true; + } + +- return format_derived(json) || format_query_expression(json); ++ return format_gather(json) || format_derived(json) || format_query_expression(json); + } + + /** +@@ -764,6 +785,7 @@ class union_result_ctx : public table_base_ctx, public unit_ctx { + class table_with_where_and_derived : public table_base_ctx { + public: + List where_subqueries; ///< associated WHERE clause subqueries ++ List gather; ///< associated parallel gather + + table_with_where_and_derived(enum_parsing_context type_arg, + const char *name_arg, context *parent_arg) +@@ -796,6 +818,8 @@ class table_with_where_and_derived : public table_base_ctx { + } + return false; + } ++ ++ virtual bool format_gather(Opt_trace_context *json); + }; + + /** +@@ -842,6 +866,9 @@ class message_ctx : public joinable_ctx, public table_with_where_and_derived { + bool format_derived(Opt_trace_context *json) override { + return table_with_where_and_derived::format_derived(json); + } ++ virtual bool format_gather(Opt_trace_context *json) { ++ return table_with_where_and_derived::format_gather(json); ++ } + bool format_where(Opt_trace_context *json) override { + return table_with_where_and_derived::format_where(json); + } +@@ -860,6 +887,9 @@ class message_ctx : 
public joinable_ctx, public table_with_where_and_derived { + int add_where_subquery(subquery_ctx *ctx, Query_expression *) override { + return where_subqueries.push_back(ctx); + } ++ virtual int add_gather(join_ctx *ctx, Query_expression *) { ++ return gather.push_back(ctx); ++ } + }; + + /** +@@ -893,6 +923,9 @@ class join_tab_ctx : public joinable_ctx, public table_with_where_and_derived { + bool format_derived(Opt_trace_context *json) override { + return table_with_where_and_derived::format_derived(json); + } ++ virtual bool format_gather(Opt_trace_context *json) { ++ return table_with_where_and_derived::format_gather(json); ++ } + bool format_where(Opt_trace_context *json) override { + return table_with_where_and_derived::format_where(json); + } +@@ -921,6 +954,10 @@ class join_tab_ctx : public joinable_ctx, public table_with_where_and_derived { + return -1; + } + ++ virtual int add_gather(join_ctx *ctx, Query_expression *subquery MY_ATTRIBUTE((unused))) { ++ return gather.push_back(ctx); ++ } ++ + bool find_and_set_derived(context *subquery) override { + if (query_block_id == subquery->id()) { + derived_from.push_back(subquery); +@@ -969,6 +1006,10 @@ class simple_sort_ctx : public joinable_ctx { + return join_tab->add_where_subquery(ctx, subquery); + } + ++ virtual int add_gather(join_ctx *ctx, Query_expression *subquery) { ++ return join_tab->add_gather(ctx, subquery); ++ } ++ + bool find_and_set_derived(context *subquery) override { + return join_tab->find_and_set_derived(subquery); + } +@@ -1029,6 +1070,7 @@ class simple_sort_with_subqueries_ctx : public simple_sort_ctx { + class join_ctx : public unit_ctx, virtual public qep_row { + protected: + List join_tabs; ///< hosted JOIN_TAB nodes ++ public: + sort_ctx *sort; + window_ctx *window; + +@@ -1078,6 +1120,10 @@ class join_ctx : public unit_ctx, virtual public qep_row { + bool dependent() override; + int add_where_subquery(subquery_ctx *ctx, + Query_expression *subquery) override; ++ virtual int add_gather(join_ctx *ctx, Query_expression *subquery); ++ virtual size_t join_tabs_count() { return join_tabs.elements; } ++ virtual sort_ctx* get_sort() { return sort; } ++ virtual window_ctx* get_window() { return window; } + }; + + /** +@@ -1321,6 +1367,8 @@ bool join_ctx::format_body(Opt_trace_context *json, Opt_trace_object *obj) { + Opt_trace_object insert_from(json, "insert_from"); + if (format_body_inner(json, obj)) return true; /* purecov: inspected */ + } ++ } else if (join_tabs.elements && (join_tabs.head()->get_mod_type() == MT_GATHER)) { ++ join_tabs.head()->format(json); + } else if (format_body_inner(json, obj)) + return true; /* purecov: inspected */ + return format_query_expression(json); +@@ -1451,6 +1499,22 @@ int join_ctx::add_where_subquery(subquery_ctx *ctx, + return false; + } + ++int join_ctx::add_gather(join_ctx *ctx, Query_expression *subquery) { ++ if (sort) ++ return sort->join_ctx::add_gather(ctx, subquery); ++ else if (window) ++ return window->join_ctx::add_gather(ctx, subquery); ++ ++ List_iterator it(join_tabs); ++ joinable_ctx *j; ++ while ((j = it++)) { ++ int ret = j->add_gather(ctx, subquery); ++ if (ret > 0) return true; ++ } ++ ++ return false; ++} ++ + /** + Context class to group materialized JOIN_TABs to "materialized" array. + Is used for semijoin materialization. 
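Note on the add_gather()/format_gather() additions above: they follow the existing EXPLAIN-JSON context pattern. The base context rejects the call (assert(0)), composite contexts such as join_ctx forward it toward their hosted JOIN_TAB nodes, and the table-level context finally stores the gathered subplan and prints it next to its own entry. Below is a minimal, self-contained sketch of that composite/leaf delegation; Ctx, JoinCtx and TableCtx are illustrative names, not the classes in this file.

#include <cassert>
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

struct Ctx {
  virtual ~Ctx() = default;
  // Default mirrors context::add_gather(): reaching it means a node type that
  // should never receive a gather child.
  virtual bool add_gather(const std::string &) { assert(0); return true; }
  virtual void format(int indent) const = 0;
};

struct TableCtx : Ctx {               // leaf: one table in the plan
  std::string name;
  std::vector<std::string> gathers;   // attached parallel subplans
  explicit TableCtx(std::string n) : name(std::move(n)) {}
  bool add_gather(const std::string &g) override { gathers.push_back(g); return false; }
  void format(int indent) const override {
    std::printf("%*s- table: %s\n", indent, "", name.c_str());
    for (const auto &g : gathers)
      std::printf("%*s  gather: %s\n", indent, "", g.c_str());
  }
};

struct JoinCtx : Ctx {                // composite: forwards to its children
  std::vector<std::unique_ptr<Ctx>> children;
  bool add_gather(const std::string &g) override {
    for (auto &c : children)
      if (c->add_gather(g)) return true;  // propagate failure, like join_ctx::add_gather()
    return false;
  }
  void format(int indent) const override {
    std::printf("%*squery_block:\n", indent, "");
    for (const auto &c : children) c->format(indent + 2);
  }
};

int main() {
  JoinCtx root;
  root.children.push_back(std::make_unique<TableCtx>("t1"));
  root.add_gather("parallel scan on t1 (4 workers)");
  root.format(0);
  return 0;
}

The same shape is why the gathered plan shows up nested under the owning table in the JSON output: format_gather() is invoked from the table node's format_body(), after its own columns have been emitted.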
+@@ -1490,6 +1554,9 @@ class materialize_ctx : public joinable_ctx, + Query_expression *subquery) override { + return join_ctx::add_where_subquery(ctx, subquery); + } ++ virtual int add_gather(join_ctx *ctx, Query_expression *subquery) { ++ return join_ctx::add_gather(ctx, subquery); ++ } + bool find_and_set_derived(context *subquery) override { + return join_ctx::find_and_set_derived(subquery); + } +@@ -1541,6 +1608,37 @@ class materialize_ctx : public joinable_ctx, + } + }; + ++bool table_with_where_and_derived::format_gather(Opt_trace_context *json) { ++ if (gather.elements == 0) ++ return false; ++ else if (gather.elements == 1) { ++ if (gather.head()->get_sort()) ++ return gather.head()->get_sort()->join_ctx::format(json); ++ else if (gather.head()->get_window()) ++ return gather.head()->get_window()->join_ctx::format(json); ++ else if (gather.head()->join_tabs_count()) { ++ return gather.head()->format(json); ++ } ++ } else { ++ Opt_trace_array loops(json, K_NESTED_LOOP); ++ ++ List_iterator it(gather); ++ context *c; ++ while ((c = it++)) { ++ Opt_trace_object anonymous_wrapper(json); ++ if (c->get_sort()) ++ return c->get_sort()->join_ctx::format(json); ++ else if (c->get_window()) ++ return c->get_window()->join_ctx::format(json); ++ else if (c->join_tabs_count() && c->format(json)) { ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ + /** + Context class to represent JOIN_TABs in duplication weedout sequence + */ +@@ -1568,6 +1666,9 @@ class duplication_weedout_ctx : public joinable_ctx, public join_ctx { + Query_expression *subquery) override { + return join_ctx::add_where_subquery(ctx, subquery); + } ++ virtual int add_gather(join_ctx *ctx, Query_expression *subquery) { ++ return join_ctx::add_gather(ctx, subquery); ++ } + bool find_and_set_derived(context *subquery) override { + return join_ctx::find_and_set_derived(subquery); + } +@@ -1691,7 +1792,7 @@ bool Explain_format_JSON::begin_context(enum_parsing_context ctx_arg, + return true; + break; + case CTX_ORDER_BY: { +- assert(current_context->type == CTX_JOIN); ++ assert(current_context->type == CTX_JOIN || current_context->type == CTX_GATHER); + sort_ctx *ctx = new (*THR_MALLOC) sort_with_subqueries_ctx( + CTX_ORDER_BY, K_ORDERING_OPERATION, current_context, SQ_ORDER_BY, + flags, ESC_ORDER_BY); +@@ -1704,7 +1805,8 @@ bool Explain_format_JSON::begin_context(enum_parsing_context ctx_arg, + assert(current_context->type == CTX_JOIN || + current_context->type == CTX_ORDER_BY || + current_context->type == CTX_DISTINCT || +- current_context->type == CTX_WINDOW); ++ current_context->type == CTX_WINDOW || ++ current_context->type == CTX_GATHER); + sort_ctx *ctx = new (*THR_MALLOC) sort_with_subqueries_ctx( + CTX_GROUP_BY, K_GROUPING_OPERATION, current_context, SQ_GROUP_BY, + flags, ESC_GROUP_BY); +@@ -1729,7 +1831,8 @@ bool Explain_format_JSON::begin_context(enum_parsing_context ctx_arg, + current_context->type == CTX_ORDER_BY || + current_context->type == CTX_DISTINCT || + current_context->type == CTX_WINDOW || +- current_context->type == CTX_GROUP_BY); ++ current_context->type == CTX_GROUP_BY || ++ current_context->type == CTX_GATHER); + sort_ctx *ctx = + new (*THR_MALLOC) sort_ctx(CTX_BUFFER_RESULT, K_BUFFER_RESULT, + current_context, flags, ESC_BUFFER_RESULT); +@@ -1749,7 +1852,8 @@ bool Explain_format_JSON::begin_context(enum_parsing_context ctx_arg, + current_context->type == CTX_BUFFER_RESULT || + current_context->type == CTX_SIMPLE_GROUP_BY || + current_context->type == CTX_SIMPLE_ORDER_BY || +- current_context->type 
== CTX_SIMPLE_DISTINCT); ++ current_context->type == CTX_SIMPLE_DISTINCT || ++ current_context->type == CTX_GATHER); + join_tab_ctx *ctx = + new (*THR_MALLOC) join_tab_ctx(CTX_QEP_TAB, current_context); + if (ctx == nullptr || current_context->add_join_tab(ctx)) return true; +@@ -1828,7 +1932,8 @@ bool Explain_format_JSON::begin_context(enum_parsing_context ctx_arg, + current_context->type == CTX_DISTINCT || + current_context->type == CTX_WINDOW || + current_context->type == CTX_BUFFER_RESULT || +- current_context->type == CTX_MATERIALIZATION); ++ current_context->type == CTX_MATERIALIZATION || ++ current_context->type == CTX_GATHER); + duplication_weedout_ctx *ctx = + new (*THR_MALLOC) duplication_weedout_ctx(current_context); + if (ctx == nullptr || current_context->add_join_tab(ctx)) return true; +@@ -1893,6 +1998,14 @@ bool Explain_format_JSON::begin_context(enum_parsing_context ctx_arg, + current_context = ctx; + break; + } ++ case CTX_GATHER: { ++ join_ctx *ctx = ++ new (*THR_MALLOC) join_ctx(CTX_GATHER, K_QUERY_BLOCK, current_context); ++ if (ctx == nullptr || current_context->add_gather(ctx, subquery)) ++ return true; ++ current_context = ctx; ++ break; ++ } + case CTX_HAVING: { + subquery_ctx *ctx = + new (*THR_MALLOC) subquery_ctx(CTX_HAVING, nullptr, current_context); +diff --git a/sql/opt_explain_json.h b/sql/opt_explain_json.h +index 34085427..5fd860fd 100644 +--- a/sql/opt_explain_json.h ++++ b/sql/opt_explain_json.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -51,6 +52,7 @@ class Explain_format_JSON : public Explain_format { + bool end_context(enum_parsing_context context) override; + bool flush_entry() override { return false; } + qep_row *entry() override; ++ bool is_json() const override { return true; } + }; + + #endif // OPT_EXPLAIN_FORMAT_JSON_INCLUDED +diff --git a/sql/opt_explain_traditional.cc b/sql/opt_explain_traditional.cc +index 945cfacd..b7fb916f 100644 +--- a/sql/opt_explain_traditional.cc ++++ b/sql/opt_explain_traditional.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -79,11 +80,12 @@ static const char *traditional_extra_tags[ET_total] = { + "Table function:", // ET_TABLE_FUNCTION + "Index dive skipped due to FORCE", // ET_SKIP_RECORDS_IN_RANGE + "Using secondary engine", // ET_USING_SECONDARY_ENGINE +- "Rematerialize" // ET_REMATERIALIZE ++ "Rematerialize", // ET_REMATERIALIZE ++ "Parallel execute" // ET_PARALLEL_SCAN + }; + + static const char *mod_type_name[] = {"NONE", "INSERT", "UPDATE", "DELETE", +- "REPLACE"}; ++ "REPLACE", "SIMPLE"}; + + bool Explain_format_traditional::send_headers(Query_result *result) { + return ((nil = new Item_null) == nullptr || +@@ -251,6 +253,7 @@ bool Explain_format_traditional::flush_entry() { + case ET_USING_JOIN_BUFFER: + case ET_FIRST_MATCH: + case ET_REMATERIALIZE: ++ case ET_PARALLEL_EXE: + brackets = true; // for backward compatibility + break; + default: +diff --git a/sql/opt_hints.cc b/sql/opt_hints.cc +index af3b4e3b..9233e67d 100644 +--- a/sql/opt_hints.cc ++++ b/sql/opt_hints.cc +@@ -84,6 +84,8 @@ struct st_opt_hint_info opt_hint_info[] = { + {"JOIN_INDEX", false, false, false}, + {"GROUP_INDEX", false, false, false}, + {"ORDER_INDEX", false, false, false}, ++ {"PQ", true, true, false}, ++ {"NO_PQ", true, true, false}, + {"DERIVED_CONDITION_PUSHDOWN", true, true, false}, + {nullptr, false, false, false}}; + +diff --git a/sql/opt_hints.h b/sql/opt_hints.h +index 9b19e116..5744b746 100644 +--- a/sql/opt_hints.h ++++ b/sql/opt_hints.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -83,6 +84,8 @@ enum opt_hints_enum { + JOIN_INDEX_HINT_ENUM, + GROUP_INDEX_HINT_ENUM, + ORDER_INDEX_HINT_ENUM, ++ PQ_HINT_ENUM, ++ NO_PQ_HINT_ENUM, + DERIVED_CONDITION_PUSHDOWN_HINT_ENUM, + MAX_HINT_ENUM + }; +diff --git a/sql/opt_range.cc b/sql/opt_range.cc +index ec848bf6..341a655f 100644 +--- a/sql/opt_range.cc ++++ b/sql/opt_range.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -188,6 +189,7 @@ + #include "sql/thr_malloc.h" + #include "sql/uniques.h" // Unique + #include "template_utils.h" ++#include "pq_range.h" + + using std::max; + using std::min; +@@ -920,6 +922,74 @@ void QUICK_SELECT_I::trace_quick_description(Opt_trace_context *trace) { + range_trace.add_utf8("used_index", range_info.ptr(), range_info.length()); + } + ++bool QUICK_SELECT_I::pq_copy_from(THD *thd MY_ATTRIBUTE((unused)), ++ QUICK_SELECT_I *quick) { ++ records = quick->records; ++ cost_est.reset(); ++ cost_est += quick->cost_est; ++ max_used_key_length = quick->max_used_key_length; ++ used_key_parts = quick->used_key_parts; ++ forced_by_hint = quick->forced_by_hint; ++ last_rowid = quick->last_rowid; ++ return false; ++} ++ ++uint QUICK_RANGE_SELECT::quick_select_type() { return PQ_RANGE_SELECT; } ++ ++QUICK_SELECT_I *QUICK_RANGE_SELECT::pq_clone(THD *thd, TABLE *table) { ++ bool create_err = false; ++ QUICK_SELECT_I *pq_quick = ++ new QUICK_RANGE_SELECT(thd, table, index, false, nullptr, &create_err); ++ if (create_err || pq_quick->pq_copy_from(thd, this) || pq_quick->init() || ++ DBUG_EVALUATE_IF("pq_clone_error1", true, false)) { ++ delete pq_quick; ++ return nullptr; ++ } ++ return pq_quick; ++} ++ ++// select *from t1 where a > 1 and a < 5; ++bool QUICK_RANGE_SELECT::pq_copy_from(THD *thd, QUICK_SELECT_I *quick) { ++ QUICK_SELECT_I::pq_copy_from(thd, quick); ++ QUICK_RANGE_SELECT *quick_range_select = ++ dynamic_cast(quick); ++ assert(quick_range_select); ++ in_ror_merged_scan = quick_range_select->in_ror_merged_scan; ++ bitmap_copy(&column_bitmap, &quick_range_select->column_bitmap); ++ for (size_t ix = 0; ix < quick_range_select->ranges.size(); ++ix) { ++ QUICK_RANGE *orig = quick_range_select->ranges[ix]; ++ QUICK_RANGE *range = new (alloc.get()) ++ QUICK_RANGE(orig->min_key, orig->min_length, orig->min_keypart_map, ++ orig->max_key, orig->max_length, orig->max_keypart_map, ++ orig->flag, orig->rkey_func_flag); ++ if (!range) { ++ return true; ++ } ++ ranges.push_back(range); ++ } ++ ++ free_file = quick_range_select->free_file; ++ ++ /*Will init by reset: cur_range,last_range,qr_traversal_ctx,mrr_buf_desc */ ++ mrr_flags = quick_range_select->mrr_flags; ++ mrr_buf_size = quick_range_select->mrr_buf_size; ++ uint range_key_size = quick_range_select->used_key_parts; ++ ++ key_parts = (KEY_PART *)alloc->Alloc(sizeof(KEY_PART) * range_key_size); ++ if (!key_parts) { ++ return true; ++ } ++ memcpy(key_parts, quick_range_select->key_parts, ++ sizeof(KEY_PART) * range_key_size); ++ ++ for (uint i = 0; i < range_key_size; i++) { ++ key_parts[i].field = key_part_info[i].field; ++ } ++ ++ dont_free = quick_range_select->dont_free; ++ return false; ++} ++ + QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr, + bool no_alloc, MEM_ROOT *parent_alloc, + bool *create_error) +@@ -1008,6 +1078,36 @@ QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param, TABLE *table) + thd->variables.range_alloc_block_size, 0); + } + ++bool QUICK_INDEX_MERGE_SELECT::pq_copy_from(THD *thd, QUICK_SELECT_I *quick) { ++ QUICK_SELECT_I::pq_copy_from(thd, quick); ++ QUICK_INDEX_MERGE_SELECT *quick_index_merge_select = ++ dynamic_cast(quick); ++ assert(quick_index_merge_select); ++ ++ List_iterator_fast it( ++ quick_index_merge_select->quick_selects); ++ QUICK_RANGE_SELECT *quick_select; ++ while ((quick_select = it++)) { ++ QUICK_SELECT_I 
*quick_select_new = quick_select->pq_clone(thd, head); ++ if (quick_select_new == nullptr) { ++ return true; ++ } ++ quick_selects.push_back((QUICK_RANGE_SELECT *)quick_select_new); ++ } ++ ++ return false; ++} ++ ++QUICK_SELECT_I *QUICK_INDEX_MERGE_SELECT::pq_clone(THD *thd, TABLE *tab) { ++ QUICK_SELECT_I *pq_quick = new QUICK_INDEX_MERGE_SELECT(thd, tab); ++ if (pq_quick->pq_copy_from(thd, this) || pq_quick->init()) { ++ delete pq_quick; ++ return nullptr; ++ } ++ ++ return pq_quick; ++} ++ + int QUICK_INDEX_MERGE_SELECT::init() { + DBUG_TRACE; + return 0; +@@ -1289,6 +1389,37 @@ bool QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick) { + return quick_selects.push_back(quick); + } + ++bool QUICK_ROR_INTERSECT_SELECT::pq_copy_from(THD *thd, QUICK_SELECT_I* quick) { ++ QUICK_SELECT_I::pq_copy_from(thd, quick); ++ QUICK_ROR_INTERSECT_SELECT *quick_ror_intersect_select = dynamic_cast(quick); ++ assert(quick_ror_intersect_select); ++ scans_inited = quick_ror_intersect_select->scans_inited; ++ if (quick_ror_intersect_select->cpk_quick) { ++ cpk_quick = dynamic_cast(quick_ror_intersect_select->cpk_quick->pq_clone(thd, head)); ++ if (!cpk_quick) return true; ++ } ++ ++ List_iterator_fast it(quick_ror_intersect_select->quick_selects); ++ QUICK_RANGE_SELECT *orig_quick_range_select = nullptr; ++ QUICK_RANGE_SELECT *new_quick_range_select = nullptr; ++ while ((orig_quick_range_select = it++)) { ++ new_quick_range_select = dynamic_cast(orig_quick_range_select->pq_clone(thd, head)); ++ if (new_quick_range_select == nullptr) return true; ++ quick_selects.push_back(new_quick_range_select); ++ } ++ ++ return false; ++} ++ ++QUICK_SELECT_I* QUICK_ROR_INTERSECT_SELECT::pq_clone(THD *thd, TABLE *table) { ++ QUICK_SELECT_I *pq_quick = new QUICK_ROR_INTERSECT_SELECT(thd, table, need_to_fetch_row, NULL); ++ if (pq_quick->pq_copy_from(thd, this) || pq_quick->init()) { ++ delete pq_quick; ++ return nullptr; ++ } ++ return pq_quick; ++} ++ + QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT() { + DBUG_TRACE; + quick_selects.delete_elements(); +@@ -1393,6 +1524,36 @@ int QUICK_ROR_UNION_SELECT::reset() { + return 0; + } + ++bool QUICK_ROR_UNION_SELECT::pq_copy_from(THD *thd, QUICK_SELECT_I *quick) { ++ QUICK_SELECT_I::pq_copy_from(thd, quick); ++ QUICK_ROR_UNION_SELECT *quick_ror_union_select = ++ dynamic_cast(quick); ++ assert(quick_ror_union_select); ++ ++ List_iterator_fast it(quick_ror_union_select->quick_selects); ++ QUICK_SELECT_I *quick_select; ++ while ((quick_select = it++)) { ++ QUICK_SELECT_I *quick_select_new = quick_select->pq_clone(thd, head); ++ if (quick_select_new == nullptr) { ++ return true; ++ } ++ quick_selects.push_back(quick_select_new); ++ } ++ ++ return false; ++} ++ ++QUICK_SELECT_I* QUICK_ROR_UNION_SELECT::pq_clone(THD *thd, TABLE *table) { ++ QUICK_SELECT_I *pq_quick = ++ new QUICK_ROR_UNION_SELECT(thd, table); ++ if (pq_quick->pq_copy_from(thd, this) || ++ pq_quick->init() || DBUG_EVALUATE_IF("pq_clone_error1", true, false)) { ++ delete pq_quick; ++ return nullptr; ++ } ++ return pq_quick; ++} ++ + bool QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range) { + return quick_selects.push_back(quick_sel_range); + } +@@ -10285,6 +10446,27 @@ QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, + q->dont_free = true; // Don't free shared mem + } + ++QUICK_SELECT_I* QUICK_SELECT_DESC::pq_clone(THD* thd, TABLE* table) { ++ QUICK_RANGE_SELECT *quick_range_select = ++ dynamic_cast(this->QUICK_RANGE_SELECT::pq_clone(thd, table)); ++ 
if(!quick_range_select) { ++ return nullptr; ++ } ++ QUICK_SELECT_I *pq_quick = quick_range_select->make_reverse(m_used_key_parts); ++ delete quick_range_select; ++ if(!pq_quick || pq_quick->init()) { ++ if(pq_quick) { ++ delete pq_quick; ++ } ++ return nullptr; ++ } ++ return pq_quick; ++} ++ ++uint QUICK_SELECT_DESC::quick_select_type() { ++ return PQ_RANGE_SELECT; ++} ++ + int QUICK_SELECT_DESC::get_next() { + DBUG_TRACE; + +diff --git a/sql/opt_range.h b/sql/opt_range.h +index 58515d0a..049c8f26 100644 +--- a/sql/opt_range.h ++++ b/sql/opt_range.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -264,6 +265,8 @@ class QUICK_SELECT_I { + QUICK_SELECT_I(const QUICK_SELECT_I &) = default; + virtual ~QUICK_SELECT_I() {} + ++ virtual uint quick_select_type() { return PQ_QUICK_SELECT_NONE; } ++ + /* + Do post-constructor initialization. + SYNOPSIS +@@ -429,6 +432,9 @@ class QUICK_SELECT_I { + */ + virtual void get_fields_used(MY_BITMAP *used_fields) = 0; + void trace_quick_description(Opt_trace_context *trace); ++ ++ virtual bool pq_copy_from(THD *thd,QUICK_SELECT_I*); ++ virtual QUICK_SELECT_I* pq_clone(THD *, TABLE*){ return nullptr; } + }; + + class PARAM; +@@ -516,6 +522,7 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I { + MEM_ROOT *parent_alloc, bool *create_error); + ~QUICK_RANGE_SELECT() override; + ++ uint quick_select_type() override; + void need_sorted_output() override; + int init() override; + int reset(void) override; +@@ -544,6 +551,9 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I { + bitmap_set_bit(used_fields, key_parts[i].field->field_index()); + } + ++ bool pq_copy_from(THD *thd, QUICK_SELECT_I*) override; ++ QUICK_SELECT_I* pq_clone(THD *thd, TABLE *table) override; ++ + private: + /* Default copy ctor used by QUICK_SELECT_DESC */ + QUICK_RANGE_SELECT(const QUICK_RANGE_SELECT &) = default; +@@ -637,6 +647,8 @@ class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I { + void add_keys_and_lengths(String *key_names, String *used_lengths) override; + void add_info_string(String *str) override; + bool is_keys_used(const MY_BITMAP *fields) override; ++ virtual bool pq_copy_from(THD *thd, QUICK_SELECT_I* quick); ++ virtual QUICK_SELECT_I* pq_clone(THD *thd, TABLE *tab); + #ifndef NDEBUG + void dbug_dump(int indent, bool verbose) override; + #endif +@@ -725,6 +737,8 @@ class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I { + #endif + int init_ror_merged_scan(bool reuse_handler) override; + bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); ++ bool pq_copy_from(THD *thd, QUICK_SELECT_I*) override; ++ QUICK_SELECT_I* pq_clone(THD *thd, TABLE *table) override; + + /* + Range quick selects this intersection consists of, not including +@@ -811,6 +825,9 @@ class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I { + void dbug_dump(int indent, bool verbose) override; + #endif + ++ bool pq_copy_from(THD *thd, QUICK_SELECT_I*) override; ++ QUICK_SELECT_I* pq_clone(THD *thd, TABLE *table) override; ++ + bool push_quick_back(QUICK_SELECT_I *quick_sel_range); + + List quick_selects; /* Merged quick selects */ +@@ -995,6 +1012,9 @@ class QUICK_SELECT_DESC : public QUICK_RANGE_SELECT { + return this; // is already reverse sorted + } + ++ QUICK_SELECT_I* pq_clone(THD*, TABLE*); ++ uint quick_select_type() override; ++ + private: + bool 
range_reads_after_key(QUICK_RANGE *range); + int reset(void) override { +diff --git a/sql/parse_tree_hints.cc b/sql/parse_tree_hints.cc +index b3413928..8e0a3586 100644 +--- a/sql/parse_tree_hints.cc ++++ b/sql/parse_tree_hints.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -269,6 +270,15 @@ bool PT_qb_level_hint::contextualize(Parse_context *pc) { + else + pc->select->add_base_options(SELECT_STRAIGHT_JOIN); + break; ++ case PQ_HINT_ENUM: ++ if (args > 0) ++ pc->thd->pq_dop = args; ++ else ++ pc->thd->pq_dop = pc->thd->variables.parallel_default_dop; ++ break; ++ case NO_PQ_HINT_ENUM: ++ pc->thd->no_pq = true; ++ break; + default: + assert(0); + } +diff --git a/sql/parse_tree_items.h b/sql/parse_tree_items.h +index 254ecf57..6632f22d 100644 +--- a/sql/parse_tree_items.h ++++ b/sql/parse_tree_items.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2013, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -161,6 +162,7 @@ class PTI_function_call_nonkeyword_now final : public Item_func_now_local { + : super(pos, dec_arg) {} + + bool itemize(Parse_context *pc, Item **res) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class PTI_function_call_nonkeyword_sysdate : public Parse_tree_item { +@@ -256,6 +258,7 @@ class PTI_text_literal_text_string : public PTI_text_literal { + : super(pos, is_7bit_arg, literal_arg) {} + + bool itemize(Parse_context *pc, Item **res) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class PTI_text_literal_nchar_string : public PTI_text_literal { +@@ -267,6 +270,7 @@ class PTI_text_literal_nchar_string : public PTI_text_literal { + : super(pos, is_7bit_arg, literal_arg) {} + + bool itemize(Parse_context *pc, Item **res) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class PTI_text_literal_underscore_charset : public PTI_text_literal { +@@ -289,6 +293,8 @@ class PTI_text_literal_underscore_charset : public PTI_text_literal { + set_cs_specified(true); + return false; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class PTI_text_literal_concat : public PTI_text_literal { +@@ -341,6 +347,8 @@ class PTI_literal_underscore_charset_hex_num : public Item_string { + set_cs_specified(true); + return check_well_formed_result(&str_value, true, true) == nullptr; + } ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class PTI_literal_underscore_charset_bin_num : public Item_string { +@@ -379,6 +387,8 @@ class PTI_user_variable final : public Item_func_get_user_var { + PTI_user_variable(const POS &pos, const LEX_STRING &var) : super(pos, var) {} + + bool itemize(Parse_context *pc, Item **res) override; ++ ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + /** +@@ -415,6 +425,7 @@ class PTI_count_sym : public Item_sum_count { + : super(pos, (Item *)nullptr, w) {} + + bool itemize(Parse_context *pc, Item **res) override; ++ Item *pq_clone(THD *thd, Query_block *select) override; + }; + + class PTI_in_sum_expr : public Parse_tree_item { +@@ -427,6 +438,7 @@ class PTI_in_sum_expr : public Parse_tree_item { + : super(pos), expr(expr_arg) 
{} + + bool itemize(Parse_context *pc, Item **res) override; ++ Item *pq_clone(THD *, Query_block *) override { return this; } + }; + + class PTI_singlerow_subselect : public Parse_tree_item { +diff --git a/sql/parse_tree_node_base.h b/sql/parse_tree_node_base.h +index dd550dc9..a2bbab37 100644 +--- a/sql/parse_tree_node_base.h ++++ b/sql/parse_tree_node_base.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2013, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -61,6 +62,7 @@ enum enum_parsing_context { + CTX_INSERT_VALUES, ///< INSERT ... VALUES + CTX_INSERT_UPDATE, ///< INSERT ... ON DUPLICATE KEY UPDATE ... + CTX_JOIN, ++ CTX_GATHER, + CTX_QEP_TAB, + CTX_MATERIALIZATION, + CTX_DUPLICATES_WEEDOUT, +diff --git a/sql/parse_tree_nodes.cc b/sql/parse_tree_nodes.cc +index d25cd950..06ee91e5 100644 +--- a/sql/parse_tree_nodes.cc ++++ b/sql/parse_tree_nodes.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2013, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -1743,6 +1744,8 @@ bool PT_create_table_default_collation::contextualize( + bool PT_locking_clause::contextualize(Parse_context *pc) { + LEX *lex = pc->thd->lex; + ++ pc->thd->locking_clause = true; ++ + if (lex->is_explain()) return false; + + if (m_locked_row_action == Locked_row_action::SKIP) +diff --git a/sql/pq_clone.cc b/sql/pq_clone.cc +new file mode 100644 +index 00000000..3e0e77e0 +--- /dev/null ++++ b/sql/pq_clone.cc +@@ -0,0 +1,1215 @@ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "sql/pq_clone.h" ++#include "include/my_dbug.h" ++#include "include/mysql/psi/mysql_thread.h" ++#include "sql/mysqld.h" ++#include "sql/opt_range.h" ++#include "sql/sql_base.h" ++#include "sql/sql_lex.h" ++#include "sql/sql_opt_exec_shared.h" ++#include "sql/sql_optimizer.h" ++#include "sql/sql_parallel.h" ++#include "sql/sql_resolver.h" ++#include "sql/system_variables.h" ++ ++class COND_CMP; ++bool propagate_cond_constants(THD *thd, I_List *save_list, ++ Item *and_father, Item *cond); ++ ++bool POSITION::pq_copy(THD *thd, POSITION *orig) { ++ rows_fetched = orig->rows_fetched; ++ read_cost = orig->read_cost; ++ filter_effect = orig->filter_effect; ++ prefix_rowcount = orig->prefix_rowcount; ++ prefix_cost = orig->prefix_cost; ++ table = orig->table; ++ if (orig->key) { ++ key = orig->key->pq_clone(thd); ++ if (key == nullptr) { ++ return true; ++ } ++ } else { ++ key = nullptr; ++ } ++ ref_depend_map = orig->ref_depend_map; ++ use_join_buffer = orig->use_join_buffer; ++ sj_strategy = orig->sj_strategy; ++ n_sj_tables = orig->n_sj_tables; ++ dups_producing_tables = orig->dups_producing_tables; ++ first_loosescan_table = orig->first_loosescan_table; ++ loosescan_need_tables = orig->loosescan_need_tables; ++ loosescan_key = orig->loosescan_key; ++ loosescan_parts = orig->loosescan_parts; ++ first_firstmatch_table = orig->first_firstmatch_table; ++ first_firstmatch_rtbl = orig->first_firstmatch_rtbl; ++ firstmatch_need_tables = orig->firstmatch_need_tables; ++ first_dupsweedout_table = orig->first_dupsweedout_table; ++ dupsweedout_tables = orig->dupsweedout_tables; ++ sjm_scan_last_inner = orig->sjm_scan_last_inner; ++ sjm_scan_need_tables = orig->sjm_scan_need_tables; ++ ++ return false; ++} ++ ++bool QEP_TAB::pq_copy(THD *thd, QEP_TAB *orig) { ++ set_type(orig->type()); ++ set_index(orig->index()); ++ set_first_inner(orig->first_inner()); ++ set_last_inner(orig->last_inner()); ++ set_first_sj_inner(orig->first_sj_inner()); ++ set_last_sj_inner(orig->last_sj_inner()); ++ keys().merge(orig->keys()); ++ m_reversed_access = orig->m_reversed_access; ++ do_parallel_scan = orig->do_parallel_scan; ++ firstmatch_return = orig->firstmatch_return; ++ cache_idx_cond = orig->cache_idx_cond; ++ loosescan_key_len = orig->loosescan_key_len; ++ POSITION *position = new (thd->pq_mem_root) POSITION; ++ if (!position || position->pq_copy(thd, orig->position())) { ++ return true; ++ } ++ ++ set_position(position); ++ if (orig->pq_cond) { ++ JOIN *join = this->join(); ++ if (join == nullptr) { ++ return true; ++ } ++ pq_cond = orig->pq_cond->pq_clone(join->thd, join->query_block); ++ if (pq_cond == nullptr) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool TABLE::pq_copy(THD *thd, void *select_arg, TABLE *orig) { ++ Query_block *select = static_cast(select_arg); ++ possible_quick_keys = orig->possible_quick_keys; ++ covering_keys = orig->covering_keys; ++ key_read = orig->key_read; ++ const_table = orig->const_table; ++ nullable = orig->nullable; ++ null_row = orig->null_row; ++ m_cost_model = orig->m_cost_model; ++ memcpy(record[0], orig->record[0], orig->s->rec_buff_length); ++ ++ reginfo = orig->reginfo; ++ ++ file->pushed_idx_cond_keyno = orig->file->pushed_idx_cond_keyno; ++ Item *index_pushdown = orig->file->pushed_idx_cond; ++ // needs deep copy ++ 
file->pushed_idx_cond = ++ index_pushdown ? index_pushdown->pq_clone(thd, select) : nullptr; ++ Item *copy_index_pushdown = file->pushed_idx_cond; ++ if ((index_pushdown && copy_index_pushdown == nullptr) || ++ (copy_index_pushdown && ++ copy_index_pushdown->fix_fields(thd, ©_index_pushdown))) { ++ return true; ++ } ++ ++ return false; ++} ++ ++/* ++ * copy table_ref info. ++ * ++ * @retval: ++ * false if copy successfully, and otherwise true. ++ */ ++bool TABLE_REF::pq_copy(JOIN *join, TABLE_REF *ref, QEP_TAB *qep_tab) { ++ THD *thd = join->thd; ++ key_parts = ref->key_parts; ++ key_length = ref->key_length; ++ key_err = ref->key_err; ++ key = ref->key; ++ null_rejecting = ref->null_rejecting; ++ depend_map = ref->depend_map; ++ use_count = ref->use_count; ++ disable_cache = ref->disable_cache; ++ ++ if (!(key_buff = thd->pq_mem_root->ArrayAlloc(ALIGN_SIZE(key_length))) || ++ !(key_buff2 = ++ thd->pq_mem_root->ArrayAlloc(ALIGN_SIZE(key_length))) || ++ !(key_copy = thd->pq_mem_root->ArrayAlloc(key_parts)) || ++ !(items = thd->pq_mem_root->ArrayAlloc(key_parts)) || ++ !(cond_guards = thd->pq_mem_root->ArrayAlloc(key_parts))) { ++ return true; ++ } ++ ++ if (ref->null_ref_key != nullptr) { ++ null_ref_key = key_buff; ++ } ++ ++ memcpy(key_buff, ref->key_buff, ALIGN_SIZE(key_length)); ++ memcpy(key_buff2, ref->key_buff2, ALIGN_SIZE(key_length)); ++ uchar *key_buff_tmp = key_buff; ++ ++ for (uint i = 0; i < key_parts; i++) { ++ items[i] = ref->items[i]->pq_clone(thd, join->query_block); ++ if (items[i] == nullptr) { ++ return true; ++ } ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || items[i]); ++ if (!items[i]->fixed) { ++ items[i]->fix_fields(thd, &items[i]); ++ } ++ ++ if (qep_tab->table()->key_info) { ++ KEY *const keyinfo = qep_tab->table()->key_info + key; ++ bool maybe_null = keyinfo->key_part[i].null_bit; ++ qep_tab->position()->key->val = items[i]; ++ qep_tab->position()->key->used_tables = ++ qep_tab->position()->key->val->used_tables(); ++ if (ref->key_copy[i] != nullptr) { ++ key_copy[i] = get_store_key( ++ thd, qep_tab->position()->key->val, ++ qep_tab->position()->key->used_tables, join->const_table_map, ++ &keyinfo->key_part[i], key_buff_tmp, maybe_null); ++ } ++ ++ key_buff_tmp += keyinfo->key_part[i].store_length; ++ } ++ ++ if (!key_copy[i]) { ++ key_copy[i] = ref->key_copy[i]; ++ } ++ cond_guards[i] = ref->cond_guards[i]; ++ } ++ return false; ++} ++/* ++ * get table index ++ * ++ * @retval: ++ * -1 means not found. 
++ */ ++int get_qep_tab_index(QEP_TAB *src, TABLE_LIST *first_tbl) { ++ int index = 0; ++ for (TABLE_LIST *tl = first_tbl; tl != nullptr; tl = tl->next_leaf) { ++ if (src->table_ref == tl) { ++ return index; ++ } ++ index++; ++ } ++ return -1; ++} ++ ++TABLE_LIST *get_next_table(TABLE_LIST *start_table, ++ table_list_type_enum list_type) { ++ if (list_type == TABLE_LIST_TYPE_DEFAULT) { ++ return start_table->next_local; ++ } else if (list_type == TABLE_LIST_TYPE_LEAF) { ++ return start_table->next_leaf; ++ } else if (list_type == TABLE_LIST_TYPE_MERGE) { ++ return start_table->merge_underlying_list; ++ } else { ++ return start_table->next_global; ++ } ++ return nullptr; ++} ++ ++TABLE_LIST *get_table_by_index(TABLE_LIST *start_table, ++ table_list_type_enum list_type, int index) { ++ if (start_table == nullptr) { ++ return nullptr; ++ } ++ if (list_type == TABLE_LIST_TYPE_MERGE) { ++ start_table = start_table->merge_underlying_list; ++ } ++ int it = 0; ++ for (TABLE_LIST *tbl_list = start_table; tbl_list != nullptr; it++) { ++ if (it == index) { ++ return tbl_list; ++ } ++ tbl_list = get_next_table(tbl_list, list_type); ++ } ++ return nullptr; ++} ++ ++int get_qep_tab_index(QEP_TAB *tab, JOIN *join) { ++ for (uint i = 0; i < join->tables; i++) { ++ if (&join->qep_tab0[i] == tab) { ++ return i; ++ } ++ } ++ return -1; ++} ++ ++bool copy_flush(QEP_TAB *des, JOIN *orig, int index, JOIN *join) { ++ QEP_TAB *src = &orig->qep_tab[index]; ++ SJ_TMP_TABLE_TAB sjtabs[MAX_TABLES]; ++ SJ_TMP_TABLE_TAB *last_tab = sjtabs; ++ if (src->flush_weedout_table->tabs != nullptr) { ++ for (SJ_TMP_TABLE_TAB *t = src->flush_weedout_table->tabs; ++ t < src->flush_weedout_table->tabs_end; t++) { ++ int n = get_qep_tab_index(t->qep_tab, orig); ++ if (n == -1) { ++ return false; ++ } ++ last_tab->qep_tab = &join->qep_tab[n]; ++ ++last_tab; ++ } ++ } ++ ++ SJ_TMP_TABLE *sjtbl = create_sj_tmp_table(join->thd, join, sjtabs, last_tab); ++ des->flush_weedout_table = sjtbl; ++ QEP_TAB *start = &orig->qep_tab[index]; ++ int dis = 0; ++ for (uint i = index + 1; i < orig->tables; i++) { ++ QEP_TAB *t = &orig->qep_tab[i]; ++ if (t->check_weed_out_table == start->flush_weedout_table) { ++ dis = i - index; ++ break; ++ } ++ } ++ ++ QEP_TAB *last_sj_tab = des + dis; ++ last_sj_tab->check_weed_out_table = sjtbl; ++ return true; ++} ++/** ++ * duplicate qep_tabs in JOIN ++ * ++ * @join : target JOIN ++ * @orig : origin JOIN ++ * @setup : setup qep_tab object ++ * ++ */ ++bool pq_dup_tabs(JOIN *join, JOIN *orig, bool setup MY_ATTRIBUTE((unused))) { ++ Item *m_having_cond = nullptr; ++ ++ join->const_tables = orig->const_tables; ++ join->primary_tables = orig->primary_tables; ++ Query_block *select = join->query_block; ++ ++ // phase 1. 
Create qep_tab and qep_tab->qs; ++ QEP_shared *qs = new (join->thd->pq_mem_root) QEP_shared[join->tables + 1]; ++ if (qs == nullptr) { ++ goto err; ++ } ++ join->qep_tab0 = new (join->thd->pq_mem_root) QEP_TAB[join->tables + 1]; ++ if (join->qep_tab0 == nullptr) { ++ goto err; ++ } ++ join->qep_tab = join->qep_tab0; ++ ++ for (uint i = 0; i < join->tables; i++) { ++ join->qep_tab[i].set_qs(&qs[i]); ++ join->qep_tab[i].set_join(join); ++ join->qep_tab[i].set_idx(i); ++ join->qep_tab[i].match_tab = orig->qep_tab[i].match_tab; ++ join->qep_tab[i].flush_weedout_table = orig->qep_tab[i].flush_weedout_table; ++ join->qep_tab[i].check_weed_out_table = ++ orig->qep_tab[i].check_weed_out_table; ++ join->qep_tab[i].op_type = orig->qep_tab[i].op_type; ++ join->qep_tab[i].table_ref = orig->qep_tab[i].table_ref; ++ join->qep_tab[i].using_dynamic_range = orig->qep_tab[i].using_dynamic_range; ++ } ++ ++ for (uint i = 0; i < join->primary_tables; i++) { ++ QEP_TAB *tab = &join->qep_tab[i]; ++ QEP_TAB *orig_tab = &orig->qep_tab[i]; ++ ++ // phase 3. Set tables to qep_tab according to db/table name ++ if (tab->pq_copy(join->thd, orig_tab)) { ++ goto err; ++ } ++ TABLE *tb = orig_tab->table(); ++ tab->table_name = new (join->thd->pq_mem_root) ++ LEX_CSTRING{tb->s->table_name.str, tb->s->table_name.length}; ++ ++ tab->db = new (join->thd->pq_mem_root) ++ LEX_CSTRING{tb->s->db.str, tb->s->db.length}; ++ if (tab->table_name == nullptr || tab->db == nullptr) { ++ goto err; ++ } ++ ++ /* ++ * note: currently, setup is true. ++ * Because duplicate qep_tabs in JOIN need fix_field to convert item to ++ * field. ++ */ ++ assert(select->leaf_tables); ++ /* ++ * setup physic table object ++ * Sometimes there are multiple tables with the same name in the ++ * leaf_tables, such as select empnum from t1 where hours in (select hours ++ * from t1); The leaf_tables has two t1's in it,at this point we need to ++ * copy the corresponding table of the same name. ++ */ ++ int index = get_qep_tab_index(orig_tab, orig->query_block->leaf_tables); ++ if (index == -1) { ++ goto err; ++ } ++ TABLE_LIST *tl = ++ get_table_by_index(select->leaf_tables, TABLE_LIST_TYPE_LEAF, index); ++ if (tl == nullptr) { ++ goto err; ++ } ++ bitmap_copy(tl->table->read_set, tab->table_ref->table->read_set); ++ bitmap_copy(tl->table->write_set, tab->table_ref->table->write_set); ++ tab->set_table(tl->table); ++ tab->table_ref = tl; ++ ++ // phase 4. Copy table properties from leader ++ if (tab->ref().pq_copy(join, &orig_tab->ref(), tab)) { ++ goto err; ++ } ++ ++ if (orig_tab->table()) { ++ if (tab->table()->pq_copy(join->thd, (void *)select, orig_tab->table())) { ++ goto err; ++ } ++ tab->set_keyread_optim(); ++ } ++ ++ // phase 2. clone conditions in qep_tab ++ Item *condition = orig_tab->condition(); ++ if ((condition != nullptr) && i < orig->primary_tables) { ++ Item *cond = condition->pq_clone(join->thd, select); ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || cond); ++ if (cond == nullptr) { ++ goto err; ++ } ++ if (cond->fix_fields(join->thd, &cond)) { ++ goto err; ++ } ++ tab->set_condition(cond); ++ tab->set_condition_optim(); ++ } ++ ++ // phase 2. 
clone cache_idx_cond in qep_tab ++ Item *cache_idx_cond = orig_tab->cache_idx_cond; ++ if ((cache_idx_cond != nullptr) && i < orig->primary_tables) { ++ Item *cond = cache_idx_cond->pq_clone(join->thd, select); ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || cond); ++ if (cond == nullptr) { ++ goto err; ++ } ++ if (cond->fix_fields(join->thd, &cond)) { ++ goto err; ++ } ++ tab->cache_idx_cond = cond; ++ } ++ ++ // phase 5. setup pq condition for index push down ++ if ((tab->has_pq_cond && !tab->pq_cond) || ++ (tab->pq_cond && tab->pq_cond->fix_fields(join->thd, &tab->pq_cond)) || ++ DBUG_EVALUATE_IF("pq_clone_error2", true, false)) { ++ sql_print_warning("[Parallel query]: ICP condition pushdown failed"); ++ goto err; ++ } ++ ++ // phase 6. copy quick select ++ MEM_ROOT *saved_mem_root = join->thd->mem_root; ++ if (orig_tab->quick()) { ++ QUICK_SELECT_I *quick = ++ orig_tab->quick()->pq_clone(join->thd, tab->table()); ++ assert(DBUG_EVALUATE_IF("pq_clone_error1", true, false) || quick); ++ if (quick == nullptr) { ++ goto err; ++ } ++ tab->set_quick(quick); ++ tab->set_quick_optim(); ++ } ++ join->thd->mem_root = saved_mem_root; ++ } ++ ++ // phase 7. Copy having condition ++ m_having_cond = select->having_cond(); ++ if (m_having_cond) { ++ assert(m_having_cond->is_bool_func()); ++ join->thd->where = "having clause"; ++ select->having_fix_field = true; ++ select->resolve_place = Query_block::RESOLVE_HAVING; ++ if (!m_having_cond->fixed && ++ (m_having_cond->fix_fields(join->thd, &m_having_cond) || ++ m_having_cond->check_cols(1))) { ++ goto err; ++ } ++ ++ select->having_fix_field = false; ++ select->resolve_place = Query_block::RESOLVE_NONE; ++ } ++ ++ for (uint i = 0; i < join->tables; i++) { ++ QEP_TAB *t = &orig->qep_tab[i]; ++ if (t->flush_weedout_table != nullptr) { ++ if (!copy_flush(&join->qep_tab[i], orig, i, join)) { ++ goto err; ++ } ++ } ++ } ++ return false; ++ ++err: ++ return true; ++} ++ ++/* ++ * clone order structure ++ */ ++ORDER *pq_dup_order(THD *thd, Query_block *select, ORDER *orig) { ++ ORDER *order = new (thd->pq_mem_root) ORDER(); ++ if (order == nullptr) { ++ return nullptr; ++ } ++ ++ if ((*orig->item)->is_derived_used() || !orig->in_field_list) { ++ order->item_initial = (*orig->item)->pq_clone(thd, select); ++ } else { ++ order->item_initial = orig->item_initial->pq_clone(thd, select); ++ } ++ ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || ++ order->item_initial); ++ if (order->item_initial == nullptr) { ++ return nullptr; ++ } ++ ++ order->next = nullptr; ++ order->item = &order->item_initial; ++ order->direction = orig->direction; ++ order->in_field_list = orig->in_field_list; ++ order->used_alias = orig->used_alias; ++ order->field_in_tmp_table = nullptr; ++ order->buff = nullptr; ++ order->used = 0; ++ order->depend_map = 0; ++ order->is_position = orig->is_position; ++ order->is_explicit = orig->is_explicit; ++ ++ return order; ++} ++ ++int get_table_index(TABLE_LIST *start_table, table_list_type_enum list_type, ++ TABLE_LIST *tl) { ++ if (start_table == nullptr) { ++ return -1; ++ } ++ int index = 0; ++ for (TABLE_LIST *tbl_list = start_table; tbl_list != nullptr; index++) { ++ if (tbl_list == tl) { ++ return index; ++ } ++ tbl_list = get_next_table(tbl_list, list_type); ++ } ++ return -1; ++} ++ ++TABLE_LIST *copy_table(THD *thd, TABLE_LIST *src, Query_block *select, ++ Query_block *orig) { ++ TABLE_LIST *ptr = new (thd->mem_root) TABLE_LIST; ++ if (ptr == nullptr) { ++ return nullptr; ++ } ++ ptr->query_block = 
select; ++ ptr->derived = src->derived; ++ ptr->effective_algorithm = src->effective_algorithm; ++ ptr->outer_join = src->outer_join; ++ if (src->merge_underlying_list != nullptr) { ++ TABLE_LIST *foundtable = nullptr; ++ int index = get_table_index(orig->leaf_tables, TABLE_LIST_TYPE_GLOBAL, ++ src->merge_underlying_list); ++ if (index != -1) { ++ foundtable = get_table_by_index(select->leaf_tables, ++ TABLE_LIST_TYPE_GLOBAL, index); ++ if (foundtable == nullptr) { ++ return nullptr; ++ } ++ ptr->merge_underlying_list = foundtable; ++ } else { ++ ptr->merge_underlying_list = ++ copy_table(thd, src->merge_underlying_list, select, orig); ++ } ++ } ++ ptr->field_translation = nullptr; ++ ptr->table_name = src->table_name; ++ ptr->table_name_length = src->table_name_length; ++ ptr->alias = src->alias; ++ ptr->is_alias = src->is_alias; ++ ptr->table_function = src->table_function; ++ if (src->table_function) { ++ ptr->derived_key_list.clear(); ++ } ++ ptr->is_fqtn = src->is_fqtn; ++ ptr->db = src->db; ++ ptr->db_length = src->db_length; ++ ptr->set_tableno(src->tableno()); ++ ptr->set_lock({TL_UNLOCK, THR_DEFAULT}); ++ ptr->updating = false; ++ ptr->ignore_leaves = false; ++ ptr->is_system_view = src->is_system_view; ++ ++ if (!ptr->is_derived() && !ptr->is_table_function() && ++ is_infoschema_db(ptr->db, ptr->db_length)) { ++ dd::info_schema::convert_table_name_case( ++ const_cast(ptr->db), const_cast(ptr->table_name)); ++ ST_SCHEMA_TABLE *schema_table = nullptr; ++ if (!ptr->is_system_view) { ++ schema_table = find_schema_table(thd, ptr->table_name); ++ if (schema_table) { ++ ptr->schema_table = schema_table; ++ } ++ } ++ } ++ ptr->cacheable_table = true; ++ ptr->index_hints = nullptr; ++ ptr->option = nullptr; ++ ptr->next_name_resolution_table = nullptr; ++ ptr->partition_names = nullptr; ++ MDL_REQUEST_INIT(&ptr->mdl_request, MDL_key::TABLE, ptr->db, ptr->table_name, ++ MDL_SHARED_READ, MDL_TRANSACTION); ++ return ptr; ++} ++ ++bool copy_table_field(TABLE_LIST *src, TABLE_LIST *des, THD *thd, ++ Query_block *dest_select) { ++ int count = src->field_translation_end - src->field_translation; ++ if (count <= 0) { ++ return false; ++ } ++ if (des->field_translation_end - des->field_translation != count) { ++ return true; ++ } ++ if (des->field_translation[0].item != nullptr) { ++ return false; ++ } ++ for (int i = 0; i < count; i++) { ++ des->field_translation[i].name = src->field_translation[i].name; ++ if (src->field_translation[i].item == nullptr) { ++ return true; ++ } ++ des->field_translation[i].item = ++ src->field_translation[i].item->pq_clone(thd, dest_select); ++ if (des->field_translation[i].item == nullptr) { ++ return true; ++ } ++ } ++ return false; ++} ++ ++bool copy_merge_table_field(THD *thd, Query_block *dest_select, int tableindex, ++ int mergeindex, TABLE_LIST *srctb) { ++ TABLE_LIST *tb = get_table_by_index(dest_select->table_list.first, ++ TABLE_LIST_TYPE_DEFAULT, tableindex); ++ if (tb == nullptr) { ++ return true; ++ } ++ TABLE_LIST *mergetable = ++ get_table_by_index(tb, TABLE_LIST_TYPE_MERGE, mergeindex); ++ if (mergetable == nullptr) { ++ return true; ++ } ++ if (copy_table_field(srctb, mergetable, thd, dest_select)) { ++ return true; ++ } ++ return false; ++} ++ ++bool copy_global_table_list_field(THD *thd, Query_block *orig, ++ Query_block *dest_select) { ++ int tableindex = 0; ++ for (TABLE_LIST *tbl_list = orig->leaf_tables; tbl_list != nullptr; ++ tbl_list = tbl_list->next_global) { ++ if (tbl_list->field_translation != nullptr) { ++ TABLE_LIST *src = 
get_table_by_index(dest_select->leaf_tables, ++ TABLE_LIST_TYPE_GLOBAL, tableindex); ++ if (src == nullptr) { ++ return true; ++ } ++ if (copy_table_field(tbl_list, src, thd, dest_select)) { ++ return true; ++ } ++ } ++ tableindex++; ++ } ++ return false; ++} ++ ++bool init_table_field_space(THD *thd, TABLE_LIST *src, TABLE_LIST *des) { ++ int count = src->field_translation_end - src->field_translation; ++ if (count > 0 && des->field_translation == nullptr) { ++ Field_translator *transl = (Field_translator *)thd->stmt_arena->alloc( ++ count * sizeof(Field_translator)); ++ if (transl == nullptr) { ++ return true; ++ } ++ for (int i = 0; i < count; i++) { ++ transl[i].name = nullptr; ++ transl[i].item = nullptr; ++ } ++ des->field_translation = transl; ++ des->field_translation_end = transl + count; ++ } ++ return false; ++} ++ ++bool copy_leaf_tables(THD *thd, Query_block *orig, Query_block *dest_select) { ++ TABLE_LIST *last = nullptr; ++ dest_select->leaf_tables = nullptr; ++ for (TABLE_LIST *tbl_list = orig->leaf_tables; tbl_list != nullptr; ++ tbl_list = tbl_list->next_leaf) { ++ TABLE_LIST *tl = copy_table(thd, tbl_list, dest_select, orig); ++ if (tl == nullptr) { ++ return true; ++ } ++ if (dest_select->leaf_tables == nullptr) { ++ dest_select->leaf_tables = tl; ++ last = tl; ++ } else { ++ last->next_name_resolution_table = tl; ++ last->next_leaf = tl; ++ last = tl; ++ } ++ } ++ last->next_leaf = nullptr; ++ return false; ++} ++ ++void set_up_leaf_tables(THD *thd, Query_block *select) { ++ select->partitioned_table_count = 0; ++ for (TABLE_LIST *tr = select->leaf_tables; tr != nullptr; ++ tr = tr->next_leaf) { ++ TABLE *const table = tr->table; ++ select->leaf_table_count++; ++ if (select->first_execution && ++ select->opt_hints_qb && // QB hints initialized ++ !tr->opt_hints_table) // Table hints are not adjusted yet ++ { ++ tr->opt_hints_table = select->opt_hints_qb->adjust_table_hints(tr); ++ } ++ if (table == nullptr) { ++ continue; ++ } ++ table->pos_in_table_list = tr; ++ } ++ if (select->opt_hints_qb) { ++ select->opt_hints_qb->check_unresolved(thd); ++ } ++} ++ ++bool copy_global_tables(THD *thd, Query_block *orig, Query_block *dest_select) { ++ for (TABLE_LIST *tbl_list = orig->leaf_tables; tbl_list != nullptr; ++ tbl_list = tbl_list->next_global) { ++ int index = ++ get_table_index(orig->leaf_tables, TABLE_LIST_TYPE_LEAF, tbl_list); ++ TABLE_LIST *tmp = nullptr; ++ if (index != -1) { ++ tmp = get_table_by_index(dest_select->leaf_tables, TABLE_LIST_TYPE_LEAF, ++ index); ++ } else { ++ tmp = copy_table(thd, tbl_list, dest_select, orig); ++ } ++ if (tmp == nullptr) { ++ return true; ++ } ++ thd->lex->add_to_query_tables(tmp); ++ } ++ return false; ++} ++ ++bool copy_table_list(THD *thd, Query_block *orig, Query_block *dest_select) { ++ for (TABLE_LIST *tbl_list = orig->table_list.first; tbl_list != nullptr; ++ tbl_list = tbl_list->next_local) { ++ int index = ++ get_table_index(orig->leaf_tables, TABLE_LIST_TYPE_GLOBAL, tbl_list); ++ TABLE_LIST *tmp = nullptr; ++ if (index != -1) { ++ tmp = get_table_by_index(dest_select->leaf_tables, TABLE_LIST_TYPE_GLOBAL, ++ index); ++ } else { ++ tmp = copy_table(thd, tbl_list, dest_select, orig); ++ } ++ if (tmp == nullptr) { ++ return true; ++ } ++ dest_select->table_list.link_in_list(tmp, &tmp->next_local); ++ } ++ return false; ++} ++ ++bool init_table_list_field_space(THD *thd, Query_block *select, ++ table_list_type_enum list_type) { ++ TABLE_LIST *start_src = nullptr; ++ TABLE_LIST *start_des = nullptr; ++ if (list_type == 
TABLE_LIST_TYPE_DEFAULT) { ++ start_src = select->orig->table_list.first; ++ start_des = select->table_list.first; ++ } else { ++ start_src = select->orig->leaf_tables; ++ start_des = select->leaf_tables; ++ } ++ int tableindex = 0; ++ for (TABLE_LIST *tbl_list = start_src; tbl_list != nullptr; tableindex++) { ++ if (tbl_list->field_translation != nullptr) { ++ TABLE_LIST *des = get_table_by_index(start_des, list_type, tableindex); ++ if (des == nullptr) { ++ return true; ++ } ++ if (init_table_field_space(thd, tbl_list, des)) { ++ return true; ++ } ++ } ++ tbl_list = get_next_table(tbl_list, list_type); ++ } ++ return false; ++} ++bool init_field_space(THD *thd, Query_block *orig, Query_block *select) { ++ if (init_table_list_field_space(thd, select, TABLE_LIST_TYPE_DEFAULT) || ++ init_table_list_field_space(thd, select, TABLE_LIST_TYPE_GLOBAL)) { ++ return true; ++ } ++ ++ int tableindex = 0; ++ for (TABLE_LIST *tbl_list = orig->table_list.first; tbl_list != nullptr; ++ tbl_list = tbl_list->next_local) { ++ if (tbl_list->merge_underlying_list != nullptr) { ++ int mergeindex = 0; ++ for (TABLE_LIST *tb = tbl_list->merge_underlying_list; tb != nullptr; ++ tb = tb->merge_underlying_list) { ++ if (tb->field_translation != nullptr) { ++ TABLE_LIST *ta = get_table_by_index( ++ select->table_list.first, TABLE_LIST_TYPE_DEFAULT, tableindex); ++ if (ta == nullptr) { ++ return true; ++ } ++ TABLE_LIST *mergetable = ++ get_table_by_index(ta, TABLE_LIST_TYPE_MERGE, mergeindex); ++ if (mergetable == nullptr) { ++ return true; ++ } ++ if (init_table_field_space(thd, tb, mergetable)) { ++ return true; ++ } ++ } ++ mergeindex++; ++ } ++ } ++ tableindex++; ++ } ++ return false; ++} ++ ++bool copy_merge_table_list_field(THD *thd, Query_block *orig, ++ Query_block *dest_select) { ++ int tableindex = 0; ++ int mergeindex = 0; ++ for (TABLE_LIST *tbl_list = orig->table_list.first; tbl_list != nullptr; ++ tbl_list = tbl_list->next_local) { ++ if (tbl_list->merge_underlying_list != nullptr) { ++ mergeindex = 0; ++ for (TABLE_LIST *tb = tbl_list->merge_underlying_list; tb != nullptr; ++ tb = tb->merge_underlying_list) { ++ if (tb->field_translation != nullptr && ++ copy_merge_table_field(thd, dest_select, tableindex, mergeindex, ++ tb)) { ++ return true; ++ } ++ mergeindex++; ++ } ++ } ++ tableindex++; ++ } ++ return false; ++} ++ ++bool copy_table_list_field(THD *thd, Query_block *orig, ++ Query_block *dest_select) { ++ int tableindex = 0; ++ for (TABLE_LIST *tbl_list = orig->table_list.first; tbl_list != nullptr; ++ tbl_list = tbl_list->next_local) { ++ if (tbl_list->field_translation != nullptr) { ++ TABLE_LIST *src = get_table_by_index(dest_select->table_list.first, ++ TABLE_LIST_TYPE_DEFAULT, tableindex); ++ if (src == nullptr) { ++ return true; ++ } ++ if (copy_table_field(tbl_list, src, thd, dest_select)) { ++ return true; ++ } ++ } ++ tableindex++; ++ } ++ return false; ++} ++ ++bool copy_all_table_list(THD *thd, Query_block *orig, ++ Query_block *dest_select) { ++ if (copy_leaf_tables(thd, orig, dest_select) || ++ copy_global_tables(thd, orig, dest_select) || ++ copy_table_list(thd, orig, dest_select)) { ++ return true; ++ } ++ if (init_field_space(thd, orig, dest_select) || ++ copy_merge_table_list_field(thd, orig, dest_select) || ++ copy_global_table_list_field(thd, orig, dest_select) || ++ copy_table_list_field(thd, orig, dest_select)) { ++ return true; ++ } ++ return false; ++} ++ ++Query_block *pq_dup_select(THD *thd, Query_block *orig) { ++ Item *new_item = nullptr; ++ Item *where = nullptr; ++ 
Item *having = nullptr; ++ ORDER *group = nullptr; ++ ORDER *group_new = nullptr; ++ ORDER *order = nullptr; ++ ORDER *order_new = nullptr; ++ Query_block *select = nullptr; ++ SQL_I_List orig_list; ++ ++ LEX *lex = new (thd->pq_mem_root) LEX(); ++ if (lex == nullptr) { ++ goto err; ++ } ++ lex->reset(); ++ lex->result = orig->parent_lex->result; ++ lex->sql_command = orig->parent_lex->sql_command; ++ lex->explain_format = orig->parent_lex->explain_format; ++ lex->is_explain_analyze = orig->parent_lex->is_explain_analyze; ++ thd->lex = lex; ++ lex->thd = thd; ++ thd->query_plan.set_query_plan(SQLCOM_SELECT, lex, false); ++ ++ select = lex->new_query(nullptr); ++ if (!select || DBUG_EVALUATE_IF("dup_select_abort1", true, false)) { ++ goto err; ++ } ++ select->orig = orig; ++ select->renumber(thd->lex); ++ select->with_sum_func = orig->with_sum_func; ++ select->n_child_sum_items = orig->n_child_sum_items; ++ select->n_sum_items = orig->n_sum_items; ++ select->select_n_having_items = orig->select_n_having_items; ++ select->select_n_where_fields = orig->select_n_where_fields; ++ select->m_active_options = orig->m_active_options; ++ lex->set_current_query_block(select); ++ lex->unit = select->master_query_expression(); ++ thd->lex->query_block = select; ++ ++ // phase 1. clone tables and open/lock them ++ if (copy_all_table_list(thd, orig, select)) { ++ goto err; ++ } ++ ++ assert(select->context.query_block == select); ++ select->context.table_list = select->context.first_name_resolution_table = ++ select->leaf_tables; ++ ++ // phase 1. open tables and lock them ++ if (open_tables_for_query(thd, thd->lex->query_tables, 0) || ++ lock_tables(thd, thd->lex->query_tables, thd->lex->table_count, 0)) { ++ goto err; ++ } ++ set_up_leaf_tables(thd, select); ++ // phase 1. copy table->nullable ++ // before setup_fields, propagate_nullability will change table->nullable, ++ // which may affect item->maybe_null, so we copy it here. ++ // see in Query_block:: prepare ++ for (TABLE_LIST *tl = orig->leaf_tables; tl != nullptr; tl = tl->next_leaf) { ++ for (TABLE_LIST *tbl_list = select->leaf_tables; tbl_list != nullptr; ++ tbl_list = tbl_list->next_leaf) { ++ const char *db = tbl_list->db; ++ const char *table_name = tbl_list->table_name; ++ const char *alias = tbl_list->alias; ++ ++ if (!strncmp(db, tl->db, strlen(db)) && strlen(tl->db) == strlen(db) && ++ !strncmp(table_name, tl->table_name, strlen(table_name)) && ++ strlen(tl->table_name) == strlen(table_name) && ++ !strncmp(alias, tl->alias, strlen(alias)) && ++ strlen(tl->alias) == strlen(alias)) { ++ if (tl->table != nullptr && tl->table->is_nullable()) { ++ tbl_list->table->set_nullable(); ++ } ++ break; ++ } ++ } ++ } ++ ++ // phase 2. clone select fields list ++ for (Item *item : orig->fields) { ++ if (item->hidden) { ++ continue; ++ } ++ new_item = item->pq_clone(thd, select); ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || new_item); ++ if (new_item == nullptr) { ++ goto err; ++ } ++ ++ select->fields.push_back(new_item); ++ } ++ ++ // phase 3. duplicate group list ++ /* ++ * for template select_lex, we use leader's saved_group_list_ptrs to ++ * restore original group_list, and then copy it to template. For ++ * worker's select_lex, we directly use template's info to generate ++ * its group_list. 
++   */
++  if (orig->saved_group_list_ptrs) {
++    restore_list(orig->saved_group_list_ptrs, orig_list);
++    assert(orig_list.elements == orig->group_list.elements);
++  } else {  // the case of template select_lex
++    orig_list = orig->group_list;
++  }
++
++  // duplicate group list
++  if (orig_list.elements) {
++    for (group = orig_list.first; group; group = group->next) {
++      group_new = pq_dup_order(thd, select, group);
++      if (group_new == nullptr) {
++        goto err;
++      }
++
++      select->group_list.link_in_list(group_new, &group_new->next);
++    }
++  }
++
++  if (orig->saved_order_list_ptrs) {
++    restore_list(orig->saved_order_list_ptrs, orig_list);
++    assert(orig_list.elements == orig->order_list.elements);
++  } else {  // the case of template select_lex
++    orig_list = orig->order_list;
++  }
++
++  // duplicate order list
++  if (orig_list.elements) {
++    for (order = orig_list.first; order; order = order->next) {
++      order_new = pq_dup_order(thd, select, order);
++      if (order_new == nullptr) {
++        goto err;
++      }
++
++      select->order_list.link_in_list(order_new, &order_new->next);
++    }
++  }
++
++  /** mainly used for optimized_group_by */
++  if (select->group_list.elements) {
++    select->fix_prepare_information_for_order(thd, &select->group_list,
++                                              &select->saved_group_list_ptrs);
++  }
++  if (select->order_list.elements) {
++    select->fix_prepare_information_for_order(thd, &select->order_list,
++                                              &select->saved_order_list_ptrs);
++  }
++
++  if (select->setup_base_ref_items(thd) ||
++      DBUG_EVALUATE_IF("dup_select_abort2", true, false)) {
++    goto err;
++  }
++
++  thd->mark_used_columns = MARK_COLUMNS_READ;
++
++  // phase 5. duplicate where cond
++  if (orig->where_cond()) {
++    where = orig->where_cond()->pq_clone(thd, select);
++    assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || where);
++    if (where == nullptr) {
++      goto err;
++    }
++    select->set_where_cond(where);
++  } else {
++    select->set_where_cond(nullptr);
++  }
++
++  // phase 6. 
duplicate having cond ++ if (orig->having_cond()) { ++ having = orig->having_cond()->pq_clone(thd, select); ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || having); ++ if (having == nullptr) { ++ goto err; ++ } ++ select->set_having_cond(having); ++ } else { ++ select->set_having_cond(nullptr); ++ } ++ ++ // phase 7: allow local set functions in HAVING and ORDER BY ++ lex->allow_sum_func |= (nesting_map)1 << (nesting_map)select->nest_level; ++ select->set_query_result(lex->result); ++ return select; ++ ++err: ++ return nullptr; ++} ++ ++/** ++ * resolve query block, setup tables list, fields list, group list\order list ++ * ++ * @select: query block ++ * ++ */ ++static bool pq_select_prepare(THD *thd, Query_block *select, ++ mem_root_deque &orig_all_fields) { ++ // Setup.1 setup all fields ++ int all_fields_count = select->fields.size(); ++ thd->mark_used_columns = MARK_COLUMNS_READ; ++ ulong want_privilege = 0; ++ if (setup_fields(thd, want_privilege, true, true, false, nullptr, ++ &select->fields, select->base_ref_items, true)) { ++ return true; ++ } ++ ++ // Setup.2 setup GROUP BY clause ++ if (select->group_list.elements && select->setup_group(thd)) { ++ return true; ++ } ++ select->hidden_group_field_count = select->fields.size() - all_fields_count; ++ ++ // Setup.3 setup ORDER BY clause ++ if (select->order_list.elements && ++ setup_order(thd, select->base_ref_items, select->table_list.first, ++ &select->fields, select->order_list.first)) { ++ return true; ++ } ++ ++ select->hidden_order_field_count = select->fields.size() - all_fields_count; ++ ++ if (select->order_list.elements && select->setup_order_final(thd)) { ++ return true; ++ } ++ ++ // Setup.4: check item's property */ ++ if (select->fields.size() != orig_all_fields.size()) { ++ return true; ++ } ++ ++ Item *orig_item = nullptr; ++ uint i = 0; ++ for (Item *item : select->fields) { ++ orig_item = orig_all_fields[i]; ++ if (item == nullptr || (item->type() != orig_item->type())) return true; ++ i++; ++ } ++ ++ return false; ++} ++ ++JOIN *pq_make_join(THD *thd, JOIN *join) { ++ JOIN *pq_join = nullptr; ++ Query_block *select = pq_dup_select(thd, join->query_block); ++ if (!select || pq_select_prepare(thd, select, join->query_block->fields)) { ++ goto err; ++ } ++ ++ thd->lex->unit->set_prepared(); ++ ++ pq_join = new (thd->pq_mem_root) JOIN(thd, select); ++ if (!pq_join || DBUG_EVALUATE_IF("dup_join_abort", true, false)) { ++ goto err; ++ } ++ pq_join->pq_copy_from(join); ++ /** ++ * limit cannot push down to worker, for the cases: ++ * (1) with aggregation ++ * (2) with sorting after optimized-group-by ++ */ ++ if (join->query_expression()->select_limit_cnt) { ++ if (join->query_block->with_sum_func || // c1 ++ (join->pq_rebuilt_group && // c2 ++ join->pq_last_sort_idx >= (int)join->primary_tables)) { ++ pq_join->m_select_limit = HA_POS_ERROR; // no limit ++ pq_join->query_expression()->select_limit_cnt = HA_POS_ERROR; ++ } ++ } ++ return pq_join; ++ ++err: ++ return nullptr; ++} ++ ++bool System_variables::pq_copy_from(struct System_variables orig) { ++ pseudo_thread_id = orig.pseudo_thread_id; ++ sql_mode = orig.sql_mode; ++ collation_connection = orig.collation_connection; ++ div_precincrement = orig.div_precincrement; ++ time_zone = orig.time_zone; ++ big_tables = orig.big_tables; ++ lc_time_names = orig.lc_time_names; ++ my_aes_mode = orig.my_aes_mode; ++ transaction_isolation = orig.transaction_isolation; ++ option_bits = orig.option_bits; ++ explicit_defaults_for_timestamp = 
orig.explicit_defaults_for_timestamp; ++ sortbuff_size = orig.sortbuff_size; ++ join_buff_size = orig.join_buff_size; ++ return false; ++} ++ ++bool System_status_var::pq_merge_status(struct System_status_var worker) { ++ filesort_range_count += worker.filesort_range_count; ++ filesort_rows += worker.filesort_rows; ++ filesort_scan_count += worker.filesort_scan_count; ++ ++ ha_read_first_count += worker.ha_read_first_count; ++ ha_read_last_count += worker.ha_read_last_count; ++ ha_read_key_count += worker.ha_read_key_count; ++ ha_read_next_count += worker.ha_read_next_count; ++ ha_read_prev_count += worker.ha_read_prev_count; ++ ha_read_rnd_count += worker.ha_read_rnd_count; ++ ha_read_rnd_next_count += worker.ha_read_rnd_next_count; ++ return false; ++} ++ ++bool THD::pq_copy_from(THD *thd) { ++ variables.pq_copy_from(thd->variables); ++ start_time = thd->start_time; ++ user_time = thd->user_time; ++ m_query_string = thd->m_query_string; ++ tx_isolation = thd->tx_isolation; ++ tx_read_only = thd->tx_read_only; ++ parallel_exec = thd->parallel_exec; ++ pq_dop = thd->pq_dop; ++ arg_of_last_insert_id_function = thd->arg_of_last_insert_id_function; ++ first_successful_insert_id_in_prev_stmt = ++ thd->first_successful_insert_id_in_prev_stmt; ++ first_successful_insert_id_in_prev_stmt_for_binlog = ++ thd->first_successful_insert_id_in_prev_stmt_for_binlog; ++ first_successful_insert_id_in_cur_stmt = ++ thd->first_successful_insert_id_in_cur_stmt; ++ stmt_depends_on_first_successful_insert_id_in_prev_stmt = ++ thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt; ++ return false; ++} ++ ++bool THD::pq_merge_status(THD *thd) { ++ status_var.pq_merge_status(thd->status_var); ++ current_found_rows += thd->current_found_rows; ++ pq_current_found_rows = thd->current_found_rows; ++ m_examined_row_count += thd->m_examined_row_count; ++ return false; ++} ++ ++bool THD::pq_status_reset() { ++ current_found_rows = 0; ++ m_examined_row_count = 0; ++ return false; ++} +diff --git a/sql/pq_clone.h b/sql/pq_clone.h +new file mode 100644 +index 00000000..9cbcac80 +--- /dev/null ++++ b/sql/pq_clone.h +@@ -0,0 +1,58 @@ ++#ifndef PQ_CLONE_INCLUDE_H ++#define PQ_CLONE_INCLUDE_H ++ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "sql/sql_list.h" ++class Item; ++class Item_ident; ++class THD; ++class Query_block; ++class JOIN; ++class ORDER; ++class ORDER_with_src; ++class TABLE_LIST; ++enum table_list_type_enum { ++ TABLE_LIST_TYPE_DEFAULT, ++ TABLE_LIST_TYPE_LEAF, ++ TABLE_LIST_TYPE_GLOBAL, ++ TABLE_LIST_TYPE_MERGE ++}; ++ ++bool pq_dup_tabs(JOIN *pq_join, JOIN *join, bool setup); ++ ++TABLE_LIST *get_table_by_index(TABLE_LIST *start_table, ++ table_list_type_enum list_type, int index); ++ ++int get_table_index(TABLE_LIST *start_table, table_list_type_enum list_type, ++ TABLE_LIST *tl); ++ ++extern Item **resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, ++ Query_block *select); ++ ++JOIN *pq_make_join(THD *thd, JOIN *join); ++ ++bool copy_all_table_list(THD *thd, Query_block *orig, Query_block *dest_select); ++ ++#endif // PQ_CLONE_INCLUDE_H +diff --git a/sql/pq_clone_item.cc b/sql/pq_clone_item.cc +new file mode 100644 +index 00000000..25e68f77 +--- /dev/null ++++ b/sql/pq_clone_item.cc +@@ -0,0 +1,1990 @@ ++#ifndef PQ_CLONE_ITEM_H ++#define PQ_CLONE_ITEM_H ++ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
++
++  You should have received a copy of the GNU General Public License
++  along with this program; if not, write to the Free Software
++  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
++
++#include "item_geofunc.h"
++#include "item_inetfunc.h"
++#include "item_pfs_func.h"
++#include "mem_root_deque.h"
++#include "sql/item.h"
++#include "sql/item_cmpfunc.h"
++#include "sql/item_regexp_func.h"
++#include "sql/item_sum.h"
++#include "sql/item_timefunc.h"
++#include "sql/log.h"
++#include "sql/parse_tree_items.h"
++#include "sql/parse_tree_nodes.h"
++#include "sql/pq_clone.h"
++#include "sql/sql_optimizer.h"
++
++#define CHECK_TYPE(T)                                                        \
++  if (typeid(*this) != typeid(T) ||                                          \
++      DBUG_EVALUATE_IF("simulate_item_type_mismatch", true, false)) {        \
++    sql_print_warning(                                                       \
++        "Caller's type %s is not equal to this class type %s, "              \
++        "will not use parallel query, SQL= %s",                              \
++        typeid(*this).name(), typeid(T).name(), thd->query().str);           \
++    assert(DBUG_EVALUATE_IF("simulate_item_type_mismatch", true, false) ||   \
++           false);                                                           \
++    return nullptr;                                                          \
++  }
++
++#define COPY_FROM_SUPER(D, B)                                                \
++  if (B::pq_copy_from(thd, select, item)) {                                  \
++    return true;                                                             \
++  }                                                                          \
++  D *orig_item MY_ATTRIBUTE((unused)) = dynamic_cast<D *>(item);             \
++  assert(orig_item);
++
++#define COPY_SELF_ATTR(OBJ)                                                  \
++  if (!OBJ || OBJ->pq_copy_from(thd, select, this)) {                        \
++    return nullptr;                                                          \
++  }
++
++#define PQ_CLONE_DEF(T)                                                      \
++  Item *T::pq_clone(THD *thd, Query_block *select) {                         \
++    CHECK_TYPE(T)                                                            \
++    T *new_item = nullptr;
++
++#define PQ_CLONE_RETURN                                                      \
++  COPY_SELF_ATTR(new_item)                                                   \
++  return new_item;                                                           \
++  }
++
++#define PQ_CLONE_ARGS                                                        \
++  mem_root_deque<Item *> item_list(thd->pq_mem_root);                        \
++  for (uint i = 0; i < arg_count; i++) {                                     \
++    Item *arg = args[i]->pq_clone(thd, select);                              \
++    if (arg == nullptr) return nullptr;                                      \
++    item_list.push_back(arg);                                                \
++  }
++
++#define PQ_COPY_FROM_DEF(D, B)                                               \
++  bool D::pq_copy_from(THD *thd, Query_block *select, Item *item) {          \
++    COPY_FROM_SUPER(D, B)
++
++#define PQ_COPY_FROM_RETURN                                                  \
++  return false;                                                              \
++  }
++
++#define PQ_REBUILD_SUM_DEF(T)                                                \
++  Item_sum *T::pq_rebuild_sum_func(THD *thd, Query_block *select,            \
++                                   Item *item) {                             \
++    CHECK_TYPE(T)                                                            \
++    T *new_item = nullptr;
++
++#define PQ_REBUILD_SUM_RETURN                                                \
++  COPY_SELF_ATTR(new_item)                                                   \
++  return new_item;                                                           \
++  }
++
++#define ARG0 copy_args[0]
++#define ARG1 copy_args[1]
++#define ARG2 copy_args[2]
++#define ARG3 copy_args[3]
++#define ARG4 copy_args[4]
++#define COPY_FUNC_ITEM(T, ...)
\ ++ Item *T::pq_clone(THD *thd, Query_block *select) { \ ++ CHECK_TYPE(T); \ ++ Item *copy_args[5]; \ ++ assert(arg_count < 5); \ ++ for (uint i = 0; i < arg_count; i++) { \ ++ copy_args[i] = args[i]->pq_clone(thd, select); \ ++ if (copy_args[i] == nullptr) { \ ++ return nullptr; \ ++ } \ ++ } \ ++ Item *new_item = nullptr; \ ++ new_item = new (thd->pq_mem_root) T(__VA_ARGS__); \ ++ COPY_SELF_ATTR(new_item) \ ++ return new_item; \ ++ } ++ ++Item *Item::pq_clone(THD *thd MY_ATTRIBUTE((unused)), ++ Query_block *select MY_ATTRIBUTE((unused))) { ++ sql_print_warning( ++ "Item type %s's deep copy method is not implemented, " ++ "will not use parallel query, SQL= %s", ++ typeid(*this).name(), thd->query().str); ++ assert(DBUG_EVALUATE_IF("simulate_no_item_copy_function", true, false) || ++ false); ++ return nullptr; ++} ++ ++bool Item::pq_copy_from(THD *thd MY_ATTRIBUTE((unused)), ++ Query_block *select MY_ATTRIBUTE((unused)), ++ Item *item) { ++ cmp_context = item->cmp_context; ++ marker = item->marker; ++ ++ collation = item->collation; ++ item_name.copy(item->item_name.ptr(), item->item_name.length(), ++ system_charset_info, item->item_name.is_autogenerated()); ++ orig_name.copy(item->orig_name.ptr(), item->orig_name.length(), ++ system_charset_info, item->orig_name.is_autogenerated()); ++ decimals = item->decimals; ++ derived_used = item->derived_used; ++ is_expensive_cache = item->is_expensive_cache; ++ m_accum_properties = item->m_accum_properties; ++ m_data_type = item->m_data_type; ++ m_is_window_function = item->m_is_window_function; ++ max_length = item->max_length; ++ m_nullable = item->is_nullable(); ++ null_value = item->null_value; ++ str_value = item->str_value; ++ hidden = item->hidden; ++ ++#ifndef NDEBUG ++ contextualized = item->contextualized; ++#endif ++ unsigned_flag = item->unsigned_flag; ++ ++ if (!pq_alloc_item && item->pq_alloc_item) thd->add_item(this); ++ ++ return false; ++} ++ ++/* Item_basic_constant start */ ++PQ_COPY_FROM_DEF(Item_basic_constant, Item) { ++ if (orig_item != nullptr) { ++ used_table_map = orig_item->used_table_map; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++/* Item_cache start */ ++PQ_COPY_FROM_DEF(Item_cache, Item_basic_constant) { ++ if (orig_item != nullptr) { ++ used_table_map = orig_item->used_table_map; ++ cached_field = orig_item->cached_field; ++ } ++ if (orig_item != nullptr && orig_item->example != nullptr) { ++ Item *example_arg = orig_item->example->pq_clone(thd, select); ++ if (example_arg == nullptr) return true; ++ if (!example_arg->fixed) { ++ example_arg->fix_fields(thd, &example_arg); ++ } ++ setup(example_arg); ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_cache_datetime) { ++ new_item = new (thd->pq_mem_root) Item_cache_datetime(data_type()); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_cache_decimal) { ++ new_item = new (thd->pq_mem_root) Item_cache_decimal(); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_cache_int) { ++ new_item = new (thd->pq_mem_root) Item_cache_int(); ++ if (new_item == nullptr) { ++ return nullptr; ++ } ++ if (origin_item) { ++ new_item->example = origin_item->pq_clone(thd, select); ++ if (new_item->example == nullptr) { ++ return nullptr; ++ } ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_cache_real) { ++ new_item = new (thd->pq_mem_root) Item_cache_real(); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_cache_row) { ++ new_item = new (thd->pq_mem_root) Item_cache_row(); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_cache_str) { ++ const Item *item = static_cast(this); ++ new_item = new 
(thd->pq_mem_root) Item_cache_str(item); ++} ++PQ_CLONE_RETURN ++/* Item_cache end */ ++ ++/* Item_hex_string start */ ++PQ_CLONE_DEF(Item_hex_string) { ++ new_item = new (thd->pq_mem_root) Item_hex_string(POS()); ++} ++PQ_CLONE_RETURN ++ ++// TOOD str_value copyed twice ++PQ_CLONE_DEF(Item_bin_string) { ++ new_item = new (thd->pq_mem_root) ++ Item_bin_string(str_value.ptr(), str_value.length()); ++} ++PQ_CLONE_RETURN ++/* Item_hex_string end */ ++ ++/* Item_null start */ ++PQ_CLONE_DEF(Item_null) { new_item = new (thd->pq_mem_root) Item_null(POS()); } ++PQ_CLONE_RETURN ++/* Item_null end */ ++ ++/* Item_num start */ ++PQ_CLONE_DEF(Item_int_with_ref) { ++ Item *pq_ref = ref->pq_clone(thd, select); ++ if (pq_ref == nullptr) return nullptr; ++ new_item = new (thd->pq_mem_root) ++ Item_int_with_ref(pq_ref->data_type(), value, pq_ref, unsigned_flag); ++} ++PQ_CLONE_RETURN ++/* Item_num end */ ++ ++/* Item_string start */ ++PQ_CLONE_DEF(Item_string) { ++ if (origin_item) return origin_item->pq_clone(thd, select); ++ ++ new_item = new (thd->pq_mem_root) Item_string( ++ static_cast(item_name), str_value.ptr(), str_value.length(), ++ collation.collation, collation.derivation, collation.repertoire); ++ if (new_item) { ++ new_item->set_cs_specified(m_cs_specified); ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_static_string_func) { ++ if (origin_item) return origin_item->pq_clone(thd, select); ++ ++ new_item = new (thd->pq_mem_root) ++ Item_static_string_func(func_name, str_value.ptr(), str_value.length(), ++ collation.collation, collation.derivation); ++} ++PQ_CLONE_RETURN ++/* Item_string end */ ++/* Item_basic_constant end */ ++ ++/* Item_ident start */ ++PQ_COPY_FROM_DEF(Item_ident, Item) { ++ DBUG_EXECUTE_IF("simulate_item_clone_attr_copy_error", return true;); ++ ++ context = &select->context; ++ ++ if (orig_item->cached_table == nullptr) { ++ m_tableno = orig_item->m_tableno; ++ } else { ++ m_tableno = orig_item->cached_table->m_tableno; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_COPY_FROM_DEF(Item_field, Item_ident) { ++ DBUG_EXECUTE_IF("simulate_item_field_copy_error", return true;); ++ ++ if (orig_item->table_ref != nullptr) { ++ m_tableno = orig_item->table_ref->m_tableno; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_field) { ++ DBUG_EXECUTE_IF("simulate_item_clone_error", return nullptr;); ++ DBUG_EXECUTE_IF("simulate_no_item_copy_function", ++ return Item::pq_clone(thd, select);); ++ ++ new_item = ++ new (thd->pq_mem_root) Item_field(POS(), db_name, table_name, field_name); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_default_value) { ++ Item *new_arg = nullptr; ++ if (arg) { ++ new_arg = arg->pq_clone(thd, select); ++ if (new_arg == nullptr) return nullptr; ++ } ++ new_item = new (thd->pq_mem_root) Item_default_value(POS(), new_arg); ++} ++PQ_CLONE_RETURN ++ ++int find_ref_in_table(TABLE_LIST *tl, Item **ref) { ++ int count = tl->field_translation_end - tl->field_translation; ++ if (count <= 0) { ++ return -1; ++ } ++ for (int i = 0; i < count; i++) { ++ if (*ref == tl->field_translation[i].item) { ++ return i; ++ } ++ } ++ return -1; ++} ++ ++TABLE_LIST *get_table_in_merge_tablelist(Query_block *select, TABLE_LIST *tb) { ++ int tableindex = 0; ++ for (TABLE_LIST *tbl_list = select->orig->table_list.first; ++ tbl_list != nullptr; tbl_list = tbl_list->next_local) { ++ if (tbl_list->merge_underlying_list != nullptr) { ++ int index = get_table_index(tbl_list->merge_underlying_list, ++ TABLE_LIST_TYPE_MERGE, tb); ++ if (index != -1) { ++ TABLE_LIST *tbl = get_table_by_index( ++ 
select->table_list.first, TABLE_LIST_TYPE_DEFAULT, tableindex); ++ return get_table_by_index(tbl, TABLE_LIST_TYPE_MERGE, index); ++ } ++ } ++ tableindex++; ++ } ++ return nullptr; ++} ++ ++Item *Item_view_ref::pq_clone(class THD *thd, class Query_block *select) { ++ Item_view_ref *item = nullptr; ++ Item **item_ref = nullptr; ++ ++ if (select->orig != nullptr) { ++ TABLE_LIST *found_table = nullptr; ++ int index = get_table_index(select->orig->table_list.first, ++ TABLE_LIST_TYPE_DEFAULT, cached_table); ++ if (index != -1) { ++ found_table = get_table_by_index(select->table_list.first, ++ TABLE_LIST_TYPE_DEFAULT, index); ++ } ++ ++ if (found_table == nullptr) { ++ index = get_table_index(select->orig->leaf_tables, TABLE_LIST_TYPE_GLOBAL, ++ cached_table); ++ if (index != -1) { ++ found_table = get_table_by_index(select->leaf_tables, ++ TABLE_LIST_TYPE_GLOBAL, index); ++ } ++ } ++ ++ if (found_table == nullptr) { ++ found_table = get_table_in_merge_tablelist(select, cached_table); ++ } ++ ++ if (found_table == nullptr) { ++ return nullptr; ++ } ++ ++ int field_index = find_ref_in_table(cached_table, ref); ++ if (field_index == -1 || found_table->field_translation == nullptr || ++ found_table->field_translation_end - found_table->field_translation <= ++ field_index) { ++ return nullptr; ++ } ++ ++ item_ref = &found_table->field_translation[field_index].item; ++ ++ const char *db_name; ++ if (found_table->is_view()) { ++ db_name = found_table->db; ++ } else { ++ db_name = nullptr; ++ } ++ ++ item = new (thd->pq_mem_root) ++ Item_view_ref(&select->context, item_ref, db_name, table_name, ++ orig_table_name(), field_name, found_table); ++ } else { ++ item_ref = new (thd->pq_mem_root) Item *(); ++ if (item_ref == nullptr) return nullptr; ++ *item_ref = (*ref)->pq_clone(thd, select); ++ if (*item_ref == nullptr) return nullptr; ++ item = new (thd->pq_mem_root) ++ Item_view_ref(&select->context, item_ref, db_name, table_name, ++ orig_table_name(), field_name, cached_table); ++ } ++ ++ if (item == nullptr || item->pq_copy_from(thd, select, this)) { ++ return nullptr; ++ } ++ ++ return item; ++} ++ ++/** ++ Item_aggregate_ref wil be created from ref in setup_fields() afterwards, ++ so clone ref for the item. 
++*/ ++Item *Item_aggregate_ref::pq_clone(class THD *thd, class Query_block *select) { ++ Item *item_ref = (*ref)->pq_clone(thd, select); ++ if (item_ref == nullptr) { ++ return nullptr; ++ } ++ ++ return item_ref; ++} ++ ++Item *Item_ref::pq_clone(class THD *thd, class Query_block *select) { ++ /* ++ * c1: (Name_resolution_context, db_name, table_name, field_name) ++ * c2: (pos, db_name, table_name, field_name) ++ * c3: (context, ref, db_name, table_name, field_name) ++ * c4: (thd, ref_item) ++ */ ++ Item_ref *new_item = nullptr; ++ Name_resolution_context *new_context = &select->context; ++ ++ if (copy_type == WITH_CONTEXT) ++ new_item = new (thd->pq_mem_root) ++ Item_ref(new_context, db_name, table_name, field_name); ++ else if (copy_type == WITHOUT_CONTEXT) ++ new_item = ++ new (thd->pq_mem_root) Item_ref(POS(), db_name, table_name, field_name); ++ else if (copy_type == WITH_CONTEXT_REF) { ++ return (*ref)->pq_clone(thd, select); ++ } else { ++ assert(copy_type == WITH_REF_ONLY); ++ new_item = new (thd->pq_mem_root) Item_ref(thd, this); ++ } ++ if (new_item == nullptr || new_item->pq_copy_from(thd, select, this)) ++ return nullptr; ++ ++ new_item->context = &select->context; ++ return new_item; ++} ++ ++PQ_CLONE_DEF(Item_name_const) { ++ Item *name_arg, *val_arg; ++ if (name_item == nullptr) { ++ name_arg = nullptr; ++ } else { ++ name_arg = name_item->pq_clone(thd, select); ++ if (name_arg == nullptr) return nullptr; ++ } ++ if (value_item == nullptr) { ++ val_arg = nullptr; ++ } else { ++ val_arg = value_item->pq_clone(thd, select); ++ if (val_arg == nullptr) return nullptr; ++ } ++ new_item = new (thd->pq_mem_root) Item_name_const(POS(), name_arg, val_arg); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_name_const, Item) { ++ if (orig_item != nullptr) { ++ valid_args = orig_item->valid_args; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++/* Item_result_field start */ ++/* Item_func start */ ++PQ_COPY_FROM_DEF(Item_func, Item_result_field) { ++ if (orig_item != nullptr) { ++ null_on_null = orig_item->null_on_null; ++ used_tables_cache = orig_item->used_tables_cache; ++ not_null_tables_cache = orig_item->not_null_tables_cache; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++/* Item_func_bit start */ ++PQ_COPY_FROM_DEF(Item_func_bit, Item_func) { ++ if (orig_item != nullptr) { ++ hybrid_type = orig_item->hybrid_type; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(PTI_literal_underscore_charset_hex_num) { ++ LEX_STRING str = {const_cast(str_value.ptr()), str_value.length()}; ++ new_item = new (thd->pq_mem_root) ++ PTI_literal_underscore_charset_hex_num(POS(), collation.collation, str); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_bit_neg, POS(), ARG0) ++ ++COPY_FUNC_ITEM(Item_func_bit_and, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_bit_or, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_bit_xor, POS(), ARG0, ARG1) ++ ++COPY_FUNC_ITEM(Item_func_shift_left, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_shift_right, POS(), ARG0, ARG1) ++/* Item_func_bit end */ ++ ++PQ_CLONE_DEF(Item_func_case) { ++ PQ_CLONE_ARGS ++ new_item = new (thd->pq_mem_root) ++ Item_func_case(POS(), &item_list, nullptr, nullptr); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_case, Item_func) { ++ if (orig_item != nullptr) { ++ first_expr_num = orig_item->first_expr_num; ++ else_expr_num = orig_item->else_expr_num; ++ cached_result_type = orig_item->cached_result_type; ++ left_result_type = orig_item->left_result_type; ++ ncases = orig_item->ncases; ++ cmp_type = orig_item->cmp_type; ++ } ++} ++PQ_COPY_FROM_RETURN ++ 
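++/*
++  Note: each COPY_FUNC_ITEM(T, <constructor args>) invocation below expands,
++  via the macro defined above, into a T::pq_clone(THD *, Query_block *) that
++  clones args[0..arg_count-1] on thd->pq_mem_root (ARGn names the clone of
++  args[n]), constructs a new T from the listed constructor arguments, and
++  then copies the remaining item attributes through pq_copy_from().
++  For example, COPY_FUNC_ITEM(Item_func_if, ARG0, ARG1, ARG2) generates
++  Item_func_if::pq_clone(), which builds the new item from the three cloned
++  arguments.
++*/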
++COPY_FUNC_ITEM(Item_func_if, ARG0, ARG1, ARG2) ++COPY_FUNC_ITEM(Item_func_month, POS(), ARG0) ++ ++/* Item_func_coalesce start */ ++PQ_CLONE_DEF(Item_func_coalesce) { ++ assert(arg_count < 3); ++ Item *new_args[2] = {nullptr}; ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ if (arg_count == 1) { ++ new_item = new (thd->pq_mem_root) Item_func_coalesce(POS(), new_args[0]); ++ } else if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_coalesce(POS(), new_args[0], new_args[1]); ++ } ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_any_value, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_ifnull, POS(), ARG0, ARG1) ++/* Item_func_coalesce end */ ++ ++/* Item_func_min_max start */ ++PQ_CLONE_DEF(Item_func_max) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) Item_func_max(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_min) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) Item_func_min(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++/* Item_func_min_max end */ ++ ++/* Item_func_num1 start */ ++COPY_FUNC_ITEM(Item_func_abs, POS(), ARG0) ++ ++COPY_FUNC_ITEM(Item_func_ceiling, ARG0) ++COPY_FUNC_ITEM(Item_func_floor, ARG0) ++ ++COPY_FUNC_ITEM(Item_func_neg, ARG0) ++COPY_FUNC_ITEM(Item_func_round, ARG0, ARG1, truncate) ++/* Item_func_num1 end */ ++ ++/* Item_num_op start */ ++COPY_FUNC_ITEM(Item_func_plus, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_minus, ARG0, ARG1) ++ ++COPY_FUNC_ITEM(Item_func_div, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mod, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mul, ARG0, ARG1) ++/* Item_num_op end */ ++ ++/* Item_func_regexp start */ ++PQ_CLONE_DEF(Item_func_regexp_instr) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ new_item = ++ new (thd->pq_mem_root) Item_func_regexp_instr(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_regexp_like) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) Item_func_regexp_like(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++/* Item_func_regexp end */ ++ ++/* Item_func_weekday start */ ++PQ_CLONE_DEF(Item_func_weekday) { ++ PQ_CLONE_ARGS ++ new_item = new (thd->pq_mem_root) ++ Item_func_weekday(POS(), item_list[0], this->odbc_type); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_dayname, POS(), ARG0) ++/* Item_func_weekday end */ ++ ++/* Item_int_func start */ ++/* Item_bool_func2 start */ ++COPY_FUNC_ITEM(Item_func_eq, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_equal, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_ge, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_gt, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_le, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_lt, ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_ne, ARG0, ARG1) ++ ++PQ_CLONE_DEF(Item_func_like) { ++ Item *arg[3]; ++ for (uint i = 0; i < arg_count; i++) { ++ arg[i] = args[i]->pq_clone(thd, select); ++ if (arg[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) Item_func_like(arg[0], arg[1]); ++ } else if (arg_count == 3) { ++ new_item = new (thd->pq_mem_root) Item_func_like(arg[0], arg[1], arg[2]); ++ } else { ++ sql_print_warning("arg_count is wrong!"); ++ } ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_nullif, POS(), ARG0, 
ARG1) ++COPY_FUNC_ITEM(Item_func_mbrcontains, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_strcmp, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_xor, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_contains, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrcoveredby, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrcovers, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_crosses, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_disjoint, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_equals, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_intersects, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_overlaps, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_touches, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_st_within, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrwithin, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrtouches, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbroverlaps, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrintersects, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrequals, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_mbrdisjoint, POS(), ARG0, ARG1) ++/* Item_bool_func2 end */ ++ ++/* Item_cond start */ ++PQ_COPY_FROM_DEF(Item_cond, Item_bool_func) { ++ Item *list_item; ++ List_iterator_fast list_it(orig_item->list); ++ while ((list_item = list_it++)) { ++ Item *arg = list_item->pq_clone(thd, select); ++ if (arg == nullptr) return true; ++ list.push_back(arg); ++ } ++ if (orig_item != nullptr) { ++ abort_on_null = orig_item->abort_on_null; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_cond_and) { ++ new_item = new (thd->pq_mem_root) Item_cond_and(); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_cond_and, Item_cond) { ++ if (orig_item != nullptr) { ++ cond_equal.max_members = orig_item->cond_equal.max_members; ++ } ++ Item_equal *item_equal; ++ List_iterator_fast it(orig_item->cond_equal.current_level); ++ for (size_t i = 0; (item_equal = it++); i++) { ++ Item_equal *new_item_equal = ++ dynamic_cast(item_equal->pq_clone(thd, select)); ++ if (new_item_equal == nullptr) return true; ++ cond_equal.current_level.push_back(new_item_equal); ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_cond_or) { new_item = new (thd->pq_mem_root) Item_cond_or(); } ++PQ_CLONE_RETURN ++/* Item_cond end */ ++ ++PQ_CLONE_DEF(Item_equal) { new_item = new (thd->pq_mem_root) Item_equal(); } ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_equal, Item_bool_func) { ++ Item_field *item_field; ++ List_iterator_fast it(orig_item->fields); ++ for (size_t i = 0; (item_field = it++); i++) { ++ Item_field *new_field = ++ dynamic_cast(item_field->pq_clone(thd, select)); ++ if (new_field == nullptr) return true; ++ fields.push_back(new_field); ++ } ++ if (orig_item != nullptr && orig_item->const_item != nullptr) { ++ const_item = orig_item->const_item->pq_clone(thd, select); ++ if (const_item == nullptr) { ++ return true; ++ } ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_true, POS()) ++ ++COPY_FUNC_ITEM(Item_func_isnotnull, ARG0) ++ ++PQ_CLONE_DEF(Item_func_isnull) { ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ new_item = new (thd->pq_mem_root) Item_func_isnull(POS(), arg); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_isnull, Item_bool_func) { ++ if (orig_item != nullptr) { ++ cached_value = orig_item->cached_value; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_json_schema_valid, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_not, ARG0) ++ ++PQ_CLONE_DEF(Item_func_truth) { ++ PQ_CLONE_ARGS ++ 
new_item = ++ new (thd->pq_mem_root) Item_func_truth(POS(), item_list[0], truth_test); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_extract) { ++ PQ_CLONE_ARGS ++ new_item = ++ new (thd->pq_mem_root) Item_extract(POS(), this->int_type, item_list[0]); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_extract, Item_int_func) { ++ if (orig_item != nullptr) { ++ date_value = orig_item->date_value; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_typecast_year) { ++ PQ_CLONE_ARGS ++ new_item = new (thd->pq_mem_root) Item_typecast_year(POS(), item_list[0]); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_ascii, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_bit_count, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_char_length) { ++ assert(arg_count == 1); ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ new_item = new (thd->pq_mem_root) Item_func_char_length(POS(), arg); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_char_length, Item_int_func) { ++ if (orig_item != nullptr) { ++ value.copy(orig_item->value); ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_coercibility, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_crc32, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_dayofmonth, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_dayofyear, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_field) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) Item_func_field(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_find_in_set, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_get_lock, POS(), ARG0, ARG1); ++COPY_FUNC_ITEM(Item_func_hour, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_inet_aton, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_int_div, POS(), ARG0, ARG1) ++ ++PQ_CLONE_DEF(Item_func_interval) { ++ assert(arg_count == 1 && args[0]->type() == Item::ROW_ITEM); ++ Item_row *row = down_cast(args[0]->pq_clone(thd, select)); ++ if (nullptr == row) return nullptr; ++ new_item = new (thd->pq_mem_root) Item_func_interval(POS(), row); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_interval, Item_int_func) { ++ if (orig_item != nullptr) { ++ use_decimal_comparison = orig_item->use_decimal_comparison; ++ intervals = orig_item->intervals; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_func_json_contains) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = ++ new (thd->pq_mem_root) Item_func_json_contains(thd, POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_json_depth, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_last_insert_id) { ++ Item *item_arg = nullptr; ++ if (arg_count == 1) { ++ item_arg = args[0]->pq_clone(thd, select); ++ if (item_arg == nullptr) { ++ return nullptr; ++ } ++ } ++ ++ if (arg_count == 0) { ++ new_item = new (thd->pq_mem_root) Item_func_last_insert_id(POS()); ++ } else if (arg_count == 1) { ++ new_item = new (thd->pq_mem_root) Item_func_last_insert_id(POS(), item_arg); ++ } ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_length, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_bit_length, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_minute, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_locate) { ++ assert(arg_count < 4); ++ Item *new_args[4] = {nullptr}; ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_locate(POS(), new_args[0], 
new_args[1]); ++ } else if (arg_count == 3) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_locate(POS(), new_args[0], new_args[1], new_args[2]); ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_locate, Item_int_func) { ++ if (orig_item != nullptr) { ++ cmp_collation = orig_item->cmp_collation; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_instr, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_microsecond, POS(), ARG0) ++ ++PQ_COPY_FROM_DEF(Item_func_opt_neg, Item_int_func) { ++ if (orig_item != nullptr) { ++ negated = orig_item->negated; ++ pred_level = orig_item->pred_level; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_between, POS(), ARG0, ARG1, ARG2, negated); ++ ++PQ_CLONE_DEF(Item_func_in) { ++ PT_select_item_list pt_item; ++ for (uint i = 0; i < arg_count; i++) { ++ Item *arg = args[i]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ pt_item.value.push_back(arg); ++ } ++ new_item = new (thd->pq_mem_root) Item_func_in(POS(), &pt_item, negated); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_ord, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_period_add, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_period_diff, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_quarter, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_second, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_sleep, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_time_to_sec, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_timestamp_diff, POS(), ARG0, ARG1, int_type) ++COPY_FUNC_ITEM(Item_func_to_days, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_to_seconds, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_uncompressed_length, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_week, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_year, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_yearweek, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_typecast_signed, POS(), ARG0) ++COPY_FUNC_ITEM(Item_typecast_unsigned, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_can_access_table, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_is_visible_dd_object, POS(), ARG0) ++/* Item_int_func end */ ++ ++/* Item_real_func start */ ++/* Item_dec_func start*/ ++COPY_FUNC_ITEM(Item_func_sin, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_sqrt, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_cos, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_tan, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_cot, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_pow, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_ln, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_log2, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_log10, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_asin, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_acos, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_exp, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_atan) { ++ Item *item_args[2]; ++ assert(arg_count < 3); ++ for (uint i = 0; i < arg_count; i++) { ++ item_args[i] = args[i]->pq_clone(thd, select); ++ if (item_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 1) ++ new_item = new (thd->pq_mem_root) Item_func_atan(POS(), item_args[0]); ++ else if (arg_count == 2) ++ new_item = new (thd->pq_mem_root) ++ Item_func_atan(POS(), item_args[0], item_args[1]); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_log) { ++ Item *item_args[2]; ++ assert(arg_count < 3); ++ for (uint i = 0; i < arg_count; i++) { ++ item_args[i] = args[i]->pq_clone(thd, select); ++ if (item_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 1) ++ new_item = new (thd->pq_mem_root) Item_func_log(POS(), item_args[0]); ++ else if (arg_count == 2) ++ new_item = ++ new (thd->pq_mem_root) Item_func_log(POS(), 
item_args[0], item_args[1]); ++} ++PQ_CLONE_RETURN ++ ++/* Item_dec_func end*/ ++ ++COPY_FUNC_ITEM(Item_func_longfromgeohash, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_rand) { ++ new_item = new (thd->pq_mem_root) Item_func_rand(POS()); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_latfromgeohash, POS(), ARG0) ++/* Item_real_func end */ ++ ++/* Item_str_func start */ ++PQ_CLONE_DEF(Item_func_aes_decrypt) { ++ assert(arg_count < 4); ++ Item *new_args[4] = {nullptr}; ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_aes_decrypt(POS(), new_args[0], new_args[1]); ++ } else if (arg_count == 3) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_aes_decrypt(POS(), new_args[0], new_args[1], new_args[2]); ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_aes_encrypt) { ++ assert(arg_count < 4); ++ Item *new_args[4] = {nullptr}; ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_aes_encrypt(POS(), new_args[0], new_args[1]); ++ } else if (arg_count == 3) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_aes_encrypt(POS(), new_args[0], new_args[1], new_args[2]); ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_char) { ++ PQ_CLONE_ARGS ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ new_item = new (thd->pq_mem_root) Item_func_char(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_charset, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_collation, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_compress, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_concat) { ++ PQ_CLONE_ARGS ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ new_item = new (thd->pq_mem_root) Item_func_concat(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_concat_ws) { ++ PQ_CLONE_ARGS ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ new_item = new (thd->pq_mem_root) Item_func_concat_ws(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_conv, POS(), ARG0, ARG1, ARG2) ++COPY_FUNC_ITEM(Item_func_conv_charset, POS(), ARG0, conv_charset) ++ ++PQ_CLONE_DEF(Item_func_date_format) { ++ PQ_CLONE_ARGS ++ new_item = new (thd->pq_mem_root) Item_func_date_format( ++ POS(), item_list[0], item_list[1], this->is_time_format); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_date_format, Item_str_func) { ++ if (orig_item != nullptr) { ++ value.copy(orig_item->value); ++ fixed_length = orig_item->fixed_length; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_func_elt) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) Item_func_elt(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_export_set) { ++ PQ_CLONE_ARGS ++ ++ if (arg_count == 3) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_export_set(POS(), item_list[0], item_list[1], item_list[2]); ++ } else if (arg_count == 4) { ++ new_item = new (thd->pq_mem_root) Item_func_export_set( ++ POS(), item_list[0], item_list[1], item_list[2], item_list[3]); ++ } else if (arg_count == 5) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_export_set(POS(), item_list[0], item_list[1], item_list[2], ++ item_list[3], item_list[4]); ++ } ++} 
++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_from_base64, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_inet_ntoa, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_insert, POS(), ARG0, ARG1, ARG2, ARG3) ++ ++PQ_CLONE_DEF(Item_func_json_quote) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) Item_func_json_quote(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_left, POS(), ARG0, ARG1) ++ ++COPY_FUNC_ITEM(Item_func_lpad, POS(), ARG0, ARG1, ARG2) ++ ++PQ_COPY_FROM_DEF(Item_func_lpad, Item_str_func) { ++ if (orig_item != nullptr) lpad_str.copy(orig_item->lpad_str); ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_func_make_set) { ++ Item *arg_a = item->pq_clone(thd, select); ++ if (arg_a == nullptr) return nullptr; ++ ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = ++ new (thd->pq_mem_root) Item_func_make_set(POS(), arg_a, &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_table_rows) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_table_rows(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_avg_row_length) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_avg_row_length(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_data_length) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_data_length(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_max_data_length) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_max_data_length(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_index_length) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_index_length(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_data_free) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = ++ new (thd->pq_mem_root) Item_func_internal_data_free(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_auto_increment) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_auto_increment(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_update_time) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_update_time(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_check_time) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_check_time(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_checksum) { ++ PQ_CLONE_ARGS ++ ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ ++ new_item = ++ new (thd->pq_mem_root) Item_func_internal_checksum(POS(), 
&pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_internal_get_comment_or_error) { ++ PQ_CLONE_ARGS ++ PT_item_list pt_item_list; ++ pt_item_list.value = item_list; ++ new_item = new (thd->pq_mem_root) ++ Item_func_internal_get_comment_or_error(POS(), &pt_item_list); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_monthname, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_pfs_format_bytes, POS(), ARG0) ++ ++PQ_COPY_FROM_DEF(Item_func_pfs_format_bytes, Item_str_func) { ++ if (orig_item != nullptr) { ++ m_value = orig_item->m_value; ++ memcpy(orig_item->m_value_buffer, m_value_buffer, 20); ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_pfs_format_pico_time, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_quote, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_repeat, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_replace, POS(), ARG0, ARG1, ARG2) ++COPY_FUNC_ITEM(Item_func_reverse, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_random_bytes, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_right) { ++ PQ_CLONE_ARGS ++ new_item = ++ new (thd->pq_mem_root) Item_func_right(POS(), item_list[0], item_list[1]); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_rpad, POS(), ARG0, ARG1, ARG2) ++ ++PQ_COPY_FROM_DEF(Item_func_rpad, Item_str_func) { ++ if (orig_item != nullptr) { ++ rpad_str.copy(orig_item->rpad_str); ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_set_collation, POS(), ARG0, collation_string) ++ ++PQ_COPY_FROM_DEF(Item_func_set_collation, Item_str_func) { ++ if (orig_item != nullptr && orig_item->args[1] != nullptr) { ++ args[1] = orig_item->args[1]->pq_clone(thd, select); ++ if (args[1] == nullptr) return true; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_soundex, ARG0) ++COPY_FUNC_ITEM(Item_func_space, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_substr) { ++ assert(arg_count < 4); ++ Item *new_args[4] = {nullptr}; ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_substr(POS(), new_args[0], new_args[1]); ++ } else if (arg_count == 3) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_substr(POS(), new_args[0], new_args[1], new_args[2]); ++ } ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_substr_index, POS(), ARG0, ARG1, ARG2) ++COPY_FUNC_ITEM(Item_func_database, POS()) ++COPY_FUNC_ITEM(Item_func_user, POS()) ++ ++PQ_CLONE_DEF(Item_func_trim) { ++ PQ_CLONE_ARGS ++ ++ if (arg_count > 1) ++ new_item = new (thd->pq_mem_root) ++ Item_func_trim(POS(), item_list[0], item_list[1], m_trim_mode); ++ else ++ new_item = ++ new (thd->pq_mem_root) Item_func_trim(POS(), item_list[0], m_trim_mode); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_ltrim, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_rtrim, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_uncompress, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_unhex, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_uuid, POS()) ++COPY_FUNC_ITEM(Item_func_get_dd_create_options, POS(), ARG0, ARG1, ARG2) ++PQ_CLONE_DEF(Item_func_uuid_to_bin) { ++ assert(arg_count < 3); ++ Item *new_args[4] = {nullptr}; ++ ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 1) { ++ new_item = new (thd->pq_mem_root) Item_func_uuid_to_bin(POS(), new_args[0]); ++ } else if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_uuid_to_bin(POS(), new_args[0], new_args[1]); ++ } 
++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_weight_string, POS(), ARG0, result_length, ++ num_codepoints, flags, as_binary) ++COPY_FUNC_ITEM(Item_func_st_srid_mutator, POS(), ARG0, ARG1) ++ ++PQ_CLONE_DEF(Item_func_bin_to_uuid) { ++ assert(arg_count < 3); ++ Item *new_args[4] = {nullptr}; ++ ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 1) { ++ new_item = new (thd->pq_mem_root) Item_func_bin_to_uuid(POS(), new_args[0]); ++ } else if (arg_count == 2) { ++ new_item = new (thd->pq_mem_root) ++ Item_func_bin_to_uuid(POS(), new_args[0], new_args[1]); ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_format) { ++ assert(arg_count < 4); ++ Item *new_args[4] = {nullptr}; ++ for (uint i = 0; i < arg_count; i++) { ++ new_args[i] = args[i]->pq_clone(thd, select); ++ if (new_args[i] == nullptr) return nullptr; ++ } ++ ++ if (arg_count == 2) ++ new_item = new (thd->pq_mem_root) ++ Item_func_format(POS(), new_args[0], new_args[1]); ++ else if (arg_count == 3) ++ new_item = new (thd->pq_mem_root) ++ Item_func_format(POS(), new_args[0], new_args[1], new_args[2]); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_get_format) { ++ assert(arg_count == 1); ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ ++ new_item = new (thd->pq_mem_root) Item_func_get_format(POS(), type, arg); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_hex, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_inet6_aton, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_inet6_ntoa, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_md5, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_sha, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_sha2, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_to_base64, POS(), ARG0) ++ ++PQ_COPY_FROM_DEF(Item_str_conv, Item_str_func) { ++ if (orig_item != nullptr) { ++ multiply = orig_item->multiply; ++ converter = orig_item->converter; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_func_upper, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_lower, POS(), ARG0) ++ ++PQ_COPY_FROM_DEF(Item_temporal_hybrid_func, Item_str_func) { ++ if (orig_item != nullptr) { ++ sql_mode = orig_item->sql_mode; ++ ascii_buf.copy(orig_item->ascii_buf); ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_date_add_interval) { ++ Item *arg_a = args[0]->pq_clone(thd, select); ++ Item *arg_b = args[1]->pq_clone(thd, select); ++ if (arg_a == nullptr || arg_b == nullptr) return nullptr; ++ new_item = new (thd->pq_mem_root) ++ Item_date_add_interval(arg_a, arg_b, int_type, date_sub_interval); ++ if (new_item) { ++ new_item->set_data_type(data_type()); ++ } ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_add_time, POS(), ARG0, ARG1, is_date, ++ sign == -1 ? 
true : false) ++ ++COPY_FUNC_ITEM(Item_func_str_to_date, POS(), ARG0, ARG1) ++ ++PQ_COPY_FROM_DEF(Item_func_str_to_date, Item_temporal_hybrid_func) { ++ if (orig_item != nullptr) { ++ cached_timestamp_type = orig_item->cached_timestamp_type; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_typecast_char, ARG0, cast_length, cast_cs) ++ ++PQ_CLONE_DEF(Item_date_literal) { ++ MYSQL_TIME ltime; ++ cached_time.get_time(<ime); ++ new_item = new (thd->pq_mem_root) Item_date_literal(<ime); ++} ++PQ_CLONE_RETURN ++/* Item_str_func end */ ++ ++COPY_FUNC_ITEM(Item_func_curdate_utc, POS()) ++COPY_FUNC_ITEM(Item_func_curdate_local, POS()) ++COPY_FUNC_ITEM(Item_func_from_days, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_makedate, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_typecast_date, POS(), ARG0) ++ ++PQ_COPY_FROM_DEF(Item_typecast_date, Item_date_func) { ++ if (orig_item != nullptr) { ++ m_explicit_cast = orig_item->m_explicit_cast; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_datetime_literal) { ++ MYSQL_TIME *ltime = new (thd->pq_mem_root) MYSQL_TIME(); ++ if (ltime != nullptr) { ++ this->get_date(ltime, 0); ++ new_item = new (thd->pq_mem_root) Item_datetime_literal( ++ ltime, this->cached_time.decimals(), thd->variables.time_zone); ++ } ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_convert_tz, POS(), ARG0, ARG1, ARG2) ++COPY_FUNC_ITEM(Item_func_from_unixtime, POS(), ARG0) ++ ++PQ_CLONE_DEF(Item_func_sysdate_local) { ++ new_item = new (thd->pq_mem_root) Item_func_sysdate_local(decimals); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_typecast_datetime) { ++ if (origin_item) { ++ return origin_item->pq_clone(thd, select); ++ } ++ Item *arg_item = args[0]->pq_clone(thd, select); ++ if (arg_item == nullptr) return nullptr; ++ ++ new_item = new (thd->pq_mem_root) Item_typecast_datetime(POS(), arg_item); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_typecast_datetime, Item_datetime_func) { ++ if (orig_item != nullptr) { ++ detect_precision_from_arg = orig_item->detect_precision_from_arg; ++ decimals = orig_item->decimals; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_func_at_time_zone) { ++ Item *arg_item = args[0]->pq_clone(thd, select); ++ if (arg_item == nullptr) return nullptr; ++ ++ new_item = new (thd->pq_mem_root) Item_func_at_time_zone( ++ POS(), arg_item, m_specifier_string, m_is_interval); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_curtime_local) { ++ PQ_CLONE_ARGS ++ new_item = ++ new (thd->pq_mem_root) Item_func_curtime_local(POS(), this->decimals); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_curtime_utc) { ++ new_item = new (thd->pq_mem_root) Item_func_curtime_utc(POS(), decimals); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_func_maketime, POS(), ARG0, ARG1, ARG2) ++COPY_FUNC_ITEM(Item_func_sec_to_time, POS(), ARG0) ++COPY_FUNC_ITEM(Item_func_timediff, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_typecast_time, POS(), ARG0) ++ ++PQ_COPY_FROM_DEF(Item_typecast_time, Item_time_func) { ++ if (orig_item != nullptr) { ++ detect_precision_from_arg = orig_item->detect_precision_from_arg; ++ decimals = orig_item->decimals; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_time_literal) { ++ MYSQL_TIME *ltime = new (thd->pq_mem_root) MYSQL_TIME(); ++ if (ltime == nullptr) return nullptr; ++ cached_time.get_time(ltime); ++ new_item = new (thd->pq_mem_root) Item_time_literal(ltime, pq_dec_arg); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_typecast_decimal) { ++ Item *item_arg = args[0]->pq_clone(thd, select); ++ if (item_arg == nullptr) return nullptr; ++ ++ 
new_item = new (thd->pq_mem_root) ++ Item_typecast_decimal(POS(), item_arg, pq_precision, decimals); ++} ++PQ_CLONE_RETURN ++ ++COPY_FUNC_ITEM(Item_typecast_real, ARG0) ++ ++PQ_CLONE_DEF(Item_func_get_system_var) { ++ sys_var *var_arg = var; ++ ++ if (var_arg == nullptr) { ++ var_arg = var_tracker.bind_system_variable(thd); ++ } ++ ++ if (var_arg != nullptr) { ++ new_item = new (thd->pq_mem_root) Item_func_get_system_var( ++ var_arg, var_type, &component, item_name.ptr(), item_name.length()); ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_get_system_var, Item_var_func) { ++ if (orig_item != nullptr) { ++ cached_llval = orig_item->cached_llval; ++ cached_dval = orig_item->cached_dval; ++ cached_strval.copy(orig_item->cached_strval); ++ cached_null_value = orig_item->cached_null_value; ++ used_query_id = orig_item->used_query_id; ++ cache_present = orig_item->cache_present; ++ } ++} ++PQ_COPY_FROM_RETURN ++/* Item_func end */ ++ ++/* Item sum start */ ++PQ_COPY_FROM_DEF(Item_sum, Item_result_field) { ++ if (orig_item != nullptr) { ++ force_copy_fields = orig_item->force_copy_fields; ++ with_distinct = orig_item->with_distinct; ++ max_aggr_level = orig_item->max_aggr_level; ++ max_sum_func_level = orig_item->max_sum_func_level; ++ allow_group_via_temp_table = orig_item->allow_group_via_temp_table; ++ save_deny_window_func = orig_item->save_deny_window_func; ++ used_tables_cache = orig_item->used_tables_cache; ++ forced_const = orig_item->forced_const; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++Item_sum *Item_sum::pq_rebuild_sum_func( ++ THD *thd MY_ATTRIBUTE((unused)), Query_block *select MY_ATTRIBUTE((unused)), ++ Item *item MY_ATTRIBUTE((unused))) { ++ sql_print_warning( ++ "Item type %s's rebuild sum method is not implemented, " ++ "will not use parallel query, SQL= %s", ++ typeid(*this).name(), thd->query().str); ++ assert(DBUG_EVALUATE_IF("simulate_no_item_rebuild_function", true, false) || ++ false); ++ return nullptr; ++} ++ ++PQ_COPY_FROM_DEF(Item_sum_bit, Item_sum) { ++ if (orig_item != nullptr) { ++ reset_bits = orig_item->reset_bits; ++ bits = orig_item->bits; ++ hybrid_type = orig_item->hybrid_type; ++ m_count = orig_item->m_count; ++ m_frame_null_count = orig_item->m_frame_null_count; ++ } ++ m_digit_cnt = nullptr; ++ m_digit_cnt_card = 0; ++ if (orig_item != nullptr) { ++ m_is_xor = orig_item->m_is_xor; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_REBUILD_SUM_DEF(Item_sum_and) { ++ new_item = new (thd->pq_mem_root) Item_sum_and(POS(), item, nullptr); ++} ++PQ_REBUILD_SUM_RETURN ++ ++PQ_CLONE_DEF(Item_sum_and) { ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ ++ new_item = new (thd->pq_mem_root) Item_sum_and(POS(), arg, nullptr); ++} ++PQ_CLONE_RETURN ++ ++PQ_REBUILD_SUM_DEF(Item_sum_or) { ++ new_item = new (thd->pq_mem_root) Item_sum_or(POS(), item, nullptr); ++} ++PQ_REBUILD_SUM_RETURN ++ ++PQ_CLONE_DEF(Item_sum_or) { ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ ++ new_item = new (thd->pq_mem_root) Item_sum_or(POS(), arg, nullptr); ++} ++PQ_CLONE_RETURN ++ ++PQ_REBUILD_SUM_DEF(Item_sum_xor) { ++ new_item = new (thd->pq_mem_root) Item_sum_xor(POS(), item, nullptr); ++} ++PQ_REBUILD_SUM_RETURN ++ ++PQ_CLONE_DEF(Item_sum_xor) { ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (nullptr == arg) return nullptr; ++ ++ new_item = new (thd->pq_mem_root) Item_sum_xor(POS(), arg, nullptr); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_sum_hybrid, Item_sum) { ++ if (orig_item == nullptr) { ++ return 
true; ++ } ++ ++ hybrid_type = orig_item->hybrid_type; ++ was_values = orig_item->was_values; ++ m_nulls_first = orig_item->m_nulls_first; ++ m_optimize = orig_item->m_optimize; ++ m_want_first = orig_item->m_want_first; ++ m_cnt = orig_item->m_cnt; ++ m_saved_last_value_at = orig_item->m_saved_last_value_at; ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_sum_max, ARG0); ++ ++PQ_REBUILD_SUM_DEF(Item_sum_max) { ++ new_item = new (thd->pq_mem_root) Item_sum_max(POS(), item, nullptr); ++} ++PQ_REBUILD_SUM_RETURN ++ ++COPY_FUNC_ITEM(Item_sum_min, ARG0); ++ ++PQ_REBUILD_SUM_DEF(Item_sum_min) { ++ new_item = new (thd->pq_mem_root) Item_sum_min(POS(), item, nullptr); ++} ++PQ_REBUILD_SUM_RETURN ++ ++PQ_COPY_FROM_DEF(Item_sum_num, Item_sum) { ++ DBUG_EXECUTE_IF("simulate_item_rebuild_attr_copy_error", return true;); ++ if (orig_item != nullptr) { ++ is_evaluated = orig_item->is_evaluated; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++COPY_FUNC_ITEM(Item_sum_count, POS(), ARG0, nullptr) ++ ++Item_sum *Item_sum_count::pq_rebuild_sum_func(THD *thd, Query_block *select, ++ Item *item) { ++ DBUG_EXECUTE_IF("simulate_item_rebuild_error", return nullptr;); ++ DBUG_EXECUTE_IF("simulate_no_item_rebuild_function", ++ return Item_sum::pq_rebuild_sum_func(thd, select, item);); ++ ++ Item_sum_count *new_item_sum = ++ new (thd->pq_mem_root) Item_sum_count(POS(), item, nullptr, true); ++ if (new_item_sum == nullptr || ++ new_item_sum->Item_sum_num::pq_copy_from(thd, select, this)) ++ return nullptr; ++ return new_item_sum; ++} ++ ++Item *PTI_count_sym::pq_clone(THD *thd, Query_block *select) { ++ CHECK_TYPE(PTI_count_sym) ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ Item_sum_count *new_count = ++ new (thd->pq_mem_root) Item_sum_count(POS(), arg, nullptr); ++ if (new_count == nullptr || new_count->pq_copy_from(thd, select, this)) ++ return nullptr; ++ return new_count; ++} ++ ++COPY_FUNC_ITEM(Item_sum_sum, POS(), ARG0, has_with_distinct(), nullptr) ++ ++PQ_REBUILD_SUM_DEF(Item_sum_sum) { ++ new_item = new (thd->pq_mem_root) ++ Item_sum_sum(POS(), item, has_with_distinct(), nullptr); ++} ++PQ_REBUILD_SUM_RETURN ++ ++PQ_CLONE_DEF(Item_sum_avg) { ++ assert(arg_count == 1); ++ Item *arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ new_item = new (thd->pq_mem_root) ++ Item_sum_avg(POS(), arg, has_with_distinct(), nullptr); ++ if (new_item) { ++ new_item->pq_avg_type = PQ_WORKER; ++ } ++} ++PQ_CLONE_RETURN ++ ++PQ_REBUILD_SUM_DEF(Item_sum_avg) { ++ new_item = new (thd->pq_mem_root) ++ Item_sum_avg(POS(), item, has_with_distinct(), nullptr); ++ if (new_item) { ++ new_item->pq_avg_type = PQ_REBUILD; ++ } ++} ++PQ_REBUILD_SUM_RETURN ++/* Item sum end */ ++/* Item_result_field end */ ++ ++PQ_CLONE_DEF(Item_row) { ++ assert(arg_count > 0); ++ Item *arg_head = items[0]->pq_clone(thd, select); ++ if (arg_head == nullptr) return nullptr; ++ mem_root_deque tail(thd->pq_mem_root); ++ for (uint i = 1; i < arg_count; i++) { ++ Item *arg_tail = items[i]->pq_clone(thd, select); ++ if (arg_tail == nullptr) return nullptr; ++ tail.push_back(arg_tail); ++ } ++ new_item = new (thd->pq_mem_root) Item_row(arg_head, tail); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_row, Item) { ++ // generated a random item_name for item_row ++ if (orig_item != nullptr) { ++ if (orig_item->item_name.length() == 0) { ++ assert(orig_item->item_name.ptr() == nullptr); ++ uint32 addr_mid_8 = ((uint64)this >> 32) << 24; ++ std::string std_addr = "ITEM_ROW" + std::to_string(addr_mid_8); 
++ item_name.copy(std_addr.c_str(), std_addr.length(), system_charset_info, ++ true); ++ } ++ used_tables_cache = orig_item->used_tables_cache; ++ not_null_tables_cache = orig_item->not_null_tables_cache; ++ with_null = orig_item->with_null; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_float) { ++ new_item = ++ new (thd->pq_mem_root) Item_float(item_name, value, decimals, max_length); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_int) { new_item = new (thd->pq_mem_root) Item_int(this); } ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_int, Item_num) { ++ if (orig_item != nullptr) { ++ value = orig_item->value; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++PQ_CLONE_DEF(Item_uint) { ++ new_item = new (thd->pq_mem_root) Item_uint(item_name, value, max_length); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_decimal) { ++ new_item = new (thd->pq_mem_root) ++ Item_decimal(item_name, &decimal_value, decimals, max_length); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_version) { ++ new_item = new (thd->pq_mem_root) Item_func_version(POS()); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(PTI_function_call_nonkeyword_now) { ++ new_item = ++ new (thd->pq_mem_root) PTI_function_call_nonkeyword_now(POS(), decimals); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(PTI_text_literal_text_string) { ++ new_item = new (thd->pq_mem_root) ++ PTI_text_literal_text_string(POS(), is_7bit, literal); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(PTI_text_literal_nchar_string) { ++ new_item = new (thd->pq_mem_root) ++ PTI_text_literal_nchar_string(POS(), is_7bit, literal); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(PTI_text_literal_underscore_charset) { ++ new_item = new (thd->pq_mem_root) ++ PTI_text_literal_underscore_charset(POS(), is_7bit, cs, literal); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(PTI_user_variable) { ++ new_item = new (thd->pq_mem_root) PTI_user_variable(POS(), {const_cast(name.m_str), name.m_length}); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_get_user_var) { ++ new_item = new (thd->pq_mem_root) Item_func_get_user_var(POS(), name); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_connection_id) { ++ new_item = new (thd->pq_mem_root) Item_func_connection_id(POS()); ++} ++PQ_CLONE_RETURN ++ ++PQ_CLONE_DEF(Item_func_trig_cond) { ++ Item *arg = nullptr; ++ if (arg_count > 0) arg = args[0]->pq_clone(thd, select); ++ if (arg == nullptr) return nullptr; ++ new_item = new (thd->pq_mem_root) Item_func_trig_cond( ++ arg, trig_var, thd->lex->unit->first_query_block()->join, m_idx, ++ trig_type); ++} ++PQ_CLONE_RETURN ++ ++PQ_COPY_FROM_DEF(Item_func_connection_id, Item_int_func) { ++ if (orig_item != nullptr) { ++ value = orig_item->value; ++ } ++} ++PQ_COPY_FROM_RETURN ++ ++Item *Item_func_unix_timestamp::pq_clone(THD *thd, Query_block *select) { ++ Item *arg_item = nullptr; ++ if (arg_count > 0) { ++ arg_item = args[0]->pq_clone(thd, select); ++ if (arg_item == nullptr) return nullptr; ++ } ++ ++ Item_func_unix_timestamp *new_item = nullptr; ++ if (arg_count) { ++ new_item = new (thd->pq_mem_root) Item_func_unix_timestamp(POS(), arg_item); ++ } else { ++ new_item = new (thd->pq_mem_root) Item_func_unix_timestamp(POS()); ++ } ++ ++ if (!new_item || new_item->pq_copy_from(thd, select, this)) return nullptr; ++ ++ return new_item; ++} ++ ++Item *Item_func_current_user::pq_clone(THD *thd, Query_block *select) { ++ Item_func_current_user *new_item = ++ new (thd->pq_mem_root) Item_func_current_user(POS()); ++ if (!new_item || new_item->pq_copy_from(thd, select, this)) return nullptr; ++ ++ new_item->context = &select->context; ++ 
return new_item; ++} ++ ++COPY_FUNC_ITEM(Item_func_benchmark, POS(), ARG0, ARG1) ++COPY_FUNC_ITEM(Item_func_found_rows, POS()) ++ ++Item *Item_func_false::pq_clone(THD *thd, Query_block *select) { ++ CHECK_TYPE(Item_func_false); ++ ++ Item *new_item = new (thd->pq_mem_root) Item_func_false(POS()); ++ COPY_SELF_ATTR(new_item) ++ ++ if (item_name.ptr() == antijoin_null_cond) { ++ new_item->item_name.set(antijoin_null_cond); ++ } ++ ++ return new_item; ++} ++ ++Item *Item_datetime_with_ref::pq_clone(THD *thd, Query_block *select) { ++ CHECK_TYPE(Item_datetime_with_ref); ++ if (origin_item) { ++ return origin_item->pq_clone(thd, select); ++ } ++ ++ return ref->pq_clone(thd, select); ++} ++ ++Item *Item_time_with_ref::pq_clone(THD *thd, Query_block *select) { ++ CHECK_TYPE(Item_time_with_ref); ++ if (origin_item) { ++ return origin_item->pq_clone(thd, select); ++ } ++ ++ return ref->pq_clone(thd, select); ++} ++ ++#endif +diff --git a/sql/pq_condition.cc b/sql/pq_condition.cc +new file mode 100644 +index 00000000..29c7b77b +--- /dev/null ++++ b/sql/pq_condition.cc +@@ -0,0 +1,1017 @@ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. 
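
The Item cloning code above follows one shape throughout: allocate the copy on thd->pq_mem_root, clone argument items first, copy the plain attributes via pq_copy_from(), and let a nullptr result propagate failure to the caller. A minimal standalone sketch of that shape, with invented names (Arena, Expr, ConstExpr, NegExpr) standing in for MEM_ROOT and the Item hierarchy, and assuming nothing about the real classes:

#include <cstddef>
#include <new>
#include <vector>

// Stand-in for MEM_ROOT: hands out blocks and frees them all at once,
// without running destructors (fine here, nothing owns a resource).
struct Arena {
  std::vector<void *> blocks;
  void *alloc(std::size_t n) {
    void *p = ::operator new(n, std::nothrow);
    if (p != nullptr) blocks.push_back(p);
    return p;
  }
  ~Arena() {
    for (void *p : blocks) ::operator delete(p);
  }
};

struct Expr {
  virtual ~Expr() = default;
  // Each node clones itself onto the arena; nullptr propagates failure upward.
  virtual Expr *clone(Arena *a) const = 0;
};

struct ConstExpr : Expr {
  long value;
  explicit ConstExpr(long v) : value(v) {}
  Expr *clone(Arena *a) const override {
    void *mem = a->alloc(sizeof(ConstExpr));
    return mem ? new (mem) ConstExpr(value) : nullptr;  // placement new on the arena
  }
};

struct NegExpr : Expr {
  Expr *arg;
  explicit NegExpr(Expr *a) : arg(a) {}
  Expr *clone(Arena *a) const override {
    Expr *cloned_arg = arg->clone(a);           // clone the argument first ...
    if (cloned_arg == nullptr) return nullptr;
    void *mem = a->alloc(sizeof(NegExpr));
    return mem ? new (mem) NegExpr(cloned_arg) : nullptr;  // ... then rebuild the node
  }
};

int main() {
  Arena arena;
  ConstExpr c(42);
  NegExpr n(&c);
  return n.clone(&arena) != nullptr ? 0 : 1;
}
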
++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++#include "sql/pq_condition.h" ++#include ++#include "sql/item_strfunc.h" ++#include "sql/item_sum.h" ++#include "sql/mysqld.h" ++#include "sql/opt_range.h" ++#include "sql/sql_lex.h" ++#include "sql/sql_optimizer.h" ++#include "sql/sql_parallel.h" ++#include "sql/sql_tmp_table.h" ++ ++const enum_field_types NO_PQ_SUPPORTED_FIELD_TYPES[] = { ++ MYSQL_TYPE_TINY_BLOB, MYSQL_TYPE_MEDIUM_BLOB, MYSQL_TYPE_BLOB, ++ MYSQL_TYPE_LONG_BLOB, MYSQL_TYPE_JSON, MYSQL_TYPE_GEOMETRY}; ++ ++const Item_sum::Sumfunctype NO_PQ_SUPPORTED_AGG_FUNC_TYPES[] = { ++ Item_sum::COUNT_DISTINCT_FUNC, ++ Item_sum::SUM_DISTINCT_FUNC, ++ Item_sum::AVG_DISTINCT_FUNC, ++ Item_sum::GROUP_CONCAT_FUNC, ++ Item_sum::JSON_AGG_FUNC, ++ Item_sum::UDF_SUM_FUNC, ++ Item_sum::STD_FUNC, ++ Item_sum::VARIANCE_FUNC, ++ Item_sum::SUM_BIT_FUNC}; ++ ++const Item_func::Functype NO_PQ_SUPPORTED_FUNC_TYPES[] = { ++ Item_func::FT_FUNC, Item_func::MATCH_FUNC, Item_func::SUSERVAR_FUNC, ++ Item_func::FUNC_SP, Item_func::JSON_FUNC, Item_func::SUSERVAR_FUNC, ++ Item_func::UDF_FUNC, Item_func::XML_FUNC}; ++ ++const char *NO_PQ_SUPPORTED_FUNC_ARGS[] = { ++ "rand", "json_valid", "json_length", ++ "json_type", "json_contains_path", "json_unquote", ++ "st_distance", "get_lock", "is_free_lock", ++ "is_used_lock", "release_lock", "sleep", ++ "xml_str", "json_func", ++ "weight_string", // Data truncation (MySQL BUG) ++ "des_decrypt" // Data truncation ++}; ++ ++const char *NO_PQ_SUPPORTED_FUNC_NO_ARGS[] = {"release_all_locks"}; ++ ++/** ++ * return true when type is a not_supported_field; return false otherwise. ++ */ ++bool pq_not_support_datatype(enum_field_types type) { ++ for (const enum_field_types &field_type : NO_PQ_SUPPORTED_FIELD_TYPES) { ++ if (type == field_type) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * check PQ supported function type ++ */ ++bool pq_not_support_functype(Item_func::Functype type) { ++ for (const Item_func::Functype &func_type : NO_PQ_SUPPORTED_FUNC_TYPES) { ++ if (type == func_type) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * check PQ supported function ++ */ ++bool pq_not_support_func(Item_func *func) { ++ if (pq_not_support_functype(func->functype())) { ++ return true; ++ } ++ ++ for (const char *funcname : NO_PQ_SUPPORTED_FUNC_ARGS) { ++ if (!strcmp(func->func_name(), funcname) && func->arg_count != 0) { ++ return true; ++ } ++ } ++ ++ for (const char *funcname : NO_PQ_SUPPORTED_FUNC_NO_ARGS) { ++ if (!strcmp(func->func_name(), funcname)) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * check PQ support aggregation function ++ */ ++bool pq_not_support_aggr_functype(Item_sum::Sumfunctype type) { ++ for (const Item_sum::Sumfunctype &sum_func_type : ++ NO_PQ_SUPPORTED_AGG_FUNC_TYPES) { ++ if (type == sum_func_type) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * check PQ supported ref function ++ */ ++bool pq_not_support_ref(Item_ref *ref, bool having) { ++ Item_ref::Ref_Type type = ref->ref_type(); ++ if (type == Item_ref::OUTER_REF) { ++ return true; ++ } ++ /** ++ * Now, when the sql contains a aggregate function after the 'having', ++ * we do not support parallel query. 
For example: ++ * select t1.col1 from t1 group by t1.col1 having avg(t1.col1) > 0; ++ * So, we disable the sql; ++ */ ++ if (having && type == Item_ref::AGGREGATE_REF) { ++ return true; ++ } ++ ++ return false; ++} ++ ++typedef bool (*PQ_CHECK_ITEM_FUN)(Item *item, bool having); ++ ++struct PQ_CHECK_ITEM_TYPE { ++ Item::Type item_type; ++ PQ_CHECK_ITEM_FUN fun_ptr; ++}; ++ ++bool check_pq_support_fieldtype(Item *item, bool having); ++ ++bool check_pq_support_fieldtype_of_field_item(Item *item, ++ bool MY_ATTRIBUTE((unused))) { ++ Field *field = static_cast(item)->field; ++ assert(field); ++ // not supported for generated column ++ if (field && (field->is_gcol() || pq_not_support_datatype(field->type()))) { ++ return false; ++ } ++ ++ return true; ++} ++ ++bool check_pq_support_fieldtype_of_func_item(Item *item, bool having) { ++ Item_func *func = static_cast(item); ++ assert(func); ++ ++ // check func type ++ if (pq_not_support_func(func)) { ++ return false; ++ } ++ ++ // the case of Item_func_make_set ++ if (!strcmp(func->func_name(), "make_set")) { ++ Item *arg_item = down_cast(func)->item; ++ if (arg_item && !check_pq_support_fieldtype(arg_item, having)) { ++ return false; ++ } ++ } ++ ++ // check func args type ++ for (uint i = 0; i < func->arg_count; i++) { ++ // c: args contain unsupported fields ++ Item *arg_item = func->arguments()[i]; ++ if (arg_item == nullptr || ++ !check_pq_support_fieldtype(arg_item, having)) { // c ++ return false; ++ } ++ } ++ ++ // the case of Item_equal ++ if (func->functype() == Item_func::MULT_EQUAL_FUNC) { ++ Item_equal *item_equal = down_cast(item); ++ assert(item_equal); ++ ++ // check const_item ++ Item *const_item = item_equal->get_const(); ++ if (const_item && ++ (const_item->type() == Item::SUM_FUNC_ITEM || // c1 ++ !check_pq_support_fieldtype(const_item, having))) { // c2 ++ return false; ++ } ++ ++ // check fields ++ Item *field_item = nullptr; ++ List fields = item_equal->get_fields(); ++ List_iterator_fast it(fields); ++ for (size_t i = 0; (field_item = it++); i++) { ++ if (!check_pq_support_fieldtype(field_item, having)) { ++ return false; ++ } ++ } ++ } ++ ++ return true; ++} ++ ++bool check_pq_support_fieldtype_of_cond_item(Item *item, bool having) { ++ Item_cond *cond = static_cast(item); ++ assert(cond); ++ ++ if (pq_not_support_functype(cond->functype())) { ++ return false; ++ } ++ ++ Item *arg_item = nullptr; ++ List_iterator_fast it(*cond->argument_list()); ++ for (size_t i = 0; (arg_item = it++); i++) { ++ if (arg_item->type() == Item::SUM_FUNC_ITEM || // c1 ++ !check_pq_support_fieldtype(arg_item, having)) { // c2 ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++bool check_pq_support_fieldtype_of_sum_func_item(Item *item, bool having) { ++ /** ++ * Now, when the sql contains a reference to the aggregate function after the ++ * 'having', we do not support parallel query. For example: select t1.col1, ++ * avg(t1.col1) as avg from t1 group by t1.col1 having avg > 0; So, we disable ++ * the sql. 
++ */ ++ if (having) { ++ return false; ++ } ++ Item_sum *sum = static_cast(item); ++ if (!sum || pq_not_support_aggr_functype(sum->sum_func())) { ++ return false; ++ } ++ ++ for (uint i = 0; i < sum->argument_count(); i++) { ++ if (!check_pq_support_fieldtype(sum->get_arg(i), having)) { ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++bool check_pq_support_fieldtype_of_ref_item(Item *item, bool having) { ++ Item_ref *item_ref = down_cast(item); ++ if (item_ref == nullptr || pq_not_support_ref(item_ref, having)) { ++ return false; ++ } ++ ++ if (!check_pq_support_fieldtype(item_ref->ref[0], having)) { ++ return false; ++ } ++ ++ return true; ++} ++ ++bool check_pq_support_fieldtype_of_cache_item(Item *item, bool having) { ++ Item_cache *item_cache = dynamic_cast(item); ++ if (item_cache == nullptr) { ++ return false; ++ } ++ ++ Item *example_item = item_cache->get_example(); ++ if (example_item == nullptr || ++ example_item->type() == Item::SUM_FUNC_ITEM || // c1 ++ !check_pq_support_fieldtype(example_item, having)) { // c2 ++ return false; ++ } ++ ++ return true; ++} ++ ++bool check_pq_support_fieldtype_of_row_item(Item *item, bool having) { ++ // check each item in Item_row ++ Item_row *row_item = down_cast(item); ++ for (uint i = 0; i < row_item->cols(); i++) { ++ Item *n_item = row_item->element_index(i); ++ if (n_item == nullptr || n_item->type() == Item::SUM_FUNC_ITEM || // c1 ++ !check_pq_support_fieldtype(n_item, having)) { // c2 ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++PQ_CHECK_ITEM_TYPE g_check_item_type[] = { ++ {Item::INVALID_ITEM, nullptr}, ++ {Item::FIELD_ITEM, check_pq_support_fieldtype_of_field_item}, ++ {Item::FUNC_ITEM, check_pq_support_fieldtype_of_func_item}, ++ {Item::SUM_FUNC_ITEM, check_pq_support_fieldtype_of_sum_func_item}, ++ {Item::STRING_ITEM, nullptr}, ++ {Item::INT_ITEM, nullptr}, ++ {Item::REAL_ITEM, nullptr}, ++ {Item::NULL_ITEM, nullptr}, ++ {Item::VARBIN_ITEM, nullptr}, ++ {Item::METADATA_COPY_ITEM, nullptr}, ++ {Item::FIELD_AVG_ITEM, nullptr}, ++ {Item::DEFAULT_VALUE_ITEM, nullptr}, ++ {Item::PROC_ITEM, nullptr}, ++ {Item::COND_ITEM, check_pq_support_fieldtype_of_cond_item}, ++ {Item::REF_ITEM, check_pq_support_fieldtype_of_ref_item}, ++ {Item::FIELD_STD_ITEM, nullptr}, ++ {Item::FIELD_VARIANCE_ITEM, nullptr}, ++ {Item::INSERT_VALUE_ITEM, nullptr}, ++ {Item::SUBSELECT_ITEM, nullptr}, ++ {Item::ROW_ITEM, check_pq_support_fieldtype_of_row_item}, ++ {Item::CACHE_ITEM, check_pq_support_fieldtype_of_cache_item}, ++ {Item::TYPE_HOLDER, nullptr}, ++ {Item::PARAM_ITEM, nullptr}, ++ {Item::TRIGGER_FIELD_ITEM, nullptr}, ++ {Item::DECIMAL_ITEM, nullptr}, ++ {Item::XPATH_NODESET, nullptr}, ++ {Item::XPATH_NODESET_CMP, nullptr}, ++ {Item::VIEW_FIXER_ITEM, nullptr}, ++ {Item::FIELD_BIT_ITEM, nullptr}, ++ {Item::VALUES_COLUMN_ITEM, nullptr}}; ++ ++/** ++ * check item is supported by Parallel Query or not ++ * ++ * @retval: ++ * true : supported ++ * false : not supported ++ */ ++bool check_pq_support_fieldtype(Item *item, bool having) { ++ if (item == nullptr || pq_not_support_datatype(item->data_type())) { ++ return false; ++ } ++ ++ if (g_check_item_type[item->type()].fun_ptr != nullptr) { ++ return g_check_item_type[item->type()].fun_ptr(item, having); ++ } ++ ++ return true; ++} ++ ++/* ++ * check if order_list contains aggregate function ++ * ++ * @retval: ++ * true: contained ++ * false: ++ */ ++bool check_pq_sort_aggregation(const ORDER_with_src &order_list) { ++ if (order_list.order == nullptr) { ++ return false; ++ } ++ ++ ORDER *tmp = 
nullptr; ++ Item *order_item = nullptr; ++ ++ for (tmp = order_list.order; tmp; tmp = tmp->next) { ++ order_item = *(tmp->item); ++ if (!check_pq_support_fieldtype(order_item, false)) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/* ++ * generate item's result_field ++ * ++ * @retval: ++ * false: generate success ++ * ture: otherwise ++ */ ++bool pq_create_result_fields(THD *thd, Temp_table_param *param, ++ mem_root_deque &fields, bool save_sum_fields, ++ ulonglong select_options, MEM_ROOT *root) { ++ const bool not_all_columns = !(select_options & TMP_TABLE_ALL_COLUMNS); ++ long hidden_field_count = param->hidden_field_count; ++ Field *from_field = nullptr; ++ Field **tmp_from_field = &from_field; ++ Field **default_field = &from_field; ++ ++ bool force_copy_fields = false; ++ TABLE_SHARE s; ++ TABLE table; ++ table.s = &s; ++ ++ uint copy_func_count = param->func_count; ++ if (param->precomputed_group_by) { ++ copy_func_count += param->sum_func_count; ++ } ++ ++ Func_ptr_array *copy_func = new (root) Func_ptr_array(root); ++ if (copy_func == nullptr) { ++ return true; ++ } ++ ++ copy_func->reserve(copy_func_count); ++ for (Item *item : fields) { ++ Item::Type type = item->type(); ++ const bool is_sum_func = ++ type == Item::SUM_FUNC_ITEM && !item->m_is_window_function; ++ ++ if (not_all_columns && item != nullptr) { ++ if (item->has_aggregation() && type != Item::SUM_FUNC_ITEM) { ++ if (item->is_outer_reference()) item->update_used_tables(); ++ if (type == Item::SUBSELECT_ITEM || ++ (item->used_tables() & ~OUTER_REF_TABLE_BIT)) { ++ param->using_outer_summary_function = 1; ++ goto update_hidden; ++ } ++ } ++ ++ if (item->m_is_window_function) { ++ if (!param->m_window || param->m_window_frame_buffer) { ++ goto update_hidden; ++ } ++ ++ if (param->m_window != down_cast(item)->window()) { ++ goto update_hidden; ++ } ++ } else if (item->has_wf()) { ++ if (param->m_window == nullptr || !param->m_window->is_last()) { ++ goto update_hidden; ++ } ++ } ++ ++ if (item->const_item()) continue; ++ } ++ ++ if (is_sum_func && !save_sum_fields) { ++ /* Can't calc group yet */ ++ } else { ++ Field *new_field = nullptr; ++ if (param->schema_table) { ++ new_field = ++ item ? create_tmp_field_for_schema(item, &table, root) : nullptr; ++ } else { ++ new_field = ++ item ? create_tmp_field(thd, &table, item, type, copy_func, ++ tmp_from_field, default_field, false, //(1) ++ !force_copy_fields && not_all_columns, ++ item->marker == Item::MARKER_BIT || ++ param->bit_fields_as_long, //(2) ++ force_copy_fields, false, root) ++ : nullptr; ++ } ++ ++ if (new_field == nullptr) { ++ assert(thd->is_fatal_error()); ++ return true; ++ } ++ ++ if (not_all_columns && type == Item::SUM_FUNC_ITEM) { ++ ((Item_sum *)item)->result_field = new_field; ++ } ++ ++ s.fields++; ++ } ++ ++ update_hidden: ++ if (!--hidden_field_count) { ++ param->hidden_field_count = 0; ++ } ++ } // end of while ((item=li++)). 
++ ++ if (s.fields == 0) return true; ++ ++ Field *result_field = nullptr; ++ ++ for (Item *item : fields) { ++ // c1: const_item will not produce field in the first rewritten table ++ if (item->const_item() || item->basic_const_item()) { ++ continue; ++ } ++ ++ if (item->has_aggregation() && item->type() != Item::SUM_FUNC_ITEM) { ++ if (item->type() == Item::SUBSELECT_ITEM || ++ (item->used_tables() & ~OUTER_REF_TABLE_BIT)) { ++ continue; ++ } ++ } ++ ++ result_field = item->get_result_field(); ++ if (result_field) { ++ enum_field_types field_type = result_field->type(); ++ // c3: result_field contains unsupported data type ++ if (pq_not_support_datatype(field_type)) { ++ return true; ++ } ++ } else { ++ // c4: item is not FIELD_ITEM and it has no result_field ++ if (item->type() != Item::FIELD_ITEM) { ++ return true; ++ } ++ ++ result_field = down_cast(item)->result_field; ++ if (result_field && pq_not_support_datatype(result_field->type())) { ++ return true; ++ } ++ } ++ } ++ ++ return false; ++} ++ ++/** ++ * check whether the select result fields is suitable for parallel query ++ * ++ * @return: ++ * true, suitable ++ * false. ++ */ ++bool check_pq_select_result_fields(JOIN *join) { ++ DBUG_ENTER("check result fields is suitable for parallel query or not"); ++ MEM_ROOT *pq_check_root = ::new MEM_ROOT(); ++ if (pq_check_root == nullptr) { ++ DBUG_RETURN(false); ++ } ++ ++ init_sql_alloc(key_memory_thd_main_mem_root, pq_check_root, ++ global_system_variables.query_alloc_block_size, ++ global_system_variables.query_prealloc_size); ++ ++ bool suit_for_parallel = false; ++ ++ mem_root_deque *tmp_all_fields = join->fields; ++ ++ join->tmp_table_param->pq_copy(join->saved_tmp_table_param); ++ join->tmp_table_param->copy_fields.clear(); ++ ++ Temp_table_param *tmp_param = ++ new (pq_check_root) Temp_table_param(*join->tmp_table_param); ++ ++ if (tmp_param == nullptr) { ++ // free the memory ++ free_root(pq_check_root, MYF(0)); ++ if (pq_check_root) { ++ ::delete pq_check_root; ++ } ++ DBUG_RETURN(suit_for_parallel); ++ } ++ ++ tmp_param->m_window_frame_buffer = true; ++ mem_root_deque tmplist(*tmp_all_fields); ++ tmp_param->hidden_field_count = CountHiddenFields(*tmp_all_fields); ++ ++ // create_tmp_table may change the original item's result_field, hence ++ // we must save it before. ++ std::vector saved_result_field(tmplist.size(), nullptr); ++ ++ int i = 0; ++ for (Item *tmp_item : *tmp_all_fields) { ++ if (tmp_item->type() == Item::FIELD_ITEM || ++ tmp_item->type() == Item::DEFAULT_VALUE_ITEM) { ++ saved_result_field[i] = down_cast(tmp_item)->result_field; ++ } else { ++ saved_result_field[i] = tmp_item->get_result_field(); ++ } ++ i++; ++ } ++ ++ if (pq_create_result_fields(join->thd, tmp_param, tmplist, true, ++ join->query_block->active_options(), ++ pq_check_root)) { ++ suit_for_parallel = false; ++ } else { ++ suit_for_parallel = true; ++ } ++ ++ // restore result_field ++ i = 0; ++ for (Item *tmp_item : *tmp_all_fields) { ++ if (tmp_item->type() == Item::FIELD_ITEM || ++ tmp_item->type() == Item::DEFAULT_VALUE_ITEM) { ++ down_cast(tmp_item)->result_field = saved_result_field[i]; ++ } else { ++ tmp_item->set_result_field(saved_result_field[i]); ++ } ++ i++; ++ } ++ ++ // free the memory ++ free_root(pq_check_root, MYF(0)); ++ if (pq_check_root) { ++ ::delete pq_check_root; ++ } ++ DBUG_RETURN(suit_for_parallel); ++} ++ ++/** ++ * check whether the select fields is suitable for parallel query ++ * ++ * @return: ++ * true, suitable ++ * false. 
++ */ ++bool check_pq_select_fields(JOIN *join) { ++ // check whether contains blob, text, json and geometry field ++ for (Item *item : *join->query_block_fields) { ++ if (!check_pq_support_fieldtype(item, false)) { ++ return false; ++ } ++ } ++ ++ Item *n_where_cond = join->query_block->where_cond(); ++ Item *n_having_cond = join->query_block->having_cond(); ++ ++ if (n_where_cond && !check_pq_support_fieldtype(n_where_cond, false)) { ++ return false; ++ } ++ ++ /* ++ * For Having Aggr. function, the having_item will be pushed ++ * into all_fields in prepare phase. Currently, we have not support this ++ * operation. ++ */ ++ if (n_having_cond && !check_pq_support_fieldtype(n_having_cond, true)) { ++ return false; ++ } ++ ++ if (check_pq_sort_aggregation(join->order)) { ++ return false; ++ } ++ ++ if (!check_pq_select_result_fields(join)) { ++ return false; ++ } ++ ++ return true; ++} ++ ++/** ++ * choose a table that do parallel query, currently only do parallel scan on ++ * first no-const primary table. ++ * Disallow splitting inner tables, such as select * from t1 left join t2 on 1 ++ * where t1.a = 't1'. We can't split t2 when t1 is const table. ++ * Disallow splitting semijion inner tables,such as select * from t1 where ++ * exists (select * from t2). We can't split t2. ++ * ++ * @return: ++ * true, found a parallel scan table ++ * false, cann't found a parallel scan table ++ */ ++bool choose_parallel_scan_table(JOIN *join) { ++ QEP_TAB *tab = &join->qep_tab[join->const_tables]; ++ if (tab->is_inner_table_of_outer_join() || tab->m_qs->first_sj_inner() >= 0) { ++ return false; ++ } ++ tab->do_parallel_scan = true; ++ return true; ++} ++ ++void set_pq_dop(THD *thd) { ++ if (!thd->no_pq && thd->variables.force_parallel_execute && ++ thd->pq_dop == 0) { ++ thd->pq_dop = thd->variables.parallel_default_dop; ++ } ++} ++ ++/** ++ * check whether the parallel query is enabled and set the ++ * parallel query condition status ++ * ++ */ ++void set_pq_condition_status(THD *thd) { ++ set_pq_dop(thd); ++ ++ if (thd->pq_dop > 0) { ++ thd->m_suite_for_pq = PqConditionStatus::ENABLED; ++ } else { ++ thd->m_suite_for_pq = PqConditionStatus::NOT_SUPPORTED; ++ } ++} ++ ++bool suite_for_parallel_query(THD *thd) { ++ if (thd->in_sp_trigger != 0 || // store procedure or trigger ++ thd->m_attachable_trx || // attachable transaction ++ thd->tx_isolation == ++ ISO_SERIALIZABLE) { // serializable without snapshot read ++ return false; ++ } ++ ++ return true; ++} ++ ++bool suite_for_parallel_query(LEX *lex) { ++ if (lex->in_execute_ps) { ++ return false; ++ } ++ ++ return true; ++} ++ ++bool suite_for_parallel_query(Query_expression *unit) { ++ if (!unit->is_simple()) { ++ return false; ++ } ++ ++ return true; ++} ++ ++bool suite_for_parallel_query(TABLE_LIST *tbl_list) { ++ if (tbl_list->is_view() || // view ++ tbl_list->lock_descriptor().type > TL_READ || // explicit table lock ++ tbl_list->is_fulltext_searched() || // fulltext match search ++ current_thd->locking_clause) { ++ return false; ++ } ++ ++ TABLE *tb = tbl_list->table; ++ if (tb != nullptr && ++ (tb->s->tmp_table != NO_TMP_TABLE || // template table ++ tb->file->ht->db_type != DB_TYPE_INNODB || // Non-InnoDB table ++ tb->part_info)) { // partition table ++ return false; ++ } ++ ++ return true; ++} ++ ++bool suite_for_parallel_query(Query_block *select) { ++ if (select->first_inner_query_expression() != ++ nullptr || // nesting subquery, including view〝derived ++ // table〝subquery condition and so on. 
++ select->outer_query_block() != nullptr || // nested subquery ++ select->is_distinct() || // select distinct ++ select->saved_windows_elements) { // windows function ++ return false; ++ } ++ ++ for (TABLE_LIST *tbl_list = select->table_list.first; tbl_list != nullptr; ++ tbl_list = tbl_list->next_local) { ++ if (!suite_for_parallel_query(tbl_list)) { ++ return false; ++ } ++ } ++ ++ for (TABLE_LIST *tbl_list = select->table_list.first; tbl_list != nullptr; ++ tbl_list = tbl_list->next_global) { ++ if (!suite_for_parallel_query(tbl_list)) { ++ return false; ++ } ++ } ++ ++ for (TABLE_LIST *tbl_list = select->leaf_tables; tbl_list != nullptr; ++ tbl_list = tbl_list->next_leaf) { ++ if (!suite_for_parallel_query(tbl_list)) { ++ return false; ++ } ++ } ++ return true; ++} ++ ++bool suite_for_parallel_query(JOIN *join) { ++ if ((join->best_read < join->thd->variables.parallel_cost_threshold) || ++ (join->primary_tables == join->const_tables) || ++ (join->select_distinct || join->select_count) || ++ (join->query_block_fields->size() > MAX_FIELDS) || ++ (join->rollup_state != JOIN::RollupState::NONE) || ++ (join->zero_result_cause != nullptr)) { ++ return false; ++ } ++ QEP_TAB *tab = &join->qep_tab[join->const_tables]; ++ // only support table/index full/range scan ++ join_type scan_type = tab->type(); ++ if (scan_type != JT_ALL && scan_type != JT_INDEX_SCAN && ++ scan_type != JT_REF && ++ (scan_type != JT_RANGE || !tab->quick() || ++ tab->quick()->quick_select_type() != PQ_RANGE_SELECT)) { ++ return false; ++ } ++ if (!check_pq_select_fields(join)) { ++ return false; ++ } ++ ++ return true; ++} ++ ++bool check_pq_running_threads(uint dop, ulong timeout_ms) { ++ bool success = false; ++ mysql_mutex_lock(&LOCK_pq_threads_running); ++ if (parallel_threads_running + dop > parallel_max_threads) { ++ if (timeout_ms > 0) { ++ struct timespec start_ts; ++ struct timespec end_ts; ++ struct timespec abstime; ++ ulong wait_timeout = timeout_ms; ++ int wait_result; ++ ++ start: ++ set_timespec(&start_ts, 0); ++ /* Calcuate the waiting period. 
*/ ++ abstime.tv_sec = start_ts.tv_sec + wait_timeout / TIME_THOUSAND; ++ abstime.tv_nsec = ++ start_ts.tv_nsec + (wait_timeout % TIME_THOUSAND) * TIME_MILLION; ++ if (abstime.tv_nsec >= TIME_BILLION) { ++ abstime.tv_sec++; ++ abstime.tv_nsec -= TIME_BILLION; ++ } ++ wait_result = mysql_cond_timedwait(&COND_pq_threads_running, ++ &LOCK_pq_threads_running, &abstime); ++ if (parallel_threads_running + dop <= parallel_max_threads) { ++ success = true; ++ } else { ++ success = false; ++ if (!wait_result) { // wait isn't timeout ++ set_timespec(&end_ts, 0); ++ ulong difftime = (end_ts.tv_sec - start_ts.tv_sec) * TIME_THOUSAND + ++ (end_ts.tv_nsec - start_ts.tv_nsec) / TIME_MILLION; ++ wait_timeout -= difftime; ++ goto start; ++ } ++ } ++ } ++ } else { ++ success = true; ++ } ++ ++ if (success) { ++ uint32x2_t v_a = {parallel_threads_running, current_thd->pq_threads_running}; ++ uint32x2_t v_b = {dop, dop}; ++ v_a = vadd_u32(v_a, v_b); ++ parallel_threads_running = vget_lane_u32(v_a, 0); ++ current_thd->pq_threads_running = vget_lane_u32(v_a, 1); ++ } ++ ++ mysql_mutex_unlock(&LOCK_pq_threads_running); ++ return success; ++} ++ ++class PQCheck { ++ public: ++ explicit PQCheck(Query_block *select_lex_arg) : select_lex(select_lex_arg) {} ++ ++ virtual ~PQCheck() {} ++ ++ virtual bool suite_for_parallel_query(); ++ ++ protected: ++ virtual void set_select_id(); ++ virtual void set_select_type(); ++ ++ protected: ++ uint select_id{}; ++ enum_explain_type select_type{}; ++ ++ private: ++ Query_block *select_lex; ++}; ++ ++class PlanReadyPQCheck : public PQCheck { ++ public: ++ explicit PlanReadyPQCheck(Query_block *select_lex_arg) ++ : PQCheck(select_lex_arg), join(select_lex_arg->join) {} ++ ++ ~PlanReadyPQCheck() {} ++ ++ bool suite_for_parallel_query() override; ++ ++ private: ++ void set_select_id() override; ++ void set_select_type() override; ++ ++ private: ++ JOIN *join; ++ QEP_TAB *tab{nullptr}; ++}; ++ ++void PQCheck::set_select_id() { select_id = select_lex->select_number; } ++ ++void PQCheck::set_select_type() { select_type = select_lex->type(); } ++ ++bool PQCheck::suite_for_parallel_query() { ++ set_select_id(); ++ set_select_type(); ++ ++ if (select_id > 1 || select_type != enum_explain_type::EXPLAIN_SIMPLE) { ++ return false; ++ } ++ ++ return true; ++} ++ ++void PlanReadyPQCheck::set_select_id() { ++ if (tab && sj_is_materialize_strategy(tab->get_sj_strategy())) { ++ select_id = tab->sjm_query_block_id(); ++ } else { ++ PQCheck::set_select_id(); ++ } ++} ++ ++void PlanReadyPQCheck::set_select_type() { ++ if (tab && sj_is_materialize_strategy(tab->get_sj_strategy())) { ++ select_type = enum_explain_type::EXPLAIN_MATERIALIZED; ++ } else { ++ PQCheck::set_select_type(); ++ } ++} ++ ++bool PlanReadyPQCheck::suite_for_parallel_query() { ++ for (uint t = 0; t < join->tables; t++) { ++ tab = join->qep_tab + t; ++ if (!tab->position()) { ++ continue; ++ } ++ ++ if (!PQCheck::suite_for_parallel_query()) { ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++bool check_select_id_and_type(Query_block *select_lex) { ++ JOIN *join = select_lex->join; ++ std::unique_ptr check; ++ bool ret = false; ++ ++ if (join == nullptr) { ++ check.reset(new PQCheck(select_lex)); ++ goto END; ++ } ++ ++ switch (join->get_plan_state()) { ++ case JOIN::NO_PLAN: ++ case JOIN::ZERO_RESULT: ++ case JOIN::NO_TABLES: { ++ check.reset(new PQCheck(select_lex)); ++ break; ++ } ++ ++ case JOIN::PLAN_READY: { ++ check.reset(new PlanReadyPQCheck(select_lex)); ++ break; ++ } ++ ++ default: ++ assert(0); ++ } ++ ++END: ++ if 
(check != nullptr) { ++ ret = check->suite_for_parallel_query(); ++ } ++ ++ return ret; ++} ++ ++bool check_pq_conditions(THD *thd) { ++ // max PQ memory size limit ++ if (get_pq_memory_total() >= parallel_memory_limit) { ++ atomic_add(parallel_memory_refused, 1); ++ return false; ++ } ++ ++ // max PQ threads limit ++ if (!check_pq_running_threads(thd->pq_dop, ++ thd->variables.parallel_queue_timeout)) { ++ atomic_add(parallel_threads_refused, 1); ++ return false; ++ } ++ ++ // RBO limit ++ if (!suite_for_parallel_query(thd)) { ++ return false; ++ } ++ ++ if (!suite_for_parallel_query(thd->lex)) { ++ return false; ++ } ++ ++ if (!suite_for_parallel_query(thd->lex->unit)) { ++ return false; ++ } ++ ++ Query_block *select = thd->lex->unit->first_query_block(); ++ if (!suite_for_parallel_query(select)) { ++ return false; ++ } ++ ++ if (!suite_for_parallel_query(select->join)) { ++ return false; ++ } ++ ++ if (!check_select_id_and_type(select)) { ++ return false; ++ } ++ ++ if (!choose_parallel_scan_table(select->join)) { ++ return false; ++ } ++ ++ return true; ++} +diff --git a/sql/pq_condition.h b/sql/pq_condition.h +new file mode 100644 +index 00000000..29005855 +--- /dev/null ++++ b/sql/pq_condition.h +@@ -0,0 +1,35 @@ ++#ifndef PQ_CONDITION_H ++#define PQ_CONDITION_H ++ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++class THD; ++ ++enum class PqConditionStatus { INIT, NOT_SUPPORTED, ENABLED, SUPPORTED }; ++ ++void set_pq_condition_status(THD *thd); ++ ++bool check_pq_conditions(THD *thd); ++ ++#endif /* PQ_CONDITION_H */ +diff --git a/sql/pq_global.h b/sql/pq_global.h +new file mode 100644 +index 00000000..1c27bd61 +--- /dev/null ++++ b/sql/pq_global.h +@@ -0,0 +1,47 @@ ++#ifndef MYSQL_PQ_SQL_GLOBAL_H ++#define MYSQL_PQ_SQL_GLOBAL_H ++ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. 
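
check_pq_running_threads() above throttles admission against parallel_max_threads: when the request does not fit, the millisecond timeout is turned into an absolute deadline for mysql_cond_timedwait(), and a wakeup that was not a timeout causes it to subtract the elapsed time and wait again. The only subtle part is the deadline arithmetic; here is a standalone sketch of the same TIME_THOUSAND / TIME_MILLION / TIME_BILLION computation, assuming a POSIX struct timespec (deadline_after_ms is an invented name):

#include <cassert>
#include <ctime>

// Turn a relative timeout in milliseconds into an absolute timespec deadline.
// One carry step is enough: tv_nsec < 1e9 and the added part is also < 1e9.
timespec deadline_after_ms(const timespec &now, unsigned long timeout_ms) {
  const long kMsPerSec = 1000L;
  const long kNsPerMs = 1000000L;
  const long kNsPerSec = 1000000000L;
  timespec abstime;
  abstime.tv_sec = now.tv_sec + static_cast<long>(timeout_ms / kMsPerSec);
  abstime.tv_nsec = now.tv_nsec + static_cast<long>(timeout_ms % kMsPerSec) * kNsPerMs;
  if (abstime.tv_nsec >= kNsPerSec) {  // carry nanoseconds into the seconds field
    abstime.tv_sec += 1;
    abstime.tv_nsec -= kNsPerSec;
  }
  return abstime;
}

int main() {
  timespec now;
  now.tv_sec = 100;
  now.tv_nsec = 900000000L;                  // 100 s + 0.9 s
  timespec d = deadline_after_ms(now, 250);  // wait up to 250 ms
  assert(d.tv_sec == 101 && d.tv_nsec == 150000000L);
  return 0;
}
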
The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include ++#include ++#include "my_compiler.h" ++#include "my_alloc.h" ++ ++#define TIME_THOUSAND 1000 ++#define TIME_MILLION 1000000 ++#define TIME_BILLION 1000000000 ++ ++template ++T atomic_add(T &value, T n) { ++ return __sync_fetch_and_add(&value, n); ++} ++ ++template ++T atomic_sub(T &value, T n) { ++ return __sync_fetch_and_sub(&value, n); ++} ++ ++#endif //MYSQL_PQ_SQL_GLOBAL_H ++ +diff --git a/sql/pq_range.h b/sql/pq_range.h +new file mode 100644 +index 00000000..430c9e8a +--- /dev/null ++++ b/sql/pq_range.h +@@ -0,0 +1,39 @@ ++#ifndef PQ_RANGE_INCLUDED ++#define PQ_RANGE_INCLUDED ++ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++enum PQ_RANGE_TYPE { ++ PQ_QUICK_SELECT_NONE, ++ PQ_RANGE_SELECT, ++ PQ_RANGE_SELECT_DESC, ++ PQ_SKIP_SCAN_SELECT, ++ PQ_GROUP_MIN_MAX_SELECT, ++ PQ_INDEX_MERGE_SELECT, ++ PQ_ROR_INTERSECT_SELECT, ++ PQ_ROR_UNION_SELECT, ++ PQ_QUICK_SELECT_INVALID ++}; ++ ++#endif /* PQ_RANGE_INCLUDED */ +diff --git a/sql/protocol_classic.h b/sql/protocol_classic.h +index 557569c7..75e9f206 100644 +--- a/sql/protocol_classic.h ++++ b/sql/protocol_classic.h +@@ -2,6 +2,7 @@ + #define PROTOCOL_CLASSIC_INCLUDED + + /* Copyright (c) 2002, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
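
The atomic_add()/atomic_sub() templates in pq_global.h above wrap the legacy GCC/Clang builtins __sync_fetch_and_add and __sync_fetch_and_sub, which update the variable atomically and return the value it held before the update; the refused-query counters bumped in check_pq_conditions() only need the atomic increment itself. A small compilable sketch of those semantics next to the standard std::atomic equivalent (fetch_add_sketch is an invented name):

#include <atomic>
#include <cassert>

template <typename T>
T fetch_add_sketch(T &value, T n) {
  return __sync_fetch_and_add(&value, n);  // GCC/Clang builtin: returns the old value
}

int main() {
  unsigned long counter = 10;
  unsigned long before = fetch_add_sketch(counter, 5UL);
  assert(before == 10 && counter == 15);

  std::atomic<unsigned long> counter2{10};
  unsigned long before2 = counter2.fetch_add(5);  // same semantics in standard C++
  assert(before2 == 10 && counter2.load() == 15);
  return 0;
}
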
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -60,7 +61,7 @@ class Protocol_classic : public Protocol { + THD *m_thd; + String *packet; + String convert; +- uint field_pos; ++ int field_pos; + bool send_metadata; + #ifndef NDEBUG + enum enum_field_types *field_types; +diff --git a/sql/psi_memory_key.cc b/sql/psi_memory_key.cc +index 1658c775..8bcf9a58 100644 +--- a/sql/psi_memory_key.cc ++++ b/sql/psi_memory_key.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -147,6 +148,7 @@ PSI_memory_key key_memory_user_var_entry; + PSI_memory_key key_memory_user_var_entry_value; + PSI_memory_key key_memory_sp_cache; + PSI_memory_key key_memory_write_set_extraction; ++PSI_memory_key key_memory_pq_mem_root; + + #ifdef HAVE_PSI_INTERFACE + +@@ -168,6 +170,8 @@ static PSI_memory_info all_server_memory[] = { + PSI_DOCUMENT_ME}, + {&key_memory_thd_main_mem_root, "THD::main_mem_root", PSI_FLAG_THREAD, 0, + "Main mem root used for e.g. the query arena."}, ++ {&key_memory_pq_mem_root, "THD::parallel_query_mem_root", PSI_FLAG_THREAD, 0, ++ PSI_DOCUMENT_ME}, + {&key_memory_help, "help", 0, 0, + "Temporary memroot used to print help texts as part of usage " + "description."}, +diff --git a/sql/psi_memory_key.h b/sql/psi_memory_key.h +index df52ef8e..67480643 100644 +--- a/sql/psi_memory_key.h ++++ b/sql/psi_memory_key.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -164,6 +165,7 @@ extern PSI_memory_key key_memory_table_mapping_root; + extern PSI_memory_key key_memory_table_share; + extern PSI_memory_key key_memory_test_quick_select_exec; + extern PSI_memory_key key_memory_thd_main_mem_root; ++extern PSI_memory_key key_memory_pq_mem_root; + extern PSI_memory_key key_memory_thd_timer; + extern PSI_memory_key key_memory_thd_transactions; + extern PSI_memory_key key_memory_user_conn; +diff --git a/sql/query_result.cc b/sql/query_result.cc +index 745aa140..cd26aa8f 100644 +--- a/sql/query_result.cc ++++ b/sql/query_result.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -28,6 +29,7 @@ + #include "my_config.h" + #ifdef HAVE_UNISTD_H + #include ++#include "sql/sql_tmp_table.h" + #endif + + #include +@@ -56,6 +58,9 @@ + #include "sql/system_variables.h" + #include "sql_string.h" + #include "template_utils.h" // pointer_cast ++#include "sql_optimizer.h" ++#include "msg_queue.h" ++#include "sql/handler.h" + + using std::min; + +@@ -116,6 +121,230 @@ bool Query_result_send::send_eof(THD *thd) { + return false; + } + ++Query_result_mq::Query_result_mq(JOIN *join, ++ MQueue_handle *msg_handler, ++ handler *file, bool stab_output) : ++ Query_result(), m_table(nullptr), ++ m_param(nullptr), ++ send_fields(nullptr), ++ send_fields_size(0), ++ mq_fields_data(nullptr), ++ mq_fields_null_array(nullptr), ++ mq_fields_null_flag(nullptr) ++ { ++ m_join = join; ++ m_handler = msg_handler; ++ m_file = file; ++ m_stable_output = stab_output; ++ } ++ ++#define MQ_FIELDS_DATA_HEADER_LENGTH 4 ++ ++bool Query_result_mq::send_result_set_metadata(THD *thd, const mem_root_deque &list MY_ATTRIBUTE((unused)), ++ uint flags MY_ATTRIBUTE((unused))){ ++ ++ m_param = new (thd->pq_mem_root) Temp_table_param (); ++ if(!m_param || m_join->make_pq_tables_info()) ++ return true; ++ ++ send_fields = &m_join->tmp_fields[REF_SLICE_PQ_TMP]; ++ uint field_size = send_fields->size(); ++ send_fields_size = field_size + MQ_FIELDS_DATA_HEADER_LENGTH; ++ ++ mq_fields_data = new (thd->pq_mem_root) Field_raw_data[send_fields_size]{}; ++ mq_fields_null_array = new (thd->pq_mem_root) bool[2 * field_size]; ++ mq_fields_null_flag = new (thd->pq_mem_root) char[field_size / MQ_FIELDS_DATA_HEADER_LENGTH + 2]; ++ ++ if(!mq_fields_data || !mq_fields_null_array || !mq_fields_null_flag) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool Query_result_mq::send_data(THD *thd, const mem_root_deque &items MY_ATTRIBUTE((unused))) { ++ DBUG_ENTER("Query_result_mq::send_data"); ++ int null_num = 0; ++ uint32 total_copy_bytes = 0; ++ int i, j; ++ ++ //empty message ++ if (send_fields_size == MQ_FIELDS_DATA_HEADER_LENGTH){ ++ thd->inc_sent_row_count(1); ++ DBUG_RETURN(m_handler->send_exception_msg(EMPTY_MSG)); ++ } ++ ++ Field *result_field = nullptr; ++ int fields_idx= MQ_FIELDS_DATA_HEADER_LENGTH; ++ /* currently supporting ITEM_FIELD and ITEM_FUNC */ ++ //for(Item *item = it++; item; item = it++, fields_idx++){ ++ for(Item *item : *send_fields){ ++ assert(!item->skip_create_tmp_table); ++ //c0: skip null item ++ if (item->type() == Item::NULL_ITEM || item->type() == Item::STRING_ITEM) { ++ pq_build_mq_item(item, &mq_fields_data[fields_idx], ++ mq_fields_null_array, null_num, total_copy_bytes); ++ fields_idx++; ++ continue; ++ } ++ ++ //c2: check Item_copy. 
In the original execution plan, const_item will be ++ //transformed into Item_copy in tmp_table (or ORDERED_GROUP_BY) ++ #if 0 ++ if (item->type() == Item::COPY_STR_ITEM) { ++ Item *orig_item = down_cast(item)->get_item(); ++ assert(orig_item && !orig_item->skip_create_tmp_table); ++ if (orig_item->const_item() || ++ orig_item->basic_const_item()) { ++ pq_build_mq_item(orig_item, &mq_fields_data[fields_idx], ++ mq_fields_null_array,null_num, total_copy_bytes); ++ continue; ++ } ++ } ++ #endif ++ ++ //c3: check item_result_field and item_field ++ result_field = item->get_result_field(); ++ if (!result_field) { ++ if(item->type() == Item::FIELD_ITEM ++ && DBUG_EVALUATE_IF("pq_mq_error4", false, true)) { ++ result_field = down_cast(item)->field; ++ } else { ++ //c4: other cases will be shielded in JOIN::check_first_rewritten_tab ++ sql_print_error("not supported field"); ++ m_handler->send_exception_msg(ERROR_MSG); ++ DBUG_RETURN(true); ++ } ++ } else { ++ item->save_in_field(result_field, true); ++ } ++ ++ assert(result_field); ++ pq_build_mq_fields(result_field, &mq_fields_data[fields_idx], ++ mq_fields_null_array, null_num, total_copy_bytes); ++ ++ fields_idx++; ++ } ++ ++ assert((uint)null_num == 2 * (send_fields_size - MQ_FIELDS_DATA_HEADER_LENGTH)); ++ uint16 null_len = ((null_num % 8 == 0) ? ++ null_num / 8 : ++ null_num / 8 + 1) + 1; ++ ++ memset(mq_fields_null_flag, 0, null_len); ++ ++ /* ++ * Now, we use 2 bps/field as a header to send each Item, where the first bit indicates that the corresponding ++ * item is a CONST_ITEM or not, and the second bit indicates that the related result_field is NULL_FILED or not. ++ * These two bits have at most four status: ++ * (0, 0) => NOT_CONST_ITEM & NON_NULL_FIELD ++ * (0, 1) => NOT_CONST_ITEM & NULL_FIELD ++ * (1, 0) => CONST_ITEM & NON_NULL_FIELD ++ * (1, 1) => CONST_ITEM & NULL_FIELD (such as Item_null) ++ * ++ * Only for the first case (0, 0), we need send the field data to MQ. ++ * ++ * null_flag[j] = 0 indicates the corresponding field is NOT NULL (or it is not a const_item()). ++ * otherwise, null_flag[j] = 1. ++ */ ++ ++ for(i = 0; i < null_num; i++){ ++ if(mq_fields_null_array[i]){ ++ j = (i >> 3) + 1; ++ mq_fields_null_flag[j] += 1 << (7 - (i & 7)); ++ } ++ } ++ ++ mq_fields_data[3].m_ptr = (uchar *)mq_fields_null_flag; ++ mq_fields_data[3].m_len = null_len; ++ total_copy_bytes += null_len; ++ ++ /* there are at most 4096 fields and thus null_len is less than 2 * 2^12/8 = 2^10. ++ * So, we can use 2 bytes to store it. ++ */ ++ mq_fields_data[2].m_ptr = (uchar *) &null_len; ++ mq_fields_data[2].m_len = 2; ++ total_copy_bytes += 2; ++ ++ if (m_stable_output) { ++ assert(m_file && m_file->ht->db_type == DB_TYPE_INNODB); ++ mq_fields_data[1].m_ptr = &m_file->ref[0]; ++ mq_fields_data[1].m_len = m_file->ref_length; ++ mq_fields_data[1].m_var_len = 0; ++ mq_fields_data[1].m_need_send = m_file->ref_length ? 
true : false; ++ total_copy_bytes += m_file->ref_length; ++ } else { ++ mq_fields_data[1].m_need_send = false; ++ } ++ ++ /* for total_copy_bytes, it is less than 2^16 * 2^16 = 2^32 and ++ * we can use 4 bytes to store it ++ */ ++ mq_fields_data[0].m_ptr = (uchar *) &total_copy_bytes; ++ mq_fields_data[0].m_len = 4; ++ ++ // send messages to mq ++ MQ_RESULT res; ++ for (i = 0; i < (int) send_fields_size; i++) { ++ /* for the case of NULL field, we need not send msg to MQ */ ++ if (!mq_fields_data[i].m_need_send) ++ continue; ++ ++ res = m_handler->send(&mq_fields_data[i]); ++ ++ //In some case, we should detach the MQ and thus the MQ_DETACHED status can also ++ // be considered as an normal status. ++ if (res == MQ_DETACHED) ++ DBUG_RETURN(false); ++ ++ if (res != MQ_SUCCESS ++ || DBUG_EVALUATE_IF("pq_mq_error5", true, false)) { ++ sql_print_error("send message to MQ error"); ++ m_handler->send_exception_msg(ERROR_MSG); ++ DBUG_RETURN(true); ++ } ++ } ++ ++ thd->inc_sent_row_count(1); ++ DBUG_RETURN(false); ++} ++ ++void Query_result_mq::cleanup(THD *thd MY_ATTRIBUTE((unused))) { ++ if (m_param) { ++ m_param->cleanup(); ++ destroy(m_param); ++ m_param = nullptr; ++ } ++ ++ if (m_table) { ++ close_tmp_table(m_table); ++ free_tmp_table(m_table); ++ m_table = nullptr; ++ } ++ ++ if (mq_fields_data) { ++ destroy(mq_fields_data); ++ mq_fields_data = nullptr; ++ } ++ ++ if (mq_fields_null_array) { ++ destroy(mq_fields_null_array); ++ mq_fields_null_array = nullptr; ++ } ++ ++ if (mq_fields_null_flag) { ++ destroy(mq_fields_null_flag); ++ mq_fields_null_flag = nullptr; ++ } ++} ++ ++bool Query_result_mq::send_eof(THD * thd) { ++ ++ if (thd->is_error()) return true; ++ ::my_eof(thd); ++ return false; ++} ++ + static const String default_line_term("\n", default_charset_info); + static const String default_escaped("\\", default_charset_info); + static const String default_field_term("\t", default_charset_info); +diff --git a/sql/query_result.h b/sql/query_result.h +index e6e297bc..02497d26 100644 +--- a/sql/query_result.h ++++ b/sql/query_result.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
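
The null-flag handling in Query_result_mq::send_data() above packs two status bits per field (item is constant / result field is NULL) MSB-first into a byte buffer whose first byte is left zeroed, which is why the indexing is j = (i >> 3) + 1, the mask is 1 << (7 - (i & 7)), and null_len is the rounded-up byte count plus one. A standalone sketch of just that packing step (pack_flags is an invented name; it uses |= where the hunk uses +=, equivalent here because each bit is set at most once):

#include <cassert>
#include <cstddef>
#include <cstring>

// Pack a flag array MSB-first into out[], leaving out[0] as a reserved zero byte.
void pack_flags(const bool *flags, int nflags, unsigned char *out, std::size_t out_len) {
  std::memset(out, 0, out_len);
  for (int i = 0; i < nflags; i++) {
    if (flags[i]) {
      int byte_idx = (i >> 3) + 1;           // skip the reserved leading byte
      out[byte_idx] |= 1u << (7 - (i & 7));  // flag 0 maps to the most significant bit
    }
  }
}

int main() {
  bool flags[10] = {true, false, true};      // 5 fields, two flag bits each
  unsigned char out[4];
  pack_flags(flags, 10, out, sizeof(out));
  assert(out[0] == 0x00);                    // reserved byte untouched
  assert(out[1] == 0xA0);                    // flags 0 and 2 set -> 1010 0000
  return 0;
}
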
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -44,9 +45,15 @@ class Item; + class Item_subselect; + class PT_select_var; + class Query_expression; ++class TABLE; ++class Temp_table_param; + class THD; + struct CHARSET_INFO; + struct TABLE_LIST; ++class JOIN; ++class MQueue_handle; ++struct Field_raw_data; ++class handler; + + /* + This is used to get result from a query +@@ -68,6 +75,8 @@ class Query_result { + */ + double estimated_cost; + ++ virtual MQueue_handle *get_mq_handler () { return nullptr; } ++ + Query_result() : unit(nullptr), estimated_rowcount(0), estimated_cost(0) {} + virtual ~Query_result() {} + +@@ -208,6 +217,49 @@ class Query_result_interceptor : public Query_result { + bool is_interceptor() const final { return true; } + }; + ++class Query_result_mq : public Query_result { ++public: ++ Query_result_mq () : Query_result(), m_table(nullptr), ++ m_param(nullptr), ++ m_handler(nullptr), ++ m_join(nullptr), ++ send_fields(nullptr), ++ send_fields_size(0), ++ mq_fields_data(nullptr), ++ mq_fields_null_array(nullptr), ++ mq_fields_null_flag(nullptr), ++ m_file(nullptr), ++ m_stable_output(false) ++ {} ++ ++ Query_result_mq (JOIN *join, MQueue_handle *msg_handler, ++ handler *file=nullptr, bool stab_output=false); ++ ~Query_result_mq() {} ++ bool send_result_set_metadata(THD *thd, const mem_root_deque &list, ++ uint flags) override; ++ bool send_data(THD *thd, const mem_root_deque &items) override; ++ bool send_eof(THD *thd MY_ATTRIBUTE((unused))) override; ++ bool check_simple_query_block() const override { return false; } ++ void cleanup(THD *) override; ++ MQueue_handle *get_mq_handler() override { return m_handler; } ++ ++ TABLE *m_table{nullptr}; ++ Temp_table_param *m_param{nullptr}; ++ MQueue_handle *m_handler{nullptr}; ++ ++private: ++ JOIN *m_join{nullptr}; ++ mem_root_deque *send_fields{nullptr}; ++ uint send_fields_size{0}; ++ Field_raw_data *mq_fields_data{nullptr}; ++ bool *mq_fields_null_array{nullptr}; ++ char *mq_fields_null_flag{nullptr}; ++ ++ //for stable output ++ handler *m_file; ++ bool m_stable_output; ++}; ++ + class Query_result_send : public Query_result { + /** + True if we have sent result set metadata to the client. +diff --git a/sql/records.cc b/sql/records.cc +index 48b49d47..4c6efc57 100644 +--- a/sql/records.cc ++++ b/sql/records.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -53,6 +54,11 @@ + #include "sql/sql_tmp_table.h" + #include "sql/table.h" + #include "sql/timing_iterator.h" ++#include "sql/sql_parallel.h" ++#include "sql/exchange_sort.h" ++#include "sql/sql_parse.h" ++#include "sql/mysqld.h" ++#include "sql/exchange.h" + + using std::string; + using std::vector; +@@ -152,7 +158,7 @@ template class IndexScanIterator; + See AccessPath::count_examined_rows. + */ + AccessPath *create_table_access_path(THD *thd, TABLE *table, QEP_TAB *qep_tab, +- bool count_examined_rows) { ++ bool count_examined_rows, bool *pq_replace_path) { + // If only 'table' is given, assume no quick, no condition. 
+ if (table != nullptr && qep_tab != nullptr) { + assert(table == qep_tab->table()); +@@ -160,16 +166,23 @@ AccessPath *create_table_access_path(THD *thd, TABLE *table, QEP_TAB *qep_tab, + table = qep_tab->table(); + } + ++ bool could_replace_path = false; ++ + AccessPath *path; + if (qep_tab != nullptr && qep_tab->quick() != nullptr) { + path = NewIndexRangeScanAccessPath(thd, table, qep_tab->quick(), + count_examined_rows); ++ could_replace_path = true; + } else if (qep_tab != nullptr && qep_tab->table_ref != nullptr && + qep_tab->table_ref->is_recursive_reference()) { + path = NewFollowTailAccessPath(thd, table, count_examined_rows); + } else { + path = NewTableScanAccessPath(thd, table, count_examined_rows); ++ could_replace_path = true; + } ++ ++ if (pq_replace_path) *pq_replace_path = could_replace_path; ++ + if (qep_tab != nullptr && qep_tab->position() != nullptr) { + SetCostOnTableAccessPath(*thd->cost_model(), qep_tab->position(), + /*is_after_filter=*/false, path); +@@ -320,6 +333,346 @@ int IndexRangeScanIterator::Read() { + return 0; + } + ++ParallelScanIterator::ParallelScanIterator(THD *thd, QEP_TAB *tab, TABLE *table, ++ ha_rows *examined_rows, JOIN *join, ++ Gather_operator *gather, ++ bool stab_output, uint ref_length) ++ : TableRowIterator(thd, table), ++ m_record(table->record[0]), ++ m_examined_rows(examined_rows), ++ m_dop(gather->m_dop), ++ m_join(join), ++ m_gather(gather), ++ m_record_gather(nullptr), ++ m_order(nullptr), ++ m_tab(tab), ++ m_stable_sort(stab_output), ++ m_ref_length(ref_length) { ++ thd->pq_iterator = this; ++} ++ ++/** ++ * construct filesort on leader when needing stab_output or merge_sort ++ * ++ * @retavl: false if success, and otherwise true ++ */ ++bool ParallelScanIterator::pq_make_filesort(Filesort **sort) { ++ *sort = NULL; ++ ++ /** construct sort order based on group */ ++ if (m_join->pq_rebuilt_group) { ++ assert(m_join->query_block->saved_group_list_ptrs); ++ restore_list(m_join->query_block->saved_group_list_ptrs, ++ m_join->query_block->group_list); ++ m_order = restore_optimized_group_order( ++ m_join->query_block->group_list, ++ m_join->saved_optimized_vars.optimized_group_flags); ++ } else { ++ /** ++ * if sorting is built after the first rewritten table, then ++ * we have no need to rebuilt the sort order on leader, because ++ * leader will do SortingIterator. 
++ */ ++ if (m_join->pq_last_sort_idx >= (int)m_join->tables && ++ m_join->qep_tab[m_join->pq_last_sort_idx].filesort != nullptr) { ++ return false; ++ } else { ++ if ((m_order = m_join->order.order) == nullptr) { ++ if (m_join->m_ordered_index_usage == JOIN::ORDERED_INDEX_ORDER_BY && ++ m_join->query_block->saved_order_list_ptrs) { ++ restore_list(m_join->query_block->saved_order_list_ptrs, ++ m_join->query_block->order_list); ++ m_order = restore_optimized_group_order( ++ m_join->query_block->order_list, ++ m_join->saved_optimized_vars.optimized_order_flags); ++ } else { ++ std::vector used_key_fields; ++ if (get_table_key_fields(&m_join->qep_tab0[m_tab->pos], ++ used_key_fields) || ++ DBUG_EVALUATE_IF("pq_msort_error1", true, false)) ++ return true; ++ ++ if (set_key_order(m_tab, used_key_fields, &m_order, ++ &m_join->ref_items[REF_SLICE_PQ_TMP]) || ++ DBUG_EVALUATE_IF("pq_msort_error2", true, false)) ++ return true; ++ } ++ } ++ } ++ } ++ ++ /** support stable sort on TABLE/INDEX SCAN */ ++ if (m_order || m_stable_sort) { ++ *sort = m_tab->filesort; ++ if (!(*sort)) { ++ (*sort) = new (m_join->thd->pq_mem_root) ++ Filesort(m_join->thd, {m_tab->table()}, false, m_order, HA_POS_ERROR, ++ false, false, false, false); ++ if (!(*sort) || DBUG_EVALUATE_IF("pq_msort_error3", true, false)) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/** ++ * init the mq_record_gather ++ */ ++bool ParallelScanIterator::pq_init_record_gather() { ++ THD *thd = m_join->thd; ++ Filesort *sort = NULL; ++ if (pq_make_filesort(&sort)) return true; ++ m_record_gather = new (thd->pq_mem_root) MQ_record_gather(thd, m_tab); ++ if (!m_record_gather || ++ m_record_gather->mq_scan_init(sort, m_gather->m_dop, m_ref_length, ++ m_stable_sort) || ++ DBUG_EVALUATE_IF("pq_msort_error4", true, false)) ++ return true; ++ ++ /** set each worker's MQ_handle */ ++ for (uint i = 0; i < m_gather->m_dop; i++) { ++ m_gather->m_workers[i]->m_handle = ++ m_record_gather->m_exchange->get_mq_handle(i); ++ } ++ return false; ++} ++ ++/** ++ * launch worker threads ++ * ++ * @retval: false if success, and otherwise true ++ */ ++bool ParallelScanIterator::pq_launch_worker() { ++ THD *thd = m_join->thd; ++ assert(thd == current_thd); ++ ++ Gather_operator *gather = m_tab->gather; ++ PQ_worker_manager **workers = gather->m_workers; ++ int launch_workers = 0; ++ ++ /** when workers encounter error during execution, directly abort the parallel ++ * execution */ ++ for (uint i = 0; i < m_gather->m_dop; i++) { ++ assert(!workers[i]->thd_worker && ++ (workers[i]->m_status == PQ_worker_state::INIT)); ++ if (thd->is_error() || thd->pq_error) goto err; ++ my_thread_handle id; ++ id.thread = 0; ++ /** ++ * pq_worker_error8: all workers are fialed to landuch ++ * pq_worker_error9: worker's id in [0, 2, 4, ..] 
are failed to lanuch ++ */ ++ if (DBUG_EVALUATE_IF("pq_worker_error8", false, true) && ++ DBUG_EVALUATE_IF("pq_worker_error9", (i % 2), true)) { ++ mysql_thread_create(key_thread_parallel_query, &id, NULL, pq_worker_exec, ++ (void *)workers[i]); ++ } ++ workers[i]->thread_id = id; ++ int expected_status = PQ_worker_state::READY | PQ_worker_state::COMPELET | ++ PQ_worker_state::ERROR; ++ if (id.thread != 0) { ++ /** Record the thread id so that we can later determine whether the thread ++ * started */ ++ workers[i]->m_active = workers[i]->wait_for_status(thd, expected_status); ++ /** partial workers may fail before execution */ ++ if (!workers[i]->m_active || ++ DBUG_EVALUATE_IF("pq_worker_error7", (i >= m_gather->m_dop / 2), ++ false)) { ++ goto err; ++ } ++ launch_workers++; ++ } else { ++ sql_print_warning("worker %d has failed to start up\n", i); ++ MQueue_handle *mq_handler = m_record_gather->m_exchange->get_mq_handle(i); ++ if (mq_handler) mq_handler->set_datched_status(MQ_HAVE_DETACHED); ++ } ++ } ++ /** if all workers are not launched, then directly return false */ ++ if (!launch_workers) goto err; ++ return false; ++ ++err: ++ for (uint i = 0; i < m_gather->m_dop; i++) { ++ if (workers[i]->thread_id.thread && workers[i]->thd_worker) { ++ workers[i]->thd_worker->pq_error = true; ++ } ++ } ++ return true; ++} ++ ++/** ++ * wait all workers finish their execution ++ */ ++void ParallelScanIterator::pq_wait_workers_finished() { ++ THD *leader_thd = m_join->thd; ++ assert(leader_thd == current_thd); ++ ++ /** ++ * leader first detached the message queue, and then wait workers finish ++ * the execution. The reason for detach MQ is that leader has fetched the ++ * satisfied #records (e.g., limit operation). ++ */ ++ if (m_record_gather) { ++ Exchange *exchange = m_record_gather->m_exchange; ++ MQueue_handle *m_handle = nullptr; ++ for (uint i = 0; i < m_gather->m_dop; i++) { ++ if ((m_handle = exchange->get_mq_handle(i))) { ++ m_handle->set_datched_status(MQ_HAVE_DETACHED); ++ } ++ } ++ } ++ ++ /** ++ * wait all such workers to finish execution, two conditions must meet: ++ * c1: the worker thread has been created ++ * c2: the worker has not yet finished ++ */ ++ int expected_status = PQ_worker_state::COMPELET | PQ_worker_state::ERROR; ++ for (uint i = 0; i < m_gather->m_dop; i++) { ++ if (m_gather->m_workers[i]->thread_id.thread != 0) // c1 ++ { ++ if (m_gather->m_workers[i]->m_active && ++ !(((unsigned int)m_gather->m_workers[i]->m_status) & ++ PQ_worker_state::COMPELET)) { ++ m_gather->m_workers[i]->wait_for_status(leader_thd, expected_status); ++ } ++ my_thread_join(&m_gather->m_workers[i]->thread_id, NULL); ++ } ++ } ++} ++ ++int ParallelScanIterator::pq_error_code() { ++ THD *thd = m_join->thd; ++ ++ if (m_gather->m_ha_err == HA_ERR_TABLE_DEF_CHANGED) { ++ m_gather->m_ha_err = 0; ++ return HA_ERR_TABLE_DEF_CHANGED; ++ } ++ ++ if (thd->is_killed()) { ++ thd->send_kill_message(); ++ } ++ ++ /** collect worker threads status from DA info */ ++ JOIN *tmplate_join = m_gather->m_template_join; ++ THD *temp_thd = tmplate_join->thd; ++ thd->pq_status_reset(); ++ thd->pq_merge_status(temp_thd); ++ Diagnostics_area *da = temp_thd->get_stmt_da(); ++ if (temp_thd->is_error()) { ++ temp_thd->raise_condition(da->mysql_errno(), da->returned_sqlstate(), ++ Sql_condition::SL_ERROR, da->message_text()); ++ } ++ ++ if (da->cond_count() > 0) { ++ Diagnostics_area::Sql_condition_iterator it = da->sql_conditions(); ++ const Sql_condition *cond; ++ while ((cond = it++)) { ++ 
thd->raise_condition(cond->mysql_errno(), NULL, cond->severity(), ++ cond->message_text()); ++ } ++ } ++ /** output parallel error code */ ++ if (!temp_thd->is_error() && !thd->is_error() && thd->pq_error && ++ !thd->running_explain_analyze) { ++ my_error(ER_PARALLEL_EXEC_ERROR, MYF(0)); ++ } ++ return 1; ++} ++ ++bool ParallelScanIterator::Init() { ++ assert(current_thd == m_join->thd); ++ m_gather->waitReadEnd(); ++ if (m_gather->init() || /** cur innodb data, ++ should be called first(will change dop based on ++ split count) */ ++ pq_init_record_gather() || /** init mq_record_gather */ ++ pq_launch_worker() || /** launch worker threads */ ++ DBUG_EVALUATE_IF("pq_worker_error6", true, false)) { ++ m_join->thd->pq_error = true; ++ return true; ++ } ++ return false; ++} ++ ++int ParallelScanIterator::Read() { ++ /** kill query */ ++ if (m_join->thd->is_killed()) { ++ m_join->thd->send_kill_message(); ++ return 1; ++ } ++ /** fetch message from MQ to table->record[0] */ ++ if (m_record_gather->mq_scan_next()) return 0; ++ return -1; ++} ++ ++int ParallelScanIterator::End() { ++ m_gather->signalReadEnd(); ++ /** wait all workers to finish their execution */ ++ pq_wait_workers_finished(); ++ /** output error code */ ++ return pq_error_code(); ++} ++ ++ParallelScanIterator::~ParallelScanIterator() { ++ table()->file->ha_index_or_rnd_end(); ++ /** cleanup m_record_gather */ ++ if (m_record_gather) { ++ m_record_gather->mq_scan_end(); ++ } ++} ++ ++PQblockScanIterator::PQblockScanIterator(THD *thd, TABLE *table, uchar *record, ++ ha_rows *examined_rows, ++ Gather_operator *gather, ++ bool need_rowid) ++ : TableRowIterator(thd, table), ++ m_record(record), ++ m_examined_rows(examined_rows), ++ m_pq_ctx(gather->m_pq_ctx), ++ keyno(gather->keyno), ++ m_gather(gather), ++ m_need_rowid(need_rowid) { ++ thd->pq_iterator = this; ++} ++ ++bool PQblockScanIterator::Init() { ++ table()->file->pq_worker_scan_init(keyno, m_pq_ctx); ++ return false; ++} ++ ++int PQblockScanIterator::End() { ++ assert(thd() && thd()->pq_leader); ++ if (m_gather) m_gather->signalAll(); ++ return -1; ++} ++ ++PQblockScanIterator::~PQblockScanIterator() {} ++ ++int PQblockScanIterator::Read() { ++ int tmp; ++ while ((tmp = table()->file->ha_pq_next(m_record, m_pq_ctx))) { ++ /* ++ ha_rnd_next can return RECORD_DELETED for MyISAM when one thread is ++ reading and another deleting without locks. ++ */ ++ if (tmp == HA_ERR_RECORD_DELETED && !thd()->killed) continue; ++ return HandleError(tmp); ++ } ++ ++ if (m_examined_rows != nullptr) { ++ ++*m_examined_rows; ++ } ++ // write row_id into file ++ if (m_need_rowid) { ++ assert(table()->file->ht->db_type == DB_TYPE_INNODB); ++ assert(table()->record[0] == m_record); ++ table()->file->position(m_record); ++ } ++ ++ return 0; ++} ++ + TableScanIterator::TableScanIterator(THD *thd, TABLE *table, + double expected_rows, + ha_rows *examined_rows) +diff --git a/sql/records.h b/sql/records.h +index 5b6f47a8..20d77358 100644 +--- a/sql/records.h ++++ b/sql/records.h +@@ -1,6 +1,7 @@ + #ifndef SQL_RECORDS_H + #define SQL_RECORDS_H + /* Copyright (c) 2008, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
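
PQblockScanIterator::Read() above is the worker-side scan loop: it keeps calling ha_pq_next() while the handler reports HA_ERR_RECORD_DELETED (a row removed concurrently), turns any other non-zero code into an iterator error, and otherwise counts the row and, for stable output, records its row id. A toy model of that retry loop (FakeScan, ReadCode and read_one_row are invented for illustration):

#include <cassert>

enum ReadCode { READ_OK = 0, READ_RECORD_DELETED = 1, READ_EOF = 2 };

struct FakeScan {  // stand-in for the handler's ha_pq_next()
  int calls = 0;
  ReadCode next() {
    ++calls;
    if (calls == 2) return READ_RECORD_DELETED;  // simulate a concurrent delete
    if (calls > 4) return READ_EOF;
    return READ_OK;
  }
};

// Returns 0 when a row was produced, or the terminating code otherwise.
int read_one_row(FakeScan &scan, bool killed) {
  int rc;
  while ((rc = scan.next()) != READ_OK) {
    if (rc == READ_RECORD_DELETED && !killed) continue;  // transient: retry
    return rc;                                           // real error or end of scan
  }
  return 0;
}

int main() {
  FakeScan scan;
  assert(read_one_row(scan, false) == 0);          // row
  assert(read_one_row(scan, false) == 0);          // deleted row skipped, then a row
  assert(read_one_row(scan, false) == 0);          // row
  assert(read_one_row(scan, false) == READ_EOF);   // scan exhausted
  return 0;
}
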
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -40,7 +41,7 @@ struct AccessPath; + struct TABLE; + + AccessPath *create_table_access_path(THD *thd, TABLE *table, QEP_TAB *qep_tab, +- bool count_examined_rows); ++ bool count_examined_rows, bool *pq_replace_path = nullptr); + + /** + Creates an iterator for the given table, then calls Init() on the resulting +diff --git a/sql/row_iterator.h b/sql/row_iterator.h +index 0f014582..aaf126fc 100644 +--- a/sql/row_iterator.h ++++ b/sql/row_iterator.h +@@ -2,6 +2,7 @@ + #define SQL_ROW_ITERATOR_H_ + + /* Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -186,6 +187,14 @@ class RowIterator { + virtual RowIterator *real_iterator() { return this; } + virtual const RowIterator *real_iterator() const { return this; } + ++ /** ++ * Ends the iterator ++ * @return 0: the normal iterator ++ * @return 1: the ParallelScanIterator ++ * @retrun -1: the PQblockScanIterator ++ */ ++ virtual int End() { return 0; } ++ + protected: + THD *thd() const { return m_thd; } + +diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc +index 1d9defb2..abd7484e 100644 +--- a/sql/signal_handler.cc ++++ b/sql/signal_handler.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -142,6 +143,9 @@ extern "C" void handle_fatal_signal(int sig) { + case THD::KILL_TIMEOUT: + kreason = "KILL_TIMEOUT"; + break; ++ case THD::KILL_PQ_QUERY: ++ kreason = "KILL_PQ_QUERY"; ++ break; + case THD::KILLED_NO_VALUE: + kreason = "KILLED_NO_VALUE"; + break; +diff --git a/sql/sorting_iterator.h b/sql/sorting_iterator.h +index 18ce47b1..8eb9c14f 100644 +--- a/sql/sorting_iterator.h ++++ b/sql/sorting_iterator.h +@@ -2,6 +2,7 @@ + #define SQL_SORTING_ITERATOR_H_ + + /* Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -86,6 +87,8 @@ class SortingIterator final : public RowIterator { + + int Read() override { return m_result_iterator->Read(); } + ++ int End() override { return m_source_iterator->End(); } ++ + void SetNullRowFlag(bool is_null_row) override { + if (m_result_iterator == nullptr) { + // If we don't have a result yet, it will come up with the flag unset. +diff --git a/sql/sp_head.cc b/sql/sp_head.cc +index 5faf0a2f..f4f4d71a 100644 +--- a/sql/sp_head.cc ++++ b/sql/sp_head.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2002, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License, version 2.0,
+@@ -2488,7 +2489,10 @@ bool sp_head::execute_trigger(THD *thd, const LEX_CSTRING &db_name,
+ 
+   locker = MYSQL_START_SP(&psi_state, m_sp_share);
+ #endif
++  // disable parallel query for triggers
++  thd->in_sp_trigger++;
+   err_status = execute(thd, false);
++  thd->in_sp_trigger--;
+ #ifdef HAVE_PSI_SP_INTERFACE
+   MYSQL_END_SP(locker);
+ #endif
+@@ -2666,7 +2670,10 @@ bool sp_head::execute_function(THD *thd, Item **argp, uint argcount,
+ 
+   locker = MYSQL_START_SP(&psi_state, m_sp_share);
+ #endif
++  // disable parallel query for stored functions
++  thd->in_sp_trigger++;
+   err_status = execute(thd, true);
++  thd->in_sp_trigger--;
+ #ifdef HAVE_PSI_SP_INTERFACE
+   MYSQL_END_SP(locker);
+ #endif
+@@ -2868,7 +2875,10 @@ bool sp_head::execute_procedure(THD *thd, mem_root_deque<Item *> *args) {
+ 
+   locker = MYSQL_START_SP(&psi_state, m_sp_share);
+ #endif
++  // disable parallel query for stored procedures
++  thd->in_sp_trigger++;
+   if (!err_status) err_status = execute(thd, true);
++  thd->in_sp_trigger--;
+ #ifdef HAVE_PSI_SP_INTERFACE
+   MYSQL_END_SP(locker);
+ #endif
+diff --git a/sql/sql_base.cc b/sql/sql_base.cc
+index 9611eed3..2d471cbc 100644
+--- a/sql/sql_base.cc
++++ b/sql/sql_base.cc
+@@ -1,4 +1,5 @@
+ /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
++   Copyright (c) 2022, Huawei Technologies Co., Ltd.
+ 
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2.0,
+@@ -2719,7 +2720,9 @@ static bool tdc_wait_for_old_version(THD *thd, const char *db,
+   bool res = false;
+ 
+   mysql_mutex_lock(&LOCK_open);
+-  if ((share = get_cached_table_share(db, table_name)) &&
++  // When the current thread is a PQ worker, it must not wait for old table versions
++  // to be flushed: the flushing thread is itself waiting for the PQ leader to finish.
++  if (!thd->is_worker() && (share = get_cached_table_share(db, table_name)) &&
+       share->has_old_version()) {
+     struct timespec abstime;
+     set_timespec(&abstime, wait_timeout);
+@@ -3277,7 +3280,7 @@ retry_share : {
+ 
+ share_found:
+   if (!(flags & MYSQL_OPEN_IGNORE_FLUSH)) {
+-    if (share->has_old_version()) {
++    if (!thd->is_worker() && share->has_old_version()) {
+       /*
+         We already have an MDL lock.
But we have encountered an old + version of table in the table definition cache which is possible +@@ -7921,10 +7924,16 @@ Field *find_field_in_tables(THD *thd, Item_ident *item, TABLE_LIST *first_table, + + for (cur_table = first_table; cur_table != last_table; + cur_table = cur_table->next_name_resolution_table) { +- Field *cur_field = find_field_in_table_ref( ++ Field *cur_field = nullptr; ++ if (thd->parallel_exec && item->m_tableno != cur_table->m_tableno) { ++ continue; ++ } else { ++ cur_field = find_field_in_table_ref( + thd, cur_table, name, length, item->item_name.ptr(), db, table_name, + ref, want_privilege, allow_rowid, &field_index, register_tree_change, + &actual_table); ++ } ++ + if ((cur_field == nullptr && thd->is_error()) || cur_field == WRONG_GRANT) + return nullptr; + +@@ -8900,7 +8909,7 @@ bool setup_fields(THD *thd, ulong want_privilege, bool allow_sum_func, + bool split_sum_funcs, bool column_update, + const mem_root_deque *typed_items, + mem_root_deque *fields, +- Ref_item_array ref_item_array) { ++ Ref_item_array ref_item_array, bool skip_check_grant) { + DBUG_TRACE; + + Query_block *const select = thd->lex->current_query_block(); +@@ -8913,12 +8922,14 @@ bool setup_fields(THD *thd, ulong want_privilege, bool allow_sum_func, + assert(want_privilege == 0 || want_privilege == SELECT_ACL || + want_privilege == INSERT_ACL || want_privilege == UPDATE_ACL); + assert(!(column_update && (want_privilege & SELECT_ACL))); +- if (want_privilege & SELECT_ACL) ++ if (!skip_check_grant) { ++ if (want_privilege & SELECT_ACL) + thd->mark_used_columns = MARK_COLUMNS_READ; + else if (want_privilege & (INSERT_ACL | UPDATE_ACL) && !column_update) + thd->mark_used_columns = MARK_COLUMNS_WRITE; + else + thd->mark_used_columns = MARK_COLUMNS_NONE; ++ } + + DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns)); + if (allow_sum_func) +diff --git a/sql/sql_base.h b/sql/sql_base.h +index df45796c..bf02ba7f 100644 +--- a/sql/sql_base.h ++++ b/sql/sql_base.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2010, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -219,7 +220,7 @@ bool setup_fields(THD *thd, ulong want_privilege, bool allow_sum_func, + bool split_sum_funcs, bool column_update, + const mem_root_deque *typed_items, + mem_root_deque *fields, +- Ref_item_array ref_item_array); ++ Ref_item_array ref_item_array, bool skip_check_grant = false); + bool fill_record(THD *thd, TABLE *table, const mem_root_deque &fields, + const mem_root_deque &values, MY_BITMAP *bitmap, + MY_BITMAP *insert_into_fields_bitmap, +diff --git a/sql/sql_class.cc b/sql/sql_class.cc +index e01d3381..d44ee374 100644 +--- a/sql/sql_class.cc ++++ b/sql/sql_class.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -107,6 +108,7 @@ + #include "sql/xa.h" + #include "template_utils.h" + #include "thr_mutex.h" ++#include "sql/sql_parallel.h" + + class Parse_tree_root; + +@@ -337,6 +339,40 @@ void THD::enter_stage(const PSI_stage_info *new_stage, + return; + } + ++void THD::enter_cond(mysql_cond_t *cond, mysql_mutex_t *mutex, ++ const PSI_stage_info *stage, PSI_stage_info *old_stage, ++ const char *src_function, const char *src_file, ++ int src_line) { ++ DBUG_TRACE; ++ mysql_mutex_assert_owner(mutex); ++ /* ++ Sic: We don't lock LOCK_current_cond here. ++ If we did, we could end up in deadlock with THD::awake() ++ which locks current_mutex while LOCK_current_cond is locked. ++ */ ++ current_mutex = mutex; ++ current_cond = cond; ++ enter_stage(stage, old_stage, src_function, src_file, src_line); ++ return; ++} ++ ++void THD::exit_cond(const PSI_stage_info *stage, const char *src_function, ++ const char *src_file, int src_line) { ++ DBUG_TRACE; ++ /* ++ current_mutex must be unlocked _before_ LOCK_current_cond is ++ locked (if that would not be the case, you'll get a deadlock if someone ++ does a THD::awake() on you). ++ */ ++ mysql_mutex_assert_not_owner(current_mutex.load()); ++ mysql_mutex_lock(&LOCK_current_cond); ++ current_mutex = nullptr; ++ current_cond = nullptr; ++ mysql_mutex_unlock(&LOCK_current_cond); ++ enter_stage(stage, nullptr, src_function, src_file, src_line); ++ return; ++} ++ + void Open_tables_state::set_open_tables_state(Open_tables_state *state) { + this->open_tables = state->open_tables; + +@@ -371,6 +407,16 @@ THD::THD(bool enable_plugins) + m_dd_client(new dd::cache::Dictionary_client(this)), + m_query_string(NULL_CSTR), + m_db(NULL_CSTR), ++ pq_leader(nullptr), ++ parallel_exec(false), ++ pq_threads_running(0), ++ pq_dop(0), ++ no_pq(false), ++ in_sp_trigger(0), ++ locking_clause(0), ++ pq_error(false), ++ pq_check_fields(0), ++ pq_check_reclen(0), + rli_fake(nullptr), + rli_slave(nullptr), + initial_status_var(nullptr), +@@ -458,6 +504,13 @@ THD::THD(bool enable_plugins) + init_sql_alloc(key_memory_thd_main_mem_root, &main_mem_root, + global_system_variables.query_alloc_block_size, + global_system_variables.query_prealloc_size); ++ pq_mem_root = nullptr, ++ pq_mem_root = new MEM_ROOT(); ++ init_sql_alloc(key_memory_pq_mem_root, pq_mem_root, ++ global_system_variables.query_alloc_block_size, ++ global_system_variables.query_prealloc_size); ++ pq_mem_root->allocCBFunc = add_pq_memory; ++ pq_mem_root->freeCBFunc = sub_pq_memory; + stmt_arena = this; + thread_stack = nullptr; + m_catalog.str = "std"; +@@ -471,6 +524,7 @@ THD::THD(bool enable_plugins) + num_truncated_fields = 0L; + m_sent_row_count = 0L; + current_found_rows = 0; ++ pq_current_found_rows = 0; + previous_found_rows = 0; + is_operating_gtid_table_implicitly = false; + is_operating_substatement_implicitly = false; +@@ -517,6 +571,7 @@ THD::THD(bool enable_plugins) + mysql_mutex_init(key_LOCK_query_plan, &LOCK_query_plan, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_current_cond, &LOCK_current_cond, + MY_MUTEX_INIT_FAST); ++ mysql_mutex_init(0, &pq_lock_worker, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_COND_thr_lock, &COND_thr_lock); + + /* Variables with default values */ +@@ -979,7 +1034,9 @@ void THD::cleanup(void) { + + /* Protects user_vars. 
*/ + mysql_mutex_lock(&LOCK_thd_data); +- user_vars.clear(); ++ if (!is_worker()) { ++ user_vars.clear(); ++ } + mysql_mutex_unlock(&LOCK_thd_data); + + /* +@@ -1113,6 +1170,7 @@ THD::~THD() { + mysql_mutex_destroy(&LOCK_thd_sysvar); + mysql_mutex_destroy(&LOCK_thd_protocol); + mysql_mutex_destroy(&LOCK_current_cond); ++ mysql_mutex_destroy(&pq_lock_worker); + mysql_cond_destroy(&COND_thr_lock); + #ifndef NDEBUG + dbug_sentry = THD_SENTRY_GONE; +@@ -1137,6 +1195,11 @@ THD::~THD() { + unregister_slave(this, true, true); + + free_root(&main_mem_root, MYF(0)); ++ if (pq_mem_root) ++ { ++ free_root(pq_mem_root, MYF(0)); ++ delete pq_mem_root; ++ } + + if (m_token_array != nullptr) { + my_free(m_token_array); +@@ -1187,6 +1250,17 @@ void THD::awake(THD::killed_state state_to_set) { + killed = state_to_set; + } + ++ /* Kill the workers if parallel query. */ ++ if (parallel_exec) { ++ mysql_mutex_lock(&pq_lock_worker); ++ for (auto pq_worker : pq_workers) { ++ mysql_mutex_lock(&pq_worker->LOCK_thd_data); ++ pq_worker->awake(state_to_set); ++ mysql_mutex_unlock(&pq_worker->LOCK_thd_data); ++ } ++ mysql_mutex_unlock(&pq_lock_worker); ++ } ++ + if (state_to_set != THD::KILL_QUERY && state_to_set != THD::KILL_TIMEOUT) { + if (this != current_thd || kill_immunizer) { + assert(!kill_immunizer || !kill_immunizer->is_active()); +@@ -1484,6 +1558,25 @@ void THD::cleanup_after_query() { + if (rli_slave) rli_slave->cleanup_after_query(); + // Set the default "cute" mode for the execution environment: + check_for_truncated_fields = CHECK_FIELD_IGNORE; ++ ++ if (in_sp_trigger == 0) { ++ // cleanup for parallel query ++ if (pq_threads_running > 0) { ++ release_pq_running_threads(pq_threads_running); ++ pq_threads_running = 0; ++ } ++ if(pq_mem_root) ++ free_root(pq_mem_root, MYF(0)); ++ pq_dop = 0; ++ no_pq = false; ++ locking_clause = 0; ++ pq_error = false; ++ pq_workers.clear(); ++ pq_explain.clear(); ++ ++ if (killed == THD::KILL_PQ_QUERY) ++ killed.store(THD::NOT_KILLED); // restore killed for next query ++ } + } + + /* +@@ -1552,6 +1645,22 @@ void THD::update_charset() { + variables.character_set_filesystem, ¬_used); + } + ++/** ++ * Record a transient change to a pointer to an Item whitin another Item. 
++ */ ++void THD::change_item_tree(Item **place, Item *new_value) { ++ /* TODO: check for OOM condition here */ ++ if (!stmt_arena->is_regular()) { ++ DBUG_PRINT("info", ("change_item_tree place %p old_value %p new_value %p", ++ place, *place, new_value)); ++ nocheck_register_item_tree_change(place, new_value); ++ } ++ if (new_value != nullptr && new_value != *place) { ++ new_value->origin_item = *place; ++ } ++ *place = new_value; ++} ++ + int THD::send_explain_fields(Query_result *result) { + mem_root_deque field_list(current_thd->mem_root); + Item *item; +@@ -1657,16 +1766,18 @@ void THD::rollback_item_tree_changes() { + } + + void Query_arena::add_item(Item *item) { ++ item->pq_alloc_item = true; + item->next_free = m_item_list; + m_item_list = item; + } + +-void Query_arena::free_items() { ++void Query_arena::free_items(bool parallel_exec MY_ATTRIBUTE((unused))) { + Item *next; + DBUG_TRACE; + /* This works because items are allocated with (*THR_MALLOC)->Alloc() */ + for (; m_item_list; m_item_list = next) { + next = m_item_list->next_free; ++ assert(!parallel_exec || (parallel_exec && m_item_list->pq_alloc_item)); + m_item_list->delete_self(); + } + /* Postcondition: free_list is 0 */ +@@ -1808,7 +1919,8 @@ void THD::send_kill_message() const { + assuming it's come as far as the execution stage, so that the user + can look at the execution plan and statistics so far. + */ +- if (!running_explain_analyze) { ++ if ((pq_leader != nullptr && !pq_leader->running_explain_analyze) || ++ (pq_leader == nullptr && !running_explain_analyze)) { + my_error(err, MYF(ME_FATALERROR)); + } + } +diff --git a/sql/sql_class.h b/sql/sql_class.h +index e76edaaf..3815220d 100644 +--- a/sql/sql_class.h ++++ b/sql/sql_class.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -94,6 +95,7 @@ + #include "sql/resourcegroups/resource_group_basic_types.h" + #include "sql/rpl_context.h" // Rpl_thd_context + #include "sql/rpl_gtid.h" ++#include "sql/row_iterator.h" + #include "sql/session_tracker.h" // Session_tracker + #include "sql/sql_connect.h" + #include "sql/sql_const.h" +@@ -105,6 +107,7 @@ + #include "sql/system_variables.h" // system_variables + #include "sql/transaction_info.h" // Ha_trx_info + #include "sql/xa.h" ++#include "sql/pq_condition.h" + #include "sql_string.h" + #include "template_utils.h" + #include "thr_lock.h" +@@ -284,7 +287,7 @@ class Query_arena { + void reset_item_list() { m_item_list = nullptr; } + void set_item_list(Item *item) { m_item_list = item; } + void add_item(Item *item); +- void free_items(); ++ void free_items(bool parallel_exec = false); + void set_state(enum_state state_arg) { state = state_arg; } + enum_state get_state() const { return state; } + bool is_stmt_prepare() const { return state == STMT_INITIALIZED; } +@@ -935,6 +938,43 @@ class THD : public MDL_context_owner, + String m_rewritten_query; + + public: ++ /* parallel reader context */ ++ void *pq_ctx; ++ /* using for PQ worker threads */ ++ THD *pq_leader; ++ /* using for explain */ ++ bool parallel_exec; ++ /* parallel query running threads in session*/ ++ uint pq_threads_running; ++ /* degree of parallel */ ++ uint pq_dop; ++ /* disable parallel execute */ ++ bool no_pq; ++ /* disable parallel query for store procedure and trigger */ ++ uint in_sp_trigger; ++ /* select .. 
fro share/update */ ++ bool locking_clause; ++ /* indicates whether parallel query is supported */ ++ enum PqConditionStatus m_suite_for_pq{PqConditionStatus::INIT}; ++ ++ /* indicates whether occurring error during execution */ ++ bool pq_error{false}; ++ ++ /* save ParallelScanIterator or PQblockScanIterator here to call end() */ ++ RowIterator *pq_iterator{NULL}; ++ ++ /* check first table. */ ++ uint pq_check_fields{0}; ++ uint pq_check_reclen{0}; ++ ++ /* save PQ worker THDs. */ ++ std::vector pq_workers; ++ /* protects THD::pq_workers. */ ++ mysql_mutex_t pq_lock_worker; ++ ++ /* for explain analyze. */ ++ std::string pq_explain; ++ + /* Used to execute base64 coded binlog events in MySQL server */ + Relay_log_info *rli_fake; + /* Slave applier execution context */ +@@ -1190,7 +1230,7 @@ class THD : public MDL_context_owner, + return pointer_cast(m_protocol); + } + +- private: ++ public: + Protocol *m_protocol; // Current protocol + /** + SSL data attached to this connection. +@@ -1636,6 +1676,7 @@ class THD : public MDL_context_owner, + const char *m_trans_log_file; + char *m_trans_fixed_log_file; + my_off_t m_trans_end_pos; ++ public: + /**@}*/ + // NOTE: Ideally those two should be in Protocol, + // but currently its design doesn't allow that. +@@ -1796,6 +1837,7 @@ class THD : public MDL_context_owner, + Attachable_trx_rw &operator=(const Attachable_trx_rw &); + }; + ++ public: + Attachable_trx *m_attachable_trx; + + public: +@@ -2018,6 +2060,7 @@ class THD : public MDL_context_owner, + stable throughout the next query, see update_previous_found_rows. + */ + ulonglong current_found_rows; ++ ulonglong pq_current_found_rows; + + /* + Indicate if the gtid_executed table is being operated implicitly +@@ -2401,6 +2444,7 @@ class THD : public MDL_context_owner, + KILL_CONNECTION = ER_SERVER_SHUTDOWN, + KILL_QUERY = ER_QUERY_INTERRUPTED, + KILL_TIMEOUT = ER_QUERY_TIMEOUT, ++ KILL_PQ_QUERY = ER_PARALLEL_EXEC_ERROR, + KILLED_NO_VALUE /* means neither of the states */ + }; + std::atomic killed; +@@ -2685,36 +2729,10 @@ class THD : public MDL_context_owner, + void enter_cond(mysql_cond_t *cond, mysql_mutex_t *mutex, + const PSI_stage_info *stage, PSI_stage_info *old_stage, + const char *src_function, const char *src_file, +- int src_line) override { +- DBUG_TRACE; +- mysql_mutex_assert_owner(mutex); +- /* +- Sic: We don't lock LOCK_current_cond here. +- If we did, we could end up in deadlock with THD::awake() +- which locks current_mutex while LOCK_current_cond is locked. +- */ +- current_mutex = mutex; +- current_cond = cond; +- enter_stage(stage, old_stage, src_function, src_file, src_line); +- return; +- } ++ int src_line) override; + + void exit_cond(const PSI_stage_info *stage, const char *src_function, +- const char *src_file, int src_line) override { +- DBUG_TRACE; +- /* +- current_mutex must be unlocked _before_ LOCK_current_cond is +- locked (if that would not be the case, you'll get a deadlock if someone +- does a THD::awake() on you). +- */ +- mysql_mutex_assert_not_owner(current_mutex.load()); +- mysql_mutex_lock(&LOCK_current_cond); +- current_mutex = nullptr; +- current_cond = nullptr; +- mysql_mutex_unlock(&LOCK_current_cond); +- enter_stage(stage, nullptr, src_function, src_file, src_line); +- return; +- } ++ const char *src_file, int src_line) override; + + int is_killed() const final { return killed; } + bool might_have_commit_order_waiters() const final { +@@ -2809,7 +2827,12 @@ class THD : public MDL_context_owner, + in the next statement. 
+ */ + inline void update_previous_found_rows() { +- previous_found_rows = current_found_rows; ++ if (pq_current_found_rows != 0) { ++ previous_found_rows = pq_current_found_rows; ++ pq_current_found_rows = 0; ++ } else { ++ previous_found_rows = current_found_rows; ++ } + } + + /** +@@ -2927,6 +2950,13 @@ class THD : public MDL_context_owner, + */ + inline bool is_error() const { return get_stmt_da()->is_error(); } + ++ inline bool is_pq_error() const { ++ return !pq_leader ? pq_error ++ : (pq_error || (pq_leader->is_killed() || ++ pq_leader->pq_error || ++ pq_leader->is_error())); ++ } ++ + /// Returns first Diagnostics Area for the current statement. + Diagnostics_area *get_stmt_da() { return m_stmt_da; } + +@@ -3010,15 +3040,7 @@ class THD : public MDL_context_owner, + /** + Record a transient change to a pointer to an Item within another Item. + */ +- void change_item_tree(Item **place, Item *new_value) { +- /* TODO: check for OOM condition here */ +- if (!stmt_arena->is_regular()) { +- DBUG_PRINT("info", ("change_item_tree place %p old_value %p new_value %p", +- place, *place, new_value)); +- nocheck_register_item_tree_change(place, new_value); +- } +- *place = new_value; +- } ++ void change_item_tree(Item **place, Item *new_value); + + /** + Remember that place was updated with new_value so it can be restored +@@ -3760,6 +3782,7 @@ class THD : public MDL_context_owner, + uint code, const char *message_text); + friend void my_message_sql(uint, const char *, myf); + ++ public: + /** + Raise a generic SQL condition. Also calls mysql_audit_notify() unless + the condition is handled by a SQL condition handler. +@@ -3775,7 +3798,6 @@ class THD : public MDL_context_owner, + Sql_condition::enum_severity_level level, + const char *msg, bool fatal_error = false); + +- public: + void set_command(enum enum_server_command command); + + inline enum enum_server_command get_command() const { return m_command; } +@@ -4032,6 +4054,10 @@ class THD : public MDL_context_owner, + + void mark_transaction_to_rollback(bool all); + ++ public: ++ /** This memory root is used for Parallel Query */ ++ MEM_ROOT *pq_mem_root; ++ + private: + /** The current internal error handler for this thread, or NULL. */ + Internal_error_handler *m_internal_handler; +@@ -4346,6 +4372,10 @@ class THD : public MDL_context_owner, + public: + bool is_system_user(); + void set_system_user(bool system_user_flag); ++ bool is_worker(); ++ bool pq_copy_from(THD *thd); ++ bool pq_merge_status(THD *thd); ++ bool pq_status_reset(); + + public: + Transactional_ddl_context m_transactional_ddl{this}; +@@ -4426,4 +4456,8 @@ inline void THD::set_system_user(bool system_user_flag) { + m_is_system_user.store(system_user_flag, std::memory_order_seq_cst); + } + ++inline bool THD::is_worker() { ++ return pq_leader != nullptr; ++} ++ + #endif /* SQL_CLASS_INCLUDED */ +diff --git a/sql/sql_executor.cc b/sql/sql_executor.cc +index 923d9a21..87a8ddf1 100644 +--- a/sql/sql_executor.cc ++++ b/sql/sql_executor.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
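
For reference, the kill and error path above (THD::awake fanning the kill state out to every registered worker THD under pq_lock_worker, and is_pq_error() combining the worker's own state with the leader's) follows a common fan-out pattern. A minimal standalone sketch, where Session, Leader and awake_all are illustrative stand-ins rather than server classes:

    #include <atomic>
    #include <chrono>
    #include <mutex>
    #include <thread>
    #include <vector>

    // Minimal stand-in for a session; real THDs carry far more state.
    struct Session {
      std::atomic<bool> killed{false};
      std::mutex lock;               // analogous to LOCK_thd_data
      void awake() { killed.store(true); }
    };

    struct Leader {
      Session self;
      std::mutex workers_lock;       // analogous to pq_lock_worker
      std::vector<Session *> workers;

      // Killing the leader propagates the kill flag to every registered worker.
      void awake_all() {
        self.awake();
        std::lock_guard<std::mutex> g(workers_lock);
        for (Session *w : workers) {
          std::lock_guard<std::mutex> wg(w->lock);
          w->awake();
        }
      }
    };

    int main() {
      Leader leader;
      Session worker;
      {
        std::lock_guard<std::mutex> g(leader.workers_lock);
        leader.workers.push_back(&worker);
      }
      std::thread t([&] {
        while (!worker.killed.load())  // worker polls its own kill flag
          std::this_thread::sleep_for(std::chrono::milliseconds(1));
      });
      leader.awake_all();              // e.g. KILL QUERY arrives on the leader
      t.join();
      return leader.self.killed && worker.killed ? 0 : 1;
    }
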
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -126,6 +127,7 @@ + #include "tables_contained_in.h" + #include "template_utils.h" + #include "thr_lock.h" ++#include "msg_queue.h" + + using std::make_pair; + using std::max; +@@ -193,7 +195,7 @@ string RefToString(const TABLE_REF &ref, const KEY *key, bool include_nulls) { + + bool JOIN::create_intermediate_table( + QEP_TAB *const tab, const mem_root_deque &tmp_table_fields, +- ORDER_with_src &tmp_table_group, bool save_sum_fields) { ++ ORDER_with_src &tmp_table_group, bool save_sum_fields, bool force_disk_table) { + DBUG_TRACE; + THD_STAGE_INFO(thd, stage_creating_tmp_table); + const bool windowing = m_windows.elements > 0; +@@ -208,7 +210,11 @@ bool JOIN::create_intermediate_table( + ? m_select_limit + : HA_POS_ERROR; + +- tab->tmp_table_param = new (thd->mem_root) Temp_table_param(tmp_table_param); ++ tab->tmp_table_param = new (thd->mem_root) Temp_table_param(*tmp_table_param); ++ if (tab->tmp_table_param == nullptr) { ++ return true; ++ } ++ + tab->tmp_table_param->skip_create_table = true; + + bool distinct_arg = +@@ -222,9 +228,9 @@ bool JOIN::create_intermediate_table( + TABLE *table = + create_tmp_table(thd, tab->tmp_table_param, tmp_table_fields, + tmp_table_group.order, distinct_arg, save_sum_fields, +- query_block->active_options(), tmp_rows_limit, ""); ++ query_block->active_options(), tmp_rows_limit, "", force_disk_table); + if (!table) return true; +- tmp_table_param.using_outer_summary_function = ++ tmp_table_param->using_outer_summary_function = + tab->tmp_table_param->using_outer_summary_function; + + assert(tab->idx() > 0); +@@ -259,11 +265,15 @@ bool JOIN::create_intermediate_table( + if (!group_list.empty() && simple_group) { + DBUG_PRINT("info", ("Sorting for group")); + +- if (m_ordered_index_usage != ORDERED_INDEX_GROUP_BY && +- add_sorting_to_table(const_tables, &group_list, ++ if (m_ordered_index_usage != ORDERED_INDEX_GROUP_BY) { ++ if (add_sorting_to_table(const_tables, &group_list, + /*force_stable_sort=*/false, + /*sort_before_group=*/true)) +- goto err; ++ goto err; ++ ++ pq_last_sort_idx = const_tables; ++ pq_rebuilt_group = true; ++ } + + if (alloc_group_fields(this, group_list.order)) goto err; + if (make_sum_func_list(*fields, true)) goto err; +@@ -289,11 +299,14 @@ bool JOIN::create_intermediate_table( + simple_order && rollup_state == RollupState::NONE && !m_windows_sort) { + DBUG_PRINT("info", ("Sorting for order")); + +- if (m_ordered_index_usage != ORDERED_INDEX_ORDER_BY && +- add_sorting_to_table(const_tables, &order, ++ if (m_ordered_index_usage != ORDERED_INDEX_ORDER_BY) { ++ if (add_sorting_to_table(const_tables, &order, + /*force_stable_sort=*/false, + /*sort_before_group=*/false)) + goto err; ++ ++ pq_last_sort_idx = const_tables; ++ } + order.clean(); + } + } +@@ -641,7 +654,7 @@ QEP_TAB::enum_op_type JOIN::get_end_select_func() { + more aggregate functions). Use end_send if the query should not + be grouped. 
+ */ +- if (streaming_aggregation && !tmp_table_param.precomputed_group_by) { ++ if (streaming_aggregation && !tmp_table_param->precomputed_group_by) { + DBUG_PRINT("info", ("Using end_send_group")); + return QEP_TAB::OT_AGGREGATE; + } +@@ -1473,12 +1486,12 @@ AccessPath *GetAccessPathForDerivedTable( + if (query_expression->is_simple()) { + subjoin = query_expression->first_query_block()->join; + select_number = query_expression->first_query_block()->select_number; +- tmp_table_param = &subjoin->tmp_table_param; ++ tmp_table_param = subjoin->tmp_table_param; + } else if (query_expression->fake_query_block != nullptr) { + // NOTE: subjoin here is never used, as ConvertItemsToCopy only uses it + // for ROLLUP, and fake_query_block can't have ROLLUP. + subjoin = query_expression->fake_query_block->join; +- tmp_table_param = &subjoin->tmp_table_param; ++ tmp_table_param = subjoin->tmp_table_param; + select_number = query_expression->fake_query_block->select_number; + } else { + tmp_table_param = new (thd->mem_root) Temp_table_param; +@@ -1528,7 +1541,7 @@ AccessPath *GetAccessPathForDerivedTable( + // also conservative; if the CTE is defined within this join and used + // only once, we could still stream without losing performance. + path = NewStreamingAccessPath(thd, query_expression->root_access_path(), +- subjoin, &subjoin->tmp_table_param, table, ++ subjoin, subjoin->tmp_table_param, table, + /*ref_slice=*/-1); + CopyCosts(*query_expression->root_access_path(), path); + } else { +@@ -2828,37 +2841,45 @@ AccessPath *JOIN::create_root_access_path_for_join() { + path->num_output_rows = query_block->row_value_list->size(); + path->cost = 0.0; + } else if (const_tables == primary_tables) { +- // Only const tables, so add a fake single row to join in all +- // the const tables (only inner-joined tables are promoted to +- // const tables in the optimizer). +- path = NewFakeSingleRowAccessPath(thd, /*count_examined_rows=*/true); +- qep_tab_map conditions_depend_on_outer_tables = 0; +- if (where_cond != nullptr) { +- path = PossiblyAttachFilter(path, vector{where_cond}, thd, +- &conditions_depend_on_outer_tables); +- } ++ if (need_tmp_pq_leader) { ++ assert(thd->parallel_exec && !thd->is_worker()); ++ QEP_TAB *tab = &qep_tab[const_tables]; ++ path = NewParallelScanAccessPath(thd, tab, tab->table(), tab->gather, ++ pq_stable_sort, ++ tab->old_table()->file->ref_length); ++ } else { ++ // Only const tables, so add a fake single row to join in all ++ // the const tables (only inner-joined tables are promoted to ++ // const tables in the optimizer). ++ path = NewFakeSingleRowAccessPath(thd, /*count_examined_rows=*/true); ++ qep_tab_map conditions_depend_on_outer_tables = 0; ++ if (where_cond != nullptr) { ++ path = PossiblyAttachFilter(path, vector{where_cond}, thd, ++ &conditions_depend_on_outer_tables); ++ } + +- // Surprisingly enough, we can specify that the const tables are +- // to be dumped immediately to a temporary table. If we don't do this, +- // we risk that there are fields that are not copied correctly +- // (tmp_table_param contains copy_funcs we'd otherwise miss). 
+- if (const_tables > 0) { +- QEP_TAB *qep_tab = &this->qep_tab[const_tables]; +- if (qep_tab->op_type == QEP_TAB::OT_MATERIALIZE) { +- qep_tab->table()->alias = ""; +- AccessPath *table_path = +- create_table_access_path(thd, nullptr, qep_tab, +- /*count_examined_rows=*/false); +- path = NewMaterializeAccessPath( +- thd, +- SingleMaterializeQueryBlock( +- thd, path, query_block->select_number, this, +- /*copy_fields_and_items=*/true, qep_tab->tmp_table_param), +- qep_tab->invalidators, qep_tab->table(), table_path, +- /*cte=*/nullptr, query_expression(), qep_tab->ref_item_slice, +- /*rematerialize=*/true, qep_tab->tmp_table_param->end_write_records, +- /*reject_multiple_rows=*/false); +- EstimateMaterializeCost(path); ++ // Surprisingly enough, we can specify that the const tables are ++ // to be dumped immediately to a temporary table. If we don't do this, ++ // we risk that there are fields that are not copied correctly ++ // (tmp_table_param contains copy_funcs we'd otherwise miss). ++ if (const_tables > 0) { ++ QEP_TAB *qep_tab = &this->qep_tab[const_tables]; ++ if (qep_tab->op_type == QEP_TAB::OT_MATERIALIZE) { ++ qep_tab->table()->alias = ""; ++ AccessPath *table_path = ++ create_table_access_path(thd, nullptr, qep_tab, ++ /*count_examined_rows=*/false); ++ path = NewMaterializeAccessPath( ++ thd, ++ SingleMaterializeQueryBlock( ++ thd, path, query_block->select_number, this, ++ /*copy_fields_and_items=*/true, qep_tab->tmp_table_param), ++ qep_tab->invalidators, qep_tab->table(), table_path, ++ /*cte=*/nullptr, query_expression(), qep_tab->ref_item_slice, ++ /*rematerialize=*/true, qep_tab->tmp_table_param->end_write_records, ++ /*reject_multiple_rows=*/false); ++ EstimateMaterializeCost(path); ++ } + } + } + } else { +@@ -3024,7 +3045,7 @@ AccessPath *JOIN::create_root_access_path_for_join() { + qep_tab->invalidators, qep_tab->table(), table_path, + /*cte=*/nullptr, query_expression(), + /*ref_slice=*/-1, +- /*rematerialize=*/true, tmp_table_param.end_write_records, ++ /*rematerialize=*/true, tmp_table_param->end_write_records, + /*reject_multiple_rows=*/false); + EstimateMaterializeCost(path); + } +@@ -3114,17 +3135,17 @@ AccessPath *JOIN::create_root_access_path_for_join() { + do_aggregate = (qep_tab[primary_tables + tmp_tables].op_type == + QEP_TAB::OT_AGGREGATE) || + ((grouped || group_optimized_away) && +- tmp_table_param.precomputed_group_by); ++ tmp_table_param->precomputed_group_by); + } + if (do_aggregate) { + // Aggregate as we go, with output into a special slice of the same table. 
+- assert(streaming_aggregation || tmp_table_param.precomputed_group_by); ++ assert(streaming_aggregation || tmp_table_param->precomputed_group_by); + #ifndef NDEBUG + for (unsigned table_idx = const_tables; table_idx < tables; ++table_idx) { + assert(qep_tab->op_type != QEP_TAB::OT_AGGREGATE_THEN_MATERIALIZE); + } + #endif +- if (!tmp_table_param.precomputed_group_by) { ++ if (!tmp_table_param->precomputed_group_by) { + path = + NewAggregateAccessPath(thd, path, rollup_state != RollupState::NONE); + } +@@ -3899,7 +3920,12 @@ bool DynamicRangeIterator::Init() { + Key_map needed_reg_dummy; + QUICK_SELECT_I *old_qck = m_qep_tab->quick(); + QUICK_SELECT_I *qck; +- DEBUG_SYNC(thd(), "quick_not_created"); ++ if (thd()->pq_leader != nullptr) { ++ DEBUG_SYNC(thd()->pq_leader, "quick_not_created"); ++ } else { ++ DEBUG_SYNC(thd(), "quick_not_created"); ++ } ++ + const int rc = test_quick_select( + thd(), m_qep_tab->keys(), + 0, // empty table map +@@ -3921,7 +3947,11 @@ bool DynamicRangeIterator::Init() { + that, we need to take mutex and change type and quick_optim. + */ + +- DEBUG_SYNC(thd(), "quick_created_before_mutex"); ++ if (thd()->pq_leader != nullptr) { ++ DEBUG_SYNC(thd()->pq_leader, "quick_created_before_mutex"); ++ } else { ++ DEBUG_SYNC(thd(), "quick_created_before_mutex"); ++ } + + thd()->lock_query_plan(); + m_qep_tab->set_type(qck ? calc_join_type(qck->get_type()) : JT_ALL); +@@ -3929,7 +3959,11 @@ bool DynamicRangeIterator::Init() { + thd()->unlock_query_plan(); + + delete old_qck; +- DEBUG_SYNC(thd(), "quick_droped_after_mutex"); ++ if (thd()->pq_leader != nullptr) { ++ DEBUG_SYNC(thd()->pq_leader, "quick_droped_after_mutex"); ++ } else { ++ DEBUG_SYNC(thd(), "quick_droped_after_mutex"); ++ } + + // Clear out and destroy any old iterators before we start constructing + // new ones, since they may share the same memory in the union. 
+@@ -4150,14 +4184,16 @@ bool AlternativeIterator::Init() { + AccessPath *QEP_TAB::access_path() { + assert(table()); + // Only some access methods support reversed access: +- assert(!m_reversed_access || type() == JT_REF || type() == JT_INDEX_SCAN); ++ assert(current_thd->parallel_exec || !m_reversed_access || type() == JT_REF || type() == JT_INDEX_SCAN); + TABLE_REF *used_ref = nullptr; + AccessPath *path = nullptr; + + const TABLE *pushed_root = table()->file->member_of_pushed_join(); + const bool is_pushed_child = (pushed_root && pushed_root != table()); + // A 'pushed_child' has to be a REF type +- assert(!is_pushed_child || type() == JT_REF || type() == JT_EQ_REF); ++ assert(current_thd->parallel_exec || !is_pushed_child || type() == JT_REF || type() == JT_EQ_REF); ++ ++ bool pq_replace_accesspath = false; + + switch (type()) { + case JT_REF: +@@ -4170,6 +4206,8 @@ AccessPath *QEP_TAB::access_path() { + path = NewRefAccessPath(join()->thd, table(), &ref(), use_order(), + m_reversed_access, + /*count_examined_rows=*/true); ++ ++ pq_replace_accesspath = true; + } + used_ref = &ref(); + break; +@@ -4208,6 +4246,8 @@ AccessPath *QEP_TAB::access_path() { + path = NewIndexScanAccessPath(join()->thd, table(), index(), use_order(), + m_reversed_access, + /*count_examined_rows=*/true); ++ ++ pq_replace_accesspath = true; + break; + case JT_ALL: + case JT_RANGE: +@@ -4217,7 +4257,7 @@ AccessPath *QEP_TAB::access_path() { + /*count_examined_rows=*/true); + } else { + path = create_table_access_path(join()->thd, nullptr, this, +- /*count_examined_rows=*/true); ++ /*count_examined_rows=*/true, &pq_replace_accesspath); + } + break; + default: +@@ -4225,6 +4265,12 @@ AccessPath *QEP_TAB::access_path() { + break; + } + ++ /** note that: for gather operator, we have no need to generate iterator */ ++ if (current_thd->is_worker() && pq_replace_accesspath && do_parallel_scan) { ++ path = NewPQBlockScanAccessPath(current_thd, table(), gather, ++ join()->pq_stable_sort); ++ } ++ + /* + If we have an item like IN ( SELECT f2 FROM t2 ), and we were not + able to rewrite it into a semijoin, the optimizer may rewrite it into +@@ -6270,13 +6316,23 @@ bool change_to_use_tmp_fields(mem_root_deque *fields, THD *thd, + ifield->table_name = iref->table_name; + ifield->set_orig_db_name(iref->orig_db_name()); + ifield->db_name = iref->db_name; ++ if (thd->parallel_exec) { ++ ifield->ref = true; ++ Send_field tmp_field; ++ item->make_field(&tmp_field); ++ ifield->field->flags = tmp_field.flags; ++ ifield->ref_col_name = iref->item_name.ptr(); ++ ifield->set_orig_table_name(iref->orig_table_name()); ++ } + } + if (orig_field != nullptr && item != new_item) { + down_cast(new_item)->set_orig_table_name( + orig_field->orig_table_name()); + } + #ifndef NDEBUG +- if (!new_item->item_name.is_set()) { ++ /* Do not set the item_name here when parallel query to keep the MTR ++ execution results of the release and debug versions same. 
*/ ++ if (!new_item->item_name.is_set() && thd->m_suite_for_pq == PqConditionStatus::NOT_SUPPORTED) { + char buff[256]; + String str(buff, sizeof(buff), &my_charset_bin); + str.length(0); +@@ -6289,6 +6345,10 @@ bool change_to_use_tmp_fields(mem_root_deque *fields, THD *thd, + replace_embedded_rollup_references_with_tmp_fields(thd, item, fields); + } + ++ if (item->type() == Item::SUM_FUNC_ITEM && item->const_item() && thd->parallel_exec) { ++ new_item = item; ++ } ++ + new_item->hidden = item->hidden; + res_fields->push_back(new_item); + const size_t idx = +@@ -6615,7 +6675,7 @@ int UnqualifiedCountIterator::Read() { + // If we are outputting to a temporary table, we need to copy the results + // into it here. It is also used for nonaggregated items, even when there are + // no temporary tables involved. +- if (copy_fields_and_funcs(&m_join->tmp_table_param, m_join->thd)) { ++ if (copy_fields_and_funcs(m_join->tmp_table_param, m_join->thd)) { + return 1; + } + +diff --git a/sql/sql_executor.h b/sql/sql_executor.h +index 465e2388..fa8dc51b 100644 +--- a/sql/sql_executor.h ++++ b/sql/sql_executor.h +@@ -2,6 +2,7 @@ + #define SQL_EXECUTOR_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -44,6 +45,7 @@ + #include "sql/sql_opt_exec_shared.h" // QEP_shared_owner + #include "sql/table.h" + #include "sql/temp_table_param.h" // Temp_table_param ++#include "sql/query_result.h" + + class CacheInvalidatorIterator; + class Cached_item; +@@ -256,11 +258,16 @@ bool make_group_fields(JOIN *main_join, JOIN *curr_join); + bool check_unique_constraint(TABLE *table); + ulonglong unique_hash(const Field *field, ulonglong *hash); + ++class Gather_operator; + class QEP_TAB : public QEP_shared_owner { + public: + QEP_TAB() + : QEP_shared_owner(), ++ gather(nullptr), ++ do_parallel_scan(false), + table_ref(nullptr), ++ pos(0), ++ pq_cond(nullptr), + flush_weedout_table(nullptr), + check_weed_out_table(nullptr), + firstmatch_return(NO_PLAN_IDX), +@@ -275,6 +282,7 @@ class QEP_TAB : public QEP_shared_owner { + ref_item_slice(REF_SLICE_SAVED_BASE), + m_condition_optim(nullptr), + m_quick_optim(nullptr), ++ m_old_quick_optim(nullptr), + m_keyread_optim(false), + m_reversed_access(false), + lateral_derived_tables_depend_on_me(0) {} +@@ -282,13 +290,15 @@ class QEP_TAB : public QEP_shared_owner { + /// Initializes the object from a JOIN_TAB + void init(JOIN_TAB *jt); + // Cleans up. +- void cleanup(); ++ void cleanup(bool is_free = true); + + // Getters and setters + + Item *condition_optim() const { return m_condition_optim; } + QUICK_SELECT_I *quick_optim() const { return m_quick_optim; } + void set_quick_optim() { m_quick_optim = quick(); } ++ QUICK_SELECT_I *old_quick_optim() const { return m_old_quick_optim; } ++ void set_old_quick_optim() { m_old_quick_optim = quick(); } + void set_condition_optim() { m_condition_optim = condition(); } + bool keyread_optim() const { return m_keyread_optim; } + void set_keyread_optim() { +@@ -302,6 +312,10 @@ class QEP_TAB : public QEP_shared_owner { + if (t) t->reginfo.qep_tab = this; + } + ++ void set_old_table(TABLE *t) { ++ m_qs->set_old_table(t); ++ } ++ + /// @returns semijoin strategy for this table. 
+ uint get_sj_strategy() const; + +@@ -323,6 +337,8 @@ class QEP_TAB : public QEP_shared_owner { + */ + void init_join_cache(JOIN_TAB *join_tab); + ++ bool pq_copy(THD *thd, QEP_TAB *qep_tab); ++ + /** + @returns query block id for an inner table of materialized semi-join, and + 0 for all other tables. +@@ -363,9 +379,20 @@ class QEP_TAB : public QEP_shared_owner { + bool pfs_batch_update(const JOIN *join) const; + + public: ++ Gather_operator *gather; ++ bool do_parallel_scan; ++ + /// Pointer to table reference + TABLE_LIST *table_ref; + ++ uint pos; // position in qep_tab array ++ ++ bool has_pq_cond{false}; ++ Item *pq_cond; ++ ++ LEX_CSTRING *table_name{nullptr}; ++ LEX_CSTRING *db{nullptr}; ++ + /* Variables for semi-join duplicate elimination */ + SJ_TMP_TABLE *flush_weedout_table; + SJ_TMP_TABLE *check_weed_out_table; +@@ -478,6 +505,7 @@ class QEP_TAB : public QEP_shared_owner { + LOCK_query_plan mutex. + */ + QUICK_SELECT_I *m_quick_optim; ++ QUICK_SELECT_I *m_old_quick_optim; + + /** + True if only index is going to be read for this table. This is the +@@ -548,7 +576,8 @@ struct PendingCondition { + + unique_ptr_destroy_only PossiblyAttachFilterIterator( + unique_ptr_destroy_only iterator, +- const std::vector &conditions, THD *thd); ++ const std::vector &conditions, THD *thd, ++ table_map *conditions_depend_on_outer_tables); + + void SplitConditions(Item *condition, QEP_TAB *current_table, + std::vector *predicates_below_join, +diff --git a/sql/sql_hints.yy b/sql/sql_hints.yy +index d3780b6a..d4f6e977 100644 +--- a/sql/sql_hints.yy ++++ b/sql/sql_hints.yy +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -130,6 +131,8 @@ static bool parse_int(longlong *to, const char *from, size_t from_length) + %token DERIVED_CONDITION_PUSHDOWN_HINT 1047 + %token NO_DERIVED_CONDITION_PUSHDOWN_HINT 1048 + %token HINT_ARG_FLOATING_POINT_NUMBER 1049 ++%token PQ_HINT 1050 ++%token NO_PQ_HINT 1051 + + /* + YYUNDEF in internal to Bison. Please don't change its number, or change +@@ -419,6 +422,34 @@ qb_level_hint: + if ($$ == NULL) + YYABORT; // OOM + } ++ | ++ PQ_HINT ++ { ++ $$= NEW_PTN PT_qb_level_hint(NULL_CSTR, true, PQ_HINT_ENUM, 0); ++ if ($$ == NULL) ++ YYABORT; // OOM ++ } ++ | ++ PQ_HINT '(' HINT_ARG_NUMBER ')' ++ { ++ longlong n; ++ if (parse_int(&n, $3.str, $3.length) || n > UINT_MAX32 || n <= 0) ++ { ++ scanner->syntax_warning(ER_THD(thd, ++ ER_WARN_BAD_PARALLEL_NUM)); ++ $$= NULL; ++ } else { ++ $$= NEW_PTN PT_qb_level_hint(NULL_CSTR, true, PQ_HINT_ENUM, n); ++ if ($$ == NULL) ++ YYABORT; // OOM ++ } ++ } ++ | ++ NO_PQ_HINT { ++ $$= NEW_PTN PT_qb_level_hint(NULL_CSTR, true, NO_PQ_HINT_ENUM, 0); ++ if ($$ == NULL) ++ YYABORT; // OOM ++ } + ; + + semijoin_strategies: +diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc +index 146a4604..c279da3c 100644 +--- a/sql/sql_lex.cc ++++ b/sql/sql_lex.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -4216,6 +4217,18 @@ bool LEX::locate_var_assignment(const Name_string &name) { + return false; + } + ++void Query_block::fix_prepare_information_for_order( ++ THD *thd, SQL_I_List *list, Group_list_ptrs **list_ptrs) { ++ Group_list_ptrs *p = *list_ptrs; ++ if (p == nullptr) { ++ void *mem = thd->stmt_arena->alloc(sizeof(Group_list_ptrs)); ++ *list_ptrs = p = new (mem) Group_list_ptrs(thd->stmt_arena->mem_root); ++ } ++ p->reserve(list->elements); ++ for (ORDER *order = list->first; order; order = order->next) ++ p->push_back(order); ++} ++ + /** + Save properties for ORDER clauses so that they can be reconstructed + for a new optimization of the query block. +diff --git a/sql/sql_lex.h b/sql/sql_lex.h +index ce5ecd54..f8cd5f3f 100644 +--- a/sql/sql_lex.h ++++ b/sql/sql_lex.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -337,6 +338,7 @@ class Table_ident { + + using List_item = mem_root_deque; + using Group_list_ptrs = Mem_root_array; ++using PQ_Group_list_ptrs = Mem_root_array; + + /** + Structure to hold parameters for CHANGE MASTER, START SLAVE, and STOP SLAVE. +@@ -674,7 +676,9 @@ class Query_expression { + unfinished materialization (see optimize()). + */ + unique_ptr_destroy_only m_root_iterator; ++ public: + AccessPath *m_root_access_path = nullptr; ++ private: + + /** + If there is an unfinished materialization (see optimize()), +@@ -695,13 +699,13 @@ class Query_expression { + Mem_root_array setup_materialization( + THD *thd, TABLE *dst_table, bool union_distinct_only); + ++ public: + /** + Convert the executor structures to a set of access paths, storing the result + in m_root_access_path. + */ + void create_access_paths(THD *thd); + +- public: + /** + result of this query can't be cached, bit field, can be : + UNCACHEABLE_DEPENDENT +@@ -880,6 +884,7 @@ class Query_expression { + + /// Set new query result object for this query expression + void set_query_result(Query_result *res) { m_query_result = res; } ++ void set_slave(Query_block *select_lex) { slave = select_lex; } + + /** + Whether there is a chance that optimize() is capable of materializing +@@ -1087,6 +1092,7 @@ enum class enum_explain_type { + EXPLAIN_UNION, + EXPLAIN_UNION_RESULT, + EXPLAIN_MATERIALIZED, ++ EXPLAIN_GATHER, + // Total: + EXPLAIN_total ///< fake type, total number of all valid types + +@@ -1100,6 +1106,7 @@ enum class enum_explain_type { + */ + class Query_block { + public: ++ Query_block *orig; + /** + @note the group_by and order_by lists below will probably be added to the + constructor when the parser is converted into a true bottom-up design. +@@ -1181,6 +1188,8 @@ class Query_block { + + TABLE_LIST *find_table_by_name(const Table_ident *ident); + ++ void set_master_unit(Query_expression *unit) { master = unit; } ++ + /** + @return true If STRAIGHT_JOIN applies to all tables. + @return false Else. +@@ -1876,6 +1885,13 @@ class Query_block { + SQL_I_List group_list{}; + Group_list_ptrs *group_list_ptrs{nullptr}; + ++ /* ++ * the backup of group_list/order_list before optimization, which is used ++ * to generate worker's group_list/order_list. 
++ */ ++ PQ_Group_list_ptrs *saved_group_list_ptrs{nullptr}; ++ PQ_Group_list_ptrs *saved_order_list_ptrs{nullptr}; ++ + // Used so that AggregateIterator knows which items to signal when the rollup + // level changes. Obviously only used in the presence of rollup. + Prealloced_array rollup_group_items{ +@@ -2178,8 +2194,10 @@ class Query_block { + bool resolve_rollup(THD *thd); + + bool setup_wild(THD *thd); ++ public: + bool setup_order_final(THD *thd); + bool setup_group(THD *thd); ++ private: + void fix_after_pullout(Query_block *parent_query_block, + Query_block *removed_query_block); + void remove_redundant_subquery_clauses(THD *thd, +@@ -2233,10 +2251,12 @@ class Query_block { + // Delete unused columns from merged derived tables + void delete_unused_merged_columns(mem_root_deque *tables); + ++ public: + /// Helper for fix_prepare_information() + void fix_prepare_information_for_order(THD *thd, SQL_I_List *list, + Group_list_ptrs **list_ptrs); + ++ private: + bool prepare_values(THD *thd); + bool check_only_full_group_by(THD *thd); + bool is_row_count_valid_for_semi_join(); +@@ -2251,10 +2271,16 @@ class Query_block { + Template parameter is "true": no need to run DTORs on pointers. + */ + Mem_root_array *sj_candidates{nullptr}; +- ++ public: + /// How many expressions are part of the order by but not select list. + int hidden_order_field_count{0}; + ++ /** ++ Windows function maybe be optimized, so we save this value to determine ++ whether support parallel query. ++ */ ++ uint saved_windows_elements{0}; ++ private: + /** + Intrusive double-linked list of all query blocks within the same + query expression. +@@ -2279,6 +2305,8 @@ class Query_block { + should not be modified after resolving is done. + */ + ulonglong m_base_options{0}; ++ ++ public: + /** + Active options. Derived from base options, modifiers added during + resolving and values from session variable option_bits. Since the latter +@@ -2286,6 +2314,7 @@ class Query_block { + */ + ulonglong m_active_options{0}; + ++ private: + TABLE_LIST *resolve_nest{ + nullptr}; ///< Used when resolving outer join condition + +@@ -2301,10 +2330,10 @@ class Query_block { + + /// Condition to be evaluated on grouped rows after grouping. + Item *m_having_cond; +- ++ public: + /// Number of GROUP BY expressions added to all_fields + int hidden_group_field_count; +- ++ private: + /** + True if query block has semi-join nests merged into it. Notice that this + is updated earlier than sj_nests, so check this if info is needed +@@ -3900,6 +3929,7 @@ struct LEX : public Query_tables_list { + bool has_udf() const { return m_has_udf; } + st_parsing_options parsing_options; + Alter_info *alter_info; ++ bool in_execute_ps{false}; + /* Prepared statements SQL syntax:*/ + LEX_CSTRING prepared_stmt_name; /* Statement name (in all queries) */ + /* +diff --git a/sql/sql_lex_hints.cc b/sql/sql_lex_hints.cc +index a8d5caed..b5609a57 100644 +--- a/sql/sql_lex_hints.cc ++++ b/sql/sql_lex_hints.cc +@@ -1,5 +1,6 @@ + /* + Copyright (c) 2014, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
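
For reference, saved_group_list_ptrs / saved_order_list_ptrs above snapshot the intrusive GROUP BY / ORDER BY lists before the optimizer rewrites them, so that a worker's cloned query block can rebuild the original clauses. A minimal standalone sketch of that snapshot-and-rebuild idea, with Order, snapshot and rebuild as illustrative names rather than the server's types:

    #include <cassert>
    #include <vector>

    // Simplified stand-in for the server's intrusive ORDER list.
    struct Order {
      int field_no;
      Order *next = nullptr;
    };

    // Snapshot the list before the optimizer rewrites or drops elements,
    // so a cloned plan (e.g. a worker JOIN) can rebuild the original clause.
    std::vector<Order *> snapshot(Order *head) {
      std::vector<Order *> saved;
      for (Order *o = head; o != nullptr; o = o->next) saved.push_back(o);
      return saved;
    }

    // Rebuild a fresh linked list from the snapshot.
    Order *rebuild(const std::vector<Order *> &saved, std::vector<Order> &storage) {
      storage.clear();
      storage.reserve(saved.size());
      Order *head = nullptr, **tail = &head;
      for (Order *o : saved) {
        storage.push_back({o->field_no, nullptr});
        *tail = &storage.back();
        tail = &storage.back().next;
      }
      return head;
    }

    int main() {
      Order c{3}, b{2, &c}, a{1, &b};    // GROUP BY f1, f2, f3
      std::vector<Order *> saved = snapshot(&a);
      a.next = nullptr;                  // the optimizer "simplifies" the clause
      std::vector<Order> storage;
      Order *rebuilt = rebuild(saved, storage);
      assert(rebuilt && rebuilt->next && rebuilt->next->next);
      return 0;
    }
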
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -226,6 +227,8 @@ void Hint_scanner::add_hint_token_digest() { + case NO_ORDER_INDEX_HINT: + case DERIVED_CONDITION_PUSHDOWN_HINT: + case NO_DERIVED_CONDITION_PUSHDOWN_HINT: ++ case PQ_HINT: ++ case NO_PQ_HINT: + break; + default: + assert(false); +diff --git a/sql/sql_opt_exec_shared.h b/sql/sql_opt_exec_shared.h +index 2018c7c7..59e189f3 100644 +--- a/sql/sql_opt_exec_shared.h ++++ b/sql/sql_opt_exec_shared.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2014, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -150,6 +151,8 @@ struct TABLE_REF { + return false; + } + ++ bool pq_copy(JOIN *join, TABLE_REF *ref, QEP_TAB *qep_tab); ++ + /** + Check if there are triggered/guarded conditions that might be + 'switched off' by the subquery code when executing 'Full scan on +@@ -229,9 +232,11 @@ enum join_type { + class QEP_shared { + public: + QEP_shared() +- : m_join(nullptr), ++ : m_old_ref(nullptr), ++ m_join(nullptr), + m_idx(NO_PLAN_IDX), + m_table(nullptr), ++ m_old_table(nullptr), + m_position(nullptr), + m_sj_mat_exec(nullptr), + m_first_sj_inner(NO_PLAN_IDX), +@@ -239,9 +244,11 @@ class QEP_shared { + m_first_inner(NO_PLAN_IDX), + m_last_inner(NO_PLAN_IDX), + m_first_upper(NO_PLAN_IDX), +- m_ref(), ++ m_orign_ref(), ++ m_ref(&m_orign_ref), + m_index(0), + m_type(JT_UNKNOWN), ++ m_old_type(JT_UNKNOWN), + m_condition(nullptr), + m_keys(), + m_records(0), +@@ -268,7 +275,9 @@ class QEP_shared { + m_idx = i; + } + TABLE *table() const { return m_table; } ++ TABLE *old_table() const { return m_old_table; } + void set_table(TABLE *t) { m_table = t; } ++ void set_old_table(TABLE *t) { m_old_table = t; } + POSITION *position() const { return m_position; } + void set_position(POSITION *p) { m_position = p; } + Semijoin_mat_exec *sj_mat_exec() const { return m_sj_mat_exec; } +@@ -283,11 +292,17 @@ class QEP_shared { + void set_first_upper(plan_idx i) { m_first_upper = i; } + plan_idx last_inner() { return m_last_inner; } + plan_idx first_upper() { return m_first_upper; } +- TABLE_REF &ref() { return m_ref; } ++ TABLE_REF &ref() { return *m_ref; } ++ void set_ref(TABLE_REF *ref) { m_ref = ref; } ++ TABLE_REF &old_ref() { return *m_old_ref; } ++ void set_old_ref(TABLE_REF *ref) { m_old_ref = ref; } + uint index() const { return m_index; } + void set_index(uint i) { m_index = i; } + enum join_type type() const { return m_type; } + void set_type(enum join_type t) { m_type = t; } ++ //for dealing with first rewritten tab ++ enum join_type old_type() const { return m_old_type; } ++ void set_old_type(enum join_type t) { m_old_type = t;} + Item *condition() const { return m_condition; } + void set_condition(Item *c) { m_condition = c; } + bool condition_is_pushed_to_sort() const { +@@ -349,6 +364,10 @@ class QEP_shared { + + bool skip_records_in_range() const { return m_skip_records_in_range; } + ++ public: ++ // the origin index type of the first rewritten qep_tab in leader ++ TABLE_REF *m_old_ref; ++ + private: + JOIN *m_join; + +@@ -362,6 +381,8 @@ class QEP_shared { + + /// Corresponding table. Might be an internal temporary one. + TABLE *m_table; ++ // parallel query old_table ++ TABLE *m_old_table; + + /// Points into best_positions array. Includes cost info. 
+ POSITION *m_position; +@@ -397,7 +418,8 @@ class QEP_shared { + Used when we read constant tables, in misc optimization (like + remove_const()), and in execution. + */ +- TABLE_REF m_ref; ++ TABLE_REF m_orign_ref; ++ TABLE_REF *m_ref; + + /// ID of index used for index scan or semijoin LooseScan + uint m_index; +@@ -405,6 +427,8 @@ class QEP_shared { + /// Type of chosen access method (scan, etc). + enum join_type m_type; + ++ enum join_type m_old_type; ++ + /** + Table condition, ie condition to be evaluated for a row from this table. + Notice that the condition may refer to rows from previous tables in the +@@ -479,6 +503,8 @@ class QEP_shared_owner { + m_qs = q; + } + ++ QEP_shared *get_qs() { return m_qs; } ++ + // Getters/setters forwarding to QEP_shared: + + JOIN *join() const { return m_qs ? m_qs->join() : nullptr; } +@@ -492,6 +518,7 @@ class QEP_shared_owner { + qep_tab_map idx_map() const { return qep_tab_map{1} << m_qs->idx(); } + + TABLE *table() const { return m_qs->table(); } ++ TABLE *old_table() const { return m_qs->old_table(); } + POSITION *position() const { return m_qs->position(); } + void set_position(POSITION *p) { return m_qs->set_position(p); } + Semijoin_mat_exec *sj_mat_exec() const { return m_qs->sj_mat_exec(); } +@@ -509,10 +536,16 @@ class QEP_shared_owner { + void set_last_sj_inner(plan_idx i) { return m_qs->set_last_sj_inner(i); } + void set_first_upper(plan_idx i) { return m_qs->set_first_upper(i); } + TABLE_REF &ref() const { return m_qs->ref(); } ++ void set_ref(TABLE_REF *ref) { m_qs->set_ref(ref); } ++ TABLE_REF &old_ref() const { return m_qs->old_ref(); } ++ void set_old_ref(TABLE_REF *ref) { m_qs->set_old_ref(ref);} + uint index() const { return m_qs->index(); } + void set_index(uint i) { return m_qs->set_index(i); } + enum join_type type() const { return m_qs->type(); } + void set_type(enum join_type t) { return m_qs->set_type(t); } ++ //for dealing with first rewritten tab ++ enum join_type old_type() const { return m_qs->old_type(); } ++ void set_old_type(enum join_type t) { m_qs->set_old_type(t); } + Item *condition() const { return m_qs->condition(); } + void set_condition(Item *to) { return m_qs->set_condition(to); } + bool condition_is_pushed_to_sort() const { +@@ -560,7 +593,7 @@ class QEP_shared_owner { + + void qs_cleanup(); + +- protected: ++ public: + QEP_shared *m_qs; // qs stands for Qep_Shared + }; + +@@ -619,6 +652,10 @@ enum { + table + */ + REF_SLICE_TMP2, ++ /* ++ * Use to store PQ worker query result ++ */ ++ REF_SLICE_PQ_TMP, + /** + The slice with pointers to columns of table(s), ie., the actual Items. + Only used for queries involving temporary tables or the likes; for simple +diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc +index 29a3049e..cb0782a0 100644 +--- a/sql/sql_optimizer.cc ++++ b/sql/sql_optimizer.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -107,6 +108,8 @@ + #include "sql/window.h" + #include "sql_string.h" + #include "template_utils.h" ++#include "sql/pq_clone.h" ++#include "sql/item_strfunc.h" + + using std::max; + using std::min; +@@ -164,15 +167,22 @@ JOIN::JOIN(THD *thd_arg, Query_block *select) + // Needed in case optimizer short-cuts, set properly in + // make_tmp_tables_info() + fields(&select->fields), +- tmp_table_param(thd_arg->mem_root), ++ origin_tmp_table_param(thd_arg->mem_root), ++ tmp_table_param(&origin_tmp_table_param), ++ saved_tmp_table_param(nullptr), + lock(thd->lock), + // @todo Can this be substituted with select->is_implicitly_grouped()? + implicit_grouping(select->is_implicitly_grouped()), + select_distinct(select->is_distinct()), +- keyuse_array(thd->mem_root), ++ origin_keyuse_array(thd_arg->mem_root), ++ keyuse_array(&origin_keyuse_array), + query_block_fields(&select->fields), + order(select->order_list.first, ESC_ORDER_BY), + group_list(select->group_list.first, ESC_GROUP_BY), ++ pq_tab_idx(-1), ++ pq_rebuilt_group(false), ++ pq_stable_sort(false), ++ pq_last_sort_idx(-1), + m_windows(select->m_windows), + /* + Those four members are meaningless before JOIN::optimize(), so force a +@@ -183,7 +193,9 @@ JOIN::JOIN(THD *thd_arg, Query_block *select) + having_for_explain(reinterpret_cast(1)), + tables_list(reinterpret_cast(1)), + current_ref_item_slice(REF_SLICE_SAVED_BASE), +- with_json_agg(select->json_agg_func_used()) { ++ last_slice_before_pq(REF_SLICE_SAVED_BASE), ++ with_json_agg(select->json_agg_func_used()), ++ select_count(false) { + rollup_state = RollupState::NONE; + if (select->order_list.first) explain_flags.set(ESC_ORDER_BY, ESP_EXISTS); + if (select->group_list.first) explain_flags.set(ESC_GROUP_BY, ESP_EXISTS); +@@ -192,6 +204,8 @@ JOIN::JOIN(THD *thd_arg, Query_block *select) + // Calculate the number of groups + for (ORDER *group = group_list.order; group; group = group->next) + send_group_parts++; ++ ++ tmp_table_param->end_write_records = HA_POS_ERROR; + } + + bool JOIN::alloc_ref_item_slice(THD *thd_arg, int sliceno) { +@@ -204,16 +218,148 @@ bool JOIN::alloc_ref_item_slice(THD *thd_arg, int sliceno) { + return false; + } + ++bool JOIN::alloc_indirection_slices1() { ++ const uint card = REF_SLICE_WIN_1 + m_windows.elements * 2; ++ ++ assert(ref_items1 == nullptr); ++ ++ ref_items1 = ++ (Ref_item_array *)(*THR_MALLOC)->ArrayAlloc(card); ++ if (ref_items1 == nullptr) return true; ++ ++ tmp_fields1 = ++ (*THR_MALLOC)->ArrayAlloc>(card, *THR_MALLOC); ++ if (tmp_fields1 == nullptr) return true; ++ ++ for (uint i = 0; i < card; i++) { ++ ref_items1[i].reset(); ++ } ++ ++ ref_items = ref_items1; ++ tmp_fields = tmp_fields1; ++ ++ return false; ++} ++ + bool JOIN::alloc_indirection_slices() { + const uint card = REF_SLICE_WIN_1 + m_windows.elements * 2; + +- assert(ref_items == nullptr); +- ref_items = (*THR_MALLOC)->ArrayAlloc(card); +- if (ref_items == nullptr) return true; ++ assert(ref_items0 == nullptr); ++ ref_items0 = (*THR_MALLOC)->ArrayAlloc(card); ++ if (ref_items0 == nullptr) return true; + +- tmp_fields = ++ tmp_fields0 = + (*THR_MALLOC)->ArrayAlloc>(card, *THR_MALLOC); +- if (tmp_fields == nullptr) return true; ++ if (tmp_fields0 == nullptr) return true; ++ ++ for (uint i = 0; i < card; i++) { ++ ref_items0[i].reset(); ++ } ++ ++ ref_items = ref_items0; ++ tmp_fields = tmp_fields0; ++ ++ return false; ++} ++ ++void 
count_field_types(Query_block *select_lex, Temp_table_param *param, ++ List &fields, bool reset_with_sum_func, ++ bool save_sum_fields); ++ ++bool JOIN::restore_optimized_vars() { ++ ++ //restore the make_tmp_tables_info's parameter through saved_optimized_variables ++ grouped = saved_optimized_vars.pq_grouped; ++ group_optimized_away = saved_optimized_vars.pq_group_optimized_away; ++ implicit_grouping = saved_optimized_vars.pq_implicit_grouping; ++ need_tmp_before_win = saved_optimized_vars.pq_need_tmp_before_win; ++ simple_group = saved_optimized_vars.pq_simple_group; ++ simple_order = saved_optimized_vars.pq_simple_order; ++ streaming_aggregation = saved_optimized_vars.pq_streaming_aggregation; ++ m_ordered_index_usage = static_cast(saved_optimized_vars.pq_m_ordered_index_usage); ++ skip_sort_order = saved_optimized_vars.pq_skip_sort_order; ++ ++ // no need for template_join ++ if (need_tmp_pq || need_tmp_pq_leader) { ++ ORDER *optimized_order = NULL; ++ group_list.clean(); ++ ++ optimized_order = restore_optimized_group_order(query_block->group_list, ++ saved_optimized_vars.optimized_group_flags); ++ if (optimized_order) { ++ group_list = ORDER_with_src(optimized_order, ESC_GROUP_BY); ++ } ++ ++ order.clean(); ++ optimized_order = restore_optimized_group_order(query_block->order_list, ++ saved_optimized_vars.optimized_order_flags); ++ ++ if (optimized_order) { ++ order = ORDER_with_src(optimized_order, ESC_ORDER_BY); ++ } ++ ++ if (!group_list.empty()) { ++ uint old_group_parts = tmp_table_param->group_parts; ++ calc_group_buffer(this, group_list.order); ++ send_group_parts = tmp_table_param->group_parts; /* Save org parts */ ++ if (send_group_parts != old_group_parts) //error: leader and worker have different group fields ++ return true; ++ } ++ ++ /** Traverse expressions and inject cast nodes to compatible data types (in ++ * general for time related item), if needed */ ++ { ++ for (Item *item : *query_block_fields) { ++ item->walk(&Item::cast_incompatible_args, enum_walk::POSTFIX, nullptr); ++ } ++ } ++ } ++ return false; ++} ++ ++void JOIN::save_optimized_vars() { ++ // saved optimized variables ++ saved_optimized_vars.pq_grouped= grouped; ++ saved_optimized_vars.pq_group_optimized_away= group_optimized_away; ++ saved_optimized_vars.pq_implicit_grouping= implicit_grouping; ++ saved_optimized_vars.pq_need_tmp_before_win= need_tmp_before_win; ++ saved_optimized_vars.pq_simple_group= simple_group; ++ saved_optimized_vars.pq_simple_order= simple_order; ++ saved_optimized_vars.pq_streaming_aggregation= streaming_aggregation; ++ saved_optimized_vars.pq_skip_sort_order= skip_sort_order; ++ saved_optimized_vars.pq_m_ordered_index_usage = m_ordered_index_usage; ++ ++ //record the mapping: JOIN::group_list -> query_block->group_list ++ record_optimized_group_order(query_block->saved_group_list_ptrs, group_list, ++ saved_optimized_vars.optimized_group_flags); ++ record_optimized_group_order(query_block->saved_order_list_ptrs, order, ++ saved_optimized_vars.optimized_order_flags); ++} ++ ++bool JOIN::setup_tmp_table_info(JOIN* orig) ++{ ++ if (alloc_indirection_slices()) return true; ++ ++ // The base ref items from query block are assigned as JOIN's ref items ++ ref_items[REF_SLICE_ACTIVE] = query_block->base_ref_items; ++ ++ // make aggregation temp table info and create temp table for group by/order by/sort ++ tmp_table_param->pq_copy(orig->saved_tmp_table_param); ++ saved_tmp_table_param = new (thd->mem_root) Temp_table_param(); ++ if(!saved_tmp_table_param) { ++ return true; ++ } ++ 
saved_tmp_table_param->pq_copy(orig->saved_tmp_table_param); ++ ++ // aggregation ++ if(restore_optimized_vars()) ++ return true; ++ ++ select_distinct = orig->select_distinct; ++ ++ if(alloc_func_list()) { ++ return true; ++ } + + return false; + } +@@ -275,9 +421,9 @@ bool JOIN::optimize() { + trace_optimize.add_select_number(query_block->select_number); + Opt_trace_array trace_steps(trace, "steps"); + +- count_field_types(query_block, &tmp_table_param, *fields, false, false); ++ count_field_types(query_block, tmp_table_param, *fields, false, false); + +- assert(tmp_table_param.sum_func_count == 0 || !group_list.empty() || ++ assert(tmp_table_param->sum_func_count == 0 || !group_list.empty() || + implicit_grouping); + + const bool has_windows = m_windows.elements != 0; +@@ -468,7 +614,7 @@ bool JOIN::optimize() { + best_rowcount = 1; + error = 0; + if (make_tmp_tables_info()) return true; +- count_field_types(query_block, &tmp_table_param, *fields, false, false); ++ count_field_types(query_block, tmp_table_param, *fields, false, false); + // Make plan visible for EXPLAIN + set_plan_state(NO_TABLES); + create_access_paths(); +@@ -495,7 +641,7 @@ bool JOIN::optimize() { + substitute_gc(thd, query_block, where_cond, group_list.order, + order.order)) { + // We added hidden fields to the all_fields list, count them. +- count_field_types(query_block, &tmp_table_param, query_block->fields, false, ++ count_field_types(query_block, tmp_table_param, query_block->fields, false, + false); + } + // Ensure there are no errors prior making query plan +@@ -698,10 +844,10 @@ bool JOIN::optimize() { + // JOIN::optimize_rollup() may set allow_group_via_temp_table = false, + // and we must not undo that. + const bool save_allow_group_via_temp_table = +- tmp_table_param.allow_group_via_temp_table; ++ tmp_table_param->allow_group_via_temp_table; + +- count_field_types(query_block, &tmp_table_param, *fields, false, false); +- tmp_table_param.allow_group_via_temp_table = ++ count_field_types(query_block, tmp_table_param, *fields, false, false); ++ tmp_table_param->allow_group_via_temp_table = + save_allow_group_via_temp_table; + } + +@@ -852,6 +998,18 @@ bool JOIN::optimize() { + return true; // error == -1 + } + ++ if (thd->m_suite_for_pq == PqConditionStatus::ENABLED) { ++ // save temp table param for later PQ scan ++ saved_tmp_table_param = new (thd->mem_root) Temp_table_param(); ++ if (!saved_tmp_table_param) return true; ++ ++ saved_tmp_table_param->pq_copy(tmp_table_param); ++ ++ // saved optimized variables to saved_optimized_vars. 
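/* ---------------------------------------------------------------------------
   Illustrative sketch (not part of the patch): this is the point where the
   leader snapshots decisions the serial optimizer has already made, so that
   save_optimized_vars() / restore_optimized_vars() (defined earlier in this
   file) can replay them in cloned worker plans instead of re-optimizing. A
   standalone model of that snapshot/replay round trip; the field names mirror
   a few of the flags used above and are otherwise illustrative.
--------------------------------------------------------------------------- */
#include <cassert>
namespace pq_sketch_saved_vars {
struct OptimizerDecisions {      // stand-in for the JOIN-level flags
  bool grouped = false;
  bool simple_order = false;
  bool skip_sort_order = false;
};
struct SavedVars {               // stand-in for PQ_optimized_var
  bool pq_grouped = false;
  bool pq_simple_order = false;
  bool pq_skip_sort_order = false;
};
inline SavedVars save(const OptimizerDecisions &d) {
  return {d.grouped, d.simple_order, d.skip_sort_order};
}
inline void restore(const SavedVars &s, OptimizerDecisions *d) {
  d->grouped = s.pq_grouped;
  d->simple_order = s.pq_simple_order;
  d->skip_sort_order = s.pq_skip_sort_order;
}
inline void demo() {
  OptimizerDecisions leader{true, false, true};  // decided once, serially
  SavedVars snap = save(leader);                 // cf. save_optimized_vars()
  OptimizerDecisions worker;                     // cloned plan, flags reset
  restore(snap, &worker);                        // cf. restore_optimized_vars()
  assert(worker.grouped && worker.skip_sort_order && !worker.simple_order);
}
}  // namespace pq_sketch_saved_vars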
++ save_optimized_vars(); ++ saved_optimized_vars.pq_no_jbuf_after = no_jbuf_after; ++ } ++ + if (make_join_readinfo(this, no_jbuf_after)) + return true; /* purecov: inspected */ + +@@ -875,7 +1033,7 @@ bool JOIN::optimize() { + sort_cost = 0.0; + } + +- count_field_types(query_block, &tmp_table_param, *fields, false, false); ++ count_field_types(query_block, tmp_table_param, *fields, false, false); + + create_access_paths(); + +@@ -975,6 +1133,26 @@ bool JOIN::push_to_engines() { + return false; + } + ++bool JOIN::pq_copy_from(JOIN *orig) { ++ query_block->join = this; ++ where_cond = query_block->where_cond(); ++ tables_list = query_block->leaf_tables; ++ having_for_explain = orig->having_for_explain; ++ tables = orig->tables; ++ explain_flags = orig->explain_flags; ++ set_plan_state(JOIN::PLAN_READY); ++ pq_tab_idx = orig->pq_tab_idx; ++ calc_found_rows = orig->calc_found_rows; ++ m_select_limit = orig->m_select_limit; ++ query_expression()->select_limit_cnt = ++ orig->query_expression()->select_limit_cnt; ++ query_expression()->offset_limit_cnt = 0; ++ pq_stable_sort = orig->pq_stable_sort; ++ saved_optimized_vars = orig->saved_optimized_vars; ++ ++ return false; ++} ++ + /** + Substitute all expressions in the WHERE condition and ORDER/GROUP lists + that match generated columns (GC) expressions with GC fields, if any. +@@ -1128,10 +1306,53 @@ bool JOIN::alloc_qep(uint n) { + + ASSERT_BEST_REF_IN_JOIN_ORDER(this); + +- qep_tab = new (thd->mem_root) ++ qep_tab0 = new (thd->mem_root) + QEP_TAB[n + 1]; // The last one holds only the final op_type. +- if (!qep_tab) return true; /* purecov: inspected */ +- for (uint i = 0; i < n; ++i) qep_tab[i].init(best_ref[i]); ++ if (!qep_tab0) return true; /* purecov: inspected */ ++ for (uint i = 0; i < n; ++i) ++ { ++ qep_tab0[i].init(best_ref[i]); ++ qep_tab0[i].pos = i; ++ } ++ qep_tab = qep_tab0; ++ return false; ++} ++ ++bool JOIN::alloc_qep1(uint n) { ++ static_assert(MAX_TABLES <= INT_MAX8, "plan_idx needs to be wide enough."); ++ assert(tables == n); ++ ++ qep_tab1 = new (thd->pq_mem_root) QEP_TAB[n + 1]; ++ if (!qep_tab1) return true; /* purecov: inspected */ ++ ++ for (uint i=0; i< n; i++) qep_tab1[i].pos = i; ++ ++ for (uint i=0; i < n; i++){ ++ qep_tab1[i].set_qs(qep_tab0[i].get_qs()); ++ qep_tab1[i].set_join(this); ++ qep_tab1[i].match_tab = qep_tab0[i].match_tab; ++ qep_tab1[i].check_weed_out_table = qep_tab0[i].check_weed_out_table; ++ qep_tab1[i].flush_weedout_table = qep_tab0[i].flush_weedout_table; ++ qep_tab1[i].op_type = qep_tab0[i].op_type; ++ qep_tab1[i].table_ref = qep_tab0[i].table_ref; ++ qep_tab1[i].using_dynamic_range = qep_tab0[i].using_dynamic_range; ++ qep_tab0[i].set_old_type(qep_tab0[i].type()); ++ qep_tab0[i].set_old_ref(&qep_tab0[i].ref()); ++ qep_tab0[i].set_old_quick_optim(); ++ } ++ ++ for (uint i=0; i < primary_tables; i++) ++ { ++ qep_tab1[i].pq_copy(thd, &qep_tab0[i]); ++ TABLE *tb = qep_tab0[i].table(); ++ qep_tab1[i].set_table(tb); ++ ++ if(qep_tab0[i].quick()) { ++ qep_tab1[i].set_quick(qep_tab0[i].quick()); ++ } ++ } ++ qep_tab = qep_tab1; ++ + return false; + } + +@@ -1275,7 +1496,7 @@ bool JOIN::optimize_distinct_group_order() { + const bool windowing = m_windows.elements > 0; + const bool may_trace = select_distinct || !group_list.empty() || + !order.empty() || windowing || +- tmp_table_param.sum_func_count; ++ tmp_table_param->sum_func_count; + Opt_trace_context *const trace = &thd->opt_trace; + Opt_trace_disable_I_S trace_disabled(trace, !may_trace); + Opt_trace_object wrapper(trace); +@@ -1319,7 
+1540,7 @@ bool JOIN::optimize_distinct_group_order() { + JOIN_TAB *const tab = best_ref[const_tables]; + + if (plan_is_single_table() && (!group_list.empty() || select_distinct) && +- !tmp_table_param.sum_func_count && ++ !tmp_table_param->sum_func_count && + (!tab->quick() || + tab->quick()->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)) { + if (!group_list.empty() && rollup_state == RollupState::NONE && +@@ -1339,7 +1560,7 @@ bool JOIN::optimize_distinct_group_order() { + .add("removed_distinct", true); + } + } +- if (!(!group_list.empty() || tmp_table_param.sum_func_count || windowing) && ++ if (!(!group_list.empty() || tmp_table_param->sum_func_count || windowing) && + select_distinct && plan_is_single_table() && + rollup_state == RollupState::NONE) { + int order_idx = -1, group_idx = -1; +@@ -1365,7 +1586,7 @@ bool JOIN::optimize_distinct_group_order() { + tab, order, m_select_limit, + true, // no_changes + &tab->table()->keys_in_use_for_order_by, &order_idx); +- count_field_types(query_block, &tmp_table_param, *fields, false, false); ++ count_field_types(query_block, tmp_table_param, *fields, false, false); + } + ORDER *o; + bool all_order_fields_used; +@@ -1380,7 +1601,7 @@ bool JOIN::optimize_distinct_group_order() { + true, // no_changes + &tab->table()->keys_in_use_for_group_by, + &group_idx); +- count_field_types(query_block, &tmp_table_param, *fields, false, false); ++ count_field_types(query_block, tmp_table_param, *fields, false, false); + // ORDER BY and GROUP BY are using different indexes, can't skip sorting + if (group_idx >= 0 && order_idx >= 0 && group_idx != order_idx) + skip_sort_order = false; +@@ -1403,7 +1624,7 @@ bool JOIN::optimize_distinct_group_order() { + Force MySQL to read the table in sorted order to get result in + ORDER BY order. + */ +- tmp_table_param.allow_group_via_temp_table = false; ++ tmp_table_param->allow_group_via_temp_table = false; + } + grouped = true; // For end_write_group + trace_opt.add("changed_distinct_to_group_by", true); +@@ -1435,7 +1656,7 @@ bool JOIN::optimize_distinct_group_order() { + } + + calc_group_buffer(this, group_list.order); +- send_group_parts = tmp_table_param.group_parts; /* Save org parts */ ++ send_group_parts = tmp_table_param->group_parts; /* Save org parts */ + + /* + If ORDER BY is a prefix of GROUP BY and if windowing or ROLLUP +@@ -1445,7 +1666,7 @@ bool JOIN::optimize_distinct_group_order() { + */ + if ((test_if_subpart(group_list.order, order.order) && !m_windows_sort && + query_block->olap != ROLLUP_TYPE) || +- (group_list.empty() && tmp_table_param.sum_func_count)) { ++ (group_list.empty() && tmp_table_param->sum_func_count)) { + if (!order.empty()) { + order.clean(); + trace_opt.add("removed_order_by", true); +@@ -1503,7 +1724,7 @@ void JOIN::test_skip_sort() { + TODO: Explain the allow_group_via_temp_table part of the test below. 
+ */ + if ((m_ordered_index_usage != ORDERED_INDEX_GROUP_BY) && +- (tmp_table_param.allow_group_via_temp_table || ++ (tmp_table_param->allow_group_via_temp_table || + (tab->emb_sj_nest && + tab->position()->sj_strategy == SJ_OPT_LOOSE_SCAN))) { + need_tmp_before_win = true; +@@ -2453,7 +2674,7 @@ check_reverse_order: + tab->set_type(calc_join_type(tab->quick()->get_type())); + tab->use_quick = QS_RANGE; + if (tab->quick()->is_loose_index_scan()) +- join->tmp_table_param.precomputed_group_by = true; ++ join->tmp_table_param->precomputed_group_by = true; + tab->position()->filter_effect = COND_FILTER_STALE; + } + } // best_key >= 0 +@@ -2836,7 +3057,7 @@ bool JOIN::get_best_combination() { + (!group_list.empty() || (implicit_grouping && m_windows.elements) > 0 + ? 1 + : 0) + +- (select_distinct ? (tmp_table_param.outer_sum_func_count ? 2 : 1) : 0) + ++ (select_distinct ? (tmp_table_param->outer_sum_func_count ? 2 : 1) : 0) + + (order.empty() ? 0 : 1) + + (query_block->active_options() & + (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) +@@ -4612,7 +4833,7 @@ static bool change_cond_ref_to_const(THD *thd, I_List *save_list, + + @returns false if success, true if error + */ +-static bool propagate_cond_constants(THD *thd, I_List *save_list, ++bool propagate_cond_constants(THD *thd, I_List *save_list, + Item *and_father, Item *cond) { + assert(cond->real_item()->is_bool_func()); + if (cond->type() == Item::COND_ITEM) { +@@ -5010,7 +5231,7 @@ bool JOIN::make_join_plan() { + + // Build the key access information, which is the basis for ref access. + if (where_cond || query_block->outer_join) { +- if (update_ref_and_keys(thd, &keyuse_array, join_tab, tables, where_cond, ++ if (update_ref_and_keys(thd, keyuse_array, join_tab, tables, where_cond, + ~query_block->outer_join, query_block, &sargables)) + return true; + } +@@ -7842,7 +8063,7 @@ static void add_loose_index_scan_and_skip_scan_keys(JOIN *join, + (uchar *)&indexed_fields); + } + cause = "distinct"; +- } else if (join->tmp_table_param.sum_func_count && ++ } else if (join->tmp_table_param->sum_func_count && + is_indexed_agg_distinct(join, &indexed_fields)) { + /* + SELECT list with AGGFN(distinct col). The query qualifies for +@@ -10348,8 +10569,8 @@ static TABLE *get_sort_by_table(ORDER *a, ORDER *b, TABLE_LIST *tables) { + */ + + void JOIN::optimize_keyuse() { +- for (size_t ix = 0; ix < keyuse_array.size(); ++ix) { +- Key_use *keyuse = &keyuse_array.at(ix); ++ for (size_t ix = 0; ix < keyuse_array->size(); ++ix) { ++ Key_use *keyuse = &keyuse_array->at(ix); + table_map map; + /* + If we find a ref, assume this table matches a proportional +@@ -10878,9 +11099,9 @@ double calculate_subquery_executions(const Item_subselect *subquery, + */ + + bool JOIN::optimize_rollup() { +- tmp_table_param.allow_group_via_temp_table = false; ++ tmp_table_param->allow_group_via_temp_table = false; + rollup_state = RollupState::INITED; +- tmp_table_param.group_parts = send_group_parts; ++ tmp_table_param->group_parts = send_group_parts; + return false; + } + +diff --git a/sql/sql_optimizer.h b/sql/sql_optimizer.h +index 53a88995..964693d8 100644 +--- a/sql/sql_optimizer.h ++++ b/sql/sql_optimizer.h +@@ -2,6 +2,7 @@ + #define SQL_OPTIMIZER_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -51,7 +52,9 @@ + #include "sql/sql_select.h" // Key_use + #include "sql/table.h" + #include "sql/temp_table_param.h" ++#include "sql/sql_parallel.h" + ++struct PQ_optimized_var; + enum class Subquery_strategy : int; + class COND_EQUAL; + class Item_subselect; +@@ -59,8 +62,10 @@ class Item_sum; + class Opt_trace_context; + class THD; + class Window; ++class MQueue_handle; + struct AccessPath; + struct MYSQL_LOCK; ++class Gather_operator; + + class Item_equal; + template +@@ -136,7 +141,7 @@ class JOIN { + /// Query block that is optimized and executed using this JOIN + Query_block *const query_block; + /// Thread handler +- THD *const thd; ++ THD *thd; + + /** + Optimal query execution plan. Initialized with a tentative plan in +@@ -146,6 +151,8 @@ class JOIN { + JOIN_TAB *join_tab{nullptr}; + /// Array of QEP_TABs + QEP_TAB *qep_tab{nullptr}; ++ QEP_TAB *qep_tab0{nullptr}; ++ QEP_TAB *qep_tab1{nullptr}; + + /** + Array of plan operators representing the current (partial) best +@@ -207,6 +214,7 @@ class JOIN { + 5. semi-joined tables used with materialization strategy + */ + uint tables{0}; ///< Total number of tables in query block ++ uint old_tables{0}; ///< Save old total number of tables in query block + uint primary_tables{0}; ///< Number of primary input tables in query block + uint const_tables{0}; ///< Number of primary tables deemed constant + uint tmp_tables{0}; ///< Number of temporary tables used by query +@@ -311,7 +319,9 @@ class JOIN { + The one here is transiently used as a model by create_intermediate_table(), + to build the tmp table's own tmp_table_param. + */ +- Temp_table_param tmp_table_param; ++ Temp_table_param origin_tmp_table_param; ++ Temp_table_param *tmp_table_param; ++ Temp_table_param *saved_tmp_table_param; + MYSQL_LOCK *lock; + + enum class RollupState { NONE, INITED, READY }; +@@ -348,8 +358,8 @@ class JOIN { + should be used instead of a filesort when computing + ORDER/GROUP BY. + */ +- enum { +- ORDERED_INDEX_VOID, // No ordered index avail. ++ enum ORDERED_INDEX_USAGE { ++ ORDERED_INDEX_VOID = 0, // No ordered index avail. 
+ ORDERED_INDEX_GROUP_BY, // Use index for GROUP BY + ORDERED_INDEX_ORDER_BY // Use index for ORDER BY + } m_ordered_index_usage{ORDERED_INDEX_VOID}; +@@ -367,11 +377,18 @@ class JOIN { + */ + bool need_tmp_before_win{false}; + ++ // need a tmp table to store Parallel Query result ++ bool need_tmp_pq{false}; ++ ++ // need a tmp table for leader thread ++ bool need_tmp_pq_leader{false}; ++ + /// If JOIN has lateral derived tables (is set at start of planning) + bool has_lateral{false}; + + /// Used and updated by JOIN::make_join_plan() and optimize_keyuse() +- Key_use_array keyuse_array; ++ Key_use_array origin_keyuse_array; ++ Key_use_array *keyuse_array; + + /// List storing all expressions used in query block + mem_root_deque *query_block_fields; +@@ -392,6 +409,8 @@ class JOIN { + @see JOIN::make_tmp_tables_info() + */ + mem_root_deque *tmp_fields = nullptr; ++ mem_root_deque *tmp_fields0 = nullptr; ++ mem_root_deque *tmp_fields1 = nullptr; + + int error{0}; ///< set in optimize(), exec(), prepare_result() + +@@ -400,6 +419,18 @@ class JOIN { + */ + ORDER_with_src order, group_list; + ++ //used for worker's make_tmp_tables_info ++ PQ_optimized_var saved_optimized_vars; ++ ++ // the split table ++ int pq_tab_idx{-1}; ++ ++ bool pq_rebuilt_group{false}; ++ ++ bool pq_stable_sort{false}; ++ ++ int pq_last_sort_idx{-1}; ++ + // Used so that AggregateIterator knows which items to signal when the rollup + // level changes. Obviously only used in the presence of rollup. + Prealloced_array rollup_group_items{ +@@ -512,6 +543,12 @@ class JOIN { + Ref_item_array *ref_items{ + nullptr}; // cardinality: REF_SLICE_SAVED_BASE + 1 + #windows*2 + ++ Ref_item_array ++ *ref_items0{nullptr}; // cardinality: REF_SLICE_SAVED_BASE + 1 + #windows*2 ++ ++ Ref_item_array ++ *ref_items1{nullptr}; // use for parallel Query leader ++ + /** + The slice currently stored in ref_items[0]. + Used to restore the base ref_items slice from the "save" slice after it +@@ -519,6 +556,9 @@ class JOIN { + */ + uint current_ref_item_slice; + ++ // used for Parallel Query ++ uint last_slice_before_pq; ++ + /** + Used only if this query block is recursive. Contains count of + all executions of this recursive query block, since the last +@@ -664,7 +704,7 @@ class JOIN { + returning the row. + */ + bool send_row_on_empty_set() const { +- return (do_send_rows && tmp_table_param.sum_func_count != 0 && ++ return (do_send_rows && tmp_table_param->sum_func_count != 0 && + group_list.empty() && !group_optimized_away && + query_block->having_value != Item::COND_FALSE); + } +@@ -779,7 +819,7 @@ class JOIN { + bool create_intermediate_table(QEP_TAB *tab, + const mem_root_deque &tmp_table_fields, + ORDER_with_src &tmp_table_group, +- bool save_sum_fields); ++ bool save_sum_fields, bool force_disk_table = false); + + /** + Optimize distinct when used on a subset of the tables. +@@ -887,8 +927,36 @@ class JOIN { + POSITION *sjm_pos); + + bool add_having_as_tmp_table_cond(uint curr_tmp_table); ++ ++ public: + bool make_tmp_tables_info(); ++ ++ // make Paralle Query leader's qep tables info ++ bool make_leader_tables_info(); ++ // make a tmp table in Query_result_mq for PQ ++ bool make_pq_tables_info(); ++ bool alloc_qep1(uint n); ++ ++ /** ++ Test if an index could be used to replace filesort for ORDER BY/GROUP BY ++ ++ @details ++ Investigate whether we may use an ordered index as part of either ++ DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be ++ used for only the first of any of these terms to be executed. 
This ++ is reflected in the order which we check for test_if_skip_sort_order() ++ below. However we do not check for DISTINCT here, as it would have ++ been transformed to a GROUP BY at this stage if it is a candidate for ++ ordered index optimization. ++ If a decision was made to use an ordered index, the availability ++ if such an access path is stored in 'm_ordered_index_usage' for later ++ use by 'execute' or 'explain' ++ */ ++ void test_skip_sort(); ++ + void set_plan_state(enum_plan_state plan_state_arg); ++ ++ private: + bool compare_costs_of_subquery_strategies(Subquery_strategy *method); + ORDER *remove_const(ORDER *first_order, Item *cond, bool change_list, + bool *simple_order, bool group_by); +@@ -911,31 +979,8 @@ class JOIN { + */ + bool optimize_distinct_group_order(); + +- /** +- Test if an index could be used to replace filesort for ORDER BY/GROUP BY +- +- @details +- Investigate whether we may use an ordered index as part of either +- DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be +- used for only the first of any of these terms to be executed. This +- is reflected in the order which we check for test_if_skip_sort_order() +- below. However we do not check for DISTINCT here, as it would have +- been transformed to a GROUP BY at this stage if it is a candidate for +- ordered index optimization. +- If a decision was made to use an ordered index, the availability +- if such an access path is stored in 'm_ordered_index_usage' for later +- use by 'execute' or 'explain' +- */ +- void test_skip_sort(); +- + bool alloc_indirection_slices(); + +- /** +- Convert the executor structures to a set of access paths, storing +- the result in m_root_access_path. +- */ +- void create_access_paths(); +- + /** + Create access paths with the knowledge that there are going to be zero rows + coming from tables (before aggregation); typically because we know that +@@ -952,11 +997,28 @@ class JOIN { + AccessPath *attach_access_paths_for_having_and_limit(AccessPath *path); + /** @} */ + ++ public: + /** + An access path you can read from to get all records for this query + (after you create an iterator from it). + */ + AccessPath *m_root_access_path = nullptr; ++ ++ /** ++ Convert the executor structures to a set of access paths, stroing ++ the result in m_root_access_path. ++ */ ++ void create_access_paths(); ++ ++ bool pq_copy_from(JOIN *orig); ++ ++ bool alloc_indirection_slices1(); ++ ++ bool setup_tmp_table_info(JOIN *orig); ++ ++ bool restore_optimized_vars(); ++ ++ void save_optimized_vars(); + }; + + /** +@@ -966,7 +1028,7 @@ class JOIN { + */ + #define ASSERT_BEST_REF_IN_JOIN_ORDER(join) \ + do { \ +- assert((join)->tables == 0 || ((join)->best_ref && !(join)->join_tab)); \ ++ assert((join)->thd->parallel_exec || (join)->tables == 0 || ((join)->best_ref && !(join)->join_tab)); \ + } while (0) + + /** +@@ -1135,6 +1197,12 @@ extern const char *antijoin_null_cond; + */ + bool evaluate_during_optimization(const Item *item, const Query_block *select); + ++extern Field *create_tmp_field_for_schema(Item *item, TABLE *table, MEM_ROOT *root); ++extern void record_optimized_group_order(PQ_Group_list_ptrs *ptr, ORDER_with_src &new_list, ++ std::vector &optimized_flags); ++extern ORDER *restore_optimized_group_order(SQL_I_List &orig_list, ++ std::vector &optimized_flags); ++ + /** + Find the multiple equality predicate containing a field. 
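/* ---------------------------------------------------------------------------
   Illustrative sketch (not part of the patch): record_optimized_group_order()
   and restore_optimized_group_order(), declared just above and implemented in
   sql_parallel.cc below, encode which GROUP BY / ORDER BY elements the
   optimizer dropped as a vector of flags, so a worker can rebuild the
   optimized list from the original one. A standalone model of that
   record/restore round trip, using strings in place of ORDER elements.
--------------------------------------------------------------------------- */
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>
namespace pq_sketch_order_flags {
inline std::vector<bool> record(const std::vector<std::string> &original,
                                const std::vector<std::string> &optimized) {
  std::vector<bool> dropped(original.size(), true);
  std::size_t j = 0;
  for (std::size_t i = 0; i < original.size() && j < optimized.size(); ++i) {
    if (original[i] == optimized[j]) {  // this element survived optimization
      dropped[i] = false;
      ++j;
    }
  }
  return dropped;
}
inline std::vector<std::string> restore(const std::vector<std::string> &original,
                                        const std::vector<bool> &dropped) {
  std::vector<std::string> rebuilt;
  for (std::size_t i = 0; i < original.size(); ++i)
    if (!dropped[i]) rebuilt.push_back(original[i]);
  return rebuilt;
}
inline void demo() {
  std::vector<std::string> orig = {"a", "b", "c"};  // query block's group list
  std::vector<std::string> opt = {"a", "c"};        // JOIN's list after optimize
  assert(restore(orig, record(orig, opt)) == opt);
}
}  // namespace pq_sketch_order_flags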
+ +diff --git a/sql/sql_parallel.cc b/sql/sql_parallel.cc +new file mode 100644 +index 00000000..e986ab07 +--- /dev/null ++++ b/sql/sql_parallel.cc +@@ -0,0 +1,1220 @@ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "sql/sql_parallel.h" ++#include "include/my_alloc.h" ++#include "include/my_dbug.h" ++#include "include/mysql/psi/mysql_thread.h" ++#include "sql/auth/auth_acls.h" ++#include "sql/basic_row_iterators.h" ++#include "sql/exchange_nosort.h" ++#include "sql/exchange_sort.h" ++#include "sql/filesort.h" ++#include "sql/handler.h" ++#include "sql/item_sum.h" ++#include "sql/log.h" ++#include "sql/msg_queue.h" ++#include "sql/mysqld.h" ++#include "sql/mysqld_thd_manager.h" // Global_THD_manager ++#include "sql/opt_range.h" ++#include "sql/opt_trace.h" ++#include "sql/pq_clone.h" ++#include "sql/pq_condition.h" ++#include "sql/pq_global.h" ++#include "sql/query_result.h" ++#include "sql/sql_base.h" ++#include "sql/sql_optimizer.h" ++#include "sql/sql_tmp_table.h" ++#include "sql/timing_iterator.h" ++#include "sql/transaction.h" ++#include "sql/join_optimizer/explain_access_path.h" ++ ++ulonglong parallel_memory_limit = 0; ++ulong parallel_max_threads = 0; ++uint parallel_threads_running = 0; ++uint parallel_threads_refused = 0; ++uint parallel_memory_refused = 0; ++uint pq_memory_used[16] = {0}; ++uint pq_memory_total_used = 0; ++ ++mysql_mutex_t LOCK_pq_threads_running; ++mysql_cond_t COND_pq_threads_running; ++ ++Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno, ++ bool other_tbls_ok); ++ ++Item *make_cond_remainder(Item *cond, bool exclude_index); ++ ++void thd_set_thread_stack(THD *thd, const char *stack_start); ++ ++static JOIN *make_pq_worker_plan(PQ_worker_manager *mngr); ++ ++void release_pq_running_threads(uint dop) { ++ mysql_mutex_lock(&LOCK_pq_threads_running); ++ parallel_threads_running -= dop; ++ current_thd->pq_threads_running -= dop; ++ mysql_cond_broadcast(&COND_pq_threads_running); ++ mysql_mutex_unlock(&LOCK_pq_threads_running); ++} ++ ++/** ++ * Init record gather ++ * ++ * @retval: false if success, and otherwise true ++ */ ++bool MQ_record_gather::mq_scan_init(Filesort *sort, int workers, ++ uint ref_length, bool stab_output) { ++ if (sort) { ++ m_exchange = new (m_thd->pq_mem_root) ++ Exchange_sort(m_thd, m_tab->table(), sort, m_tab->old_table()->file, ++ workers, ref_length, 
stab_output); ++ } else { ++ m_exchange = new (m_thd->pq_mem_root) Exchange_nosort( ++ m_thd, m_tab->table(), workers, ref_length, stab_output); ++ } ++ ++ if (!m_exchange || m_exchange->init()) return true; ++ ++ return false; ++} ++ ++/** ++ * read table->record[0] from workers through message queue ++ * ++ * @retval: false if success, and otherwise true ++ */ ++ ++bool MQ_record_gather::mq_scan_next() { ++ assert(m_exchange); ++ return (m_exchange->read_mq_record()); ++} ++ ++void MQ_record_gather::mq_scan_end() { ++ assert(m_exchange); ++ m_exchange->cleanup(); ++} ++ ++PQ_worker_manager::PQ_worker_manager(uint id) ++ : m_id(id), ++ m_gather(nullptr), ++ thd_leader(nullptr), ++ thd_worker(nullptr), ++ m_handle(nullptr), ++ m_status(INIT), ++ m_active(false) { ++ mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); ++ mysql_cond_init(0, &m_cond); ++} ++ ++PQ_worker_manager::~PQ_worker_manager() { ++ mysql_mutex_destroy(&m_mutex); ++ mysql_cond_destroy(&m_cond); ++} ++ ++/* ++ * PQ worker wait for an status ++ * ++ * @leader: PQ leader thread ++ * @status: ++ * ++ * @retval: ++ * true if normally execute, and otherwise false (i.e., execution-error) ++ */ ++ ++bool PQ_worker_manager::wait_for_status(THD *leader MY_ATTRIBUTE((unused)), ++ uint status) { ++ assert(leader == current_thd); ++ mysql_mutex_lock(&m_mutex); ++ while (!(((unsigned int)this->m_status) & status)) { ++ struct timespec abstime; ++ Timeout_type wait_time = 5; ++ set_timespec(&abstime, wait_time * TIME_MILLION); ++ mysql_cond_timedwait(&m_cond, &m_mutex, &abstime); ++ } ++ mysql_mutex_unlock(&m_mutex); ++ return !(((unsigned int)this->m_status) & PQ_worker_state::ERROR); ++} ++ ++void PQ_worker_manager::signal_status(THD *thd, PQ_worker_state status) { ++ mysql_mutex_lock(&m_mutex); ++ if (!((unsigned int)this->m_status & (unsigned int)status)) { ++ this->m_status = status; ++ this->thd_worker = thd; ++ } ++ mysql_cond_signal(&m_cond); ++ mysql_mutex_unlock(&m_mutex); ++} ++ ++Gather_operator::Gather_operator(uint dop) ++ : m_dop(dop), ++ m_template_join(nullptr), ++ m_workers(nullptr), ++ m_table(nullptr), ++ m_pq_ctx(nullptr), ++ m_tab(nullptr), ++ keyno(0), ++ m_ha_err(0), ++ m_stmt_da(false), ++ m_code_state(nullptr), ++ table_scan(false) { ++ mysql_mutex_init(0, &lock_stmt_da, MY_MUTEX_INIT_FAST); ++} ++ ++Gather_operator::~Gather_operator() { mysql_mutex_destroy(&lock_stmt_da); } ++ ++/* ++ * replace the parameter item of Aggr. with the generated item in rewritten tab. 
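/* ---------------------------------------------------------------------------
   Illustrative sketch (not part of the patch): PQ_worker_manager's
   wait_for_status() and signal_status() above form a small leader/worker
   handshake: the worker publishes one state word (ready, completed, error),
   the leader waits until any of the bits it cares about is set, and an error
   bit makes the wait return false. A standalone model of that handshake using
   the standard library; the state values below are illustrative only.
--------------------------------------------------------------------------- */
#include <cassert>
#include <condition_variable>
#include <mutex>
#include <thread>
namespace pq_sketch_worker_handshake {
enum State : unsigned { INIT = 1u, READY = 2u, COMPLETE = 4u, ERROR_STATE = 8u };
struct WorkerManager {
  std::mutex m;
  std::condition_variable cv;
  unsigned status = INIT;
  // Leader side: block until any bit in `mask` is set; false means error.
  bool wait_for_status(unsigned mask) {
    std::unique_lock<std::mutex> lk(m);
    cv.wait(lk, [&] { return (status & mask) != 0; });
    return (status & ERROR_STATE) == 0;
  }
  // Worker side: publish the new state and wake the waiting leader.
  void signal_status(State s) {
    {
      std::lock_guard<std::mutex> lk(m);
      status = s;
    }
    cv.notify_one();
  }
};
inline void demo() {
  WorkerManager mgr;
  std::thread worker([&] { mgr.signal_status(COMPLETE); });  // query finished
  bool ok = mgr.wait_for_status(COMPLETE | ERROR_STATE);
  worker.join();
  assert(ok);
}
}  // namespace pq_sketch_worker_handshake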
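/* ---------------------------------------------------------------------------
   Illustrative sketch (not part of the patch): pq_replace_avg_func(), whose
   comment opens a few lines above, rewires AVG items so the leader side
   recomputes the average over what the workers produced. The patch does this
   by rewriting Item_sum_avg / Item_avg_field; the sketch below models only the
   underlying idea, making an average decomposable by shipping (sum, count)
   partials and dividing once at the leader, and is not the patch's code.
--------------------------------------------------------------------------- */
#include <cassert>
#include <cstddef>
#include <vector>
namespace pq_sketch_two_phase_avg {
struct AvgPartial { double sum = 0.0; std::size_t count = 0; };  // one per worker
inline AvgPartial partial_avg(const std::vector<double> &rows) {
  AvgPartial p;
  for (double v : rows) { p.sum += v; ++p.count; }
  return p;
}
inline double combine_avg(const std::vector<AvgPartial> &parts) {
  double sum = 0.0;
  std::size_t count = 0;
  for (const AvgPartial &p : parts) { sum += p.sum; count += p.count; }
  return count != 0 ? sum / count : 0.0;  // divide exactly once, at the leader
}
inline void demo() {
  AvgPartial p1 = partial_avg({1.0, 2.0});        // worker 1's partial state
  AvgPartial p2 = partial_avg({3.0, 4.0, 5.0});   // worker 2's partial state
  assert(combine_avg({p1, p2}) == 3.0);
}
}  // namespace pq_sketch_two_phase_avg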
++ * ++ */ ++void pq_replace_avg_func(THD *thd, Query_block *select MY_ATTRIBUTE((unused)), ++ mem_root_deque *fields, ++ nesting_map select_nest_level MY_ATTRIBUTE((unused))) { ++ size_t i = 0; ++ for (Item *item : *fields) { ++ if (item->real_item()->type() == Item::SUM_FUNC_ITEM) { ++ Item_sum *item_old = (Item_sum *)(item->real_item()); ++ assert(item_old); ++ if (item_old->sum_func() == Item_sum::AVG_FUNC) { ++ Item_sum_avg *item_avg = dynamic_cast(item_old); ++ assert(item_avg); ++ item_avg->pq_avg_type = PQ_LEADER; ++ item_avg->resolve_type(thd); ++ } ++ } else if (item->real_item()->type() == Item::FIELD_AVG_ITEM) { ++ Item_avg_field *item_avg_field = ++ dynamic_cast(item->real_item()); ++ Item_sum_avg *item_avg = item_avg_field->avg_item; ++ item_avg->pq_avg_type = PQ_LEADER; ++ item_avg_field->pq_avg_type = PQ_LEADER; ++ item_avg->resolve_type(thd); ++ (*fields)[i] = item_avg; ++ ++ } else if (item->real_item()->type() == Item::FIELD_ITEM) { ++ Item_field *item_field = dynamic_cast(item->real_item()); ++ Item_sum *item_sum = item_field->field->item_sum_ref; ++ if (item_sum && item_sum->sum_func() == Item_sum::AVG_FUNC) { ++ Item_sum_avg *item_avg = down_cast(item_sum); ++ item_avg->pq_avg_type = PQ_LEADER; ++ item_avg->resolve_type(thd); ++ (*fields)[i] = item_avg; ++ } ++ } ++ i++; ++ } ++} ++ ++/** ++ * build sum funcs based on PQ leader temp table field when orig JOIN old ++ * fields_list contain sum funcs. because origin sum item has been replaced by ++ * Item_field in temp table fields list ++ * ++ * @fields_orig:orig fields list which could contain sum funs item ++ * @fields_new: PQ leader temp table's fields list ++ * ++ */ ++bool pq_build_sum_funcs(THD *thd, Query_block *select, Ref_item_array &ref_ptr, ++ mem_root_deque &fields, uint elements, ++ nesting_map select_nest_level) { ++ uint saved_allow_sum_funcs = thd->lex->allow_sum_func; ++ thd->lex->allow_sum_func |= select_nest_level; ++ uint border = fields.size() - elements; ++ ++ size_t i = 0; ++ for (Item *item : fields) { ++ if (item->real_item()->type() == Item::FIELD_ITEM) { ++ Item_field *item_field = dynamic_cast(item); ++ ++ if (item_field == nullptr || item_field->field == nullptr || ++ item_field->field->item_sum_ref == nullptr) { ++ i++; ++ continue; ++ } ++ ++ Item_sum *item_ref = item_field->field->item_sum_ref; ++ ++ if (item_ref->type() == Item::SUM_FUNC_ITEM) { ++ Item_sum *sum_func = ++ item_ref->pq_rebuild_sum_func(thd, select, item_field); ++ assert(DBUG_EVALUATE_IF("skip_pq_clone_check", true, false) || ++ sum_func); ++ if (!sum_func) { ++ thd->lex->allow_sum_func = saved_allow_sum_funcs; ++ return true; ++ } ++ sum_func->fix_fields(thd, nullptr); ++ item_field->field->item_sum_ref = sum_func; ++ fields[i] = sum_func; ++ ref_ptr[((i < border) ? 
fields.size() - i - 1 : i - border)] = ++ sum_func; ++ } ++ } ++ i++; ++ } ++ thd->lex->allow_sum_func = saved_allow_sum_funcs; ++ return false; ++} ++ ++THD *pq_new_thd(THD *thd) { ++ DBUG_TRACE; ++ ++ THD *new_thd = new (thd->pq_mem_root) THD(); ++ if (!new_thd || ++ DBUG_EVALUATE_IF("dup_thd_abort", (!(new_thd->net.error = 0)), false)) { ++ goto err; ++ } ++ ++ new_thd->set_new_thread_id(); ++ thd_set_thread_stack(new_thd, (char *)&new_thd); ++ new_thd->init_cost_model(); ++ new_thd->store_globals(); ++ new_thd->want_privilege = 0; ++ new_thd->net.error = 0; ++ new_thd->set_db(thd->db()); ++ new_thd->pq_copy_from(thd); ++ ++ return new_thd; ++ ++err: ++ if (new_thd) { ++ end_connection(new_thd); ++ close_connection(new_thd, 0, false, false); ++ new_thd->release_resources(); ++ new_thd->get_stmt_da()->reset_diagnostics_area(); ++ destroy(new_thd); ++ } ++ return nullptr; ++} ++ ++/** ++ * make a parallel query gather operator from a serial query plan ++ * ++ * @join : is a pysics serial query plan ++ * @table : table need to parallel scan ++ * @dop is : the degree of parallel ++ * ++ */ ++Gather_operator *make_pq_gather_operator(JOIN *join, QEP_TAB *tab, uint dop) { ++ THD *thd = current_thd; ++ assert(thd == join->thd && thd->parallel_exec); ++ JOIN *template_join = nullptr; ++ Gather_operator *gather_opr = nullptr; ++ ++ // duplicate a query plan template from join, which is used in PQ workers ++ THD *new_thd = pq_new_thd(join->thd); ++ if (!new_thd) goto err; ++ new_thd->pq_leader = thd; ++ new_thd->parallel_exec = true; ++ template_join = pq_make_join(new_thd, join); ++ ++ if (!template_join || pq_dup_tabs(template_join, join, true)) { ++ goto err; ++ } ++ ++ if (template_join->setup_tmp_table_info(join) || ++ DBUG_EVALUATE_IF("pq_gather_error1", true, false)) { ++ sql_print_warning("[Parallel query] Setup gather tmp tables failed"); ++ goto err; ++ } ++ ++ /** duplicate a new THD and set it as current_thd, so here should restore old ++ * THD */ ++ thd->store_globals(); ++ gather_opr = new (thd->pq_mem_root) Gather_operator(dop); ++ ++ if (!gather_opr || DBUG_EVALUATE_IF("pq_gather_error2", true, false)) { ++ goto err; ++ } ++ ++ tab->gather = gather_opr; ++ gather_opr->m_template_join = template_join; ++ gather_opr->m_tab = tab; ++ gather_opr->m_table = tab->table(); ++#ifndef NDEBUG ++ gather_opr->m_code_state = my_thread_var_dbug(); ++ assert(gather_opr->m_code_state && *(gather_opr->m_code_state)); ++#endif ++ template_join->thd->push_diagnostics_area(&gather_opr->m_stmt_da); ++ ++ gather_opr->m_workers = ++ thd->pq_mem_root->ArrayAlloc(dop); ++ ++ if (!gather_opr->m_workers || ++ DBUG_EVALUATE_IF("pq_gather_error3", (!(gather_opr->m_workers = nullptr)), ++ false)) { ++ goto err; ++ } ++ ++ for (uint i = 0; i < dop; i++) { ++ gather_opr->m_workers[i] = new (thd->pq_mem_root) PQ_worker_manager(i); ++ if (!gather_opr->m_workers[i]) goto err; ++ gather_opr->m_workers[i]->m_gather = gather_opr; ++ gather_opr->m_workers[i]->thd_leader = thd; ++ gather_opr->m_workers[i]->thd_worker = nullptr; ++ gather_opr->m_workers[i]->thread_id.thread = 0; ++ } ++ return gather_opr; ++ ++err: ++ if (new_thd) new_thd->store_globals(); ++ pq_free_join(template_join); ++ pq_free_thd(new_thd); ++ if (gather_opr && gather_opr->m_workers) { ++ for (uint i = 0; i < dop; i++) { ++ destroy(gather_opr->m_workers[i]); ++ } ++ } ++ destroy(gather_opr); ++ thd->store_globals(); ++ return nullptr; ++} ++ ++bool Gather_operator::init() { ++ int error = 0; ++ THD *thd = current_thd; ++ assert(thd == 
m_table->in_use); ++ int tab_idx = m_template_join->pq_tab_idx; ++ assert(tab_idx >= (int)m_template_join->const_tables && ++ m_template_join->qep_tab[tab_idx].do_parallel_scan); ++ ++ QEP_TAB *tab = &m_template_join->qep_tab[tab_idx]; ++ m_table->file->pq_reverse_scan = tab->m_reversed_access; ++ m_table->file->pq_range_type = PQ_QUICK_SELECT_NONE; ++ join_type type = tab->type(); ++ switch (type) { ++ case JT_ALL: ++ keyno = m_table->s->primary_key; ++ /* ++ * Note that: order/group-by may be optimized in test_skip_sort(), and ++ * correspondingly the order/group-by is finished with the generated ++ * tab->quick(). ++ */ ++ if (m_tab->quick() && ++ m_template_join->m_ordered_index_usage != JOIN::ORDERED_INDEX_VOID) { ++ keyno = m_tab->quick()->index; ++ } ++ table_scan = true; ++ break; ++ case JT_RANGE: ++ assert(m_tab->quick()); ++ keyno = m_tab->quick()->index; ++ break; ++ case JT_REF: ++ m_table->file->pq_ref_key.key = tab->ref().key_buff; ++ m_table->file->pq_ref_key.keypart_map = ++ make_prev_keypart_map(tab->ref().key_parts); ++ m_table->file->pq_ref_key.length = tab->ref().key_length; ++ m_table->file->pq_ref_key.flag = HA_READ_KEY_OR_NEXT; ++ m_table->file->pq_ref = true; ++ keyno = tab->ref().key; ++ break; ++ case JT_INDEX_SCAN: ++ keyno = m_tab->index(); ++ break; ++ default: ++ assert(0); ++ keyno = m_table->s->primary_key; ++ } ++ ++ QUICK_SELECT_I *quick = m_tab->quick(); ++ if (quick && type == JT_RANGE) { ++ m_table->file->pq_range_type = quick->quick_select_type(); ++ if (quick->reset()) return true; ++ if (quick->reverse_sorted()) m_table->file->pq_reverse_scan = true; ++ } ++ ++ /** partition table into blocks for parallel scan by multiple workers, ++ * if blocks number is less than workers, m_dop will be changed to blocks ++ * number ++ */ ++ uint dop_orig = m_dop; ++ error = m_table->file->ha_pq_init(m_dop, keyno); ++ m_pq_ctx = thd->pq_ctx; ++ if (error) { ++ m_table->file->print_error(error, MYF(0)); ++ m_ha_err = error; ++ return true; ++ } ++ ++ if (dop_orig != m_dop) { ++ release_pq_running_threads(dop_orig - m_dop); ++ } ++ ++ return false; ++} ++ ++/* ++ * signal the threads waiting for data ++ */ ++void Gather_operator::signalAll() { m_table->file->ha_pq_signal_all(); } ++ ++void Gather_operator::signalReadEnd() { ++ m_read_end_mutex.unlock(); ++} ++ ++void Gather_operator::waitReadEnd() { ++ m_read_end_mutex.lock(); ++} ++ ++void pq_free_gather(Gather_operator *gather) { ++ THD *thd_temp = gather->m_template_join->thd; ++ if (thd_temp == nullptr) return; ++ ++ THD *saved_thd = current_thd; ++ gather->m_table->file->ha_index_or_rnd_end(); ++ ++ // explain format=tree called make_pq_worker_plan, hence ++ // need to free worker join/thd ++ for (uint i = 0; i < gather->m_dop; i++) { ++ if (gather->m_workers[i]->thd_worker) { ++ gather->m_workers[i]->thd_worker->store_globals(); ++ gather->m_workers[i]->thd_worker->lex->unit->cleanup( ++ gather->m_workers[i]->thd_worker, true); ++ pq_free_thd(gather->m_workers[i]->thd_worker); ++ } ++ } ++ ++ thd_set_thread_stack(thd_temp, (char *)thd_temp); ++ thd_temp->store_globals(); ++ ++ uint tables = gather->m_template_join->tables; ++ for (uint i = 0; i < tables; i++) { ++ if (gather->m_template_join->qep_tab[i].table()) { ++ gather->m_template_join->qep_tab[i].table()->set_keyread(false); ++ gather->m_template_join->qep_tab[i].set_keyread_optim(); ++ } ++ } ++ ++ pq_free_join(gather->m_template_join); ++ for (uint i = 0; i < gather->m_dop; i++) { ++ destroy(gather->m_workers[i]); ++ } ++ destroy(gather); ++ ++ 
pq_free_thd(thd_temp); ++ thd_set_thread_stack(saved_thd, (char *)&saved_thd); ++ saved_thd->store_globals(); ++} ++ ++void Gather_operator::end() { pq_free_gather(this); } ++ ++static void restore_leader_plan(JOIN *join) { ++ join->pq_stable_sort = false; ++ join->qep_tab = join->qep_tab0; ++ join->ref_items = join->ref_items0; ++ join->tmp_fields = join->tmp_fields0; ++} ++ ++/** ++ * make parallel query leader's physical query plan ++ * ++ * @join : origin serial query plan ++ * @dop : degree of parallel ++ * @return ++ * ++ * SEQ_EXEC: can not run in parallel mode, due to RBO. ++ * PARL_EXEC: successfully run in parallel mode ++ * ABORT_EXEC: run error in parallal mode and then drop it ++ */ ++PQ_exec_status make_pq_leader_plan(THD *thd) { ++ if (!check_pq_conditions(thd)) { ++ thd->m_suite_for_pq = PqConditionStatus::NOT_SUPPORTED; ++ return PQ_exec_status::SEQ_EXEC; ++ } else { ++ thd->m_suite_for_pq = PqConditionStatus::SUPPORTED; ++ } ++ ++ uint dop = thd->pq_dop; ++ JOIN *join = thd->lex->unit->first_query_block()->join; ++ mem_root_deque *fields_old = join->fields; ++ QEP_TAB *tab = nullptr; ++ Gather_operator *gather = nullptr; ++ char buff[64] = {0}; ++ ulong saved_thd_want_privilege = thd->want_privilege; ++ thd->want_privilege = 0; ++ ++ Opt_trace_context *const trace = &thd->opt_trace; ++ Opt_trace_object trace_wrapper(trace); ++ Opt_trace_object trace_exec(trace, "make_parallel_query_plan"); ++ trace_exec.add_select_number(join->query_block->select_number); ++ Opt_trace_array trace_detail(trace, "detail"); ++ ++ MEM_ROOT *saved_mem_root = thd->mem_root; ++ thd->mem_root = thd->pq_mem_root; ++ thd->parallel_exec = true; // pass the RBO ++ ++ uint tab_idx = 0; ++ for (uint i = join->const_tables; i < join->primary_tables; i++) { ++ if (join->qep_tab[i].do_parallel_scan) { ++ tab_idx = i; ++ join->pq_tab_idx = tab_idx; ++ break; ++ } ++ } ++ ++ if (tab_idx < join->primary_tables) { ++ TABLE_LIST *table_ref = join->qep_tab[tab_idx].table_ref; ++ Opt_trace_object trace_one_table(trace); ++ trace_one_table.add_utf8_table(table_ref).add("degree of parallel", dop); ++ ++ join->alloc_qep1(join->tables); ++ join->alloc_indirection_slices1(); ++ join->ref_items1[REF_SLICE_ACTIVE] = join->query_block->base_ref_items; ++ ++ join->pq_stable_sort = pq_check_stable_sort(join, tab_idx); ++ gather = make_pq_gather_operator(join, &join->qep_tab[tab_idx], dop); ++ if (!gather || DBUG_EVALUATE_IF("pq_leader_abort1", true, false)) { ++ goto err; ++ } ++ assert(gather->m_template_join->thd->pq_leader); ++ tab = &join->qep_tab[tab_idx]; ++ tab->set_old_table(tab->table()); ++ // replace parallel scan table with a tmp table ++ join->primary_tables = tab_idx; ++ join->need_tmp_pq_leader = true; ++ join->restore_optimized_vars(); ++ if (join->make_leader_tables_info() || ++ DBUG_EVALUATE_IF("pq_leader_abort2", true, false)) ++ goto err; ++ join->old_tables = join->tables; ++ join->tables = join->primary_tables + join->tmp_tables; ++ ++ assert(tab->table()->s->table_category == TABLE_CATEGORY_TEMPORARY); ++ tab->set_type(JT_ALL); ++ tab->gather = gather; ++ tab->check_weed_out_table = nullptr; ++ tab->flush_weedout_table = nullptr; ++ ++ // create TABLE_LIST object for explain ++ TABLE_LIST *tbl = new (thd->pq_mem_root) TABLE_LIST; ++ if (!tbl) goto err; ++ ++ tbl->query_block = join->query_block; ++ tbl->table_name = (char *)thd->memdup(tab->table()->s->table_name.str, ++ tab->table()->s->table_name.length); ++ tbl->table_name_length = tab->table()->s->table_name.length; ++ tbl->db = (char 
*)thd->memdup(tab->table()->s->db.str, ++ tab->table()->s->db.length); ++ tbl->db_length = tab->table()->s->db.length; ++ snprintf(buff, 64, "", ++ gather->m_template_join->query_block->select_number); ++ tbl->alias = (char *)thd->memdup(buff, 64); ++ if (!tbl->table_name || !tbl->db || !tbl->alias) goto err; ++ ++ tab->table_ref = tbl; ++ tbl->set_tableno(tab_idx); ++ tab->table()->pos_in_table_list = tbl; ++ join->query_block->table_list.link_in_list(tbl, &tbl->next_local); ++ TABLE_REF *ref = new (thd->pq_mem_root) TABLE_REF(); ++ if (!ref) goto err; ++ ++ tab->set_ref(ref); ++ for (uint i = 0; i < join->tables; i++) { ++ join->qep_tab[i].set_condition(nullptr); ++ // we have shrinked primary_tables, so we update position here ++ if (i > join->primary_tables) { ++ join->qep_tab[i].set_position(nullptr); ++ } ++ } ++ ++ join->query_expression()->clear_root_access_path(); ++ ++ // generate execution tree ++ join->m_root_access_path = nullptr; ++ join->create_access_paths(); ++ join->query_expression()->create_access_paths(thd); ++ if (join->query_expression()->force_create_iterators(thd)) goto err; ++ ++ thd->mem_root = saved_mem_root; ++ thd->want_privilege = saved_thd_want_privilege; ++ ++ if (thd->lex->is_explain()) { ++ // Treat the first tmp_table which obtained info from worker as ++ // primary_table, because explain format=json need `primary_tables >= 1` ++ join->primary_tables += 1; ++ join->tmp_tables -= 1; ++ ++ // explain format=[traditional/tree/json] need real dop, so we call ++ // gather->init here, explain format=analyze will call gather->init, do ++ // not need call here ++ if (!thd->lex->is_explain_analyze) { ++ gather->init(); ++ } ++ ++ // explain format=tree need generate worker plan first ++ if (thd->lex->explain_format->is_tree() && ++ !thd->lex->is_explain_analyze) { ++ JOIN *worker_join = make_pq_worker_plan(gather->m_workers[0]); ++ gather->m_workers[0]->thd_worker = worker_join->thd; ++ ++ /** make_pq_worker_plan will duplicate a new THD and set it as ++ * current_thd, so here should restore old THD ++ */ ++ thd->store_globals(); ++ } ++ } ++ ++ return PQ_exec_status::PARL_EXEC; ++ } ++ ++err: ++ thd->mem_root = saved_mem_root; ++ if (gather) pq_free_gather(gather); ++ ++ join->fields = fields_old; ++ thd->want_privilege = saved_thd_want_privilege; ++ restore_leader_plan(join); ++ ++ my_error(ER_PARALLEL_FAIL_INIT, MYF(0)); ++ return PQ_exec_status::ABORT_EXEC; ++} ++ ++/** ++ * make a parallel query worker's pysics query plan ++ * ++ * @template_join : template query plan ++ */ ++static JOIN *make_pq_worker_plan(PQ_worker_manager *mngr) { ++ JOIN *join = nullptr; ++ Gather_operator *gather = mngr->m_gather; ++ JOIN *template_join = gather->m_template_join; ++ handler *file = nullptr; ++ Query_result *mq_result = nullptr; ++ MQueue_handle *msg_handler = mngr->m_handle; ++ ++ // duplicate a query plan from template join, which is used in PQ workers ++ THD *new_thd = pq_new_thd(template_join->thd); ++ if (!new_thd) goto err; ++ new_thd->pq_leader = mngr->thd_leader; ++ new_thd->mem_root = new_thd->pq_mem_root; ++ ++ /* save worker's THD in leader's THD */ ++ mysql_mutex_lock(&mngr->thd_leader->pq_lock_worker); ++ if (mngr->thd_leader->killed) { ++ mysql_mutex_unlock(&mngr->thd_leader->pq_lock_worker); ++ goto err; ++ } ++ mngr->thd_leader->pq_workers.push_back(new_thd); ++ mysql_mutex_unlock(&mngr->thd_leader->pq_lock_worker); ++ ++ join = pq_make_join(new_thd, template_join); ++ if (!join || pq_dup_tabs(join, template_join, true) || ++ 
DBUG_EVALUATE_IF("pq_worker_abort1", true, false)) { ++ goto err; ++ } ++ ++ join->having_cond = join->query_block->having_cond(); ++ join->need_tmp_pq = true; ++ if (join->setup_tmp_table_info(template_join) || ++ join->make_tmp_tables_info() || ++ pq_make_join_readinfo(join, mngr, ++ join->saved_optimized_vars.pq_no_jbuf_after) || ++ DBUG_EVALUATE_IF("pq_worker_abort2", true, false)) { ++ sql_print_warning( ++ "[Parallel query] Create worker tmp tables or make join read info " ++ "failed"); ++ goto err; ++ } ++ ++ /** set query result */ ++ file = join->qep_tab[join->pq_tab_idx].table()->file; ++ mq_result = new (join->thd->pq_mem_root) ++ Query_result_mq(join, msg_handler, file, join->pq_stable_sort); ++ if (!mq_result || DBUG_EVALUATE_IF("pq_worker_error2", true, false)) { ++ sql_print_warning("[Parallel query] Create worker result mq failed"); ++ goto err; ++ } ++ ++ join->query_expression()->set_query_result(mq_result); ++ join->query_block->set_query_result(mq_result); ++ ++ return join; ++ ++err: ++ if (new_thd) new_thd->store_globals(); ++ pq_free_join(join); ++ pq_free_thd(new_thd); ++ mngr->thd_leader->store_globals(); ++ return nullptr; ++} ++ ++/** ++ * main function of parallel worker. ++ * ++ */ ++void *pq_worker_exec(void *arg) { ++ if (my_thread_init()) { ++ my_thread_exit(0); ++ return nullptr; ++ } ++ ++ Diagnostics_area *da; ++ const Sql_condition *cond; ++ /** only for single query block */ ++ Query_result *result = nullptr; ++ THD *thd = nullptr, *leader_thd = nullptr; ++ ++ PQ_worker_manager *mngr = static_cast(arg); ++ assert(mngr->m_gather); ++#ifndef NDEBUG ++ pq_stack_copy(*mngr->m_gather->m_code_state); ++#endif ++ leader_thd = mngr->thd_leader; ++ THD *temp_thd = mngr->m_gather->m_template_join->thd; ++ MQueue_handle *msg_handler = mngr->m_handle; ++ bool send_error_status = true; ++ ++ JOIN *join = make_pq_worker_plan(mngr); ++ if (!join || DBUG_EVALUATE_IF("pq_worker_error1", true, false)) { ++ sql_print_warning("[Parallel query] Make worker plan failed"); ++ goto err; ++ } ++ ++ thd = join->thd; ++ assert(current_thd == thd && thd->pq_leader == leader_thd); ++ mngr->signal_status(thd, PQ_worker_state::READY); ++ join->query_expression()->ExecuteIteratorQuery(thd); ++ ++ if (thd->lex->is_explain_analyze && mngr->m_id == 0) { ++ msg_handler->set_datched_status(MQ_HAVE_DETACHED); ++ mngr->m_gather->waitReadEnd(); ++ Query_expression *unit = leader_thd->lex->unit; ++ leader_thd->pq_explain += PrintQueryPlan( ++ 0, unit->root_access_path(), ++ unit->is_union() ? nullptr : unit->first_query_block()->join, ++ !unit->is_union()); ++ } ++ ++ if (join->thd->is_error() || join->thd->pq_error || ++ DBUG_EVALUATE_IF("pq_worker_error3", true, false)) { ++ goto err; ++ } ++ send_error_status = false; ++ ++err: ++ ++ /* s1: send error msg to MQ */ ++ if (send_error_status) { ++ assert(msg_handler && leader_thd); ++ leader_thd->pq_error = true; ++ msg_handler->send_exception_msg(ERROR_MSG); ++ } ++ msg_handler->set_datched_status(MQ_HAVE_DETACHED); ++ /** during pq_make_join, join->thd may have been created */ ++ thd = (join && join->thd) ? join->thd : thd; ++ ++ /* s2: release resource */ ++ result = join ? 
join->query_block->query_result() : nullptr; ++ if (result) { ++ result->cleanup(thd); ++ destroy(result); ++ } ++ ++ if (join) { ++ join->join_free(); ++ } ++ ++ if (thd) { ++ thd->lex->unit->cleanup(thd, true); ++ } ++ ++ /* s3: collect error message */ ++ if (thd) { ++ mysql_mutex_t *stmt_lock = &mngr->m_gather->lock_stmt_da; ++ mysql_mutex_lock(stmt_lock); ++ temp_thd->pq_merge_status(thd); ++ da = thd->get_stmt_da(); ++ if (thd->is_error()) { ++ temp_thd->raise_condition(da->mysql_errno(), da->returned_sqlstate(), ++ Sql_condition::SL_ERROR, da->message_text(), ++ false); ++ } ++ if (da->cond_count() > 0) { ++ Diagnostics_area::Sql_condition_iterator it = da->sql_conditions(); ++ while ((cond = it++)) { ++ temp_thd->raise_condition(cond->mysql_errno(), NULL, cond->severity(), ++ cond->message_text(), false); ++ } ++ } ++ mysql_mutex_unlock(stmt_lock); ++ pq_free_thd(thd); ++ thd = NULL; ++ } ++ ++#ifndef NDEBUG ++ pq_stack_reset(); ++#endif ++ ++ /* Clean up openssl errors. */ ++#if OPENSSL_VERSION_NUMBER < 0x10100000L ++ ERR_remove_thread_state(0); ++#endif ++ ++ my_thread_end(); ++ /* s4: send last status to leader */ ++ PQ_worker_state status = ++ send_error_status ? PQ_worker_state::ERROR : PQ_worker_state::COMPELET; ++ mngr->signal_status(thd, status); ++ my_thread_exit(0); ++ return nullptr; ++} ++ ++/** ++Plan refinement stage: do various setup things for the executor, including ++ - setup join buffering use ++ - push index conditions ++ - increment relevant counters ++ - etc ++ ++@return false if successful, true if error (Out of memory) ++*/ ++bool pq_make_join_readinfo(JOIN *join, PQ_worker_manager *mngr, ++ uint no_jbuf_after MY_ATTRIBUTE((unused))) { ++ const bool prep_for_pos = join->need_tmp_before_win || ++ join->select_distinct || join->grouped || ++ !join->order.empty() || join->m_windows.elements > 0; ++ ++ for (uint i = join->const_tables; i < join->primary_tables; i++) { ++ QEP_TAB *const qep_tab = &join->qep_tab[i]; ++ TABLE *const table = qep_tab->table(); ++ if (prep_for_pos || (qep_tab->do_parallel_scan && join->pq_stable_sort)) ++ table->prepare_for_position(); ++ } ++ std::vector predicates_below_join; ++ std::vector predicates_above_join; ++ join->m_root_access_path = nullptr; ++ Gather_operator *gather = mngr->m_gather; ++ ++ for (uint i = 0; i < join->tables; i++) { ++ QEP_TAB *qep_tab = &join->qep_tab[i]; ++ if (qep_tab->do_parallel_scan) { ++ qep_tab->table()->file->pq_table_scan = gather->table_scan; ++ qep_tab->gather = mngr->m_gather; ++ ++ /* index push down */ ++ uint keyno = gather->keyno; ++ if (!(keyno == qep_tab->table()->s->primary_key && ++ qep_tab->table()->file->primary_key_is_clustered()) && ++ qep_tab->pq_cond) { ++ TABLE *tbl = qep_tab->table(); ++ Item *cond = qep_tab->pq_cond; ++ Item *idx_cond = make_cond_for_index(cond, tbl, keyno, false); ++ if (idx_cond) { ++ Item *idx_remainder_cond = tbl->file->idx_cond_push(keyno, idx_cond); ++ if (idx_remainder_cond != idx_cond) ++ qep_tab->ref().disable_cache = true; ++ ++ Item *row_cond = make_cond_remainder(qep_tab->pq_cond, true); ++ if (row_cond) { ++ if (idx_remainder_cond) ++ and_conditions(&row_cond, idx_remainder_cond); ++ idx_remainder_cond = row_cond; ++ } ++ qep_tab->set_condition(idx_remainder_cond); ++ } ++ } ++ ++ /** optimize order by */ ++ if (join->pq_last_sort_idx == int(i) && i >= join->primary_tables) { ++ assert(qep_tab->filesort); ++ /** if there is limit on tmp table, we cannot remove sort */ ++ if (join->m_select_limit == HA_POS_ERROR) { ++ destroy(qep_tab->filesort); ++ 
qep_tab->filesort = nullptr; ++ } ++ ++ if (join->pq_rebuilt_group) { ++ assert(join->query_block->saved_group_list_ptrs); ++ assert(join->m_select_limit == HA_POS_ERROR); ++ restore_list(join->query_block->saved_group_list_ptrs, ++ join->query_block->group_list); ++ ORDER *order = restore_optimized_group_order( ++ join->query_block->group_list, ++ join->saved_optimized_vars.optimized_group_flags); ++ if (order) { ++ ORDER_with_src group_list = ORDER_with_src(order, ESC_GROUP_BY); ++ join->add_sorting_to_table(i, &group_list, false, true); ++ } ++ } ++ } ++ } ++ } ++ ++ /** generate execution tree */ ++ join->set_optimized(); ++ join->query_expression()->set_optimized(); ++ join->create_access_paths(); ++ join->query_expression()->create_access_paths(join->thd); ++ join->query_expression()->force_create_iterators(join->thd); ++ ++ return false; ++} ++ ++bool pq_check_stable_sort(JOIN *join, int idx) { ++ if ((join->pq_last_sort_idx >= 0 && join->pq_last_sort_idx != idx) || ++ join->need_tmp_before_win) { ++ return false; ++ } ++ return true; ++} ++ ++/* ++ * record the mapping: ++ * L = join->query_block->group_list -------> join->group_list = R ++ * ++ * @result: ++ * if L[i] \in R, then optimized_flags[i] = 0; otherwise, optimized_flags[i] ++ * = 1 (i.e., L[i] is optimized in JOIN::optimized()). Correspondingly, we can ++ * use L and optimized_flags to retrieve R. ++ * ++ */ ++void record_optimized_group_order(PQ_Group_list_ptrs *ptr, ++ ORDER_with_src &new_list, ++ std::vector &optimized_flags) { ++ optimized_flags.clear(); ++ // group_list (or order) is optimized to NULL. ++ if (new_list.order == nullptr || ptr == nullptr) { ++ return; ++ } ++ ++ optimized_flags.resize(ptr->size(), true); ++ int i = 0; ++ auto iterator = ptr->begin(); ++ auto order = new_list.order; ++ ++ while (iterator != ptr->end() && order) { ++ // find this item, i.e., this item is not optimized ++ if ((*iterator)->item[0] && order->item[0] && ++ ((*iterator)->item[0]->eq(order->item[0], false))) { ++ optimized_flags[i] = false; ++ order = order->next; ++ } else { ++ optimized_flags[i] = true; ++ } ++ i++; ++ iterator++; ++ } ++} ++ ++/* ++ * restore the optimized group/order list, using original and optimized_flags ++ */ ++ORDER *restore_optimized_group_order(SQL_I_List &orig_list, ++ std::vector &optimized_flags) { ++ int size = optimized_flags.size(); ++ if (0 == size) return nullptr; ++ ++ ORDER *header = orig_list.first; ++ ORDER **prev_ptr = &header; ++ ORDER *iterator; ++ ++ int idx = 0; ++ ++ for (iterator = orig_list.first; iterator; iterator = iterator->next) { ++ if (!optimized_flags[idx]) { ++ *prev_ptr = iterator; ++ prev_ptr = &iterator->next; ++ } ++ idx++; ++ } ++ *prev_ptr = nullptr; ++ ++ return header; ++} ++ ++void restore_list(PQ_Group_list_ptrs *ptr, SQL_I_List &orig_list) { ++ orig_list.clear(); ++ ++ ORDER *order = nullptr; ++ ORDER **iterator = ptr->begin(); ++ for (; iterator != ptr->end(); iterator++) { ++ order = *iterator; ++ orig_list.link_in_list(order, &order->next); ++ } ++} ++ ++void pq_free_thd(THD *thd) { ++ if (!thd) return; ++ close_thread_tables(thd); ++ thd->mdl_context.release_transactional_locks(); ++ trans_commit_stmt(thd); ++ end_connection(thd); ++ close_connection(thd, 0, false, false); ++ Diagnostics_area *da = thd->get_stmt_da(); ++ if (thd->is_error() && thd->pq_leader) { ++ thd->pq_leader->raise_condition(da->mysql_errno(), da->returned_sqlstate(), ++ Sql_condition::SL_ERROR, da->message_text(), ++ false); ++ } ++ thd->get_stmt_da()->reset_diagnostics_area(); ++ 
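record_optimized_group_order() and restore_optimized_group_order() above encode which GROUP BY elements JOIN::optimize() pruned as a flag vector over the saved original list, so the pruned list can be rebuilt later from the original pointers. Both lists keep their relative order, which is what makes a single forward pass sufficient. A self-contained sketch of the same record/replay idea on a plain singly linked list (Node is an illustrative stand-in for the server's ORDER):

#include <cstddef>
#include <string>
#include <vector>

struct Node {
  std::string name;
  Node *next = nullptr;
};

// Record: removed[i] is true when original[i] no longer appears in the
// optimized chain; matching entries advance both cursors.
std::vector<bool> record_removed(const std::vector<Node *> &original,
                                 const Node *optimized) {
  std::vector<bool> removed(original.size(), true);
  const Node *cur = optimized;
  for (size_t i = 0; i < original.size() && cur != nullptr; ++i) {
    if (original[i]->name == cur->name) {
      removed[i] = false;
      cur = cur->next;
    }
  }
  return removed;
}

// Replay: relink only the surviving nodes, preserving their original order.
Node *rebuild(const std::vector<Node *> &original,
              const std::vector<bool> &removed) {
  Node *head = nullptr;
  Node **tail = &head;
  for (size_t i = 0; i < original.size(); ++i) {
    if (!removed[i]) {
      *tail = original[i];
      tail = &original[i]->next;
    }
  }
  *tail = nullptr;
  return head;
}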
thd->release_resources(); ++ thd->free_items(); ++ destroy(thd); ++} ++ ++void pq_free_join(JOIN *join) { ++ if (!join) return; ++ join->join_free(); ++ join->destroy(); ++ destroy(join); ++} ++ ++/** ++ * fetch the used key ++ * @table: the first non-const table ++ * @key: the index of the key in table->key_info ++ * @key_parts: #fields in this key ++ * @res_fields: the set of all fields in this key ++ */ ++static void get_key_fields(TABLE *table, int key, uint key_parts, ++ std::vector &key_fields) { ++ assert(table && table->key_info); ++ ++ KEY_PART_INFO *kp = table->key_info[key].key_part; ++ for (uint i = 0; i < key_parts; i++, kp++) { ++ key_fields.emplace_back(kp->field->field_name); ++ } ++} ++ ++/** ++ * fetch the key fields used in the tab ++ * @tab ++ * @res_fields ++ * ++ * @retval: ++ * false for success, and otherwise true ++ */ ++bool get_table_key_fields(QEP_TAB *tab, std::vector &key_fields) { ++ key_fields.clear(); ++ assert(tab); ++ auto type = tab->old_type(); ++ ++ // the original table in leader_join's qep_tab ++ TABLE *table = tab->old_table(); ++ TABLE_REF *ref = &tab->old_ref(); ++ if (!table || !ref) return true; ++ ++ // consider the following cases to obtain the sort filed: ++ // (1) the case of explicitly using primary key ++ if (tab->old_ref().key_parts) { ++ get_key_fields(table, ref->key, ref->key_parts, key_fields); ++ } ++ // (2) the case of index scan ++ else if (type == JT_INDEX_SCAN) { ++ int key = tab->index(); ++ uint key_parts = table->key_info[key].user_defined_key_parts; ++ get_key_fields(table, key, key_parts, key_fields); ++ } ++ ++ // (3) the case of index range. ++ else if (type == JT_RANGE || (type == JT_REF && tab->old_quick_optim())) { ++ // Note: please confirm whether 'quick' belongs to old qep_tab; ++ QUICK_SELECT_I *quick = tab->old_quick_optim(); ++ if (!quick) return true; ++ ++ if (quick->index != MAX_KEY) ++ get_key_fields(table, quick->index, quick->used_key_parts, key_fields); ++ } ++ // (4) the case of implicitly using primary key ++ else { ++ if (table->s->primary_key != MAX_KEY) { ++ int key = table->s->primary_key; ++ int key_parts = table->key_info[key].user_defined_key_parts; ++ get_key_fields(table, key, key_parts, key_fields); ++ } ++ // (5) other cases ++ } ++ return false; ++} ++ ++bool set_key_order(QEP_TAB *tab, std::vector &key_fields, ++ ORDER **order_ptr, Ref_item_array *ref_ptrs) { ++ JOIN *join = tab->join(); ++ assert(join && join->order.empty()); ++ if (!key_fields.size()) { ++ *order_ptr = NULL; ++ return false; ++ } ++ ++ std::map fields_map; // map[field] = item ++ std::map::iterator iter; ++ std::vector order_items; ++ ++ Ref_item_array &ref_items = *ref_ptrs; ++ /** (1) build the map: {name} -> {item} */ ++ for (uint i = 0; i < join->query_block_fields->size(); i++) { ++ Item *item = ref_items[i]; ++ if (item && item->type() == Item::FIELD_ITEM) { ++ std::string field_name = ++ static_cast(item)->field->field_name; ++ fields_map[field_name] = &ref_items[i]; ++ } ++ } ++ ++ /** (2) find the item whose name in res_fields */ ++ for (std::string key : key_fields) { ++ iter = fields_map.find(key); ++ if (iter != fields_map.end()) { ++ order_items.push_back(iter->second); ++ } ++ } ++ ++ /** (3) generate sort order */ ++ THD *thd = join->thd; ++ SQL_I_List order_list; ++ ++ for (Item **item : order_items) { ++ ORDER *order = new (thd->pq_mem_root) ORDER(); ++ if (!order) { ++ *order_ptr = NULL; ++ return true; ++ } ++ ++ order->item_initial = *item; ++ order->item = item; ++ order->in_field_list = 1; ++ 
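set_key_order() resolves each key column name against the select list through the name-to-Item map built in step (1) and strings the hits into an ORDER chain, presumably so rows produced by parallel workers can be merged in a deterministic key order. A stripped-down sketch of that lookup on plain names and positions (the real code produces ORDER nodes over Item pointers):

#include <map>
#include <string>
#include <vector>

// Map select-list column names to positions, then project the key columns
// that actually appear there into an ordering specification.
std::vector<int> order_from_key(const std::vector<std::string> &select_cols,
                                const std::vector<std::string> &key_cols) {
  std::map<std::string, int> pos;
  for (int i = 0; i < static_cast<int>(select_cols.size()); ++i)
    pos.emplace(select_cols[i], i);

  std::vector<int> order;
  for (const std::string &k : key_cols) {
    auto it = pos.find(k);
    if (it != pos.end()) order.push_back(it->second);  // keep key column order
  }
  return order;
}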
order->is_explicit = 0; ++ add_to_list(order_list, order); ++ } ++ ++ *order_ptr = order_list.first; ++ return false; ++} ++ ++uint get_pq_memory_total() { ++ uint sum_memory = 0; ++ for (int i = 0; i < PQ_MEMORY_USED_BUCKET; i++) ++ sum_memory += atomic_add(pq_memory_used[i], 0); ++ return sum_memory; ++} ++ ++void add_pq_memory(PSI_memory_key key, size_t length, ++ unsigned int id /*MY_ATTRIBUTE((unused))*/) { ++ if (key == key_memory_pq_mem_root) ++ atomic_add(pq_memory_used[id], length); ++} ++ ++void sub_pq_memory(PSI_memory_key key, size_t length, ++ unsigned int id /*MY_ATTRIBUTE((unused))*/) { ++ if (key == key_memory_pq_mem_root) ++ atomic_sub(pq_memory_used[id], length); ++} +diff --git a/sql/sql_parallel.h b/sql/sql_parallel.h +new file mode 100644 +index 00000000..18e41cf2 +--- /dev/null ++++ b/sql/sql_parallel.h +@@ -0,0 +1,214 @@ ++#ifndef SQL_PARALLEL_H ++#define SQL_PARALLEL_H ++ ++/* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License, version 2.0, ++ as published by the Free Software Foundation. ++ ++ This program is also distributed with certain software (including ++ but not limited to OpenSSL) that is licensed under separate terms, ++ as designated in a particular file or component or in included license ++ documentation. The authors of MySQL hereby grant you an additional ++ permission to link the program and your derivative works with the ++ separately licensed software that they have included with MySQL. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License, version 2.0, for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ ++ ++#include "sql/basic_row_iterators.h" ++#include "sql/msg_queue.h" ++#include "sql/sql_base.h" ++#include "sql/sql_lex.h" ++ ++class QEP_TAB; ++class Query_block; ++class THD; ++class Gather_operator; ++class JOIN; ++class ORDER_with_src; ++class Exchange; ++class Filesort; ++ ++/** optimized variables */ ++struct PQ_optimized_var { ++ bool pq_grouped; ++ bool pq_implicit_grouping; ++ bool pq_simple_group; ++ bool pq_simple_order; ++ bool pq_streaming_aggregation; ++ bool pq_group_optimized_away; ++ bool pq_need_tmp_before_win; ++ bool pq_skip_sort_order; ++ int pq_m_ordered_index_usage; ++ ++ /* ++ * optimized_*_flags record each element in select_lex->group_list (or ++ * order_list) is optimized or not. Specifically, optimized_group_flags[i] = 1 ++ * means that group_list[i] is optimized in JOIN::optimize(); otherwise, ++ * pq_group_optimized[i] = 0 and group_list[i] is remained in ++ * JOIN::group_list. 
++ */ ++ std::vector optimized_group_flags; ++ std::vector optimized_order_flags; ++ uint pq_no_jbuf_after; // for read_set and index push down ++}; ++ ++enum PQ_exec_status { SEQ_EXEC = 0, PARL_EXEC, ABORT_EXEC }; ++ ++enum PQ_worker_state { ++ INIT = 1, ++ READY = 2, ++ COMPELET = 4, ++ ERROR = 8, ++ OVER = 16 ++}; ++ ++class MQ_record_gather { ++ public: ++ THD *m_thd; ++ QEP_TAB *m_tab; ++ Exchange *m_exchange; ++ ++ public: ++ MQ_record_gather() : m_thd(nullptr), m_tab(nullptr), m_exchange(nullptr) {} ++ ++ MQ_record_gather(THD *thd, QEP_TAB *tab) ++ : m_thd(thd), m_tab(tab), m_exchange(nullptr) {} ++ ~MQ_record_gather() {} ++ bool mq_scan_init(Filesort *sort, int workers, uint ref_length, ++ bool stab_output = false); ++ ++ bool mq_scan_next(); ++ ++ void mq_scan_end(); ++}; ++ ++/** ++ * Parallel scan worker's manager struct ++ */ ++class PQ_worker_manager { ++ public: ++ uint m_id; // worker number ++ Gather_operator *m_gather; ++ THD *thd_leader; // pointer to leader thread ++ THD *thd_worker; // pointer to worker thread ++ MQueue_handle *m_handle; // worker's message queue handle ++ PQ_worker_state m_status; // worker status ++ my_thread_handle thread_id; // Thread id ++ bool m_active{false}; // true if this worker is created, and false otherwise. ++ ++ private: ++ mysql_mutex_t m_mutex; // mutex protect previous members ++ mysql_cond_t m_cond; ++ ++ public: ++ PQ_worker_manager() = delete; ++ ++ PQ_worker_manager(uint id); ++ ++ ~PQ_worker_manager(); ++ ++ bool wait_for_status(THD *thd, uint state); ++ ++ void signal_status(THD *thd, PQ_worker_state state); ++ ++ void kill(); ++}; ++ ++struct CODE_STATE; ++/** ++ * Gather operator for parallel scan ++ */ ++class Gather_operator { ++ public: ++ uint m_dop; // Degree of parallel execution; ++ // (if m_pq_ctx->m_ctxs.size() is less than exepected m_dop, ++ // m_dop will change to m_pq_ctx->m_ctxs.size() in init() ++ // function) ++ JOIN *m_template_join; // physical query plan template ++ PQ_worker_manager **m_workers; // parallel workers manager info ++ TABLE *m_table; // the table need parallel query ++ void *m_pq_ctx; // innodb Parallel query context ++ QEP_TAB *m_tab; // the rewrite qep_tab ++ uint keyno; // the index for paralleling read ++ int m_ha_err; ++ Diagnostics_area m_stmt_da; ++ mysql_mutex_t lock_stmt_da; ++ CODE_STATE **m_code_state; ++ bool table_scan; ++ ++ private: ++ std::mutex m_read_end_mutex; ++ ++ public: ++ Gather_operator() = delete; ++ ++ Gather_operator(uint dop); ++ ++ ~Gather_operator(); ++ ++ bool init(); ++ ++ void end(); ++ ++ void signalAll(); ++ ++ void signalReadEnd(); ++ ++ void waitReadEnd(); ++}; ++ ++Gather_operator *make_pq_gather_operator(JOIN *join, QEP_TAB *tab, uint dop); ++PQ_exec_status make_pq_leader_plan(THD *thd); ++void *pq_worker_exec(void *arg); ++bool pq_build_sum_funcs(THD *thd, Query_block *select, Ref_item_array &ref_ptr, ++ mem_root_deque &fields, uint elements, ++ nesting_map select_nest_level); ++void pq_replace_avg_func(THD *thd, Query_block *select, mem_root_deque *fields, ++ nesting_map select_nest_level); ++extern bool get_table_key_fields(QEP_TAB *tab, ++ std::vector &res_fields); ++extern bool setup_order(THD *thd, Ref_item_array ref_item_array, ++ TABLE_LIST *tables, List &fields, ++ List &all_fields, ORDER *order); ++extern void release_pq_running_threads(uint dop); ++extern void add_pq_memory(PSI_memory_key key, size_t length, unsigned int id); ++extern void sub_pq_memory(PSI_memory_key key, size_t length, unsigned int id); ++extern uint get_pq_memory_total(); 
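add_pq_memory() and sub_pq_memory(), declared just above, stripe the parallel-query memory charge across PQ_MEMORY_USED_BUCKET slots keyed by a caller-supplied id, and get_pq_memory_total() sums the slots on demand, so concurrent workers do not all contend on one counter. A minimal sketch of the same striped-counter idea with std::atomic (choosing the slot by worker id is an assumption here, and the total is only approximate while updates are in flight):

#include <array>
#include <atomic>
#include <cstddef>

constexpr int kBuckets = 16;  // mirrors PQ_MEMORY_USED_BUCKET
std::array<std::atomic<size_t>, kBuckets> g_used{};

// Writers touch one slot each, so unrelated workers tend to update
// different cache lines; readers pay the cost of summing all slots.
void add_used(unsigned worker_id, size_t bytes) {
  g_used[worker_id % kBuckets].fetch_add(bytes, std::memory_order_relaxed);
}

void sub_used(unsigned worker_id, size_t bytes) {
  g_used[worker_id % kBuckets].fetch_sub(bytes, std::memory_order_relaxed);
}

size_t total_used() {
  size_t sum = 0;
  for (const auto &slot : g_used) sum += slot.load(std::memory_order_relaxed);
  return sum;
}

The relaxed readout can momentarily over- or under-count, which is the usual trade-off for soft accounting against a limit such as parallel_memory_limit.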
++extern void add_to_list(SQL_I_List &list, ORDER *order); ++ ++extern ulonglong parallel_memory_limit; ++extern ulong parallel_max_threads; ++extern uint pq_memory_used[16]; ++extern uint pq_memory_total_used; ++extern uint parallel_threads_running; ++ ++extern mysql_mutex_t LOCK_pq_threads_running; ++extern mysql_cond_t COND_pq_threads_running; ++ ++bool pq_make_join_readinfo(JOIN *join, PQ_worker_manager *mngr, ++ uint no_jbuf_after); ++ ++bool pq_check_stable_sort(JOIN *join, int idx); ++ ++bool set_key_order(QEP_TAB *tab, std::vector &res_fields, ++ ORDER **order_ptr, Ref_item_array *ref_ptrs); ++ ++void restore_list(PQ_Group_list_ptrs *ptr, SQL_I_List &orig_list); ++ ++void pq_free_thd(THD *thd); ++ ++void pq_free_join(JOIN *join); ++ ++void pq_free_gather(Gather_operator *gather); ++ ++#endif /* SQL_PARALLEL_H */ +diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc +index 52f7f164..43c85fa2 100644 +--- a/sql/sql_parse.cc ++++ b/sql/sql_parse.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 1999, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -173,6 +174,7 @@ + #include "sql/thd_raii.h" + #include "sql/transaction.h" // trans_rollback_implicit + #include "sql/transaction_info.h" ++#include "sql/pq_condition.h" + #include "sql_string.h" + #include "template_utils.h" + #include "thr_lock.h" +@@ -2739,6 +2741,8 @@ int mysql_execute_command(THD *thd, bool first_level) { + + thd->work_part_info = nullptr; + ++ set_pq_condition_status(thd); ++ + if (thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SUBQUERY_TO_DERIVED)) + lex->add_statement_options(OPTION_NO_CONST_TABLES); + +@@ -4411,6 +4415,8 @@ int mysql_execute_command(THD *thd, bool first_level) { + + res = lex->m_sql_cmd->execute(thd); + ++ thd = current_thd; ++ + break; + } + case SQLCOM_ALTER_USER: { +@@ -4855,6 +4861,9 @@ void THD::reset_for_next_command() { + a grant/revoke or flush. + */ + thd->security_context()->checkout_access_maps(); ++ ++ thd->parallel_exec = false; ++ + #ifndef NDEBUG + thd->set_tmp_table_seq_id(1); + #endif +@@ -4998,6 +5007,7 @@ void dispatch_sql_command(THD *thd, Parser_state *parser_state) { + thd, &src_res_grp, &dest_res_grp, &ticket, &cur_ticket); + + error = mysql_execute_command(thd, true); ++ thd = current_thd; + + if (switched) + mgr_ptr->restore_original_resource_group(thd, src_res_grp, +diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc +index 1d4f5983..f6d005ef 100644 +--- a/sql/sql_prepare.cc ++++ b/sql/sql_prepare.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2002, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -403,6 +404,9 @@ class Statement_backup { + + m_query_string = thd->query(); + thd->set_query(stmt->m_query_string); ++ if (thd->lex != nullptr) { ++ thd->lex->in_execute_ps = true; ++ } + + m_safe_to_display = thd->safe_to_display(); + +@@ -429,6 +433,7 @@ class Statement_backup { + + stmt->m_query_string = thd->query(); + thd->set_query(m_query_string); ++ thd->lex->in_execute_ps = false; + + return; + } +diff --git a/sql/sql_resolver.cc b/sql/sql_resolver.cc +index 0c116ca6..0ff87166 100644 +--- a/sql/sql_resolver.cc ++++ b/sql/sql_resolver.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. 
++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -181,6 +182,8 @@ bool Query_block::prepare(THD *thd, mem_root_deque *insert_field_list) { + + Query_expression *const unit = master_query_expression(); + ++ if (has_windows()) saved_windows_elements = m_windows.elements; ++ + if (!top_join_list.empty()) propagate_nullability(&top_join_list, false); + + /* +@@ -580,6 +583,15 @@ bool Query_block::prepare(THD *thd, mem_root_deque *insert_field_list) { + if (olap == ROLLUP_TYPE && resolve_rollup_wfs(thd)) + return true; /* purecov: inspected */ + ++ if (thd->m_suite_for_pq == PqConditionStatus::ENABLED) { ++ if (group_list.elements > 0) ++ fix_prepare_information_for_order(thd, &group_list, ++ &saved_group_list_ptrs); ++ if (order_list.elements > 0) ++ fix_prepare_information_for_order(thd, &order_list, ++ &saved_order_list_ptrs); ++ } ++ + assert(!thd->is_error()); + return false; + } +@@ -4244,9 +4256,15 @@ bool find_order_in_list(THD *thd, Ref_item_array ref_item_array, + order->is_position = true; + return false; + } +- /* Lookup the current GROUP/ORDER field in the SELECT clause. */ +- select_item = find_item_in_list(thd, order_item, fields, &counter, ++ ++ if (thd->parallel_exec && !order->in_field_list) { ++ select_item = not_found_item; ++ } else { ++ /* Lookup the current GROUP/ORDER field in the SELECT clause. */ ++ select_item = find_item_in_list(thd, order_item, fields, &counter, + REPORT_EXCEPT_NOT_FOUND, &resolution); ++ } ++ + if (!select_item) + return true; /* The item is not unique, or some other error occurred. */ + +@@ -4272,7 +4290,7 @@ bool find_order_in_list(THD *thd, Ref_item_array ref_item_array, + order_item_type == Item::FIELD_ITEM) || + order_item_type == Item::REF_ITEM) { + from_field = find_field_in_tables(thd, (Item_ident *)order_item, tables, +- nullptr, &view_ref, IGNORE_ERRORS, true, ++ nullptr, &view_ref, IGNORE_ERRORS, !thd->pq_leader, + // view_ref is a local variable, so + // don't record a change to roll back: + false); +diff --git a/sql/sql_select.cc b/sql/sql_select.cc +index 0d5929ff..c4110b07 100644 +--- a/sql/sql_select.cc ++++ b/sql/sql_select.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -118,11 +119,12 @@ + #include "sql_string.h" + #include "template_utils.h" + #include "thr_lock.h" ++#include "sql/sql_parallel.h" + + using std::max; + using std::min; + +-static store_key *get_store_key(THD *thd, Item *val, table_map used_tables, ++store_key *get_store_key(THD *thd, Item *val, table_map used_tables, + table_map const_tables, + const KEY_PART_INFO *key_part, uchar *key_buff, + uint maybe_null); +@@ -760,6 +762,8 @@ static bool optimize_secondary_engine(THD *thd) { + secondary_engine->optimize_secondary_engine(thd, thd->lex); + } + ++void thd_set_thread_stack(THD *thd, const char *stack_start); ++ + /** + Execute a DML statement. + This is the default implementation for a DML statement and uses a +@@ -783,6 +787,19 @@ bool Sql_cmd_dml::execute_inner(THD *thd) { + // Perform secondary engine optimizations, if needed. 
+ if (optimize_secondary_engine(thd)) return true; + ++ if (thd->m_suite_for_pq == PqConditionStatus::ENABLED) ++ { ++ PQ_exec_status status = make_pq_leader_plan(thd); ++ if (status == PQ_exec_status::ABORT_EXEC) { ++ return true; ++ } ++ ++ assert(status == PQ_exec_status::SEQ_EXEC || ++ status == PQ_exec_status::PARL_EXEC); ++ ++ DEBUG_SYNC(thd, "after_pq_leader_plan"); ++ } ++ + // We know by now that execution will complete (successful or with error) + lex->set_exec_completed(); + if (lex->is_explain()) { +@@ -1186,6 +1203,7 @@ SJ_TMP_TABLE *create_sj_tmp_table(THD *thd, JOIN *join, + if (!(sjtbl = new (thd->mem_root) SJ_TMP_TABLE)) + return nullptr; /* purecov: inspected */ + sjtbl->tmp_table = nullptr; ++ sjtbl->tabs = nullptr; + sjtbl->is_confluent = true; + sjtbl->have_confluent_row = false; + } +@@ -1705,16 +1723,33 @@ void JOIN::destroy() { + + if (qep_tab) { + assert(!join_tab); +- for (uint i = 0; i < tables; i++) { +- TABLE *table = qep_tab[i].table(); ++ uint tables_num = tables >= old_tables ? tables : old_tables; ++ for (uint i = 0; i < tables_num; i++) { ++ QEP_TAB *qtab = &qep_tab[i]; ++ if (qtab->gather && !thd->running_explain_analyze && !thd->is_worker()) { ++ qtab->gather->end(); ++ qtab->gather = nullptr; ++ } ++ ++ TABLE *table = qep_tab0[i].table(); ++ TABLE *old_table = qep_tab0[i].old_table(); ++ + if (table != nullptr) { +- // These were owned by the root iterator, which we just destroyed. +- // Keep filesort_free_buffers() from trying to call CleanupAfterQuery() +- // on them. + table->sorting_iterator = nullptr; + table->duplicate_removal_iterator = nullptr; + } +- qep_tab[i].cleanup(); ++ ++ if (old_table != nullptr) { ++ old_table->sorting_iterator = nullptr; ++ old_table->duplicate_removal_iterator = nullptr; ++ } ++ ++ /* only clean iterator, filesort, op (etc.) info. */ ++ if (qep_tab1 != nullptr) { ++ qep_tab1[i].cleanup(false); ++ } ++ ++ qep_tab0[i].cleanup(true); + } + } else { + // Same, for hypergraph queries. +@@ -1751,15 +1786,27 @@ void JOIN::destroy() { + // Run Cached_item DTORs! 
+ group_fields.destroy_elements(); + +- tmp_table_param.cleanup(); ++ tmp_table_param->cleanup(); + + /* Cleanup items referencing temporary table columns */ +- if (tmp_fields != nullptr) { +- cleanup_item_list(tmp_fields[REF_SLICE_TMP1]); +- cleanup_item_list(tmp_fields[REF_SLICE_TMP2]); ++ if (tmp_fields1 != nullptr) { ++ cleanup_item_list(tmp_fields1[REF_SLICE_TMP1]); ++ cleanup_item_list(tmp_fields1[REF_SLICE_TMP2]); ++ cleanup_item_list(tmp_fields1[REF_SLICE_PQ_TMP]); ++ for (uint widx = 0; widx < m_windows.elements; widx++) { ++ cleanup_item_list(tmp_fields1[REF_SLICE_WIN_1 + widx]); ++ cleanup_item_list(tmp_fields1[REF_SLICE_WIN_1 + widx + ++ m_windows.elements]); // frame buffer ++ } ++ } ++ ++ if (tmp_fields0 != nullptr) { ++ cleanup_item_list(tmp_fields0[REF_SLICE_TMP1]); ++ cleanup_item_list(tmp_fields0[REF_SLICE_TMP2]); ++ cleanup_item_list(tmp_fields0[REF_SLICE_PQ_TMP]); + for (uint widx = 0; widx < m_windows.elements; widx++) { +- cleanup_item_list(tmp_fields[REF_SLICE_WIN_1 + widx]); +- cleanup_item_list(tmp_fields[REF_SLICE_WIN_1 + widx + ++ cleanup_item_list(tmp_fields0[REF_SLICE_WIN_1 + widx]); ++ cleanup_item_list(tmp_fields0[REF_SLICE_WIN_1 + widx + + m_windows.elements]); // frame buffer + } + } +@@ -1778,7 +1825,7 @@ void JOIN::destroy() { + while ((sjm = sjm_list_it++)) ::destroy(sjm); + sjm_exec_list.clear(); + +- keyuse_array.clear(); ++ keyuse_array->clear(); + // Free memory for rollup arrays + if (query_block->olap == ROLLUP_TYPE) { + rollup_group_items.clear(); +@@ -2404,7 +2451,7 @@ class store_key_json_item final : public store_key_item { + + } // namespace + +-static store_key *get_store_key(THD *thd, Item *val, table_map used_tables, ++store_key *get_store_key(THD *thd, Item *val, table_map used_tables, + table_map const_tables, + const KEY_PART_INFO *key_part, uchar *key_buff, + uint maybe_null) { +@@ -2631,7 +2678,7 @@ bool and_conditions(Item **e1, Item *e2) { + Index condition, or NULL if no condition could be inferred. + */ + +-static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno, ++Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno, + bool other_tbls_ok) { + assert(cond != nullptr); + +@@ -2698,7 +2745,7 @@ static Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno, + return cond; + } + +-static Item *make_cond_remainder(Item *cond, bool exclude_index) { ++Item *make_cond_remainder(Item *cond, bool exclude_index) { + if (exclude_index && cond->marker == Item::MARKER_ICP_COND_USES_INDEX_ONLY) + return nullptr; /* Already checked */ + +@@ -2829,6 +2876,12 @@ void QEP_TAB::push_index_cond(const JOIN_TAB *join_tab, uint keyno, + !has_guarded_conds() && type() != JT_CONST && type() != JT_SYSTEM && + !(keyno == tbl->s->primary_key && + tbl->file->primary_key_is_clustered())) { ++ if (do_parallel_scan) { ++ has_pq_cond = true; ++ pq_cond = condition()->pq_clone(join_->thd, join_->query_block); ++ if (pq_cond == nullptr) { return; } ++ } ++ + DBUG_EXECUTE("where", print_where(join_->thd, condition(), "full cond", + QT_ORDINARY);); + Item *idx_cond = +@@ -3385,13 +3438,18 @@ void JOIN_TAB::cleanup() { + qs_cleanup(); + } + +-void QEP_TAB::cleanup() { ++/** ++ As qep_tab0 and qep_tab1 share the same table info, we only free these tables once ++ and the free procedure is finished in qep_tab0. 
++*/ ++void QEP_TAB::cleanup(bool is_free) { + // Delete parts specific of QEP_TAB: + destroy(filesort); + filesort = nullptr; + if (quick_optim() != quick()) delete quick_optim(); + + TABLE *const t = table(); ++ TABLE *const old_t = old_table(); + + if (t != nullptr) { + t->reginfo.qep_tab = nullptr; +@@ -3399,21 +3457,36 @@ void QEP_TAB::cleanup() { + } + + // Delete shared parts: +- qs_cleanup(); ++ if (is_free) { qs_cleanup(); } + + // Order of qs_cleanup() and this, matters: +- if (op_type == QEP_TAB::OT_MATERIALIZE || +- op_type == QEP_TAB::OT_AGGREGATE_THEN_MATERIALIZE || +- op_type == QEP_TAB::OT_AGGREGATE_INTO_TMP_TABLE || +- op_type == QEP_TAB::OT_WINDOWING_FUNCTION) { +- if (t != nullptr) // Check tmp table is not yet freed. +- { +- close_tmp_table(t); +- free_tmp_table(t); ++ if (is_free) { ++ if (op_type == QEP_TAB::OT_MATERIALIZE || ++ op_type == QEP_TAB::OT_AGGREGATE_THEN_MATERIALIZE || ++ op_type == QEP_TAB::OT_AGGREGATE_INTO_TMP_TABLE || ++ op_type == QEP_TAB::OT_WINDOWING_FUNCTION) { ++ // free only tmp table ++ if (old_t != nullptr) { ++ close_tmp_table(old_t); ++ free_tmp_table(old_t); ++ } ++ ++ // Check tmp table is not yet freed. ++ if (t != nullptr) { ++ close_tmp_table(t); ++ free_tmp_table(t); ++ } ++ ++ destroy(tmp_table_param); ++ tmp_table_param = nullptr; ++ } else { ++ if (t != nullptr && old_t != nullptr && t->s->table_category == TABLE_CATEGORY_TEMPORARY) { ++ close_tmp_table(t); ++ free_tmp_table(t); ++ } + } +- destroy(tmp_table_param); +- tmp_table_param = nullptr; + } ++ + if (table_ref != nullptr && table_ref->uses_materialization()) { + assert(t == table_ref->table); + t->merge_keys.clear_all(); +@@ -3439,6 +3512,20 @@ void QEP_shared_owner::qs_cleanup() { + table_ref->derived_key_list.clear(); + } + } ++ ++ if (old_table() && ++ (old_table()->s->tmp_table != INTERNAL_TMP_TABLE || old_table()->is_created())) { ++ old_table()->set_keyread(false); ++ old_table()->file->ha_index_or_rnd_end(); ++ free_io_cache(old_table()); ++ filesort_free_buffers(old_table(), true); ++ TABLE_LIST *const table_ref = old_table()->pos_in_table_list; ++ if (table_ref) { ++ table_ref->derived_keys_ready = false; ++ table_ref->derived_key_list.clear(); ++ } ++ } ++ + delete quick(); + } + +@@ -3599,6 +3686,9 @@ void JOIN::cleanup() { + assert(!join_tab); + qtab = &qep_tab[i]; + table = qtab->table(); ++ if (qtab->gather && !thd->running_explain_analyze && !thd->is_worker()) { ++ qtab->gather->m_table->file->ha_index_or_rnd_end(); ++ } + } else { + qtab = nullptr; + table = (join_tab ? 
&join_tab[i] : best_ref[i])->table(); +@@ -3619,6 +3709,105 @@ void JOIN::cleanup() { + set_ref_item_slice(REF_SLICE_SAVED_BASE); + } + ++bool JOIN::make_pq_tables_info() { ++ ++ DBUG_ENTER("JOIN::make_pq_tables_info"); ++ TABLE *table = nullptr; ++ mem_root_deque table_item_list(thd->mem_root); ++ Query_result_mq *query_result = nullptr; ++ ++ mem_root_deque *curr_fields = fields; ++ ++ const bool may_trace = // just to avoid an empty trace block ++ need_tmp_before_win || implicit_grouping || m_windowing_steps || ++ grouped || !order.empty(); ++ ++ Opt_trace_context *const trace = &thd->opt_trace; ++ Opt_trace_disable_I_S trace_disabled(trace, !may_trace); ++ Opt_trace_object wrapper(trace); ++ Opt_trace_array trace_tmp(trace, "making tmp table for MQ"); ++ ++ query_result = down_cast(query_expression()->query_result()); ++ assert(query_result && query_result->m_param); ++ ++ Temp_table_param *tmp_param = query_result->m_param; ++ tmp_param->pq_copy(saved_tmp_table_param); ++ tmp_param->hidden_field_count = CountHiddenFields(*curr_fields); ++ tmp_param->m_window_frame_buffer = true; ++ tmp_param->skip_create_table = true; ++ ++ /* ++ set saved_sum_func= true, then temp table will create a Item_field for sum funcs ++ in tmplist, which we call it sum_field that use for recieving PQ workers's sum data. ++ */ ++ table = create_tmp_table(thd, tmp_param, *curr_fields, nullptr, false, true, ++ query_block->active_options(), HA_POS_ERROR, "", true, true); ++ query_result->m_table = table; ++ ++ // the leader/worker's table is not same ++ if (!table || !thd->pq_leader ++ || table->s->fields != thd->pq_leader->pq_check_fields ++ || table->s->reclength != thd->pq_leader->pq_check_reclen ++ || DBUG_EVALUATE_IF("pq_worker_error5", true, false)) { ++ goto ERR; ++ } ++ ++ // we should remove const_item that doesn't generate result_field in table->field ++ for (Item *tmp_item : *curr_fields) { ++ // check const_item ++ if (tmp_item->skip_create_tmp_table) { ++ assert(tmp_item->const_item()); ++ continue; ++ } ++ ++ // check Item_copy. ++ /* ++ if (tmp_item->type() == Item::COPY_STR_ITEM) { ++ Item *orig_item = down_cast(tmp_item)->get_item(); ++ assert(orig_item); ++ if (orig_item->skip_create_tmp_table) { ++ assert(orig_item->const_item()); ++ continue; ++ } ++ } */ ++ ++ // check an item that refers to a summary function. ++ if (tmp_item->has_aggregation() && tmp_item->type() != Item::SUM_FUNC_ITEM) { ++ if (tmp_item->type() == Item::SUBSELECT_ITEM || ++ (tmp_item->used_tables() & ~OUTER_REF_TABLE_BIT)) { ++ continue; ++ } ++ } ++ ++ table_item_list.push_back(tmp_item); ++ } ++ ++ if (table_item_list.size() != table->s->fields ++ || alloc_ref_item_slice(thd, REF_SLICE_PQ_TMP)) { ++ goto ERR; ++ } ++ ++ /* ++ table_item_list consists of items needed to send to MQ, ++ and we store this list into REF_SLICE_PQ_TMP position. 
++ */ ++ tmp_fields[REF_SLICE_PQ_TMP] = table_item_list; ++ DBUG_RETURN(false); ++ ++ERR: ++ if (!thd->pq_error) { ++ // here, occurs an error ++ MQueue_handle *handle = query_result->get_mq_handler(); ++ if (handle != nullptr) { ++ handle->send_exception_msg(ERROR_MSG); ++ handle->set_datched_status(MQ_HAVE_DETACHED); ++ } ++ thd->pq_error = true; ++ } ++ ++ DBUG_RETURN(true); ++} ++ + /** + Filter out ORDER BY items that are equal to constants in WHERE condition + +@@ -3941,9 +4130,9 @@ void calc_group_buffer(JOIN *join, ORDER *group) { + parts++; + if (group_item->is_nullable()) null_parts++; + } +- join->tmp_table_param.group_length = key_length + null_parts; +- join->tmp_table_param.group_parts = parts; +- join->tmp_table_param.group_null_parts = null_parts; ++ join->tmp_table_param->group_length = key_length + null_parts; ++ join->tmp_table_param->group_parts = parts; ++ join->tmp_table_param->group_null_parts = null_parts; + } + + /** +@@ -3960,7 +4149,7 @@ bool JOIN::alloc_func_list() { + uint func_count, group_parts; + DBUG_TRACE; + +- func_count = tmp_table_param.sum_func_count; ++ func_count = tmp_table_param->sum_func_count; + /* + If we are using rollup, we need a copy of the summary functions for + each level +@@ -4146,6 +4335,540 @@ bool JOIN::add_having_as_tmp_table_cond(uint curr_tmp_table) { + return false; + } + ++bool JOIN::make_leader_tables_info() { ++ mem_root_deque *curr_fields = fields; ++ bool materialize_join = false; ++ uint curr_tmp_table = primary_tables; ++ TABLE *exec_tmp_table = nullptr; ++ ++ const bool may_trace = // just to avoid an empty trace block ++ need_tmp_before_win || implicit_grouping || m_windowing_steps || ++ !group_list.empty() || !order.empty(); ++ ++ Opt_trace_context *const trace = &thd->opt_trace; ++ Opt_trace_disable_I_S trace_disabled(trace, !may_trace); ++ Opt_trace_object wrapper(trace); ++ Opt_trace_array trace_tmp(trace, "considering_tmp_tables"); ++ ++ DBUG_ENTER("JOIN::make_leader_tables_info"); ++ ++ const bool has_group_by = this->grouped; ++ tmp_table_param->cleanup(); ++ tmp_table_param->pq_copy(saved_tmp_table_param); ++ tmp_table_param->copy_fields.clear(); ++ ++ if (alloc_ref_item_slice(thd, REF_SLICE_SAVED_BASE)) { DBUG_RETURN(true); } ++ copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE); ++ current_ref_item_slice = REF_SLICE_SAVED_BASE; ++ ++ Opt_trace_object trace_this_tbl(trace); ++ trace_this_tbl.add("adding_tmp_table_in_plan_at_position", curr_tmp_table) ++ .add_alnum("cause", "output_for_parallel_query"); ++ QEP_TAB *tab = &qep_tab[curr_tmp_table]; ++ ++ pq_replace_avg_func(thd, query_block, curr_fields, ++ (nesting_map)1 << (unsigned int)query_block->nest_level); ++ ++ Temp_table_param *tmp_param = ++ new (thd->mem_root) Temp_table_param(*tmp_table_param); ++ if (tmp_param == nullptr) { DBUG_RETURN(true); } ++ tmp_param->m_window_frame_buffer = true; ++ tmp_param->hidden_field_count = CountHiddenFields(*curr_fields); ++ tmp_param->skip_create_table = true; ++ ++ mem_root_deque tmplist(*curr_fields); ++ ++ /* ++ create_tmp_table may change the original item's result_field, hence ++ we must save it before. 
++ */ ++ std::vector saved_result_field (tmplist.size(), nullptr); ++ uint i = 0; ++ ++ for (Item *tmp_item : *curr_fields) { ++ if (tmp_item->type() == Item::FIELD_ITEM || tmp_item->type() == Item::DEFAULT_VALUE_ITEM) { ++ saved_result_field[i] = down_cast(tmp_item)->result_field; ++ } else { ++ saved_result_field[i] = tmp_item->get_result_field(); ++ } ++ i++; ++ } ++ ++ std::vector saved_join_fields (fields->size(), nullptr); ++ i = 0; ++ for (Item *tmp_item : *fields) { ++ if (tmp_item->type() == Item::FIELD_ITEM || tmp_item->type() == Item::DEFAULT_VALUE_ITEM) { ++ saved_join_fields[i] = down_cast(tmp_item)->field; ++ } ++ i++; ++ } ++ ++ /* ++ set saved_sum_func= true, then temp table will create a Item_field for sum funcs ++ in tmplist, which we call it sum_field that use for recieving PQ workers's sum data ++ */ ++ TABLE *table = ++ create_tmp_table(thd, tmp_param, tmplist, nullptr, false, true, ++ query_block->active_options(), HA_POS_ERROR, "", true, true); ++ if (table == nullptr) { DBUG_RETURN(true); } ++ table->materialized= false; ++ tmp_tables = 1; ++ ++ // check the rewritten table ++ thd->pq_check_fields = table->s->fields; ++ thd->pq_check_reclen = table->s->reclength; ++ ++ tab->set_table(table); ++ tab->tmp_table_param = tmp_param; ++ ++ // restore result_field->name ++ i = 0; ++ for (Item *tmp_item : *curr_fields) { ++ if (tmp_item->type() == Item::FIELD_ITEM || tmp_item->type() == Item::DEFAULT_VALUE_ITEM) { ++ // create_tmp_table may change the original item's result_field, ++ // restore field_name to pass the main.metadata test. ++ (down_cast(tmp_item)->result_field)->field_name = saved_result_field[i]->field_name; ++ if (tmp_item->const_item()) { ++ uint32 length = (down_cast(tmp_item)->base_item_field())->field->data_length(); ++ memmove((down_cast(tmp_item)->result_field)->ptr, ++ (down_cast(tmp_item)->base_item_field())->field->ptr, ++ length); ++ } ++ } ++ i++; ++ } ++ ++ /* ++ Allocate a slice of ref items that describe the items to be copied ++ from the second temporary table. ++ */ ++ if (alloc_ref_item_slice(thd, REF_SLICE_PQ_TMP)) DBUG_RETURN(true); ++ ++ // No sum funcs anymore ++ if (change_to_use_tmp_fields( ++ curr_fields, thd, ++ ref_items[REF_SLICE_PQ_TMP], ++ &tmp_fields[REF_SLICE_PQ_TMP], ++ query_block->m_added_non_hidden_fields)) { ++ DBUG_RETURN(true); ++ } ++ ++ /* ++ * create sum() base on tmp table's sum_field which is sum of worker send. ++ */ ++ if (pq_build_sum_funcs(thd, query_block, ref_items[REF_SLICE_PQ_TMP], ++ tmp_fields[REF_SLICE_PQ_TMP], CountVisibleFields(tmp_fields[REF_SLICE_PQ_TMP]), ++ (nesting_map)1 << (unsigned int) query_block->nest_level)) { ++ DBUG_RETURN(true); ++ } ++ curr_fields = &tmp_fields[REF_SLICE_PQ_TMP]; ++ set_ref_item_slice(REF_SLICE_PQ_TMP); ++ ++ if (qep_tab != nullptr) { ++ qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_PQ_TMP; ++ } ++ ++ /* save based slice of parallel query */ ++ copy_ref_item_slice(REF_SLICE_SAVED_BASE, REF_SLICE_ACTIVE); ++ current_ref_item_slice = REF_SLICE_SAVED_BASE; ++ ++ /* re-generate group_list based on group_list and curr_all_fields */ ++ if (!group_list.empty()) { ++ group_fields_cache.clear(); ++ group_fields.destroy_elements(); ++ } ++ ++ if (!group_list.empty() || tmp_table_param->sum_func_count) { ++ if (make_sum_func_list(*curr_fields, true, true)) { ++ DBUG_RETURN(true); ++ } ++ } ++ ++ /* ++ Create the first temporary table if distinct elimination is requested or ++ if the sort is too complicated to be evaluated as a filesort. 
++ */ ++ if (need_tmp_before_win) { ++ curr_tmp_table++; ++ tmp_tables++; ++ /* ++ Create temporary table for use in a single execution. ++ (Will be reused if this is a subquery that is executed several times ++ for one execution of the statement) ++ Don't use tmp table grouping for json aggregate funcs as it's ++ very ineffective. ++ */ ++ ORDER_with_src tmp_group; ++ if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg) ++ tmp_group = group_list; ++ ++ tmp_table_param->hidden_field_count = CountHiddenFields(*curr_fields); ++ ++ QEP_TAB *win_tab = &qep_tab[curr_tmp_table]; ++ win_tab->set_old_table(win_tab->table()); ++ if (create_intermediate_table(&qep_tab[curr_tmp_table], *curr_fields, ++ tmp_group, !group_list.empty() && simple_group)) ++ DBUG_RETURN(true); ++ exec_tmp_table = qep_tab[curr_tmp_table].table(); ++ ++ if (exec_tmp_table->s->is_distinct) optimize_distinct(); ++ ++ /* ++ Allocate a slice of ref items that describe the items to be copied ++ from the first temporary table. ++ */ ++ if (alloc_ref_item_slice(thd, REF_SLICE_TMP1)) DBUG_RETURN(true); ++ ++ // Change sum_fields reference to calculated fields in tmp_table ++ if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group || ++ tmp_table_param->precomputed_group_by) { ++ if (change_to_use_tmp_fields(curr_fields, thd, ++ ref_items[REF_SLICE_TMP1], ++ &tmp_fields[REF_SLICE_TMP1], ++ query_block->m_added_non_hidden_fields)) ++ DBUG_RETURN(true); ++ } else { ++ if (change_to_use_tmp_fields_except_sums(curr_fields, thd, query_block, ++ ref_items[REF_SLICE_TMP1], ++ &tmp_fields[REF_SLICE_TMP1], ++ query_block->m_added_non_hidden_fields)) ++ DBUG_RETURN(true); ++ } ++ ++ curr_fields = &tmp_fields[REF_SLICE_TMP1]; ++ // Need to set them now for correct group_fields setup, reset at the end. ++ set_ref_item_slice(REF_SLICE_TMP1); ++ qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP1; ++ setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_tbl); ++ //last_slice_before_windowing = REF_SLICE_TMP1; ++ ++ /* ++ If having is not handled here, it will be checked before the row is sent ++ to the client. ++ */ ++ if (having_cond && (streaming_aggregation || ++ (exec_tmp_table->s->is_distinct && group_list.empty()))) { ++ /* ++ If there is no select distinct or rollup, then move the having to table ++ conds of tmp table. ++ NOTE : We cannot apply having after distinct. If columns of having are ++ not part of select distinct, then distinct may remove rows ++ which can satisfy having. ++ ++ As this condition will read the tmp table, it is appropriate that ++ REF_SLICE_TMP1 is in effect when we create it below. ++ */ ++ if ((!select_distinct && rollup_state == RollupState::NONE) && ++ add_having_as_tmp_table_cond(curr_tmp_table)) ++ DBUG_RETURN(true); ++ ++ /* ++ Having condition which we are not able to add as tmp table conds are ++ kept as before. And, this will be applied before storing the rows in ++ tmp table. ++ */ ++ qep_tab[curr_tmp_table].having = having_cond; ++ having_cond = NULL; // Already done ++ } ++ ++ tmp_table_param->func_count = 0; ++ ++ if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group) { ++ tmp_table_param->field_count += tmp_table_param->sum_func_count; ++ tmp_table_param->sum_func_count = 0; ++ } ++ /** check if group by has to respect ordering. 
If true, move group by ++ to order by */ ++ if (exec_tmp_table->group) { // Already grouped ++ if (order.empty() && !skip_sort_order) { ++ for (ORDER *group = group_list.order; group; group = group->next) { ++ if (group->direction != ORDER_NOT_RELEVANT) { ++ order = group_list; /* order by group */ ++ break; ++ } ++ } ++ } ++ group_list.clean(); ++ } ++ /* ++ If we have different sort & group then we must sort the data by group ++ and copy it to a second temporary table. ++ This code is also used if we are using distinct something ++ we haven't been able to store in the temporary table yet ++ like SEC_TO_TIME(SUM(...)) or when distinct is used with rollup. ++ */ ++// ============== not yet test ================== ++ if ((!group_list.empty() && ++ (!test_if_subpart(group_list.order, order.order) || select_distinct || ++ m_windowing_steps || rollup_state != RollupState::NONE)) || ++ (select_distinct && (tmp_table_param->using_outer_summary_function || ++ rollup_state != RollupState::NONE))) { ++ DBUG_PRINT("info", ("Creating group table")); ++ ++ calc_group_buffer(this, group_list.order); ++ count_field_types(query_block, tmp_table_param, ++ tmp_fields[REF_SLICE_TMP1], ++ select_distinct && group_list.empty(), false); ++ tmp_table_param->hidden_field_count = ++ CountHiddenFields(tmp_fields[REF_SLICE_TMP1]); ++ streaming_aggregation = false; ++ if (!exec_tmp_table->group && !exec_tmp_table->s->is_distinct) { ++ // 1st tmp table were materializing join result ++ materialize_join = true; ++ explain_flags.set(ESC_BUFFER_RESULT, ESP_USING_TMPTABLE); ++ } ++ curr_tmp_table++; ++ tmp_tables++; ++ trace_this_tbl.add("adding_tmp_table_in_plan_at_position", curr_tmp_table) ++ .add_alnum("cause", "sorting_to_make_groups"); ++ ++ /* group data to new table */ ++ /* ++ If the access method is loose index scan then all MIN/MAX ++ functions are precomputed, and should be treated as regular ++ functions. See extended comment above. ++ */ ++ if (qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan()) ++ tmp_table_param->precomputed_group_by = true; ++ ++ ORDER_with_src dummy; ++ ++ if (create_intermediate_table(&qep_tab[curr_tmp_table], *curr_fields, ++ dummy, true)) ++ DBUG_RETURN(true); ++ ++ if (!group_list.empty()) { ++ explain_flags.set(group_list.src, ESP_USING_TMPTABLE); ++ if (!plan_is_const()) // No need to sort a single row ++ { ++ if (add_sorting_to_table(curr_tmp_table - 1, &group_list, false, true)) ++ DBUG_RETURN(true); ++ pq_last_sort_idx= curr_tmp_table - 1; ++ pq_rebuilt_group= true; ++ } ++ ++ if (make_group_fields(this, this)) DBUG_RETURN(true); ++ } ++ ++ // Setup sum funcs only when necessary, otherwise we might break info ++ // for the first table ++ if (!group_list.empty() || tmp_table_param->sum_func_count) { ++ if (make_sum_func_list(*curr_fields, true, true)) return true; ++ const bool need_distinct = ++ !(qep_tab[0].quick() && ++ qep_tab[0].quick()->is_agg_loose_index_scan()); ++ if (prepare_sum_aggregators(sum_funcs, need_distinct)) ++ DBUG_RETURN(true); ++ group_list.clean(); ++ if (setup_sum_funcs(thd, sum_funcs)) DBUG_RETURN(true); ++ } ++ ++ /* ++ Allocate a slice of ref items that describe the items to be copied ++ from the second temporary table. 
++ */ ++ if (alloc_ref_item_slice(thd, REF_SLICE_TMP2)) DBUG_RETURN(true); ++ ++ // No sum funcs anymore ++ if (change_to_use_tmp_fields( ++ &tmp_fields[REF_SLICE_TMP1], thd, ++ ref_items[REF_SLICE_TMP2], ++ &tmp_fields[REF_SLICE_TMP2], ++ query_block->m_added_non_hidden_fields)) ++ DBUG_RETURN(true); ++ ++ curr_fields = &tmp_fields[REF_SLICE_TMP2]; ++ set_ref_item_slice(REF_SLICE_TMP2); ++ qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP2; ++ setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_tbl); ++ //last_slice_before_windowing = REF_SLICE_TMP2; ++ } ++ if (qep_tab[curr_tmp_table].table()->s->is_distinct) ++ select_distinct = false; /* Each row is unique */ ++ ++ if (select_distinct && group_list.empty() && !m_windowing_steps) { ++ if (having_cond) { ++ qep_tab[curr_tmp_table].having = having_cond; ++ having_cond->update_used_tables(); ++ having_cond = NULL; ++ } ++ qep_tab[curr_tmp_table].needs_duplicate_removal = true; ++ trace_this_tbl.add("reading_from_table_eliminates_duplicates", true); ++ explain_flags.set(ESC_DISTINCT, ESP_DUPS_REMOVAL); ++ select_distinct = false; ++ } ++ /* Clean tmp_table_param for the next tmp table. */ ++ tmp_table_param->field_count = tmp_table_param->sum_func_count = ++ tmp_table_param->func_count = 0; ++ ++ tmp_table_param->cleanup(); ++ streaming_aggregation = false; ++ ++ if (!group_optimized_away) { ++ grouped = false; ++ } else { ++ /* ++ If grouping has been optimized away, a temporary table is ++ normally not needed unless we're explicitly requested to create ++ one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT or ++ there is a windowing function that needs sorting). ++ ++ In this case (grouping was optimized away), temp_table was ++ created without a grouping expression and JOIN::exec() will not ++ perform the necessary grouping (by the use of end_send_group() ++ or end_write_group()) if JOIN::group is set to false. ++ */ ++ /* ++ The temporary table was explicitly requested or there is a window ++ function which needs sorting (check need_tmp_before_win in ++ JOIN::optimize). ++ */ ++ assert(query_block->active_options() & OPTION_BUFFER_RESULT || ++ m_windowing_steps); ++ // the temporary table does not have a grouping expression ++ assert(!qep_tab[curr_tmp_table].table()->group); ++ } ++ calc_group_buffer(this, group_list.order); ++ count_field_types(query_block, tmp_table_param, *curr_fields, false, ++ false); ++ } ++// ====================================== ++ /* ++ Set up structures for a temporary table but do not actually create ++ the temporary table if one of these conditions are true: ++ - The query is implicitly grouped. ++ - The query is explicitly grouped and ++ + implemented as a simple grouping, or ++ + LIMIT 1 is specified, or ++ + ROLLUP is specified, or ++ + . ++ */ ++ ++ if ((grouped || implicit_grouping) && !m_windowing_steps) { ++ if (make_group_fields(this, this)) return true; ++ ++ if (make_sum_func_list(*curr_fields, true, true)) return true; ++ ++ const bool need_distinct = !(qep_tab && qep_tab[0].quick() && ++ qep_tab[0].quick()->is_agg_loose_index_scan()); ++ if (prepare_sum_aggregators(sum_funcs, need_distinct)) DBUG_RETURN(true); ++ if (setup_sum_funcs(thd, sum_funcs) || thd->is_fatal_error()) return true; ++ } ++ ++ if (qep_tab && (!group_list.empty() || (!order.empty() && !m_windowing_steps /* [1] */))) { ++ /* ++ [1] above: too early to do query ORDER BY if we have windowing; must ++ wait till after window processing. 
++ */ ++ /* call from pq_tmp_table_info, join->join_tab/join->best_ref is null */ ++ // ASSERT_BEST_REF_IN_JOIN_ORDER(this); ++ DBUG_PRINT("info", ("Sorting for send_result_set_metadata")); ++ /* ++ If we have already done the group, add HAVING to sorted table except ++ when rollup is present ++ */ ++ if (having_cond && group_list.empty() && !streaming_aggregation && ++ rollup_state == RollupState::NONE) { ++ if (add_having_as_tmp_table_cond(curr_tmp_table)) DBUG_RETURN(true); ++ } ++ ++ if (grouped) ++ m_select_limit = HA_POS_ERROR; ++ else if (!need_tmp_before_win) { ++ /* ++ We can abort sorting after thd->select_limit rows if there are no ++ filter conditions for any tables after the sorted one. ++ Filter conditions come in several forms: ++ 1. as a condition item attached to the join_tab, or ++ 2. as a keyuse attached to the join_tab (ref access). ++ */ ++ for (i = const_tables + 1; i < primary_tables; i++) { ++ QEP_TAB *const local_tab = qep_tab + i; ++ if (local_tab->condition() || // 1 ++ (best_ref[tab->idx()]->keyuse() && ++ local_tab->first_inner() == NO_PLAN_IDX)) // 2 ++ { ++ /* We have to sort all rows */ ++ m_select_limit = HA_POS_ERROR; ++ break; ++ } ++ } ++ } ++ /* ++ Here we add sorting stage for ORDER BY/GROUP BY clause, if the ++ optimiser chose FILESORT to be faster than INDEX SCAN or there is ++ no suitable index present. ++ OPTION_FOUND_ROWS supersedes LIMIT and is taken into account. ++ */ ++ DBUG_PRINT("info", ("Sorting for order by/group by")); ++ ORDER_with_src order_arg = group_list.empty() ? order : group_list; ++ if (qep_tab && ++ m_ordered_index_usage != ++ (group_list.empty() ? ORDERED_INDEX_ORDER_BY : ORDERED_INDEX_GROUP_BY) && ++ // Windowing will change order, so it's too early to sort here ++ !m_windowing_steps) { ++ // Sort either first non-const table or the last tmp table ++ QEP_TAB *const sort_tab = &qep_tab[curr_tmp_table]; ++ if (need_tmp_before_win && !materialize_join && !exec_tmp_table->group) ++ explain_flags.set(order_arg.src, ESP_USING_TMPTABLE); ++ ++ if (add_sorting_to_table(curr_tmp_table, &order_arg, pq_stable_sort, false)) ++ return true; ++ /* ++ filesort_limit: Return only this many rows from filesort(). ++ We can use select_limit_cnt only if we have no group_by and 1 table. ++ This allows us to use Bounded_queue for queries like: ++ "select * from t1 order by b desc limit 1;" ++ m_select_limit == HA_POS_ERROR (we need a full table scan) ++ query_expression()->select_limit_cnt == 1 (we only need one row in the result set) ++ */ ++ ++ pq_last_sort_idx = curr_tmp_table; ++ if (sort_tab->filesort) { ++ sort_tab->filesort->limit = ++ (has_group_by || (primary_tables > curr_tmp_table + 1) || ++ calc_found_rows) ++ ? m_select_limit ++ : query_expression()->select_limit_cnt; ++ } ++ } ++ } ++ fields = curr_fields; ++ i = 0; ++ for (Item *tmp_item : *fields) { ++ if ((tmp_item->type() == Item::FIELD_ITEM || tmp_item->type() == Item::DEFAULT_VALUE_ITEM) && ++ saved_join_fields[i] && saved_join_fields[i]->orig_table_name) { ++ (down_cast(tmp_item))->field->orig_table_name = saved_join_fields[i]->orig_table_name; ++ } ++ i++; ++ } ++ ++ // Reset before execution ++ set_ref_item_slice(REF_SLICE_SAVED_BASE); ++ if (qep_tab) { ++ qep_tab[primary_tables + tmp_tables].op_type = ++ get_end_select_func(); ++ } ++ grouped = has_group_by; ++ ++ unplug_join_tabs(); ++ ++ /* ++ Tmp tables are a layer between the nested loop and the derived table's ++ result, WITH RECURSIVE cannot work with them. 
This should not happen, as a ++ recursive query cannot have clauses which use a tmp table (GROUP BY, ++ etc). ++ */ ++ assert(!query_block->is_recursive() || !tmp_tables); ++ DBUG_RETURN(false); ++ ++ if (table != NULL) { ++ free_tmp_table(table); ++ tab->set_table(NULL); ++ } ++ DBUG_RETURN(true); ++} ++ + /** + Init tmp tables usage info. + +@@ -4225,7 +4948,7 @@ bool JOIN::make_tmp_tables_info() { + */ + if (qep_tab && qep_tab[0].quick() && + qep_tab[0].quick()->is_loose_index_scan()) +- tmp_table_param.precomputed_group_by = ++ tmp_table_param->precomputed_group_by = + !qep_tab[0].quick()->is_agg_loose_index_scan(); + + uint last_slice_before_windowing = REF_SLICE_ACTIVE; +@@ -4264,7 +4987,7 @@ bool JOIN::make_tmp_tables_info() { + if (!simple_group && !(test_flags & TEST_NO_KEY_GROUP) && !with_json_agg) + tmp_group = group_list; + +- tmp_table_param.hidden_field_count = CountHiddenFields(*fields); ++ tmp_table_param->hidden_field_count = CountHiddenFields(*fields); + + if (create_intermediate_table(&qep_tab[curr_tmp_table], *fields, tmp_group, + !group_list.empty() && simple_group)) +@@ -4280,6 +5003,7 @@ bool JOIN::make_tmp_tables_info() { + sorted access even if final result is not to be sorted. + */ + assert( ++ thd->parallel_exec || + !(m_ordered_index_usage == ORDERED_INDEX_VOID && !plan_is_const() && + qep_tab[const_tables].position()->sj_strategy != SJ_OPT_LOOSE_SCAN && + qep_tab[const_tables].use_order())); +@@ -4292,7 +5016,7 @@ bool JOIN::make_tmp_tables_info() { + + // Change sum_fields reference to calculated fields in tmp_table + if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group || +- tmp_table_param.precomputed_group_by) { ++ tmp_table_param->precomputed_group_by) { + if (change_to_use_tmp_fields(fields, thd, ref_items[REF_SLICE_TMP1], + &tmp_fields[REF_SLICE_TMP1], + query_block->m_added_non_hidden_fields)) +@@ -4310,6 +5034,7 @@ bool JOIN::make_tmp_tables_info() { + qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP1; + setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_outer); + last_slice_before_windowing = REF_SLICE_TMP1; ++ last_slice_before_pq = REF_SLICE_TMP1; + + /* + If having is not handled here, it will be checked before the row is sent +@@ -4341,11 +5066,11 @@ bool JOIN::make_tmp_tables_info() { + having_cond = nullptr; // Already done + } + +- tmp_table_param.func_count = 0; ++ tmp_table_param->func_count = 0; + + if (streaming_aggregation || qep_tab[curr_tmp_table].table()->group) { +- tmp_table_param.field_count += tmp_table_param.sum_func_count; +- tmp_table_param.sum_func_count = 0; ++ tmp_table_param->field_count += tmp_table_param->sum_func_count; ++ tmp_table_param->sum_func_count = 0; + } + + if (exec_tmp_table->group) { // Already grouped +@@ -4374,15 +5099,15 @@ bool JOIN::make_tmp_tables_info() { + if ((!group_list.empty() && + (!test_if_subpart(group_list.order, order.order) || select_distinct || + m_windowing_steps || rollup_state != RollupState::NONE)) || +- (select_distinct && (tmp_table_param.using_outer_summary_function || ++ (select_distinct && (tmp_table_param->using_outer_summary_function || + rollup_state != RollupState::NONE))) { + DBUG_PRINT("info", ("Creating group table")); + + calc_group_buffer(this, group_list.order); +- count_field_types(query_block, &tmp_table_param, ++ count_field_types(query_block, tmp_table_param, + tmp_fields[REF_SLICE_TMP1], + select_distinct && group_list.empty(), false); +- tmp_table_param.hidden_field_count = ++ tmp_table_param->hidden_field_count = + 
CountHiddenFields(tmp_fields[REF_SLICE_TMP1]); + streaming_aggregation = false; + if (!exec_tmp_table->group && !exec_tmp_table->s->is_distinct) { +@@ -4403,7 +5128,7 @@ bool JOIN::make_tmp_tables_info() { + functions. See extended comment above. + */ + if (qep_tab[0].quick() && qep_tab[0].quick()->is_loose_index_scan()) +- tmp_table_param.precomputed_group_by = true; ++ tmp_table_param->precomputed_group_by = true; + + ORDER_with_src dummy; // TODO can use table->group here also + +@@ -4426,7 +5151,7 @@ bool JOIN::make_tmp_tables_info() { + + // Setup sum funcs only when necessary, otherwise we might break info + // for the first table +- if (!group_list.empty() || tmp_table_param.sum_func_count) { ++ if (!group_list.empty() || tmp_table_param->sum_func_count) { + if (make_sum_func_list(*curr_fields, true, true)) return true; + const bool need_distinct = + !(qep_tab[0].quick() && +@@ -4454,6 +5179,7 @@ bool JOIN::make_tmp_tables_info() { + qep_tab[curr_tmp_table].ref_item_slice = REF_SLICE_TMP2; + setup_tmptable_write_func(&qep_tab[curr_tmp_table], &trace_this_tbl); + last_slice_before_windowing = REF_SLICE_TMP2; ++ last_slice_before_pq = REF_SLICE_TMP2; + } + if (qep_tab[curr_tmp_table].table()->s->is_distinct) + select_distinct = false; /* Each row is unique */ +@@ -4470,10 +5196,10 @@ bool JOIN::make_tmp_tables_info() { + select_distinct = false; + } + /* Clean tmp_table_param for the next tmp table. */ +- tmp_table_param.field_count = tmp_table_param.sum_func_count = +- tmp_table_param.func_count = 0; ++ tmp_table_param->field_count = tmp_table_param->sum_func_count = ++ tmp_table_param->func_count = 0; + +- tmp_table_param.cleanup(); ++ tmp_table_param->cleanup(); + streaming_aggregation = false; + + if (!group_optimized_away) { +@@ -4501,7 +5227,7 @@ bool JOIN::make_tmp_tables_info() { + assert(!qep_tab[curr_tmp_table].table()->group); + } + calc_group_buffer(this, group_list.order); +- count_field_types(query_block, &tmp_table_param, *curr_fields, false, ++ count_field_types(query_block, tmp_table_param, *curr_fields, false, + false); + } + +@@ -4556,7 +5282,7 @@ bool JOIN::make_tmp_tables_info() { + for (uint i = const_tables + 1; i < primary_tables; i++) { + QEP_TAB *const tab = qep_tab + i; + if (tab->condition() || // 1 +- (best_ref[tab->idx()]->keyuse() && ++ (best_ref != nullptr && best_ref[tab->idx()]->keyuse() && + tab->first_inner() == NO_PLAN_IDX)) // 2 + { + /* We have to sort all rows */ +@@ -4585,7 +5311,7 @@ bool JOIN::make_tmp_tables_info() { + explain_flags.set(order_arg.src, ESP_USING_TMPTABLE); + + if (add_sorting_to_table(curr_tmp_table, &order_arg, +- /*force_stable_sort=*/false, ++ /*force_stable_sort=*/pq_stable_sort, + /*sort_before_group=*/false)) + return true; + /* +@@ -4597,18 +5323,18 @@ bool JOIN::make_tmp_tables_info() { + query_expression->select_limit_cnt == 1 (we only need one row in the + result set) + */ +- if (sort_tab->filesort) +- sort_tab->filesort->limit = +- (has_group_by || (primary_tables > curr_tmp_table + 1) || +- calc_found_rows) +- ? m_select_limit +- : query_expression()->select_limit_cnt; ++ pq_last_sort_idx = curr_tmp_table; ++ sort_tab->filesort->limit = ++ (has_group_by || (primary_tables > curr_tmp_table + 1) || ++ calc_found_rows) ++ ? 
m_select_limit ++ : query_expression()->select_limit_cnt; + } + } + + if (qep_tab && m_windowing_steps) { + for (uint wno = 0; wno < m_windows.elements; wno++) { +- tmp_table_param.m_window = m_windows[wno]; ++ tmp_table_param->m_window = m_windows[wno]; + + if (!tmp_tables) { + curr_tmp_table = primary_tables; +@@ -4635,12 +5361,12 @@ bool JOIN::make_tmp_tables_info() { + ORDER_with_src dummy; + + if (last_slice_before_windowing == REF_SLICE_ACTIVE) { +- tmp_table_param.hidden_field_count = CountHiddenFields(*fields); ++ tmp_table_param->hidden_field_count = CountHiddenFields(*fields); + } else { + assert(tmp_tables >= 1 && + last_slice_before_windowing > REF_SLICE_ACTIVE); + +- tmp_table_param.hidden_field_count = ++ tmp_table_param->hidden_field_count = + CountHiddenFields(tmp_fields[last_slice_before_windowing]); + } + +@@ -4663,7 +5389,7 @@ bool JOIN::make_tmp_tables_info() { + buffering. + */ + Temp_table_param *par = +- new (thd->mem_root) Temp_table_param(tmp_table_param); ++ new (thd->mem_root) Temp_table_param(*tmp_table_param); + par->m_window_frame_buffer = true; + TABLE *table = + create_tmp_table(thd, par, *curr_fields, nullptr, false, false, +@@ -4804,6 +5530,11 @@ bool JOIN::make_tmp_tables_info() { + } + + void JOIN::unplug_join_tabs() { ++ //clone JOIN info from pq_tmp_tables_info, best_bef = NULL ++ if (tables !=0 && !(best_ref && !join_tab)) { ++ return; ++ } ++ + ASSERT_BEST_REF_IN_JOIN_ORDER(this); + + /* +diff --git a/sql/sql_select.h b/sql/sql_select.h +index 9c4f4aed..24bb5d29 100644 +--- a/sql/sql_select.h ++++ b/sql/sql_select.h +@@ -2,6 +2,7 @@ + #define SQL_SELECT_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -205,6 +206,18 @@ class Key_use { + fanout(0.0), + read_cost(0.0) {} + ++ Key_use *pq_clone(THD *thd) { ++ Key_use *new_key_use = new (thd->pq_mem_root) Key_use( ++ nullptr, nullptr, used_tables, key, keypart, optimize, keypart_map, ++ ref_table_rows, null_rejecting, cond_guard, sj_pred_no); ++ if (new_key_use != nullptr) { ++ new_key_use->bound_keyparts = bound_keyparts; ++ new_key_use->fanout = fanout; ++ new_key_use->read_cost = read_cost; ++ } ++ return new_key_use; ++ } ++ + TABLE_LIST *table_ref; ///< table owning the index + + /** +@@ -566,6 +579,8 @@ struct POSITION { + } + prefix_rowcount *= filter_effect; + } ++ ++ bool pq_copy(THD *thd, POSITION *orig); + }; + + /** +@@ -990,4 +1005,8 @@ SJ_TMP_TABLE *create_sj_tmp_table(THD *thd, JOIN *join, + */ + uint actual_key_flags(const KEY *key_info); + ++store_key *get_store_key(THD *thd, Item *val, table_map used_tables, ++ table_map const_tables, const KEY_PART_INFO *key_part, ++ uchar *key_buff, uint maybe_null); ++ + #endif /* SQL_SELECT_INCLUDED */ +diff --git a/sql/sql_tmp_table.cc b/sql/sql_tmp_table.cc +index adcfe0f8..1bd259f7 100644 +--- a/sql/sql_tmp_table.cc ++++ b/sql/sql_tmp_table.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2011, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
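The sql_select.h hunk above adds Key_use::pq_clone() and POSITION::pq_copy(), which copy optimizer plan structures into a worker-private arena (thd->pq_mem_root) so each parallel worker can re-bind table and field pointers without touching the leader's plan. The following is a minimal, self-contained sketch of that clone-into-arena pattern; Arena and PlanNode are hypothetical stand-ins, not MySQL's MEM_ROOT or Key_use, and are not part of this patch.

    #include <cstddef>
    #include <deque>
    #include <new>
    #include <vector>

    class Arena {
     public:
      // Hand out storage that lives until the arena is destroyed,
      // mimicking a MEM_ROOT-style "free everything at once" allocator.
      void *alloc(std::size_t n) {
        blocks_.emplace_back(n);
        return blocks_.back().data();
      }
     private:
      std::deque<std::vector<unsigned char>> blocks_;
    };

    struct PlanNode {
      const void *table_ref = nullptr;  // leader-private pointer, re-bound later
      unsigned key = 0;                 // plain scalars are copied as-is
      double read_cost = 0.0;

      // Clone into the worker's arena: copy the scalar plan data, drop the
      // pointers that only make sense in the leader's context.
      PlanNode *pq_clone(Arena *arena) const {
        void *mem = arena->alloc(sizeof(PlanNode));
        PlanNode *copy = new (mem) PlanNode;
        copy->table_ref = nullptr;   // the worker re-binds this to its own table
        copy->key = key;
        copy->read_cost = read_cost;
        return copy;
      }
    };

In the same spirit, a worker in this patch clones each Key_use and POSITION it needs and then points the cloned entries at the tables it has opened itself.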
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -86,6 +87,7 @@ + #include "sql/thr_malloc.h" + #include "sql/window.h" + #include "template_utils.h" ++#include "sql/temp_table_param.h" + + using std::max; + using std::min; +@@ -175,8 +177,9 @@ static bool alloc_record_buffers(THD *thd, TABLE *table); + + Field *create_tmp_field_from_field(THD *thd, const Field *org_field, + const char *name, TABLE *table, +- Item_field *item) { +- Field *new_field = org_field->new_field(thd->mem_root, table); ++ Item_field *item, MEM_ROOT *root) { ++ MEM_ROOT *pq_check_root = root ? root : thd->mem_root; ++ Field *new_field = org_field->new_field(pq_check_root, table); + if (new_field == nullptr) return nullptr; + + new_field->init(table); +@@ -216,13 +219,14 @@ Field *create_tmp_field_from_field(THD *thd, const Field *org_field, + new_created field + */ + +-static Field *create_tmp_field_from_item(Item *item, TABLE *table) { ++static Field *create_tmp_field_from_item(Item *item, TABLE *table, MEM_ROOT *root) { + bool maybe_null = item->is_nullable(); + Field *new_field = nullptr; ++ MEM_ROOT *pq_check_root = root ? root : *THR_MALLOC; + + switch (item->result_type()) { + case REAL_RESULT: +- new_field = new (*THR_MALLOC) ++ new_field = new (pq_check_root) + Field_double(item->max_length, maybe_null, item->item_name.ptr(), + item->decimals, false, true); + break; +@@ -234,11 +238,11 @@ static Field *create_tmp_field_from_item(Item *item, TABLE *table) { + Field_long : make them Field_longlong. + */ + if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1)) +- new_field = new (*THR_MALLOC) ++ new_field = new (pq_check_root) + Field_longlong(item->max_length, maybe_null, item->item_name.ptr(), + item->unsigned_flag); + else +- new_field = new (*THR_MALLOC) ++ new_field = new (pq_check_root) + Field_long(item->max_length, maybe_null, item->item_name.ptr(), + item->unsigned_flag); + break; +@@ -251,14 +255,18 @@ static Field *create_tmp_field_from_item(Item *item, TABLE *table) { + */ + if (item->is_temporal() || item->data_type() == MYSQL_TYPE_GEOMETRY || + item->data_type() == MYSQL_TYPE_JSON) { +- new_field = item->tmp_table_field_from_field_type(table, true); ++ new_field = item->tmp_table_field_from_field_type(table, true, root); + } else { +- new_field = item->make_string_field(table); ++ new_field = item->make_string_field(table, root); ++ } ++ ++ if (new_field != nullptr) { ++ new_field->set_derivation(item->collation.derivation); + } +- new_field->set_derivation(item->collation.derivation); ++ + break; + case DECIMAL_RESULT: +- new_field = Field_new_decimal::create_from_item(item); ++ new_field = Field_new_decimal::create_from_item(item, root); + break; + case ROW_RESULT: + default: +@@ -288,15 +296,16 @@ static Field *create_tmp_field_from_item(Item *item, TABLE *table) { + new_created field + */ + +-static Field *create_tmp_field_for_schema(const Item *item, TABLE *table) { ++Field *create_tmp_field_for_schema(Item *item, TABLE *table, MEM_ROOT *root) { ++ MEM_ROOT *pq_check_root = root ? 
root : *THR_MALLOC; + if (item->data_type() == MYSQL_TYPE_VARCHAR) { + Field *field; + if (item->max_length > MAX_FIELD_VARCHARLENGTH) +- field = new (*THR_MALLOC) ++ field = new (pq_check_root) + Field_blob(item->max_length, item->is_nullable(), + item->item_name.ptr(), item->collation.collation, false); + else { +- field = new (*THR_MALLOC) Field_varstring( ++ field = new (pq_check_root) Field_varstring( + item->max_length, item->is_nullable(), item->item_name.ptr(), + table->s, item->collation.collation); + table->s->db_create_options |= HA_OPTION_PACK_RECORD; +@@ -304,7 +313,7 @@ static Field *create_tmp_field_for_schema(const Item *item, TABLE *table) { + if (field) field->init(table); + return field; + } +- return item->tmp_table_field_from_field_type(table, false); ++ return item->tmp_table_field_from_field_type(table, false, root); + } + + /** +@@ -350,7 +359,7 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + Func_ptr_array *copy_func, Field **from_field, + Field **default_field, bool group, bool modify_item, + bool table_cant_handle_bit_fields, bool make_copy_field, +- bool copy_result_field) { ++ bool copy_result_field, MEM_ROOT *root) { + DBUG_TRACE; + Field *result = nullptr; + Item::Type orig_type = type; +@@ -378,10 +387,10 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + if (item_field->is_nullable() && + !(item_field->field->is_nullable() || + item_field->field->table->is_nullable())) { +- result = create_tmp_field_from_item(item_field, table); ++ result = create_tmp_field_from_item(item_field, table, root); + } else if (table_cant_handle_bit_fields && + item_field->field->type() == MYSQL_TYPE_BIT) { +- result = create_tmp_field_from_item(item_field, table); ++ result = create_tmp_field_from_item(item_field, table, root); + /* + If the item is a function, a pointer to the item is stored in + copy_func. We separate fields from functions by checking if the +@@ -395,7 +404,7 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + : item_field->item_name.ptr(), + table, + (modify_item && orig_type != Item::REF_ITEM) ? 
item_field +- : nullptr); ++ : nullptr, root); + } + if (result == nullptr) return nullptr; + if (modify_item) { +@@ -430,7 +439,7 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + + result = create_tmp_field_from_field(thd, sp_result_field, + item_func_sp->item_name.ptr(), +- table, nullptr); ++ table, nullptr, root); + if (!result) break; + if (modify_item) item_func_sp->set_result_field(result); + break; +@@ -456,7 +465,7 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + case Item::SUM_FUNC_ITEM: + if (type == Item::SUM_FUNC_ITEM && !is_wf) { + Item_sum *item_sum = down_cast(item); +- result = item_sum->create_tmp_field(group, table); ++ result = item_sum->create_tmp_field(group, table, root); + if (!result) my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATALERROR)); + } else { + /* +@@ -482,7 +491,7 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + assert(*from_field); + } + +- result = create_tmp_field_from_item(item, table); ++ result = create_tmp_field_from_item(item, table, root); + if (result == nullptr) return nullptr; + if (modify_item) item->set_result_field(result); + if (copy_func && !make_copy_field && item->is_result_field()) +@@ -493,12 +502,17 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + case Item::TYPE_HOLDER: + case Item::VALUES_COLUMN_ITEM: + result = down_cast(item)->make_field_by_type( +- table, thd->is_strict_mode()); ++ table, thd->is_strict_mode(), root); + break; + default: // Doesn't have to be stored + assert(false); + break; + } ++ ++ if (result != nullptr && thd->parallel_exec) { ++ result->extra_length = item->pq_extra_len(group); ++ } ++ + return result; + } + +@@ -828,6 +842,27 @@ inline void relocate_field(Field *field, uchar *pos, uchar *null_flags, + field->reset(); + } + ++void Temp_table_param::pq_copy(Temp_table_param *orig) ++{ ++ end_write_records = orig->end_write_records; ++ field_count = orig->field_count; ++ func_count = orig->func_count; ++ sum_func_count = orig->sum_func_count; ++ hidden_field_count = orig->hidden_field_count; ++ group_parts = orig->group_parts; ++ group_length = orig->group_length; ++ group_null_parts = orig->group_null_parts; ++ outer_sum_func_count = orig->outer_sum_func_count; ++ using_outer_summary_function = orig->using_outer_summary_function; ++ schema_table = orig->schema_table; ++ precomputed_group_by = orig->precomputed_group_by; ++ force_copy_fields = orig->force_copy_fields; ++ skip_create_table = orig->skip_create_table; ++ bit_fields_as_long = orig->bit_fields_as_long; ++ can_use_pk_for_unique = orig->can_use_pk_for_unique; ++ m_window_short_circuit = orig->m_window_short_circuit; ++} ++ + /** + Create a temp table according to a field list. 
+ +@@ -869,7 +904,8 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + const mem_root_deque &fields, ORDER *group, + bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, +- const char *table_alias) { ++ const char *table_alias, bool force_disk_table, ++ bool parallel_query) { + DBUG_TRACE; + if (!param->allow_group_via_temp_table) + group = nullptr; // Can't use group key +@@ -1030,8 +1066,18 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + if (param->m_window == nullptr || !param->m_window->is_last()) + store_column = false; + } +- if (item->const_item() && hidden_field_count <= 0) +- continue; // We don't have to store this ++ ++ if (item->const_item()) { ++ if ((int)hidden_field_count <= 0) { ++ // mark this item and then we can identify it without sending a message to MQ. ++ item->skip_create_tmp_table = true; ++ continue; // We don't have to store this ++ } ++ if (parallel_query) { ++ item->skip_create_tmp_table = true; ++ goto HIDDEN; ++ } ++ } + } + + if (store_column && is_sum_func && !group && +@@ -1046,6 +1092,12 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + &from_field[fieldnr], &default_field[fieldnr], group != nullptr, + not_all_columns, false, false, false); + if (new_field == nullptr) return nullptr; // Should be OOM ++ if (thd->parallel_exec) { ++ new_field->item_sum_ref = sum_item; ++ new_field->extra_length = sum_item->sum_func() == Item_sum::AVG_FUNC ++ ? sizeof(longlong) ++ : 0; ++ } + new_field->set_field_index(fieldnr); + reg_field[fieldnr++] = new_field; + share->reclength += new_field->pack_length(); +@@ -1123,8 +1175,16 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + But only for the group-by table. So do not set result_field if this is + a tmp table for UNION or derived table materialization. + */ +- if (not_all_columns && type == Item::SUM_FUNC_ITEM) ++ if (not_all_columns && type == Item::SUM_FUNC_ITEM) { ++ new_field->item_sum_ref = ((Item_sum *) item); + down_cast(item)->set_result_field(new_field); ++ } ++ if (item->type() == Item::FIELD_AVG_ITEM) { ++ Item_avg_field *item_avg_field= static_cast(item->real_item()); ++ Item_sum_avg *item_avg= item_avg_field->avg_item; ++ new_field->item_sum_ref= item_avg; ++ } ++ + share->reclength += new_field->pack_length(); + if (!new_field->is_flag_set(NOT_NULL_FLAG)) null_count++; + if (new_field->type() == MYSQL_TYPE_BIT) +@@ -1168,6 +1228,7 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + } + } + ++HIDDEN: + hidden_field_count--; + if (hidden_field_count == 0) { + /* +@@ -1365,7 +1426,7 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + table->hash_field = field; + } + +- if (setup_tmp_table_handler(thd, table, select_options, false, ++ if (setup_tmp_table_handler(thd, table, select_options, force_disk_table, + param->schema_table)) + return nullptr; /* purecov: inspected */ + +diff --git a/sql/sql_tmp_table.h b/sql/sql_tmp_table.h +index 217723cc..16338aa0 100644 +--- a/sql/sql_tmp_table.h ++++ b/sql/sql_tmp_table.h +@@ -2,6 +2,7 @@ + #define SQL_TMP_TABLE_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -55,7 +56,8 @@ TABLE *create_tmp_table(THD *thd, Temp_table_param *param, + const mem_root_deque &fields, ORDER *group, + bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, +- const char *table_alias); ++ const char *table_alias, bool force_disk_table = false, ++ bool parallel_query = false); + bool open_tmp_table(TABLE *table); + TABLE *create_tmp_table_from_fields(THD *thd, List &field_list, + bool is_virtual = true, +@@ -75,10 +77,17 @@ Field *create_tmp_field(THD *thd, TABLE *table, Item *item, Item::Type type, + Func_ptr_array *copy_func, Field **from_field, + Field **default_field, bool group, bool modify_item, + bool table_cant_handle_bit_fields, bool make_copy_field, +- bool copy_result_field = false); ++ bool copy_result_field = false, MEM_ROOT *root = nullptr); + Field *create_tmp_field_from_field(THD *thd, const Field *org_field, + const char *name, TABLE *table, +- Item_field *item); ++ Item_field *item, MEM_ROOT *root = nullptr); ++ ++Field *create_tmp_field_from_item(Item *item, TABLE *table, ++ Func_ptr_array *copy_func, bool modify_item, ++ MEM_ROOT *root = nullptr); ++ ++Field *create_tmp_field_for_schema(Item *item, TABLE *table, ++ MEM_ROOT *root = nullptr); + + /** + Get the minimum of max_key_length and max_key_part_length between +diff --git a/sql/sql_union.cc b/sql/sql_union.cc +index 76977f95..1827a44b 100644 +--- a/sql/sql_union.cc ++++ b/sql/sql_union.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2001, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -845,7 +846,7 @@ Query_expression::setup_materialization(THD *thd, TABLE *dst_table, + assert(join && join->is_optimized()); + assert(join->root_access_path() != nullptr); + ConvertItemsToCopy(*join->fields, dst_table->visible_field_ptr(), +- &join->tmp_table_param); ++ join->tmp_table_param); + + query_block.subquery_path = join->root_access_path(); + assert(query_block.subquery_path != nullptr); +@@ -854,7 +855,7 @@ Query_expression::setup_materialization(THD *thd, TABLE *dst_table, + query_block.disable_deduplication_by_hash_field = + (mixed_union_operators() && !activate_deduplication); + query_block.copy_fields_and_items = true; +- query_block.temp_table_param = &join->tmp_table_param; ++ query_block.temp_table_param = join->tmp_table_param; + query_block.is_recursive_reference = select->recursive_reference; + query_blocks.push_back(move(query_block)); + +@@ -962,10 +963,10 @@ void Query_expression::create_access_paths(THD *thd) { + JOIN *join = select->join; + assert(join && join->is_optimized()); + ConvertItemsToCopy(*join->fields, tmp_table->visible_field_ptr(), +- &join->tmp_table_param); ++ join->tmp_table_param); + AppendPathParameters param; + param.path = NewStreamingAccessPath(thd, join->root_access_path(), join, +- &join->tmp_table_param, tmp_table, ++ join->tmp_table_param, tmp_table, + /*ref_slice=*/-1); + param.join = join; + CopyCosts(*join->root_access_path(), param.path); +@@ -1209,6 +1210,11 @@ bool Query_expression::ExecuteIteratorQuery(THD *thd) { + + { + auto join_cleanup = create_scope_guard([this, thd] { ++ /** for parallel scan, we should end the pq iterator */ ++ if (thd->parallel_exec && thd->pq_iterator) { ++ thd->pq_iterator->End(); 
++ } ++ + for (Query_block *sl = first_query_block(); sl; + sl = sl->next_query_block()) { + JOIN *join = sl->join; +@@ -1224,30 +1230,61 @@ bool Query_expression::ExecuteIteratorQuery(THD *thd) { + return true; + } + ++ uint read_records_num = 0; ++ MQueue_handle *handler = query_result->get_mq_handler(); ++ if (handler) { ++ handler->set_datched_status(MQ_NOT_DETACHED); ++ } ++ + PFSBatchMode pfs_batch_mode(m_root_iterator.get()); + ++ bool execute_error = false; + for (;;) { + int error = m_root_iterator->Read(); + DBUG_EXECUTE_IF("bug13822652_1", thd->killed = THD::KILL_QUERY;); + +- if (error > 0 || thd->is_error()) // Fatal error +- return true; ++ if (error > 0 || thd->is_error() || thd->is_pq_error()) // Fatal error ++ execute_error = true; + else if (error < 0) + break; + else if (thd->killed) // Aborted by user + { + thd->send_kill_message(); +- return true; ++ execute_error = true; + } + ++ if (execute_error) break; + ++*send_records_ptr; ++ read_records_num++; + + if (query_result->send_data(thd, *fields)) { +- return true; ++ execute_error = true; ++ break; + } + thd->get_stmt_da()->inc_current_row_for_condition(); + } + ++ // if there is error, then for worker it should send an error msg to MQ and ++ // detach the MQ. Note that, only worker can detach the MQ. ++ if ((execute_error || !read_records_num || ++ DBUG_EVALUATE_IF("pq_worker_error4", true, false)) && ++ thd->is_worker()) { ++ MQ_DETACHED_STATUS status = MQ_NOT_DETACHED; ++ // there is an error during the execution ++ if (execute_error || DBUG_EVALUATE_IF("pq_worker_error4", true, false)) { ++ thd->pq_error = true; ++ if (handler != nullptr) { ++ handler->send_exception_msg(ERROR_MSG); ++ } ++ status = MQ_HAVE_DETACHED; ++ } else if (!read_records_num) { ++ status = MQ_TMP_DETACHED; ++ } ++ if (handler) handler->set_datched_status(status); ++ } ++ ++ if (execute_error) return true; ++ + // NOTE: join_cleanup must be done before we send EOF, so that we get the + // row counts right. + } +diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc +index 3b8473bd..6f83dfd0 100644 +--- a/sql/sys_vars.cc ++++ b/sql/sys_vars.cc +@@ -1,4 +1,5 @@ + /* Copyright (c) 2009, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2021, Huawei Technologies Co., Ltd. 
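The sql_union.cc hunk above makes a parallel-query worker decide, after its read loop, whether to detach from the message queue: on an execution error it pushes an exception message and marks the queue MQ_HAVE_DETACHED, and if it simply produced no rows it marks it MQ_TMP_DETACHED. The sketch below mirrors only that decision logic; the status names are taken from the patch, while MessageQueue itself is a hypothetical stand-in.

    #include <cstdint>
    #include <string>
    #include <vector>

    enum MQ_DETACHED_STATUS { MQ_NOT_DETACHED, MQ_TMP_DETACHED, MQ_HAVE_DETACHED };

    struct MessageQueue {
      MQ_DETACHED_STATUS status = MQ_NOT_DETACHED;
      std::vector<std::string> messages;
      void send_exception_msg(const std::string &msg) { messages.push_back(msg); }
      void set_detached_status(MQ_DETACHED_STATUS s) { status = s; }
    };

    // Called by a worker once its read loop has finished.
    void worker_finish(MessageQueue &mq, bool execute_error,
                       std::uint64_t rows_sent) {
      if (execute_error) {
        // Tell the leader something went wrong, then detach for good.
        mq.send_exception_msg("worker execution error");
        mq.set_detached_status(MQ_HAVE_DETACHED);
      } else if (rows_sent == 0) {
        // Nothing was produced: detach temporarily so the leader stops polling.
        mq.set_detached_status(MQ_TMP_DETACHED);
      }
      // Otherwise the queue stays attached until the normal end-of-data message.
    }

As the patch notes, only a worker may detach the queue; the leader merely observes the status.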
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -136,6 +137,8 @@ + #include "sql/xa.h" + #include "template_utils.h" // pointer_cast + #include "thr_lock.h" ++#include "sql/sql_parallel.h" ++ + #ifdef _WIN32 + #include "sql/named_pipe.h" + #endif +@@ -880,6 +883,54 @@ static Sys_var_bool Sys_windowing_use_high_precision( + HINT_UPDATEABLE SESSION_VAR(windowing_use_high_precision), + CMD_LINE(OPT_ARG), DEFAULT(true)); + ++#ifndef NDEBUG ++extern bool dbug_pq_worker_stall; ++ ++static Sys_var_bool Sys_Debug_pq_worker_stall( ++ "debug_pq_worker_stall", ++ "PQ worker stall while send date to message queue.", ++ HINT_UPDATEABLE GLOBAL_VAR(dbug_pq_worker_stall), CMD_LINE(OPT_ARG), ++ DEFAULT(false)); ++#endif ++ ++static Sys_var_bool Sys_sql_force_parallel_execute( ++ "force_parallel_execute", "force parallel execute in session", ++ HINT_UPDATEABLE SESSION_VAR(force_parallel_execute), CMD_LINE(OPT_ARG), ++ DEFAULT(0)); ++ ++static Sys_var_ulonglong Sys_parallel_memory_limit( ++ "parallel_memory_limit", ++ "upper limit memory size that parallel query can use", ++ GLOBAL_VAR(parallel_memory_limit), CMD_LINE(REQUIRED_ARG), ++ VALID_RANGE(0, ULONG_MAX), DEFAULT(100 * 1024 * 1024), BLOCK_SIZE(IO_SIZE), ++ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(NULL)); ++ ++static Sys_var_ulong Sys_parallel_max_threads( ++ "parallel_max_threads", "max running threads of parallel query.", ++ GLOBAL_VAR(parallel_max_threads), CMD_LINE(REQUIRED_ARG), ++ VALID_RANGE(0, ULONG_MAX), DEFAULT(64), BLOCK_SIZE(1), NO_MUTEX_GUARD, ++ NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(NULL)); ++ ++static Sys_var_ulong Sys_parallel_cost_threshold( ++ "parallel_cost_threshold", "Cost threshold for parallel query.", ++ SESSION_VAR(parallel_cost_threshold), CMD_LINE(REQUIRED_ARG), ++ VALID_RANGE(0, ULONG_MAX), DEFAULT(1000), BLOCK_SIZE(1), NO_MUTEX_GUARD, ++ NOT_IN_BINLOG); ++ ++static Sys_var_ulong Sys_parallel_default_dop( ++ "parallel_default_dop", "default degree of parallel query.", ++ SESSION_VAR(parallel_default_dop), CMD_LINE(REQUIRED_ARG), ++ VALID_RANGE(0, 1024), DEFAULT(4), BLOCK_SIZE(1), NO_MUTEX_GUARD, ++ NOT_IN_BINLOG); ++ ++static Sys_var_ulong Sys_parallel_queue_timeout( ++ "parallel_queue_timeout", ++ "queue timeout for parallel query when resource is not enough ." ++ "the unit is microseconds", ++ SESSION_VAR(parallel_queue_timeout), CMD_LINE(REQUIRED_ARG), ++ VALID_RANGE(0, ULONG_MAX), DEFAULT(0), BLOCK_SIZE(1), NO_MUTEX_GUARD, ++ NOT_IN_BINLOG); ++ + static Sys_var_uint Sys_cte_max_recursion_depth( + "cte_max_recursion_depth", + "Abort a recursive common table expression " +diff --git a/sql/system_variables.h b/sql/system_variables.h +index 932ae91f..a9e6f2ce 100644 +--- a/sql/system_variables.h ++++ b/sql/system_variables.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
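The sys_vars.cc hunk above introduces the knobs that govern parallel query: force_parallel_execute, parallel_cost_threshold, parallel_default_dop, parallel_max_threads, parallel_memory_limit and parallel_queue_timeout. How they are combined is not shown in this part of the patch, so the function below is only a plausible sketch of a planner-side check under the assumption that parallelism is chosen when forced or when the plan cost exceeds the threshold, with the degree of parallelism capped by the global thread budget.

    #include <algorithm>

    struct PqSessionVars {       // illustrative mirror of the new session variables
      bool force_parallel_execute = false;
      unsigned long parallel_cost_threshold = 1000;  // default from the patch
      unsigned long parallel_default_dop = 4;        // default from the patch
    };

    struct PqGlobalVars {
      unsigned long parallel_max_threads = 64;       // default from the patch
    };

    // Hypothetical decision helper: returns the degree of parallelism to use,
    // or 0 when the statement should stay serial.
    unsigned long choose_dop(const PqSessionVars &session,
                             const PqGlobalVars &global, double plan_cost,
                             unsigned long threads_in_use) {
      const bool want_parallel = session.force_parallel_execute ||
                                 plan_cost > session.parallel_cost_threshold;
      if (!want_parallel) return 0;

      // Never exceed the global worker budget that is still available.
      const unsigned long budget =
          global.parallel_max_threads > threads_in_use
              ? global.parallel_max_threads - threads_in_use
              : 0;
      return std::min<unsigned long>(session.parallel_default_dop, budget);
    }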
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -422,6 +423,16 @@ struct System_variables { + @sa Sys_select_disk_sync_delay + */ + uint select_into_disk_sync_delay; ++ ++ bool force_parallel_execute; ++ ++ ulong parallel_cost_threshold; ++ ++ ulong parallel_default_dop; ++ ++ ulong parallel_queue_timeout; ++ ++ bool pq_copy_from(System_variables leader); + }; + + /** +@@ -504,6 +515,8 @@ struct System_status_var { + */ + double last_query_cost; + ulonglong last_query_partial_plans; ++ bool reset{false}; ++ bool pq_merge_status(System_status_var worker); + }; + + /* +diff --git a/sql/table.h b/sql/table.h +index 491d64d3..4ccae669 100644 +--- a/sql/table.h ++++ b/sql/table.h +@@ -2,6 +2,7 @@ + #define TABLE_INCLUDED + + /* Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -108,6 +109,7 @@ struct TABLE; + struct TABLE_LIST; + struct TABLE_SHARE; + struct handlerton; ++struct Field_raw_data; + typedef int8 plan_idx; + + namespace dd { +@@ -280,7 +282,7 @@ struct ORDER { + + ORDER *next{nullptr}; + +- protected: ++ public: + /** + The initial ordering expression. Usually substituted during resolving + and must not be used during optimization and execution. +@@ -2101,6 +2103,10 @@ struct TABLE { + bool should_binlog_drop_if_temp_flag{false}; + + public: ++ ++ /** copy table property from orig table */ ++ bool pq_copy(THD *thd, void *select, TABLE *orig); ++ + /** + Does this table have any columns that can be updated using partial update + in the current row? +@@ -3324,13 +3330,14 @@ struct TABLE_LIST { + + const Lock_descriptor &lock_descriptor() const { return m_lock_descriptor; } + +- private: ++ public: + /** + The members below must be kept aligned so that (1 << m_tableno) == m_map. + A table that takes part in a join operation must be assigned a unique + table number. + */ + uint m_tableno{0}; ///< Table number within query block ++ private: + table_map m_map{0}; ///< Table map, derived from m_tableno + /** + If this table or join nest is the Y in "X [LEFT] JOIN Y ON C", this +@@ -3404,7 +3411,6 @@ struct TABLE_LIST { + */ + Table_function *table_function{nullptr}; + +- private: + /** + This field is set to non-null for derived tables and views. It points + to the Query_expression representing the derived table/view. +@@ -3413,6 +3419,7 @@ struct TABLE_LIST { + */ + Query_expression *derived{nullptr}; /* Query_expression of derived table */ + ++ private: + /// If non-NULL, the CTE which this table is derived from. + Common_table_expr *m_common_table_expr{nullptr}; + /** +@@ -3511,9 +3518,9 @@ struct TABLE_LIST { + ulonglong view_suid{0}; ///< view is suid (true by default) + ulonglong with_check{0}; ///< WITH CHECK OPTION + +- private: + /// The view algorithm that is actually used, if this is a view. + enum_view_algorithm effective_algorithm{VIEW_ALGORITHM_UNDEFINED}; ++ private: + Lock_descriptor m_lock_descriptor; + + public: +diff --git a/sql/temp_table_param.h b/sql/temp_table_param.h +index bfff4928..f898c3c9 100644 +--- a/sql/temp_table_param.h ++++ b/sql/temp_table_param.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2015, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
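system_variables.h above adds System_variables::pq_copy_from() and System_status_var::pq_merge_status(), suggesting that a worker starts from a copy of the leader's session variables and that the leader folds each worker's status counters back into its own when the query finishes. The exact member list is not visible here, so the sketch below only illustrates the additive-merge idea with a hypothetical pair of counters.

    struct WorkerStatus {            // hypothetical subset of per-thread counters
      unsigned long long rows_read = 0;
      unsigned long long tmp_tables_created = 0;
      bool reset = false;            // mirrors the reset flag added by the patch

      // Leader-side merge: counters are additive, so folding a worker's values
      // into the leader keeps SHOW STATUS totals consistent after a parallel run.
      bool pq_merge_status(const WorkerStatus &worker) {
        rows_read += worker.rows_read;
        tmp_tables_created += worker.tmp_tables_created;
        return true;
      }
    };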
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, +@@ -211,6 +212,7 @@ class Temp_table_param { + m_window(nullptr) {} + + void cleanup() { copy_fields.clear(); } ++ void pq_copy(Temp_table_param *orig); + }; + + #endif // TEMP_TABLE_PARAM_INCLUDED +diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc +index 08c46bbe..8c65e284 100644 +--- a/storage/innobase/handler/ha_innodb.cc ++++ b/storage/innobase/handler/ha_innodb.cc +@@ -4,6 +4,7 @@ Copyright (c) 2000, 2021, Oracle and/or its affiliates. + Copyright (c) 2008, 2009 Google Inc. + Copyright (c) 2009, Percona Inc. + Copyright (c) 2012, Facebook Inc. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + Portions of this file contain modifications contributed and copyrighted by + Google, Inc. Those modifications are gratefully acknowledged and are described +@@ -9953,7 +9954,12 @@ int ha_innobase::index_read( + + m_prebuilt->ins_sel_stmt = thd_is_ins_sel_stmt(m_user_thd); + ++ auto saved_ipc = m_prebuilt->idx_cond; ++ if (m_prebuilt->pq_index_read) { ++ m_prebuilt->idx_cond = false; ++ } + ret = row_search_mvcc(buf, mode, m_prebuilt, match_mode, 0); ++ m_prebuilt->idx_cond = saved_ipc; + + } else { + m_prebuilt->session = thd_to_innodb_session(m_user_thd); +diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h +index 2ad48f4b..9d377585 100644 +--- a/storage/innobase/handler/ha_innodb.h ++++ b/storage/innobase/handler/ha_innodb.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -448,6 +449,18 @@ class ha_innobase : public handler { + int parallel_scan_init(void *&scan_ctx, size_t *num_threads, + bool use_reserved_threads) override; + ++ int pq_leader_range_select_scan_init(uint keyno, void *&pq_ctx, uint &n_threads); ++ ++ int pq_leader_skip_scan_select_scan_init(uint keyno, void *&pq_ctx, uint n_threads); ++ ++ int pq_leader_ref_init(uint keyno, void *&pq_ctx, uint &n_threads); ++ ++ int pq_leader_scan_init(uint keyno, void *&scan_ctx, uint &n_threads) override; ++ ++ int pq_worker_scan_init(uint keyno, void *scan_ctx) override; ++ ++ int pq_leader_signal_all(void *scan_ctx) override; ++ + /** Start parallel read of InnoDB records. + @param[in] scan_ctx A scan context created by parallel_scan_init + @param[in] thread_ctxs Context for each of the spawned threads +@@ -462,10 +475,16 @@ class ha_innobase : public handler { + int parallel_scan(void *scan_ctx, void **thread_ctxs, Reader::Init_fn init_fn, + Reader::Load_fn load_fn, Reader::End_fn end_fn) override; + ++ int pq_worker_scan_next(void *scan_ctx, uchar* buf) override; ++ + /** End of the parallel scan. + @param[in] scan_ctx A scan context created by parallel_scan_init. 
*/ + void parallel_scan_end(void *scan_ctx) override; + ++ int pq_leader_scan_end(void *parallel_scan_ctx) override; ++ ++ int pq_worker_scan_end(void *parallel_scan_ctx) override; ++ + bool check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) override; + +diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc +index bfef1bc8..515db889 100644 +--- a/storage/innobase/handler/handler0alter.cc ++++ b/storage/innobase/handler/handler0alter.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2005, 2021, Oracle and/or its affiliates. ++Copyright (c) 2021, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -1159,12 +1160,476 @@ bool ha_innobase::prepare_inplace_alter_table(TABLE *altered_table, + return prepare_inplace_alter_table_impl( + altered_table, ha_alter_info, old_dd_tab, new_dd_tab); + } ++int ha_innobase::pq_worker_scan_init(uint keyno, void *scan_ctx) { ++ active_index = keyno; ++ ++ Parallel_reader *pq_reader = static_cast(scan_ctx); ++ update_thd(); ++ m_prebuilt->index = innobase_get_index(pq_reader->key); ++ /** ++ * here, we must init m_prebuilt->is_attach_ctx because this value may ++ * not be reset in the last execution. ++ */ ++ m_prebuilt->is_attach_ctx = false; ++ auto trx = m_prebuilt->trx; ++ innobase_register_trx(ht, ha_thd(), trx); ++ trx_start_if_not_started_xa(trx, false); ++ if (trx->read_view == nullptr && pq_reader->snapshot) { ++ trx_clone_read_view(trx, pq_reader->snapshot); ++ } ++ build_template(false); ++ inited = handler::PQ_WORKER; ++ ++ return (0); ++} ++ ++int ha_innobase::pq_leader_range_select_scan_init(uint keyno, void *&pq_ctx, ++ uint &n_threads) { ++ pq_ctx = nullptr; ++ update_thd(); ++ auto trx = m_prebuilt->trx; ++ innobase_register_trx(ht, ha_thd(), trx); ++ trx_start_if_not_started_xa(trx, false); ++ trx_assign_read_view(trx); ++ ++ auto pq_reader = UT_NEW_NOKEY( ++ Parallel_reader(Parallel_reader::available_threads(n_threads))); ++ if (pq_reader == nullptr || !pq_reader->pq_have_event()) { ++ if (pq_reader) UT_DELETE(pq_reader); ++ return (HA_ERR_OUT_OF_MEM); ++ } ++ ++ dict_index_t *index{nullptr}; ++ pq_reader->key = keyno; ++ index = innobase_get_index(keyno); ++ m_prebuilt->index = index; ++ ++ uint range_res{0}; ++ while (!(range_res = mrr_funcs.next(mrr_iter, &mrr_cur_range))) { ++ dtuple_t *range_start{nullptr}; ++ dtuple_t *range_end{nullptr}; ++ uint range_errno{0}; ++ mem_heap_t *heap{nullptr}; ++ btr_pcur_t *pcur{nullptr}; ++ ++ auto start_key = ++ mrr_cur_range.start_key.keypart_map ? &mrr_cur_range.start_key : 0; ++ auto end_key = ++ mrr_cur_range.end_key.keypart_map ? &mrr_cur_range.end_key : 0; ++ ++ // alloc heap for range_scan tuple ++ if (index != nullptr) { ++ ulint search_tuple_n_fields; ++ search_tuple_n_fields = 2 * (index->table->get_n_cols() + ++ dict_table_get_n_v_cols(index->table)); ++ if (!heap) ++ heap = mem_heap_create(DTUPLE_EST_ALLOC(search_tuple_n_fields)); ++ } ++ ++ range_errno = 0; ++ // set range boundary ++ /* 1) seq scan. range_start is pos on the first rec that is fulfill the ++ range condition range_end is pos on the next rec of the last rec fulfill ++ the range contition 2) reverse scan. 
range_start is pos on the prev rec ++ of the first rec fulfill the range conition range_end is pos on the last ++ rec fulfill the range conition ++ */ ++ if (start_key) { ++ const uchar *key = start_key->key; ++ auto keypart_map = start_key->keypart_map; ++ uint key_len = calculate_key_len(table, keyno, keypart_map); ++ ++ auto start_flag = start_key->flag; ++ if (!pq_reverse_scan) { ++ start_flag = (start_key->flag == HA_READ_AFTER_KEY) ++ ? HA_READ_AFTER_KEY ++ : HA_READ_KEY_OR_NEXT; ++ } else { ++ start_flag = (start_key->flag == HA_READ_AFTER_KEY) ++ ? HA_READ_KEY_OR_PREV ++ : HA_READ_BEFORE_KEY; ++ } ++ ++ m_prebuilt->pq_index_read = true; ++ int err = index_read(table->record[0], key, key_len, start_flag); ++ m_prebuilt->pq_index_read = false; ++ ++ if (!err) { ++ range_start = dtuple_copy(m_prebuilt->pq_tuple, heap); ++ range_start->n_fields_cmp = m_prebuilt->pq_tuple->n_fields_cmp; ++ } else if (pq_reverse_scan) { ++ range_errno = (err == HA_ERR_KEY_NOT_FOUND) ? 0 : err; ++ } else { ++ range_errno = err; ++ } ++ } ++ ++ if (end_key && !range_errno) { ++ const uchar *key = end_key->key; ++ auto keypart_map = end_key->keypart_map; ++ uint key_len = calculate_key_len(table, keyno, keypart_map); ++ ++ auto end_flag = end_key->flag; ++ if (!pq_reverse_scan) { ++ end_flag = (end_key->flag == HA_READ_BEFORE_KEY) ? HA_READ_KEY_OR_NEXT ++ : HA_READ_AFTER_KEY; ++ } else { ++ end_flag = (end_key->flag == HA_READ_BEFORE_KEY) ? HA_READ_BEFORE_KEY ++ : HA_READ_KEY_OR_PREV; ++ } ++ ++ m_prebuilt->pq_index_read = true; ++ int err = index_read(table->record[0], key, key_len, end_flag); ++ m_prebuilt->pq_index_read = false; ++ if (!err) { ++ range_end = dtuple_copy(m_prebuilt->pq_tuple, heap); ++ range_end->n_fields_cmp = m_prebuilt->pq_tuple->n_fields_cmp; ++ pcur = m_prebuilt->pcur; ++ } else { ++ if (err == HA_ERR_KEY_NOT_FOUND) { ++ index_last(table->record[0]); ++ pcur = m_prebuilt->pcur; ++ range_errno = 0; ++ } else { ++ range_errno = err; ++ } ++ } ++ } else if (end_key == nullptr && !range_errno) { ++ index_last(table->record[0]); ++ pcur = m_prebuilt->pcur; ++ } ++ ++ Parallel_reader::Scan_range range_scan{range_start, range_end}; ++ Parallel_reader::Config config(range_scan, index); ++ config.m_range_errno = range_errno; ++ config.m_pcur = pcur; ++ config.m_pq_reverse_scan = pq_reverse_scan; ++ ++ auto success = pq_reader->add_scan(trx, config, nullptr, false); ++ pq_reader->snapshot = trx->read_view; ++ ++ if (heap != nullptr) { ++ mem_heap_free(heap); ++ } ++ if (success != DB_SUCCESS) { ++ UT_DELETE(pq_reader); ++ return (HA_ERR_GENERIC); ++ } ++ ++ if (pq_reverse_scan) pq_reader->pq_set_reverse_scan(); ++ } ++ ++ pq_ctx = pq_reader; ++ build_template(false); ++ ++ if (pq_reader->max_splits() < n_threads) { ++ n_threads = pq_reader->max_splits() > 1 ? 
pq_reader->max_splits() : 1; ++ } ++ ++ return (0); ++} ++ ++int ha_innobase::pq_leader_ref_init(uint keyno, void *&pq_ctx, ++ uint &n_threads) { ++ pq_ctx = nullptr; ++ update_thd(); ++ auto trx = m_prebuilt->trx; ++ innobase_register_trx(ht, ha_thd(), trx); ++ trx_start_if_not_started_xa(trx, false); ++ trx_assign_read_view(trx); ++ ++ dtuple_t *range_start{nullptr}; ++ dtuple_t *range_end{nullptr}; ++ dict_index_t *index{nullptr}; ++ uint range_errno{0}; ++ mem_heap_t *heap{nullptr}; ++ btr_pcur_t *pcur{nullptr}; ++ ++ auto pq_reader = UT_NEW_NOKEY( ++ Parallel_reader(Parallel_reader::available_threads(n_threads))); ++ if (pq_reader == nullptr || !pq_reader->pq_have_event()) { ++ if (pq_reader) UT_DELETE(pq_reader); ++ return (HA_ERR_OUT_OF_MEM); ++ } ++ ++ pq_reader->key = keyno; ++ index = innobase_get_index(keyno); ++ m_prebuilt->index = index; ++ ++ // create search tuple on this index ++ if (index != nullptr) { ++ ulint search_tuple_n_fields; ++ search_tuple_n_fields = 2 * (index->table->get_n_cols() + ++ dict_table_get_n_v_cols(index->table)); ++ if (!heap) heap = mem_heap_create(DTUPLE_EST_ALLOC(search_tuple_n_fields)); ++ } ++ ++ range_errno = 0; ++ const uchar *key = pq_ref_key.key; ++ auto keypart_map = pq_ref_key.keypart_map; ++ uint key_len = calculate_key_len(table, keyno, keypart_map); ++ ++ // populate search range boudary from ref record value ++ int ret = index_read(table->record[0], key, key_len, HA_READ_KEY_EXACT); ++ if (ret) { ++ // record errorno when can't find ref key. which will lead process finish ++ // early ++ range_errno = ret; ++ } else { ++ // record range boudary for searching ++ auto start_flag = ++ pq_reverse_scan ? HA_READ_BEFORE_KEY : HA_READ_KEY_OR_NEXT; ++ ++ m_prebuilt->pq_index_read = true; ++ int err = index_read(table->record[0], key, key_len, start_flag); ++ m_prebuilt->pq_index_read = false; ++ ++ if (!err) { ++ range_start = dtuple_copy(m_prebuilt->pq_tuple, heap); ++ range_start->n_fields_cmp = m_prebuilt->pq_tuple->n_fields_cmp; ++ } else { ++ if (err == HA_ERR_KEY_NOT_FOUND) { ++ index_first(table->record[0]); ++ pcur = m_prebuilt->pcur; ++ range_errno = 0; ++ } else { ++ range_errno = err; ++ } ++ } ++ ++ if (!range_errno) { ++ auto end_flag = pq_reverse_scan ? HA_READ_KEY_OR_PREV : HA_READ_AFTER_KEY; ++ m_prebuilt->pq_index_read = true; ++ int err = index_read(table->record[0], key, key_len, end_flag); ++ m_prebuilt->pq_index_read = false; ++ if (!err) { ++ range_end = dtuple_copy(m_prebuilt->pq_tuple, heap); ++ range_end->n_fields_cmp = m_prebuilt->pq_tuple->n_fields_cmp; ++ pcur = m_prebuilt->pcur; ++ } else { ++ if (err == HA_ERR_KEY_NOT_FOUND) { ++ index_last(table->record[0]); ++ pcur = m_prebuilt->pcur; ++ range_errno = 0; ++ } else { ++ range_errno = err; ++ } ++ } ++ } ++ } ++ ++ Parallel_reader::Scan_range range_scan{range_start, range_end}; ++ Parallel_reader::Config config(range_scan, index); ++ config.m_range_errno = range_errno; ++ config.m_pcur = pcur; ++ config.m_pq_reverse_scan = pq_reverse_scan; ++ ++ auto success = pq_reader->add_scan(trx, config, nullptr, false); ++ pq_reader->snapshot = trx->read_view; ++ ++ if (heap != nullptr) { ++ mem_heap_free(heap); ++ } ++ if (success != DB_SUCCESS) { ++ UT_DELETE(pq_reader); ++ return (HA_ERR_GENERIC); ++ } ++ ++ if (pq_reverse_scan) pq_reader->pq_set_reverse_scan(); ++ pq_ctx = pq_reader; ++ build_template(false); ++ ++ if (pq_reader->max_splits() < n_threads) { ++ n_threads = pq_reader->max_splits() > 1 ? 
pq_reader->max_splits() : 1; ++ } ++ ++ return (0); ++} ++ ++int ha_innobase::pq_leader_scan_init(uint keyno, void *&pq_ctx, ++ uint &n_threads) { ++ if (dict_table_is_discarded(m_prebuilt->table)) { ++ ib_senderrf(ha_thd(), IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, ++ table->s->table_name.str); ++ ++ return (HA_ERR_NO_SUCH_TABLE); ++ } ++ ++ btr_pcur_t *pcur{nullptr}; ++ active_index = keyno; ++ int result = change_active_index(active_index); ++ if (result) return result; ++ ++ // equality reference ++ if (pq_ref) return pq_leader_ref_init(keyno, pq_ctx, n_threads); ++ ++ // range scan ++ if (PQ_RANGE_SELECT == pq_range_type) ++ return pq_leader_range_select_scan_init(keyno, pq_ctx, n_threads); ++ ++ // table or index scan ++ pq_ctx = nullptr; ++ update_thd(); ++ auto trx = m_prebuilt->trx; ++ innobase_register_trx(ht, ha_thd(), trx); ++ trx_start_if_not_started_xa(trx, false); ++ trx_assign_read_view(trx); ++ ++ dtuple_t *range_start{nullptr}; ++ dtuple_t *range_end{nullptr}; ++ dict_index_t *index{nullptr}; ++ ++ auto pq_reader = UT_NEW_NOKEY( ++ Parallel_reader(Parallel_reader::available_threads(n_threads))); ++ ++ if (pq_reader == nullptr || !pq_reader->pq_have_event()) { ++ if (pq_reader) UT_DELETE(pq_reader); ++ return (HA_ERR_OUT_OF_MEM); ++ } ++ ++ pq_reader->key = keyno; ++ index = innobase_get_index(keyno); ++ ++ if (index == nullptr) { ++ if (pq_reader != nullptr) UT_DELETE(pq_reader); ++ return HA_ERR_GENERIC; ++ } ++ ++ m_prebuilt->index = index; ++ ++ int ret; ++ if (pq_reverse_scan) { ++ ret = index_last(table->record[0]); ++ if (!ret) { ++ pcur = m_prebuilt->pcur; ++ } ++ } else { ++ ret = index_first(table->record[0]); ++ } ++ ++ Parallel_reader::Scan_range range_scan{range_start, range_end}; ++ Parallel_reader::Config config(range_scan, index); ++ config.m_range_errno = ret; ++ config.m_pcur = pcur; ++ config.m_pq_reverse_scan = pq_reverse_scan; ++ ++ auto success = pq_reader->add_scan(trx, config, nullptr, false); ++ pq_reader->snapshot = trx->read_view; ++ ++ if (success != DB_SUCCESS) { ++ UT_DELETE(pq_reader); ++ return (HA_ERR_GENERIC); ++ } ++ ++ if (pq_reverse_scan) pq_reader->pq_set_reverse_scan(); ++ ++ pq_ctx = pq_reader; ++ build_template(false); ++ ++ if (pq_reader->pq_need_change_dop() && pq_reader->max_splits() < n_threads) { ++ n_threads = pq_reader->max_splits() > 1 ? 
pq_reader->max_splits() : 1; ++ } ++ ++ return (0); ++} ++ ++static int convert_error_code(dberr_t err, int flags, THD *thd, ++ row_prebuilt_t *prebuilt, TABLE *table) { ++ int error; ++ switch (err) { ++ case DB_SUCCESS: ++ error = 0; ++ srv_stats.n_rows_read.add(thd_get_thread_id(prebuilt->trx->mysql_thd), 1); ++ break; ++ case DB_END_OF_INDEX: ++ error = HA_ERR_END_OF_FILE; ++ break; ++ default: ++ error = convert_error_code_to_mysql(err, prebuilt->table->flags, thd); ++ break; ++ } ++ ++ return error; ++} ++ ++int ha_innobase::pq_leader_signal_all(void *pq_ctx) { ++ auto pq_reader = static_cast(pq_ctx); ++ ++ pq_reader->pq_wakeup_workers(); ++ ++ return DB_SUCCESS; ++} ++ ++/** ++ * parallel scan worker read a record from partititon and store it in buf ++ * ++ */ ++int ha_innobase::pq_worker_scan_next(void *pq_ctx, uchar *buf) { ++ dberr_t err{DB_SUCCESS}; ++ ut_a(pq_ctx != nullptr); ++ ha_statistic_increment(&System_status_var::ha_read_rnd_next_count); ++ auto pq_reader = static_cast(pq_ctx); ++ if (pq_reader->is_error_set()) return err; ++ ++retry: ++ if (!m_prebuilt->is_attach_ctx) { ++ err = pq_reader->dispatch_ctx(m_prebuilt); ++ if (err == DB_SUCCESS) { ++ m_prebuilt->is_attach_ctx = true; ++ } else { ++ goto end; ++ } ++ } ++ ++ { ++ auto ctx = m_prebuilt->ctx; ++ err = ctx->read_record(buf, m_prebuilt); ++ if (err != DB_SUCCESS) { ++ if (err == DB_END_OF_INDEX) { ++ m_prebuilt->is_attach_ctx = false; ++ pq_reader->ctx_completed_inc(); ++ goto retry; ++ } else if (err == DB_END_OF_RANGE) { ++ m_prebuilt->is_attach_ctx = false; ++ goto retry; ++ } else if (err == DB_NOT_FOUND) { ++ goto retry; ++ } else if (!pq_reader->is_error_set()) { ++ pq_reader->set_error_state(err); ++ } ++ } ++ } ++ ++end: ++ return (convert_error_code(err, 0, current_thd, m_prebuilt, table)); ++} ++ ++int ha_innobase::pq_leader_scan_end(void *pq_ctx) { ++ active_index = MAX_KEY; ++ Parallel_reader *parallel_reader = static_cast(pq_ctx); ++ ++ /* wake up worker thread*/ ++ parallel_reader->pq_wakeup_workers(); ++ UT_DELETE(parallel_reader); ++ return 0; ++} ++ ++int ha_innobase::pq_worker_scan_end(void *pq_ctx) { ++ if (m_prebuilt->trx->read_view != nullptr && ++ m_prebuilt->trx->read_view->skip_view_list == true) { ++ UT_DELETE(m_prebuilt->trx->read_view); ++ m_prebuilt->trx->read_view = nullptr; ++ } ++ ++ inited = handler::NONE; ++ return 0; ++} + + int ha_innobase::parallel_scan_init(void *&scan_ctx, size_t *num_threads, + bool use_reserved_threads) { + if (dict_table_is_discarded(m_prebuilt->table)) { + ib_senderrf(ha_thd(), IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, +- m_prebuilt->table->name.m_name); ++ table->s->table_name.str); + + return (HA_ERR_NO_SUCH_TABLE); + } +@@ -1228,7 +1693,7 @@ int ha_innobase::parallel_scan(void *scan_ctx, void **thread_ctxs, + Reader::End_fn end_fn) { + if (dict_table_is_discarded(m_prebuilt->table)) { + ib_senderrf(ha_thd(), IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, +- m_prebuilt->table->name.m_name); ++ table->s->table_name.str); + + return (HA_ERR_NO_SUCH_TABLE); + } +@@ -9821,7 +10286,7 @@ int ha_innopart::parallel_scan_init(void *&scan_ctx, size_t *num_threads, + + if (dict_table_is_discarded(m_prebuilt->table)) { + ib_senderrf(ha_thd(), IB_LOG_LEVEL_ERROR, ER_TABLESPACE_DISCARDED, +- m_prebuilt->table->name.m_name); ++ table->s->table_name.str); + + UT_DELETE(adapter); + return HA_ERR_NO_SUCH_TABLE; +diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h +index d18a4287..338ed405 100644 +--- 
a/storage/innobase/include/btr0pcur.h ++++ b/storage/innobase/include/btr0pcur.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1996, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -519,7 +520,6 @@ struct btr_pcur_t { + return (old_fetch_mode); + } + +- private: + /** Moves the persistent cursor backward if it is on the first record + of the page. Commits mtr. Note that to prevent a possible deadlock, the + operation first stores the position of the cursor, commits mtr, acquires +@@ -532,7 +532,6 @@ struct btr_pcur_t { + @param[in,out] mtr Mini-tranaction. */ + void move_backward_from_page(mtr_t *mtr); + +- public: + /** a B-tree cursor */ + btr_cur_t m_btr_cur; + +diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h +index 8639fc0d..b7625bbc 100644 +--- a/storage/innobase/include/data0data.h ++++ b/storage/innobase/include/data0data.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1994, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -810,6 +811,9 @@ struct dtuple_t { + } + return (false); + } ++ ++ int compare(const rec_t *rec, const dict_index_t *index, const dict_index_t *index2, ++ const ulint *offsets); + }; + + /** A slot for a field in a big rec vector */ +diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic +index 509f32d6..98191d85 100644 +--- a/storage/innobase/include/data0data.ic ++++ b/storage/innobase/include/data0data.ic +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1994, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -572,6 +573,16 @@ dtuple_t *dtuple_copy(const dtuple_t *tuple, /*!< in: tuple to copy from */ + return (new_tuple); + } + ++UNIV_INLINE ++dtuple_t *pq_dtuple_copy(const dtuple_t *tuple, /*!< in: tuple to copy from */ ++ mem_heap_t *heap) /*!< in: memory heap ++ where the tuple is created */ ++{ ++ dtuple_t *new_tuple = dtuple_copy(tuple, heap); ++ dtuple_set_n_fields_cmp(new_tuple, tuple->n_fields_cmp); ++ return (new_tuple); ++} ++ + /** The following function returns the sum of data lengths of a tuple. The space + occupied by the field structs or the tuple struct is not counted. Neither + is possible space in externally stored parts of the field. +diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h +index ccdcd0ca..fb635041 100644 +--- a/storage/innobase/include/db0err.h ++++ b/storage/innobase/include/db0err.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1996, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. 
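pq_leader_range_select_scan_init() earlier in this patch translates the start and end keys of each MRR range into index search flags, and the translation flips when the scan runs in reverse (an exclusive start key becomes HA_READ_KEY_OR_PREV, and so on). The sketch below restates that ternary logic with a local enum so the four cases are easier to read; the flag names mirror MySQL's ha_rkey_function values, but the helpers themselves are illustrative, not part of the patch.

    // Flag names mirror MySQL's ha_rkey_function; the helpers are illustrative.
    enum RKeyFlag {
      HA_READ_KEY_OR_NEXT,
      HA_READ_KEY_OR_PREV,
      HA_READ_AFTER_KEY,
      HA_READ_BEFORE_KEY
    };

    // Start-key mapping used when positioning the low end of a range.
    RKeyFlag start_flag_for(RKeyFlag requested, bool reverse_scan) {
      if (!reverse_scan)
        return requested == HA_READ_AFTER_KEY ? HA_READ_AFTER_KEY
                                              : HA_READ_KEY_OR_NEXT;
      return requested == HA_READ_AFTER_KEY ? HA_READ_KEY_OR_PREV
                                            : HA_READ_BEFORE_KEY;
    }

    // End-key mapping used when positioning the high end of a range.
    RKeyFlag end_flag_for(RKeyFlag requested, bool reverse_scan) {
      if (!reverse_scan)
        return requested == HA_READ_BEFORE_KEY ? HA_READ_KEY_OR_NEXT
                                               : HA_READ_AFTER_KEY;
      return requested == HA_READ_BEFORE_KEY ? HA_READ_BEFORE_KEY
                                             : HA_READ_KEY_OR_PREV;
    }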
+ + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -209,6 +210,7 @@ enum dberr_t { + DB_END_OF_BLOCK, + DB_END_OF_INDEX, + DB_END_SAMPLE_READ, ++ DB_END_OF_RANGE, // reach parallel query range end + + /** Generic error code for "Not found" type of errors */ + DB_NOT_FOUND, +diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h +index a27506f8..44c1dd11 100644 +--- a/storage/innobase/include/read0types.h ++++ b/storage/innobase/include/read0types.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1997, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -150,6 +151,9 @@ class ReadView { + public: + ReadView(); + ~ReadView(); ++ ++ void Copy_readView(const ReadView &); ++ + /** Check whether transaction id is valid. + @param[in] id transaction id to check + @param[in] name table name */ +@@ -274,9 +278,11 @@ class ReadView { + + private: + // Disable copying +- ReadView(const ReadView &); + ReadView &operator=(const ReadView &); + ++ public: ++ bool skip_view_list{false}; ++ + private: + /** The read should not see any transaction with trx id >= this + value. In other words, this is the "high water mark". */ +diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h +index d637981f..7173c038 100644 +--- a/storage/innobase/include/rem0cmp.h ++++ b/storage/innobase/include/rem0cmp.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1994, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -118,6 +119,10 @@ int cmp_dtuple_rec_with_gis_internal(const dtuple_t *dtuple, const rec_t *rec, + const ulint *offsets, + const dd::Spatial_reference_system *srs); + ++int cmp_sec_dtuple_pri_rec_with_match(const dtuple_t *dtuple, const rec_t *rec, ++ const dict_index_t *index, const dict_index_t *clust_index, ++ const ulint *offsets, ulint n_cmp); ++ + /** Compare a data tuple to a physical record. + @param[in] dtuple data tuple + @param[in] rec record +diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h +index 9491a602..214cdb31 100644 +--- a/storage/innobase/include/row0mysql.h ++++ b/storage/innobase/include/row0mysql.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. 
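read0types.h above exposes ReadView::Copy_readView() and a skip_view_list flag, and the handler code earlier clones the leader's snapshot into each worker transaction (pq_worker_scan_init copies pq_reader->snapshot) so every worker sees exactly the rows the leader would. A copy flagged skip_view_list is not registered in the global view list and is therefore freed by the worker itself in pq_worker_scan_end(). The sketch below shows that ownership pattern with a toy Snapshot type; it is not InnoDB's ReadView.

    #include <memory>
    #include <vector>

    // Toy stand-in for a transaction snapshot; not InnoDB's ReadView.
    struct Snapshot {
      unsigned long long low_limit_id = 0;   // "high water mark" of visibility
      std::vector<unsigned long long> active_ids;
      bool skip_view_list = false;           // true: not tracked globally
    };

    // Leader side: publish one snapshot that every worker will copy.
    Snapshot make_leader_snapshot() {
      Snapshot s;
      s.low_limit_id = 1000;
      s.active_ids = {990, 995};
      return s;  // in the patch this role is played by the leader's read view
    }

    // Worker side: take a private copy, mark it as untracked, and own it.
    std::unique_ptr<Snapshot> clone_for_worker(const Snapshot &leader) {
      auto copy = std::make_unique<Snapshot>(leader);
      copy->skip_view_list = true;   // the worker frees it itself at scan end
      return copy;
    }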
+ + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -61,6 +62,7 @@ this program; if not, write to the Free Software Foundation, Inc., + #include "trx0types.h" + #include "univ.i" + #include "ut0bool_scope_guard.h" ++#include "row0pread.h" + + // Forward declarations + class THD; +@@ -900,6 +902,15 @@ struct row_prebuilt_t { + @return true iff duplicated values should be allowed */ + bool allow_duplicates() { return (replace || on_duplicate_key_update); } + ++ std::shared_ptr ctx{}; ++ bool is_attach_ctx{false}; ++ mem_heap_t *pq_heap{nullptr}; ++ dtuple_t *pq_tuple{nullptr}; ++ bool pq_index_read{false}; ++ /** Number of externally stored columns. */ ++ ulint pq_m_n_ext{ULINT_UNDEFINED}; ++ bool pq_requires_clust_rec{false}; ++ + private: + /** A helper function for init_search_tuples_types() which prepares the shape + of the tuple to match the index +diff --git a/storage/innobase/include/row0pread-adapter.h b/storage/innobase/include/row0pread-adapter.h +index addc9fac..f7ed6383 100644 +--- a/storage/innobase/include/row0pread-adapter.h ++++ b/storage/innobase/include/row0pread-adapter.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -67,7 +68,7 @@ class Parallel_reader_adapter { + @param[in] f Callback function. + @retval error. */ + dberr_t add_scan(trx_t *trx, const Parallel_reader::Config &config, +- Parallel_reader::F &&f) MY_ATTRIBUTE((warn_unused_result)); ++ Parallel_reader::F &&f, bool split = false) MY_ATTRIBUTE((warn_unused_result)); + + /** Run the parallel scan. + @param[in] thread_contexts Context for each of the spawned threads +diff --git a/storage/innobase/include/row0pread-histogram.h b/storage/innobase/include/row0pread-histogram.h +index 142e7114..204c5590 100644 +--- a/storage/innobase/include/row0pread-histogram.h ++++ b/storage/innobase/include/row0pread-histogram.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2019, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -35,6 +36,7 @@ Created 2019-04-20 by Darshan M N. */ + #include + #include "row0pread.h" + #include "ut0counter.h" ++#include "handler.h" + + class Histogram_sampler { + public: +diff --git a/storage/innobase/include/row0pread.h b/storage/innobase/include/row0pread.h +index 2189e586..9e914441 100644 +--- a/storage/innobase/include/row0pread.h ++++ b/storage/innobase/include/row0pread.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -35,9 +36,6 @@ Created 2018-01-27 by Sunny Bains. 
*/ + #include + #include + +-#include "os0thread-create.h" +-#include "row0sel.h" +- + // Forward declarations + struct trx_t; + struct mtr_t; +@@ -96,6 +94,7 @@ reference counting, this allows us to dispose of the Ctx instances + without worrying about dangling pointers. + + NOTE: Secondary index scans are not supported currently. */ ++class ReadView; + class Parallel_reader { + public: + /** Maximum value for innodb-parallel-read-threads. */ +@@ -177,7 +176,12 @@ class Parallel_reader { + m_is_compact(config.m_is_compact), + m_page_size(config.m_page_size), + m_read_level(config.m_read_level), +- m_partition_id(config.m_partition_id) {} ++ m_partition_id(config.m_partition_id), ++ m_range_errno(config.m_range_errno), ++ m_pcur(config.m_pcur), ++ m_pq_reverse_scan(config.m_pq_reverse_scan) {} ++ ++ ~Config() {} + + /** Range to scan. */ + const Scan_range m_scan_range; +@@ -197,6 +201,12 @@ class Parallel_reader { + /** Partition id if the index to be scanned belongs to a partitioned table, + else std::numeric_limits::max(). */ + size_t m_partition_id{std::numeric_limits::max()}; ++ ++ uint m_range_errno{0}; ++ ++ btr_pcur_t *m_pcur{nullptr}; ++ ++ bool m_pq_reverse_scan{false}; + }; + + /** Thread related context information. */ +@@ -291,7 +301,7 @@ class Parallel_reader { + @param[in] f Callback function. + (default is 0 which is leaf level) + @return error. */ +- dberr_t add_scan(trx_t *trx, const Config &config, F &&f) ++ dberr_t add_scan(trx_t *trx, const Config &config, F &&f, bool split = false) + MY_ATTRIBUTE((warn_unused_result)); + + /** Wait for the join of threads spawned by the parallel reader. */ +@@ -325,11 +335,29 @@ class Parallel_reader { + @return DB_SUCCESS or error code. */ + dberr_t run(size_t n_threads = 0) MY_ATTRIBUTE((warn_unused_result)); + ++ /** dispatch a execution context to the prebuilt object */ ++ dberr_t dispatch_ctx(row_prebuilt_t *prebuilt); ++ ++ void ctx_completed_inc(); ++ ++ void pq_set_worker_done(); ++ ++ void pq_wakeup_workers(); ++ ++ void pq_set_reverse_scan(); ++ ++ bool pq_get_reverse_scan() { return m_pq_reverse_scan; } ++ + /** @return the configured max threads size. */ + size_t max_threads() const MY_ATTRIBUTE((warn_unused_result)) { + return m_max_threads; + } + ++ /** @return the queue size. */ ++ size_t max_splits() const MY_ATTRIBUTE((warn_unused_result)) { ++ return m_ctxs.size(); ++ } ++ + /** @return true if in error state. */ + bool is_error_set() const MY_ATTRIBUTE((warn_unused_result)) { + return (m_err.load(std::memory_order_relaxed) != DB_SUCCESS); +@@ -347,6 +375,15 @@ class Parallel_reader { + Parallel_reader &operator=(Parallel_reader &&) = delete; + Parallel_reader &operator=(const Parallel_reader &) = delete; + ++ /** obtain m_event **/ ++ bool pq_have_event() { return m_event ? true : false; } ++ ++ bool pq_need_change_dop() { return m_need_change_dop; } ++ ++ ReadView *snapshot{}; ++ ++ uint key{0}; ++ + private: + /** Reset error state. */ + void reset_error_state() { m_err = DB_SUCCESS; } +@@ -376,9 +413,9 @@ class Parallel_reader { + /** Create the threads and do a parallel read across the partitions. */ + void parallel_read(); + +- /** @return true if tasks are still executing. */ +- bool is_active() const MY_ATTRIBUTE((warn_unused_result)) { +- return (m_n_completed.load(std::memory_order_relaxed) < ++ /** @return true if tasks are over. 
*/
++  bool is_ctx_over() const MY_ATTRIBUTE((warn_unused_result)) {
++    return !(m_n_completed.load(std::memory_order_relaxed) <
+             m_ctx_id.load(std::memory_order_relaxed));
+   }
+ 
+@@ -397,6 +434,10 @@ class Parallel_reader {
+   /** Maximum number of worker threads to use. */
+   const size_t m_max_threads;
+ 
++  /** True: the dop will be changed to m_ctxs.size() when m_ctxs.size() is
++   * less than the expected dop. */
++  bool m_need_change_dop{true};
++
+   /** Number of worker threads that will be spawned. */
+   size_t m_n_threads{0};
+ 
+@@ -446,11 +487,17 @@ class Parallel_reader {
+   /** If the caller wants to wait for the parallel_read to finish it's run */
+   bool m_sync;
+ 
++  /** Set when the parallel-query workers have finished their reads. */
++  std::atomic work_done{false};
++
++  bool m_pq_reverse_scan{false};
++
+   /** Context information related to each parallel reader thread. */
+   std::vector> m_thread_ctxs;
+ 
+   friend class Ctx;
+   friend class Scan_ctx;
++  friend class Parallel_reader_adapter;
+ };
+ 
+ /** Parallel reader context. */
+@@ -598,6 +645,12 @@ class Parallel_reader::Scan_ctx {
+   bool check_visibility(const rec_t *&rec, ulint *&offsets, mem_heap_t *&heap,
+                         mtr_t *mtr) MY_ATTRIBUTE((warn_unused_result));
+ 
++  dberr_t find_visible_record(byte *buf, const rec_t *&rec,
++                              const rec_t *&clust_rec, ulint *&offsets,
++                              ulint *&clust_offsets, mem_heap_t *&heap,
++                              mtr_t *mtr, row_prebuilt_t *prebuilt = nullptr)
++      MY_ATTRIBUTE((warn_unused_result));
++
+   /** Create an execution context for a range and add it to
+   the Parallel_reader's run queue.
+   @param[in] range Range for which to create the context.
+@@ -680,7 +733,7 @@ class Parallel_reader::Ctx {
+   @param[in] scan_ctx Scan context.
+   @param[in] range Range that the thread has to read. */
+   Ctx(size_t id, Scan_ctx *scan_ctx, const Scan_ctx::Range &range)
+-      : m_id(id), m_range(range), m_scan_ctx(scan_ctx) {}
++      : m_id(id), m_range(range), m_scan_ctx(scan_ctx), reader(nullptr) {}
+ 
+  public:
+   /** Destructor. */
+@@ -794,9 +847,24 @@ class Parallel_reader::Ctx {
+ 
+   ulint *m_offsets{};
+ 
++  dberr_t read_record(uchar *buf, row_prebuilt_t *prebuilt)
++      MY_ATTRIBUTE((warn_unused_result));
++
+   /** Start of a new range to scan. */
+   bool m_start{};
+ 
++  /** Current row cursor. */
++  btr_pcur_t *m_pcur{};
++
++  bool start_read{true};      // true until this range's first record is read
++  mem_heap_t *m_blob_heap{};  // heap for containing MySQL format records
++  mem_heap_t *m_heap{};       // heap for containing InnoDB records
++
++  ulint offsets_[REC_OFFS_NORMAL_SIZE]{};
++  ulint clust_offsets_[REC_OFFS_NORMAL_SIZE]{};
++
++  Parallel_reader *reader;
++
+   friend class Parallel_reader;
+ };
+ 
+diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
+index 62a5b882..83b7e630 100644
+--- a/storage/innobase/include/row0sel.h
++++ b/storage/innobase/include/row0sel.h
+@@ -1,6 +1,7 @@
+ /*****************************************************************************
+ 
+ Copyright (c) 1997, 2021, Oracle and/or its affiliates.
++Copyright (c) 2022, Huawei Technologies Co., Ltd.
+ + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -120,6 +121,12 @@ bool row_sel_store_mysql_rec(byte *mysql_rec, row_prebuilt_t *prebuilt, + lob::undo_vers_t *lob_undo, + mem_heap_t *&blob_heap); + ++void pq_row_sel_store_row_id_to_prebuilt( ++ row_prebuilt_t *prebuilt, /*!< in/out: prebuilt */ ++ const rec_t *index_rec, /*!< in: record */ ++ const dict_index_t *index, /*!< in: index of the record */ ++ const ulint *offsets); /*!< in: rec_get_offsets */ ++ + /** Converts a key value stored in MySQL format to an Innobase dtuple. The last + field of the key value may be just a prefix of a fixed length field: hence + the parameter key_len. But currently we do not allow search keys where the +@@ -163,7 +170,6 @@ position and fetch next or fetch prev must not be tried to the cursor! + cursor 'direction' should be 0. + @return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK, + DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */ +-UNIV_INLINE + dberr_t row_search_for_mysql(byte *buf, page_cur_mode_t mode, + row_prebuilt_t *prebuilt, ulint match_mode, + ulint direction) +@@ -511,6 +517,42 @@ bool row_search_index_stats(const char *db_name, const char *tbl_name, + const char *index_name, ulint col_offset, + ulonglong *cardinality); + ++/** Helper class to cache clust_rec and old_ver */ ++class Row_sel_get_clust_rec_for_mysql { ++ const rec_t *cached_clust_rec; ++ rec_t *cached_old_vers; ++ ++ public: ++ /** Constructor */ ++ Row_sel_get_clust_rec_for_mysql() ++ : cached_clust_rec(nullptr), cached_old_vers(nullptr) {} ++ ~Row_sel_get_clust_rec_for_mysql() {} ++ /** Retrieve the clustered index record corresponding to a record in a ++ non-clustered index. Does the necessary locking. ++ @param[in] prebuilt prebuilt struct in the handle ++ @param[in] sec_index secondary index where rec resides ++ @param[in] rec record in a non-clustered index ++ @param[in] thr query thread ++ @param[out] out_rec clustered record or an old version of it, ++ NULL if the old version did not exist in the ++ read view, i.e., it was a fresh inserted version ++ @param[in,out] offsets in: offsets returned by ++ rec_get_offsets(rec, sec_index); ++ out: offsets returned by ++ rec_get_offsets(out_rec, clust_index) ++ @param[in,out] offset_heap memory heap from which the offsets are allocated ++ @param[out] vrow virtual column to fill ++ @param[in] mtr mtr used to get access to the non-clustered record; ++ the same mtr is used to access the clustered index ++ @param[in] lob_undo the LOB undo information. ++ @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ ++ dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index, ++ const rec_t *rec, que_thr_t *thr, const rec_t **out_rec, ++ ulint **offsets, mem_heap_t **offset_heap, ++ const dtuple_t **vrow, mtr_t *mtr, ++ lob::undo_vers_t *lob_undo); ++}; ++ + #include "row0sel.ic" + + #endif +diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic +index 9b06770c..db4d029f 100644 +--- a/storage/innobase/include/row0sel.ic ++++ b/storage/innobase/include/row0sel.ic +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1997, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -31,6 +32,8 @@ this program; if not, write to the Free Software Foundation, Inc., + *******************************************************/ + + #include "que0que.h" ++#include "row0sel.h" ++#include "row0mysql.h" + + /** Gets the plan node for the nth table in a join. + @return plan node */ +@@ -97,34 +100,3 @@ que_thr_t *open_step(que_thr_t *thr) /*!< in: query thread */ + + return (thr); + } +- +-/** Searches for rows in the database. This is used in the interface to +-MySQL. This function opens a cursor, and also implements fetch next +-and fetch prev. NOTE that if we do a search with a full key value +-from a unique index (ROW_SEL_EXACT), then we will not store the cursor +-position and fetch next or fetch prev must not be tried to the cursor! +- +-@param[out] buf buffer for the fetched row in MySQL format +-@param[in] mode search mode PAGE_CUR_L +-@param[in,out] prebuilt prebuilt struct for the table handler; +- this contains the info to search_tuple, +- index; if search tuple contains 0 field then +- we position the cursor at start or the end of +- index, depending on 'mode' +-@param[in] match_mode 0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX +-@param[in] direction 0 or ROW_SEL_NEXT or ROW_SEL_PREV; +- Note: if this is != 0, then prebuilt must has a +- pcur with stored position! In opening of a +- cursor 'direction' should be 0. +-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK, +-DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */ +-UNIV_INLINE +-dberr_t row_search_for_mysql(byte *buf, page_cur_mode_t mode, +- row_prebuilt_t *prebuilt, ulint match_mode, +- ulint direction) { +- if (!prebuilt->table->is_intrinsic()) { +- return (row_search_mvcc(buf, mode, prebuilt, match_mode, direction)); +- } else { +- return (row_search_no_mvcc(buf, mode, prebuilt, match_mode, direction)); +- } +-} +diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h +index f22a2179..60416e83 100644 +--- a/storage/innobase/include/trx0trx.h ++++ b/storage/innobase/include/trx0trx.h +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1996, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -234,6 +235,8 @@ void trx_mark_sql_stat_end(trx_t *trx); /*!< in: trx handle */ + when this function is first called for a new started transaction. */ + ReadView *trx_assign_read_view(trx_t *trx); /*!< in: active transaction */ + ++ReadView *trx_clone_read_view(trx_t *trx, ReadView *readview); ++ + /** @return the transaction's read view or NULL if one not assigned. */ + UNIV_INLINE + ReadView *trx_get_read_view(trx_t *trx); +diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc +index 4351d31a..5eca03c1 100644 +--- a/storage/innobase/read/read0read.cc ++++ b/storage/innobase/read/read0read.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1996, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -320,6 +321,12 @@ ReadView::ReadView() + ut_d(m_view_low_limit_no = 0); + } + ++void ReadView::Copy_readView(const ReadView &view) { ++ copy_prepare(view); ++ copy_complete(); ++ m_creator_trx_id = view.m_creator_trx_id; ++} ++ + /** + ReadView destructor */ + ReadView::~ReadView() { +@@ -721,8 +728,10 @@ void MVCC::view_close(ReadView *&view, bool own_mutex) { + + view->close(); + +- UT_LIST_REMOVE(m_views, view); +- UT_LIST_ADD_LAST(m_free, view); ++ if (!view->skip_view_list) { ++ UT_LIST_REMOVE(m_views, view); ++ UT_LIST_ADD_LAST(m_free, view); ++ } + + ut_ad(validate()); + +diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc +index e0ec4bbc..f1f0187a 100644 +--- a/storage/innobase/rem/rem0cmp.cc ++++ b/storage/innobase/rem/rem0cmp.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1994, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -596,6 +597,79 @@ int cmp_data_data(ulint mtype, ulint prtype, bool is_asc, const byte *data1, + return (cmp_data(mtype, prtype, is_asc, data1, len1, data2, len2)); + } + ++int cmp_sec_dtuple_pri_rec_with_match(const dtuple_t *dtuple, const rec_t *rec, ++ const dict_index_t *index, ++ const dict_index_t *clust_index, ++ const ulint *offsets, ulint n_cmp) { ++ ut_ad(dtuple_check_typed(dtuple)); ++ ut_ad(rec_offs_validate(rec, clust_index, offsets)); ++ ut_ad(n_cmp > 0); ++ ut_ad(n_cmp <= dtuple_get_n_fields(dtuple)); ++ ++ for (ulint i = 0; i < n_cmp; ++i) { ++ const auto dtuple_field = dtuple_get_nth_field(dtuple, i); ++ ++ const auto dtuple_b_ptr = ++ static_cast(dfield_get_data(dtuple_field)); ++ ++ const auto type = dfield_get_type(dtuple_field); ++ ++ auto dtuple_f_len = dfield_get_len(dtuple_field); ++ ++ ut_ad(!rec_offs_nth_extern(offsets, i)); ++ ++ ut_ad(!rec_offs_nth_default(offsets, i)); ++ ++ // caculate the pos ++ auto pos = dict_index_get_nth_field_pos(clust_index, index, i); ++ ++ ulint rec_f_len; ++ ++ const auto rec_b_ptr = rec_get_nth_field(rec, offsets, pos, &rec_f_len); ++ ++ ut_ad(!dfield_is_ext(dtuple_field)); ++ ++ int ret{}; ++ ++ if (dfield_is_multi_value(dtuple_field) && ++ (dtuple_f_len == UNIV_MULTI_VALUE_ARRAY_MARKER || ++ dtuple_f_len == UNIV_NO_INDEX_VALUE)) { ++ /* If it's the value parsed from the array, or NULL, then ++ the calculation can be done in a normal way in the else branch */ ++ ut_ad(index->is_multi_value()); ++ if (dtuple_f_len == UNIV_NO_INDEX_VALUE) { ++ ret = 1; ++ } else { ++ multi_value_data *mv_data = ++ static_cast(dtuple_field->data); ++ ret = mv_data->has(type, rec_b_ptr, rec_f_len) ? 0 : 1; ++ } ++ } else { ++ /* For now, change buffering is only supported on ++ indexes with ascending order on the columns. 
*/ ++ if (dtuple_f_len != UNIV_SQL_NULL && rec_f_len != UNIV_SQL_NULL && ++ dtuple_f_len < rec_f_len) { ++ const dict_field_t *field = index->get_field(i); ++ if (field->prefix_len > 0) { ++ rec_f_len = dtype_get_at_most_n_mbchars( ++ field->col->prtype, field->col->mbminmaxlen, field->prefix_len, ++ rec_f_len, reinterpret_cast(rec_b_ptr)); ++ } ++ } ++ ret = cmp_data( ++ type->mtype, type->prtype, ++ dict_index_is_ibuf(index) || index->get_field(i)->is_ascending, ++ dtuple_b_ptr, dtuple_f_len, rec_b_ptr, rec_f_len); ++ } ++ ++ if (ret) { ++ return (ret); ++ } ++ } ++ ++ return (0); ++} ++ + int cmp_dtuple_rec_with_match_low(const dtuple_t *dtuple, const rec_t *rec, + const dict_index_t *index, + const ulint *offsets, ulint n_cmp, +@@ -648,8 +722,16 @@ int cmp_dtuple_rec_with_match_low(const dtuple_t *dtuple, const rec_t *rec, + ut_ad(!rec_offs_nth_default(offsets, i)); + + ulint rec_f_len; ++ const byte *rec_b_ptr = nullptr; ++ if(index->has_instant_cols()) { ++ rec_b_ptr = rec_get_nth_field_instant(rec, offsets, i, index, &rec_f_len); ++ } ++ else { ++ /* So does the field with default value */ ++ ut_ad(!rec_offs_nth_default(offsets, i)); + +- const auto rec_b_ptr = rec_get_nth_field(rec, offsets, i, &rec_f_len); ++ rec_b_ptr = rec_get_nth_field(rec, offsets, i, &rec_f_len); ++ } + + ut_ad(!dfield_is_ext(dtuple_field)); + +@@ -1130,3 +1212,9 @@ int dtuple_t::compare(const rec_t *rec, const dict_index_t *index, + return (cmp_dtuple_rec_with_match_low(this, rec, index, offsets, n_fields_cmp, + matched_fields)); + } ++ ++int dtuple_t::compare(const rec_t *rec, const dict_index_t *index, ++ const dict_index_t *index2, const ulint *offsets) { ++ return cmp_sec_dtuple_pri_rec_with_match(this, rec, index, index2, offsets, ++ n_fields_cmp); ++} +diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc +index 55c6ada9..6612d7c2 100644 +--- a/storage/innobase/row/row0mysql.cc ++++ b/storage/innobase/row/row0mysql.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2000, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -993,6 +994,12 @@ void row_prebuilt_free(row_prebuilt_t *prebuilt, ibool dict_locked) { + } + + prebuilt->m_lob_undo.destroy(); ++ prebuilt->ctx = nullptr; ++ ++ if (prebuilt->pq_heap) { ++ mem_heap_free(prebuilt->pq_heap); ++ prebuilt->pq_heap = nullptr; ++ } + + mem_heap_free(prebuilt->heap); + } +diff --git a/storage/innobase/row/row0pread-adapter.cc b/storage/innobase/row/row0pread-adapter.cc +index 4a7e40db..eb0efea0 100644 +--- a/storage/innobase/row/row0pread-adapter.cc ++++ b/storage/innobase/row/row0pread-adapter.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -43,8 +44,8 @@ Parallel_reader_adapter::Parallel_reader_adapter(size_t max_threads, + + dberr_t Parallel_reader_adapter::add_scan(trx_t *trx, + const Parallel_reader::Config &config, +- Parallel_reader::F &&f) { +- return m_parallel_reader.add_scan(trx, config, std::move(f)); ++ Parallel_reader::F &&f, bool split) { ++ return m_parallel_reader.add_scan(trx, config, std::move(f), split); + } + + Parallel_reader_adapter::Thread_ctx::Thread_ctx() { +diff --git a/storage/innobase/row/row0pread.cc b/storage/innobase/row/row0pread.cc +index 9e962da5..54789a73 100644 +--- a/storage/innobase/row/row0pread.cc ++++ b/storage/innobase/row/row0pread.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 2018, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -39,11 +40,23 @@ Created 2018-01-27 by Sunny Bains */ + #include "row0row.h" + #include "row0vers.h" + #include "ut0new.h" ++#include "row0sel.h" + + #ifdef UNIV_PFS_THREAD + mysql_pfs_key_t parallel_read_thread_key; + #endif /* UNIV_PFS_THREAD */ + ++ICP_RESULT row_search_idx_cond_check( ++ byte *mysql_rec, /*!< out: record ++ in MySQL format (invalid unless ++ prebuilt->idx_cond == true and ++ we return ICP_MATCH) */ ++ row_prebuilt_t *prebuilt, /*!< in/out: prebuilt struct ++ for the table handle */ ++ const rec_t *rec, /*!< in: InnoDB record */ ++ const ulint *offsets); /*!< in: rec_get_offsets() */ ++ ++ + std::atomic_size_t Parallel_reader::s_active_threads{}; + + /** Tree depth at which we decide to split blocks further. */ +@@ -86,7 +99,13 @@ Parallel_reader::Scan_ctx::Iter::~Iter() { + m_heap = nullptr; + } + +-Parallel_reader::Ctx::~Ctx() {} ++Parallel_reader::Ctx::~Ctx() { ++ if (m_blob_heap) ++ mem_heap_free(m_blob_heap); ++ ++ if(m_heap) ++ mem_heap_free(m_heap); ++} + + Parallel_reader::Scan_ctx::~Scan_ctx() {} + +@@ -220,6 +239,7 @@ class PCursor { + /** Check if there are threads waiting on the index latch. Yield the latch + so that other threads can progress. */ + void yield(); ++ void yield_prev(); + + /** Move to the next block. + @param[in] index Index being traversed. +@@ -227,6 +247,9 @@ class PCursor { + dberr_t move_to_next_block(dict_index_t *index) + MY_ATTRIBUTE((warn_unused_result)); + ++ dberr_t move_to_prev_block(dict_index_t *index) ++ MY_ATTRIBUTE((warn_unused_result)); ++ + /** Restore the cursor position. */ + void restore_position() { + auto relative = m_pcur->m_rel_pos; +@@ -240,7 +263,8 @@ class PCursor { + } + } else { + ut_ad(relative == BTR_PCUR_AFTER || +- relative == BTR_PCUR_AFTER_LAST_IN_TREE); ++ relative == BTR_PCUR_AFTER_LAST_IN_TREE || ++ relative == BTR_PCUR_BEFORE); + } + } + +@@ -295,6 +319,35 @@ void PCursor::yield() { + } + } + ++void PCursor::yield_prev() { ++ /* We should always yield on a block boundary. */ ++ ut_ad(m_pcur->is_before_first_on_page()); ++ ++ /* Store the cursor position on the first user record on the page. */ ++ m_pcur->move_to_next_on_page(); ++ ++ m_pcur->store_position(m_mtr); ++ ++ m_mtr->commit(); ++ ++ /* Yield so that another thread can proceed. 
*/ ++ std::this_thread::yield(); ++ ++ m_mtr->start(); ++ ++ m_mtr->set_log_mode(MTR_LOG_NO_REDO); ++ ++ /* Restore position on the record, or its predecessor if the record ++ was purged meanwhile. */ ++ ++ restore_position(); ++ ++ if (!m_pcur->is_before_first_on_page()) { ++ /* Move to the successor of the saved record. */ ++ m_pcur->move_to_prev_on_page(); ++ } ++} ++ + dberr_t PCursor::move_to_next_block(dict_index_t *index) { + ut_ad(m_pcur->is_after_last_on_page()); + +@@ -345,6 +398,45 @@ dberr_t PCursor::move_to_next_block(dict_index_t *index) { + return (DB_SUCCESS); + } + ++dberr_t PCursor::move_to_prev_block(dict_index_t * index) { ++ ut_ad(m_pcur->is_before_first_on_page()); ++ ++ if (rw_lock_get_waiters(dict_index_get_lock(index))) { ++ /* There are waiters on the index tree lock. Store and restore ++ the cursor position, and yield so that scanning a large table ++ will not starve other threads. */ ++ ++ yield_prev(); ++ ++ /* It's possible that the restore places the cursor in the middle of ++ the block. We need to account for that too. */ ++ ++ if (m_pcur->is_on_user_rec()) { ++ return (DB_SUCCESS); ++ } ++ } ++ ++ auto cur = m_pcur->get_page_cur(); ++ ++ auto prev_page_no = btr_page_get_prev(page_cur_get_page(cur), m_mtr); ++ ++ if (prev_page_no == FIL_NULL) { ++ m_mtr->commit(); ++ ++ return (DB_END_OF_INDEX); ++ } ++ ++ m_pcur->move_backward_from_page(m_mtr); ++ ++ /* Skip the supremum record. */ ++ page_cur_move_to_prev(cur); ++ ++ /* Page can't be empty unless it is a root page. */ ++ ut_ad(!page_cur_is_before_first(cur)); ++ ++ return (DB_SUCCESS); ++} ++ + bool Parallel_reader::Scan_ctx::check_visibility(const rec_t *&rec, + ulint *&offsets, + mem_heap_t *&heap, +@@ -383,8 +475,6 @@ bool Parallel_reader::Scan_ctx::check_visibility(const rec_t *&rec, + } + } + } else { +- /* Secondary index scan not supported yet. */ +- ut_error; + + auto max_trx_id = page_get_max_trx_id(page_align(rec)); + +@@ -404,12 +494,149 @@ bool Parallel_reader::Scan_ctx::check_visibility(const rec_t *&rec, + return (false); + } + +- ut_ad(!m_trx || m_trx->isolation_level == TRX_ISO_READ_UNCOMMITTED || +- !rec_offs_any_null_extern(rec, offsets)); +- + return (true); + } + ++dberr_t Parallel_reader::Scan_ctx::find_visible_record(byte *buf, ++ const rec_t *&rec, ++ const rec_t *&clust_rec, ++ ulint *&offsets, ++ ulint *&clust_offsets, ++ mem_heap_t *&heap, ++ mtr_t *mtr, ++ row_prebuilt_t *prebuilt) { ++ ++ const auto table_name = m_config.m_index->table->name; ++ ut_ad(m_trx->read_view == nullptr || MVCC::is_view_active(m_trx->read_view)); ++ if (prebuilt != nullptr) { ++ prebuilt->pq_requires_clust_rec = false; ++ } ++ if (m_trx->read_view != nullptr) { ++ auto view = m_trx->read_view; ++ ++ if (m_config.m_index->is_clustered()) { ++ trx_id_t rec_trx_id; ++ ++ if (m_config.m_index->trx_id_offset > 0) { ++ rec_trx_id = trx_read_trx_id(rec + m_config.m_index->trx_id_offset); ++ } else { ++ rec_trx_id = row_get_rec_trx_id(rec, m_config.m_index, offsets); ++ } ++ ++ if (m_trx->isolation_level > TRX_ISO_READ_UNCOMMITTED && ++ !view->changes_visible(rec_trx_id, table_name)) { ++ rec_t *old_vers = nullptr; ++ ++ row_vers_build_for_consistent_read(rec, mtr, m_config.m_index, &offsets, ++ view, &heap, heap, &old_vers, ++ nullptr, nullptr); ++ ++ rec = old_vers; ++ if (rec == nullptr) { ++ return DB_NOT_FOUND; ++ } ++ } ++ } else { ++ /* Secondary index scan not supported yet. 
*/ ++ auto max_trx_id = page_get_max_trx_id(page_align(rec)); ++ ut_ad(max_trx_id > 0); ++ ++ if (!view->sees(max_trx_id) || ++ (prebuilt && prebuilt->need_to_access_clustered)) { ++ if (prebuilt) ++ { ++ if (prebuilt->idx_cond) { ++ switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) ++ { ++ case ICP_NO_MATCH: ++ return DB_NOT_FOUND; ++ case ICP_OUT_OF_RANGE: ++ return DB_END_OF_RANGE; ++ case ICP_MATCH: ++ break; ++ } ++ } ++ if (prebuilt->sel_graph == nullptr) ++ row_prebuild_sel_graph(prebuilt); ++ ++ Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql; ++ que_thr_t *thr = que_fork_get_first_thr(prebuilt->sel_graph); ++ ++ prebuilt->pq_requires_clust_rec = true; ++ int err = row_sel_get_clust_rec_for_mysql(prebuilt, m_config.m_index, rec, thr, &clust_rec, ++ &clust_offsets, &heap, NULL, mtr, nullptr); ++ ++ if (err != DB_SUCCESS) ++ return DB_NOT_FOUND; ++ else { ++ if (clust_rec == NULL) { ++ /* The record did not exist in the read view */ ++ ut_ad(prebuilt->select_lock_type == LOCK_NONE); ++ ++ return DB_NOT_FOUND; ++ } ++ else if(rec_get_deleted_flag(clust_rec, m_config.m_is_compact)) { ++ /* The record is delete marked: we can skip it */ ++ return DB_NOT_FOUND; ++ } ++ else { ++ return DB_SUCCESS; ++ } ++ } ++ } else ++ return DB_NOT_FOUND; ++ } ++ } ++ } else if (srv_read_only_mode && /** innodb_read_only */ ++ (prebuilt && prebuilt->need_to_access_clustered && /** secondary index and non-covered index */ ++ !m_config.m_index->is_clustered())) { ++ if (prebuilt->idx_cond) { ++ switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) ++ { ++ case ICP_NO_MATCH: ++ return DB_NOT_FOUND; ++ case ICP_OUT_OF_RANGE: ++ return DB_END_OF_RANGE; ++ case ICP_MATCH: ++ break; ++ } ++ } ++ ++ if (prebuilt->sel_graph == nullptr) ++ row_prebuild_sel_graph(prebuilt); ++ ++ que_thr_t *thr = que_fork_get_first_thr(prebuilt->sel_graph); ++ prebuilt->pq_requires_clust_rec = true; ++ Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql; ++ int err = row_sel_get_clust_rec_for_mysql(prebuilt, m_config.m_index, rec, thr, &clust_rec, ++ &clust_offsets, &heap, NULL, mtr, nullptr); ++ ++ if (err != DB_SUCCESS) ++ return DB_NOT_FOUND; ++ else { ++ if (clust_rec == NULL) { ++ /* The record did not exist in the read view */ ++ ut_ad(prebuilt->select_lock_type == LOCK_NONE); ++ ++ return DB_NOT_FOUND; ++ } else if(rec_get_deleted_flag(clust_rec, m_config.m_is_compact)) { ++ /* The record is delete marked: we can skip it */ ++ return DB_NOT_FOUND; ++ } else { ++ return DB_SUCCESS; ++ } ++ } ++ } ++ ++ if (rec_get_deleted_flag(rec, m_config.m_is_compact)) { ++ /* This record was deleted in the latest committed version, or it was ++ deleted and then reinserted-by-update before purge kicked in. Skip it. 
*/ ++ return DB_NOT_FOUND; ++ } ++ ++ return DB_SUCCESS; ++} ++ + void Parallel_reader::Scan_ctx::copy_row(const rec_t *rec, Iter *iter) const { + iter->m_offsets = rec_get_offsets(rec, m_config.m_index, nullptr, + ULINT_UNDEFINED, &iter->m_heap); +@@ -503,6 +730,145 @@ bool Parallel_reader::Ctx::move_to_next_node(PCursor *pcursor, mtr_t *mtr) { + return (true); + } + ++dberr_t Parallel_reader::Ctx::read_record(uchar* buf, row_prebuilt_t *prebuilt) { ++ mtr_t mtr; ++ btr_pcur_t *pcur; ++ ++ dberr_t err{DB_SUCCESS}; ++ dberr_t err1{DB_SUCCESS}; ++ int ret{0}; ++ const rec_t *clust_rec = nullptr; ++ const rec_t *rec = nullptr; ++ const rec_t *result_rec = nullptr; ++ ulint *offsets = offsets_; ++ ulint *clust_offsets = clust_offsets_; ++ ++ if(start_read) { ++ rec_offs_init(offsets_); ++ rec_offs_init(clust_offsets_); ++ start_read = false; ++ } ++ ++ mtr.start(); ++ mtr.set_log_mode(MTR_LOG_NO_REDO); ++ ++ auto &from = m_scan_ctx->m_config.m_pq_reverse_scan ? m_range.second : m_range.first; ++ pcur = from->m_pcur; ++ ++ PCursor pcursor(pcur, &mtr, m_scan_ctx->m_config.m_read_level); ++ pcursor.restore_position(); ++ ++ ++ const auto &end_tuple = m_scan_ctx->m_config.m_pq_reverse_scan ? m_range.first->m_tuple : m_range.second->m_tuple; ++ auto index = m_scan_ctx->m_config.m_index; ++ auto cur = pcur->get_page_cur(); ++ dict_index_t *clust_index = index->table->first_index(); ++ ++ if(m_blob_heap == nullptr) ++ m_blob_heap = mem_heap_create(srv_page_size); ++ if(m_heap == nullptr) ++ m_heap = mem_heap_create(srv_page_size / 4); ++ ++ if(!m_scan_ctx->m_config.m_pq_reverse_scan && page_cur_is_after_last(cur)) { ++ // pcur point to last record, move to next block ++ mem_heap_empty(m_heap); ++ offsets = offsets_; ++ rec_offs_init(offsets_); ++ ++ err = pcursor.move_to_next_block(index); ++ if (err != DB_SUCCESS) { ++ ut_a(!mtr.is_active()); ++ return err; ++ } ++ ut_ad(!page_cur_is_before_first(cur)); ++ } else if(m_scan_ctx->m_config.m_pq_reverse_scan && page_cur_is_before_first(cur)) { ++ // pcur point to first record, move to prev block ++ mem_heap_empty(m_heap); ++ offsets = offsets_; ++ rec_offs_init(offsets_); ++ ++ err = pcursor.move_to_prev_block(index); ++ if (err != DB_SUCCESS) { ++ ut_a(!mtr.is_active()); ++ return err; ++ } ++ ut_ad(!page_cur_is_after_last(cur)); ++ } ++ ++ // 1. read record ++ rec = page_cur_get_rec(cur); ++ offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &m_heap); ++ clust_offsets = rec_get_offsets(rec, index, clust_offsets, ULINT_UNDEFINED, &m_heap); ++ ++ // 2. find visible version record ++ err1 = m_scan_ctx->find_visible_record(buf, rec, clust_rec, offsets, clust_offsets, m_heap, &mtr, prebuilt); ++ ++ if (err1 == DB_END_OF_RANGE) ++ { ++ err = DB_END_OF_RANGE; ++ goto func_exit; ++ } ++ ++ ++ if (err1 != DB_NOT_FOUND) { ++ // 3. check range boundary ++ m_block = page_cur_get_block(cur); ++ if (rec != nullptr && end_tuple != nullptr) { ++ if (!m_scan_ctx->m_config.m_index->is_clustered() && ++ prebuilt->need_to_access_clustered) ++ ret = ((dtuple_t *)end_tuple)->compare(clust_rec, index, clust_index, clust_offsets); ++ else ++ ret = end_tuple->compare(rec, index, offsets); ++ ++ /* Note: The range creation doesn't use MVCC. Therefore it's possible ++ that the range boundary entry could have been deleted. */ ++ if((!m_scan_ctx->m_config.m_pq_reverse_scan && ret <= 0) || (m_scan_ctx->m_config.m_pq_reverse_scan && ret >= 0)) { ++ m_scan_ctx->m_reader->ctx_completed_inc(); ++ err= DB_END_OF_RANGE; ++ goto func_exit; ++ } ++ } ++ ++ // 4. 
convert record to mysql format ++ if(prebuilt->pq_requires_clust_rec) ++ { ++ result_rec = clust_rec; ++ if (!row_sel_store_mysql_rec(buf, prebuilt, clust_rec, nullptr, ++ true, clust_index, prebuilt->index, clust_offsets, false, nullptr, m_blob_heap)) ++ err= DB_ERROR; ++ } else { ++ result_rec = rec; ++ if (!row_sel_store_mysql_rec(buf, prebuilt, rec, nullptr, ++ m_scan_ctx->m_config.m_index->is_clustered(), ++ index, prebuilt->index, offsets, false, nullptr, m_blob_heap)) ++ err= DB_ERROR; ++ } ++ if (prebuilt->clust_index_was_generated) { ++ pq_row_sel_store_row_id_to_prebuilt(prebuilt, result_rec, ++ result_rec == rec ? index : clust_index, ++ result_rec == rec ? offsets:clust_offsets); ++ } ++ } ++ else ++ { ++ err = DB_NOT_FOUND; ++ goto next_record; ++ } ++ ++next_record: ++ if(!m_scan_ctx->m_config.m_pq_reverse_scan) ++ page_cur_move_to_next(cur); ++ else ++ page_cur_move_to_prev(cur); ++ ++func_exit: ++ pcur->store_position(&mtr); ++ ut_a(mtr.is_active()); ++ mtr.commit(); ++ ++ return err; ++} ++ + dberr_t Parallel_reader::Ctx::traverse() { + /* Take index lock if the requested read level is on a non-leaf level as the + index lock is required to access non-leaf page. */ +@@ -636,14 +1002,24 @@ std::shared_ptr Parallel_reader::dequeue() { + return (nullptr); + } + +- auto ctx = m_ctxs.front(); +- m_ctxs.pop_front(); ++ std::shared_ptr ctx{}; ++ if(!m_pq_reverse_scan) { ++ ctx = m_ctxs.front(); ++ m_ctxs.pop_front(); ++ } else { ++ ctx = m_ctxs.back(); ++ m_ctxs.pop_back(); ++ } + + mutex_exit(&m_mutex); + + return (ctx); + } + ++void Parallel_reader::pq_set_reverse_scan() { ++ m_pq_reverse_scan = true; ++} ++ + bool Parallel_reader::is_queue_empty() const { + mutex_enter(&m_mutex); + auto empty = m_ctxs.empty(); +@@ -743,6 +1119,62 @@ void Parallel_reader::worker(Parallel_reader::Thread_ctx *thread_ctx) { + ut_a(is_error_set() || (m_n_completed == m_ctx_id && is_queue_empty())); + } + ++void Parallel_reader::ctx_completed_inc() ++{ ++ m_n_completed.fetch_add(1, std::memory_order_relaxed); ++} ++ ++void Parallel_reader::pq_set_worker_done() ++{ ++ work_done.store(true, std::memory_order_relaxed); ++} ++ ++void Parallel_reader::pq_wakeup_workers() ++{ ++ pq_set_worker_done(); ++ os_event_set(m_event); ++} ++ ++dberr_t Parallel_reader::dispatch_ctx(row_prebuilt_t *prebuilt) { ++ dberr_t err{DB_SUCCESS}; ++ ++ for (;;) { ++ int64_t sig_count = os_event_reset(m_event); ++ ++ auto ctx = dequeue(); ++ bool done = work_done.load(std::memory_order_relaxed); ++ ++ if (ctx == nullptr) { ++ prebuilt->ctx = nullptr; ++ if (is_ctx_over() || done) { ++ /* Wakeup other worker threads before exiting */ ++ os_event_set(m_event); ++ ut_a(is_queue_empty()); ++ return DB_END_OF_INDEX; ++ } ++ else { ++ /* wait for other worker */ ++ constexpr auto FOREVER = OS_SYNC_INFINITE_TIME; ++ os_event_wait_time_low(m_event, FOREVER, sig_count); ++ } ++ } ++ else { ++ if (ctx->m_split) { ++ err = ctx->split(); ++ /* Tell the other threads that there is work to do. 
*/ ++ os_event_set(m_event); ++ ctx_completed_inc(); ++ } ++ else { ++ prebuilt->ctx = ctx; ++ break; ++ } ++ } ++ } ++ ++ return err; ++} ++ + page_no_t Parallel_reader::Scan_ctx::search(const buf_block_t *block, + const dtuple_t *key) const { + ut_ad(index_s_own()); +@@ -848,6 +1280,8 @@ dberr_t Parallel_reader::Scan_ctx::create_ranges(const Scan_range &scan_range, + ut_ad(index_s_own()); + ut_a(max_threads() > 0); + ut_a(page_no != FIL_NULL); ++ if (m_config.m_range_errno) ++ return (DB_SUCCESS); + + /* Do a breadth first traversal of the B+Tree using recursion. We want to + set up the scan ranges in one pass. This guarantees that the tree structure +@@ -864,6 +1298,8 @@ dberr_t Parallel_reader::Scan_ctx::create_ranges(const Scan_range &scan_range, + + auto block = block_get_s_latched(page_id, mtr, __LINE__); + ++ page_no_t child_page_no = FIL_NULL; ++ + /* read_level requested should be less than the tree height. */ + ut_ad(m_config.m_read_level < + btr_page_get_level(buf_block_get_frame(block), mtr) + 1); +@@ -916,7 +1352,7 @@ dberr_t Parallel_reader::Scan_ctx::create_ranges(const Scan_range &scan_range, + + const auto end = scan_range.m_end; + +- if (end != nullptr && end->compare(rec, index, offsets) <= 0) { ++ if (end != nullptr && end->compare(rec, index, offsets) < 0) { + break; + } + +@@ -926,6 +1362,7 @@ dberr_t Parallel_reader::Scan_ctx::create_ranges(const Scan_range &scan_range, + the root level. */ + if (at_level > m_config.m_read_level) { + auto page_no = btr_node_ptr_get_child_page_no(rec, offsets); ++ child_page_no = page_no; + + if (depth < split_level) { + /* Need to create a range starting at a lower level in the tree. */ +@@ -983,6 +1420,11 @@ dberr_t Parallel_reader::Scan_ctx::create_ranges(const Scan_range &scan_range, + page_cur_move_to_next(&page_cursor); + } + ++ if (ranges.size() == 1 && depth == split_level && !at_leaf && child_page_no != FIL_NULL) { ++ ranges.clear(); ++ create_ranges(scan_range, child_page_no, depth + 1, split_level + 1, ranges, mtr); ++ } ++ + savepoints.push_back(savepoint); + + for (auto &savepoint : savepoints) { +@@ -1010,14 +1452,28 @@ dberr_t Parallel_reader::Scan_ctx::partition( + err = create_ranges(scan_range, m_config.m_index->page, 0, split_level, + ranges, &mtr); + +- if (err == DB_SUCCESS && scan_range.m_end != nullptr && !ranges.empty()) { ++ if (m_config.m_pq_reverse_scan && !ranges.empty()) { ++ auto &iter = ranges.back().second; ++ auto block = m_config.m_pcur->get_block(); ++ page_id_t page_id(m_config.m_index->space, block->get_page_no()); ++ auto s_block MY_ATTRIBUTE((unused)) = block_get_s_latched(page_id, &mtr, __LINE__); ++ assert(block == s_block); ++ ++ auto page_cursor = m_config.m_pcur->get_page_cur(); ++ page_cursor->index = m_config.m_index; ++ iter = create_persistent_cursor(*page_cursor, &mtr); ++ ++ /* deep copy of start of first ctx */ ++ if(scan_range.m_start == nullptr) { ++ auto &first_iter = ranges.front().first; ++ first_iter = std::make_shared(); ++ } ++ } else if (scan_range.m_end != nullptr && !ranges.empty()) { ++ + auto &iter = ranges.back().second; +- + ut_a(iter->m_heap == nullptr); +- + iter->m_heap = mem_heap_create(sizeof(btr_pcur_t) + (srv_page_size / 16)); +- +- iter->m_tuple = dtuple_copy(scan_range.m_end, iter->m_heap); ++ iter->m_tuple = pq_dtuple_copy(scan_range.m_end, iter->m_heap); + + /* Do a deep copy. 
*/ + for (size_t i = 0; i < dtuple_get_n_fields(iter->m_tuple); ++i) { +@@ -1049,6 +1505,7 @@ dberr_t Parallel_reader::Scan_ctx::create_context(const Range &range, + return (DB_OUT_OF_MEMORY); + } else { + ctx->m_split = split; ++ ctx->reader = m_reader; + m_reader->enqueue(ctx); + } + +@@ -1069,10 +1526,20 @@ dberr_t Parallel_reader::Scan_ctx::create_contexts(const Ranges &ranges) { + split_point = max_threads(); + } + ++ if (ranges.size() > split_point) { ++ m_reader->m_need_change_dop = false; ++ } ++ + size_t i{}; ++ size_t split_num = 0; ++ if (ranges.size() > split_point) { ++ split_num = ranges.size() - split_point; ++ } ++ bool split; + + for (auto range : ranges) { +- auto err = create_context(range, i >= split_point); ++ split = m_config.m_pq_reverse_scan ? i < split_num : i >= split_point; ++ auto err = create_context(range, split); + + if (err != DB_SUCCESS) { + return (err); +@@ -1190,7 +1657,7 @@ dberr_t Parallel_reader::run(size_t n_threads) { + + dberr_t Parallel_reader::add_scan(trx_t *trx, + const Parallel_reader::Config &config, +- Parallel_reader::F &&f) { ++ Parallel_reader::F &&f, bool split) { + // clang-format off + + auto scan_ctx = std::shared_ptr( +diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc +index bb13b3da..04d56a25 100644 +--- a/storage/innobase/row/row0sel.cc ++++ b/storage/innobase/row/row0sel.cc +@@ -2,6 +2,7 @@ + + Copyright (c) 1997, 2021, Oracle and/or its affiliates. + Copyright (c) 2008, Google Inc. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + Portions of this file contain modifications contributed and copyrighted by + Google, Inc. Those modifications are gratefully acknowledged and are described +@@ -2534,6 +2535,15 @@ static void row_sel_store_row_id_to_prebuilt( + ut_memcpy(prebuilt->row_id, data, len); + } + ++void pq_row_sel_store_row_id_to_prebuilt(row_prebuilt_t *prebuilt, /*!< in/out: prebuilt */ ++ const rec_t *index_rec, /*!< in: record */ ++ const dict_index_t *index, /*!< in: index of the record */ ++ const ulint *offsets)/*!< in: rec_get_offsets ++ (index_rec, index) */ ++{ ++ row_sel_store_row_id_to_prebuilt(prebuilt, index_rec, index, offsets); ++} ++ + #ifdef UNIV_DEBUG + /** Convert a non-SQL-NULL field from Innobase format to MySQL format. */ + #define row_sel_field_store_in_mysql_format(dest, templ, idx, field, src, len, \ +@@ -3148,42 +3158,6 @@ static MY_ATTRIBUTE((warn_unused_result)) dberr_t + return err; + } + +-/** Helper class to cache clust_rec and old_ver */ +-class Row_sel_get_clust_rec_for_mysql { +- const rec_t *cached_clust_rec; +- rec_t *cached_old_vers; +- +- public: +- /** Constructor */ +- Row_sel_get_clust_rec_for_mysql() +- : cached_clust_rec(nullptr), cached_old_vers(nullptr) {} +- +- /** Retrieve the clustered index record corresponding to a record in a +- non-clustered index. Does the necessary locking. 
+- @param[in] prebuilt prebuilt struct in the handle +- @param[in] sec_index secondary index where rec resides +- @param[in] rec record in a non-clustered index +- @param[in] thr query thread +- @param[out] out_rec clustered record or an old version of it, +- NULL if the old version did not exist in the +- read view, i.e., it was a fresh inserted version +- @param[in,out] offsets in: offsets returned by +- rec_get_offsets(rec, sec_index); +- out: offsets returned by +- rec_get_offsets(out_rec, clust_index) +- @param[in,out] offset_heap memory heap from which the offsets are allocated +- @param[out] vrow virtual column to fill +- @param[in] mtr mtr used to get access to the non-clustered record; +- the same mtr is used to access the clustered index +- @param[in] lob_undo the LOB undo information. +- @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ +- dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index, +- const rec_t *rec, que_thr_t *thr, const rec_t **out_rec, +- ulint **offsets, mem_heap_t **offset_heap, +- const dtuple_t **vrow, mtr_t *mtr, +- lob::undo_vers_t *lob_undo); +-}; +- + /** Retrieve the clustered index record corresponding to a record in a + non-clustered index. Does the necessary locking. + @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ +@@ -3820,7 +3794,7 @@ static ulint row_sel_try_search_shortcut_for_mysql( + + /** Check a pushed-down index condition. + @return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +-static ICP_RESULT row_search_idx_cond_check( ++ICP_RESULT row_search_idx_cond_check( + byte *mysql_rec, /*!< out: record + in MySQL format (invalid unless + prebuilt->idx_cond == true and +@@ -6017,6 +5991,15 @@ normal_return: + /*-------------------------------------------------------------*/ + que_thr_stop_for_mysql_no_error(thr, trx); + ++ if(err == DB_SUCCESS && prebuilt->pq_index_read) { ++ if(prebuilt->pq_heap) ++ mem_heap_free(prebuilt->pq_heap); ++ prebuilt->pq_heap = mem_heap_create(sizeof(btr_pcur_t) + (srv_page_size / 16)); ++ prebuilt->pq_tuple = row_rec_to_index_entry_low(rec, index, ++ rec_get_offsets(rec, index,nullptr, ULINT_UNDEFINED, &prebuilt->heap), ++ prebuilt->pq_heap); ++ } ++ + mtr_commit(&mtr); + + /* Rollback blocking transactions from hit list for high priority +@@ -6506,3 +6489,33 @@ bool row_search_index_stats(const char *db_name, const char *tbl_name, + mem_heap_free(heap); + return (false); + } ++ ++/** Searches for rows in the database. This is used in the interface to ++MySQL. This function opens a cursor, and also implements fetch next ++and fetch prev. NOTE that if we do a search with a full key value ++from a unique index (ROW_SEL_EXACT), then we will not store the cursor ++position and fetch next or fetch prev must not be tried to the cursor! ++ ++@param[out] buf buffer for the fetched row in MySQL format ++@param[in] mode search mode PAGE_CUR_L ++@param[in,out] prebuilt prebuilt struct for the table handler; ++ this contains the info to search_tuple, ++ index; if search tuple contains 0 field then ++ we position the cursor at start or the end of ++ index, depending on 'mode' ++@param[in] match_mode 0 or ROW_SEL_EXACT or ROW_SEL_EXACT_PREFIX ++@param[in] direction 0 or ROW_SEL_NEXT or ROW_SEL_PREV; ++ Note: if this is != 0, then prebuilt must has a ++ pcur with stored position! In opening of a ++ cursor 'direction' should be 0. 
++@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK, ++DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */ ++dberr_t row_search_for_mysql(byte *buf, page_cur_mode_t mode, ++ row_prebuilt_t *prebuilt, ulint match_mode, ++ ulint direction) { ++ if (!prebuilt->table->is_intrinsic()) { ++ return (row_search_mvcc(buf, mode, prebuilt, match_mode, direction)); ++ } else { ++ return (row_search_no_mvcc(buf, mode, prebuilt, match_mode, direction)); ++ } ++} +diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc +index 074de0ff..e9bb3fb3 100644 +--- a/storage/innobase/trx/trx0trx.cc ++++ b/storage/innobase/trx/trx0trx.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1996, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -2165,6 +2166,16 @@ ReadView *trx_assign_read_view(trx_t *trx) /*!< in/out: active transaction */ + return (trx->read_view); + } + ++ReadView *trx_clone_read_view(trx_t *trx, ReadView *snapshot) /*!< in/out: active transaction */ ++{ ++ trx->read_view = UT_NEW_NOKEY(ReadView()); ++ if (trx->read_view != nullptr) { ++ trx->read_view->Copy_readView(*snapshot); ++ trx->read_view->skip_view_list = true; ++ } ++ return (trx->read_view); ++} ++ + /** Prepares a transaction for commit/rollback. */ + void trx_commit_or_rollback_prepare(trx_t *trx) /*!< in/out: transaction */ + { +diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc +index 0fb49971..cdea8fb7 100644 +--- a/storage/innobase/ut/ut0ut.cc ++++ b/storage/innobase/ut/ut0ut.cc +@@ -1,6 +1,7 @@ + /***************************************************************************** + + Copyright (c) 1994, 2021, Oracle and/or its affiliates. ++Copyright (c) 2022, Huawei Technologies Co., Ltd. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License, version 2.0, as published by the +@@ -433,6 +434,8 @@ const char *ut_strerr(dberr_t num) { + return ("End of index"); + case DB_END_OF_BLOCK: + return ("End of block"); ++ case DB_END_OF_RANGE: ++ return ("Reach end of Parallel Query Range"); + case DB_IO_ERROR: + return ("I/O error"); + case DB_TABLE_IN_FK_CHECK: +diff --git a/storage/temptable/include/temptable/block.h b/storage/temptable/include/temptable/block.h +index 9cd32b20..794c6653 100644 +--- a/storage/temptable/include/temptable/block.h ++++ b/storage/temptable/include/temptable/block.h +@@ -1,4 +1,5 @@ + /* Copyright (c) 2019, 2021, Oracle and/or its affiliates. ++ Copyright (c) 2022, Huawei Technologies Co., Ltd. 
+ 
+ This program is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License, version 2.0, as published by the
+@@ -371,13 +372,16 @@ inline size_t Block::deallocate(Chunk chunk, size_t chunk_size) noexcept {
+ 
+ inline void Block::destroy() noexcept {
+   assert(!is_empty());
+-  assert(Header::number_of_used_chunks() == 0);
+-  DBUG_PRINT("temptable_allocator",
+-             ("destroying the block: (%s)", to_string().c_str()));
+ 
+-  deallocate_from(Header::memory_source_type(), Header::block_size(),
+-                  Header::block_address());
+-  Header::reset();
++  // A PQ worker thread could quit early, leaving chunks in use; only destroy when none remain
++  if (Header::number_of_used_chunks() == 0) {
++    DBUG_PRINT("temptable_allocator",
++               ("destroying the block: (%s)", to_string().c_str()));
++
++    deallocate_from(Header::memory_source_type(), Header::block_size(),
++                    Header::block_address());
++    Header::reset();
++  }
+ }
+ 
+ inline bool Block::is_empty() const {
-- 
Gitee
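
Taken together, the InnoDB hunks above expose a pull-style interface for parallel query: a worker obtains an execution context from the shared Parallel_reader via dispatch_ctx(), then drains visible rows from that context with Ctx::read_record() until the range (DB_END_OF_RANGE) or the whole scan (DB_END_OF_INDEX) is exhausted. The server-side caller is not part of this excerpt, so the following is only a rough sketch of how such a worker loop might look; the wrapper name pq_fetch_next_row() and the way reader, prebuilt and buf are obtained are assumptions, while dispatch_ctx(), read_record(), prebuilt->ctx and the DB_* codes come from the patch itself.

// Hypothetical worker-side fetch loop (sketch only, not part of the patch).
// Assumes 'reader' is the shared Parallel_reader, 'prebuilt' is this worker's
// row_prebuilt_t, and 'buf' receives one row in MySQL format.
static dberr_t pq_fetch_next_row(Parallel_reader *reader,
                                 row_prebuilt_t *prebuilt, uchar *buf) {
  for (;;) {
    if (prebuilt->ctx == nullptr) {
      /* No current range: ask the reader for the next execution context.
      dispatch_ctx() waits on m_event while the queue is momentarily empty and
      returns DB_END_OF_INDEX once all contexts have been consumed. */
      dberr_t err = reader->dispatch_ctx(prebuilt);
      if (err != DB_SUCCESS) {
        return err;
      }
    }

    /* Produce the next visible record of the current range into buf. */
    dberr_t err = prebuilt->ctx->read_record(buf, prebuilt);

    if (err == DB_SUCCESS) {
      return DB_SUCCESS; /* one row delivered */
    } else if (err == DB_NOT_FOUND) {
      continue; /* record not visible or delete-marked: try the next one */
    } else if (err == DB_END_OF_RANGE || err == DB_END_OF_INDEX) {
      prebuilt->ctx = nullptr; /* range finished: fetch another context */
      continue;
    } else {
      return err; /* genuine error, e.g. DB_ERROR from row conversion */
    }
  }
}

In this shape the contexts act as a shared work queue, so when fewer ranges than requested threads are created the caller can reasonably lower the degree of parallelism to max_splits(); that appears to be what m_need_change_dop is meant to signal.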