mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
AggregationNode.computeStats() estimates cardinality under a single-node assumption. This can be an underestimate in the preaggregation node case, because the same grouping key may exist on multiple nodes during preaggregation. This patch adjusts the cardinality estimate using the following model for the number of distinct values in a random sample of k rows, previously used to calculate the ProcessingCost model by IMPALA-12657 and IMPALA-13644. Assume we are picking k rows from an infinite sample with NDV distinct values, with the values uniformly distributed. The probability of a given value not appearing in the sample is, in that case, ((NDV - 1) / NDV) ^ k. This is because we are making k choices, and each of them has a (NDV - 1) / NDV chance of not being our value. Therefore the probability of a given value appearing in the sample is: 1 - ((NDV - 1) / NDV) ^ k. And the number of distinct values in the sample is: (1 - ((NDV - 1) / NDV) ^ k) * NDV. Query option ESTIMATE_DUPLICATE_IN_PREAGG is added to control whether to use the new estimation logic or not. Testing: - Pass core tests. Change-Id: I04c563e59421928875b340cb91654b9d4bc80b55 Reviewed-on: http://gerrit.cloudera.org:8080/22047 Reviewed-by: Riza Suminto <riza.suminto@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
621 lines
28 KiB
Plaintext
621 lines
28 KiB
Plaintext
# TPCDS-Q19, with ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE=false
|
|
-- Sums ss_ext_sales_price per (i_brand, i_brand_id, i_manufact_id, i_manufact)
-- group and returns the first 100 groups under the ORDER BY below.
select
|
|
i_brand_id brand_id,
|
|
i_brand brand,
|
|
i_manufact_id,
|
|
i_manufact,
|
|
sum(ss_ext_sales_price) ext_price
|
|
-- Six tables are listed as comma joins; every join condition lives in the
-- WHERE clause below.
from
|
|
date_dim,
|
|
store_sales,
|
|
item,
|
|
customer,
|
|
customer_address,
|
|
store
|
|
where
|
|
-- Equality join conditions on the *_sk key columns, mixed with filters.
d_date_sk = ss_sold_date_sk
|
|
and ss_item_sk = i_item_sk
|
|
-- Filter: only rows whose i_manager_id is 7.
and i_manager_id = 7
|
|
-- Filter: only rows with d_moy = 11 and d_year = 1999.
and d_moy = 11
|
|
and d_year = 1999
|
|
and ss_customer_sk = c_customer_sk
|
|
and c_current_addr_sk = ca_address_sk
|
|
-- Keep only rows where the first five characters of ca_zip and s_zip differ.
and substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
|
|
and ss_store_sk = s_store_sk
|
|
-- Group on the same four columns that appear unaggregated in the select list.
group by
|
|
i_brand,
|
|
i_brand_id,
|
|
i_manufact_id,
|
|
i_manufact
|
|
-- Largest ext_price first; the four grouping columns follow as further sort keys.
order by
|
|
ext_price desc,
|
|
i_brand,
|
|
i_brand_id,
|
|
i_manufact_id,
|
|
i_manufact
|
|
limit 100
|
|
---- QUERYOPTIONS
|
|
ENABLE_TUPLE_ANALYSIS_IN_AGGREGATE=false
|
|
ESTIMATE_DUPLICATE_IN_PREAGG=false
|
|
---- PLAN
|
|
Max Per-Host Resource Reservation: Memory=16.27MB Threads=7
|
|
Per-Host Resource Estimates: Memory=315MB
|
|
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=314.81MB mem-reservation=16.27MB thread-reservation=7 runtime-filters-memory=5.00MB
|
|
PLAN-ROOT SINK
|
|
| output exprs: i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price)
|
|
| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
|
|
|
|
|
12:TOP-N [LIMIT=100]
|
|
| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
|
|
| mem-estimate=7.38KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=7 row-size=76B cardinality=100
|
|
| in pipelines: 12(GETNEXT), 11(OPEN)
|
|
|
|
|
11:AGGREGATE [FINALIZE]
|
|
| output: sum(ss_ext_sales_price)
|
|
| group by: i_brand, i_brand_id, i_manufact_id, i_manufact
|
|
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 11(GETNEXT), 04(OPEN)
|
|
|
|
|
10:HASH JOIN [INNER JOIN]
|
|
| hash predicates: ss_store_sk = s_store_sk
|
|
| fk/pk conjuncts: ss_store_sk = s_store_sk
|
|
| other predicates: substr(ca_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) != substr(s_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT))
|
|
| runtime filters: RF000[bloom] <- s_store_sk, RF001[min_max] <- s_store_sk
|
|
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=4,3,1,2,0,5 row-size=158B cardinality=1.72K
|
|
| in pipelines: 04(GETNEXT), 05(OPEN)
|
|
|
|
|
|--05:SCAN HDFS [tpcds_parquet.store]
|
|
| HDFS partitions=1/1 files=1 size=9.93KB
|
|
| stored statistics:
|
|
| table: rows=12 size=9.93KB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=12
|
|
| mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
|
|
| tuple-ids=5 row-size=21B cardinality=12
|
|
| in pipelines: 05(GETNEXT)
|
|
|
|
|
09:HASH JOIN [INNER JOIN]
|
|
| hash predicates: ca_address_sk = c_current_addr_sk
|
|
| fk/pk conjuncts: ca_address_sk = c_current_addr_sk
|
|
| runtime filters: RF002[bloom] <- c_current_addr_sk, RF003[min_max] <- c_current_addr_sk
|
|
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=4,3,1,2,0 row-size=137B cardinality=1.72K
|
|
| in pipelines: 04(GETNEXT), 03(OPEN)
|
|
|
|
|
|--08:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: c_customer_sk = ss_customer_sk
|
|
| | fk/pk conjuncts: c_customer_sk = ss_customer_sk
|
|
| | runtime filters: RF004[bloom] <- ss_customer_sk, RF005[min_max] <- ss_customer_sk
|
|
| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| | tuple-ids=3,1,2,0 row-size=116B cardinality=1.72K
|
|
| | in pipelines: 03(GETNEXT), 01(OPEN)
|
|
| |
|
|
| |--07:HASH JOIN [INNER JOIN]
|
|
| | | hash predicates: ss_sold_date_sk = d_date_sk
|
|
| | | fk/pk conjuncts: ss_sold_date_sk = d_date_sk
|
|
| | | runtime filters: RF006[bloom] <- d_date_sk
|
|
| | | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| | | tuple-ids=1,2,0 row-size=108B cardinality=1.72K
|
|
| | | in pipelines: 01(GETNEXT), 00(OPEN)
|
|
| | |
|
|
| | |--00:SCAN HDFS [tpcds_parquet.date_dim]
|
|
| | | HDFS partitions=1/1 files=1 size=2.15MB
|
|
| | | predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| | | stored statistics:
|
|
| | | table: rows=73.05K size=2.15MB
|
|
| | | columns: all
|
|
| | | extrapolated-rows=disabled max-scan-range-rows=73.05K
|
|
| | | parquet statistics predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| | | parquet dictionary predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| | | mem-estimate=48.00MB mem-reservation=512.00KB thread-reservation=1
|
|
| | | tuple-ids=0 row-size=12B cardinality=108
|
|
| | | in pipelines: 00(GETNEXT)
|
|
| | |
|
|
| | 06:HASH JOIN [INNER JOIN]
|
|
| | | hash predicates: ss_item_sk = i_item_sk
|
|
| | | fk/pk conjuncts: ss_item_sk = i_item_sk
|
|
| | | runtime filters: RF008[bloom] <- i_item_sk, RF009[min_max] <- i_item_sk
|
|
| | | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| | | tuple-ids=1,2 row-size=96B cardinality=1.72K(filtered from 28.96K)
|
|
| | | in pipelines: 01(GETNEXT), 02(OPEN)
|
|
| | |
|
|
| | |--02:SCAN HDFS [tpcds_parquet.item]
|
|
| | | HDFS partitions=1/1 files=1 size=1.73MB
|
|
| | | predicates: i_manager_id = CAST(7 AS INT)
|
|
| | | stored statistics:
|
|
| | | table: rows=18.00K size=1.73MB
|
|
| | | columns: all
|
|
| | | extrapolated-rows=disabled max-scan-range-rows=18.00K
|
|
| | | parquet statistics predicates: i_manager_id = CAST(7 AS INT)
|
|
| | | parquet dictionary predicates: i_manager_id = CAST(7 AS INT)
|
|
| | | mem-estimate=96.00MB mem-reservation=512.00KB thread-reservation=1
|
|
| | | tuple-ids=2 row-size=72B cardinality=181
|
|
| | | in pipelines: 02(GETNEXT)
|
|
| | |
|
|
| | 01:SCAN HDFS [tpcds_parquet.store_sales]
|
|
| | HDFS partitions=1824/1824 files=1824 size=200.96MB
|
|
| | runtime filters: RF001[min_max] -> ss_store_sk, RF009[min_max] -> ss_item_sk, RF000[bloom] -> ss_store_sk, RF006[bloom] -> ss_sold_date_sk, RF008[bloom] -> ss_item_sk
|
|
| | stored statistics:
|
|
| | table: rows=2.88M size=200.96MB
|
|
| | partitions: 1824/1824 rows=2.88M
|
|
| | columns: all
|
|
| | extrapolated-rows=disabled max-scan-range-rows=130.09K est-scan-range=109(filtered from 1824)
|
|
| | mem-estimate=64.00MB mem-reservation=2.00MB thread-reservation=1
|
|
| | tuple-ids=1 row-size=24B cardinality=1.72K(filtered from 2.88M)
|
|
| | in pipelines: 01(GETNEXT)
|
|
| |
|
|
| 03:SCAN HDFS [tpcds_parquet.customer]
|
|
| HDFS partitions=1/1 files=1 size=5.49MB
|
|
| runtime filters: RF005[min_max] -> c_customer_sk, RF004[bloom] -> c_customer_sk
|
|
| stored statistics:
|
|
| table: rows=100.00K size=5.49MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=100.00K
|
|
| mem-estimate=32.00MB mem-reservation=1.00MB thread-reservation=1
|
|
| tuple-ids=3 row-size=8B cardinality=1.72K(filtered from 100.00K)
|
|
| in pipelines: 03(GETNEXT)
|
|
|
|
|
04:SCAN HDFS [tpcds_parquet.customer_address]
|
|
HDFS partitions=1/1 files=1 size=1.16MB
|
|
runtime filters: RF003[min_max] -> ca_address_sk, RF002[bloom] -> ca_address_sk
|
|
stored statistics:
|
|
table: rows=50.00K size=1.16MB
|
|
columns: all
|
|
extrapolated-rows=disabled max-scan-range-rows=50.00K
|
|
mem-estimate=32.00MB mem-reservation=512.00KB thread-reservation=1
|
|
tuple-ids=4 row-size=21B cardinality=1.76K(filtered from 50.00K)
|
|
in pipelines: 04(GETNEXT)
|
|
---- DISTRIBUTEDPLAN
|
|
Max Per-Host Resource Reservation: Memory=31.08MB Threads=16
|
|
Per-Host Resource Estimates: Memory=353MB
|
|
F09:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=4.03MB mem-reservation=4.00MB thread-reservation=1
|
|
PLAN-ROOT SINK
|
|
| output exprs: i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price)
|
|
| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
|
|
|
|
|
22:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
|
|
| limit: 100
|
|
| mem-estimate=25.76KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=7 row-size=76B cardinality=100
|
|
| in pipelines: 12(GETNEXT)
|
|
|
|
|
F08:PLAN FRAGMENT [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)] hosts=3 instances=3
|
|
Per-Host Resources: mem-estimate=10.27MB mem-reservation=1.94MB thread-reservation=1
|
|
12:TOP-N [LIMIT=100]
|
|
| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
|
|
| mem-estimate=7.38KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=7 row-size=76B cardinality=100
|
|
| in pipelines: 12(GETNEXT), 21(OPEN)
|
|
|
|
|
21:AGGREGATE [FINALIZE]
|
|
| output: sum:merge(ss_ext_sales_price)
|
|
| group by: i_brand, i_brand_id, i_manufact_id, i_manufact
|
|
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 21(GETNEXT), 01(OPEN)
|
|
|
|
|
20:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]
|
|
| mem-estimate=280.82KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
F06:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=3
|
|
Per-Host Resources: mem-estimate=17.36MB mem-reservation=7.88MB thread-reservation=1 runtime-filters-memory=2.00MB
|
|
11:AGGREGATE [STREAMING]
|
|
| output: sum(ss_ext_sales_price)
|
|
| group by: i_brand, i_brand_id, i_manufact_id, i_manufact
|
|
| mem-estimate=10.00MB mem-reservation=2.00MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
10:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: ss_store_sk = s_store_sk
|
|
| fk/pk conjuncts: ss_store_sk = s_store_sk
|
|
| other predicates: substr(ca_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) != substr(s_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT))
|
|
| runtime filters: RF000[bloom] <- s_store_sk, RF001[min_max] <- s_store_sk
|
|
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0,3,4,5 row-size=158B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 05(OPEN)
|
|
|
|
|
|--19:EXCHANGE [BROADCAST]
|
|
| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=5 row-size=21B cardinality=12
|
|
| | in pipelines: 05(GETNEXT)
|
|
| |
|
|
| F07:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=32.10MB mem-reservation=16.00KB thread-reservation=2
|
|
| 05:SCAN HDFS [tpcds_parquet.store, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=9.93KB
|
|
| stored statistics:
|
|
| table: rows=12 size=9.93KB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=12
|
|
| mem-estimate=32.00MB mem-reservation=16.00KB thread-reservation=1
|
|
| tuple-ids=5 row-size=21B cardinality=12
|
|
| in pipelines: 05(GETNEXT)
|
|
|
|
|
09:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: c_current_addr_sk = ca_address_sk
|
|
| fk/pk conjuncts: c_current_addr_sk = ca_address_sk
|
|
| runtime filters: RF002[bloom] <- ca_address_sk, RF003[min_max] <- ca_address_sk
|
|
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0,3,4 row-size=137B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 04(OPEN)
|
|
|
|
|
|--18:EXCHANGE [HASH(ca_address_sk)]
|
|
| | mem-estimate=1.03MB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=4 row-size=21B cardinality=50.00K
|
|
| | in pipelines: 04(GETNEXT)
|
|
| |
|
|
| F05:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=32.29MB mem-reservation=512.00KB thread-reservation=2
|
|
| 04:SCAN HDFS [tpcds_parquet.customer_address, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=1.16MB
|
|
| stored statistics:
|
|
| table: rows=50.00K size=1.16MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=50.00K
|
|
| mem-estimate=32.00MB mem-reservation=512.00KB thread-reservation=1
|
|
| tuple-ids=4 row-size=21B cardinality=50.00K
|
|
| in pipelines: 04(GETNEXT)
|
|
|
|
|
17:EXCHANGE [HASH(c_current_addr_sk)]
|
|
| mem-estimate=459.16KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
F04:PLAN FRAGMENT [HASH(ss_customer_sk)] hosts=3 instances=3
|
|
Per-Host Resources: mem-estimate=5.83MB mem-reservation=3.88MB thread-reservation=1 runtime-filters-memory=1.00MB
|
|
08:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: ss_customer_sk = c_customer_sk
|
|
| fk/pk conjuncts: ss_customer_sk = c_customer_sk
|
|
| runtime filters: RF004[bloom] <- c_customer_sk, RF005[min_max] <- c_customer_sk
|
|
| mem-estimate=2.88MB mem-reservation=2.88MB spill-buffer=128.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 03(OPEN)
|
|
|
|
|
|--16:EXCHANGE [HASH(c_customer_sk)]
|
|
| | mem-estimate=793.25KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=3 row-size=8B cardinality=100.00K
|
|
| | in pipelines: 03(GETNEXT)
|
|
| |
|
|
| F03:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=33.14MB mem-reservation=2.00MB thread-reservation=2 runtime-filters-memory=1.00MB
|
|
| 03:SCAN HDFS [tpcds_parquet.customer, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=5.49MB
|
|
| runtime filters: RF003[min_max] -> c_current_addr_sk, RF002[bloom] -> c_current_addr_sk
|
|
| stored statistics:
|
|
| table: rows=100.00K size=5.49MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=100.00K
|
|
| mem-estimate=32.00MB mem-reservation=1.00MB thread-reservation=1
|
|
| tuple-ids=3 row-size=8B cardinality=100.00K
|
|
| in pipelines: 03(GETNEXT)
|
|
|
|
|
15:EXCHANGE [HASH(ss_customer_sk)]
|
|
| mem-estimate=418.69KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=1,2,0 row-size=108B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
|
|
Per-Host Resources: mem-estimate=73.28MB mem-reservation=9.88MB thread-reservation=2 runtime-filters-memory=4.00MB
|
|
07:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: ss_sold_date_sk = d_date_sk
|
|
| fk/pk conjuncts: ss_sold_date_sk = d_date_sk
|
|
| runtime filters: RF006[bloom] <- d_date_sk
|
|
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0 row-size=108B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 00(OPEN)
|
|
|
|
|
|--14:EXCHANGE [BROADCAST]
|
|
| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=0 row-size=12B cardinality=108
|
|
| | in pipelines: 00(GETNEXT)
|
|
| |
|
|
| F02:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=48.06MB mem-reservation=512.00KB thread-reservation=2
|
|
| 00:SCAN HDFS [tpcds_parquet.date_dim, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=2.15MB
|
|
| predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| stored statistics:
|
|
| table: rows=73.05K size=2.15MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=73.05K
|
|
| parquet statistics predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| parquet dictionary predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| mem-estimate=48.00MB mem-reservation=512.00KB thread-reservation=1
|
|
| tuple-ids=0 row-size=12B cardinality=108
|
|
| in pipelines: 00(GETNEXT)
|
|
|
|
|
06:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: ss_item_sk = i_item_sk
|
|
| fk/pk conjuncts: ss_item_sk = i_item_sk
|
|
| runtime filters: RF008[bloom] <- i_item_sk, RF009[min_max] <- i_item_sk
|
|
| mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2 row-size=96B cardinality=1.72K(filtered from 28.96K)
|
|
| in pipelines: 01(GETNEXT), 02(OPEN)
|
|
|
|
|
|--13:EXCHANGE [BROADCAST]
|
|
| | mem-estimate=26.00KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=2 row-size=72B cardinality=181
|
|
| | in pipelines: 02(GETNEXT)
|
|
| |
|
|
| F01:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Host Resources: mem-estimate=96.30MB mem-reservation=512.00KB thread-reservation=2
|
|
| 02:SCAN HDFS [tpcds_parquet.item, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=1.73MB
|
|
| predicates: i_manager_id = CAST(7 AS INT)
|
|
| stored statistics:
|
|
| table: rows=18.00K size=1.73MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=18.00K
|
|
| parquet statistics predicates: i_manager_id = CAST(7 AS INT)
|
|
| parquet dictionary predicates: i_manager_id = CAST(7 AS INT)
|
|
| mem-estimate=96.00MB mem-reservation=512.00KB thread-reservation=1
|
|
| tuple-ids=2 row-size=72B cardinality=181
|
|
| in pipelines: 02(GETNEXT)
|
|
|
|
|
01:SCAN HDFS [tpcds_parquet.store_sales, RANDOM]
|
|
HDFS partitions=1824/1824 files=1824 size=200.96MB
|
|
runtime filters: RF001[min_max] -> ss_store_sk, RF005[min_max] -> ss_customer_sk, RF009[min_max] -> ss_item_sk, RF000[bloom] -> ss_store_sk, RF004[bloom] -> ss_customer_sk, RF006[bloom] -> ss_sold_date_sk, RF008[bloom] -> ss_item_sk
|
|
stored statistics:
|
|
table: rows=2.88M size=200.96MB
|
|
partitions: 1824/1824 rows=2.88M
|
|
columns: all
|
|
extrapolated-rows=disabled max-scan-range-rows=130.09K est-scan-range=109(filtered from 1824)
|
|
mem-estimate=64.00MB mem-reservation=2.00MB thread-reservation=1
|
|
tuple-ids=1 row-size=24B cardinality=1.72K(filtered from 2.88M)
|
|
in pipelines: 01(GETNEXT)
|
|
---- PARALLELPLANS
|
|
Max Per-Host Resource Reservation: Memory=49.77MB Threads=21
|
|
Per-Host Resource Estimates: Memory=212MB
|
|
F09:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
|
|
| Per-Instance Resources: mem-estimate=4.05MB mem-reservation=4.00MB thread-reservation=1
|
|
PLAN-ROOT SINK
|
|
| output exprs: i_brand_id, i_brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price)
|
|
| mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0
|
|
|
|
|
22:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
|
|
| limit: 100
|
|
| mem-estimate=49.06KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=7 row-size=76B cardinality=100
|
|
| in pipelines: 12(GETNEXT)
|
|
|
|
|
F08:PLAN FRAGMENT [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)] hosts=3 instances=6
|
|
Per-Instance Resources: mem-estimate=10.51MB mem-reservation=1.94MB thread-reservation=1
|
|
12:TOP-N [LIMIT=100]
|
|
| order by: sum(ss_ext_sales_price) DESC, i_brand ASC, i_brand_id ASC, i_manufact_id ASC, i_manufact ASC
|
|
| mem-estimate=7.38KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=7 row-size=76B cardinality=100
|
|
| in pipelines: 12(GETNEXT), 21(OPEN)
|
|
|
|
|
21:AGGREGATE [FINALIZE]
|
|
| output: sum:merge(ss_ext_sales_price)
|
|
| group by: i_brand, i_brand_id, i_manufact_id, i_manufact
|
|
| mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 21(GETNEXT), 01(OPEN)
|
|
|
|
|
20:EXCHANGE [HASH(i_brand,i_brand_id,i_manufact_id,i_manufact)]
|
|
| mem-estimate=519.44KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
F06:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=6
|
|
Per-Instance Resources: mem-estimate=12.70MB mem-reservation=2.00MB thread-reservation=1
|
|
11:AGGREGATE [STREAMING]
|
|
| output: sum(ss_ext_sales_price)
|
|
| group by: i_brand, i_brand_id, i_manufact_id, i_manufact
|
|
| mem-estimate=10.00MB mem-reservation=2.00MB spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=6 row-size=76B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
10:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash-table-id=00
|
|
| hash predicates: ss_store_sk = s_store_sk
|
|
| fk/pk conjuncts: ss_store_sk = s_store_sk
|
|
| other predicates: substr(ca_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT)) != substr(s_zip, CAST(1 AS BIGINT), CAST(5 AS BIGINT))
|
|
| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0,3,4,5 row-size=158B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 05(OPEN)
|
|
|
|
|
|--F10:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=3
|
|
| | Per-Instance Resources: mem-estimate=4.89MB mem-reservation=4.88MB thread-reservation=1 runtime-filters-memory=1.00MB
|
|
| JOIN BUILD
|
|
| | join-table-id=00 plan-id=01 cohort-id=01
|
|
| | build expressions: s_store_sk
|
|
| | runtime filters: RF000[bloom] <- s_store_sk, RF001[min_max] <- s_store_sk
|
|
| | mem-estimate=3.88MB mem-reservation=3.88MB spill-buffer=64.00KB thread-reservation=0
|
|
| |
|
|
| 19:EXCHANGE [BROADCAST]
|
|
| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=5 row-size=21B cardinality=12
|
|
| | in pipelines: 05(GETNEXT)
|
|
| |
|
|
| F07:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Instance Resources: mem-estimate=16.10MB mem-reservation=16.00KB thread-reservation=1
|
|
| 05:SCAN HDFS [tpcds_parquet.store, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=9.93KB
|
|
| stored statistics:
|
|
| table: rows=12 size=9.93KB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=12
|
|
| mem-estimate=16.00MB mem-reservation=16.00KB thread-reservation=0
|
|
| tuple-ids=5 row-size=21B cardinality=12
|
|
| in pipelines: 05(GETNEXT)
|
|
|
|
|
09:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash-table-id=01
|
|
| hash predicates: c_current_addr_sk = ca_address_sk
|
|
| fk/pk conjuncts: c_current_addr_sk = ca_address_sk
|
|
| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0,3,4 row-size=137B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 04(OPEN)
|
|
|
|
|
|--F11:PLAN FRAGMENT [HASH(c_current_addr_sk)] hosts=3 instances=6
|
|
| | Per-Instance Resources: mem-estimate=3.96MB mem-reservation=2.94MB thread-reservation=1 runtime-filters-memory=1.00MB
|
|
| JOIN BUILD
|
|
| | join-table-id=01 plan-id=02 cohort-id=01
|
|
| | build expressions: ca_address_sk
|
|
| | runtime filters: RF002[bloom] <- ca_address_sk, RF003[min_max] <- ca_address_sk
|
|
| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| |
|
|
| 18:EXCHANGE [HASH(ca_address_sk)]
|
|
| | mem-estimate=1.03MB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=4 row-size=21B cardinality=50.00K
|
|
| | in pipelines: 04(GETNEXT)
|
|
| |
|
|
| F05:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Instance Resources: mem-estimate=16.59MB mem-reservation=512.00KB thread-reservation=1
|
|
| 04:SCAN HDFS [tpcds_parquet.customer_address, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=1.16MB
|
|
| stored statistics:
|
|
| table: rows=50.00K size=1.16MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=50.00K
|
|
| mem-estimate=16.00MB mem-reservation=512.00KB thread-reservation=0
|
|
| tuple-ids=4 row-size=21B cardinality=50.00K
|
|
| in pipelines: 04(GETNEXT)
|
|
|
|
|
17:EXCHANGE [HASH(c_current_addr_sk)]
|
|
| mem-estimate=853.78KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
F04:PLAN FRAGMENT [HASH(ss_customer_sk)] hosts=3 instances=6
|
|
Per-Instance Resources: mem-estimate=3.84MB mem-reservation=0B thread-reservation=1
|
|
08:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash-table-id=02
|
|
| hash predicates: ss_customer_sk = c_customer_sk
|
|
| fk/pk conjuncts: ss_customer_sk = c_customer_sk
|
|
| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0,3 row-size=116B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 03(OPEN)
|
|
|
|
|
|--F12:PLAN FRAGMENT [HASH(ss_customer_sk)] hosts=3 instances=6
|
|
| | Per-Instance Resources: mem-estimate=3.71MB mem-reservation=2.94MB thread-reservation=1 runtime-filters-memory=1.00MB
|
|
| JOIN BUILD
|
|
| | join-table-id=02 plan-id=03 cohort-id=01
|
|
| | build expressions: c_customer_sk
|
|
| | runtime filters: RF004[bloom] <- c_customer_sk, RF005[min_max] <- c_customer_sk
|
|
| | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0
|
|
| |
|
|
| 16:EXCHANGE [HASH(c_customer_sk)]
|
|
| | mem-estimate=793.25KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=3 row-size=8B cardinality=100.00K
|
|
| | in pipelines: 03(GETNEXT)
|
|
| |
|
|
| F03:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Host Shared Resources: mem-estimate=1.00MB mem-reservation=1.00MB thread-reservation=0 runtime-filters-memory=1.00MB
|
|
| Per-Instance Resources: mem-estimate=16.28MB mem-reservation=1.00MB thread-reservation=1
|
|
| 03:SCAN HDFS [tpcds_parquet.customer, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=5.49MB
|
|
| runtime filters: RF003[min_max] -> c_current_addr_sk, RF002[bloom] -> c_current_addr_sk
|
|
| stored statistics:
|
|
| table: rows=100.00K size=5.49MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=100.00K
|
|
| mem-estimate=16.00MB mem-reservation=1.00MB thread-reservation=0
|
|
| tuple-ids=3 row-size=8B cardinality=100.00K
|
|
| in pipelines: 03(GETNEXT)
|
|
|
|
|
15:EXCHANGE [HASH(ss_customer_sk)]
|
|
| mem-estimate=777.31KB mem-reservation=0B thread-reservation=0
|
|
| tuple-ids=1,2,0 row-size=108B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT)
|
|
|
|
|
F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
|
|
Per-Host Shared Resources: mem-estimate=4.00MB mem-reservation=4.00MB thread-reservation=0 runtime-filters-memory=4.00MB
|
|
Per-Instance Resources: mem-estimate=18.80MB mem-reservation=2.00MB thread-reservation=1
|
|
07:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash-table-id=03
|
|
| hash predicates: ss_sold_date_sk = d_date_sk
|
|
| fk/pk conjuncts: ss_sold_date_sk = d_date_sk
|
|
| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2,0 row-size=108B cardinality=1.72K
|
|
| in pipelines: 01(GETNEXT), 00(OPEN)
|
|
|
|
|
|--F13:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
|
|
| | Per-Instance Resources: mem-estimate=4.89MB mem-reservation=4.88MB thread-reservation=1 runtime-filters-memory=1.00MB
|
|
| JOIN BUILD
|
|
| | join-table-id=03 plan-id=04 cohort-id=01
|
|
| | build expressions: d_date_sk
|
|
| | runtime filters: RF006[bloom] <- d_date_sk
|
|
| | mem-estimate=3.88MB mem-reservation=3.88MB spill-buffer=64.00KB thread-reservation=0
|
|
| |
|
|
| 14:EXCHANGE [BROADCAST]
|
|
| | mem-estimate=16.00KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=0 row-size=12B cardinality=108
|
|
| | in pipelines: 00(GETNEXT)
|
|
| |
|
|
| F02:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Instance Resources: mem-estimate=16.06MB mem-reservation=512.00KB thread-reservation=1
|
|
| 00:SCAN HDFS [tpcds_parquet.date_dim, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=2.15MB
|
|
| predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| stored statistics:
|
|
| table: rows=73.05K size=2.15MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=73.05K
|
|
| parquet statistics predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| parquet dictionary predicates: d_year = CAST(1999 AS INT), d_moy = CAST(11 AS INT)
|
|
| mem-estimate=16.00MB mem-reservation=512.00KB thread-reservation=0
|
|
| tuple-ids=0 row-size=12B cardinality=108
|
|
| in pipelines: 00(GETNEXT)
|
|
|
|
|
06:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash-table-id=04
|
|
| hash predicates: ss_item_sk = i_item_sk
|
|
| fk/pk conjuncts: ss_item_sk = i_item_sk
|
|
| mem-estimate=0B mem-reservation=0B spill-buffer=64.00KB thread-reservation=0
|
|
| tuple-ids=1,2 row-size=96B cardinality=1.72K(filtered from 28.96K)
|
|
| in pipelines: 01(GETNEXT), 02(OPEN)
|
|
|
|
|
|--F14:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
|
|
| | Per-Instance Resources: mem-estimate=4.90MB mem-reservation=4.88MB thread-reservation=1 runtime-filters-memory=1.00MB
|
|
| JOIN BUILD
|
|
| | join-table-id=04 plan-id=05 cohort-id=01
|
|
| | build expressions: i_item_sk
|
|
| | runtime filters: RF008[bloom] <- i_item_sk, RF009[min_max] <- i_item_sk
|
|
| | mem-estimate=3.88MB mem-reservation=3.88MB spill-buffer=64.00KB thread-reservation=0
|
|
| |
|
|
| 13:EXCHANGE [BROADCAST]
|
|
| | mem-estimate=26.00KB mem-reservation=0B thread-reservation=0
|
|
| | tuple-ids=2 row-size=72B cardinality=181
|
|
| | in pipelines: 02(GETNEXT)
|
|
| |
|
|
| F01:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
|
|
| Per-Instance Resources: mem-estimate=16.30MB mem-reservation=512.00KB thread-reservation=1
|
|
| 02:SCAN HDFS [tpcds_parquet.item, RANDOM]
|
|
| HDFS partitions=1/1 files=1 size=1.73MB
|
|
| predicates: i_manager_id = CAST(7 AS INT)
|
|
| stored statistics:
|
|
| table: rows=18.00K size=1.73MB
|
|
| columns: all
|
|
| extrapolated-rows=disabled max-scan-range-rows=18.00K
|
|
| parquet statistics predicates: i_manager_id = CAST(7 AS INT)
|
|
| parquet dictionary predicates: i_manager_id = CAST(7 AS INT)
|
|
| mem-estimate=16.00MB mem-reservation=512.00KB thread-reservation=0
|
|
| tuple-ids=2 row-size=72B cardinality=181
|
|
| in pipelines: 02(GETNEXT)
|
|
|
|
|
01:SCAN HDFS [tpcds_parquet.store_sales, RANDOM]
|
|
HDFS partitions=1824/1824 files=1824 size=200.96MB
|
|
runtime filters: RF001[min_max] -> ss_store_sk, RF005[min_max] -> ss_customer_sk, RF009[min_max] -> ss_item_sk, RF000[bloom] -> ss_store_sk, RF004[bloom] -> ss_customer_sk, RF006[bloom] -> ss_sold_date_sk, RF008[bloom] -> ss_item_sk
|
|
stored statistics:
|
|
table: rows=2.88M size=200.96MB
|
|
partitions: 1824/1824 rows=2.88M
|
|
columns: all
|
|
extrapolated-rows=disabled max-scan-range-rows=130.09K est-scan-range=109(filtered from 1824)
|
|
mem-estimate=16.00MB mem-reservation=2.00MB thread-reservation=0
|
|
tuple-ids=1 row-size=24B cardinality=1.72K(filtered from 2.88M)
|
|
in pipelines: 01(GETNEXT)
|
|
====
|