# Planner tests for automatically disabling codegen on small queries. The cases below
# use a threshold of 3000 rows: an expected plan that contains the line
# "Codegen disabled by planner" means the optimisation was applied; when that line is
# absent, codegen stays enabled.
#
# Rows per node is < 3000: codegen should be disabled.
select count(*) from functional.alltypes
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=32.00KB Threads=3
Per-Host Resource Estimates: Memory=128MB
Codegen disabled by planner

PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
|  output: count(*)
|  row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=0B cardinality=7.30K
====
# Rows per node is > 3000: codegen should be enabled.
select count(*) from functional.alltypesagg
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=128.00KB Threads=3
Per-Host Resource Estimates: Memory=80MB

PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
|  output: count(*)
|  row-size=8B cardinality=1
|
00:SCAN HDFS [functional.alltypesagg]
   HDFS partitions=11/11 files=11 size=814.73KB
   row-size=0B cardinality=11.00K
====
# No stats on functional_parquet: the planner's estimated scan cardinality (12.88K) is
# > 3000, so codegen should be enabled.
select count(*) from functional_parquet.alltypes
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=16.00KB Threads=3
Per-Host Resource Estimates: Memory=10MB
WARNING: The following tables are missing relevant table and/or column statistics.
functional_parquet.alltypes

PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
|  output: sum_init_zero(functional_parquet.alltypes.stats: num_rows)
|  row-size=8B cardinality=1
|
00:SCAN HDFS [functional_parquet.alltypes]
   HDFS partitions=24/24 files=24 size=202.42KB
   row-size=8B cardinality=12.88K
====
# > 3000 rows returned to coordinator: codegen should be enabled
select * from functional_parquet.alltypes
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=88.00KB Threads=3
Per-Host Resource Estimates: Memory=129MB
WARNING: The following tables are missing relevant table and/or column statistics.
functional_parquet.alltypes

PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional_parquet.alltypes]
   HDFS partitions=24/24 files=24 size=202.42KB
   row-size=80B cardinality=12.88K
====
# Optimisation is enabled for join producing < 3000 rows
select count(*)
from functional.alltypes t1
join functional.alltypestiny t2 on t1.id = t2.id
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=2.98MB Threads=5
Per-Host Resource Estimates: Memory=163MB
Codegen disabled by planner

PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
|  output: count(*)
|  row-size=8B cardinality=1
|
02:HASH JOIN [INNER JOIN, BROADCAST]
|  hash predicates: t1.id = t2.id
|  runtime filters: RF000 <- t2.id
|  row-size=8B cardinality=8
|
|--04:EXCHANGE [BROADCAST]
|  |
|  01:SCAN HDFS [functional.alltypestiny t2]
|     HDFS partitions=4/4 files=4 size=460B
|     row-size=4B cardinality=8
|
00:SCAN HDFS [functional.alltypes t1]
   HDFS partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> t1.id
   row-size=4B cardinality=7.30K
====
# Optimisation is disabled by cross join producing > 3000 rows
select count(*)
from functional.alltypes t1, functional.alltypes t2
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=64.00KB Threads=5
Per-Host Resource Estimates: Memory=256MB

PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
|  output: count(*)
|  row-size=8B cardinality=1
|
02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
|  row-size=0B cardinality=53.29M
|
|--04:EXCHANGE [BROADCAST]
|  |
|  01:SCAN HDFS [functional.alltypes t2]
|     HDFS partitions=24/24 files=24 size=478.45KB
|     row-size=0B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes t1]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=0B cardinality=7.30K
====
# Optimisation is enabled for union producing < 3000 rows
select count(*) from (
  select * from functional.alltypes
  union all
  select * from functional.alltypestiny) v
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=32.00KB Threads=3
Per-Host Resource Estimates: Memory=128MB
Codegen disabled by planner

PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
|  output: count(*)
|  row-size=8B cardinality=1
|
00:UNION
|  pass-through-operands: all
|  row-size=0B cardinality=7.31K
|
|--02:SCAN HDFS [functional.alltypestiny]
|     HDFS partitions=4/4 files=4 size=460B
|     row-size=0B cardinality=8
|
01:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=0B cardinality=7.30K
====
# Optimisation is disabled by union producing > 3000 rows
select count(*) from (
  select * from functional.alltypes
  union all
  select * from functional.alltypes) v
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=32.00KB Threads=3
Per-Host Resource Estimates: Memory=128MB

PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  row-size=8B cardinality=1
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
|  output: count(*)
|  row-size=8B cardinality=1
|
00:UNION
|  pass-through-operands: all
|  row-size=0B cardinality=14.60K
|
|--02:SCAN HDFS [functional.alltypes]
|     HDFS partitions=24/24 files=24 size=478.45KB
|     row-size=0B cardinality=7.30K
|
01:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=0B cardinality=7.30K
====
# Scan with limit on large table: the number of rows scanned is bounded,
# codegen should be disabled
select sum(l_discount)
from (select * from tpch.lineitem limit 1000) v
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=8.00MB Threads=3
Per-Host Resource Estimates: Memory=264MB
Codegen disabled by planner

PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
|  output: sum(tpch.lineitem.l_discount)
|  row-size=16B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|  limit: 1000
|
00:SCAN HDFS [tpch.lineitem]
   HDFS partitions=1/1 files=1 size=718.94MB
   limit: 1000
   row-size=8B cardinality=1.00K
====
# Scan with limit and predicates on large table: any number of rows could be scanned:
# codegen should be enabled
select sum(l_discount)
from (select * from tpch.lineitem where l_orderkey > 100 limit 1000) v
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=8.00MB Threads=3
Per-Host Resource Estimates: Memory=264MB

PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
|  output: sum(tpch.lineitem.l_discount)
|  row-size=16B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|  limit: 1000
|
00:SCAN HDFS [tpch.lineitem]
   HDFS partitions=1/1 files=1 size=718.94MB
   predicates: l_orderkey > 100
   limit: 1000
   row-size=16B cardinality=1.00K
====
# Test query on large Kudu table with all Kudu primary key columns in equivalence
# predicates: not more than 1 row could be returned from Kudu,
# Codegen should be disabled.
select * from tpch_kudu.partsupp
where ps_partkey=2 and ps_suppkey=5003
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=0B Threads=3
Per-Host Resource Estimates: Memory=10MB
Codegen disabled by planner

PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN KUDU [tpch_kudu.partsupp]
   kudu predicates: ps_partkey = 2, ps_suppkey = 5003
   row-size=172B cardinality=1
====
# Test query on large Kudu table with partial Kudu primary key columns in equivalence
# predicates: any number of rows could be returned from Kudu,
# Codegen should be enabled
select * from tpch_kudu.partsupp
where ps_partkey=2
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=0B Threads=3
Per-Host Resource Estimates: Memory=10MB

PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN KUDU [tpch_kudu.partsupp]
   kudu predicates: ps_partkey = 2
   row-size=172B cardinality=4
====
# Test that codegen is disabled for a query over a small number of large files when the
# partition key scan optimisation kicks in. This is desirable because most of the rows
# in the file will not be processed.
select distinct 'const' from tpch_parquet.lineitem;
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=4.06MB Threads=4
Per-Host Resource Estimates: Memory=21MB
Codegen disabled by planner

PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
|  group by: 'const'
|  row-size=12B cardinality=1
|
02:EXCHANGE [HASH('const')]
|
01:AGGREGATE [STREAMING]
|  group by: 'const'
|  row-size=12B cardinality=1
|
00:SCAN HDFS [tpch_parquet.lineitem]
   HDFS partitions=1/1 files=3 size=193.99MB
   partition key scan
   row-size=0B cardinality=3
====