Files
impala/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test
Riza Suminto 69da2ff86e IMPALA-12106: Fix overparallelization of Union fragment by 1
IMPALA-10973 has a bug where a union fragment without a scan node can be
over-parallelized by the backend scheduler by 1. It is reproducible by
running TPC-DS Q11 with MT_DOP=1. This patch additionally checks that
such a fragment does not have an input fragment before randomizing the
host assignment.

Testing:
Add TPC-DS Q11 to test_mt_dop.py::TestMtDopScheduling::test_scheduling
and verify the number of fragment instances scheduled in the
ExecSummary.

Change-Id: Ic69e7c8c0cadb4b07ee398aff362fbc6513eb08d
Reviewed-on: http://gerrit.cloudera.org:8080/19816
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2023-05-03 03:16:29 +00:00

310 lines
11 KiB
Plaintext

====
---- QUERY
# Unfiltered scan of 'alltypes'.
# 24 partitions across 3 backends means that we'll get 4 finstances per backend
# (since mt_dop=4, which limits us to 4 finstances per backend).
# The profile check below therefore expects an "AdmissionSlots: 4" entry.
select min(string_col) from alltypes
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 4.*
====
---- QUERY
# The 'month = 1' predicate restricts the scan to 2 partitions.
# 2 partitions across 3 backends means that we'll get 1 finstance per backend
# and one unused backend.
# The profile check below therefore expects an "AdmissionSlots: 1" entry.
select min(string_col) from alltypes where month = 1
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 1.*
====
---- QUERY
# 7 partitions across 3 backends results in 3 finstances on one backend and 2
# finstances on the others. This test illustrates that the slots can vary between
# backends.
# Both an "AdmissionSlots: 2" and an "AdmissionSlots: 3" entry must appear in the
# profile for the checks below to pass.
select min(string_col) from alltypes where month <= 7 and year = 2009
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 2.*
row_regex:.*AdmissionSlots: 3.*
====
---- QUERY
# This query should have three scans in the same fragment. The scan of 'alltypes'
# has 24 files - enough scan ranges to result in 4 finstances per backend.
# The profile checks below expect the same pair of counts (3 and 12) for the
# aggregation, the union and all three scans, i.e. the two smaller tables are
# expected to run at the parallelism of the 'alltypes' scan.
# NOTE(review): the two numbers in each operator regex are presumably the host and
# instance counts of the exec summary (3 backends x 4 finstances = 12) - confirm.
select min(string_col) from (
select * from alltypes
union all
select * from alltypessmall
union all
select * from alltypestiny) v
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 4 .*
row_regex:.*F04:ROOT * 1 * 1 .*
row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*00:UNION * 3 * 12 *
row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.*
row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
====
---- QUERY
# Same idea, but with smallest scan first to check that the scheduler is taking the
# max # ranges over the branches of the union when deciding parallelism.
# With 'alltypestiny' as the first union branch, the 'alltypes' scan becomes node 03;
# the expected counts for every checked operator are still 3 and 12.
select min(string_col) from (
select * from alltypestiny
union all
select * from alltypessmall
union all
select * from alltypes) v
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 4 .*
row_regex:.*F04:ROOT * 1 * 1 .*
row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*00:UNION * 3 * 12 *
row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.*
row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
====
---- QUERY
# This query should have one scan and one exchange in the interior fragment.
# The scan of 'alltypes' has 24 files - enough scan ranges to result in 4 finstances per
# backend.
# NOTE(review): presumably the 'select distinct' branch is what places the 'alltypes'
# scan behind an exchange, leaving only the 'alltypestiny' scan in the interior
# fragment - confirm against the plan.
# Every operator checked below, including the 'alltypestiny' scan (node 01), is
# expected to report the counts 3 and 12.
select min(string_col) from (
select * from alltypestiny
union all
select distinct * from alltypes) v
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 4.*
row_regex:.*F04:ROOT * 1 * 1 .*
row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*06:AGGREGATE * 3 * 12 .*
row_regex:.*03:AGGREGATE * 3 * 12 .*
row_regex:.*00:UNION * 3 * 12 *
row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.*
====
---- QUERY
# This query should have one scan and one exchange in the interior fragment.
# The scan of 'alltypes' has 24 files - enough scan ranges to result in 4 finstances per
# backend (12 total), but the scan of alltypestiny only results in 4 scan ranges total.
# So the parallelism of the interior fragment is higher than the alltypestiny
# scan fragment.
# Accordingly, the checks below expect the counts 3 and 4 for the 'alltypestiny' scan
# (node 02) and aggregation 03, and the counts 3 and 12 for the union, the 'alltypes'
# scan (node 01) and aggregations 04 and 06.
select min(string_col) from (
select * from alltypes
union all
select distinct * from alltypestiny) v
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*AdmissionSlots: 4.*
row_regex:.*F04:ROOT * 1 * 1 .*
row_regex:.*04:AGGREGATE * 3 * 12 .*
row_regex:.*06:AGGREGATE * 3 * 12 .*
row_regex:.*03:AGGREGATE * 3 * 4 .*
row_regex:.*00:UNION * 3 * 12 *
row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 4 .*alltypestiny.*
row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.*
====
---- QUERY
# This query should have one scan and two exchanges in the interior fragment.
# The input scans to the exchanges have 2 finstances each per node, so the
# parallelism of the interior fragment should be 2 also.
# Each 'select distinct' branch keeps 6 monthly partitions of 'alltypes' (months 7-12
# of 2010 and months 1-6 of 2009), i.e. 2 per backend across 3 backends.
# The checks below expect "AdmissionSlots: 2" and the counts 3 and 6 for the union,
# aggregations 03 and 08, and scans 01 and 04.
select min(string_col) from (
select * from alltypestiny
union all
select distinct * from alltypes where month >= 7 and year = 2010
union all
select distinct * from alltypes where month <= 6 and year = 2009) v
---- RESULTS
'0'
---- RUNTIME_PROFILE
row_regex:.*F06:ROOT * 1 * 1 .*
row_regex:.*AdmissionSlots: 2.*
row_regex:.*00:UNION * 3 * 6 .*
row_regex:.*08:AGGREGATE * 3 * 6 .*
row_regex:.*03:AGGREGATE * 3 * 6 .*
row_regex:.*04:SCAN (HDFS|OZONE) * 3 * 6 .*
row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 6 .*
====
---- QUERY: TPCDS-Q11
# TPC-DS Q11, used as a regression test for IMPALA-12106: a union fragment without a
# scan node could be over-parallelized by 1 by the backend scheduler.
# The 'year_total' CTE is a UNION ALL of per-customer, per-year totals computed from
# store_sales (sale_type 's') and web_sales (sale_type 'w'); the main query references
# it four times (first/second year for each sale type).
# The RUNTIME_PROFILE checks of this test pin the two counts reported for each of the
# four UNION nodes (00, 07, 14 and 21).
# NOTE(review): those counts are presumably the host and instance counts of the exec
# summary - confirm.
with year_total as (
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
,'s' sale_type
from tpcds_parquet.customer
,tpcds_parquet.store_sales
,tpcds_parquet.date_dim
where c_customer_sk = ss_customer_sk
and ss_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
union all
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
,'w' sale_type
from tpcds_parquet.customer
,tpcds_parquet.web_sales
,tpcds_parquet.date_dim
where c_customer_sk = ws_bill_customer_sk
and ws_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
)
select
t_s_secyear.customer_id
,t_s_secyear.customer_first_name
,t_s_secyear.customer_last_name
,t_s_secyear.customer_preferred_cust_flag
from year_total t_s_firstyear
,year_total t_s_secyear
,year_total t_w_firstyear
,year_total t_w_secyear
where t_s_secyear.customer_id = t_s_firstyear.customer_id
and t_s_firstyear.customer_id = t_w_secyear.customer_id
and t_s_firstyear.customer_id = t_w_firstyear.customer_id
and t_s_firstyear.sale_type = 's'
and t_w_firstyear.sale_type = 'w'
and t_s_secyear.sale_type = 's'
and t_w_secyear.sale_type = 'w'
and t_s_firstyear.dyear = 2001
and t_s_secyear.dyear = 2001+1
and t_w_firstyear.dyear = 2001
and t_w_secyear.dyear = 2001+1
and t_s_firstyear.year_total > 0
and t_w_firstyear.year_total > 0
and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end
> case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end
order by t_s_secyear.customer_id
,t_s_secyear.customer_first_name
,t_s_secyear.customer_last_name
,t_s_secyear.customer_preferred_cust_flag
limit 100;
---- RESULTS
'AAAAAAAAAFGBBAAA','Howard','Major','Y'
'AAAAAAAAAMGDAAAA','Kenneth','Harlan','Y'
'AAAAAAAAAOPFBAAA','Jerry','Fields','Y'
'AAAAAAAABLEIBAAA','Paula','Wakefield','Y'
'AAAAAAAABNBBAAAA','Irma','Smith','Y'
'AAAAAAAACADPAAAA','Cristobal','Thomas','Y'
'AAAAAAAACFENAAAA','Christopher','Dawson','NULL'
'AAAAAAAACIJMAAAA','Elizabeth','Thomas','Y'
'AAAAAAAACJDIAAAA','James','Kerr','N'
'AAAAAAAACNAGBAAA','Virginia','May','N'
'AAAAAAAADBEFBAAA','Bennie','Bowers','N'
'AAAAAAAADCKOAAAA','Robert','Gonzalez','N'
'AAAAAAAADFKABAAA','Latoya','Craft','N'
'AAAAAAAADIIOAAAA','David','Carroll','Y'
'AAAAAAAADIJGBAAA','Ruth','Sanders','N'
'AAAAAAAADLHBBAAA','Henry','Bertrand','N'
'AAAAAAAAEADJAAAA','Ruth','Carroll','N'
'AAAAAAAAEJDLAAAA','Alice','Wright','N'
'AAAAAAAAEKFPAAAA','Annika','Chin','N'
'AAAAAAAAEKJLAAAA','Aisha','Carlson','Y'
'AAAAAAAAEPOGAAAA','Felisha','Mendes','Y'
'AAAAAAAAFACEAAAA','Priscilla','Miller','N'
'AAAAAAAAFBAHAAAA','Michael','Williams','N'
'AAAAAAAAFGIGAAAA','Eduardo','Miller','Y'
'AAAAAAAAFGPGAAAA','Albert','Wadsworth','Y'
'AAAAAAAAFMHIAAAA','Emilio','Darling','Y'
'AAAAAAAAFOGIAAAA','Michelle','Greene','N'
'AAAAAAAAFOJAAAAA','Don','Castillo','Y'
'AAAAAAAAGEHIAAAA','Tyler','Miller','N'
'AAAAAAAAGHPBBAAA','Nick','Mendez','Y'
'AAAAAAAAGNDAAAAA','Terry','Mcdowell','N'
'AAAAAAAAHGOABAAA','Sonia','White','N'
'AAAAAAAAHHCABAAA','William','Stewart','Y'
'AAAAAAAAHJLAAAAA','Audrey','Beltran','Y'
'AAAAAAAAHMJNAAAA','Ryan','Baptiste','Y'
'AAAAAAAAHMOIAAAA','Grace','Henderson','N'
'AAAAAAAAIADEBAAA','Diane','Aldridge','N'
'AAAAAAAAIBAEBAAA','Sandra','Wilson','N'
'AAAAAAAAIBFCBAAA','Ruth','Grantham','N'
'AAAAAAAAIBHHAAAA','Jennifer','Ballard','Y'
'AAAAAAAAICHFAAAA','Linda','Mccoy','N'
'AAAAAAAAIDKFAAAA','Michael','Mack','N'
'AAAAAAAAIJEMAAAA','Charlie','Cummings','Y'
'AAAAAAAAIMHBAAAA','Kathy','Knowles','N'
'AAAAAAAAIMHHBAAA','Lillian','Davidson','Y'
'AAAAAAAAJDBLAAAA','Melvin','Taylor','Y'
'AAAAAAAAJEKFBAAA','Norma','Burkholder','N'
'AAAAAAAAJGMMAAAA','Richard','Larson','Y'
'AAAAAAAAJIALAAAA','Santos','Gutierrez','N'
'AAAAAAAAJKBNAAAA','Julie','Kern','N'
'AAAAAAAAJMHLAAAA','Wanda','Ryan','Y'
'AAAAAAAAJONHBAAA','Warren','Orozco','N'
'AAAAAAAAJPINAAAA','Rose','Waite','Y'
'AAAAAAAAKAECAAAA','Milton','Mackey','N'
'AAAAAAAAKAPPAAAA','Karen','Parker','Y'
'AAAAAAAAKJBKAAAA','Georgia','Scott','N'
'AAAAAAAAKJBLAAAA','Kerry','Davis','Y'
'AAAAAAAAKKGEAAAA','Katie','Dunbar','N'
'AAAAAAAAKLHHBAAA','Manuel','Castaneda','N'
'AAAAAAAAKNAKAAAA','Gladys','Banks','N'
'AAAAAAAALFKKAAAA','Ignacio','Miller','Y'
'AAAAAAAALHMCAAAA','Brooke','Nelson','Y'
'AAAAAAAALIOPAAAA','Derek','Allen','Y'
'AAAAAAAALJNCBAAA','George','Gamez','Y'
'AAAAAAAAMDCAAAAA','Louann','Hamel','Y'
'AAAAAAAAMFFLAAAA','Margret','Gray','Y'
'AAAAAAAAMMOBBAAA','Margaret','Smith','N'
'AAAAAAAANFBDBAAA','Vernice','Fernandez','Y'
'AAAAAAAANGDBBAAA','Carlos','Jewell','N'
'AAAAAAAANIPLAAAA','Eric','Lawrence','Y'
'AAAAAAAANJAGAAAA','Allen','Hood','Y'
'AAAAAAAANJHCBAAA','Christopher','Schreiber','N'
'AAAAAAAANJOLAAAA','Debra','Underwood','Y'
'AAAAAAAAOBADBAAA','Elizabeth','Burnham','N'
'AAAAAAAAOCAJAAAA','Jenna','Staton','N'
'AAAAAAAAOCLBBAAA','NULL','NULL','NULL'
'AAAAAAAAODMMAAAA','Gayla','Cline','N'
'AAAAAAAAOFLCAAAA','James','Taylor','N'
'AAAAAAAAOPDLAAAA','Ann','Pence','N'
'AAAAAAAAPDFBAAAA','Terrance','Banks','Y'
'AAAAAAAAPEHEBAAA','Edith','Molina','Y'
'AAAAAAAAPFCLAAAA','Felicia','Neville','N'
'AAAAAAAAPICEAAAA','Jennifer','Cortez','Y'
'AAAAAAAAPJENAAAA','Ashley','Norton','Y'
'AAAAAAAAPKBCBAAA','Andrea','White','N'
'AAAAAAAAPKIKAAAA','Wendy','Horvath','Y'
'AAAAAAAAPMMBBAAA','Paul','Jordan','N'
'AAAAAAAAPPIBBAAA','Candice','Lee','Y'
---- TYPES
STRING, STRING, STRING, STRING
---- RUNTIME_PROFILE
row_regex:.*21:UNION * 2 * 2 .*
row_regex:.*00:UNION * 3 * 12 .*
row_regex:.*14:UNION * 2 * 2 .*
row_regex:.*07:UNION * 3 * 12 .*
====