This patch adds support for limiting the rows produced by a join node so
that runaway join queries can be prevented. The limit is specified by a
query option; queries exceeding the limit are terminated. The check runs
periodically, so the actual rows produced may go somewhat over the limit.

JOIN_ROWS_PRODUCED_LIMIT is exposed as an advanced query option.

The query profile is updated to include query-wide and per-backend metrics
for rows produced (RowsReturned). Example from
"set JOIN_ROWS_PRODUCED_LIMIT = 10000000;
 select count(*) from tpch_parquet.lineitem l1 cross join
   (select * from tpch_parquet.lineitem l2 limit 5) l3;":

  NESTED_LOOP_JOIN_NODE (id=2):
    - InactiveTotalTime: 107.534ms
    - PeakMemoryUsage: 16.00 KB (16384)
    - ProbeRows: 1.02K (1024)
    - ProbeTime: 0.000ns
    - RowsReturned: 10.00M (10002025)
    - RowsReturnedRate: 749.58 K/sec
    - TotalTime: 13s337ms

Testing:
Added tests for JOIN_ROWS_PRODUCED_LIMIT.

Change-Id: Idbca7e053b61b4e31b066edcfb3b0398fa859d02
Reviewed-on: http://gerrit.cloudera.org:8080/16706
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
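For reference, the option can be exercised directly from impala-shell. A
minimal, illustrative sketch (it reuses the example query above; the
termination error is expected to match the row_regex patterns in the test
file below):

  set JOIN_ROWS_PRODUCED_LIMIT = 1000000;
  -- The cross join produces tens of millions of join rows, far more than
  -- the limit, so the query should be terminated shortly after a periodic
  -- status report pushes the join's rows-produced count past the limit.
  select count(*) from tpch_parquet.lineitem l1 cross join
    (select * from tpch_parquet.lineitem l2 limit 5) l3;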
====
---- QUERY
# Query should succeed.
set CPU_LIMIT_S=10000;
set SCAN_BYTES_LIMIT="10G";
select id from functional.alltypessmall order by 1 limit 1
---- TYPES
INT
---- RESULTS
0
====
---- QUERY
# Query should fail due to exceeding scan bytes limit.
# Added a sleep to make sure it runs long enough to get a non-zero update from the
# fragment instances on the num of bytes read and cpu time.
set SCAN_BYTES_LIMIT="100M";
select sleep(10000) union select count(*) from tpch.lineitem l1,tpch.lineitem l2,
tpch.lineitem l3 where l1.l_suppkey = l2.l_linenumber and l1.l_orderkey = l2.l_orderkey
and l1.l_orderkey = l3.l_orderkey group by l1.l_comment, l2.l_comment having count(*) = 99
---- CATCH
row_regex:.*terminated due to scan bytes limit of 100.00 M.*
====
---- QUERY
# Query should fail due to CPU time limit.
# Added a sleep to make sure it runs long enough to get a non-zero update from the
# fragment instances on the num of bytes read and cpu time.
set CPU_LIMIT_S=1;
select sleep(10000) union select count(*) from tpch.lineitem l1,tpch.lineitem l2,
tpch.lineitem l3 where l1.l_suppkey = l2.l_linenumber and l1.l_orderkey = l2.l_orderkey
and l1.l_orderkey = l3.l_orderkey group by l1.l_comment, l2.l_comment having count(*) = 99
---- CATCH
row_regex:.*terminated due to CPU limit of 1s000ms.*
====
---- QUERY
# Query should fail due to CPU time limit.
# Added a sleep to make sure it runs long enough to get a non-zero update from the
# fragment instances on the num of bytes read and cpu time.
set CPU_LIMIT_S=1;
set SCAN_BYTES_LIMIT="100G";
select sleep(10000) union select count(*) from tpch.lineitem l1,tpch.lineitem l2,
tpch.lineitem l3 where l1.l_suppkey = l2.l_linenumber and l1.l_orderkey = l2.l_orderkey
and l1.l_orderkey = l3.l_orderkey group by l1.l_comment, l2.l_comment having count(*) = 99
---- CATCH
row_regex:.*terminated due to CPU limit of 1s000ms.*
====
---- QUERY
# Query should fail due to exceeding time limit.
set EXEC_TIME_LIMIT_S=2;
select sleep(10000)
---- CATCH
row_regex:.*expired due to execution time limit of 2s000ms.*
====
---- QUERY
# Query should fail due to exceeding rows produced limit.
set NUM_ROWS_PRODUCED_LIMIT = 10000;
select * from tpch.lineitem;
---- CATCH
row_regex:.*terminated due to rows produced limit of 10000.*
====
---- QUERY
# The query should succeed when it doesn't exceed the rows produced limit.
set NUM_ROWS_PRODUCED_LIMIT = 3;
select * from functional.tinytable;
---- TYPES
STRING,STRING
---- RESULTS
'aaaaaaa','bbbbbbb'
'ccccc','dddd'
'eeeeeeee','f'
====
---- QUERY
# Query should fail due to exceeding hash join rows produced limit.
# Added a sleep to make sure it runs long enough to get a non-zero update from the
# fragment instances on the num of join rows produced.
set JOIN_ROWS_PRODUCED_LIMIT = 1000;
select sleep(10000) union select count(*) from tpch.lineitem l1,tpch.lineitem l2,
tpch.lineitem l3 where l1.l_suppkey = l2.l_linenumber and l1.l_orderkey = l2.l_orderkey
and l1.l_orderkey = l3.l_orderkey group by l1.l_comment, l2.l_comment having count(*) = 99
---- CATCH
row_regex:.*terminated due to join rows produced exceeds the limit of 1.00K.*
====
---- QUERY
# Query should fail due to exceeding nested loop join rows produced limit.
# Added a sleep to make sure it runs long enough to get a non-zero update from the
# fragment instances on the num of join rows produced.
set JOIN_ROWS_PRODUCED_LIMIT = 500;
select sleep(10000) union select count(*) from functional.alltypestiny t1 left outer join
functional.alltypessmall t2 on t1.int_col < t2.int_col;
---- CATCH
row_regex:.*terminated due to join rows produced exceeds the limit of 500.*
====
---- QUERY
# If one of the mixed joins exceeds the rows produced limit, the query should fail.
# Added a sleep to make sure it runs long enough to get a non-zero update from the
# fragment instances on the num of join rows produced.
set JOIN_ROWS_PRODUCED_LIMIT = 10000;
select sleep(10000) union select count(*) from (
  select
    *
  from (
    select
      ps_partkey,
      sum(ps_supplycost * ps_availqty) as value
    from
      tpch_parquet.partsupp,
      tpch_parquet.supplier,
      tpch_parquet.nation
    where
      ps_suppkey = s_suppkey
      and s_nationkey = n_nationkey
      and n_name = 'GERMANY'
    group by
      ps_partkey
  ) as inner_query
  where
    value > (
      select
        sum(ps_supplycost * ps_availqty) * 0.0001
      from
        tpch_parquet.partsupp,
        tpch_parquet.supplier,
        tpch_parquet.nation
      where
        ps_suppkey = s_suppkey
        and s_nationkey = n_nationkey
        and n_name = 'GERMANY'
    )
  order by
    value desc
) t;
---- CATCH
row_regex:.*terminated due to join rows produced exceeds the limit of 10.00K.*
====
---- QUERY
# The query should succeed when it doesn't exceed the join rows produced limit.
set JOIN_ROWS_PRODUCED_LIMIT = 10000;
select count(*) from alltypessmall a inner join alltypessmall b
on (a.timestamp_col = b.timestamp_col)
where a.year=2009 and a.month=1 and b.year=2009 and b.month=1;
---- TYPES
BIGINT
---- RESULTS
25
====