Files
impala/testdata/workloads/functional-query/queries/QueryTest/spilling-naaj.test
Tim Armstrong acaf8b9f0c IMPALA-5570: fix spilling null-aware anti join
IMPALA-4672: Part 2 regressed NAAJ by tightening up the spilling
invariants (e.g. can't unpin with spilling disabled) but we
didn't have tests for spilling NAAJs that could detect the
regression. This patch adds those tests, fixes the regressions,
and improves NAAJ by reliably spilling the probe side and not
trying to bring the whole probe side into memory.

The changes are:
* All null-aware streams start off in memory and are only unpinned if
  spilling is enabled.
* The null-aware build partition can be spilled in the same way as hash
  partitions.
* Probe streams are unpinned whenever there is memory pressure - if
  spilling is enabled and either a build partition is spilled or
  appending to the probe stream fails.
* Spilled probe streams are not re-pinned in EvaluateNullProbe().
  Instead we just iterate over the rows of the stream.

Testing:
Add query tests where the three different buckets of rows are large
enough to spill: the build and probe of the null-aware partition and the
null probe rows.

Test both spilling and in-memory (with spilling disabled) cases.

Change-Id: Ie2e60eb4dd32bd287a31479a6232400df65964c1
Reviewed-on: http://gerrit.cloudera.org:8080/7367
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
2017-08-24 04:24:10 +00:00

103 lines
3.4 KiB
Plaintext

====
---- QUERY
# Also see related tests in spilling-naaj-no-deny-reservation.test
#
# =======================================================================================
# NAAJ QUERY 2: many probe rows with NULLs in the join key.
# =======================================================================================
# The if() in the predicate replaces the join key with NULL for every lineitem row that
# has an even l_orderkey. The NOT IN subquery is the construct under test (null-aware
# anti join), so keep it as NOT IN; do not rewrite it as NOT EXISTS or as a join.
#
# Rows with a NULL key never qualify, so the result matches:
#   select l_orderkey, l_partkey, l_suppkey, l_linenumber
#   from lineitem
#   where l_orderkey % 2 = 1 and l_partkey not in (
#     select p_partkey
#     from part
#     where p_retailprice != l_extendedprice * l_tax)
#   order by 1,2,3,4 limit 5
#
# and a row survives the NOT IN only when its own part has the matching price, which is
# the same as:
#   select l_orderkey, l_partkey, l_suppkey, l_linenumber
#   from lineitem
#     join part on l_partkey = p_partkey
#   where l_orderkey % 2 = 1 and p_retailprice = l_extendedprice * l_tax
#   order by 1,2,3,4 limit 5
#
# The small buffer pool limit is what forces this query to spill.
set buffer_pool_limit=10m;
select
  l_orderkey,
  l_partkey,
  l_suppkey,
  l_linenumber
from lineitem
where if(l_orderkey % 2 = 0, NULL, l_partkey) not in (
  select p_partkey
  from part
  where p_retailprice != l_extendedprice * l_tax
)
order by l_orderkey, l_partkey, l_suppkey, l_linenumber
limit 5
---- RESULTS
965,107207,9718,1
1351,107227,7228,1
1505,122702,5215,2
1601,174374,1926,2
1767,22387,4890,4
---- TYPES
BIGINT,BIGINT,BIGINT,INT
====
---- QUERY
# NAAJ query 2 again, this time restricted to in-memory execution: scratch_limit=0
# rules out spilling to disk while the buffer pool limit stays at 10m. That is not
# enough memory to complete, so the query is expected to fail with the error listed
# in the CATCH section rather than return rows.
set scratch_limit=0;
set buffer_pool_limit=10m;
select
  l_orderkey,
  l_partkey,
  l_suppkey,
  l_linenumber
from lineitem
where if(l_orderkey % 2 = 0, NULL, l_partkey) not in (
  select p_partkey
  from part
  where p_retailprice != l_extendedprice * l_tax
)
order by l_orderkey, l_partkey, l_suppkey, l_linenumber
limit 5
---- CATCH
Could not free memory by spilling to disk: scratch_limit is 0
====
---- QUERY
# =======================================================================================
# NAAJ QUERY 3: many non-NULL probe rows that didn't match a build row.
# =======================================================================================
# For each lineitem row the correlated subquery returns the even p_partkey values whose
# p_retailprice differs from l_extendedprice * l_tax, with p_partkey = 2 reported as
# NULL. A NULL in that result means NOT IN can never be true, so a lineitem row can
# only qualify when part 2 is filtered out of the subquery, i.e. when part 2's
# p_retailprice equals l_extendedprice * l_tax. Odd l_partkey values never appear in
# the subquery result. Thus this query returns the same results as:
#
#   select l_orderkey, l_partkey, l_suppkey, l_linenumber
#   from lineitem
#     join part on l_extendedprice * l_tax = p_retailprice
#   where p_partkey = 2 and l_partkey % 2 = 1
#   order by 1,2,3,4
#
# The small buffer pool limit keeps the query under memory pressure.
set buffer_pool_limit=10m;
select
  l_orderkey,
  l_partkey,
  l_suppkey,
  l_linenumber
from lineitem
where l_partkey not in (
  select if(p_partkey = 2, NULL, p_partkey)
  from part
  where p_partkey % 2 = 0
    and p_retailprice != l_extendedprice * l_tax
)
order by l_orderkey, l_partkey, l_suppkey, l_linenumber
---- RESULTS
3178597,1001,3502,1
4801283,199001,9002,1
4958784,116009,1032,2
---- TYPES
BIGINT,BIGINT,BIGINT,INT
====
---- QUERY
# =======================================================================================
# NAAJ QUERY 4: many of both kinds of probe rows (NULL and non-NULL/unmatched)
# =======================================================================================
# Uses the same subquery as NAAJ query 3, but the probe-side key is replaced with NULL
# unless l_partkey % 5 = 1. This returns one fewer row than NAAJ query 3 because
# l_partkey=116009 is replaced with a NULL and therefore no longer qualifies.
set buffer_pool_limit=10m;
select
  l_orderkey,
  l_partkey,
  l_suppkey,
  l_linenumber
from lineitem
where if(l_partkey % 5 != 1, NULL, l_partkey) not in (
  select if(p_partkey = 2, NULL, p_partkey)
  from part
  where p_partkey % 2 = 0
    and p_retailprice != l_extendedprice * l_tax
)
order by l_orderkey, l_partkey, l_suppkey, l_linenumber
---- RESULTS
3178597,1001,3502,1
4801283,199001,9002,1
---- TYPES
BIGINT,BIGINT,BIGINT,INT
====