Files
impala/testdata/workloads/functional-query/queries/QueryTest/data-cache.test
Michael Smith 15b07ff1fb IMPALA-11704: (Addendum) fix crash on open for HDFS cache
When trying to read from HDFS cache, ReadFromCache calls
FileReader::Open(false) to force the file to open. The prior commit for
IMPALA-11704 didn't allow for that case when using a data cache, as the
data cache check would always happen. This resulted in a crash calling
CachedFile as exclusive_hdfs_fh_ was nullptr. Tests only catch this when
reading from HDFS cache with data cache enabled.

Replaces explicit arguments to override FileReader behavior with a flag
to communicate whether FileReader supports delayed open. Then the caller
can choose whether to call Open before read. Also simplifies calls to
ReadFromPos as it already has a pointer to ScanRange and can check
whether file handle caching is enabled directly. The Open call in
DoInternalRead uses a slightly wider net by only checking UseDataCache.
If the data cache is unavailable or a miss the file will then be opened.

Adds a select from tpch.nation to the query for test_data_cache.py as
something that triggers checking the HDFS cache.

Change-Id: I741488d6195e586917de220a39090895886a2dc5
Reviewed-on: http://gerrit.cloudera.org:8080/19228
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-11-11 17:53:58 +00:00

58 lines
1.7 KiB
Plaintext

====
---- QUERY
create table test_parquet stored as parquet as select * from tpch_parquet.lineitem;
---- RUNTIME_PROFILE
# Expect all cache misses for tpch_parquet.lineitem: this is the first query in this
# file to read it (presumably the data cache starts out empty for this test).
row_regex: .*DataCacheHitBytes: 0.*
row_regex: .*DataCacheHitCount: 0 \(0\).*
row_regex: .*DataCacheMissCount: 64 \(64\).*
====
---- QUERY
select count(*) from tpch_parquet.lineitem t1, test_parquet t2 where t1.l_orderkey = t2.l_orderkey;
---- RESULTS
30012985
---- RUNTIME_PROFILE
# Expect cache hits for t1 and cache misses for t2.
# t1 (tpch_parquet.lineitem) was already read by the CTAS above: hits only.
row_regex: .*DataCacheHitCount: 6 \(6\).*
row_regex: .*DataCacheMissBytes: 0.*
row_regex: .*DataCacheMissCount: 0 \(0\).*
row_regex: .*DataCachePartialHitCount: 0 \(0\).*
# t2 (test_parquet) has only been written so far, never read: misses only.
row_regex: .*DataCacheHitBytes: 0.*
row_regex: .*DataCacheHitCount: 0 \(0\).*
row_regex: .*DataCacheMissCount: 3 \(3\).*
row_regex: .*DataCachePartialHitCount: 0 \(0\).*
====
---- QUERY
select count(distinct l_orderkey) from test_parquet;
---- RESULTS
1500000
---- RUNTIME_PROFILE
# Expect all cache hits: test_parquet was read (with misses) by the join query above,
# which presumably populated the data cache.
row_regex: .*DataCacheHitCount: 3 \(3\).*
row_regex: .*DataCacheMissBytes: 0.*
row_regex: .*DataCacheMissCount: 0 \(0\).*
row_regex: .*DataCachePartialHitCount: 0 \(0\).*
====
---- QUERY
# Overwrite temp table with subset of data.
# The following query checks that cached data for the old contents is not reused.
insert overwrite test_parquet select * from tpch_parquet.lineitem where l_shipmode = 'AIR';
====
---- QUERY
# Verifies that stale data from the cache is not used.
# The expected count differs from the 1500000 returned before the overwrite, so a
# read served from stale cache entries would presumably show up as a wrong result.
select count(distinct l_orderkey) from test_parquet;
---- RESULTS
652393
---- RUNTIME_PROFILE
# Expect all cache misses due to change in mtime.
row_regex: .*DataCacheHitBytes: 0.*
row_regex: .*DataCacheHitCount: 0 \(0\).*
row_regex: .*DataCacheMissCount: 2 \(2\).*
row_regex: .*DataCachePartialHitCount: 0 \(0\).*
====
---- QUERY
# Exercise HDFS cache: selecting from tpch.nation triggers the HDFS cache check
# (regression coverage for the IMPALA-11704 addendum crash on open).
# Only the result is verified; no data cache counters are asserted for this query.
select count(*) from tpch.nation;
---- RESULTS
25
====