mirror of
https://github.com/apache/impala.git
synced 2026-01-20 03:03:01 -05:00
This reverts commit f932d78ad0.
The commit is reverted because it causes a significant regression for
non-optimized count star queries on tables in Parquet format.
There are several conflicts that need to be resolved manually:
- Removed assertion against 'NumFileMetadataRead' counter that is lost
with the revert.
- Adjust the assertions in test_plain_count_star_optimization,
test_in_predicate_push_down, and test_partitioned_insert of
test_iceberg.py, because the improvement to the Parquet optimized count
star code path is no longer present after the revert.
- Keep the "override" specifier in hdfs-parquet-scanner.h to pass
clang-tidy
- Keep python3 style of RuntimeError instantiation in
test_file_parser.py to pass check-python-syntax.sh
Change-Id: Iefd8fd0838638f9db146f7b706e541fe2aaf01c1
Reviewed-on: http://gerrit.cloudera.org:8080/19843
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
41 lines
1.1 KiB
Plaintext
41 lines
1.1 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Check that data from mixed-format partitions can be read
|
|
# transparently. Compute sum(int_col) in order to force deserialisation.
# No predicate is applied; the expected result is 1200 rows with an
# int_col sum of 5400.
|
|
select count(*), sum(int_col) from functional.alltypesmixedformat
|
|
---- TYPES
|
|
bigint, bigint
|
|
---- RESULTS
|
|
1200,5400
|
|
====
|
|
---- QUERY
|
|
# Restrict the set of partitions with a predicate on month (1 or 3); the
# remaining partitions are still multi-format.
# NOTE(review): presumably month is a partition column of this table - confirm.
# Expected result: 620 rows with an int_col sum of 2790.
|
|
select count(*), sum(int_col) from functional.alltypesmixedformat where month = 1 or month = 3
|
|
---- TYPES
|
|
bigint, bigint
|
|
---- RESULTS
|
|
620,2790
|
|
====
|
|
---- QUERY
|
|
# Read a single partition (month = 2) on its own.
# Expected result: 280 rows with an int_col sum of 1260.
|
|
select count(*), sum(int_col) from functional.alltypesmixedformat where month = 2
|
|
---- TYPES
|
|
bigint, bigint
|
|
---- RESULTS
|
|
280,1260
|
|
====
|
|
---- QUERY
|
|
# IMPALA-5861: The RowsRead counter should be accurate for a table scan that
|
|
# returns zero slots. This test is run with various batch_size values, which helps
|
|
# reproduce the bug. Scanning multiple file formats triggers the bug because
|
|
# the Parquet count(*) rewrite is disabled when non-Parquet file formats are
|
|
# present.
# The RUNTIME_PROFILE section below expects the RowsRead values to sum to 1200,
# the same number as the count(*) result.
|
|
select count(*) from functional.alltypesmixedformat
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
1200
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1200
|
|
====
|