mirror of
https://github.com/apache/impala.git
synced 2026-01-26 12:02:21 -05:00
This change ensures that the planner computes parquet conjuncts
only when for scans containing parquet files. Additionally, it
also handles PARQUET_DICTIONARY_FILTERING and
PARQUET_READ_STATISTICS query options in the planner.
Testing was carried out independently on parquet and non-parquet
scans:
1. Parquet scans were tested via the existing parquet-filtering
planner test. Additionally, a new test
[parquet-filtering-disabled] was added to ensure that the
explain plan generated skips parquet predicates based on the
query options.
2. Non-parquet scans were tested manually to ensure that the
functions to compute parquet conjucts were not invoked.
Additional test cases were added to the parquet-filtering
planner test to scan non parquet tables and ensure that the
plans do not contain conjuncts based on parquet statistics.
3. A parquet partition was added to the alltypesmixedformat
table in the functional database. Planner tests were added
to ensure that Parquet conjuncts are constructed only when
the Parquet partition is included in the query.
Change-Id: I9d6c26d42db090c8a15c602f6419ad6399c329e7
Reviewed-on: http://gerrit.cloudera.org:8080/10704
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
175 lines
13 KiB
Plaintext
175 lines
13 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Stats on a partitioned Hdfs table stored as text
|
|
show table stats alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',310,1,'19.95KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=1'
|
|
'2009','2',280,1,'18.12KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=2'
|
|
'2009','3',310,1,'20.06KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=3'
|
|
'2009','4',300,1,'19.61KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=4'
|
|
'2009','5',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=5'
|
|
'2009','6',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=6'
|
|
'2009','7',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=7'
|
|
'2009','8',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=8'
|
|
'2009','9',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=9'
|
|
'2009','10',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=10'
|
|
'2009','11',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=11'
|
|
'2009','12',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=12'
|
|
'2010','1',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=1'
|
|
'2010','2',280,1,'18.39KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=2'
|
|
'2010','3',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=3'
|
|
'2010','4',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=4'
|
|
'2010','5',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=5'
|
|
'2010','6',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=6'
|
|
'2010','7',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=7'
|
|
'2010','8',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=8'
|
|
'2010','9',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=9'
|
|
'2010','10',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=10'
|
|
'2010','11',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=11'
|
|
'2010','12',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=12'
|
|
'Total','',7300,24,'478.45KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# SHOW PARTITIONS returns the same results as SHOW TABLE STATS.
|
|
show partitions alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',310,1,'19.95KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=1'
|
|
'2009','2',280,1,'18.12KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=2'
|
|
'2009','3',310,1,'20.06KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=3'
|
|
'2009','4',300,1,'19.61KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=4'
|
|
'2009','5',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=5'
|
|
'2009','6',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=6'
|
|
'2009','7',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=7'
|
|
'2009','8',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=8'
|
|
'2009','9',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=9'
|
|
'2009','10',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=10'
|
|
'2009','11',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=11'
|
|
'2009','12',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2009/month=12'
|
|
'2010','1',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=1'
|
|
'2010','2',280,1,'18.39KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=2'
|
|
'2010','3',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=3'
|
|
'2010','4',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=4'
|
|
'2010','5',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=5'
|
|
'2010','6',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=6'
|
|
'2010','7',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=7'
|
|
'2010','8',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=8'
|
|
'2010','9',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=9'
|
|
'2010','10',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=10'
|
|
'2010','11',300,1,'19.71KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=11'
|
|
'2010','12',310,1,'20.36KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypes/year=2010/month=12'
|
|
'Total','',7300,24,'478.45KB','0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on an unpartitioned Hdfs table stored as text
|
|
show table stats alltypesaggmultifilesnopart
|
|
---- LABELS
|
|
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
11000,4,'805.23KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypesaggmultifilesnopart'
|
|
---- TYPES
|
|
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on an Hdfs with mixed partition formats
|
|
show table stats alltypesmixedformat
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
'2009','1',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=1'
|
|
'2009','2',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','SEQUENCE_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=2'
|
|
'2009','3',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','RC_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=3'
|
|
'2009','4',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=4'
|
|
'Total','',-1,4,regex:.+KB,'0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Stats on a table that has no statistics
|
|
show table stats functional_parquet.alltypes
|
|
---- LABELS
|
|
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
|
|
---- RESULTS
|
|
'2009','1',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=1'
|
|
'2009','2',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=2'
|
|
'2009','3',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=3'
|
|
'2009','4',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=4'
|
|
'2009','5',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=5'
|
|
'2009','6',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=6'
|
|
'2009','7',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=7'
|
|
'2009','8',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=8'
|
|
'2009','9',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=9'
|
|
'2009','10',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=10'
|
|
'2009','11',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=11'
|
|
'2009','12',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2009/month=12'
|
|
'2010','1',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=1'
|
|
'2010','2',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=2'
|
|
'2010','3',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=3'
|
|
'2010','4',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=4'
|
|
'2010','5',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=5'
|
|
'2010','6',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=6'
|
|
'2010','7',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=7'
|
|
'2010','8',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=8'
|
|
'2010','9',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=9'
|
|
'2010','10',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=10'
|
|
'2010','11',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=11'
|
|
'2010','12',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/alltypes_parquet/year=2010/month=12'
|
|
'Total','',-1,24,regex:.+KB,'0B','','','',''
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Column stats on an HdfsTable
|
|
show column stats alltypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
'id','INT',7300,-1,4,4
|
|
'bool_col','BOOLEAN',2,-1,1,1
|
|
'tinyint_col','TINYINT',10,-1,1,1
|
|
'smallint_col','SMALLINT',10,-1,2,2
|
|
'int_col','INT',10,-1,4,4
|
|
'bigint_col','BIGINT',10,-1,8,8
|
|
'float_col','FLOAT',10,-1,4,4
|
|
'double_col','DOUBLE',10,-1,8,8
|
|
'date_string_col','STRING',736,-1,8,8
|
|
'string_col','STRING',10,-1,1,1
|
|
'timestamp_col','TIMESTAMP',7300,-1,16,16
|
|
'year','INT',2,0,4,4
|
|
'month','INT',12,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Column column stats for a table with complex types.
|
|
show column stats functional.allcomplextypes
|
|
---- LABELS
|
|
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
|
|
---- RESULTS
|
|
---- RESULTS
|
|
'id','INT',-1,-1,4,4
|
|
'int_array_col','ARRAY<INT>',-1,-1,-1,-1
|
|
'array_array_col','ARRAY<ARRAY<INT>>',-1,-1,-1,-1
|
|
'map_array_col','ARRAY<MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'struct_array_col','ARRAY<STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
|
|
'int_map_col','MAP<STRING,INT>',-1,-1,-1,-1
|
|
'array_map_col','MAP<STRING,ARRAY<INT>>',-1,-1,-1,-1
|
|
'map_map_col','MAP<STRING,MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'struct_map_col','MAP<STRING,STRUCT<f1:BIGINT,f2:STRING>>',-1,-1,-1,-1
|
|
'int_struct_col','STRUCT<f1:INT,f2:INT>',-1,-1,-1,-1
|
|
'complex_struct_col','STRUCT<f1:INT,f2:ARRAY<INT>,f3:MAP<STRING,INT>>',-1,-1,-1,-1
|
|
'nested_struct_col','STRUCT<f1:INT,f2:STRUCT<f11:BIGINT,f12:STRUCT<f21:BIGINT>>>',-1,-1,-1,-1
|
|
'complex_nested_struct_col','STRUCT<f1:INT,f2:ARRAY<STRUCT<f11:BIGINT,f12:MAP<STRING,STRUCT<f21:BIGINT>>>>>',-1,-1,-1,-1
|
|
'year','INT',0,0,4,4
|
|
'month','INT',0,0,4,4
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
|
|
====
|