mirror of
https://github.com/apache/impala.git
synced 2026-01-06 06:01:03 -05:00
IMPALA-10723: Treat materialized view as a table instead of a view
The existing behavior is that materialized views are treated as views and therefore expanded similar to a view when one queries the MV directly (SELECT * FROM materialized_view). This is incorrect since an MV is a regular table with physical properties such as partitioning, clustering etc. and should be treated as such even though it has a view definition associated with it. This patch focuses on the use case where MVs are created as HDFS tables and makes the MVs a derived class of HdfsTable, therefore making it a Table object. It adds support for collecting and displaying statistics on materialized views and these statistics could be leveraged by an external frontend that supports MV based query rewrites (note that such a rewrite is not supported by Impala with or without this patch). Note that we are not introducing new syntax for MVs since DDL, DML operations on MVs are only supported through Hive. Directly querying a MV is permitted but inserts into MVs is not since MVs are supposed to be only modified through an external refresh when the source tables have modifications. If the source tables associated with a materialized view have column masking or row-filtering Ranger policies, querying the MV will throw an error. This behavior is consistent with that of Hive. Testing: - Added transactional tables for alltypes, jointbl and used them as source tables to create materialized view. - Added tests for compute stats, drop stats, show stats and simple select query on a materialized view. - Added test for select on a materialized view when the source table has a column mask. - Modified analyzer tests related to alter, insert, drop of materialized view. Change-Id: If3108996124c6544a97fb0c34b6aff5e324a6cff Reviewed-on: http://gerrit.cloudera.org:8080/17595 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Quanlong Huang <huangquanlong@gmail.com>
This commit is contained in:
committed by
Quanlong Huang
parent
c190966db9
commit
e644c99724
@@ -2541,6 +2541,32 @@ SELECT * from functional.{table_name};
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
alltypes_transactional
|
||||
---- CREATE
|
||||
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
|
||||
id INT,
|
||||
bool_col BOOLEAN,
|
||||
tinyint_col TINYINT,
|
||||
smallint_col SMALLINT,
|
||||
int_col INT,
|
||||
bigint_col BIGINT,
|
||||
float_col FLOAT,
|
||||
double_col DOUBLE,
|
||||
date_string_col STRING,
|
||||
string_col STRING,
|
||||
timestamp_col TIMESTAMP,
|
||||
year int,
|
||||
month int
|
||||
)
|
||||
STORED AS {file_format}
|
||||
TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only');
|
||||
---- DEPENDENT_LOAD
|
||||
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name}
|
||||
SELECT * FROM {db_name}{db_suffix}.alltypes;
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
materialized_view
|
||||
---- HIVE_MAJOR_VERSION
|
||||
3
|
||||
@@ -3403,4 +3429,56 @@ TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalo
|
||||
---- DEPENDENT_LOAD
|
||||
`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \
|
||||
hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_delete_positional /test-warehouse/iceberg_test/hadoop_catalog/ice
|
||||
====
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
jointbl_transactional
|
||||
---- CREATE
|
||||
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
|
||||
test_id BIGINT,
|
||||
test_name STRING,
|
||||
test_zip INT,
|
||||
alltypes_id INT
|
||||
)
|
||||
STORED AS {file_format}
|
||||
TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only');
|
||||
---- DEPENDENT_LOAD
|
||||
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name}
|
||||
SELECT * FROM {db_name}{db_suffix}.jointbl;
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
mv1_alltypes_jointbl
|
||||
---- HIVE_MAJOR_VERSION
|
||||
3
|
||||
---- CREATE_HIVE
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS {db_name}{db_suffix}.{table_name}
|
||||
STORED AS {file_format} AS SELECT t1.smallint_col c1, t1.bool_col c2,
|
||||
t2.test_id c3, min(t1.bigint_col) min_bigint, min(t2.test_zip) min_zip
|
||||
FROM {db_name}{db_suffix}.alltypes_transactional t1
|
||||
JOIN {db_name}{db_suffix}.jointbl_transactional t2 ON (t1.id=t2.alltypes_id)
|
||||
group by t1.smallint_col, t1.bool_col, t2.test_id;
|
||||
---- DEPENDENT_LOAD_HIVE
|
||||
ALTER MATERIALIZED VIEW {db_name}{db_suffix}.{table_name} REBUILD;
|
||||
=====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
mv2_alltypes_jointbl
|
||||
---- HIVE_MAJOR_VERSION
|
||||
3
|
||||
---- CREATE_HIVE
|
||||
-- Create a duplicate materialized view because we want to test
|
||||
-- computing stats, dropping stats on this MV without affecting
|
||||
-- planner tests for which we use the other MV mv1_alltypes_jointbl
|
||||
CREATE MATERIALIZED VIEW IF NOT EXISTS {db_name}{db_suffix}.{table_name}
|
||||
STORED AS {file_format} AS SELECT t1.smallint_col c1, t1.bool_col c2,
|
||||
t2.test_id c3, max(t1.bigint_col) max_bigint, max(t2.test_zip) max_zip
|
||||
FROM {db_name}{db_suffix}.alltypes_transactional t1
|
||||
JOIN {db_name}{db_suffix}.jointbl_transactional t2 ON (t1.id=t2.alltypes_id)
|
||||
group by t1.smallint_col, t1.bool_col, t2.test_id;
|
||||
---- DEPENDENT_LOAD_HIVE
|
||||
ALTER MATERIALIZED VIEW {db_name}{db_suffix}.{table_name} REBUILD;
|
||||
====
|
||||
|
||||
Reference in New Issue
Block a user