mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
Iceberg tables Similarly to 'COMPUTE INCREMENTAL STATS', 'DROP INCREMENTAL STATS' should prohibit the partition variant for Iceberg tables. Testing: - FE: fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java - EE: tests/query_test/test_iceberg.py Change-Id: If3d9ef45a9c9ddce9a5e43c5058ae84f919e0283 Reviewed-on: http://gerrit.cloudera.org:8080/23394 Reviewed-by: Noemi Pap-Takacs <npaptakacs@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
453 lines
14 KiB
Plaintext
453 lines
14 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table ice_alltypes stored as iceberg
|
|
as select * from functional_parquet.iceberg_alltypes_part;
|
|
show column stats ice_alltypes;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
|
|
'p_int','INT',-1,-1,4,4,-1,-1
|
|
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
|
|
'p_float','FLOAT',-1,-1,4,4,-1,-1
|
|
'p_double','DOUBLE',-1,-1,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
|
|
'p_date','DATE',-1,-1,4,4,-1,-1
|
|
'p_string','STRING',-1,-1,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
show table stats ice_alltypes
|
|
---- LABELS
|
|
#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy
|
|
---- RESULTS: VERIFY_IS_EQUAL
|
|
# The file size is on the boundary between 2.32KB and 2.33KB. The build version is written
|
|
# into the file, and "x.y.z-RELEASE" is one byte shorter than "x.y.z-SNAPSHOT". In release
|
|
# builds the file size is 2.32KB, in snapshot builds it is 2.33KB.
|
|
2,1,regex:'2.3[23]KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/ice_alltypes','$ERASURECODE_POLICY'
|
|
---- TYPES
|
|
BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Table-level stats are automatically updated.
|
|
# 'impala.lastComputeStatsTime' is not set yet.
|
|
describe formatted ice_alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
compute stats ice_alltypes;
|
|
show column stats ice_alltypes;
|
|
---- RESULTS
|
|
'i','INT',2,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
describe formatted ice_alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop stats ice_alltypes;
|
|
---- RESULTS
|
|
'Stats have been dropped.'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Table-level stats are not affected by DROP STATS.
|
|
describe formatted ice_alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Column stats have been dropped.
|
|
show column stats ice_alltypes;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
|
|
'p_int','INT',-1,-1,4,4,-1,-1
|
|
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
|
|
'p_float','FLOAT',-1,-1,4,4,-1,-1
|
|
'p_double','DOUBLE',-1,-1,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
|
|
'p_date','DATE',-1,-1,4,4,-1,-1
|
|
'p_string','STRING',-1,-1,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
compute incremental stats ice_alltypes;
|
|
show column stats ice_alltypes;
|
|
---- RESULTS
|
|
'i','INT',2,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
describe formatted ice_alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Setting 'numRows' doesn't have effect on Iceberg tables.
|
|
ALTER TABLE ice_alltypes
|
|
SET TBLPROPERTIES('numRows'='1000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
|
|
describe formatted ice_alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
'','numRows ','2 '
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Users can still set column stats manually as they are managed outside of Iceberg.
|
|
ALTER TABLE ice_alltypes SET COLUMN STATS i ('numDVs'='100');
|
|
show column stats ice_alltypes;
|
|
---- RESULTS
|
|
'i','INT',100,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
drop stats ice_alltypes;
|
|
---- RESULTS
|
|
'Stats have been dropped.'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Table-level stats are not affected by DROP STATS.
|
|
describe formatted ice_alltypes;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Column stats have been dropped.
|
|
show column stats ice_alltypes;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
|
|
'p_int','INT',-1,-1,4,4,-1,-1
|
|
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
|
|
'p_float','FLOAT',-1,-1,4,4,-1,-1
|
|
'p_double','DOUBLE',-1,-1,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
|
|
'p_date','DATE',-1,-1,4,4,-1,-1
|
|
'p_string','STRING',-1,-1,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
create table ice_alltypes_ht stored as iceberg
|
|
tblproperties ('iceberg.catalog'='hadoop.tables')
|
|
as select * from functional_parquet.iceberg_alltypes_part;
|
|
show column stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
|
|
'p_int','INT',-1,-1,4,4,-1,-1
|
|
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
|
|
'p_float','FLOAT',-1,-1,4,4,-1,-1
|
|
'p_double','DOUBLE',-1,-1,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
|
|
'p_date','DATE',-1,-1,4,4,-1,-1
|
|
'p_string','STRING',-1,-1,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Table-level stats are not automatically updated for non-HMS integrated
|
|
# Iceberg tables.
|
|
# 'impala.lastComputeStatsTime' is not set yet.
|
|
describe formatted ice_alltypes_ht;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
compute stats ice_alltypes_ht;
|
|
show column stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'i','INT',2,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
describe formatted ice_alltypes_ht;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
drop stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'Stats have been dropped.'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Table-level stats are dropped for non-HMS integrated Iceberg tables.
|
|
describe formatted ice_alltypes_ht;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'','numRows ','2 '
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Column stats have been dropped.
|
|
show column stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
|
|
'p_int','INT',-1,-1,4,4,-1,-1
|
|
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
|
|
'p_float','FLOAT',-1,-1,4,4,-1,-1
|
|
'p_double','DOUBLE',-1,-1,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
|
|
'p_date','DATE',-1,-1,4,4,-1,-1
|
|
'p_string','STRING',-1,-1,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
compute incremental stats ice_alltypes_ht;
|
|
show column stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'i','INT',2,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
describe formatted ice_alltypes_ht;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
|
|
'','numRows ','2 '
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Users can set 'numRows' for non-HMS integrated Iceberfg tables.
|
|
ALTER TABLE ice_alltypes_ht
|
|
SET TBLPROPERTIES('numRows'='1000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
|
|
describe formatted ice_alltypes_ht;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','numRows ','1000 '
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Users can set column stats manually.
|
|
ALTER TABLE ice_alltypes_ht SET COLUMN STATS i ('numDVs'='100');
|
|
show column stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'i','INT',100,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
drop stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'Stats have been dropped.'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Table-level stats are dropped for non-HMS integrated Iceberg tables.
|
|
describe formatted ice_alltypes_ht;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'','numFiles ','1 '
|
|
'','numRows ','2 '
|
|
row_regex:'','totalSize ','\d+\s+'
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Column stats have been dropped.
|
|
show column stats ice_alltypes_ht;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
|
|
'p_int','INT',-1,-1,4,4,-1,-1
|
|
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
|
|
'p_float','FLOAT',-1,-1,4,4,-1,-1
|
|
'p_double','DOUBLE',-1,-1,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
|
|
'p_date','DATE',-1,-1,4,4,-1,-1
|
|
'p_string','STRING',-1,-1,-1,-1,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# COMPUTE INCREMENTAL STATS without partition clause fall back to original
|
|
# COMPUTE STATS
|
|
create table ice_alltypes_part
|
|
partitioned by spec (i)
|
|
stored as iceberg
|
|
as select * from functional_parquet.iceberg_alltypes_part;
|
|
COMPUTE INCREMENTAL STATS ice_alltypes_part;
|
|
show column stats ice_alltypes_part;
|
|
---- RESULTS
|
|
'i','INT',2,0,4,4,-1,-1
|
|
'p_bool','BOOLEAN',2,0,1,1,2,0
|
|
'p_int','INT',1,0,4,4,-1,-1
|
|
'p_bigint','BIGINT',1,0,8,8,-1,-1
|
|
'p_float','FLOAT',1,0,4,4,-1,-1
|
|
'p_double','DOUBLE',1,0,8,8,-1,-1
|
|
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
|
|
'p_date','DATE',1,0,4,4,-1,-1
|
|
'p_string','STRING',1,0,6,6,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
COMPUTE INCREMENTAL STATS ice_alltypes_part PARTITION (i=1);
|
|
---- CATCH
|
|
COMPUTE INCREMENTAL STATS ... PARTITION not supported for Iceberg table
|
|
---- QUERY
|
|
DROP INCREMENTAL STATS ice_alltypes_part PARTITION (i=1);
|
|
---- CATCH
|
|
DROP INCREMENTAL STATS ... PARTITION not supported for Iceberg table
|
|
====
|
|
|
|
---- QUERY
|
|
# Tests for 'impala.computeStatsSnapshotIds'.
|
|
create table test_ice (
|
|
i INT,
|
|
b BIGINT,
|
|
s STRING
|
|
) stored as iceberg;
|
|
compute stats test_ice;
|
|
describe formatted test_ice;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
# For a table that contains no snapshot, the snapshot ids are -1.
|
|
row_regex:'','impala.computeStatsSnapshotIds','1-3:-1\s*'
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Create a new snapshot, compute stats, and verify that the snapshot id is the same for
|
|
# all columns.
|
|
insert into test_ice values (1, 1, "one"), (1, 2, "two");
|
|
compute stats test_ice;
|
|
describe formatted test_ice;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.computeStatsSnapshotIds','1-3:(\d+)\s*'
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Compute stats for only the first and third columns, verify that they have the same
|
|
# snapshot id.
|
|
insert into test_ice values (10, 10, "ten");
|
|
compute stats test_ice(i, s);
|
|
describe formatted test_ice;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.computeStatsSnapshotIds','1:(\d+),2:(\d+),3:\1\s*'
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Compute stats for only the second and third columns, verify that they have the same
|
|
# snapshot id.
|
|
insert into test_ice values (100, 100, "hundred");
|
|
compute stats test_ice(b, s);
|
|
describe formatted test_ice;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'','impala.computeStatsSnapshotIds','1:(\d+),2-3:(\d+)\s*'
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
drop stats test_ice;
|
|
describe formatted test_ice;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
row_regex:'','impala.computeStatsSnapshotIds','.*'
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|