Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-compute-stats.test
Mihaly Szjatinya 591bf48c72 IMPALA-14013: DROP INCREMENTAL STATS throws NullPointerException for
Iceberg tables

Similarly to 'COMPUTE INCREMENTAL STATS', 'DROP INCREMENTAL STATS'
should prohibit the partition variant for Iceberg tables.

Testing:
- FE: fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
- EE: tests/query_test/test_iceberg.py

Change-Id: If3d9ef45a9c9ddce9a5e43c5058ae84f919e0283
Reviewed-on: http://gerrit.cloudera.org:8080/23394
Reviewed-by: Noemi Pap-Takacs <npaptakacs@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-09-18 09:54:26 +00:00

453 lines
14 KiB
Plaintext

====
---- QUERY
create table ice_alltypes stored as iceberg
as select * from functional_parquet.iceberg_alltypes_part;
show column stats ice_alltypes;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
'p_int','INT',-1,-1,4,4,-1,-1
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
'p_float','FLOAT',-1,-1,4,4,-1,-1
'p_double','DOUBLE',-1,-1,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
'p_date','DATE',-1,-1,4,4,-1,-1
'p_string','STRING',-1,-1,-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
show table stats ice_alltypes
---- LABELS
#ROWS, #Files, Size, Bytes Cached, Cache Replication, Format, Incremental stats, Location, EC Policy
---- RESULTS: VERIFY_IS_EQUAL
# The file size is on the boundary between 2.32KB and 2.33KB. The build version is written
# into the file, and "x.y.z-RELEASE" is one byte shorter than "x.y.z-SNAPSHOT". In release
# builds the file size is 2.32KB, in snapshot builds it is 2.33KB.
2,1,regex:'2.3[23]KB','NOT CACHED','NOT CACHED','PARQUET','false','$NAMENODE/test-warehouse/$DATABASE.db/ice_alltypes','$ERASURECODE_POLICY'
---- TYPES
BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,STRING
====
---- QUERY
# Table-level stats are automatically updated.
# 'impala.lastComputeStatsTime' is not set yet.
describe formatted ice_alltypes;
---- RESULTS: VERIFY_IS_SUBSET
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- RESULTS: VERIFY_IS_NOT_IN
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
compute stats ice_alltypes;
show column stats ice_alltypes;
---- RESULTS
'i','INT',2,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
describe formatted ice_alltypes;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
drop stats ice_alltypes;
---- RESULTS
'Stats have been dropped.'
---- TYPES
STRING
====
---- QUERY
# Table-level stats are not affected by DROP STATS.
describe formatted ice_alltypes;
---- RESULTS: VERIFY_IS_SUBSET
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- RESULTS: VERIFY_IS_NOT_IN
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Column stats have been dropped.
show column stats ice_alltypes;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
'p_int','INT',-1,-1,4,4,-1,-1
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
'p_float','FLOAT',-1,-1,4,4,-1,-1
'p_double','DOUBLE',-1,-1,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
'p_date','DATE',-1,-1,4,4,-1,-1
'p_string','STRING',-1,-1,-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
compute incremental stats ice_alltypes;
show column stats ice_alltypes;
---- RESULTS
'i','INT',2,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
describe formatted ice_alltypes;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Setting 'numRows' doesn't have effect on Iceberg tables.
ALTER TABLE ice_alltypes
SET TBLPROPERTIES('numRows'='1000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
describe formatted ice_alltypes;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
'','numRows ','2 '
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Users can still set column stats manually as they are managed outside of Iceberg.
ALTER TABLE ice_alltypes SET COLUMN STATS i ('numDVs'='100');
show column stats ice_alltypes;
---- RESULTS
'i','INT',100,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
drop stats ice_alltypes;
---- RESULTS
'Stats have been dropped.'
---- TYPES
STRING
====
---- QUERY
# Table-level stats are not affected by DROP STATS.
describe formatted ice_alltypes;
---- RESULTS: VERIFY_IS_SUBSET
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Column stats have been dropped.
show column stats ice_alltypes;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
'p_int','INT',-1,-1,4,4,-1,-1
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
'p_float','FLOAT',-1,-1,4,4,-1,-1
'p_double','DOUBLE',-1,-1,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
'p_date','DATE',-1,-1,4,4,-1,-1
'p_string','STRING',-1,-1,-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
create table ice_alltypes_ht stored as iceberg
tblproperties ('iceberg.catalog'='hadoop.tables')
as select * from functional_parquet.iceberg_alltypes_part;
show column stats ice_alltypes_ht;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
'p_int','INT',-1,-1,4,4,-1,-1
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
'p_float','FLOAT',-1,-1,4,4,-1,-1
'p_double','DOUBLE',-1,-1,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
'p_date','DATE',-1,-1,4,4,-1,-1
'p_string','STRING',-1,-1,-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Table-level stats are not automatically updated for non-HMS integrated
# Iceberg tables.
# 'impala.lastComputeStatsTime' is not set yet.
describe formatted ice_alltypes_ht;
---- RESULTS: VERIFY_IS_NOT_IN
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
compute stats ice_alltypes_ht;
show column stats ice_alltypes_ht;
---- RESULTS
'i','INT',2,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
describe formatted ice_alltypes_ht;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
drop stats ice_alltypes_ht;
---- RESULTS
'Stats have been dropped.'
---- TYPES
STRING
====
---- QUERY
# Table-level stats are dropped for non-HMS integrated Iceberg tables.
describe formatted ice_alltypes_ht;
---- RESULTS: VERIFY_IS_NOT_IN
'','numRows ','2 '
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
row_regex:'','totalSize ','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Column stats have been dropped.
show column stats ice_alltypes_ht;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
'p_int','INT',-1,-1,4,4,-1,-1
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
'p_float','FLOAT',-1,-1,4,4,-1,-1
'p_double','DOUBLE',-1,-1,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
'p_date','DATE',-1,-1,4,4,-1,-1
'p_string','STRING',-1,-1,-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
compute incremental stats ice_alltypes_ht;
show column stats ice_alltypes_ht;
---- RESULTS
'i','INT',2,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
describe formatted ice_alltypes_ht;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.lastComputeStatsTime','\d+\s+'
'','numRows ','2 '
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Users can set 'numRows' for non-HMS integrated Iceberfg tables.
ALTER TABLE ice_alltypes_ht
SET TBLPROPERTIES('numRows'='1000', 'STATS_GENERATED_VIA_STATS_TASK'='true');
describe formatted ice_alltypes_ht;
---- RESULTS: VERIFY_IS_SUBSET
'','numRows ','1000 '
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Users can set column stats manually.
ALTER TABLE ice_alltypes_ht SET COLUMN STATS i ('numDVs'='100');
show column stats ice_alltypes_ht;
---- RESULTS
'i','INT',100,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
drop stats ice_alltypes_ht;
---- RESULTS
'Stats have been dropped.'
---- TYPES
STRING
====
---- QUERY
# Table-level stats are dropped for non-HMS integrated Iceberg tables.
describe formatted ice_alltypes_ht;
---- RESULTS: VERIFY_IS_NOT_IN
'','numFiles ','1 '
'','numRows ','2 '
row_regex:'','totalSize ','\d+\s+'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Column stats have been dropped.
show column stats ice_alltypes_ht;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
'p_bool','BOOLEAN',-1,-1,1,1,-1,-1
'p_int','INT',-1,-1,4,4,-1,-1
'p_bigint','BIGINT',-1,-1,8,8,-1,-1
'p_float','FLOAT',-1,-1,4,4,-1,-1
'p_double','DOUBLE',-1,-1,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',-1,-1,4,4,-1,-1
'p_date','DATE',-1,-1,4,4,-1,-1
'p_string','STRING',-1,-1,-1,-1,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# COMPUTE INCREMENTAL STATS without partition clause fall back to original
# COMPUTE STATS
create table ice_alltypes_part
partitioned by spec (i)
stored as iceberg
as select * from functional_parquet.iceberg_alltypes_part;
COMPUTE INCREMENTAL STATS ice_alltypes_part;
show column stats ice_alltypes_part;
---- RESULTS
'i','INT',2,0,4,4,-1,-1
'p_bool','BOOLEAN',2,0,1,1,2,0
'p_int','INT',1,0,4,4,-1,-1
'p_bigint','BIGINT',1,0,8,8,-1,-1
'p_float','FLOAT',1,0,4,4,-1,-1
'p_double','DOUBLE',1,0,8,8,-1,-1
'p_decimal','DECIMAL(6,3)',1,0,4,4,-1,-1
'p_date','DATE',1,0,4,4,-1,-1
'p_string','STRING',1,0,6,6,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
COMPUTE INCREMENTAL STATS ice_alltypes_part PARTITION (i=1);
---- CATCH
COMPUTE INCREMENTAL STATS ... PARTITION not supported for Iceberg table
---- QUERY
DROP INCREMENTAL STATS ice_alltypes_part PARTITION (i=1);
---- CATCH
DROP INCREMENTAL STATS ... PARTITION not supported for Iceberg table
====
---- QUERY
# Tests for 'impala.computeStatsSnapshotIds'.
create table test_ice (
i INT,
b BIGINT,
s STRING
) stored as iceberg;
compute stats test_ice;
describe formatted test_ice;
---- RESULTS: VERIFY_IS_SUBSET
# For a table that contains no snapshot, the snapshot ids are -1.
row_regex:'','impala.computeStatsSnapshotIds','1-3:-1\s*'
---- TYPES
STRING,STRING,STRING
====
---- QUERY
# Create a new snapshot, compute stats, and verify that the snapshot id is the same for
# all columns.
insert into test_ice values (1, 1, "one"), (1, 2, "two");
compute stats test_ice;
describe formatted test_ice;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.computeStatsSnapshotIds','1-3:(\d+)\s*'
---- TYPES
STRING,STRING,STRING
====
---- QUERY
# Compute stats for only the first and third columns, verify that they have the same
# snapshot id.
insert into test_ice values (10, 10, "ten");
compute stats test_ice(i, s);
describe formatted test_ice;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.computeStatsSnapshotIds','1:(\d+),2:(\d+),3:\1\s*'
---- TYPES
STRING,STRING,STRING
====
---- QUERY
# Compute stats for only the second and third columns, verify that they have the same
# snapshot id.
insert into test_ice values (100, 100, "hundred");
compute stats test_ice(b, s);
describe formatted test_ice;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'','impala.computeStatsSnapshotIds','1:(\d+),2-3:(\d+)\s*'
---- TYPES
STRING,STRING,STRING
====
---- QUERY
drop stats test_ice;
describe formatted test_ice;
---- RESULTS: VERIFY_IS_NOT_IN
row_regex:'','impala.computeStatsSnapshotIds','.*'
---- TYPES
STRING,STRING,STRING
====