impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
Commit 9d112dae23 (Daniel Vanko) IMPALA-14536: Fix CONVERT TO ICEBERG to not throw exception on Iceberg tables
Previously, running ALTER TABLE <table> CONVERT TO ICEBERG on an Iceberg
table produced an error. This patch fixes that: the statement now does
nothing when called on an Iceberg table and returns a 'Table has
already been migrated.' message.

This is achieved by adding a new flag to StatementBase that signals when
a statement ends up as a no-op. When the flag is set, the new
TStmtType::NO_OP is used as the TExecRequest's type, and noop_result can
be used to set the result from the frontend side.
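
As a rough illustration only (the field and accessor names below are
hypothetical; the commit message only says that a flag on StatementBase
signals the no-op case), the mechanism might be sketched in Java as:

    // Sketch, not Impala's actual source: names other than StatementBase,
    // TStmtType and TExecRequest are made up for illustration.
    public abstract class StatementBase {
      private boolean isNoOp_ = false;  // set during analysis when the
                                        // statement turns out to be a no-op
      private String noopResult_;       // message returned to the client

      protected void setNoOpResult(String msg) {
        isNoOp_ = true;
        noopResult_ = msg;
      }
      public boolean isNoOp() { return isNoOp_; }
      public String getNoopResult() { return noopResult_; }
    }

    // Frontend side, when building the exec request (pseudo-code):
    //   if (stmt.isNoOp()) {
    //     execRequest.stmt_type = TStmtType.NO_OP;
    //     execRequest.noop_result = stmt.getNoopResult();
    //     // e.g. "Table has already been migrated."
    //   }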

Tests:
 * extended fe and e2e tests

Change-Id: I41ecbfd350d38e4e3fd7b813a4fc27211d828f73
Reviewed-on: http://gerrit.cloudera.org:8080/23699
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Peter Rozsa <prozsa@cloudera.com>

====
---- QUERY
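# Create a partitioned Parquet source table populated from functional.alltypes.
# Tinyint and smallint columns are declared as int because Iceberg has no such types.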
create table alltypes (
id int,
bool_col boolean,
tinyint_col int,
smallint_col int,
int_col int,
bigint_col bigint,
float_col float,
double_col double,
string_col string,
timestamp_col timestamp)
partitioned by (year int, month int, date_col date, date_string_col string)
stored as parquet;
insert into alltypes partition (year, month, date_col, date_string_col)
select
id,
bool_col,
cast(tinyint_col as int) as tinyint_col,
cast(smallint_col as int) as smallint_col,
int_col,
bigint_col,
float_col,
double_col,
string_col,
timestamp_col,
year,
month,
cast(date_string_col as date format 'MM/DD/YY') as date_col,
date_string_col
from functional.alltypes t;
insert into alltypes partition (year, month, date_col, date_string_col)
values (10000, true, 1, 2, 3, 4, 5.1, 6.2, "str", "2023-05-01 01:02:03", 2023, 5,
cast("2023-05-02" as date) as date_col, null as date_string_col);
describe alltypes;
---- RESULTS
'id','int',regex:'.*'
'bool_col','boolean',regex:'.*'
'tinyint_col','int',regex:'.*'
'smallint_col','int',regex:'.*'
'int_col','int',regex:'.*'
'bigint_col','bigint',regex:'.*'
'float_col','float',regex:'.*'
'double_col','double',regex:'.*'
'string_col','string',regex:'.*'
'timestamp_col','timestamp',regex:'.*'
'year','int',regex:'.*'
'month','int',regex:'.*'
'date_col','date',regex:'.*'
'date_string_col','string',regex:'.*'
---- TYPES
string,string,string
====
---- QUERY
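# Copy the data into a fresh table; this copy is the one converted to Iceberg below.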
create table parquet_partitioned like alltypes stored as parquet;
insert into parquet_partitioned partition(year, month, date_col, date_string_col)
select * from alltypes;
select count(*) from parquet_partitioned;
---- RESULTS
7301
---- TYPES
bigint
====
---- QUERY
describe formatted parquet_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_partitioned','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
alter table parquet_partitioned convert to iceberg tblproperties('iceberg.catalog' = 'hadoop.tables');
---- RESULTS
'Table has been migrated.'
====
---- QUERY
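# Converting an already-migrated table is a no-op that just reports the fact
# (IMPALA-14536).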
alter table parquet_partitioned convert to iceberg tblproperties('iceberg.catalog' = 'hadoop.tables');
---- RESULTS
'Table has already been migrated.'
====
---- QUERY
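# After migration the count(*) is answered from Iceberg metadata, so no Parquet
# row groups are scanned.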
select count(*) from parquet_partitioned;
---- RESULTS
7301
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
====
---- QUERY
describe formatted parquet_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_partitioned','NULL'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','iceberg.catalog ','hadoop.tables '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
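# Create a non-partitioned source table, widening tinyint/smallint to int so the
# table can later be migrated.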
create table alltypesnopart like functional.alltypesnopart stored as parquet;
alter table alltypesnopart change column smallint_col smallint_col int comment 'changed to int col';
alter table alltypesnopart change column tinyint_col tinyint_col int comment 'changed to int col';
describe alltypesnopart;
---- RESULTS
'id','int',regex:'.*'
'bool_col','boolean',regex:'.*'
'tinyint_col','int',regex:'.*'
'smallint_col','int',regex:'.*'
'int_col','int',regex:'.*'
'bigint_col','bigint',regex:'.*'
'float_col','float',regex:'.*'
'double_col','double',regex:'.*'
'date_string_col','string',regex:'.*'
'string_col','string',regex:'.*'
'timestamp_col','timestamp',regex:'.*'
---- TYPES
string,string,string
====
---- QUERY
create table parquet_nopartitioned like alltypesnopart stored as parquet;
insert into parquet_nopartitioned
select id,
bool_col,
tinyint_col,
smallint_col,
int_col,
bigint_col,
float_col,
double_col,
date_string_col,
string_col,
timestamp_col
from alltypes;
select count(*) from parquet_nopartitioned;
---- RESULTS
7301
---- TYPES
bigint
====
---- QUERY
describe formatted parquet_nopartitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_nopartitioned','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
alter table parquet_nopartitioned convert to iceberg tblproperties('iceberg.catalog' = 'hadoop.catalog');
---- CATCH
AnalysisException: The Hadoop Catalog is not supported because the location may change
====
---- QUERY
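# Conversion succeeds when no explicit Iceberg catalog is given.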
alter table parquet_nopartitioned convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
select count(*) from parquet_nopartitioned;
---- RESULTS
7301
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
====
---- QUERY
describe formatted parquet_nopartitioned;
---- RESULTS: VERIFY_IS_NOT_IN
'','iceberg.catalog ','hadoop.tables '
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_nopartitioned','NULL'
row_regex: '','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_nopartitioned/metadata/.*.metadata.json'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
create table hdfs_table (col int);
alter table hdfs_table set tblproperties ('EXTERNAL'='FALSE');
alter table hdfs_table convert to iceberg;
---- CATCH
AnalysisException: CONVERT TO ICEBERG is not supported for managed tables
====
---- QUERY
alter table hdfs_table set tblproperties ('EXTERNAL'='TRUE', 'transactional'='true', 'transactional_properties'='insert_only');
alter table hdfs_table convert to iceberg;
---- CATCH
AnalysisException: CONVERT TO ICEBERG is not supported for transactional tables
====
---- QUERY
# Check that we get an error when converting a table that has a column type that
# is invalid in Iceberg.
create table hdfs_table2 (col tinyint) stored as parquet;
alter table hdfs_table2 convert to iceberg;
---- CATCH
AnalysisException: Incompatible column type in source table. Unsupported Hive type: BYTE, use integer instead
====
---- QUERY
# Test table migration for decimal partitioned table.
create table decimal_tbl (
d2 decimal(10,0),
d3 decimal(20,10),
d4 decimal(38,38),
d5 decimal(10,5),
d6 decimal(9,0))
partitioned by (d1 decimal(9,0))
stored as parquet;
insert into decimal_tbl partition (d1)
select d2, d3, d4, d5, d6, d1 from functional_parquet.decimal_tbl;
describe formatted decimal_tbl;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/decimal_tbl','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string,string,string
====
---- QUERY
alter table decimal_tbl convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted decimal_tbl;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/decimal_tbl','NULL'
row_regex: '','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/decimal_tbl/metadata/.*.metadata.json'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
# Test table migration when the table is at a different location than its name
# would imply.
create table table_at_random_location (i int, s string)
stored as parquet
location '$NAMENODE/test-warehouse/$DATABASE.db/random_location/';
insert into table_at_random_location values (1, "str1"), (2, "str2"), (3, "str3");
select * from table_at_random_location;
---- RESULTS
1,'str1'
2,'str2'
3,'str3'
---- TYPES
int, string
====
---- QUERY
describe formatted table_at_random_location;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/random_location','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
alter table table_at_random_location convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted table_at_random_location;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/random_location','NULL'
row_regex: '','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/random_location/metadata/.*.metadata.json'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
select * from table_at_random_location;
---- RESULTS
1,'str1'
2,'str2'
3,'str3'
---- TYPES
int, string
====
---- QUERY
# Converting to the non-default format-version 1 should work.
create table converted_into_v1 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v1 convert to iceberg tblproperties ('format-version'='1');
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted converted_into_v1;
---- RESULTS: VERIFY_IS_SUBSET
'','format-version ','1 '
---- TYPES
string, string, string
====
---- QUERY
# Converting directly into a V3 Iceberg table is currently not supported.
create table converted_into_v3 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v3 convert to iceberg tblproperties ('format-version'='3');
---- CATCH
AnalysisException: Unsupported Iceberg format version '3'.
====
---- QUERY
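# Use a debug action to make the Iceberg call fail mid-conversion and verify that
# the error is surfaced.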
create table simple_tbl (i int) stored as parquet;
set debug_action="CONVERT_TABLE_FAIL_ICEBERG_CALL";
alter table simple_tbl convert to iceberg;
---- CATCH
ImpalaRuntimeException: Unable load data files for location:
====
---- QUERY
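# Verify that partition string values containing special characters survive the
# migration.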
create table special_chars (i int) partitioned by (s string) stored as parquet;
insert into special_chars partition (s='11 22-33&44%55"') values (1);
insert into special_chars partition (s='aa - bb') values (2);
insert into special_chars partition (s=null) values (3);
insert into special_chars partition (s='11/22/33') values (4);
alter table special_chars convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
select * from special_chars;
---- RESULTS
1,'11 22-33&44%55"'
2,'aa - bb'
3,'NULL'
4,'11/22/33'
---- TYPES
int, string
====
---- QUERY
select * from special_chars where s='aa - bb';
---- RESULTS
2,'aa - bb'
---- TYPES
int, string
====
---- QUERY
select * from special_chars where s is null;
---- RESULTS
3,'NULL'
---- TYPES
int, string
====
---- QUERY
select * from special_chars where s='11/22/33';
---- RESULTS
4,'11/22/33'
---- TYPES
int, string
====