====
---- QUERY
create table alltypes (
  id int,
  bool_col boolean,
  tinyint_col int,
  smallint_col int,
  int_col int,
  bigint_col bigint,
  float_col float,
  double_col double,
  string_col string,
  timestamp_col timestamp)
partitioned by (year int, month int, date_col date, date_string_col string)
stored as parquet;
insert into alltypes partition (year, month, date_col, date_string_col)
select
  id,
  bool_col,
  cast(tinyint_col as int) as tinyint_col,
  cast(smallint_col as int) as smallint_col,
  int_col,
  bigint_col,
  float_col,
  double_col,
  string_col,
  timestamp_col,
  year,
  month,
  cast(date_string_col as date format 'MM/DD/YY') as date_col,
  date_string_col
from functional.alltypes t;
insert into alltypes partition (year, month, date_col, date_string_col)
values (10000, true, 1, 2, 3, 4, 5.1, 6.2, "str", "2023-05-01 01:02:03", 2023, 5,
  cast("2023-05-02" as date) as date_col, null as date_string_col);
describe alltypes;
---- RESULTS
'id','int',regex:'.*'
'bool_col','boolean',regex:'.*'
'tinyint_col','int',regex:'.*'
'smallint_col','int',regex:'.*'
'int_col','int',regex:'.*'
'bigint_col','bigint',regex:'.*'
'float_col','float',regex:'.*'
'double_col','double',regex:'.*'
'string_col','string',regex:'.*'
'timestamp_col','timestamp',regex:'.*'
'year','int',regex:'.*'
'month','int',regex:'.*'
'date_col','date',regex:'.*'
'date_string_col','string',regex:'.*'
---- TYPES
string,string,string
====
---- QUERY
create table parquet_partitioned like alltypes stored as parquet;
insert into parquet_partitioned partition(year, month, date_col, date_string_col)
select * from alltypes;
select count(*) from parquet_partitioned;
---- RESULTS
7301
---- TYPES
bigint
====
---- QUERY
describe formatted parquet_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_partitioned','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
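# Migrate the partitioned Parquet table to Iceberg in place;
# 'iceberg.catalog'='hadoop.tables' keeps the converted table in the
# Iceberg HadoopTables catalog.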
alter table parquet_partitioned convert to iceberg tblproperties('iceberg.catalog' = 'hadoop.tables');
---- RESULTS
'Table has been migrated.'
====
---- QUERY
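# Running CONVERT TO ICEBERG on a table that is already Iceberg is a no-op:
# the statement does nothing and only reports that the table has already been
# migrated.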
alter table parquet_partitioned convert to iceberg tblproperties('iceberg.catalog' = 'hadoop.tables');
---- RESULTS
'Table has already been migrated.'
====
---- QUERY
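# The migrated table still returns the original row count. NumRowGroups is 0
# below because a plain count(*) on an Iceberg table can be answered from
# table metadata without scanning any Parquet row groups.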
select count(*) from parquet_partitioned;
---- RESULTS
7301
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
====
---- QUERY
describe formatted parquet_partitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_partitioned','NULL'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','iceberg.catalog ','hadoop.tables '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
create table alltypesnopart like functional.alltypesnopart stored as parquet;
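# TINYINT and SMALLINT have no Iceberg equivalent, so widen these columns to
# INT to make the table migratable.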
alter table alltypesnopart change column smallint_col smallint_col int comment 'changed to int col';
alter table alltypesnopart change column tinyint_col tinyint_col int comment 'changed to int col';
describe alltypesnopart;
---- RESULTS
'id','int',regex:'.*'
'bool_col','boolean',regex:'.*'
'tinyint_col','int',regex:'.*'
'smallint_col','int',regex:'.*'
'int_col','int',regex:'.*'
'bigint_col','bigint',regex:'.*'
'float_col','float',regex:'.*'
'double_col','double',regex:'.*'
'date_string_col','string',regex:'.*'
'string_col','string',regex:'.*'
'timestamp_col','timestamp',regex:'.*'
---- TYPES
string,string,string
====
---- QUERY
create table parquet_nopartitioned like alltypesnopart stored as parquet;
insert into parquet_nopartitioned
select id,
  bool_col,
  tinyint_col,
  smallint_col,
  int_col,
  bigint_col,
  float_col,
  double_col,
  date_string_col,
  string_col,
  timestamp_col
from alltypes;
select count(*) from parquet_nopartitioned;
---- RESULTS
7301
---- TYPES
bigint
====
---- QUERY
describe formatted parquet_nopartitioned;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_nopartitioned','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
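# Migrating into the Hadoop Catalog is rejected, since that catalog could
# relocate the table.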
alter table parquet_nopartitioned convert to iceberg tblproperties('iceberg.catalog' = 'hadoop.catalog');
---- CATCH
AnalysisException: The Hadoop Catalog is not supported because the location may change
====
---- QUERY
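# Without an explicit 'iceberg.catalog' the table is converted using the
# default catalog; it keeps its location, and the 'metadata_location'
# property checked below points under it.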
alter table parquet_nopartitioned convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
select count(*) from parquet_nopartitioned;
---- RESULTS
7301
---- TYPES
bigint
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
====
---- QUERY
describe formatted parquet_nopartitioned;
---- RESULTS: VERIFY_IS_NOT_IN
'','iceberg.catalog ','hadoop.tables '
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_nopartitioned','NULL'
row_regex: '','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/parquet_nopartitioned/metadata/.*.metadata.json'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
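# CONVERT TO ICEBERG only supports external tables; making the table managed
# must produce an error.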
create table hdfs_table (col int);
alter table hdfs_table set tblproperties ('EXTERNAL'='FALSE');
alter table hdfs_table convert to iceberg;
---- CATCH
AnalysisException: CONVERT TO ICEBERG is not supported for managed tables
====
---- QUERY
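# Make the table external again and mark it insert-only ACID: transactional
# tables are rejected as well.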
alter table hdfs_table set tblproperties ('EXTERNAL'='TRUE', 'transactional'='true', 'transactional_properties'='insert_only');
alter table hdfs_table convert to iceberg;
---- CATCH
AnalysisException: CONVERT TO ICEBERG is not supported for transactional tables
====
---- QUERY
# Check that we get an error when converting a table that has a column type that is invalid in Iceberg.
create table hdfs_table2 (col tinyint) stored as parquet;
alter table hdfs_table2 convert to iceberg;
---- CATCH
AnalysisException: Incompatible column type in source table. Unsupported Hive type: BYTE, use integer instead
====
---- QUERY
# Test table migration for a decimal-partitioned table.
create table decimal_tbl (
  d2 decimal(10,0),
  d3 decimal(20,10),
  d4 decimal(38,38),
  d5 decimal(10,5),
  d6 decimal(9,0))
partitioned by (d1 decimal(9,0))
stored as parquet;
insert into decimal_tbl partition (d1)
select d2, d3, d4, d5, d6, d1 from functional_parquet.decimal_tbl;
describe formatted decimal_tbl;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/decimal_tbl','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string,string,string
====
---- QUERY
alter table decimal_tbl convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted decimal_tbl;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/decimal_tbl','NULL'
row_regex: '','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/decimal_tbl/metadata/.*.metadata.json'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
# Test table migration when the table is at a different location than what the
# table name would imply.
create table table_at_random_location (i int, s string)
stored as parquet
location '$NAMENODE/test-warehouse/$DATABASE.db/random_location/';
insert into table_at_random_location values (1, "str1"), (2, "str2"), (3, "str3");
select * from table_at_random_location;
---- RESULTS
1,'str1'
2,'str2'
3,'str3'
---- TYPES
int, string
====
---- QUERY
describe formatted table_at_random_location;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/random_location','NULL'
'SerDe Library: ','org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe','NULL'
'InputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat','NULL'
'OutputFormat: ','org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
alter table table_at_random_location convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted table_at_random_location;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/random_location','NULL'
row_regex: '','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/random_location/metadata/.*.metadata.json'
'','format-version ','$ICEBERG_DEFAULT_FORMAT_VERSION '
'','external.table.purge','true '
'','storage_handler ','org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
'','write.format.default','parquet '
'SerDe Library: ','org.apache.iceberg.mr.hive.HiveIcebergSerDe','NULL'
'InputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergInputFormat','NULL'
'OutputFormat: ','org.apache.iceberg.mr.hive.HiveIcebergOutputFormat','NULL'
---- TYPES
string, string, string
====
---- QUERY
select * from table_at_random_location;
---- RESULTS
1,'str1'
2,'str2'
3,'str3'
---- TYPES
int, string
====
---- QUERY
# Convert to the non-default format-version 1; it should work.
create table converted_into_v1 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v1 convert to iceberg tblproperties ('format-version'='1');
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted converted_into_v1;
---- RESULTS: VERIFY_IS_SUBSET
'','format-version ','1 '
---- TYPES
string, string, string
====
---- QUERY
# Converting directly into a format-version 3 Iceberg table is currently not supported.
create table converted_into_v3 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v3 convert to iceberg tblproperties ('format-version'='3');
---- CATCH
AnalysisException: Unsupported Iceberg format version '3'.
====
---- QUERY
create table simple_tbl (i int) stored as parquet;
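# The debug action forces the Iceberg call made during the conversion to
# fail, exercising the error path.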
set debug_action="CONVERT_TABLE_FAIL_ICEBERG_CALL";
alter table simple_tbl convert to iceberg;
---- CATCH
ImpalaRuntimeException: Unable load data files for location:
====
---- QUERY
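# Verify that partition values containing special characters (and NULL)
# survive the migration.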
create table special_chars (i int) partitioned by (s string) stored as parquet;
insert into special_chars partition (s='11 22-33&44%55"') values (1);
insert into special_chars partition (s='aa - bb') values (2);
insert into special_chars partition (s=null) values (3);
insert into special_chars partition (s='11/22/33') values (4);
alter table special_chars convert to iceberg;
---- RESULTS
'Table has been migrated.'
====
---- QUERY
select * from special_chars;
---- RESULTS
1,'11 22-33&44%55"'
2,'aa - bb'
3,'NULL'
4,'11/22/33'
---- TYPES
int, string
====
---- QUERY
select * from special_chars where s='aa - bb';
---- RESULTS
2,'aa - bb'
---- TYPES
int, string
====
---- QUERY
select * from special_chars where s is null;
---- RESULTS
3,'NULL'
---- TYPES
int, string
====
---- QUERY
select * from special_chars where s='11/22/33';
---- RESULTS
4,'11/22/33'
---- TYPES
int, string
====