IMPALA-11438: Add tests for CREATE TABLE LIKE PARQUET STORED AS ICEBERG

Impala already supports said statement, added query tests for it.

Testing:
 - Generated parquet table with complex type by hive
 - Created hive and iceberg table from said file, provides same output
for describe statement

Change-Id: Ia363b913e101fd49b62a280721680f0eb88761c0
Reviewed-on: http://gerrit.cloudera.org:8080/18969
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Gergely Fürnstáhl
2022-09-12 14:55:55 +02:00
committed by Impala Public Jenkins
parent cfd79b40be
commit 5fd4e6a11f
3 changed files with 62 additions and 2 deletions

View File

@@ -907,4 +907,9 @@ alter table iceberg_mixed_file_format_test set tblproperties("write.format.defau
insert into iceberg_mixed_file_format_test values (3, "C", 2.5);
alter table iceberg_mixed_file_format_test set tblproperties("write.format.default"="orc");
insert into iceberg_mixed_file_format_test values (4, "D", 3.5);
Converted similarly to iceberg_v2_no_deletes
Converted similarly to iceberg_v2_no_deletes
create_table_like_parquet_test.parquet:
Generated by Hive
create table iceberg_create_table_like_parquet_test (col_int int, col_float float, col_double double, col_string string, col_struct struct<col_int:int, col_float:float>, col_array array<string>, col_map map<string,array<int>>) stored as parquet;
insert into iceberg_create_table_like_parquet_test values (0, 1.0, 2.0, "3", named_struct("col_int", 4, "col_float", cast(5.0 as float)), array("6","7","8"), map("A", array(11,12), "B", array(21,22)));

Binary file not shown.

View File

@@ -32,7 +32,9 @@ import json
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.iceberg_test_suite import IcebergTestSuite
from tests.common.skip import SkipIf, SkipIfDockerizedCluster
from tests.common.file_utils import create_iceberg_table_from_directory
from tests.common.file_utils import (
create_iceberg_table_from_directory,
create_table_from_parquet)
from tests.shell.util import run_impala_shell_cmd
from tests.util.filesystem_utils import get_fs_path, IS_HDFS
from tests.util.get_parquet_metadata import get_parquet_metadata
@@ -790,6 +792,59 @@ class TestIcebergTable(IcebergTestSuite):
self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
unique_database)
def _create_table_like_parquet_helper(self, vector, unique_database, tbl_name,
expect_success):
create_table_from_parquet(self.client, unique_database, tbl_name)
args = ['-q', "show files in {0}.{1}".format(unique_database, tbl_name)]
results = run_impala_shell_cmd(vector, args)
result_rows = results.stdout.strip().split('\n')
hdfs_file = None
for row in result_rows:
if "hdfs://" in row:
hdfs_file = row.split('|')[1].lstrip()
break
assert hdfs_file
iceberg_tbl_name = "iceberg_{0}".format(tbl_name)
sql_stmt = "create table {0}.{1} like parquet '{2}' stored as iceberg".format(
unique_database, iceberg_tbl_name, hdfs_file
)
args = ['-q', sql_stmt]
return run_impala_shell_cmd(vector, args, expect_success=expect_success)
def test_create_table_like_parquet(self, vector, unique_database):
tbl_name = 'alltypes_tiny_pages'
# Not all types are supported by iceberg
self._create_table_like_parquet_helper(vector, unique_database, tbl_name, False)
tbl_name = "create_table_like_parquet_test"
results = self._create_table_like_parquet_helper(vector, unique_database, tbl_name,
True)
result_rows = results.stdout.strip().split('\n')
assert result_rows[3].split('|')[1] == ' Table has been created. '
sql_stmt = "describe {0}.{1}".format(unique_database, tbl_name)
args = ['-q', sql_stmt]
parquet_results = run_impala_shell_cmd(vector, args)
parquet_result_rows = parquet_results.stdout.strip().split('\n')
parquet_column_name_type_list = []
for row in parquet_result_rows[1:-2]:
parquet_column_name_type_list.append(row.split('|')[1:3])
sql_stmt = "describe {0}.iceberg_{1}".format(unique_database, tbl_name)
args = ['-q', sql_stmt]
iceberg_results = run_impala_shell_cmd(vector, args)
iceberg_result_rows = iceberg_results.stdout.strip().split('\n')
iceberg_column_name_type_list = []
for row in iceberg_result_rows[1:-2]:
iceberg_column_name_type_list.append(row.split('|')[1:3])
assert parquet_column_name_type_list == iceberg_column_name_type_list
class TestIcebergV2Table(IcebergTestSuite):
"""Tests related to Iceberg V2 tables."""