Nested Types: Check supported/unsupported file formats for complex types.

Before this patch, we accepted any query referencing complex types,
regardless of the file format of the table/partition being scanned.
We would ultimately hit a DCHECK in the BE when attempting to scan
complex types from a table/partition with an unsupported format.

This patch makes queries fail gracefully during planning if a scan
would access a table/partition in a format for which we do not
support complex types.
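
For example, a query like the following sketch is now rejected during
planning instead of crashing the backend (the database name and the
error wording are illustrative assumptions, not quoted from the patch):

-- complextypes_fileformat is created below in several file-format
-- variants of the functional database; assume an Avro variant here.
SELECT s.f1 FROM functional_avro.complextypes_fileformat;
-- Expected: a planning error stating that complex types are not
-- supported for the table's file format, rather than a DCHECK in the BE.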

For mixed-format partitioned HDFS tables we perform this check at
partition granularity, so such a table can be scanned as long as only
partitions with supported formats are accessed.
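
For example, against the complextypes_multifileformat table defined
below (a sketch, assuming Parquet is the only format with complex-type
scan support at the time of this patch; partition formats follow the
LOAD section below, where p=2 is Parquet and p=3 is Avro):

SELECT item FROM complextypes_multifileformat t, t.a WHERE t.p = 2;  -- plans OK
SELECT item FROM complextypes_multifileformat t, t.a WHERE t.p = 3;  -- fails gracefully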

HBase tables with complex-typed columns can be scanned as long as
no complex-typed columns are accessed in the query.
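
A sketch of the HBase case (the table name and its schema mapping are
hypothetical):

-- Assume hbase_complextypes is an HBase-backed table with columns
-- id int and a array<int>.
SELECT id FROM hbase_complextypes;           -- allowed: no complex column accessed
SELECT item FROM hbase_complextypes t, t.a;  -- rejected during planning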

Change-Id: I2fd2e386c9755faf2cfe326541698a7094fa0ffc
Reviewed-on: http://gerrit.cloudera.org:8080/705
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins
Author:    Alex Behm
Date:      2015-08-26 18:09:20 -07:00
Committer: Internal Jenkins
Parent:    d52318d648
Commit:    9d46853fbc
19 changed files with 336 additions and 51 deletions

@@ -555,6 +555,55 @@ delimited fields terminated by ',' escaped by '\\'
---- DATASET
functional
---- BASE_TABLE_NAME
complextypes_fileformat
---- CREATE_HIVE
-- Used for positive/negative testing of complex types on various file formats.
-- In particular, queries on file formats for which we do not support complex types
-- should fail gracefully.
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
id int,
s struct<f1:string,f2:int>,
a array<int>,
m map<string,bigint>)
STORED AS {file_format};
---- ALTER
-- This INSERT is placed in the ALTER section and not in the DEPENDENT_LOAD section because
-- it must always be executed in Hive. The DEPENDENT_LOAD section is sometimes executed in
-- Impala, but Impala currently does not support inserting into tables with complex types.
INSERT OVERWRITE TABLE {table_name} SELECT * FROM functional.{table_name};
---- LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT id, named_struct("f1",string_col,"f2",int_col), array(1, 2, 3), map("k", cast(0 as bigint)) FROM functional.alltypestiny;
====
---- DATASET
functional
---- BASE_TABLE_NAME
complextypes_multifileformat
---- CREATE_HIVE
-- Used for positive/negative testing of complex types on various file formats.
-- In particular, queries on file formats for which we do not support complex types
-- should fail gracefully. This table allows testing at a partition granularity.
CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
id int,
s struct<f1:string,f2:int>,
a array<int>,
m map<string,bigint>)
PARTITIONED BY (p int)
STORED AS {file_format};
---- LOAD
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} PARTITION(p=1) SELECT id, named_struct("f1",string_col,"f2",int_col), array(1, 2, 3), map("k", cast(0 as bigint)) FROM functional.alltypestiny;
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} PARTITION(p=2) SELECT id, named_struct("f1",string_col,"f2",int_col), array(1, 2, 3), map("k", cast(0 as bigint)) FROM functional.alltypestiny;
INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} PARTITION(p=3) SELECT id, named_struct("f1",string_col,"f2",int_col), array(1, 2, 3), map("k", cast(0 as bigint)) FROM functional.alltypestiny;
-- The order of insertions and alterations is deliberately chosen to work around a
-- Hive bug where the format of an altered partition reverts to the original format
-- after an insert. So we first do the inserts, and then alter the partition formats.
USE {db_name}{db_suffix};
ALTER TABLE {table_name} PARTITION (p=2) SET FILEFORMAT PARQUET;
ALTER TABLE {table_name} PARTITION (p=3) SET FILEFORMAT AVRO;
USE default;
====
---- DATASET
functional
---- BASE_TABLE_NAME
testtbl
---- COLUMNS
id bigint