mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
IMPALA-9499: Display support for all complex types in a SELECT * query
This change adds EXPAND_COMPLEX_TYPES query option to support the display of complex types in SELECT statements where star (*) expression is in the select list. By default, the query option is disabled. When it's enabled, it changes the behaviour of star expansion to list all top-level column fields including ones with complex types, instead of listing the scalar column fields only. Nested complex type expansion is also supported, eg.: struct.* will enumerate the members of the struct. Array, map and struct types are supported. Testing: - Analyzer tests check select statements when the query option is enabled or disabled. - EE tests check the proper complex type deserialization when the query option is enabled, and the original behaviour when the option is disabled. Change-Id: I84b5e5703f9e0ce0f4f8bff83941677dd7489974 Reviewed-on: http://gerrit.cloudera.org:8080/18863 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
0b251cc6bc
commit
04b5319e6e
@@ -1229,6 +1229,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
|
||||
query_options->__set_orc_schema_resolution(enum_type);
|
||||
break;
|
||||
}
|
||||
case TImpalaQueryOptions::EXPAND_COMPLEX_TYPES: {
|
||||
query_options->__set_expand_complex_types(IsTrue(value));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (IsRemovedQueryOption(key)) {
|
||||
LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
|
||||
|
||||
@@ -50,7 +50,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
|
||||
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
|
||||
#define QUERY_OPTS_TABLE \
|
||||
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), \
|
||||
TImpalaQueryOptions::ORC_SCHEMA_RESOLUTION + 1); \
|
||||
TImpalaQueryOptions::EXPAND_COMPLEX_TYPES + 1); \
|
||||
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
|
||||
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR) \
|
||||
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS) \
|
||||
@@ -274,7 +274,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
|
||||
QUERY_OPT_FN(enable_replan, ENABLE_REPLAN, TQueryOptionLevel::ADVANCED) \
|
||||
QUERY_OPT_FN(test_replan, TEST_REPLAN, TQueryOptionLevel::ADVANCED) \
|
||||
QUERY_OPT_FN(lock_max_wait_time_s, LOCK_MAX_WAIT_TIME_S, TQueryOptionLevel::REGULAR) \
|
||||
QUERY_OPT_FN(orc_schema_resolution, ORC_SCHEMA_RESOLUTION, TQueryOptionLevel::REGULAR);
|
||||
QUERY_OPT_FN(orc_schema_resolution, ORC_SCHEMA_RESOLUTION, TQueryOptionLevel::REGULAR) \
|
||||
QUERY_OPT_FN(expand_complex_types, EXPAND_COMPLEX_TYPES, TQueryOptionLevel::REGULAR);
|
||||
|
||||
/// Enforce practical limits on some query options to avoid undesired query state.
|
||||
static const int64_t SPILLABLE_BUFFER_LIMIT = 1LL << 40; // 1 TB
|
||||
|
||||
@@ -736,6 +736,10 @@ enum TImpalaQueryOptions {
|
||||
|
||||
// Determines how to resolve ORC files' schemas. Valid values are "position" and "name".
|
||||
ORC_SCHEMA_RESOLUTION = 146;
|
||||
|
||||
// Expands complex types in star queries
|
||||
EXPAND_COMPLEX_TYPES = 147
|
||||
|
||||
}
|
||||
|
||||
// The summary of a DML statement.
|
||||
|
||||
@@ -594,6 +594,9 @@ struct TQueryOptions {
|
||||
|
||||
// See comment in ImpalaService.thrift
|
||||
147: optional TSchemaResolutionStrategy orc_schema_resolution = 0;
|
||||
|
||||
// See comment in ImpalaService.thrift
|
||||
148: optional bool expand_complex_types = false;
|
||||
}
|
||||
|
||||
// Impala currently has three types of sessions: Beeswax, HiveServer2 and external
|
||||
|
||||
@@ -455,7 +455,9 @@ public class InlineViewRef extends TableRef {
|
||||
int numColLabels = getColLabels().size();
|
||||
Preconditions.checkState(numColLabels > 0);
|
||||
Set<String> uniqueColAliases = Sets.newHashSetWithExpectedSize(numColLabels);
|
||||
List<StructField> fields = Lists.newArrayListWithCapacity(numColLabels);
|
||||
// Using linked set to preserve order and uniqueness of fields. If using a
|
||||
// list, star-expanded complex columns would be enumerated multiple times.
|
||||
Set<StructField> fields = Sets.newLinkedHashSetWithExpectedSize(numColLabels);
|
||||
for (int i = 0; i < numColLabels; ++i) {
|
||||
// inline view select statement has been analyzed. Col label should be filled.
|
||||
Expr selectItemExpr = queryStmt_.getResultExprs().get(i);
|
||||
@@ -512,7 +514,7 @@ public class InlineViewRef extends TableRef {
|
||||
result.setMaskedTable(baseTbl);
|
||||
}
|
||||
}
|
||||
result.setType(new StructType(fields));
|
||||
result.setType(new StructType(Lists.newArrayList(fields)));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -793,24 +793,27 @@ public class SelectStmt extends QueryStmt {
|
||||
/**
|
||||
* Helper function used during star expansion to add a single result expr
|
||||
* based on a given raw path to be resolved relative to an existing path.
|
||||
* Ignores paths with a complex-typed destination because they are currently
|
||||
* illegal in any select list (even for inline views, etc.)
|
||||
*/
|
||||
private void addStarResultExpr(Path resolvedPath,
|
||||
String... relRawPath) throws AnalysisException {
|
||||
Path p = Path.createRelPath(resolvedPath, relRawPath);
|
||||
Preconditions.checkState(p.resolve());
|
||||
if (p.destType().isComplexType()) return;
|
||||
SlotDescriptor slotDesc = analyzer_.registerSlotRef(p);
|
||||
if (p.destType().isComplexType() &&
|
||||
!analyzer_.getQueryCtx().client_request.query_options.expand_complex_types) {
|
||||
return;
|
||||
}
|
||||
SlotDescriptor slotDesc = analyzer_.registerSlotRef(p, false);
|
||||
SlotRef slotRef = new SlotRef(slotDesc);
|
||||
Preconditions.checkState(slotRef.isAnalyzed(),
|
||||
"Analysis should be done in constructor");
|
||||
|
||||
if(slotRef.getType().isStructType()){
|
||||
slotRef.reExpandStruct(analyzer_);
|
||||
}
|
||||
// Empty matched types means this is expanded from star of a catalog table.
|
||||
// For star of complex types, e.g. my_struct.*, my_array.*, my_map.*, the matched
|
||||
// types will have the complex type so it's not empty.
|
||||
if (resolvedPath.getMatchedTypes().isEmpty()) {
|
||||
Preconditions.checkState(!slotDesc.getType().isComplexType(),
|
||||
"Star expansion should only introduce scalar columns");
|
||||
analyzer_.registerColumnForMasking(slotDesc);
|
||||
}
|
||||
resultExprs_.add(slotRef);
|
||||
|
||||
@@ -23,6 +23,8 @@ import org.apache.impala.thrift.TColumnType;
|
||||
import org.apache.impala.thrift.TStructField;
|
||||
import org.apache.impala.thrift.TTypeNode;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* TODO: Support comments for struct fields. The Metastore does not properly store
|
||||
* comments of struct fields. We set comment_ to null to avoid compatibility issues.
|
||||
@@ -99,4 +101,8 @@ public class StructField {
|
||||
StructField otherStructField = (StructField) other;
|
||||
return otherStructField.name_.equals(name_) && otherStructField.type_.equals(type_);
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(name_, type_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1071,6 +1071,33 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
|
||||
"Unable to INSERT into target table (default.new_tbl) because the column " +
|
||||
"'tiny_struct' has a complex type 'STRUCT<b:BOOLEAN>' and Impala doesn't " +
|
||||
"support inserting into tables containing complex type columns");
|
||||
|
||||
//Make complex types available in star queries
|
||||
ctx.getQueryOptions().setExpand_complex_types(true);
|
||||
//TODO: Once IMPALA-10851 is resolved it can be removed
|
||||
ctx.getQueryOptions().setDisable_codegen(true);
|
||||
|
||||
AnalyzesOk("select * from functional_parquet.complextypes_structs",ctx);
|
||||
AnalyzesOk("select * from functional_parquet.complextypes_nested_structs",ctx);
|
||||
AnalyzesOk("select * from functional_parquet.complextypes_maps_view",ctx);
|
||||
|
||||
AnalyzesOk("select outer_struct.str, outer_struct.* from " +
|
||||
"functional_parquet.complextypes_nested_structs",ctx);
|
||||
AnalyzesOk("select * from (select * from " +
|
||||
"functional_parquet.complextypes_nested_structs) v",ctx);
|
||||
AnalyzesOk("select * from (select int_map, int_map_array from " +
|
||||
"functional_parquet.complextypestbl) v",ctx);
|
||||
|
||||
ctx.getQueryOptions().setDisable_codegen(false);
|
||||
|
||||
AnalyzesOk("select * from functional_parquet.complextypes_arrays",ctx);
|
||||
AnalyzesOk("select * from " +
|
||||
"functional_parquet.complextypes_arrays_only_view",ctx);
|
||||
AnalyzesOk("select v.id, v.* from " +
|
||||
"(select * from functional_parquet.complextypes_arrays) v",ctx);
|
||||
|
||||
AnalysisError("select * from functional.allcomplextypes",
|
||||
ctx,"STRUCT type inside collection types is not supported.");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -4634,7 +4661,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
|
||||
testNumberOfMembers(BaseTableRef.class, 0);
|
||||
testNumberOfMembers(InlineViewRef.class, 10);
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private void testNumberOfMembers(Class cl, int expectedNumMembers) {
|
||||
int actualNumMembers = 0;
|
||||
|
||||
164
testdata/workloads/functional-query/queries/QueryTest/nested-types-star-expansion.test
vendored
Normal file
164
testdata/workloads/functional-query/queries/QueryTest/nested-types-star-expansion.test
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on table with array type
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select * from complextypes_arrays where id = 1
|
||||
---- RESULTS
|
||||
1,'[1,2,3,4,5]','["one","two","three","four","five"]'
|
||||
---- TYPES
|
||||
INT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on table with array type, and an additional scalar field, without EXPAND_COMPLEX_TYPES
|
||||
select id, * from complextypes_arrays where id = 2
|
||||
---- RESULTS
|
||||
2,2
|
||||
---- TYPES
|
||||
INT, INT
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on table with array type, and an additional scalar field
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select id, * from complextypes_arrays where id = 3
|
||||
---- RESULTS
|
||||
3,3,'[10,9,8]','["ten"]'
|
||||
---- TYPES
|
||||
INT, INT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on view with array type, and an additional scalar field
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select id, * from complextypes_arrays_only_view where id = 4
|
||||
---- RESULTS
|
||||
4,4,'NULL','[]'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on view with map type, and an additional scalar field
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select id, * from complextypes_maps_view where id = 5
|
||||
---- RESULTS
|
||||
5,5,'{}','NULL'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on views with array type, and an additional scalar field
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select v1.id, * from complextypes_arrays_only_view v1, complextypes_arrays_only_view v2
|
||||
where v1.id = v2.id and v1.id = 6
|
||||
---- RESULTS
|
||||
6,6,'NULL','NULL',6,'NULL','NULL'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING, BIGINT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on views with array type, and an additional scalar field, without EXPAND_COMPLEX_TYPES
|
||||
select v1.id, *
|
||||
from complextypes_arrays_only_view v1, complextypes_arrays_only_view v2
|
||||
where v1.id = v2.id and v1.id = 7
|
||||
---- RESULTS
|
||||
7,7,7
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding stars on view aliases with array type
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select v1.*, v2.* from complextypes_arrays_only_view v1, complextypes_arrays_only_view v2
|
||||
where v1.id = v2.id and v1.id = 1
|
||||
---- RESULTS
|
||||
1,'[1,2,3]','[[1,2],[3,4]]',1,'[1,2,3]','[[1,2],[3,4]]'
|
||||
---- TYPES
|
||||
BIGINT, STRING, STRING, BIGINT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on table with struct type
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select * from complextypes_structs where id = 2
|
||||
---- RESULTS
|
||||
2,'second item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab ","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
|
||||
---- TYPES
|
||||
INT, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on table with struct type, and an additional scalar field
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select id, * from complextypes_structs where id = 3
|
||||
---- RESULTS
|
||||
3,3,'third item','{"ti":null,"si":null,"i":null,"bi":null,"b":null,"f":null,"do":null,"da":null,"ts":null,"s1":null,"s2":null,"c1":null,"c2":null,"vc":null,"de1":null,"de2":null}','{"b":true}','{"i":98765,"s":null}'
|
||||
---- TYPES
|
||||
INT, INT, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on tables with struct type, and an additional scalar field
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select t1.id, *
|
||||
from complextypes_structs t1, complextypes_structs t2
|
||||
where t1.id = t2.id and t1.id = 4
|
||||
---- RESULTS
|
||||
4,4,'fourth item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2024-01-01 00:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d ","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}',4,'fourth item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2024-01-01 00:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d ","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
|
||||
---- TYPES
|
||||
INT, INT, STRING, STRING, STRING, STRING, INT, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding inner struct
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select alltypes.* from complextypes_structs where alltypes.ti = 100
|
||||
---- RESULTS
|
||||
100,12348,156789012,163234345342,True,1234.56005859375,65323423.33,2021-05-30,2021-06-01 10:19:04,'some string','another str','x','xyz','somevarcha',12345,NULL
|
||||
---- TYPES
|
||||
TINYINT, SMALLINT, INT, BIGINT, BOOLEAN, FLOAT, DOUBLE, DATE, TIMESTAMP, STRING, STRING, CHAR, CHAR, VARCHAR, DECIMAL, DECIMAL
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding struct and array
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select * from complextypes_structs s, complextypes_arrays a
|
||||
where s.id = a.id and s.id = 5
|
||||
---- RESULTS
|
||||
5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}',5,'[10,NULL,12]','["ten","eleven","twelve","thirteen"]'
|
||||
---- TYPES
|
||||
INT, STRING, STRING, STRING, STRING, INT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding star on tables with array and struct type, without EXPAND_COMPLEX_TYPES
|
||||
select a.*, s.*
|
||||
from complextypes_arrays a, complextypes_structs s
|
||||
where a.id = s.id and a.id = 6
|
||||
---- RESULTS
|
||||
6,6,'sixth item'
|
||||
---- TYPES
|
||||
INT, INT, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding struct and array as separate aliases
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select s.*, a.* from complextypes_structs s, complextypes_arrays a
|
||||
where s.id = a.id and s.id = 1
|
||||
---- RESULTS
|
||||
1,'first item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01 10:19:04","s1":"some string","s2":"another str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL',1,'[1,2,3,4,5]','["one","two","three","four","five"]'
|
||||
---- TYPES
|
||||
INT, STRING, STRING, STRING, STRING, INT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding struct, map and array as separate aliases
|
||||
set EXPAND_COMPLEX_TYPES=true;
|
||||
select s.*, a.*, m.*
|
||||
from complextypes_structs s, complextypes_arrays a, complextypes_maps_view m
|
||||
where s.id = a.id and s.id = m.id and s.id = 2
|
||||
---- RESULTS
|
||||
2,'second item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab ","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}',2,'[1,NULL,3,4,5]','["one","two","three",NULL,"five"]',2,'{"k1":2,"k2":NULL}','[{"k3":NULL,"k1":1},NULL,{}]'
|
||||
---- TYPES
|
||||
INT, STRING, STRING, STRING, STRING, INT, STRING, STRING, BIGINT, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Expanding struct, map and array as separate aliases, without EXPAND_COMPLEX_TYPES
|
||||
select s.*, a.*, m.*
|
||||
from complextypes_structs s, complextypes_arrays a, complextypes_maps_view m
|
||||
where s.id = a.id and s.id = m.id and s.id = 3
|
||||
---- RESULTS
|
||||
3,'third item',3,3
|
||||
---- TYPES
|
||||
INT, STRING, INT, BIGINT
|
||||
====
|
||||
@@ -877,3 +877,34 @@ class TestMaxNestingDepth(ImpalaTestSuite):
|
||||
assert False, "Expected table loading to fail."
|
||||
except ImpalaBeeswaxException, e:
|
||||
assert "Type exceeds the maximum nesting depth" in str(e)
|
||||
|
||||
|
||||
class TestNestedTypesStarExpansion(ImpalaTestSuite):
|
||||
"""Functional tests for nested types when star expansion query
|
||||
option (EXPAND_COMPLEX_TYPES) is enabled/disabled, run for all file formats that
|
||||
support nested types."""
|
||||
|
||||
@classmethod
|
||||
def get_workload(self):
|
||||
return 'functional-query'
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestNestedTypesStarExpansion, cls).add_test_dimensions()
|
||||
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('table_format').file_format in ['parquet', 'orc'])
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('protocol') == 'hs2')
|
||||
cls.ImpalaTestMatrix.add_dimension(
|
||||
create_exec_option_dimension_from_dict({
|
||||
'disable_codegen': ['True']}))
|
||||
cls.ImpalaTestMatrix.add_mandatory_exec_option(
|
||||
'convert_legacy_hive_parquet_utc_timestamps', 'true')
|
||||
cls.ImpalaTestMatrix.add_mandatory_exec_option('TIMEZONE', '"Europe/Budapest"')
|
||||
|
||||
def test_star_expansion(self, vector):
|
||||
# Queries with star (*) expression on tables with array, map
|
||||
# and struct complex type, through views, with query option
|
||||
# EXPAND_COMPLEX_TYPES enabled and disabled.
|
||||
self.run_test_case('QueryTest/nested-types-star-expansion', vector)
|
||||
|
||||
Reference in New Issue
Block a user