Files
impala/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
Gabor Kaszab 5d021ce5a7 IMPALA-9496: Allow struct type in the select list for Parquet tables
This patch is to extend the support of Struct columns in the select
list to Parquet files as well.

There are some limitation with this patch:
  - Dictionary filtering could work when we have conjuncts on a member
    of a struct, however, if this struct is given in the select list
    then the dictionary filtering is disabled. The reason is that in
    this case there would be a mismatch between the slot/tuple IDs in
    the conjunct between the ones in the select list due to expr
    substitution logic when a struct is in the select list. Solving
    this puzzle would be a nice future performance enhancement. See
    IMPALA-11361.
  - When structs are read in a batched manner it delegates the actual
    reading of the data to the column readers of its children, however,
    would use the simple ReadValue() on these readers instead of the
    batched version. The reason is that calling the batched reader in
    the member column readers would in fact read in batches, but it
    won't handle the case when the parent struct is NULL and would set
    only itself to NULL but not the parent struct. This might also be a
    future performance enhancement. See IMPALA-11363.
  - If there is a struct in the select list then late materialization
    is turned off. The reason is that LM expects the column readers to
    be used through the batched reading interface, however, as said in
    the above bulletpoint currently struct column readers use the
    non-batched reading interface of its children. As a result after
    reading the column readers are not in a state as SkipRows() of LM
    expects and then results in a query failure because it's not able
    to skip the rows for non-filter readers.
    Once IMPALA-11363 is implemented and the struct will also use the
    ReadValueBatch() interface of its children then late
    materialization could be turned on even if structs are in the
    select list. See IMPALA-11364.

Testing:
  - There were a lot of tests already to exercise this functionality
    but they were only run on ORC table. I changed these to cover
    Parquet tables too.

Change-Id: I3e8b4cbc2c4d1dd5fbefb7c87dad8d4e6ac2f452
Reviewed-on: http://gerrit.cloudera.org:8080/18596
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-06-22 17:55:07 +00:00

453 lines
18 KiB
Plaintext

====
---- QUERY
# Select a struct that contains multiple structs.
select id, outer_struct
from complextypes_nested_structs;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
3,'NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
---- TYPES
INT,STRING
====
---- QUERY
# Select a struct that contains multiple structs using a filter on a non-struct field.
select id, outer_struct
from complextypes_nested_structs
where id > 2;
---- RESULTS
3,'NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
---- TYPES
INT,STRING
====
---- QUERY
# Select a struct that contains multiple structs using a filter on a struct field.
select id, outer_struct
from complextypes_nested_structs
where length(outer_struct.str) > 3;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
---- TYPES
INT,STRING
====
---- QUERY
# Select a nested struct with an order by.
select id, outer_struct
from complextypes_nested_structs
order by id;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
3,'NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
---- TYPES
INT,STRING
====
---- QUERY
# Select a nested struct with an order by.
select id, outer_struct
from complextypes_nested_structs
order by id desc;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
3,'NULL'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
---- TYPES
INT,STRING
====
---- QUERY
# Select the same nested struct multiple times in one query.
select id, outer_struct, outer_struct
from complextypes_nested_structs;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
3,'NULL','NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
---- TYPES
INT,STRING,STRING
====
---- QUERY
# Select the same nested struct multiple times in one query and order the results.
select id, outer_struct, outer_struct
from complextypes_nested_structs
order by id desc;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
3,'NULL','NULL'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
---- TYPES
INT,STRING,STRING
====
---- QUERY
# Similar to the above query but here the 'id' field is not in the select list but still
# used in the order by.
select outer_struct, outer_struct
from complextypes_nested_structs
order by id desc;
---- RESULTS: VERIFY_IS_EQUAL_SORTED
'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
'NULL','NULL'
'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
---- TYPES
STRING,STRING
====
---- QUERY
# Select an inner struct where the outer struct is null.
select outer_struct.inner_struct1 from complextypes_nested_structs
where id = 3;
---- RESULTS
'NULL'
---- TYPES
STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct from sub;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
3,'NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct and we query a nested
# field.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct.inner_struct2 from sub;
---- RESULTS
1,'{"i":333222111,"str":"somestr3"}'
2,'{"i":100,"str":"str3"}'
3,'NULL'
4,'{"i":1,"str":"string"}'
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct and we query a doubly
# nested field.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct.inner_struct2.i from sub;
---- RESULTS
1,333222111
2,100
3,NULL
4,1
5,NULL
---- TYPES
INT,INT
====
---- QUERY
# WITH clause creates an inline view containing a nested struct and one of its members
# that is a struct itself; query both in the main select statement.
with sub as (
select id, outer_struct, outer_struct.inner_struct3 inner3
from complextypes_nested_structs)
select id, outer_struct, inner3 from sub;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','{"s":{"i":112288,"s":null}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"s":{"i":321,"s":"dfgs"}}'
3,'NULL','NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','{"s":null}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','NULL'
---- TYPES
INT,STRING,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct ('outer_struct') and one
# of its members that is a struct itself ('inner3'); query a different member of
# 'outer_struct' as well as 'inner3' in the main select statement.
with sub as (
select id, outer_struct, outer_struct.inner_struct3 inner3
from complextypes_nested_structs)
select id, outer_struct.inner_struct2, inner3 from sub;
---- RESULTS
1,'{"i":333222111,"str":"somestr3"}','{"s":{"i":112288,"s":null}}'
2,'{"i":100,"str":"str3"}','{"s":{"i":321,"s":"dfgs"}}'
3,'NULL','NULL'
4,'{"i":1,"str":"string"}','{"s":null}'
5,'NULL','NULL'
---- TYPES
INT,STRING,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct ('outer_struct') and one
# of its members that is a struct itself ('inner3'); query a different member of
# 'outer_struct' as well as a member of 'inner3' in the main select statement.
with sub as (
select id, outer_struct, outer_struct.inner_struct3 inner3
from complextypes_nested_structs)
select id, outer_struct.inner_struct2, inner3.s from sub;
---- RESULTS
1,'{"i":333222111,"str":"somestr3"}','{"i":112288,"s":null}'
2,'{"i":100,"str":"str3"}','{"i":321,"s":"dfgs"}'
3,'NULL','NULL'
4,'{"i":1,"str":"string"}','NULL'
5,'NULL','NULL'
---- TYPES
INT,STRING,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct; we order by id.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct.inner_struct2 from sub order by sub.id desc;
---- RESULTS
5,'NULL'
4,'{"i":1,"str":"string"}'
3,'NULL'
2,'{"i":100,"str":"str3"}'
1,'{"i":333222111,"str":"somestr3"}'
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct; we order by a nested
# field.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct.inner_struct2 from sub
order by sub.outer_struct.inner_struct2.i, sub.id;
---- RESULTS
4,'{"i":1,"str":"string"}'
2,'{"i":100,"str":"str3"}'
1,'{"i":333222111,"str":"somestr3"}'
3,'NULL'
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct; we order by a nested
# field that is not present in the select list.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct.inner_struct1 from sub
order by sub.outer_struct.inner_struct2.i, sub.id;
---- RESULTS
4,'{"str":"somestr2","de":12345.12}'
2,'NULL'
1,'{"str":"somestr2","de":12345.12}'
3,'NULL'
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct; filter by a struct field
# from the inline view.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select sub.id, sub.outer_struct.str
from sub
where length(sub.outer_struct.str) < 4;
---- RESULTS
2,'str'
4,''
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a two structs; select only one of them and
# filter by one of its fields.
with sub as (
select id, outer_struct.inner_struct1 s1, outer_struct.inner_struct2 s2
from complextypes_nested_structs)
select sub.id, s2
from sub
where length(s2.str) < 8;
---- RESULTS
2,'{"i":100,"str":"str3"}'
4,'{"i":1,"str":"string"}'
---- TYPES
INT,STRING
====
---- QUERY
# WITH clause creates an inline view containing a nested struct; filter by a struct field
# from the inline view but do not select anything from it.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select 1
from sub
where length(sub.outer_struct.str) < 4;
---- RESULTS
1
1
---- TYPES
TINYINT
====
---- QUERY
# WITH clause creates an inline view containing a nested struct. Also has a filter on
# the inline view and ordering by a non-complex item from the view.
with sub as (
select id, outer_struct
from complextypes_nested_structs
where length(outer_struct.str) > 3)
select sub.id, sub.outer_struct from sub order by sub.id desc;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
---- TYPES
INT,STRING
====
---- QUERY
# Two-level inline view, querying a struct filed.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select id, s from (select id, outer_struct.inner_struct1 as s from sub) v
order by id;
---- RESULTS
1,'{"str":"somestr2","de":12345.12}'
2,'NULL'
3,'NULL'
4,'{"str":"somestr2","de":12345.12}'
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# Two-level inline view, querying the top level struct.
with sub as (
select id, outer_struct from complextypes_nested_structs)
select id, s from (select id, outer_struct as s from sub) v
order by id;
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
3,'NULL'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
---- TYPES
INT,STRING
====
---- QUERY
# Select a struct that contains multiple structs using a filter on multiple struct fields
# when the struct fields in the predicate are itemTupleDescriptors within the struct(s),
# not in the main tuple.
select id, outer_struct
from complextypes_nested_structs
where outer_struct.inner_struct2.i > length(outer_struct.str)
---- RESULTS
1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
---- TYPES
INT,STRING
====
---- QUERY
# Select a nested struct member first, and then the enclosing struct so that even the
# enclosing struct is embedded in another one, but the top level struct is not present in
# the query.
select id, outer_struct.inner_struct2.i, outer_struct.inner_struct2
from complextypes_nested_structs;
---- RESULTS
1,333222111,'{"i":333222111,"str":"somestr3"}'
2,100,'{"i":100,"str":"str3"}'
3,NULL,'NULL'
4,1,'{"i":1,"str":"string"}'
5,NULL,'NULL'
---- TYPES
INT,INT,STRING
====
---- QUERY
# An inner join where struct fields are in the join condition and their parent struct is
# in the select list.
select a.outer_struct, b.small_struct
from complextypes_nested_structs a
inner join complextypes_structs b
on b.small_struct.i = a.outer_struct.inner_struct2.i + 19091;
---- RESULTS
'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"i":19191,"s":"small_struct_str"}'
---- TYPES
STRING,STRING
====
---- QUERY
# An outer join where struct fields are in the join condition and their parent struct is
# in the select list.
select a.outer_struct, b.small_struct
from complextypes_nested_structs a
full outer join complextypes_structs b
on b.small_struct.i = a.outer_struct.inner_struct2.i + 19091;
---- RESULTS
'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','NULL'
'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"i":19191,"s":"small_struct_str"}'
'NULL','NULL'
'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','NULL'
'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','NULL'
'NULL','{"i":98765,"s":"abcde f"}'
'NULL','{"i":98765,"s":null}'
'NULL','{"i":null,"s":null}'
'NULL','{"i":null,"s":"str"}'
'NULL','NULL'
---- TYPES
STRING,STRING
====
---- QUERY
# Checks that "SELECT nested_struct.* ..." omits the nested structs from the output.
select id, outer_struct.* from complextypes_nested_structs;
---- RESULTS
1,'somestr1'
2,'str'
3,'NULL'
4,''
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# IMPALA-10839: Display nulls at the correct level.
select id, outer_struct.inner_struct3
from complextypes_nested_structs;
---- RESULTS
1,'{"s":{"i":112288,"s":null}}'
2,'{"s":{"i":321,"s":"dfgs"}}'
3,'NULL'
4,'{"s":null}'
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# IMPALA-10839: Display nulls at the correct level.
select id, outer_struct.inner_struct3.s
from complextypes_nested_structs;
---- RESULTS
1,'{"i":112288,"s":null}'
2,'{"i":321,"s":"dfgs"}'
3,'NULL'
4,'NULL'
5,'NULL'
---- TYPES
INT,STRING
====
---- QUERY
# Subquery that returns a complex type is not supported.
# IMPALA-9500
select outer_struct
from complextypes_nested_structs
where outer_struct in
(select outer_struct from functional_parquet.complextypes_nested_structs);
---- CATCH
AnalysisException: A subquery can't return complex types. (SELECT outer_struct FROM functional_parquet.complex
types_nested_structs)
====