mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
As a first stage of IMPALA-10939, this change adds support for including
top-level collections in the sorting tuple, provided they contain only
fixed-length types (including fixed-length structs). For these types the
implementation is almost the same as the existing handling of strings.
Another limitation is that structs that contain any type of collection
are not yet allowed in the sorting tuple.
Also refactored the RawValue::Write*() functions to have a clearer
interface.
Testing:
- Added a new test table that contains many rows with arrays. This is
queried in a new test added in test_sort.py, to ensure that we handle
spilling correctly.
- Added tests that have arrays and/or maps in the sorting tuple in
test_queries.py::TestQueries::{test_sort,
test_top_n,test_partitioned_top_n}.
Change-Id: Ic7974ef392c1412e8c60231e3420367bd189677a
Reviewed-on: http://gerrit.cloudera.org:8080/19660
Reviewed-by: Csaba Ringhofer <csringhofer@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
51 lines
1.5 KiB
Plaintext
51 lines
1.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Partitioned top-N evaluated in memory over rows that carry a collection column
# (int_array). At most 5 rows are kept per tinyint_col partition, and some
# partitions have more candidate rows than that limit.
WITH agg_with_arrays AS (
  SELECT a.*, b.int_array
  FROM alltypesagg AS a
    LEFT JOIN complextypestbl AS b ON a.tinyint_col = b.id
),
ranked AS (
  # Number the filtered rows inside each tinyint_col partition by ascending id.
  SELECT
    tinyint_col,
    id,
    int_array,
    ROW_NUMBER() OVER (PARTITION BY tinyint_col ORDER BY id) AS rn
  FROM agg_with_arrays
  WHERE id % 777 = 0 OR id % 10 = 7
)
SELECT tinyint_col, id, int_array, rn
FROM ranked
WHERE rn <= 5
ORDER BY tinyint_col, rn
|
|
---- RESULTS
|
|
1,2331,'[1,2,3]',1
|
|
2,4662,'[null,1,2,null,3,null]',1
|
|
3,6993,'[]',1
|
|
4,1554,'NULL',1
|
|
4,9324,'NULL',2
|
|
5,3885,'NULL',1
|
|
6,6216,'NULL',1
|
|
7,7,'NULL',1
|
|
7,17,'NULL',2
|
|
7,27,'NULL',3
|
|
7,37,'NULL',4
|
|
7,47,'NULL',5
|
|
8,3108,'[-1]',1
|
|
9,5439,'NULL',1
|
|
NULL,0,'NULL',1
|
|
NULL,0,'NULL',2
|
|
NULL,7770,'NULL',3
|
|
NULL,7770,'NULL',4
|
|
---- TYPES
|
|
TINYINT, INT, STRING, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Negative case: sorting is not supported yet when the sorting tuple contains a
# collection that itself holds variable length data (see IMPALA-10939). Here
# int_array_array (presumably an array of arrays) is carried through the sort, so
# the query is expected to fail during analysis.
WITH agg_with_arrays AS (
  SELECT a.*, b.int_array, b.int_array_array
  FROM alltypesagg AS a
    LEFT JOIN complextypestbl AS b ON a.tinyint_col = b.id
),
ranked AS (
  # Number the filtered rows inside each tinyint_col partition by ascending id.
  SELECT
    tinyint_col,
    id,
    int_array,
    int_array_array,
    ROW_NUMBER() OVER (PARTITION BY tinyint_col ORDER BY id) AS rn
  FROM agg_with_arrays
  WHERE id % 777 = 0 OR id % 10 = 7
)
SELECT tinyint_col, id, int_array, int_array_array, rn
FROM ranked
WHERE rn <= 5
ORDER BY tinyint_col, rn
|
|
---- CATCH
|
|
AnalysisException: Sorting is not supported if the select list contains (possibly nested) collections with variable length data types.
|
|
====
|