mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
Make many builtin aggregate functions use fixed-length intermediate types:
* avg()
* ndv()
* stddev(), variance(), etc.
* distinctpc(), distinctpcsa()

sample(), appx_median(), histogram() and group_concat() actually allocate var-len data, so they aren't changed.

This has some major benefits:
* Spill-to-disk works properly with these aggregations.
* Aggregations are more efficient because there is one less pointer indirection.
* Aggregations use less memory, because we don't need an extra 12-byte StringValue for the indirection.

Adds a special-purpose internal type FIXED_UDA_INTERMEDIATE. The type is represented in the same way as CHAR - a fixed-size array of bytes, stored inline in tuples. However, it is not user-visible and does not support CHAR semantics, i.e. users can't declare tables, functions, etc. with the type. The pointer and length are passed into aggregate functions wrapped in a StringVal.

Updates some internal codegen functions to work better with the new type. E.g. store values directly into the result tuple instead of via an intermediate stack allocation.

Testing:
This change only affects builtin aggregate functions, for which we have test coverage already. If we were to allow wider use of this type, it would need further testing. Added an analyzer test to ensure we can't use the type for UDAs. Added a regression test for spilling avg(). Added a regression test for UDA with CHAR intermediate hitting DCHECK.

Perf:
Ran TPC-H locally. TPC-H Q17, which has a high-cardinality AVG(), improved dramatically.
+----------+-----------------------+---------+------------+------------+----------------+
| Workload | File Format           | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+----------+-----------------------+---------+------------+------------+----------------+
| TPCH(60) | parquet / none / none | 18.44   | -17.54%    | 11.92      | -5.34%         |
+----------+-----------------------+---------+------------+------------+----------------+

+----------+----------+-----------------------+--------+-------------+------------+-----------+----------------+-------------+-------+
| Workload | Query    | File Format           | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%) | Base StdDev(%) | Num Clients | Iters |
+----------+----------+-----------------------+--------+-------------+------------+-----------+----------------+-------------+-------+
| TPCH(60) | TPCH-Q12 | parquet / none / none | 18.40  | 17.64       | +4.32%     | 0.77%     | 1.09%          | 1           | 5     |
| TPCH(60) | TPCH-Q22 | parquet / none / none | 7.07   | 6.90        | +2.36%     | 0.28%     | 0.30%          | 1           | 5     |
| TPCH(60) | TPCH-Q3  | parquet / none / none | 12.37  | 12.11       | +2.10%     | 0.18%     | 0.15%          | 1           | 5     |
| TPCH(60) | TPCH-Q7  | parquet / none / none | 42.48  | 42.09       | +0.93%     | 2.45%     | 0.80%          | 1           | 5     |
| TPCH(60) | TPCH-Q6  | parquet / none / none | 3.18   | 3.15        | +0.89%     | 0.67%     | 0.76%          | 1           | 5     |
| TPCH(60) | TPCH-Q19 | parquet / none / none | 7.24   | 7.20        | +0.50%     | 0.95%     | 0.67%          | 1           | 5     |
| TPCH(60) | TPCH-Q10 | parquet / none / none | 13.37  | 13.30       | +0.50%     | 0.48%     | 1.39%          | 1           | 5     |
| TPCH(60) | TPCH-Q5  | parquet / none / none | 7.47   | 7.44        | +0.36%     | 0.58%     | 0.54%          | 1           | 5     |
| TPCH(60) | TPCH-Q11 | parquet / none / none | 2.03   | 2.02        | +0.06%     | 0.26%     | 1.95%          | 1           | 5     |
| TPCH(60) | TPCH-Q4  | parquet / none / none | 5.48   | 5.50        | -0.27%     | 0.62%     | 1.12%          | 1           | 5     |
| TPCH(60) | TPCH-Q13 | parquet / none / none | 22.11  | 22.18       | -0.31%     | 0.18%     | 0.55%          | 1           | 5     |
| TPCH(60) | TPCH-Q15 | parquet / none / none | 8.45   | 8.48        | -0.32%     | 0.40%     | 0.47%          | 1           | 5     |
| TPCH(60) | TPCH-Q9  | parquet / none / none | 33.39  | 33.66       | -0.81%     | 0.75%     | 0.59%          | 1           | 5     |
| TPCH(60) | TPCH-Q21 | parquet / none / none | 71.34  | 72.07       | -1.01%     | 1.84%     | 1.79%          | 1           | 5     |
| TPCH(60) | TPCH-Q14 | parquet / none / none | 5.93   | 6.00        | -1.07%     | 0.15%     | 0.69%          | 1           | 5     |
| TPCH(60) | TPCH-Q20 | parquet / none / none | 5.72   | 5.79        | -1.09%     | 0.59%     | 0.51%          | 1           | 5     |
| TPCH(60) | TPCH-Q18 | parquet / none / none | 45.42  | 45.93       | -1.10%     | 1.42%     | 0.50%          | 1           | 5     |
| TPCH(60) | TPCH-Q2  | parquet / none / none | 4.81   | 4.89        | -1.52%     | 1.68%     | 1.01%          | 1           | 5     |
| TPCH(60) | TPCH-Q16 | parquet / none / none | 5.41   | 5.52        | -1.98%     | 0.66%     | 0.73%          | 1           | 5     |
| TPCH(60) | TPCH-Q1  | parquet / none / none | 27.58  | 29.13       | -5.34%     | 0.24%     | 1.51%          | 1           | 5     |
| TPCH(60) | TPCH-Q8  | parquet / none / none | 12.61  | 14.30       | -11.78%    | 6.20%     | * 15.28% *     | 1           | 5     |
| TPCH(60) | TPCH-Q17 | parquet / none / none | 43.74  | 126.58      | I -65.44%  | 1.34%     | 9.60%          | 1           | 5     |
+----------+----------+-----------------------+--------+-------------+------------+-----------+----------------+-------------+-------+

Change-Id: Ife90cf27989f98ffb5ef5c39f1e09ce92e8cb87c
Reviewed-on: http://gerrit.cloudera.org:8080/7526
Tested-by: Impala Public Jenkins
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
63 lines
1.8 KiB
Plaintext
63 lines
1.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# TODO: IMPALA-3350: Add 'group by' to these tests to exercise different code paths.
|
|
select min(string_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select max(string_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select sample(timestamp_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 248 bytes.
|
|
====
|
|
---- QUERY
|
|
select group_concat(string_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select extract(year from timestamp_col) from functional.alltypes limit 10
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateLocal() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select trunc(timestamp_col, 'YEAR') from functional.alltypes limit 10
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateLocal() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select first_value(string_col) over (partition by month order by year) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select last_value(string_col) over (partition by month order by year) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 1 bytes.
|
|
====
|
|
---- QUERY
|
|
select rand() from functional.alltypes;
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 4 bytes.
|
|
====
|
|
---- QUERY
|
|
select case when min(int_col) = 0 then 0 end from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 16 bytes.
|
|
====
|
|
---- QUERY
|
|
select cast(string_col as char(120)) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContextImpl::AllocateLocal() failed to allocate 120 bytes.
|
|
====
|
|
---- QUERY
|
|
select appx_median(int_col) from functional.alltypes
|
|
---- CATCH
|
|
FunctionContext::Allocate() failed to allocate 248 bytes.
|
|
====
|