Files
impala/testdata/workloads/functional-query/queries/QueryTest/alloc-fail-init.test
Taras Bobrovytsky 529a5f99b9 IMPALA-4787: Optimize APPX_MEDIAN() memory usage
Before this change, ReservoirSample functions (such as APPX_MEDIAN())
allocated memory for 20,000 elements up front per grouping key. This
caused inefficient memory usage for aggregations with many grouping
keys.

This patch fixes this by initially allocating memory for 16 elements.
Once the buffer becomes full, we reallocate a new buffer with double
capacity and copy the original buffer into the new one. We continue
doubling the buffer size until the buffer has room for 20,000 elements
as before.

Testing:
Added some EE APPX_MEDIAN() tests on larger datasets that exercise the
resize code path.

Perf Benchmark (about 35,000 elements per bucket):

SELECT MAX(a) from (
  SELECT c1, appx_median(c2) as a FROM benchmark GROUP BY c1) t

BEFORE: 11s067ms
Operator       #Hosts   Avg Time   Max Time   #Rows  Est. #Rows  Peak Mem  Est. Peak Mem  Detail
-------------------------------------------------------------------------------------------------------------------------
06:AGGREGATE        1  124.726us  124.726us       1           1  28.00 KB        -1.00 B  FINALIZE
05:EXCHANGE         1   29.544us   29.544us       3           1         0        -1.00 B  UNPARTITIONED
02:AGGREGATE        3   86.406us  120.372us       3           1  44.00 KB       10.00 MB
04:AGGREGATE        3    1s840ms    2s824ms   2.00K          -1   1.02 GB      128.00 MB  FINALIZE
03:EXCHANGE         3    1s163ms    1s989ms   6.00K          -1         0              0  HASH(c1)
01:AGGREGATE        3    3s356ms    3s416ms   6.00K          -1   1.95 GB      128.00 MB  STREAMING
00:SCAN HDFS        3   64.962ms   65.490ms  65.54M          -1  25.97 MB       64.00 MB  tpcds_10_parquet.benchmark

AFTER: 9s465ms
Operator       #Hosts   Avg Time  Max Time   #Rows  Est. #Rows  Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------
06:AGGREGATE        1   73.961us  73.961us       1           1  28.00 KB        -1.00 B  FINALIZE
05:EXCHANGE         1   18.101us  18.101us       3           1         0        -1.00 B  UNPARTITIONED
02:AGGREGATE        3   75.795us  83.969us       3           1  44.00 KB       10.00 MB
04:AGGREGATE        3    1s608ms   2s683ms   2.00K          -1   1.02 GB      128.00 MB  FINALIZE
03:EXCHANGE         3  826.683ms   1s322ms   6.00K          -1         0              0  HASH(c1)
01:AGGREGATE        3    2s457ms   2s672ms   6.00K          -1   3.14 GB      128.00 MB  STREAMING
00:SCAN HDFS        3   81.514ms  89.056ms  65.54M          -1  25.94 MB       64.00 MB  tpcds_10_parquet.benchmark

Memory Benchmark (about 12 elements per bucket):

SELECT MAX(a) FROM (
  SELECT ss_customer_sk, APPX_MEDIAN(ss_sold_date_sk) as a
  FROM tpcds_parquet.store_sales
  GROUP BY ss_customer_sk) t

BEFORE: 7s477ms
Operator       #Hosts   Avg Time   Max Time    #Rows  Est. #Rows  Peak Mem  Est. Peak Mem  Detail
---------------------------------------------------------------------------------------------------------------------
06:AGGREGATE        1  114.686us  114.686us        1           1  28.00 KB        -1.00 B  FINALIZE
05:EXCHANGE         1   18.214us   18.214us        3           1         0        -1.00 B  UNPARTITIONED
02:AGGREGATE        3  147.055us  165.464us        3           1  28.00 KB       10.00 MB
04:AGGREGATE        3    2s043ms    2s147ms   14.82K          -1   4.94 GB      128.00 MB  FINALIZE
03:EXCHANGE         3  840.528ms  943.254ms   15.61K          -1         0              0  HASH(ss_customer_sk)
01:AGGREGATE        3    1s769ms    1s869ms   15.61K          -1   5.32 GB      128.00 MB  STREAMING
00:SCAN HDFS        3   17.941ms   37.109ms  183.59K          -1   1.94 MB       16.00 MB  tpcds_parquet.store_sales

AFTER: 434ms
Operator       #Hosts   Avg Time   Max Time    #Rows  Est. #Rows  Peak Mem  Est. Peak Mem  Detail
---------------------------------------------------------------------------------------------------------------------
06:AGGREGATE        1  125.915us  125.915us        1           1  28.00 KB        -1.00 B  FINALIZE
05:EXCHANGE         1   72.179us   72.179us        3           1         0        -1.00 B  UNPARTITIONED
02:AGGREGATE        3   79.054us   83.385us        3           1  28.00 KB       10.00 MB
04:AGGREGATE        3    6.559ms    7.669ms   14.82K          -1  17.32 MB      128.00 MB  FINALIZE
03:EXCHANGE         3   67.370us   85.068us   15.60K          -1         0              0  HASH(ss_customer_sk)
01:AGGREGATE        3   19.245ms   24.472ms   15.60K          -1   9.48 MB      128.00 MB  STREAMING
00:SCAN HDFS        3   53.173ms   55.844ms  183.59K          -1   1.18 MB       16.00 MB  tpcds_parquet.store_sales

Change-Id: I99adaad574d4fb0a3cf38c6cbad8b2a23df12968
Reviewed-on: http://gerrit.cloudera.org:8080/6025
Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Tested-by: Impala Public Jenkins
2017-03-16 05:59:40 +00:00

98 lines
2.8 KiB
Plaintext

====
---- QUERY
# Aggregate-function cases. Each QUERY section in this group is paired with a CATCH
# section containing a "failed to allocate N bytes" message.
# NOTE(review): how the allocation failure is triggered is not visible in this file;
# presumably the test runner injects it. Confirm against the harness.
# TODO: IMPALA-3350: Add 'group by' to these tests to exercise different code paths.
select ndv(string_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 1024 bytes.
====
---- QUERY
# min() over a string column; the CATCH text below names a 1-byte allocation.
select min(string_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 1 bytes.
====
---- QUERY
# max() over a string column; same 1-byte message as the min() case.
select max(string_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 1 bytes.
====
---- QUERY
# avg() over a decimal column; the CATCH size (48 bytes) is larger than for the
# double and timestamp avg() cases that follow (16 bytes each).
select avg(d1) from functional.decimal_tbl
---- CATCH
FunctionContext::Allocate() failed to allocate 48 bytes.
====
---- QUERY
# avg() over a double column.
select avg(double_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 16 bytes.
====
---- QUERY
# avg() over a timestamp column; same 16-byte message as the double case.
select avg(timestamp_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 16 bytes.
====
---- QUERY
# sample() over a timestamp column; the CATCH text names a 248-byte allocation.
select sample(timestamp_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 248 bytes.
====
---- QUERY
# distinctpc() over an int column; the CATCH text names a 256-byte allocation.
select distinctpc(int_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 256 bytes.
====
---- QUERY
# distinctpcsa() over a string column; same 256-byte message as distinctpc().
select distinctpcsa(string_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 256 bytes.
====
---- QUERY
# group_concat() over a string column; the CATCH text names a 4-byte allocation.
select group_concat(string_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 4 bytes.
====
---- QUERY
# Analytic and scalar-function cases. Each QUERY section in this group is paired with
# a CATCH section containing a "failed to allocate N bytes" message. Some messages name
# FunctionContext::Allocate() and others FunctionContextImpl::AllocateLocal().
# This case: rank() as an analytic function partitioned by month, ordered by year.
select rank() over (partition by month order by year) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 16 bytes.
====
---- QUERY
# extract() on a timestamp column, limited to 10 rows; the CATCH text names
# AllocateLocal() rather than Allocate().
select extract(year from timestamp_col) from functional.alltypes limit 10
---- CATCH
FunctionContextImpl::AllocateLocal() failed to allocate 4 bytes.
====
---- QUERY
# trunc() on a timestamp column, limited to 10 rows; same AllocateLocal() 4-byte
# message as the extract() case.
select trunc(timestamp_col, 'YEAR') from functional.alltypes limit 10
---- CATCH
FunctionContextImpl::AllocateLocal() failed to allocate 4 bytes.
====
---- QUERY
# first_value() over a string column as an analytic function.
select first_value(string_col) over (partition by month order by year) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 1 bytes.
====
---- QUERY
# last_value() over a string column; same 1-byte message as first_value().
select last_value(string_col) over (partition by month order by year) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 1 bytes.
====
---- QUERY
# rand() with no arguments; the CATCH text names Allocate(), not AllocateLocal().
# NOTE(review): this is the only query in this group terminated with ';'.
select rand() from functional.alltypes;
---- CATCH
FunctionContext::Allocate() failed to allocate 4 bytes.
====
---- QUERY
# min() nested inside a CASE expression.
select case when min(int_col) = 0 then 0 end from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 16 bytes.
====
---- QUERY
# cast to char(120); the byte count in the CATCH text equals the declared char length.
select cast(string_col as char(120)) from functional.alltypes
---- CATCH
FunctionContextImpl::AllocateLocal() failed to allocate 120 bytes.
====
---- QUERY
# appx_median() over an int column; the CATCH section below contains a 248-byte
# "failed to allocate" message.
# NOTE(review): 248 bytes is presumably the size of the aggregate's initial state
# buffer; confirm against the function's Init code before changing this value.
select appx_median(int_col) from functional.alltypes
---- CATCH
FunctionContext::Allocate() failed to allocate 248 bytes.
====