mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
Before this change, ReservoirSample functions (such as APPX_MEDIAN()) allocated memory for 20,000 elements up front per grouping key. This caused inefficient memory usage for aggregations with many grouping keys. This patch fixes this by initially allocating memory for 16 elements. Once the buffer becomes full, we reallocate a new buffer with double capacity and copy the original buffer into the new one. We continue doubling the buffer size until the buffer has room for 20,000 elements as before. Testing: Added some EE APPX_MEDIAN() tests on larger datasets that exercise the resize code path. Perf Benchrmark (about 35,000 elements per bucket): SELECT MAX(a) from ( SELECT c1, appx_median(c2) as a FROM benchmark GROUP BY c1) t BEFORE: 11s067ms Operator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail ------------------------------------------------------------------------------------------------------------------------- 06:AGGREGATE 1 124.726us 124.726us 1 1 28.00 KB -1.00 B FINALIZE 05:EXCHANGE 1 29.544us 29.544us 3 1 0 -1.00 B UNPARTITIONED 02:AGGREGATE 3 86.406us 120.372us 3 1 44.00 KB 10.00 MB 04:AGGREGATE 3 1s840ms 2s824ms 2.00K -1 1.02 GB 128.00 MB FINALIZE 03:EXCHANGE 3 1s163ms 1s989ms 6.00K -1 0 0 HASH(c1) 01:AGGREGATE 3 3s356ms 3s416ms 6.00K -1 1.95 GB 128.00 MB STREAMING 00:SCAN HDFS 3 64.962ms 65.490ms 65.54M -1 25.97 MB 64.00 MB tpcds_10_parquet.benchmark AFTER: 9s465ms Operator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail ------------------------------------------------------------------------------------------------------------------------ 06:AGGREGATE 1 73.961us 73.961us 1 1 28.00 KB -1.00 B FINALIZE 05:EXCHANGE 1 18.101us 18.101us 3 1 0 -1.00 B UNPARTITIONED 02:AGGREGATE 3 75.795us 83.969us 3 1 44.00 KB 10.00 MB 04:AGGREGATE 3 1s608ms 2s683ms 2.00K -1 1.02 GB 128.00 MB FINALIZE 03:EXCHANGE 3 826.683ms 1s322ms 6.00K -1 0 0 HASH(c1) 01:AGGREGATE 3 2s457ms 2s672ms 6.00K -1 3.14 GB 128.00 MB STREAMING 00:SCAN HDFS 3 81.514ms 89.056ms 65.54M -1 25.94 MB 64.00 MB tpcds_10_parquet.benchmark Memory Benchmark (about 12 elements per bucket): SELECT MAX(a) FROM ( SELECT ss_customer_sk, APPX_MEDIAN(ss_sold_date_sk) as a FROM tpcds_parquet.store_sales GROUP BY ss_customer_sk) t BEFORE: 7s477ms Operator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail --------------------------------------------------------------------------------------------------------------------- 06:AGGREGATE 1 114.686us 114.686us 1 1 28.00 KB -1.00 B FINALIZE 05:EXCHANGE 1 18.214us 18.214us 3 1 0 -1.00 B UNPARTITIONED 02:AGGREGATE 3 147.055us 165.464us 3 1 28.00 KB 10.00 MB 04:AGGREGATE 3 2s043ms 2s147ms 14.82K -1 4.94 GB 128.00 MB FINALIZE 03:EXCHANGE 3 840.528ms 943.254ms 15.61K -1 0 0 HASH(ss_customer_sk) 01:AGGREGATE 3 1s769ms 1s869ms 15.61K -1 5.32 GB 128.00 MB STREAMING 00:SCAN HDFS 3 17.941ms 37.109ms 183.59K -1 1.94 MB 16.00 MB tpcds_parquet.store_sales AFTER: 434ms Operator #Hosts Avg Time Max Time #Rows Est. #Rows Peak Mem Est. Peak Mem Detail --------------------------------------------------------------------------------------------------------------------- 06:AGGREGATE 1 125.915us 125.915us 1 1 28.00 KB -1.00 B FINALIZE 05:EXCHANGE 1 72.179us 72.179us 3 1 0 -1.00 B UNPARTITIONED 02:AGGREGATE 3 79.054us 83.385us 3 1 28.00 KB 10.00 MB 04:AGGREGATE 3 6.559ms 7.669ms 14.82K -1 17.32 MB 128.00 MB FINALIZE 03:EXCHANGE 3 67.370us 85.068us 15.60K -1 0 0 HASH(ss_customer_sk) 01:AGGREGATE 3 19.245ms 24.472ms 15.60K -1 9.48 MB 128.00 MB STREAMING 00:SCAN HDFS 3 53.173ms 55.844ms 183.59K -1 1.18 MB 16.00 MB tpcds_parquet.store_sales Change-Id: I99adaad574d4fb0a3cf38c6cbad8b2a23df12968 Reviewed-on: http://gerrit.cloudera.org:8080/6025 Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com> Tested-by: Impala Public Jenkins
1258 lines
30 KiB
Plaintext
1258 lines
30 KiB
Plaintext
====
|
|
---- QUERY
|
|
# test a larger dataset, includes nulls
|
|
# the exact result could vary slightly due to numeric instability
|
|
# 0.001 is a conservative upperbound on the possible difference in results
|
|
SELECT abs(cast(variance(tinyint_col) as double) - 6.66741) < 0.001,
|
|
abs(cast(variance(double_col) as double) - 8470806.708) < 0.001
|
|
from alltypesagg
|
|
---- RESULTS
|
|
true,true
|
|
---- TYPES
|
|
boolean, boolean
|
|
====
|
|
---- QUERY
|
|
# No tuples processed (should return null)
|
|
SELECT variance(tinyint_col), stddev(smallint_col), variance_pop(int_col),
|
|
stddev_pop(bigint_col)
|
|
from alltypesagg WHERE id = -9999999
|
|
---- RESULTS
|
|
NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
double, double, double, double
|
|
====
|
|
---- QUERY
|
|
# exactly 1 tuple processed (variance_pop & stddev_pop are 0, stddev and variance
|
|
# are NULL)
|
|
SELECT variance(tinyint_col), variance_samp(smallint_col), variance_pop(int_col),
|
|
stddev(smallint_col), stddev_samp(smallint_col), stddev_pop(bigint_col)
|
|
from alltypesagg WHERE id = 1006
|
|
---- RESULTS
|
|
NULL,NULL,0,NULL,NULL,0
|
|
---- TYPES
|
|
double, double, double, double, double, double
|
|
====
|
|
---- QUERY
|
|
# Includes one row which is null, and test the aliases for variance() as well
|
|
SELECT variance(tinyint_col), variance(smallint_col), variance(int_col),
|
|
variance(bigint_col), variance(float_col), variance(double_col),
|
|
var_samp(double_col), variance_samp(double_col)
|
|
from alltypesagg WHERE id >= 1000 AND id < 1006
|
|
---- RESULTS
|
|
2.5,2.5,2.5,250,3.025,255.025,255.025,255.025
|
|
---- TYPES
|
|
double, double, double, double, double, double,double, double
|
|
====
|
|
---- QUERY
|
|
# Test population variance (including the var_pop() alias)
|
|
SELECT variance_pop(tinyint_col), variance_pop(smallint_col), variance_pop(int_col),
|
|
variance_pop(bigint_col), variance_pop(float_col), variance_pop(double_col),
|
|
var_pop(double_col)
|
|
from alltypesagg WHERE id >= 1000 AND id < 1006
|
|
---- RESULTS
|
|
2,2,2,200,2.42,204.02,204.02
|
|
---- TYPES
|
|
double, double, double, double, double, double, double
|
|
====
|
|
---- QUERY
|
|
SELECT round(stddev(tinyint_col), 5),
|
|
round(stddev(smallint_col), 5),
|
|
round(stddev(int_col), 5),
|
|
round(stddev(bigint_col), 5),
|
|
round(stddev(float_col), 5),
|
|
round(stddev(double_col), 5),
|
|
round(stddev_samp(double_col), 5)
|
|
from alltypesagg WHERE id >= 1000 AND id < 1006
|
|
---- RESULTS
|
|
1.58114,1.58114,1.58114,15.81139,1.73925,15.96950,15.96950
|
|
---- TYPES
|
|
double, double, double, double, double, double, double
|
|
====
|
|
---- QUERY
|
|
# no grouping exprs, cols contain nulls except for bool cols
|
|
SELECT round(stddev_pop(tinyint_col), 5),
|
|
round(stddev_pop(smallint_col), 5),
|
|
round(stddev_pop(int_col), 5),
|
|
round(stddev_pop(bigint_col), 5),
|
|
round(stddev_pop(float_col), 5),
|
|
round(stddev_pop(double_col), 5)
|
|
from alltypesagg WHERE id >= 1000 AND id < 1006
|
|
---- RESULTS
|
|
1.41421,1.41421,1.41421,14.14214,1.55563,14.28356
|
|
---- TYPES
|
|
double, double, double, double, double, double
|
|
====
|
|
---- QUERY
|
|
# no grouping exprs, cols contain nulls except for bool cols
|
|
select count(bool_col), min(bool_col), max(bool_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,false,true
|
|
---- TYPES
|
|
bigint, boolean, boolean
|
|
====
|
|
---- QUERY
|
|
# no grouping exprs, cols contain nulls
|
|
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,9000,1,9,45000,5
|
|
---- TYPES
|
|
bigint, bigint, tinyint, tinyint, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), count(smallint_col), min(smallint_col), max(smallint_col), sum(smallint_col),
|
|
avg(smallint_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,9900,1,99,495000,50
|
|
---- TYPES
|
|
bigint, bigint, smallint, smallint, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), count(int_col), min(int_col), max(int_col), sum(int_col), avg(int_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,9990,1,999,4995000,500
|
|
---- TYPES
|
|
bigint, bigint, int, int, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), count(bigint_col), min(bigint_col), max(bigint_col), sum(bigint_col),
|
|
avg(bigint_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,9990,10,9990,49950000,5000
|
|
---- TYPES
|
|
bigint, bigint, bigint, bigint, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), count(float_col), min(float_col), max(float_col), sum(float_col),
|
|
avg(float_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309
|
|
---- TYPES
|
|
bigint, bigint, float, float, double, double
|
|
====
|
|
---- QUERY
|
|
select count(*), count(double_col), min(double_col), max(double_col), round(sum(double_col), 0),
|
|
round(avg(double_col), 0)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,9990,10.1,10089.9,50449500,5050
|
|
---- TYPES
|
|
bigint, bigint, double, double, double, double
|
|
====
|
|
---- QUERY
|
|
select count(*), min(string_col), max(string_col), min(date_string_col),
|
|
max(date_string_col)
|
|
from alltypesagg where day is not null
|
|
---- RESULTS
|
|
10000,'0','999','01/01/10','01/10/10'
|
|
---- TYPES
|
|
bigint, string, string, string, string
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-3018. Verify update() functions of min() and max() handle
|
|
# zero-length string correctly.
|
|
select max(str), min(str) from (values ('aaa' as str), (''), ('123')) as tmp
|
|
---- RESULTS
|
|
'aaa',''
|
|
---- TYPES
|
|
string,string
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-3018. Verify update() function of last_value() handles
|
|
# zero-length string correctly.
|
|
select last_value(b) over (partition by a order by d) from functional.nulltable;
|
|
---- RESULTS
|
|
''
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Test for IMPALA-3018. Verify update() function of first_value() handles
|
|
# zero-length string correctly.
|
|
select first_value(b) over (partition by a order by d) from functional.nulltable;
|
|
---- RESULTS
|
|
''
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# grouping by different data types, with NULLs
|
|
select tinyint_col, count(*) from alltypesagg where day is not null group by 1 order by 1
|
|
---- RESULTS
|
|
1,1000
|
|
2,1000
|
|
3,1000
|
|
4,1000
|
|
5,1000
|
|
6,1000
|
|
7,1000
|
|
8,1000
|
|
9,1000
|
|
NULL,1000
|
|
---- TYPES
|
|
tinyint, bigint
|
|
====
|
|
---- QUERY
|
|
# grouping by different data types, with NULLs, grouping expr missing from select list
|
|
select bool_col,min(bool_col),max(bool_col) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
false,false,false
|
|
true,true,true
|
|
---- TYPES
|
|
boolean,boolean,boolean
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypesagg where day is not null group by tinyint_col
|
|
---- RESULTS
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select smallint_col % 10, count(*) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
3,1000
|
|
NULL,100
|
|
8,1000
|
|
7,1000
|
|
0,900
|
|
6,1000
|
|
9,1000
|
|
5,1000
|
|
4,1000
|
|
1,1000
|
|
2,1000
|
|
---- TYPES
|
|
smallint, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypesagg where day is not null group by smallint_col % 10
|
|
---- RESULTS
|
|
1000
|
|
100
|
|
1000
|
|
1000
|
|
900
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 10, count(*) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
4,1000
|
|
9,1000
|
|
NULL,10
|
|
6,1000
|
|
5,1000
|
|
2,1000
|
|
0,990
|
|
1,1000
|
|
3,1000
|
|
8,1000
|
|
7,1000
|
|
---- TYPES
|
|
int, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypesagg where day is not null group by int_col % 10
|
|
---- RESULTS
|
|
1000
|
|
1000
|
|
10
|
|
1000
|
|
1000
|
|
1000
|
|
990
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Check that ALL inside aggregates is correct
|
|
select count(ALL *) from alltypesagg where day is not null group by int_col % 10
|
|
---- RESULTS
|
|
1000
|
|
1000
|
|
10
|
|
1000
|
|
1000
|
|
1000
|
|
990
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select bigint_col % 100, count(*) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
60,1000
|
|
70,1000
|
|
20,1000
|
|
NULL,10
|
|
40,1000
|
|
80,1000
|
|
30,1000
|
|
0,990
|
|
50,1000
|
|
90,1000
|
|
10,1000
|
|
---- TYPES
|
|
bigint, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypesagg where day is not null group by bigint_col % 100
|
|
---- RESULTS
|
|
1000
|
|
1000
|
|
1000
|
|
10
|
|
1000
|
|
1000
|
|
1000
|
|
990
|
|
1000
|
|
1000
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select float_col, float_col * 2, count(*) from alltypes group by 1, 2
|
|
---- RESULTS
|
|
0,0,730
|
|
3.299999952316284,6.599999904632568,730
|
|
8.800000190734863,17.60000038146973,730
|
|
6.599999904632568,13.19999980926514,730
|
|
7.699999809265137,15.39999961853027,730
|
|
2.200000047683716,4.400000095367432,730
|
|
5.5,11,730
|
|
1.100000023841858,2.200000047683716,730
|
|
9.899999618530273,19.79999923706055,730
|
|
4.400000095367432,8.800000190734863,730
|
|
---- TYPES
|
|
float, double, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypes group by float_col
|
|
---- RESULTS
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select float_col, count(*) from alltypesagg where float_col is null and day is not null group by 1
|
|
---- RESULTS
|
|
NULL,10
|
|
---- TYPES
|
|
float, bigint
|
|
====
|
|
---- QUERY
|
|
select double_col, double_col * 2, count(*) from alltypes group by 1, 2
|
|
---- RESULTS
|
|
0,0,730
|
|
90.90000000000001,181.8,730
|
|
40.4,80.8,730
|
|
20.2,40.4,730
|
|
80.8,161.6,730
|
|
10.1,20.2,730
|
|
70.7,141.4,730
|
|
50.5,101,730
|
|
30.3,60.6,730
|
|
60.6,121.2,730
|
|
---- TYPES
|
|
double, double, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypes group by double_col
|
|
---- RESULTS
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
730
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select double_col, count(*) from alltypesagg where double_col is null and day is not null group by 1
|
|
---- RESULTS
|
|
NULL,10
|
|
---- TYPES
|
|
double, bigint
|
|
====
|
|
---- QUERY
|
|
select date_string_col, count(*) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
'01/08/10',1000
|
|
'01/09/10',1000
|
|
'01/02/10',1000
|
|
'01/06/10',1000
|
|
'01/01/10',1000
|
|
'01/03/10',1000
|
|
'01/04/10',1000
|
|
'01/10/10',1000
|
|
'01/07/10',1000
|
|
'01/05/10',1000
|
|
---- TYPES
|
|
string, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypesagg where day is not null group by date_string_col
|
|
---- RESULTS
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
1000
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# grouping by multiple exprs, with nulls
|
|
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg
|
|
where day = 1 group by 1, 2
|
|
---- RESULTS
|
|
0,0,120
|
|
0,1,90
|
|
0,2,90
|
|
1,0,90
|
|
1,1,120
|
|
1,2,90
|
|
2,0,90
|
|
2,1,90
|
|
2,2,120
|
|
NULL,0,30
|
|
NULL,1,30
|
|
NULL,2,30
|
|
NULL,NULL,10
|
|
---- TYPES
|
|
tinyint, smallint, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from alltypesagg
|
|
where day = 1 group by tinyint_col % 3, smallint_col % 3
|
|
---- RESULTS
|
|
10
|
|
120
|
|
120
|
|
120
|
|
30
|
|
30
|
|
30
|
|
90
|
|
90
|
|
90
|
|
90
|
|
90
|
|
90
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# same result as previous query
|
|
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg where day = 1 group by 2, 1
|
|
---- RESULTS
|
|
0,0,120
|
|
0,1,90
|
|
0,2,90
|
|
1,0,90
|
|
1,1,120
|
|
1,2,90
|
|
2,0,90
|
|
2,1,90
|
|
2,2,120
|
|
NULL,0,30
|
|
NULL,1,30
|
|
NULL,2,30
|
|
NULL,NULL,10
|
|
---- TYPES
|
|
tinyint, smallint, bigint
|
|
====
|
|
---- QUERY
|
|
select tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col, count(*)
|
|
from alltypesagg
|
|
where (date_string_col = '01/01/10' or date_string_col = '01/02/10') and day is not null
|
|
group by 1, 2, 3, 4, 5
|
|
---- RESULTS
|
|
1,1,1,0,'01/02/10',500
|
|
0,0,0,0,'01/02/10',400
|
|
NULL,NULL,0,0,'01/02/10',9
|
|
NULL,NULL,NULL,NULL,'01/02/10',1
|
|
0,0,0,0,'01/01/10',400
|
|
NULL,NULL,0,0,'01/01/10',9
|
|
NULL,NULL,NULL,NULL,'01/01/10',1
|
|
NULL,0,0,0,'01/02/10',90
|
|
1,1,1,0,'01/01/10',500
|
|
NULL,0,0,0,'01/01/10',90
|
|
---- TYPES
|
|
tinyint, smallint, int, bigint, string, bigint
|
|
====
|
|
---- QUERY
|
|
select count(*)
|
|
from alltypesagg
|
|
where (date_string_col = '01/01/10' or date_string_col = '01/02/10') and day is not null
|
|
group by tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col
|
|
---- RESULTS
|
|
500
|
|
400
|
|
9
|
|
1
|
|
400
|
|
9
|
|
1
|
|
90
|
|
500
|
|
90
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# no grouping cols, no matching rows
|
|
select count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
|
|
from alltypesagg
|
|
where tinyint_col = -1 and day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, tinyint, tinyint, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col)
|
|
from alltypesagg
|
|
where smallint_col = -1 and day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, smallint, smallint, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), min(int_col), max(int_col), sum(int_col), avg(int_col)
|
|
from alltypesagg
|
|
where int_col = -1 and day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, int, int, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col)
|
|
from alltypesagg
|
|
where bigint_col = -1 and day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, bigint, bigint, bigint, double
|
|
====
|
|
---- QUERY
|
|
select count(*), min(float_col), max(float_col), sum(float_col), avg(float_col)
|
|
from alltypesagg
|
|
where float_col < -1.0 and day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, float, float, double, double
|
|
====
|
|
---- QUERY
|
|
select count(*), min(double_col), max(double_col), sum(double_col), avg(double_col)
|
|
from alltypesagg
|
|
where double_col < -1.0 and day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, double, double, double, double
|
|
====
|
|
---- QUERY
|
|
# HAVING clauses over all aggregation functions, plus compound HAVING clauses
|
|
select int_col % 7, count(*), max(int_col) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
4,1430,998
|
|
NULL,10,NULL
|
|
6,1420,993
|
|
5,1430,999
|
|
2,1430,996
|
|
0,1420,994
|
|
1,1430,995
|
|
3,1430,997
|
|
---- TYPES
|
|
int, bigint, int
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*) from alltypesagg where day is not null group by 1 having max(int_col) > 991
|
|
---- RESULTS
|
|
4,1430
|
|
6,1420
|
|
5,1430
|
|
2,1430
|
|
0,1420
|
|
1,1430
|
|
3,1430
|
|
---- TYPES
|
|
int, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*) from alltypesagg where day is not null group by 1
|
|
having max(int_col) > 991 and count(*) > 1420
|
|
---- RESULTS
|
|
4,1430
|
|
5,1430
|
|
2,1430
|
|
1,1430
|
|
3,1430
|
|
---- TYPES
|
|
int, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*) from alltypesagg where day is not null group by 1
|
|
having min(int_col) < 7
|
|
---- RESULTS
|
|
4,1430
|
|
6,1420
|
|
5,1430
|
|
2,1430
|
|
1,1430
|
|
3,1430
|
|
---- TYPES
|
|
int, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*) from alltypesagg where day is not null group by 1
|
|
having min(int_col) < 7 and count(*) > 1420
|
|
---- RESULTS
|
|
4,1430
|
|
5,1430
|
|
2,1430
|
|
1,1430
|
|
3,1430
|
|
---- TYPES
|
|
int, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
4,1430,716430
|
|
NULL,10,NULL
|
|
6,1420,709290
|
|
5,1430,717860
|
|
2,1430,713570
|
|
0,1420,710710
|
|
1,1430,712140
|
|
3,1430,715000
|
|
---- TYPES
|
|
int, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
|
|
having sum(int_col) >= 715000
|
|
---- RESULTS
|
|
4,1430,716430
|
|
5,1430,717860
|
|
3,1430,715000
|
|
---- TYPES
|
|
int, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
|
|
having sum(int_col) >= 715000 or count(*) > 1420
|
|
---- RESULTS
|
|
4,1430,716430
|
|
5,1430,717860
|
|
2,1430,713570
|
|
1,1430,712140
|
|
3,1430,715000
|
|
---- TYPES
|
|
int, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1
|
|
having sum(int_col) is null
|
|
---- RESULTS
|
|
NULL,10,NULL
|
|
---- TYPES
|
|
int, bigint, bigint
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1
|
|
---- RESULTS
|
|
4,1430,501
|
|
NULL,10,NULL
|
|
6,1420,499.5
|
|
5,1430,502
|
|
2,1430,499
|
|
0,1420,500.5
|
|
1,1430,498
|
|
3,1430,500
|
|
---- TYPES
|
|
int, bigint, double
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1
|
|
having avg(int_col) > 500
|
|
---- RESULTS
|
|
4,1430,501
|
|
5,1430,502
|
|
0,1420,500.5
|
|
---- TYPES
|
|
int, bigint, double
|
|
====
|
|
---- QUERY
|
|
select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1
|
|
having avg(int_col) > 500 or count(*) = 10
|
|
---- RESULTS
|
|
4,1430,501
|
|
NULL,10,NULL
|
|
5,1430,502
|
|
0,1420,500.5
|
|
---- TYPES
|
|
int, bigint, double
|
|
====
|
|
---- QUERY
|
|
select timestamp_col, count(*) from alltypesagg where day is not null
|
|
group by timestamp_col having timestamp_col < cast('2010-01-01 01:05:20' as timestamp)
|
|
---- RESULTS
|
|
2010-01-01 00:49:11.760000000,1
|
|
2010-01-01 01:01:18.300000000,1
|
|
2010-01-01 00:17:01.360000000,1
|
|
2010-01-01 00:58:16.530000000,1
|
|
2010-01-01 00:09:00.360000000,1
|
|
2010-01-01 00:00:00,1
|
|
2010-01-01 01:00:17.700000000,1
|
|
2010-01-01 00:57:15.960000000,1
|
|
2010-01-01 00:24:02.760000000,1
|
|
2010-01-01 00:23:02.530000000,1
|
|
2010-01-01 00:45:09.900000000,1
|
|
2010-01-01 00:39:07.410000000,1
|
|
2010-01-01 00:33:05.280000000,1
|
|
2010-01-01 00:03:00.300000000,1
|
|
2010-01-01 00:20:01.900000000,1
|
|
2010-01-01 00:36:06.300000000,1
|
|
2010-01-01 00:44:09.460000000,1
|
|
2010-01-01 00:14:00.910000000,1
|
|
2010-01-01 00:31:04.650000000,1
|
|
2010-01-01 00:48:11.280000000,1
|
|
2010-01-01 01:03:19.530000000,1
|
|
2010-01-01 00:29:04.600000000,1
|
|
2010-01-01 01:02:18.910000000,1
|
|
2010-01-01 00:16:01.200000000,1
|
|
2010-01-01 00:47:10.810000000,1
|
|
2010-01-01 00:51:12.750000000,1
|
|
2010-01-01 00:55:14.850000000,1
|
|
2010-01-01 00:42:08.610000000,1
|
|
2010-01-01 00:56:15.400000000,1
|
|
2010-01-01 00:05:00.100000000,1
|
|
2010-01-01 00:43:09.300000000,1
|
|
2010-01-01 00:28:03.780000000,1
|
|
2010-01-01 00:04:00.600000000,1
|
|
2010-01-01 00:54:14.310000000,1
|
|
2010-01-01 00:26:03.250000000,1
|
|
2010-01-01 00:32:04.960000000,1
|
|
2010-01-01 00:46:10.350000000,1
|
|
2010-01-01 00:37:06.660000000,1
|
|
2010-01-01 00:50:12.250000000,1
|
|
2010-01-01 00:27:03.510000000,1
|
|
2010-01-01 00:19:01.710000000,1
|
|
2010-01-01 00:40:07.800000000,1
|
|
2010-01-01 00:07:00.210000000,1
|
|
2010-01-01 00:22:02.310000000,1
|
|
2010-01-01 00:21:02.100000000,1
|
|
2010-01-01 00:18:01.530000000,1
|
|
2010-01-01 00:11:00.550000000,1
|
|
2010-01-01 00:35:05.950000000,1
|
|
2010-01-01 00:30:04.350000000,1
|
|
2010-01-01 00:08:00.280000000,1
|
|
2010-01-01 00:34:05.610000000,1
|
|
2010-01-01 00:15:01.500000000,1
|
|
2010-01-01 00:41:08.200000000,1
|
|
2010-01-01 00:02:00.100000000,1
|
|
2010-01-01 00:01:00,1
|
|
2010-01-01 00:10:00.450000000,1
|
|
2010-01-01 00:52:13.260000000,1
|
|
2010-01-01 01:04:20.160000000,1
|
|
2010-01-01 00:12:00.660000000,1
|
|
2010-01-01 00:38:07.300000000,1
|
|
2010-01-01 00:53:13.780000000,1
|
|
2010-01-01 00:25:03,1
|
|
2010-01-01 00:59:17.110000000,1
|
|
2010-01-01 00:06:00.150000000,1
|
|
2010-01-01 00:13:00.780000000,1
|
|
---- TYPES
|
|
timestamp, bigint
|
|
====
|
|
---- QUERY
|
|
# Test NULLs in aggregate functions
|
|
select count(NULL), min(NULL), max(NULL), sum(NULL), avg(NULL) from alltypesagg
|
|
where day is not null
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint, boolean, boolean, bigint, double
|
|
====
|
|
---- QUERY
|
|
# Test ignored distinct in MIN and MAX with NULLs
|
|
select min(distinct NULL), max(distinct NULL) from alltypes
|
|
---- RESULTS
|
|
NULL,NULL
|
|
---- TYPES
|
|
boolean, boolean
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with default delimiter. Use a subquery with an ORDER BY to
|
|
# ensure group_concat results are in a deterministic order.
|
|
select day, group_concat(string_col)
|
|
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
|
|
group by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
3,'3, 103, 203, 303, 403, 503, 603, 703, 803, 903'
|
|
5,'5, 105, 205, 305, 405, 505, 605, 705, 805, 905'
|
|
8,'8, 108, 208, 308, 408, 508, 608, 708, 808, 908'
|
|
4,'4, 104, 204, 304, 404, 504, 604, 704, 804, 904'
|
|
9,'9, 109, 209, 309, 409, 509, 609, 709, 809, 909'
|
|
2,'2, 102, 202, 302, 402, 502, 602, 702, 802, 902'
|
|
6,'6, 106, 206, 306, 406, 506, 606, 706, 806, 906'
|
|
10,'10, 110, 210, 310, 410, 510, 610, 710, 810, 910'
|
|
7,'7, 107, 207, 307, 407, 507, 607, 707, 807, 907'
|
|
1,'1, 101, 201, 301, 401, 501, 601, 701, 801, 901'
|
|
---- TYPES
|
|
int, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with NULL (default) delimiter
|
|
select day, group_concat(string_col, NULL)
|
|
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
|
|
group by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
3,'3, 103, 203, 303, 403, 503, 603, 703, 803, 903'
|
|
5,'5, 105, 205, 305, 405, 505, 605, 705, 805, 905'
|
|
8,'8, 108, 208, 308, 408, 508, 608, 708, 808, 908'
|
|
4,'4, 104, 204, 304, 404, 504, 604, 704, 804, 904'
|
|
9,'9, 109, 209, 309, 409, 509, 609, 709, 809, 909'
|
|
2,'2, 102, 202, 302, 402, 502, 602, 702, 802, 902'
|
|
6,'6, 106, 206, 306, 406, 506, 606, 706, 806, 906'
|
|
10,'10, 110, 210, 310, 410, 510, 610, 710, 810, 910'
|
|
7,'7, 107, 207, 307, 407, 507, 607, 707, 807, 907'
|
|
1,'1, 101, 201, 301, 401, 501, 601, 701, 801, 901'
|
|
---- TYPES
|
|
int, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with both args as NULL
|
|
select day, group_concat(NULL, NULL)
|
|
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
|
|
group by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
3,'NULL'
|
|
5,'NULL'
|
|
8,'NULL'
|
|
4,'NULL'
|
|
9,'NULL'
|
|
2,'NULL'
|
|
6,'NULL'
|
|
10,'NULL'
|
|
7,'NULL'
|
|
1,'NULL'
|
|
---- TYPES
|
|
int, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with arrow delimiter
|
|
select day, group_concat(string_col, "->")
|
|
from (select * from alltypesagg where id % 100 = day order by id limit 99999) a
|
|
group by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
3,'3->103->203->303->403->503->603->703->803->903'
|
|
5,'5->105->205->305->405->505->605->705->805->905'
|
|
8,'8->108->208->308->408->508->608->708->808->908'
|
|
4,'4->104->204->304->404->504->604->704->804->904'
|
|
9,'9->109->209->309->409->509->609->709->809->909'
|
|
2,'2->102->202->302->402->502->602->702->802->902'
|
|
6,'6->106->206->306->406->506->606->706->806->906'
|
|
10,'10->110->210->310->410->510->610->710->810->910'
|
|
7,'7->107->207->307->407->507->607->707->807->907'
|
|
1,'1->101->201->301->401->501->601->701->801->901'
|
|
---- TYPES
|
|
int, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with column delimiter
|
|
# Will cause all columns save first to be duplicated
|
|
select day, group_concat(trim(string_col), trim(string_col))
|
|
from (select * from alltypesagg where id % 200 = day order by id limit 99999) a
|
|
group by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
3,'3203203403403603603803803'
|
|
5,'5205205405405605605805805'
|
|
8,'8208208408408608608808808'
|
|
4,'4204204404404604604804804'
|
|
9,'9209209409409609609809809'
|
|
2,'2202202402402602602802802'
|
|
6,'6206206406406606606806806'
|
|
10,'10210210410410610610810810'
|
|
7,'7207207407407607607807807'
|
|
1,'1201201401401601601801801'
|
|
---- TYPES
|
|
int, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with multiple agg columns
|
|
select day, group_concat(string_col, '->'), group_concat(date_string_col)
|
|
from (select * from alltypesagg where id % 250 = day order by id limit 99999) a
|
|
group by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
3,'3->253->503->753','01/03/10, 01/03/10, 01/03/10, 01/03/10'
|
|
5,'5->255->505->755','01/05/10, 01/05/10, 01/05/10, 01/05/10'
|
|
8,'8->258->508->758','01/08/10, 01/08/10, 01/08/10, 01/08/10'
|
|
4,'4->254->504->754','01/04/10, 01/04/10, 01/04/10, 01/04/10'
|
|
9,'9->259->509->759','01/09/10, 01/09/10, 01/09/10, 01/09/10'
|
|
2,'2->252->502->752','01/02/10, 01/02/10, 01/02/10, 01/02/10'
|
|
6,'6->256->506->756','01/06/10, 01/06/10, 01/06/10, 01/06/10'
|
|
10,'10->260->510->760','01/10/10, 01/10/10, 01/10/10, 01/10/10'
|
|
7,'7->257->507->757','01/07/10, 01/07/10, 01/07/10, 01/07/10'
|
|
1,'1->251->501->751','01/01/10, 01/01/10, 01/01/10, 01/01/10'
|
|
---- TYPES
|
|
int, string, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat distinct with multiple agg columns
|
|
select day, group_concat(string_col, '->'), group_concat(date_string_col),
|
|
group_concat(distinct date_string_col)
|
|
from (select * from alltypesagg where id % 250 = day order by id limit 99999) a
|
|
group by day order by day
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
1,'1->251->501->751','01/01/10, 01/01/10, 01/01/10, 01/01/10','01/01/10'
|
|
2,'2->252->502->752','01/02/10, 01/02/10, 01/02/10, 01/02/10','01/02/10'
|
|
3,'3->253->503->753','01/03/10, 01/03/10, 01/03/10, 01/03/10','01/03/10'
|
|
4,'4->254->504->754','01/04/10, 01/04/10, 01/04/10, 01/04/10','01/04/10'
|
|
5,'5->255->505->755','01/05/10, 01/05/10, 01/05/10, 01/05/10','01/05/10'
|
|
6,'6->256->506->756','01/06/10, 01/06/10, 01/06/10, 01/06/10','01/06/10'
|
|
7,'7->257->507->757','01/07/10, 01/07/10, 01/07/10, 01/07/10','01/07/10'
|
|
8,'8->258->508->758','01/08/10, 01/08/10, 01/08/10, 01/08/10','01/08/10'
|
|
9,'9->259->509->759','01/09/10, 01/09/10, 01/09/10, 01/09/10','01/09/10'
|
|
10,'10->260->510->760','01/10/10, 01/10/10, 01/10/10, 01/10/10','01/10/10'
|
|
---- TYPES
|
|
int, string, string, string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with null result
|
|
select group_concat(string_col) from alltypesagg where string_col = NULL;
|
|
---- RESULTS
|
|
'NULL'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat distinct with null result
|
|
select group_concat(distinct string_col) from alltypesagg where string_col = NULL;
|
|
---- RESULTS
|
|
'NULL'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat with merge node
|
|
select group_concat(string_col) from alltypesagg where int_col = 1
|
|
---- RESULTS
|
|
'1, 1, 1, 1, 1, 1, 1, 1, 1, 1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test merge phase uses correct separator (IMPALA-1110). The query needs to load data
|
|
# from multiple nodes in order to exercise this path, however the merge order is
|
|
# non-deterministic. So, aggregate a string literal to make the result deterministic.
|
|
select group_concat('abc', 'xy') from functional.alltypesagg where id % 1000 = day
|
|
---- RESULTS
|
|
'abcxyabcxyabcxyabcxyabcxyabcxyabcxyabcxyabcxyabc'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Same as last query test, now adding the "distinct" clause
|
|
select group_concat(distinct 'abc', 'xy') from functional.alltypesagg
|
|
where id % 1000 = day
|
|
---- RESULTS
|
|
'abc'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Test group_concat when separator varies by row.
|
|
select group_concat(cast(id as string), cast ((10 - id) as string))
|
|
from functional.alltypesagg
|
|
where id < 10 and day is not null
|
|
group by day
|
|
---- RESULTS
|
|
'0918273645546372819'
|
|
---- TYPES
|
|
string
|
|
====
|
|
---- QUERY
|
|
# Test correct removal of redundant group-by expressions (IMPALA-817)
|
|
select int_col * int_col, int_col + int_col
|
|
from functional.alltypesagg
|
|
group by int_col * int_col, int_col + int_col, int_col * int_col
|
|
having (int_col + int_col) < 5 order by 1 limit 10
|
|
---- RESULTS
|
|
1,2
|
|
4,4
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
# Test that binding predicates on an aggregation properly trigger materialization of
|
|
# slots in the agg tuple and the slots needed for evaluating the corresponding agg funcs
|
|
# (IMPALA-822).
|
|
select 1 from
|
|
(select count(bigint_col) c from functional.alltypesagg
|
|
having min(int_col) is not null) as t
|
|
where c is not null
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
tinyint
|
|
====
|
|
---- QUERY
|
|
# Regression test for subexpr elimination in codegen. IMPALA-765
|
|
select count(tinyint_col), sum(tinyint_col * tinyint_col) from alltypesagg
|
|
---- RESULTS
|
|
9000,285000
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
# Regression test for subexpr elimination in codegen. IMPALA-765
|
|
select count(int_col), sum(int_col), avg(int_col) from alltypesagg where int_col is NULL
|
|
---- RESULTS
|
|
0,NULL,NULL
|
|
---- TYPES
|
|
bigint,bigint,double
|
|
====
|
|
---- QUERY
|
|
# Regression test for subexpr elimination in codegen. IMPALA-850
|
|
select id % 2, int_col > 1, id from alltypesagg where id < 2 group by 1,2,3
|
|
---- RESULTS
|
|
0,NULL,0
|
|
1,false,1
|
|
---- TYPES
|
|
int,boolean,int
|
|
====
|
|
---- QUERY
|
|
# Regression test for min/max of all negative values. IMPALA-869.
|
|
select min(cast(-1.0 as float)), max(cast(-1.0 as float)) from tinytable
|
|
---- RESULTS
|
|
-1,-1
|
|
---- TYPES
|
|
float,float
|
|
====
|
|
---- QUERY
|
|
# Regression test codegen with nulls and compound predicates. IMPALA-892.
|
|
select COUNT(int_col is not null AND bool_col) - COUNT(bool_col) FROM alltypesagg
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select histogram(bool_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 1, 1, 1, 1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(tinyint_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 1, 1, 1, 1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(smallint_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 1, 1, 1, 1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(int_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 1, 1, 1, 1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(bigint_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 10, 10, 10, 10'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(float_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 1.1, 1.1, 1.1, 1.1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(double_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 10.1, 10.1, 10.1, 10.1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(string_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'0, 0, 0, 0, 1, 1, 1, 1'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select histogram(timestamp_col) from functional.alltypestiny;
|
|
---- RESULTS
|
|
'2009-01-01 00:00:00, 2009-01-01 00:01:00, 2009-02-01 00:00:00, 2009-02-01 00:01:00, 2009-03-01 00:00:00, 2009-03-01 00:01:00, 2009-04-01 00:00:00, 2009-04-01 00:01:00'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4787: appx_median() on a medium sized dataset. This should excercise merge() with
|
|
# differently sized inputs in the Reservoir Sampling algorithm.
|
|
select
|
|
appx_median(bool_col),
|
|
appx_median(tinyint_col),
|
|
appx_median(smallint_col),
|
|
appx_median(int_col),
|
|
appx_median(float_col),
|
|
appx_median(double_col),
|
|
appx_median(string_col),
|
|
appx_median(timestamp_col)
|
|
from alltypes
|
|
---- RESULTS
|
|
true,5,5,5,5.5,50.5,'5',2010-01-01 00:00:00
|
|
---- TYPES
|
|
BOOLEAN, TINYINT, SMALLINT, INT, FLOAT, DOUBLE, STRING, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4787: appx_median on a large dataset. This requires several buffer resizes in the
|
|
# Reservoir Sampling algorithm.
|
|
select appx_median(l_returnflag)
|
|
from tpch.lineitem
|
|
where l_returnflag = "N"
|
|
---- RESULTS
|
|
'N'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1419: Agg fn containing arithmetic expr on NULL fails
|
|
select count(null * 1) from functional.alltypes
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1898: ordinal in group/order by combined with explicit select-list alias
|
|
# that match columns in underlying table
|
|
select extract(timestamp_col, 'year') as timestamp_col,
|
|
extract(timestamp_col, 'month') as month,
|
|
sum(tinyint_col)
|
|
from functional.alltypes
|
|
group by 1, 2
|
|
order by 1, 2;
|
|
---- RESULTS
|
|
2009,1,1395
|
|
2009,2,1260
|
|
2009,3,1395
|
|
2009,4,1350
|
|
2009,5,1395
|
|
2009,6,1350
|
|
2009,7,1395
|
|
2009,8,1395
|
|
2009,9,1350
|
|
2009,10,1395
|
|
2009,11,1350
|
|
2009,12,1395
|
|
2010,1,1395
|
|
2010,2,1260
|
|
2010,3,1395
|
|
2010,4,1350
|
|
2010,5,1395
|
|
2010,6,1350
|
|
2010,7,1395
|
|
2010,8,1395
|
|
2010,9,1350
|
|
2010,10,1395
|
|
2010,11,1350
|
|
2010,12,1395
|
|
---- TYPES
|
|
INT,INT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2089: Tests correct elimination of redundant predicates.
|
|
# The equivalences between inline-view slots are enforced inside the inline-view plan.
|
|
# Equivalences between simple grouping slots (with SlotRef grouping exprs) are enforced
|
|
# at the scan, and equivalences between grouping slots with complex grouping exprs are
|
|
# enforced at the aggregation.
|
|
select t2.timestamp_col, t1.int_col_1
|
|
from
|
|
(select coalesce(t1.smallint_col, t1.month, t1.month) as int_col,
|
|
(count(t1.int_col)) <= (coalesce(t1.smallint_col, t1.month, t1.month)) as boolean_col,
|
|
(t1.bigint_col) + (t1.smallint_col) as int_col_1
|
|
from functional.alltypes t1
|
|
group by coalesce(t1.smallint_col, t1.month, t1.month), (t1.bigint_col) + (t1.smallint_col)
|
|
having (t1.bigint_col) + (t1.smallint_col) != (count(t1.bigint_col + t1.smallint_col))
|
|
) t1
|
|
inner join functional.alltypes t2
|
|
on (t2.month = t1.int_col and t2.month = t1.int_col_1 and t2.tinyint_col = t1.int_col)
|
|
where t2.int_col IN (t1.int_col_1, t1.int_col)
|
|
---- RESULTS
|
|
---- TYPES
|
|
TIMESTAMP,BIGINT
|
|
====
|