==== ---- QUERY # test a larger dataset, includes nulls # the exact result could vary slightly due to numeric instability # 0.001 is a conservative upperbound on the possible difference in results SELECT abs(cast(variance(tinyint_col) as double) - 6.66741) < 0.001, abs(cast(variance(double_col) as double) - 8470806.708) < 0.001 from alltypesagg ---- RESULTS true,true ---- TYPES boolean, boolean ==== ---- QUERY # No tuples processed (should return null) SELECT variance(tinyint_col), stddev(smallint_col), variance_pop(int_col), stddev_pop(bigint_col) from alltypesagg WHERE id = -9999999 ---- RESULTS NULL,NULL,NULL,NULL ---- TYPES double, double, double, double ==== ---- QUERY # exactly 1 tuple processed (variance_pop & stddev_pop are 0, stddev and variance # are NULL) SELECT variance(tinyint_col), variance_samp(smallint_col), variance_pop(int_col), stddev(smallint_col), stddev_samp(smallint_col), stddev_pop(bigint_col) from alltypesagg WHERE id = 1006 ---- RESULTS NULL,NULL,0,NULL,NULL,0 ---- TYPES double, double, double, double, double, double ==== ---- QUERY # Includes one row which is null, and test the aliases for variance() as well SELECT variance(tinyint_col), variance(smallint_col), variance(int_col), variance(bigint_col), variance(float_col), variance(double_col), var_samp(double_col), variance_samp(double_col) from alltypesagg WHERE id >= 1000 AND id < 1006 ---- RESULTS 2.5,2.5,2.5,250,3.025,255.025,255.025,255.025 ---- TYPES double, double, double, double, double, double,double, double ==== ---- QUERY # Test population variance (including the var_pop() alias) SELECT variance_pop(tinyint_col), variance_pop(smallint_col), variance_pop(int_col), variance_pop(bigint_col), variance_pop(float_col), variance_pop(double_col), var_pop(double_col) from alltypesagg WHERE id >= 1000 AND id < 1006 ---- RESULTS 2,2,2,200,2.42,204.02,204.02 ---- TYPES double, double, double, double, double, double, double ==== ---- QUERY SELECT round(stddev(tinyint_col), 5), round(stddev(smallint_col), 5), round(stddev(int_col), 5), round(stddev(bigint_col), 5), round(stddev(float_col), 5), round(stddev(double_col), 5), round(stddev_samp(double_col), 5) from alltypesagg WHERE id >= 1000 AND id < 1006 ---- RESULTS 1.58114,1.58114,1.58114,15.81139,1.73925,15.96950,15.96950 ---- TYPES double, double, double, double, double, double, double ==== ---- QUERY # no grouping exprs, cols contain nulls except for bool cols SELECT round(stddev_pop(tinyint_col), 5), round(stddev_pop(smallint_col), 5), round(stddev_pop(int_col), 5), round(stddev_pop(bigint_col), 5), round(stddev_pop(float_col), 5), round(stddev_pop(double_col), 5) from alltypesagg WHERE id >= 1000 AND id < 1006 ---- RESULTS 1.41421,1.41421,1.41421,14.14214,1.55563,14.28356 ---- TYPES double, double, double, double, double, double ==== ---- QUERY # no grouping exprs, cols contain nulls except for bool cols select count(bool_col), min(bool_col), max(bool_col) from alltypesagg where day is not null ---- RESULTS 10000,false,true ---- TYPES bigint, boolean, boolean ==== ---- QUERY # no grouping exprs, cols contain nulls select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col) from alltypesagg where day is not null ---- RESULTS 10000,9000,1,9,45000,5 ---- TYPES bigint, bigint, tinyint, tinyint, bigint, double ==== ---- QUERY select count(*), count(smallint_col), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col) from alltypesagg where day is not null ---- RESULTS 10000,9900,1,99,495000,50 ---- TYPES bigint, bigint, smallint, smallint, bigint, double ==== ---- QUERY select count(*), count(int_col), min(int_col), max(int_col), sum(int_col), avg(int_col) from alltypesagg where day is not null ---- RESULTS 10000,9990,1,999,4995000,500 ---- TYPES bigint, bigint, int, int, bigint, double ==== ---- QUERY select count(*), count(bigint_col), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col) from alltypesagg where day is not null ---- RESULTS 10000,9990,10,9990,49950000,5000 ---- TYPES bigint, bigint, bigint, bigint, bigint, double ==== ---- QUERY select count(*), count(float_col), min(float_col), max(float_col), sum(float_col), avg(float_col) from alltypesagg where day is not null ---- RESULTS 10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309 ---- TYPES bigint, bigint, float, float, double, double ==== ---- QUERY select count(*), count(double_col), min(double_col), max(double_col), round(sum(double_col), 0), round(avg(double_col), 0) from alltypesagg where day is not null ---- RESULTS 10000,9990,10.1,10089.9,50449500,5050 ---- TYPES bigint, bigint, double, double, double, double ==== ---- QUERY select count(*), min(string_col), max(string_col), min(date_string_col), max(date_string_col) from alltypesagg where day is not null ---- RESULTS 10000,'0','999','01/01/10','01/10/10' ---- TYPES bigint, string, string, string, string ==== ---- QUERY # Test for IMPALA-3018. Verify update() functions of min() and max() handle # zero-length string correctly. select max(str), min(str) from (values ('aaa' as str), (''), ('123')) as tmp ---- RESULTS 'aaa','' ---- TYPES string,string ==== ---- QUERY # Test for IMPALA-3018. Verify update() function of last_value() handles # zero-length string correctly. select last_value(b) over (partition by a order by d) from functional.nulltable; ---- RESULTS '' ---- TYPES string ==== ---- QUERY # Test for IMPALA-3018. Verify update() function of first_value() handles # zero-length string correctly. select first_value(b) over (partition by a order by d) from functional.nulltable; ---- RESULTS '' ---- TYPES string ==== ---- QUERY # grouping by different data types, with NULLs select tinyint_col, count(*) from alltypesagg where day is not null group by 1 order by 1 ---- RESULTS 1,1000 2,1000 3,1000 4,1000 5,1000 6,1000 7,1000 8,1000 9,1000 NULL,1000 ---- TYPES tinyint, bigint ==== ---- QUERY # grouping by different data types, with NULLs, grouping expr missing from select list select bool_col,min(bool_col),max(bool_col) from alltypesagg where day is not null group by 1 ---- RESULTS false,false,false true,true,true ---- TYPES boolean,boolean,boolean ==== ---- QUERY select count(*) from alltypesagg where day is not null group by tinyint_col ---- RESULTS 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ---- TYPES bigint ==== ---- QUERY select smallint_col % 10, count(*) from alltypesagg where day is not null group by 1 ---- RESULTS 3,1000 NULL,100 8,1000 7,1000 0,900 6,1000 9,1000 5,1000 4,1000 1,1000 2,1000 ---- TYPES smallint, bigint ==== ---- QUERY select count(*) from alltypesagg where day is not null group by smallint_col % 10 ---- RESULTS 1000 100 1000 1000 900 1000 1000 1000 1000 1000 1000 ---- TYPES bigint ==== ---- QUERY select int_col % 10, count(*) from alltypesagg where day is not null group by 1 ---- RESULTS 4,1000 9,1000 NULL,10 6,1000 5,1000 2,1000 0,990 1,1000 3,1000 8,1000 7,1000 ---- TYPES int, bigint ==== ---- QUERY select count(*) from alltypesagg where day is not null group by int_col % 10 ---- RESULTS 1000 1000 10 1000 1000 1000 990 1000 1000 1000 1000 ---- TYPES bigint ==== ---- QUERY # Check that ALL inside aggregates is correct select count(ALL *) from alltypesagg where day is not null group by int_col % 10 ---- RESULTS 1000 1000 10 1000 1000 1000 990 1000 1000 1000 1000 ---- TYPES bigint ==== ---- QUERY select bigint_col % 100, count(*) from alltypesagg where day is not null group by 1 ---- RESULTS 60,1000 70,1000 20,1000 NULL,10 40,1000 80,1000 30,1000 0,990 50,1000 90,1000 10,1000 ---- TYPES bigint, bigint ==== ---- QUERY select count(*) from alltypesagg where day is not null group by bigint_col % 100 ---- RESULTS 1000 1000 1000 10 1000 1000 1000 990 1000 1000 1000 ---- TYPES bigint ==== ---- QUERY select float_col, float_col * 2, count(*) from alltypes group by 1, 2 ---- RESULTS 0,0,730 3.299999952316284,6.599999904632568,730 8.800000190734863,17.60000038146973,730 6.599999904632568,13.19999980926514,730 7.699999809265137,15.39999961853027,730 2.200000047683716,4.400000095367432,730 5.5,11,730 1.100000023841858,2.200000047683716,730 9.899999618530273,19.79999923706055,730 4.400000095367432,8.800000190734863,730 ---- TYPES float, double, bigint ==== ---- QUERY select count(*) from alltypes group by float_col ---- RESULTS 730 730 730 730 730 730 730 730 730 730 ---- TYPES bigint ==== ---- QUERY select float_col, count(*) from alltypesagg where float_col is null and day is not null group by 1 ---- RESULTS NULL,10 ---- TYPES float, bigint ==== ---- QUERY select double_col, double_col * 2, count(*) from alltypes group by 1, 2 ---- RESULTS 0,0,730 90.90000000000001,181.8,730 40.4,80.8,730 20.2,40.4,730 80.8,161.6,730 10.1,20.2,730 70.7,141.4,730 50.5,101,730 30.3,60.6,730 60.6,121.2,730 ---- TYPES double, double, bigint ==== ---- QUERY select count(*) from alltypes group by double_col ---- RESULTS 730 730 730 730 730 730 730 730 730 730 ---- TYPES bigint ==== ---- QUERY select double_col, count(*) from alltypesagg where double_col is null and day is not null group by 1 ---- RESULTS NULL,10 ---- TYPES double, bigint ==== ---- QUERY select date_string_col, count(*) from alltypesagg where day is not null group by 1 ---- RESULTS '01/08/10',1000 '01/09/10',1000 '01/02/10',1000 '01/06/10',1000 '01/01/10',1000 '01/03/10',1000 '01/04/10',1000 '01/10/10',1000 '01/07/10',1000 '01/05/10',1000 ---- TYPES string, bigint ==== ---- QUERY select count(*) from alltypesagg where day is not null group by date_string_col ---- RESULTS 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ---- TYPES bigint ==== ---- QUERY # grouping by multiple exprs, with nulls select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg where day = 1 group by 1, 2 ---- RESULTS 0,0,120 0,1,90 0,2,90 1,0,90 1,1,120 1,2,90 2,0,90 2,1,90 2,2,120 NULL,0,30 NULL,1,30 NULL,2,30 NULL,NULL,10 ---- TYPES tinyint, smallint, bigint ==== ---- QUERY select count(*) from alltypesagg where day = 1 group by tinyint_col % 3, smallint_col % 3 ---- RESULTS 10 120 120 120 30 30 30 90 90 90 90 90 90 ---- TYPES bigint ==== ---- QUERY # same result as previous query select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg where day = 1 group by 2, 1 ---- RESULTS 0,0,120 0,1,90 0,2,90 1,0,90 1,1,120 1,2,90 2,0,90 2,1,90 2,2,120 NULL,0,30 NULL,1,30 NULL,2,30 NULL,NULL,10 ---- TYPES tinyint, smallint, bigint ==== ---- QUERY select tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col, count(*) from alltypesagg where (date_string_col = '01/01/10' or date_string_col = '01/02/10') and day is not null group by 1, 2, 3, 4, 5 ---- RESULTS 1,1,1,0,'01/02/10',500 0,0,0,0,'01/02/10',400 NULL,NULL,0,0,'01/02/10',9 NULL,NULL,NULL,NULL,'01/02/10',1 0,0,0,0,'01/01/10',400 NULL,NULL,0,0,'01/01/10',9 NULL,NULL,NULL,NULL,'01/01/10',1 NULL,0,0,0,'01/02/10',90 1,1,1,0,'01/01/10',500 NULL,0,0,0,'01/01/10',90 ---- TYPES tinyint, smallint, int, bigint, string, bigint ==== ---- QUERY select count(*) from alltypesagg where (date_string_col = '01/01/10' or date_string_col = '01/02/10') and day is not null group by tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col ---- RESULTS 500 400 9 1 400 9 1 90 500 90 ---- TYPES bigint ==== ---- QUERY # no grouping cols, no matching rows select count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col) from alltypesagg where tinyint_col = -1 and day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, tinyint, tinyint, bigint, double ==== ---- QUERY select count(*), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col) from alltypesagg where smallint_col = -1 and day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, smallint, smallint, bigint, double ==== ---- QUERY select count(*), min(int_col), max(int_col), sum(int_col), avg(int_col) from alltypesagg where int_col = -1 and day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, int, int, bigint, double ==== ---- QUERY select count(*), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col) from alltypesagg where bigint_col = -1 and day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, bigint, bigint, bigint, double ==== ---- QUERY select count(*), min(float_col), max(float_col), sum(float_col), avg(float_col) from alltypesagg where float_col < -1.0 and day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, float, float, double, double ==== ---- QUERY select count(*), min(double_col), max(double_col), sum(double_col), avg(double_col) from alltypesagg where double_col < -1.0 and day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, double, double, double, double ==== ---- QUERY # HAVING clauses over all aggregation functions, plus compound HAVING clauses select int_col % 7, count(*), max(int_col) from alltypesagg where day is not null group by 1 ---- RESULTS 4,1430,998 NULL,10,NULL 6,1420,993 5,1430,999 2,1430,996 0,1420,994 1,1430,995 3,1430,997 ---- TYPES int, bigint, int ==== ---- QUERY select int_col % 7, count(*) from alltypesagg where day is not null group by 1 having max(int_col) > 991 ---- RESULTS 4,1430 6,1420 5,1430 2,1430 0,1420 1,1430 3,1430 ---- TYPES int, bigint ==== ---- QUERY select int_col % 7, count(*) from alltypesagg where day is not null group by 1 having max(int_col) > 991 and count(*) > 1420 ---- RESULTS 4,1430 5,1430 2,1430 1,1430 3,1430 ---- TYPES int, bigint ==== ---- QUERY select int_col % 7, count(*) from alltypesagg where day is not null group by 1 having min(int_col) < 7 ---- RESULTS 4,1430 6,1420 5,1430 2,1430 1,1430 3,1430 ---- TYPES int, bigint ==== ---- QUERY select int_col % 7, count(*) from alltypesagg where day is not null group by 1 having min(int_col) < 7 and count(*) > 1420 ---- RESULTS 4,1430 5,1430 2,1430 1,1430 3,1430 ---- TYPES int, bigint ==== ---- QUERY select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1 ---- RESULTS 4,1430,716430 NULL,10,NULL 6,1420,709290 5,1430,717860 2,1430,713570 0,1420,710710 1,1430,712140 3,1430,715000 ---- TYPES int, bigint, bigint ==== ---- QUERY select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1 having sum(int_col) >= 715000 ---- RESULTS 4,1430,716430 5,1430,717860 3,1430,715000 ---- TYPES int, bigint, bigint ==== ---- QUERY select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1 having sum(int_col) >= 715000 or count(*) > 1420 ---- RESULTS 4,1430,716430 5,1430,717860 2,1430,713570 1,1430,712140 3,1430,715000 ---- TYPES int, bigint, bigint ==== ---- QUERY select int_col % 7, count(*), sum(int_col) from alltypesagg where day is not null group by 1 having sum(int_col) is null ---- RESULTS NULL,10,NULL ---- TYPES int, bigint, bigint ==== ---- QUERY select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1 ---- RESULTS 4,1430,501 NULL,10,NULL 6,1420,499.5 5,1430,502 2,1430,499 0,1420,500.5 1,1430,498 3,1430,500 ---- TYPES int, bigint, double ==== ---- QUERY select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1 having avg(int_col) > 500 ---- RESULTS 4,1430,501 5,1430,502 0,1420,500.5 ---- TYPES int, bigint, double ==== ---- QUERY select int_col % 7, count(*), avg(int_col) from alltypesagg where day is not null group by 1 having avg(int_col) > 500 or count(*) = 10 ---- RESULTS 4,1430,501 NULL,10,NULL 5,1430,502 0,1420,500.5 ---- TYPES int, bigint, double ==== ---- QUERY select timestamp_col, count(*) from alltypesagg where day is not null group by timestamp_col having timestamp_col < cast('2010-01-01 01:05:20' as timestamp) ---- RESULTS 2010-01-01 00:49:11.760000000,1 2010-01-01 01:01:18.300000000,1 2010-01-01 00:17:01.360000000,1 2010-01-01 00:58:16.530000000,1 2010-01-01 00:09:00.360000000,1 2010-01-01 00:00:00,1 2010-01-01 01:00:17.700000000,1 2010-01-01 00:57:15.960000000,1 2010-01-01 00:24:02.760000000,1 2010-01-01 00:23:02.530000000,1 2010-01-01 00:45:09.900000000,1 2010-01-01 00:39:07.410000000,1 2010-01-01 00:33:05.280000000,1 2010-01-01 00:03:00.300000000,1 2010-01-01 00:20:01.900000000,1 2010-01-01 00:36:06.300000000,1 2010-01-01 00:44:09.460000000,1 2010-01-01 00:14:00.910000000,1 2010-01-01 00:31:04.650000000,1 2010-01-01 00:48:11.280000000,1 2010-01-01 01:03:19.530000000,1 2010-01-01 00:29:04.600000000,1 2010-01-01 01:02:18.910000000,1 2010-01-01 00:16:01.200000000,1 2010-01-01 00:47:10.810000000,1 2010-01-01 00:51:12.750000000,1 2010-01-01 00:55:14.850000000,1 2010-01-01 00:42:08.610000000,1 2010-01-01 00:56:15.400000000,1 2010-01-01 00:05:00.100000000,1 2010-01-01 00:43:09.300000000,1 2010-01-01 00:28:03.780000000,1 2010-01-01 00:04:00.600000000,1 2010-01-01 00:54:14.310000000,1 2010-01-01 00:26:03.250000000,1 2010-01-01 00:32:04.960000000,1 2010-01-01 00:46:10.350000000,1 2010-01-01 00:37:06.660000000,1 2010-01-01 00:50:12.250000000,1 2010-01-01 00:27:03.510000000,1 2010-01-01 00:19:01.710000000,1 2010-01-01 00:40:07.800000000,1 2010-01-01 00:07:00.210000000,1 2010-01-01 00:22:02.310000000,1 2010-01-01 00:21:02.100000000,1 2010-01-01 00:18:01.530000000,1 2010-01-01 00:11:00.550000000,1 2010-01-01 00:35:05.950000000,1 2010-01-01 00:30:04.350000000,1 2010-01-01 00:08:00.280000000,1 2010-01-01 00:34:05.610000000,1 2010-01-01 00:15:01.500000000,1 2010-01-01 00:41:08.200000000,1 2010-01-01 00:02:00.100000000,1 2010-01-01 00:01:00,1 2010-01-01 00:10:00.450000000,1 2010-01-01 00:52:13.260000000,1 2010-01-01 01:04:20.160000000,1 2010-01-01 00:12:00.660000000,1 2010-01-01 00:38:07.300000000,1 2010-01-01 00:53:13.780000000,1 2010-01-01 00:25:03,1 2010-01-01 00:59:17.110000000,1 2010-01-01 00:06:00.150000000,1 2010-01-01 00:13:00.780000000,1 ---- TYPES timestamp, bigint ==== ---- QUERY # Test NULLs in aggregate functions select count(NULL), min(NULL), max(NULL), sum(NULL), avg(NULL) from alltypesagg where day is not null ---- RESULTS 0,NULL,NULL,NULL,NULL ---- TYPES bigint, boolean, boolean, bigint, double ==== ---- QUERY # Test ignored distinct in MIN and MAX with NULLs select min(distinct NULL), max(distinct NULL) from alltypes ---- RESULTS NULL,NULL ---- TYPES boolean, boolean ==== ---- QUERY # Test group_concat with default delimiter. Use a subquery with an ORDER BY to # ensure group_concat results are in a deterministic order. select day, group_concat(string_col) from (select * from alltypesagg where id % 100 = day order by id limit 99999) a group by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 3,'3, 103, 203, 303, 403, 503, 603, 703, 803, 903' 5,'5, 105, 205, 305, 405, 505, 605, 705, 805, 905' 8,'8, 108, 208, 308, 408, 508, 608, 708, 808, 908' 4,'4, 104, 204, 304, 404, 504, 604, 704, 804, 904' 9,'9, 109, 209, 309, 409, 509, 609, 709, 809, 909' 2,'2, 102, 202, 302, 402, 502, 602, 702, 802, 902' 6,'6, 106, 206, 306, 406, 506, 606, 706, 806, 906' 10,'10, 110, 210, 310, 410, 510, 610, 710, 810, 910' 7,'7, 107, 207, 307, 407, 507, 607, 707, 807, 907' 1,'1, 101, 201, 301, 401, 501, 601, 701, 801, 901' ---- TYPES int, string ==== ---- QUERY # Test group_concat with NULL (default) delimiter select day, group_concat(string_col, NULL) from (select * from alltypesagg where id % 100 = day order by id limit 99999) a group by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 3,'3, 103, 203, 303, 403, 503, 603, 703, 803, 903' 5,'5, 105, 205, 305, 405, 505, 605, 705, 805, 905' 8,'8, 108, 208, 308, 408, 508, 608, 708, 808, 908' 4,'4, 104, 204, 304, 404, 504, 604, 704, 804, 904' 9,'9, 109, 209, 309, 409, 509, 609, 709, 809, 909' 2,'2, 102, 202, 302, 402, 502, 602, 702, 802, 902' 6,'6, 106, 206, 306, 406, 506, 606, 706, 806, 906' 10,'10, 110, 210, 310, 410, 510, 610, 710, 810, 910' 7,'7, 107, 207, 307, 407, 507, 607, 707, 807, 907' 1,'1, 101, 201, 301, 401, 501, 601, 701, 801, 901' ---- TYPES int, string ==== ---- QUERY # Test group_concat with both args as NULL select day, group_concat(NULL, NULL) from (select * from alltypesagg where id % 100 = day order by id limit 99999) a group by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 3,'NULL' 5,'NULL' 8,'NULL' 4,'NULL' 9,'NULL' 2,'NULL' 6,'NULL' 10,'NULL' 7,'NULL' 1,'NULL' ---- TYPES int, string ==== ---- QUERY # Test group_concat with arrow delimiter select day, group_concat(string_col, "->") from (select * from alltypesagg where id % 100 = day order by id limit 99999) a group by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 3,'3->103->203->303->403->503->603->703->803->903' 5,'5->105->205->305->405->505->605->705->805->905' 8,'8->108->208->308->408->508->608->708->808->908' 4,'4->104->204->304->404->504->604->704->804->904' 9,'9->109->209->309->409->509->609->709->809->909' 2,'2->102->202->302->402->502->602->702->802->902' 6,'6->106->206->306->406->506->606->706->806->906' 10,'10->110->210->310->410->510->610->710->810->910' 7,'7->107->207->307->407->507->607->707->807->907' 1,'1->101->201->301->401->501->601->701->801->901' ---- TYPES int, string ==== ---- QUERY # Test group_concat with column delimiter # Will cause all columns save first to be duplicated select day, group_concat(trim(string_col), trim(string_col)) from (select * from alltypesagg where id % 200 = day order by id limit 99999) a group by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 3,'3203203403403603603803803' 5,'5205205405405605605805805' 8,'8208208408408608608808808' 4,'4204204404404604604804804' 9,'9209209409409609609809809' 2,'2202202402402602602802802' 6,'6206206406406606606806806' 10,'10210210410410610610810810' 7,'7207207407407607607807807' 1,'1201201401401601601801801' ---- TYPES int, string ==== ---- QUERY # Test group_concat with multiple agg columns select day, group_concat(string_col, '->'), group_concat(date_string_col) from (select * from alltypesagg where id % 250 = day order by id limit 99999) a group by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 3,'3->253->503->753','01/03/10, 01/03/10, 01/03/10, 01/03/10' 5,'5->255->505->755','01/05/10, 01/05/10, 01/05/10, 01/05/10' 8,'8->258->508->758','01/08/10, 01/08/10, 01/08/10, 01/08/10' 4,'4->254->504->754','01/04/10, 01/04/10, 01/04/10, 01/04/10' 9,'9->259->509->759','01/09/10, 01/09/10, 01/09/10, 01/09/10' 2,'2->252->502->752','01/02/10, 01/02/10, 01/02/10, 01/02/10' 6,'6->256->506->756','01/06/10, 01/06/10, 01/06/10, 01/06/10' 10,'10->260->510->760','01/10/10, 01/10/10, 01/10/10, 01/10/10' 7,'7->257->507->757','01/07/10, 01/07/10, 01/07/10, 01/07/10' 1,'1->251->501->751','01/01/10, 01/01/10, 01/01/10, 01/01/10' ---- TYPES int, string, string ==== ---- QUERY # Test group_concat distinct with multiple agg columns select day, group_concat(string_col, '->'), group_concat(date_string_col), group_concat(distinct date_string_col) from (select * from alltypesagg where id % 250 = day order by id limit 99999) a group by day order by day ---- RESULTS: VERIFY_IS_EQUAL_SORTED 1,'1->251->501->751','01/01/10, 01/01/10, 01/01/10, 01/01/10','01/01/10' 2,'2->252->502->752','01/02/10, 01/02/10, 01/02/10, 01/02/10','01/02/10' 3,'3->253->503->753','01/03/10, 01/03/10, 01/03/10, 01/03/10','01/03/10' 4,'4->254->504->754','01/04/10, 01/04/10, 01/04/10, 01/04/10','01/04/10' 5,'5->255->505->755','01/05/10, 01/05/10, 01/05/10, 01/05/10','01/05/10' 6,'6->256->506->756','01/06/10, 01/06/10, 01/06/10, 01/06/10','01/06/10' 7,'7->257->507->757','01/07/10, 01/07/10, 01/07/10, 01/07/10','01/07/10' 8,'8->258->508->758','01/08/10, 01/08/10, 01/08/10, 01/08/10','01/08/10' 9,'9->259->509->759','01/09/10, 01/09/10, 01/09/10, 01/09/10','01/09/10' 10,'10->260->510->760','01/10/10, 01/10/10, 01/10/10, 01/10/10','01/10/10' ---- TYPES int, string, string, string ==== ---- QUERY # Test group_concat with null result select group_concat(string_col) from alltypesagg where string_col = NULL; ---- RESULTS 'NULL' ---- TYPES string ==== ---- QUERY # Test group_concat distinct with null result select group_concat(distinct string_col) from alltypesagg where string_col = NULL; ---- RESULTS 'NULL' ---- TYPES string ==== ---- QUERY # Test group_concat with merge node select group_concat(string_col) from alltypesagg where int_col = 1 ---- RESULTS '1, 1, 1, 1, 1, 1, 1, 1, 1, 1' ---- TYPES STRING ==== ---- QUERY # Test merge phase uses correct separator (IMPALA-1110). The query needs to load data # from multiple nodes in order to exercise this path, however the merge order is # non-deterministic. So, aggregate a string literal to make the result deterministic. select group_concat('abc', 'xy') from functional.alltypesagg where id % 1000 = day ---- RESULTS 'abcxyabcxyabcxyabcxyabcxyabcxyabcxyabcxyabcxyabc' ---- TYPES string ==== ---- QUERY # Same as last query test, now adding the "distinct" clause select group_concat(distinct 'abc', 'xy') from functional.alltypesagg where id % 1000 = day ---- RESULTS 'abc' ---- TYPES string ==== ---- QUERY # Test group_concat when separator varies by row. select group_concat(cast(id as string), cast ((10 - id) as string)) from functional.alltypesagg where id < 10 and day is not null group by day ---- RESULTS '0918273645546372819' ---- TYPES string ==== ---- QUERY # Test correct removal of redundant group-by expressions (IMPALA-817) select int_col * int_col, int_col + int_col from functional.alltypesagg group by int_col * int_col, int_col + int_col, int_col * int_col having (int_col + int_col) < 5 order by 1 limit 10 ---- RESULTS 1,2 4,4 ---- TYPES bigint,bigint ==== ---- QUERY # Test that binding predicates on an aggregation properly trigger materialization of # slots in the agg tuple and the slots needed for evaluating the corresponding agg funcs # (IMPALA-822). select 1 from (select count(bigint_col) c from functional.alltypesagg having min(int_col) is not null) as t where c is not null ---- RESULTS 1 ---- TYPES tinyint ==== ---- QUERY # Regression test for subexpr elimination in codegen. IMPALA-765 select count(tinyint_col), sum(tinyint_col * tinyint_col) from alltypesagg ---- RESULTS 9000,285000 ---- TYPES bigint,bigint ==== ---- QUERY # Regression test for subexpr elimination in codegen. IMPALA-765 select count(int_col), sum(int_col), avg(int_col) from alltypesagg where int_col is NULL ---- RESULTS 0,NULL,NULL ---- TYPES bigint,bigint,double ==== ---- QUERY # Regression test for subexpr elimination in codegen. IMPALA-850 select id % 2, int_col > 1, id from alltypesagg where id < 2 group by 1,2,3 ---- RESULTS 0,NULL,0 1,false,1 ---- TYPES int,boolean,int ==== ---- QUERY # Regression test for min/max of all negative values. IMPALA-869. select min(cast(-1.0 as float)), max(cast(-1.0 as float)) from tinytable ---- RESULTS -1,-1 ---- TYPES float,float ==== ---- QUERY # Regression test codegen with nulls and compound predicates. IMPALA-892. select COUNT(int_col is not null AND bool_col) - COUNT(bool_col) FROM alltypesagg ---- RESULTS 0 ---- TYPES BIGINT ==== ---- QUERY select histogram(bool_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 1, 1, 1, 1' ---- TYPES STRING ==== ---- QUERY select histogram(tinyint_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 1, 1, 1, 1' ---- TYPES STRING ==== ---- QUERY select histogram(smallint_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 1, 1, 1, 1' ---- TYPES STRING ==== ---- QUERY select histogram(int_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 1, 1, 1, 1' ---- TYPES STRING ==== ---- QUERY select histogram(bigint_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 10, 10, 10, 10' ---- TYPES STRING ==== ---- QUERY select histogram(float_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 1.1, 1.1, 1.1, 1.1' ---- TYPES STRING ==== ---- QUERY select histogram(double_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 10.1, 10.1, 10.1, 10.1' ---- TYPES STRING ==== ---- QUERY select histogram(string_col) from functional.alltypestiny; ---- RESULTS '0, 0, 0, 0, 1, 1, 1, 1' ---- TYPES STRING ==== ---- QUERY select histogram(timestamp_col) from functional.alltypestiny; ---- RESULTS '2009-01-01 00:00:00, 2009-01-01 00:01:00, 2009-02-01 00:00:00, 2009-02-01 00:01:00, 2009-03-01 00:00:00, 2009-03-01 00:01:00, 2009-04-01 00:00:00, 2009-04-01 00:01:00' ---- TYPES STRING ==== ---- QUERY # IMPALA-4787: appx_median() on a medium sized dataset. This should excercise merge() with # differently sized inputs in the Reservoir Sampling algorithm. select appx_median(bool_col), appx_median(tinyint_col), appx_median(smallint_col), appx_median(int_col), appx_median(float_col), appx_median(double_col), appx_median(string_col), appx_median(timestamp_col) from alltypes ---- RESULTS true,5,5,5,5.5,50.5,'5',2010-01-01 00:00:00 ---- TYPES BOOLEAN, TINYINT, SMALLINT, INT, FLOAT, DOUBLE, STRING, TIMESTAMP ==== ---- QUERY # IMPALA-4787: appx_median on a large dataset. This requires several buffer resizes in the # Reservoir Sampling algorithm. select appx_median(l_returnflag) from tpch.lineitem where l_returnflag = "N" ---- RESULTS 'N' ---- TYPES STRING ==== ---- QUERY # IMPALA-1419: Agg fn containing arithmetic expr on NULL fails select count(null * 1) from functional.alltypes ---- RESULTS 0 ---- TYPES BIGINT ==== ---- QUERY # IMPALA-1898: ordinal in group/order by combined with explicit select-list alias # that match columns in underlying table select extract(timestamp_col, 'year') as timestamp_col, extract(timestamp_col, 'month') as month, sum(tinyint_col) from functional.alltypes group by 1, 2 order by 1, 2; ---- RESULTS 2009,1,1395 2009,2,1260 2009,3,1395 2009,4,1350 2009,5,1395 2009,6,1350 2009,7,1395 2009,8,1395 2009,9,1350 2009,10,1395 2009,11,1350 2009,12,1395 2010,1,1395 2010,2,1260 2010,3,1395 2010,4,1350 2010,5,1395 2010,6,1350 2010,7,1395 2010,8,1395 2010,9,1350 2010,10,1395 2010,11,1350 2010,12,1395 ---- TYPES BIGINT,BIGINT,BIGINT ==== ---- QUERY # IMPALA-2089: Tests correct elimination of redundant predicates. # The equivalences between inline-view slots are enforced inside the inline-view plan. # Equivalences between simple grouping slots (with SlotRef grouping exprs) are enforced # at the scan, and equivalences between grouping slots with complex grouping exprs are # enforced at the aggregation. select t2.timestamp_col, t1.int_col_1 from (select coalesce(t1.smallint_col, t1.month, t1.month) as int_col, (count(t1.int_col)) <= (coalesce(t1.smallint_col, t1.month, t1.month)) as boolean_col, (t1.bigint_col) + (t1.smallint_col) as int_col_1 from functional.alltypes t1 group by coalesce(t1.smallint_col, t1.month, t1.month), (t1.bigint_col) + (t1.smallint_col) having (t1.bigint_col) + (t1.smallint_col) != (count(t1.bigint_col + t1.smallint_col)) ) t1 inner join functional.alltypes t2 on (t2.month = t1.int_col and t2.month = t1.int_col_1 and t2.tinyint_col = t1.int_col) where t2.int_col IN (t1.int_col_1, t1.int_col) ---- RESULTS ---- TYPES TIMESTAMP,BIGINT ==== ---- QUERY # IMPALA-5036: Tests the correctness of the Parquet count(*) optimization. select count(1) from functional_parquet.alltypes ---- RESULTS 7300 ---- TYPES bigint ===== ---- QUERY # IMPALA-5036: Parquet count(*) optimization with predicates on the partition columns. select count(1) from functional_parquet.alltypes where year < 2010 and month > 8 ---- RESULTS 1220 ---- TYPES bigint ===== ---- QUERY # IMPALA-5036: Parquet count(*) optimization with group by partition columns. select year, month, count(1) from functional_parquet.alltypes where month > 10 group by year, month ---- RESULTS 2009,11,300 2009,12,310 2010,11,300 2010,12,310 ---- TYPES int, int, bigint ===== ---- QUERY # IMPALA-5036: Parquet count(*) optimization with both group by and predicates on # partition columns. select count(1) from functional_parquet.alltypes where year < 2010 and month > 8 group by month ---- RESULTS 310 300 310 300 ---- TYPES bigint ===== ---- QUERY # IMPALA-5036: Parquet count(*) optimization with the result of the going into a join. select x.bigint_col from functional.alltypes x inner join ( select count(1) as a from functional_parquet.alltypes group by year ) t on x.id = t.a; ---- RESULTS 0 0 ---- TYPES bigint ===== ---- QUERY # IMPALA-5036: Parquet count(*) optimization with the agg function in the having clause. select 1 from functional_parquet.alltypes having count(*) > 1 ---- RESULTS 1 ---- TYPES tinyint ==== ---- QUERY # IMPALA-5855: pre-aggregation does not reserve enough memory with 2MB buffers. # The pre-aggregation in this query is estimated to consume enough memory for the planner # to use 2MB buffers. set debug_action="-1:PREPARE:SET_DENY_RESERVATION_PROBABILITY@1.0"; select count(*) from ( select distinct l_orderkey, l_comment from tpch_parquet.lineitem) v ---- RESULTS 6001198 ---- TYPES bigint ==== ---- QUERY # IMPALA-6295: min/max where nan/inf/-inf are the only values with x as (select cast('nan' as float) a, cast('inf' as float) b, cast('-inf' as float) c) select min(a), min(b), min(c), max(a), max(b), max(c) from x ---- RESULTS NaN,Infinity,-Infinity,NaN,Infinity,-Infinity ---- TYPES FLOAT,FLOAT,FLOAT,FLOAT,FLOAT,FLOAT ==== ---- QUERY # IMPALA-6295: min/max/sum/avg should return nan if any of the values is nan with x as (values (0), (1), (cast('nan' as double)), (cast('inf' as double)), (cast('-inf' as double))) select min(`0`), max(`0`), sum(`0`), avg(`0`) from x ---- RESULTS NaN,NaN,NaN,NaN ---- TYPES DOUBLE,DOUBLE,DOUBLE,DOUBLE ==== ---- QUERY # Test behavior of inf with x as (values (0), (cast('inf' as double)), (5.2)) select min(`0`), max(`0`), sum(`0`), avg(`0`) from x ---- RESULTS 0,Infinity,Infinity,Infinity ---- TYPES DOUBLE,DOUBLE,DOUBLE,DOUBLE ==== ---- QUERY # Test behavior of -inf with x as (values (cast('-inf' as double)), (0), (-10)) select min(`cast('-inf' as double)`), max(`cast('-inf' as double)`), sum(`cast('-inf' as double)`), avg(`cast('-inf' as double)`) from x ---- RESULTS -Infinity,0,-Infinity,-Infinity ---- TYPES DOUBLE,DOUBLE,DOUBLE,DOUBLE ==== ---- QUERY # IMPALA-7397: conjunct that makes allocations in Prepare (extract) assigned to agg node select timestamp_col, count(int_col) from alltypesagg group by timestamp_col, int_col having extract(hour from timestamp_col) = int_col ---- TYPES TIMESTAMP,BIGINT ---- RESULTS ==== ---- QUERY # GROUP BY of NaN values aggregates NaN's as one grouping select count(*), sqrt(0.5-x) as Z from (VALUES((1.6 x, 2 y), (3.2, 4), (5.4,6))) T group by Z ---- RESULTS 3, NaN ---- TYPES bigint, double ==== ---- QUERY # GROUP BY of NaN values aggregates NaN's as one grouping select count(*), cast(sqrt(0.4-x) as FLOAT) as P, cast(sqrt(1.5-y) as FLOAT) as Q from (VALUES((1.6 x, 1.6 y, 0 z), (0.5, 0.5, 0), (5.4, 6, 0), (0.5, 0.5, 0), (0.5, 0.5, 0), (-0.6, 0.5, 0))) T group by P, Q order by P, Q ---- RESULTS 2, NaN, NaN 3, NaN, 1 1, 1, 1 ---- TYPES bigint, float, float ==== ---- QUERY # IMPALA-6660: GROUP BY of -0/+0 values aggregates zeros as one grouping select x, count(*) from (values(cast("-0" as float) x), (cast("0" as float))) v group by x ---- RESULTS 0,2 ---- TYPES float, bigint ==== ---- QUERY # IMPALA-6660: -0/+0 values are not distinct select distinct * from (values(cast("-0" as float)), (cast("0" as float))) v; ---- RESULTS 0 ---- TYPES float ==== ---- QUERY # IMPALA-8140: Test that group by with limit does not crash on Asan select count(*) from tpch_parquet.orders o group by o.o_clerk limit 10 # We don't validate the results since the order is not deterministic. ---- TYPES bigint ==== ---- QUERY # CORR() function examples select corr(ps_availqty, ps_supplycost) from tpch.partsupp; ---- RESULTS 0.000321849166315 ---- TYPES double ==== ---- QUERY # Behavior of CORR() on null table select corr(d, e) from functional.nulltable; ---- RESULTS NULL ---- TYPES double ==== ---- QUERY # Behavior of CORR() on empty table select corr(f2, f2) from functional.emptytable; ---- RESULTS NULL ---- TYPES double ==== ---- QUERY # CORR() on different datatypes select corr(tinyint_col, tinyint_col), corr(smallint_col, smallint_col), corr(int_col, int_col), corr(bigint_col, bigint_col), corr(float_col, float_col), corr(double_col, double_col), corr(timestamp_col, timestamp_col) from functional.alltypes; ---- RESULTS 1.0,1.0,1.0,1.0,1.0,1.0,1.0 ---- TYPES double,double,double,double,double,double,double ==== ---- QUERY # CORR() on timestamp columns select corr(utctime, localtime) from functional.alltimezones; ---- RESULTS 0.999995807708 ---- TYPES double ==== ---- QUERY # Since group by id will result in a single row, this test shows that corr() returns null in case of a single row. select id, corr(int_col, int_col) from functional.alltypestiny group by id; ---- RESULTS 2,NULL 4,NULL 0,NULL 6,NULL 1,NULL 7,NULL 3,NULL 5,NULL ---- TYPES int,double ==== ---- QUERY # CORR() on decimal datatype select corr(d3, d4) from functional.decimal_tbl; ---- RESULTS NULL ---- TYPES double ==== ---- QUERY select year, corr(double_col, double_col) from functional.alltypes group by year; ---- RESULTS 2009,1.0 2010,1.0 ---- TYPES int,double ==== ---- QUERY select corr(double_col, -double_col) from functional.alltypes; ---- RESULTS -1.0 ---- TYPES double ==== ---- QUERY select corr(double_col, double_col) from functional.alltypes; ---- RESULTS 1.0 ---- TYPES double ==== ---- QUERY select corr(ss_sold_time_sk, ss_quantity) from tpcds.store_sales; ---- RESULTS 0.000136790587885 ---- TYPES double ==== ---- QUERY select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk) from tpcds.store; ---- RESULTS 5,NULL 8,1.0 12,-0.328248435346 1,NULL 2,-1.0 3,-0.511481796057 4,-0.881269090479 6,-0.853760432996 7,0.0825005896565 9,0.0282032912339 10,0.223462144551 11,0.325760965741 ---- TYPES int,double ==== ---- QUERY select id, double_col, corr(double_col, int_col) over (partition by month order by id) from functional.alltypes order by id limit 10; ---- RESULTS 0,0.0,NULL 1,10.1,1.0 2,20.2,1.0 3,30.3,1.0 4,40.4,1.0 5,50.5,1.0 6,60.6,1.0 7,70.7,1.0 8,80.8,1.0 9,90.9,1.0 ---- TYPES int,double,double ==== ---- QUERY # CORR() when one column is filled with null select corr(null_int, rand()), corr(rand(), null_int) from functional.nullrows; ---- RESULTS NULL,NULL ---- TYPES double,double ==== ---- QUERY # CORR() supporting join select corr(A.double_col, B.double_col) from functional.alltypes A, functional.alltypes B where A.id=B.id; ---- RESULTS 1.0 ---- TYPES double ==== ---- QUERY # Tests functioning of CorrRemoveState() select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 5 preceding and 2 following) from tpcds.store; ---- RESULTS 5,-0.328248435346 8,-0.328248435346 12,-0.328248435346 1,-0.511481796057 2,-0.881269090479 3,-0.853760432996 4,0.0825005896565 6,0.0282032912339 7,0.223462144551 9,0.294637370885 10,0.115190500757 11,-0.0690212865931 ---- TYPES int,double ==== ---- QUERY # Mathematical operations on double can lead to variance becoming negative by a very small amount (around +1e-13), # to avoid that a check is added (state->xvar <= 0.0 || state->yvar <= 0.0), without which the below test will # result in nan for certain cases. select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and 1 following) from tpcds.store; ---- RESULTS 5,1.0 8,-0.328248435346 12,-1.0 1,-1.0 2,-0.511481796057 3,-0.829250636825 4,-0.884750027905 6,-0.735742100541 7,-0.165224541153 9,0.97066733931 10,1.0 11,NULL ---- TYPES int,double ==== ---- QUERY # Corr() when window size is 2 select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8,1.0 12,-1.0 1,NULL 2,-1.0 3,NULL 4,-1.0 6,-1.0 7,-1.0 9,1.0 10,1.0 11,NULL ---- TYPES int,double ==== ---- QUERY select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and 1 following) from tpcds.store; ---- RESULTS 5,1.0 8,-1.0 12,NULL 1,-1.0 2,NULL 3,-1.0 4,-1.0 6,-1.0 7,1.0 9,1.0 10,NULL 11,NULL ---- TYPES int,double ==== ---- QUERY select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and unbounded following) from tpcds.store; ---- RESULTS 12,NULL 8,-1.0 5,-0.328248435346 11,NULL 10,NULL 9,1.0 7,0.965705311825 6,0.17735847272 4,-0.0690212865931 3,0.115190500757 2,0.294637370885 1,0.325760965741 ---- TYPES int,double ==== ---- QUERY select s_store_sk, corr(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between unbounded preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8,1.0 12,-0.328248435346 1,NULL 2,-1.0 3,-0.511481796057 4,-0.881269090479 6,-0.853760432996 7,0.0825005896565 9,0.0282032912339 10,0.223462144551 11,0.325760965741 ---- TYPES int,double ==== ---- QUERY # Sample and population covariance select covar_samp(ps_availqty, ps_supplycost), covar_pop(ps_availqty, ps_supplycost) from tpch.partsupp; ---- RESULTS 267.607127282,267.606792773 ---- TYPES double,double ==== ---- QUERY # covar_samp() and covar_pop() on null table select covar_samp(d, e), covar_pop(d, e) from functional.nulltable; ---- RESULTS NULL,NULL ---- TYPES double,double ==== ---- QUERY # covar_samp() on different datatypes select covar_samp(tinyint_col, tinyint_col), covar_samp(smallint_col, smallint_col), covar_samp(int_col, int_col), covar_samp(bigint_col, bigint_col), covar_samp(float_col, float_col), covar_samp(double_col, double_col) from functional.alltypes; ---- RESULTS 8.251130291820797,8.251130291820797,8.251130291820797,825.1130291820797,9.983867246543589,841.6978010686292 ---- TYPES double,double,double,double,double,double ==== ---- QUERY # covar_pop() on different datatypes select covar_pop(tinyint_col, tinyint_col), covar_pop(smallint_col, smallint_col), covar_pop(int_col, int_col), covar_pop(bigint_col, bigint_col), covar_pop(float_col, float_col), covar_pop(double_col, double_col) from functional.alltypes; ---- RESULTS 8.25,8.25,8.25,825.0,9.9824995935,841.5825 ---- TYPES double,double,double,double,double,double ==== ---- QUERY # Behavior of covar_samp() and covar_pop on empty table select covar_samp(f2, f2), covar_pop(f2, f2) from functional.emptytable; ---- RESULTS NULL,NULL ---- TYPES double,double ==== ---- QUERY # covar_samp() and covar_pop() on timestamp datatype. The expression is divided by a large number like 1E+10 # because unlike overfow in int, overflow in double looses precision select covar_samp(utctime, localtime)/1E+10, covar_pop(utctime, localtime)/1E+10 from functional.alltimezones; ---- RESULTS 5502.915940016994,5496.494801230509 ---- TYPES double,double ==== ---- QUERY # Since group by id will result in a single row, this test shows # behavior of covar_samp() and covar_pop() on a single row select id, covar_samp(int_col, int_col), covar_pop(int_col, int_col) from functional.alltypestiny group by id; ---- RESULTS 2,NULL,0.0 4,NULL,0.0 0,NULL,0.0 6,NULL,0.0 1,NULL,0.0 7,NULL,0.0 3,NULL,0.0 5,NULL,0.0 ---- TYPES int,double,double ==== ---- QUERY # covar_samp() and covar_pop() on decimal type select covar_samp(d3, d4), covar_pop(d3, d4) from functional.decimal_tbl; ---- RESULTS 0.0,0.0 ---- TYPES double,double ==== ---- QUERY # covar_samp() and covar_pop() with group by clause select year, covar_samp(double_col, double_col), covar_pop(double_col, double_col) from functional.alltypes group by year; ---- RESULTS 2009,841.813133735,841.5825 2010,841.813133735,841.5825 ---- TYPES int,double,double ==== ---- QUERY # Example of negative covar_samp() and covar_pop() select covar_samp(double_col, -double_col), covar_pop(double_col, -double_col) from functional.alltypes; ---- RESULTS -841.697801069,-841.5825 ---- TYPES double,double ==== ---- QUERY select covar_samp(double_col, double_col), covar_pop(double_col, double_col) from functional.alltypes; ---- RESULTS 841.697801069,841.5825 ---- TYPES double,double ==== ---- QUERY select covar_samp(ss_sold_time_sk, ss_quantity), covar_pop(ss_sold_time_sk, ss_quantity) from tpcds.store_sales; ---- RESULTS 50.2400502845,50.2400315775 ---- TYPES double,double ==== ---- QUERY # covar_samp() on analytic query select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk) from tpcds.store; ---- RESULTS 5,NULL 8,10414050.0 12,-5125047.666666668 1,NULL 2,-158355.0 3,-3513583.5 4,-19719420.0 6,-16848393.6 7,4309087.100000003 9,1337692.761904765 10,11663050.25 11,17362201.83333334 ---- TYPES int,double ==== ---- QUERY # covar_pop() on analytic query select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk) from tpcds.store; ---- RESULTS 5,0.0 8,5207025.0 12,-3416698.444444445 1,0.0 2,-79177.5 3,-2342389.0 4,-14789565.0 6,-13478714.88 7,3590905.916666669 9,1146593.79591837 10,10205168.96875 11,15433068.2962963 ---- TYPES int,double ==== ---- QUERY select id, double_col, covar_samp(double_col, int_col) over (partition by month order by id) from functional.alltypes order by id limit 10; ---- RESULTS 0,0.0,NULL 1,10.1,5.05 2,20.2,10.1 3,30.3,16.8333333333 4,40.4,25.25 5,50.5,35.35 6,60.6,47.1333333333 7,70.7,60.6 8,80.8,75.75 9,90.9,92.5833333333 ---- TYPES int,double,double ==== ---- QUERY select id, double_col, covar_pop(double_col, int_col) over (partition by month order by id) from functional.alltypes order by id limit 10; ---- RESULTS 0,0.0,0.0 1,10.1,2.525 2,20.2,6.73333333333 3,30.3,12.625 4,40.4,20.2 5,50.5,29.4583333333 6,60.6,40.4 7,70.7,53.025 8,80.8,67.3333333333 9,90.9,83.325 ---- TYPES int,double,double ==== ---- QUERY # covar_samp() and covar_pop() when one column is filled with null select covar_samp(null_int, rand()), covar_samp(rand(), null_int), covar_pop(null_int, rand()), covar_pop(rand(), null_int) from functional.nullrows; ---- RESULTS NULL,NULL,NULL,NULL ---- TYPES double,double,double,double ==== ---- QUERY # covar_samp() and covar_pop() supporting join select covar_samp(A.double_col, B.double_col), covar_pop(A.double_col, B.double_col) from functional.alltypes A, functional.alltypes B where A.id=B.id; ---- RESULTS 841.697801069,841.5825 ---- TYPES double,double ==== ---- QUERY # covar_samp() and covar_pop() supporting timestamp datatype select covar_samp(timestamp_col, timestamp_col)/1E+13, covar_pop(timestamp_col, timestamp_col)/1E+13 from functional.alltypes; ---- RESULTS 33.1559162368,33.1513743305 ---- TYPES double,double ==== ---- QUERY # corr(), covar_samp(), covar_pop() when both columns are filled with 0 select corr(double_col*0, double_col*0), covar_samp(double_col*0, double_col*0), covar_pop(double_col*0, double_col*0) from functional.alltypes; ---- RESULTS NULL,0.0,0.0 ---- TYPES double,double,double ==== ---- QUERY # corr(), covar_samp(), covar_pop() when one column is filled with 0 select corr(double_col, double_col*0), covar_samp(double_col, double_col*0), covar_pop(double_col, double_col*0) from functional.alltypes; ---- RESULTS NULL,0.0,0.0 ---- TYPES double,double,double ==== ---- QUERY select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between unbounded preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8,10414050.0 12,-5125047.666666668 1,NULL 2,-158355.0 3,-3513583.5 4,-19719420.0 6,-16848393.6 7,4309087.100000003 9,1337692.761904765 10,11663050.25 11,17362201.83333334 ---- TYPES int,double ==== ---- QUERY select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between unbounded preceding and current row) from tpcds.store; ---- RESULTS 5,0.0 8,5207025.0 12,-3416698.444444445 1,0.0 2,-79177.5 3,-2342389.0 4,-14789565.0 6,-13478714.88 7,3590905.916666669 9,1146593.79591837 10,10205168.96875 11,15433068.2962963 ---- TYPES int,double ==== ---- QUERY # Tests functioning of CovarRemoveState() select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 5 preceding and 2 following) from tpcds.store; ---- RESULTS 5,-5125047.666666668 8,-5125047.666666668 12,-5125047.666666668 1,-3513583.5 2,-19719420.0 3,-16848393.6 4,4309087.100000003 6,1337692.761904765 7,11663050.25 9,14424596.67857143 10,3770362.571428577 11,-2206709.166666666 ---- TYPES int,double ==== ---- QUERY # Tests functioning of CovarRemoveState() select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 5 preceding and 2 following) from tpcds.store; ---- RESULTS 5,-3416698.444444445 8,-3416698.444444445 12,-3416698.444444445 1,-2342389.0 2,-14789565.0 3,-13478714.88 4,3590905.916666669 6,1146593.79591837 7,10205168.96875 9,12621522.09375001 10,3231739.346938781 11,-1838924.305555555 ---- TYPES int,double ==== ---- QUERY # Covar_samp() when window size is 2 select s_store_sk, covar_samp(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8,10414050.0 12,-14211464 1,NULL 2,-158355.0 3,0 4,-16051572 6,-1733847.5 7,-2425526 9,25465544 10,26428357 11,0 ---- TYPES int,double ==== ---- QUERY # Covar_pop() when window size is 2 select s_store_sk, covar_pop(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and current row) from tpcds.store; ---- RESULTS 5,0.0 8,5207025.0 12,-7105732 1,0.0 2,-79177.5 3,0 4,-8025786 6,-866923.75 7,-1212763 9,12732772 10,13214178.5 11,0 ---- TYPES int,double ==== ---- QUERY # regression function examples select regr_slope(ps_availqty, ps_supplycost), regr_intercept(ps_availqty, ps_supplycost), regr_r2(ps_availqty, ps_supplycost) from tpch.partsupp; ---- RESULTS 0.003223046670647307,5001.613715742804,1.035868858574101e-07 ---- TYPES double, double, double ==== ---- QUERY # Behavior of regression functions on null table select regr_slope(d, e), regr_intercept(d, e), regr_r2(d, e) from functional.nulltable; ---- RESULTS NULL,NULL,NULL ---- TYPES double, double, double ==== ---- QUERY # Behavior of regression functions on empty table select regr_slope(f2, f2), regr_intercept(f2, f2), regr_r2(f2, f2) from functional.emptytable; ---- RESULTS NULL,NULL,NULL ---- TYPES double, double, double ==== ---- QUERY # regr_slope() on different datatypes select regr_slope(tinyint_col, tinyint_col), regr_slope(smallint_col, smallint_col), regr_slope(int_col, int_col), regr_slope(bigint_col, bigint_col), regr_slope(float_col, float_col), regr_slope(double_col, double_col), regr_slope(timestamp_col, timestamp_col) from functional.alltypes; ---- RESULTS 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ---- TYPES double, double, double, double, double, double, double ==== ---- QUERY # regr_intercept() on different datatypes select regr_intercept(tinyint_col, tinyint_col), regr_intercept(smallint_col, smallint_col), regr_intercept(int_col, int_col), regr_intercept(bigint_col, bigint_col), regr_intercept(float_col, float_col), regr_intercept(double_col, double_col), regr_intercept(timestamp_col, timestamp_col) from functional.alltypes; ---- RESULTS 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ---- TYPES double, double, double, double, double, double, double ==== ---- QUERY # regr_r2() on different datatypes select regr_r2(tinyint_col, tinyint_col), regr_r2(smallint_col, smallint_col), regr_r2(int_col, int_col), regr_r2(bigint_col, bigint_col), regr_r2(float_col, float_col), regr_r2(double_col, double_col), regr_r2(timestamp_col, timestamp_col) from functional.alltypes; ---- RESULTS 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ---- TYPES double, double, double, double, double, double, double ==== ---- QUERY # regr_slope(), regr_intercept(), regr_r2() on timestamp columns select regr_slope(utctime, localtime), regr_intercept(utctime, localtime), regr_r2(utctime, localtime) from functional.alltimezones; ---- RESULTS 0.9995840247725529, 593958.8067555428, 0.9999916154327646 ---- TYPES double, double, double ==== ---- QUERY # Since group by id will result in a single row, this test shows that regr_slope() returns null in case of a single row. select id, regr_slope(int_col, int_col) from functional.alltypestiny group by id; ---- RESULTS 2,NULL 4,NULL 0,NULL 6,NULL 1,NULL 7,NULL 3,NULL 5,NULL ---- TYPES int,double ==== ---- QUERY # Since group by id will result in a single row, this test shows that regr_intercept() returns null in # case of a single row. select id, regr_intercept(int_col, int_col) from functional.alltypestiny group by id; ---- RESULTS 2,NULL 4,NULL 0,NULL 6,NULL 1,NULL 7,NULL 3,NULL 5,NULL ---- TYPES int,double ==== ---- QUERY # Since group by id will result in a single row, this test shows that regr_r2() returns null in case of a # single row. select id, regr_r2(int_col, int_col) from functional.alltypestiny group by id; ---- RESULTS 2,NULL 4,NULL 0,NULL 6,NULL 1,NULL 7,NULL 3,NULL 5,NULL ---- TYPES int,double ==== ---- QUERY # regr_slope(), regr_intercept(), regr_r2() on decimal datatype select regr_slope(d3, d4), regr_intercept(d3, d4), regr_r2(d3, d4) from functional.decimal_tbl; ---- RESULTS NULL,NULL,NULL ---- TYPES double, double, double ==== ---- QUERY select year, regr_slope(double_col, double_col) from functional.alltypes group by year; ---- RESULTS 2009,1.0 2010,1.0 ---- TYPES int,double ==== ---- QUERY select year, regr_intercept(double_col, double_col) from functional.alltypes group by year; ---- RESULTS 2009,0.0 2010,0.0 ---- TYPES int,double ==== ---- QUERY select year, regr_r2(double_col, double_col) from functional.alltypes group by year; ---- RESULTS 2009,1.0 2010,1.0 ---- TYPES int,double ==== ---- QUERY select regr_slope(double_col, -double_col), regr_intercept(double_col, -double_col), regr_r2(double_col, -double_col) from functional.alltypes; ---- RESULTS -1.0, 0.0, 1.0 ---- TYPES double, double, double ==== ---- QUERY select regr_slope(double_col, double_col), regr_intercept(double_col, double_col), regr_r2(double_col, double_col) from functional.alltypes; ---- RESULTS 1.0, 0.0, 1.0 ---- TYPES double, double, double ==== ---- QUERY select regr_slope(ss_sold_time_sk, ss_quantity), regr_intercept(ss_sold_time_sk, ss_quantity)/10000, regr_r2(ss_sold_time_sk, ss_quantity) from tpcds.store_sales; ---- RESULTS 0.0602719636627,5.1709905412,1.87116649337e-08 ---- TYPES double, double, double ==== ---- QUERY select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk) from tpcds.store; ---- RESULTS 5,NULL 8,4.80120606296e-06 12,-1.37354292706e-06 1,NULL 2,-0.000255754475703 3,-2.01036006341e-06 4,-5.05103424244e-06 6,-4.3565531677e-06 7,1.21229193717e-06 9,4.44553019714e-07 10,3.9044206462e-06 11,6.00483790103e-06 ---- TYPES int, double ==== ---- QUERY select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk) from tpcds.store; ---- RESULTS 5,NULL 8, 244.41078639 12, 296.416240104 1,NULL 2, 1587.90537084399 3, 251.125599973 4, 268.395215604 6, 264.570040249 7, 234.323507488 9, 244.099933952 10, 223.133569068 11, 210.405303328 ---- TYPES int, double ==== ---- QUERY select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk) from tpcds.store; ---- RESULTS 5,NULL 8, 1.0 12, 0.107747035307 1,NULL 2, 1.0 3, 0.261613627698 4, 0.776635209834 6, 0.728906876949 7, 0.00680634729367 9, 0.000795425636424 10, 0.0499353300472 11, 0.1061202068 ---- TYPES int, double ==== ---- QUERY select id, double_col, regr_slope(double_col, int_col) over (partition by month order by id) from functional.alltypes order by id limit 10; ---- RESULTS 0,0.0,NULL 1,10.1,10.1 2,20.2,10.1 3,30.3,10.1 4,40.4,10.1 5,50.5,10.1 6,60.6,10.1 7,70.7,10.1 8,80.8,10.1 9,90.9,10.1 ---- TYPES int,double,double ==== ---- QUERY select id, double_col, regr_intercept(double_col, int_col) over (partition by month order by id) from functional.alltypes order by id limit 10; ---- RESULTS 0, 0.0,NULL 1, 10.1, 0.0 2, 20.2, 0.0 3, 30.3, 1.7763568394e-15 4, 40.4, 0.0 5, 50.5, 0.0 6, 60.6, 3.5527136788e-15 7, 70.7, -7.1054273576e-15 8, 80.8, -7.1054273576e-15 9, 90.9, -7.1054273576e-15 ---- TYPES int,double,double ==== ---- QUERY select id, double_col, regr_r2(double_col, int_col) over (partition by month order by id) from functional.alltypes order by id limit 10; ---- RESULTS 0,0.0,NULL 1,10.1,1.0 2,20.2,1.0 3,30.3,1.0 4,40.4,1.0 5,50.5,1.0 6,60.6,1.0 7,70.7,1.0 8,80.8,1.0 9,90.9,1.0 ---- TYPES int,double,double ==== ---- QUERY # Regression functions when one column is filled with null select regr_slope(null_int, rand()), regr_slope(rand(), null_int), regr_intercept(null_int, rand()), regr_intercept(rand(), null_int), regr_r2(null_int, rand()), regr_r2(rand(), null_int) from functional.nullrows; ---- RESULTS NULL,NULL,NULL,NULL,NULL,NULL ---- TYPES double, double, double, double, double, double ==== ---- QUERY # Regression functions supporting join select regr_slope(A.double_col, B.double_col), regr_intercept(A.double_col, B.double_col), regr_r2(A.double_col, B.double_col) from functional.alltypes A, functional.alltypes B where A.id=B.id; ---- RESULTS 1.0, 0.0, 1.0 ---- TYPES double, double, double ==== ---- QUERY # Tests functioning of RegrSlopeRemoveState() select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 5 preceding and 2 following) from tpcds.store; ---- RESULTS 5,-1.37354292706e-06 8,-1.37354292706e-06 12,-1.37354292706e-06 1,-2.01036006341e-06 2,-5.05103424244e-06 3,-4.3565531677e-06 4,1.21229193717e-06 6,4.44553019714e-07 7,3.9044206462e-06 9,6.59936024162e-06 10,4.13595759719e-06 11,-2.68509660433e-06 ---- TYPES int,double ==== ---- QUERY # Tests functioning of RegrSlopeRemoveState() for regr_intercept() select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 5 preceding and 2 following) from tpcds.store; ---- RESULTS 5, 296.416240104 8, 296.416240104 12, 296.416240104 1, 251.125599973 2, 268.395215604 3, 264.570040249 4, 234.323507488 6, 244.099933952 7, 223.133569068 9, 205.13592892 10, 226.988621372 11, 290.843308372 ---- TYPES int,double ==== ---- QUERY # Tests functioning of CorrRemoveState() for regr_r2() select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 5 preceding and 2 following) from tpcds.store; ---- RESULTS 5, 0.107747035307 8, 0.107747035307 12, 0.107747035307 1, 0.261613627698 2, 0.776635209834 3, 0.728906876949 4, 0.00680634729367 6, 0.000795425636424 7, 0.0499353300472 9, 0.0868111803219 10, 0.0132688514648 11, 0.00476393800297 ---- TYPES int,double ==== ---- QUERY # Mathematical operations on double can lead to variance becoming negative by a very small amount (around +1e-13), # to avoid that a check is added (state->xvar < 0.0 || state->yvar <= 0.0), without which the below test will # result in nan for certain cases. # Testcase when dependednt variable becomes negative: select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and 1 following) from tpcds.store; ---- RESULTS 5, 1.0 8, 0.107747035307 12, 1.0 1, 1.0 2, 0.261613627698 3, 0.687656618674 4, 0.782782611877 6, 0.541316438509 7, 0.0272991489992 9, 0.942195083603 10, 1.0 11,NULL ---- TYPES int,double ==== ---- QUERY # regr_slope() when window size is 2 select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8,4.80120606296e-06 12,-9.00681309118e-06 1,NULL 2,-0.000255754475703 3,0.0 4,-1.00924694479e-05 6,-3.48934955352e-05 7,-0.000953195306915 9,1.32728364256e-05 10,1.00081893097e-05 11,NULL ---- TYPES int,double ==== ---- QUERY # regr_intercept() when window size is 2 select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8, 244.41078638954104 12, 341.01161935181347 1,NULL 2, 1587.9053708439897 3, 236.0 4, 312.2784702956196 6, 543.9564370568922 7, 8832.752449571763 9, 178.14330273093714 10, 200.98275763037407 11,NULL ---- TYPES int,double ==== ---- QUERY # regr_r2() when window size is 2 select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between 1 preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8, 1.0 12, 1.0 1,NULL 2, 1.0 3, 1.0 4, 1.0 6, 1.0 7, 1.0 9, 1.0 10, 1.0 11,NULL ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and 1 following) from tpcds.store; ---- RESULTS 5,4.80120606296e-06 8,-9.00681309118e-06 12,NULL 1,-0.000255754475703 2,0.0 3,-1.00924694479e-05 4,-3.48934955352e-05 6,-0.000953195306915 7,1.32728364256e-05 9,1.00081893097e-05 10,NULL 11,NULL ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and 1 following) from tpcds.store; ---- RESULTS 5, 244.41078639 8, 341.011619352 12,NULL 1, 1587.90537084399 2, 236.0 3, 312.278470296 4, 543.956437057 6, 8832.752449571763 7, 178.143302731 9, 200.98275763 10,NULL 11,NULL ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and 1 following) from tpcds.store; ---- RESULTS 5, 1.0 8, 1.0 12,NULL 1, 1.0 2, 1.0 3, 1.0 4, 1.0 6, 1.0 7, 1.0 9, 1.0 10,NULL 11,NULL ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and unbounded following) from tpcds.store; ---- RESULTS 12,NULL 8,-9.00681309118e-06 5,-1.37354292706e-06 11,NULL 10,NULL 9,1.00081893097e-05 7,1.05678876878e-05 6,5.25458148115e-06 4,-2.68509660433e-06 3,4.13595759719e-06 2,6.59936024162e-06 1,6.00483790103e-06 ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and unbounded following) from tpcds.store; ---- RESULTS 12,NULL 8, 341.011619352 5, 296.416240104 11,NULL 10,NULL 9, 200.98275763 7, 197.748657023 6, 231.216488956 4, 290.843308372 3, 226.988621372 2, 205.13592892 1, 210.405303328 ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between current row and unbounded following) from tpcds.store; ---- RESULTS 12,NULL 8, 1.0 5, 0.107747035307 11,NULL 10,NULL 9, 1.0 7, 0.932586749287 6, 0.0314560278457 4, 0.00476393800297 3, 0.0132688514648 2, 0.0868111803219 1, 0.1061202068 ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_slope(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between unbounded preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8,4.80120606296e-06 12,-1.37354292706e-06 1,NULL 2,-0.000255754475703 3,-2.01036006341e-06 4,-5.05103424244e-06 6,-4.3565531677e-06 7,1.21229193717e-06 9,4.44553019714e-07 10,3.9044206462e-06 11,6.00483790103e-06 ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_intercept(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between unbounded preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8, 244.41078639 12, 296.416240104 1,NULL 2, 1587.90537084399 3, 251.125599973 4, 268.395215604 6, 264.570040249 7, 234.323507488 9, 244.099933952 10, 223.133569068 11, 210.405303328 ---- TYPES int,double ==== ---- QUERY select s_store_sk, regr_r2(s_number_employees, s_floor_space) over (partition by s_city order by s_store_sk rows between unbounded preceding and current row) from tpcds.store; ---- RESULTS 5,NULL 8, 1.0 12, 0.10774703530702502 1,NULL 2, 1.0 3, 0.26161362769774743 4, 0.7766352098335481 6, 0.7289068769493157 7, 0.006806347293667032 9, 7.954256364237451E-4 10, 0.049935330047174945 11, 0.10612020680026239 ---- TYPES int,double ==== ---- QUERY # Testcase when independent variable becomes negative: select s_store_sk, regr_r2(s_floor_space, s_number_employees) over (partition by s_city order by s_store_sk rows between 1 preceding and 1 following) from tpcds.store; ---- RESULTS 5, 1.0 8, 0.107747035307 12, 1.0 1, 1.0 2, 0.261613627698 3, 0.687656618674 4, 0.782782611877 6, 0.541316438509 7, 0.0272991489992 9, 0.942195083603 10, 1.0 11,NULL ---- TYPES int, double ====