mirror of
https://github.com/apache/impala.git
synced 2026-01-02 03:00:32 -05:00
This patch implements codegen support for aggregations with decimal input and intermediate type. For the following benchmark query: SELECT l_discount, count(*) AS cnt FROM biglineitem GROUP BY l_discount HAVING cnt > 9999999999999 Query time went from 8.85s to 3.74s (2.4x faster). Change-Id: I25934fcd6324e5bf1fa6859496107bf2ec68b8d3 Reviewed-on: http://gerrit.cloudera.org:8080/2050 Reviewed-by: Skye Wanderman-Milne <skye@cloudera.com> Tested-by: Internal Jenkins
452 lines
11 KiB
Plaintext
452 lines
11 KiB
Plaintext
====
|
|
---- QUERY
|
|
select count(*),count(c1),count(c2) from decimal_tiny
|
|
---- RESULTS
|
|
100,100,100
|
|
---- TYPES
|
|
bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select * from decimal_tiny where c1 = cast(0.1111 as decimal(5,4))
|
|
---- RESULTS
|
|
0.1111,101.22222,0.1
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
select count(*) from decimal_tiny where c1 != cast(0.1111 as decimal(5,4))
|
|
---- RESULTS
|
|
99
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from decimal_tiny where c1 = c2
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select min(c1), max(c2) from decimal_tiny
|
|
---- RESULTS
|
|
0.0000,220.99978
|
|
---- TYPES
|
|
decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select sum(d1), sum(d2), sum(d3), sum(d4), sum(d5) from decimal_tbl
|
|
---- RESULTS
|
|
161111,3332,13717.2838257900,0.61728394500000000000000000000000000000,12361.02889
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select count(*), sum(d1), sum(d2), sum(d3), sum(d4), sum(d5) from decimal_tbl where d6 = 1
|
|
---- RESULTS
|
|
5,161111,3332,13717.2838257900,0.61728394500000000000000000000000000000,12361.02889
|
|
---- TYPES
|
|
bigint,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select count(*), sum(d1), sum(d2), sum(d3), sum(d4), sum(d5) from decimal_tbl where d6 = 0
|
|
---- RESULTS
|
|
0,NULL,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
bigint,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select c3, count(*) from decimal_tiny group by c3
|
|
---- RESULTS
|
|
0.3,10
|
|
0.5,10
|
|
0.8,10
|
|
0.4,10
|
|
0.9,10
|
|
0.2,10
|
|
0.0,10
|
|
0.6,10
|
|
0.7,10
|
|
0.1,10
|
|
---- TYPES
|
|
decimal,bigint
|
|
====
|
|
---- QUERY
|
|
select c3, max(c1 + c2) from decimal_tiny group by c3
|
|
having max(c1 + c2) > cast('225' as decimal(18,5))
|
|
---- RESULTS
|
|
0.5,226.66540
|
|
0.8,230.66536
|
|
0.4,225.33208
|
|
0.9,231.99868
|
|
0.6,227.99872
|
|
0.7,229.33204
|
|
---- TYPES
|
|
decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select ndv(c1), distinctpc(c2), distinctpcsa(c1 + c2) from decimal_tiny
|
|
---- RESULTS
|
|
98,94,103
|
|
---- TYPES
|
|
bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select ndv(d1), distinctpc(d1), distinctpcsa(d1), count(distinct d1)
|
|
from decimal_tbl;
|
|
---- RESULTS
|
|
4,5,4,4
|
|
---- TYPES
|
|
bigint,bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select c1 + c2 from decimal_tiny order by 1 limit 5
|
|
---- RESULTS
|
|
100.00000
|
|
101.33332
|
|
102.66664
|
|
103.99996
|
|
105.33328
|
|
---- TYPES
|
|
decimal
|
|
====
|
|
---- QUERY
|
|
select c1 + c2 from decimal_tiny order by 1 desc limit 5
|
|
---- RESULTS
|
|
231.99868
|
|
230.66536
|
|
229.33204
|
|
227.99872
|
|
226.66540
|
|
---- TYPES
|
|
decimal
|
|
====
|
|
---- QUERY
|
|
select t1.*,t2.* from decimal_tiny t1
|
|
join decimal_tiny t2
|
|
on t1.c1=t2.c1
|
|
order by t1.c1 desc limit 3
|
|
---- RESULTS
|
|
10.9989,220.99978,0.9,10.9989,220.99978,0.9
|
|
10.8878,219.77756,0.8,10.8878,219.77756,0.8
|
|
10.7767,218.55534,0.7,10.7767,218.55534,0.7
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
select avg(d1), avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
32222,666,2743.4567651580,0.12345678900000000000000000000000000000,2472.20577,1
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select d1, avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl group by d1;
|
|
---- RESULTS
|
|
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
|
|
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
|
|
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
|
|
12345,333,679.0123395000,0.12345678900000000000000000000000000000,5.66000,1
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select count(d1), count(d2), count(d3), count(d4), count(d5), count(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
5,5,5,5,5,5
|
|
---- TYPES
|
|
bigint,bigint,bigint,bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select d1, count(d2), count(d3), count(d4), count(d5), count(d6) from decimal_tbl group by d1;
|
|
---- RESULTS
|
|
132842,1,1,1,1,1
|
|
2345,1,1,1,1,1
|
|
1234,1,1,1,1,1
|
|
12345,2,2,2,2,2
|
|
---- TYPES
|
|
decimal,bigint,bigint,bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select min(d1), min(d2), min(d3), min(d4), min(d5), min(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
1234,111,1.2345678900,0.12345678900000000000000000000000000000,0.10000,1
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select d1, min(d2), min(d3), min(d4), min(d5), min(d6) from decimal_tbl group by d1;
|
|
---- RESULTS
|
|
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
|
|
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
|
|
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
|
|
12345,333,123.4567890000,0.12345678900000000000000000000000000000,0.10000,1
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select max(d1), max(d2), max(d3), max(d4), max(d5), max(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
132842,2222,12345.6789000000,0.12345678900000000000000000000000000000,12345.78900,1
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select d1, max(d2), max(d3), max(d4), max(d5), max(d6) from decimal_tbl group by d1;
|
|
---- RESULTS
|
|
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
|
|
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
|
|
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
|
|
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,11.22000,1
|
|
---- TYPES
|
|
decimal,decimal,decimal,decimal,decimal,decimal
|
|
====
|
|
---- QUERY
|
|
select ndv(d1), ndv(d2), ndv(d3), ndv(d4), ndv(d5), ndv(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
4,3,5,1,5,1
|
|
---- TYPES
|
|
bigint,bigint,bigint,bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select d1, ndv(d2), ndv(d3), ndv(d4), ndv(d5), ndv(d6) from decimal_tbl group by d1;
|
|
---- RESULTS
|
|
132842,1,1,1,1,1
|
|
2345,1,1,1,1,1
|
|
1234,1,1,1,1,1
|
|
12345,1,2,1,2,1
|
|
---- TYPES
|
|
decimal,bigint,bigint,bigint,bigint,bigint
|
|
====
|
|
---- QUERY
|
|
select cast(avg(c1) as decimal(10,4)) as c from decimal_tiny
|
|
group by c3 having c > 5.5 order by 1
|
|
---- RESULTS
|
|
5.5550
|
|
5.6661
|
|
5.7772
|
|
5.8883
|
|
5.9994
|
|
---- TYPES
|
|
decimal
|
|
====
|
|
---- QUERY
|
|
select a.c1 from decimal_tiny a left semi join decimal_tiny b on a.c1=b.c3
|
|
---- RESULTS
|
|
0.0000
|
|
---- TYPES
|
|
decimal
|
|
====
|
|
---- QUERY
|
|
select * from decimal_tiny
|
|
---- RESULTS
|
|
0.0000,100.00000,0.0
|
|
0.1111,101.22222,0.1
|
|
0.2222,102.44444,0.2
|
|
0.3333,103.66666,0.3
|
|
0.4444,104.88888,0.4
|
|
0.5555,106.11110,0.5
|
|
0.6666,107.33332,0.6
|
|
0.7777,108.55554,0.7
|
|
0.8888,109.77776,0.8
|
|
0.9999,110.99998,0.9
|
|
1.1110,112.22220,0.0
|
|
1.2221,113.44442,0.1
|
|
1.3332,114.66664,0.2
|
|
1.4443,115.88886,0.3
|
|
1.5554,117.11108,0.4
|
|
1.6665,118.33330,0.5
|
|
1.7776,119.55552,0.6
|
|
1.8887,120.77774,0.7
|
|
1.9998,121.99996,0.8
|
|
2.1109,123.22218,0.9
|
|
2.2220,124.44440,0.0
|
|
2.3331,125.66662,0.1
|
|
2.4442,126.88884,0.2
|
|
2.5553,128.11106,0.3
|
|
2.6664,129.33328,0.4
|
|
2.7775,130.55550,0.5
|
|
2.8886,131.77772,0.6
|
|
2.9997,132.99994,0.7
|
|
3.1108,134.22216,0.8
|
|
3.2219,135.44438,0.9
|
|
3.3330,136.66660,0.0
|
|
3.4441,137.88882,0.1
|
|
3.5552,139.11104,0.2
|
|
3.6663,140.33326,0.3
|
|
3.7774,141.55548,0.4
|
|
3.8885,142.77770,0.5
|
|
3.9996,143.99992,0.6
|
|
4.1107,145.22214,0.7
|
|
4.2218,146.44436,0.8
|
|
4.3329,147.66658,0.9
|
|
4.4440,148.88880,0.0
|
|
4.5551,150.11102,0.1
|
|
4.6662,151.33324,0.2
|
|
4.7773,152.55546,0.3
|
|
4.8884,153.77768,0.4
|
|
4.9995,154.99990,0.5
|
|
5.1106,156.22212,0.6
|
|
5.2217,157.44434,0.7
|
|
5.3328,158.66656,0.8
|
|
5.4439,159.88878,0.9
|
|
5.5550,161.11100,0.0
|
|
5.6661,162.33322,0.1
|
|
5.7772,163.55544,0.2
|
|
5.8883,164.77766,0.3
|
|
5.9994,165.99988,0.4
|
|
6.1105,167.22210,0.5
|
|
6.2216,168.44432,0.6
|
|
6.3327,169.66654,0.7
|
|
6.4438,170.88876,0.8
|
|
6.5549,172.11098,0.9
|
|
6.6660,173.33320,0.0
|
|
6.7771,174.55542,0.1
|
|
6.8882,175.77764,0.2
|
|
6.9993,176.99986,0.3
|
|
7.1104,178.22208,0.4
|
|
7.2215,179.44430,0.5
|
|
7.3326,180.66652,0.6
|
|
7.4437,181.88874,0.7
|
|
7.5548,183.11096,0.8
|
|
7.6659,184.33318,0.9
|
|
7.7770,185.55540,0.0
|
|
7.8881,186.77762,0.1
|
|
7.9992,187.99984,0.2
|
|
8.1103,189.22206,0.3
|
|
8.2214,190.44428,0.4
|
|
8.3325,191.66650,0.5
|
|
8.4436,192.88872,0.6
|
|
8.5547,194.11094,0.7
|
|
8.6658,195.33316,0.8
|
|
8.7769,196.55538,0.9
|
|
8.8880,197.77760,0.0
|
|
8.9991,198.99982,0.1
|
|
9.1102,200.22204,0.2
|
|
9.2213,201.44426,0.3
|
|
9.3324,202.66648,0.4
|
|
9.4435,203.88870,0.5
|
|
9.5546,205.11092,0.6
|
|
9.6657,206.33314,0.7
|
|
9.7768,207.55536,0.8
|
|
9.8879,208.77758,0.9
|
|
9.9990,209.99980,0.0
|
|
10.1101,211.22202,0.1
|
|
10.2212,212.44424,0.2
|
|
10.3323,213.66646,0.3
|
|
10.4434,214.88868,0.4
|
|
10.5545,216.11090,0.5
|
|
10.6656,217.33312,0.6
|
|
10.7767,218.55534,0.7
|
|
10.8878,219.77756,0.8
|
|
10.9989,220.99978,0.9
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test sum() and avg() analytic fns with start bounds (tests Remove() for decimal)
|
|
select
|
|
sum(c1) over (order by c1 rows between 5 preceding and current row),
|
|
sum(c2) over (order by c1 rows between 5 preceding and 5 following),
|
|
sum(c3) over (order by c1 rows between 5 preceding and 2 preceding),
|
|
avg(c1) over (order by c1 rows between 5 preceding and current row),
|
|
avg(c2) over (order by c1 rows between 5 preceding and 5 following),
|
|
avg(c3) over (order by c1 rows between 5 preceding and 2 preceding)
|
|
from decimal_tiny where c2 < 112
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0.0000,618.33330,NULL,0.0000,103.05555,NULL
|
|
0.1111,725.66662,NULL,0.0555,103.66666,NULL
|
|
0.3333,834.22216,0.0,0.1111,104.27777,0.0
|
|
0.6666,943.99992,0.1,0.1666,104.88888,0.0
|
|
1.1110,1054.99990,0.3,0.2222,105.49999,0.1
|
|
1.6665,1054.99990,0.6,0.2777,105.49999,0.1
|
|
2.3331,954.99990,1.0,0.3888,106.11110,0.2
|
|
2.9997,853.77768,1.4,0.4999,106.72221,0.3
|
|
3.6663,751.33324,1.8,0.6110,107.33332,0.4
|
|
4.3329,647.66658,2.2,0.7221,107.94443,0.5
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type
|
|
select
|
|
first_value(c3) over (order by c3 rows between 92 preceding and current row),
|
|
first_value(c2) over (order by c3 rows between 92 preceding and 1 preceding)
|
|
from decimal_tiny where c3 = 0.0
|
|
---- RESULTS
|
|
0.0,NULL
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
0.0,100.00000
|
|
---- TYPES
|
|
DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
select histogram(d1) from decimal_tbl;
|
|
---- RESULTS
|
|
'1234, 2345, 12345, 12345, 132842'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select appx_median(c1), appx_median(c2), appx_median(c3) from decimal_tiny
|
|
---- RESULTS
|
|
5.5550,161.11100,0.5
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
select sample(d1) from decimal_tbl;
|
|
---- TYPES
|
|
# Results are unstable, just check that this doesn't crash
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1837: Handle loss of precision when implicitly casting a literal to a decimal.
|
|
# Here "1.8" will be implicitly cast to a decimal(38,38), losing precision.
|
|
select coalesce(1.8, cast(0 as decimal(38,38)))
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RESULTS
|
|
0.00000000000000000000000000000000000000
|
|
---- ERRORS
|
|
UDF WARNING: Expression overflowed, returning NULL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-1837: Handle loss of precision when implicitly casting a literal to a decimal.
|
|
# Here "1.8" will be implicitly cast to a decimal(38,38), losing precision.
|
|
# No error is reported because the overflowed expr is never evaluated.
|
|
select coalesce(cast(0 as decimal(38,38)), 1.8)
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RESULTS
|
|
0.00000000000000000000000000000000000000
|
|
---- ERRORS
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2259 - wrong overload chosen for analytic function.
|
|
select lag(c1, 1) over (order by c1), c1
|
|
from decimal_tiny
|
|
order by c1
|
|
limit 10
|
|
---- RESULTS
|
|
NULL,0.0000
|
|
0.0000,0.1111
|
|
0.1111,0.2222
|
|
0.2222,0.3333
|
|
0.3333,0.4444
|
|
0.4444,0.5555
|
|
0.5555,0.6666
|
|
0.6666,0.7777
|
|
0.7777,0.8888
|
|
0.8888,0.9999
|
|
---- TYPES
|
|
DECIMAL, DECIMAL
|
|
====
|