Files
impala/testdata/workloads/functional-query/queries/QueryTest/decimal.test
Taras Bobrovytsky 575b5a20e6 IMPALA-5017: Error on decimal overflow
Before this patch, decimal operations would either silently overflow (in
the case of sum() and avg()), or produce a warning.

In this patch, the behaviour is changed so that an error is produced in
the case of overflow when DECIMAL_v2 is enabled. Decimal v1 behaviour is
unchanged.

We introduce overflow checks when computing sum() and avg(). This
results in a ~30% performance regression when we are in decimal v2 mode
compared to decimal v1.

Benchmarks:
  Query:
  select sum(dec_38_19) from decimal_tbl
    Decimal v1: 11.57s
    Decimal v2: 16.58s

  Query:
  select avg(dec_38_19) from decimal_tbl
    Decimal v1: 12.08s
    Decimal v2: 17.08s

The performance regression is not as bad if we are computing the sum or
average of decimal column with a lower precision:

  Query:
  select sum(dec_9_5) from decimal_tbl
    Decimal v1: 11.06s
    Decimal v2: 13.08s

  Query:
  select avg(dec_9_5) from decimal_tbl
    Decimal v1: 11.56s
    Decimal v2: 13.57s

Testing:
- Added several end to end tests.
- Updated Expr tests to check for error in case of overflow.

Change-Id: Id98a92c9a9469ec8cf14e518c741a2dab7053019
Reviewed-on: http://gerrit.cloudera.org:8080/8404
Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Tested-by: Impala Public Jenkins
2017-12-01 23:23:01 +00:00

399 lines
9.2 KiB
Plaintext

====
---- QUERY
select count(*),count(c1),count(c2) from decimal_tiny
---- RESULTS
100,100,100
---- TYPES
bigint,bigint,bigint
====
---- QUERY
select * from decimal_tiny where c1 = cast(0.1111 as decimal(5,4))
---- RESULTS
0.1111,101.22222,0.1
---- TYPES
DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
select count(*) from decimal_tiny where c1 != cast(0.1111 as decimal(5,4))
---- RESULTS
99
---- TYPES
bigint
====
---- QUERY
select count(*) from decimal_tiny where c1 = c2
---- RESULTS
0
---- TYPES
bigint
====
---- QUERY
select min(c1), max(c2) from decimal_tiny
---- RESULTS
0.0000,220.99978
---- TYPES
decimal,decimal
====
---- QUERY
select sum(d1), sum(d2), sum(d3), sum(d4), sum(d5) from decimal_tbl
---- RESULTS
161111,3332,13717.2838257900,0.61728394500000000000000000000000000000,12361.02889
---- TYPES
decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select count(*), sum(d1), sum(d2), sum(d3), sum(d4), sum(d5) from decimal_tbl where d6 = 1
---- RESULTS
5,161111,3332,13717.2838257900,0.61728394500000000000000000000000000000,12361.02889
---- TYPES
bigint,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select count(*), sum(d1), sum(d2), sum(d3), sum(d4), sum(d5) from decimal_tbl where d6 = 0
---- RESULTS
0,NULL,NULL,NULL,NULL,NULL
---- TYPES
bigint,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select c3, count(*) from decimal_tiny group by c3
---- RESULTS
0.3,10
0.5,10
0.8,10
0.4,10
0.9,10
0.2,10
0.0,10
0.6,10
0.7,10
0.1,10
---- TYPES
decimal,bigint
====
---- QUERY
select c3, max(c1 + c2) from decimal_tiny group by c3
having max(c1 + c2) > cast('225' as decimal(18,5))
---- RESULTS
0.5,226.66540
0.8,230.66536
0.4,225.33208
0.9,231.99868
0.6,227.99872
0.7,229.33204
---- TYPES
decimal,decimal
====
---- QUERY
select ndv(c1), distinctpc(c2), distinctpcsa(c1 + c2) from decimal_tiny
---- RESULTS
98,94,103
---- TYPES
bigint,bigint,bigint
====
---- QUERY
select ndv(d1), distinctpc(d1), distinctpcsa(d1), count(distinct d1)
from decimal_tbl;
---- RESULTS
4,5,4,4
---- TYPES
bigint,bigint,bigint,bigint
====
---- QUERY
select c1 + c2 from decimal_tiny order by 1 limit 5
---- RESULTS
100.00000
101.33332
102.66664
103.99996
105.33328
---- TYPES
decimal
====
---- QUERY
select c1 + c2 from decimal_tiny order by 1 desc limit 5
---- RESULTS
231.99868
230.66536
229.33204
227.99872
226.66540
---- TYPES
decimal
====
---- QUERY
select t1.*,t2.* from decimal_tiny t1
join decimal_tiny t2
on t1.c1=t2.c1
order by t1.c1 desc limit 3
---- RESULTS
10.9989,220.99978,0.9,10.9989,220.99978,0.9
10.8878,219.77756,0.8,10.8878,219.77756,0.8
10.7767,218.55534,0.7,10.7767,218.55534,0.7
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
select count(d1), count(d2), count(d3), count(d4), count(d5), count(d6) from decimal_tbl;
---- RESULTS
5,5,5,5,5,5
---- TYPES
bigint,bigint,bigint,bigint,bigint,bigint
====
---- QUERY
select d1, count(d2), count(d3), count(d4), count(d5), count(d6) from decimal_tbl group by d1;
---- RESULTS
132842,1,1,1,1,1
2345,1,1,1,1,1
1234,1,1,1,1,1
12345,2,2,2,2,2
---- TYPES
decimal,bigint,bigint,bigint,bigint,bigint
====
---- QUERY
select min(d1), min(d2), min(d3), min(d4), min(d5), min(d6) from decimal_tbl;
---- RESULTS
1234,111,1.2345678900,0.12345678900000000000000000000000000000,0.10000,1
---- TYPES
decimal,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select d1, min(d2), min(d3), min(d4), min(d5), min(d6) from decimal_tbl group by d1;
---- RESULTS
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,0.10000,1
---- TYPES
decimal,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select max(d1), max(d2), max(d3), max(d4), max(d5), max(d6) from decimal_tbl;
---- RESULTS
132842,2222,12345.6789000000,0.12345678900000000000000000000000000000,12345.78900,1
---- TYPES
decimal,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select d1, max(d2), max(d3), max(d4), max(d5), max(d6) from decimal_tbl group by d1;
---- RESULTS
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,11.22000,1
---- TYPES
decimal,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
select ndv(d1), ndv(d2), ndv(d3), ndv(d4), ndv(d5), ndv(d6) from decimal_tbl;
---- RESULTS
4,3,5,1,5,1
---- TYPES
bigint,bigint,bigint,bigint,bigint,bigint
====
---- QUERY
select d1, ndv(d2), ndv(d3), ndv(d4), ndv(d5), ndv(d6) from decimal_tbl group by d1;
---- RESULTS
132842,1,1,1,1,1
2345,1,1,1,1,1
1234,1,1,1,1,1
12345,1,2,1,2,1
---- TYPES
decimal,bigint,bigint,bigint,bigint,bigint
====
---- QUERY
select a.c1 from decimal_tiny a left semi join decimal_tiny b on a.c1=b.c3
---- RESULTS
0.0000
---- TYPES
decimal
====
---- QUERY
select * from decimal_tiny
---- RESULTS
0.0000,100.00000,0.0
0.1111,101.22222,0.1
0.2222,102.44444,0.2
0.3333,103.66666,0.3
0.4444,104.88888,0.4
0.5555,106.11110,0.5
0.6666,107.33332,0.6
0.7777,108.55554,0.7
0.8888,109.77776,0.8
0.9999,110.99998,0.9
1.1110,112.22220,0.0
1.2221,113.44442,0.1
1.3332,114.66664,0.2
1.4443,115.88886,0.3
1.5554,117.11108,0.4
1.6665,118.33330,0.5
1.7776,119.55552,0.6
1.8887,120.77774,0.7
1.9998,121.99996,0.8
2.1109,123.22218,0.9
2.2220,124.44440,0.0
2.3331,125.66662,0.1
2.4442,126.88884,0.2
2.5553,128.11106,0.3
2.6664,129.33328,0.4
2.7775,130.55550,0.5
2.8886,131.77772,0.6
2.9997,132.99994,0.7
3.1108,134.22216,0.8
3.2219,135.44438,0.9
3.3330,136.66660,0.0
3.4441,137.88882,0.1
3.5552,139.11104,0.2
3.6663,140.33326,0.3
3.7774,141.55548,0.4
3.8885,142.77770,0.5
3.9996,143.99992,0.6
4.1107,145.22214,0.7
4.2218,146.44436,0.8
4.3329,147.66658,0.9
4.4440,148.88880,0.0
4.5551,150.11102,0.1
4.6662,151.33324,0.2
4.7773,152.55546,0.3
4.8884,153.77768,0.4
4.9995,154.99990,0.5
5.1106,156.22212,0.6
5.2217,157.44434,0.7
5.3328,158.66656,0.8
5.4439,159.88878,0.9
5.5550,161.11100,0.0
5.6661,162.33322,0.1
5.7772,163.55544,0.2
5.8883,164.77766,0.3
5.9994,165.99988,0.4
6.1105,167.22210,0.5
6.2216,168.44432,0.6
6.3327,169.66654,0.7
6.4438,170.88876,0.8
6.5549,172.11098,0.9
6.6660,173.33320,0.0
6.7771,174.55542,0.1
6.8882,175.77764,0.2
6.9993,176.99986,0.3
7.1104,178.22208,0.4
7.2215,179.44430,0.5
7.3326,180.66652,0.6
7.4437,181.88874,0.7
7.5548,183.11096,0.8
7.6659,184.33318,0.9
7.7770,185.55540,0.0
7.8881,186.77762,0.1
7.9992,187.99984,0.2
8.1103,189.22206,0.3
8.2214,190.44428,0.4
8.3325,191.66650,0.5
8.4436,192.88872,0.6
8.5547,194.11094,0.7
8.6658,195.33316,0.8
8.7769,196.55538,0.9
8.8880,197.77760,0.0
8.9991,198.99982,0.1
9.1102,200.22204,0.2
9.2213,201.44426,0.3
9.3324,202.66648,0.4
9.4435,203.88870,0.5
9.5546,205.11092,0.6
9.6657,206.33314,0.7
9.7768,207.55536,0.8
9.8879,208.77758,0.9
9.9990,209.99980,0.0
10.1101,211.22202,0.1
10.2212,212.44424,0.2
10.3323,213.66646,0.3
10.4434,214.88868,0.4
10.5545,216.11090,0.5
10.6656,217.33312,0.6
10.7767,218.55534,0.7
10.8878,219.77756,0.8
10.9989,220.99978,0.9
---- TYPES
DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Test group-by with decimal
select d1, d2, sum(d3), sum(d4), sum(d5), sum(d6) from decimal_tbl group by d1,d2;
---- RESULTS
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
12345,333,1358.0246790000,0.24691357800000000000000000000000000000,11.32000,2
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
---- TYPES
decimal,decimal,decimal,decimal,decimal,decimal
====
---- QUERY
# IMPALA-1559: FIRST_VALUE rewrite function intermediate type not matching slot type
select
first_value(c3) over (order by c1 rows between 92 preceding and current row),
first_value(c2) over (order by c1 rows between 92 preceding and 1 preceding)
from decimal_tiny where c3 = 0.0
---- RESULTS: VERIFY_IS_EQUAL_SORTED
0.0,NULL
0.0,100.00000
0.0,100.00000
0.0,100.00000
0.0,100.00000
0.0,100.00000
0.0,100.00000
0.0,100.00000
0.0,100.00000
0.0,100.00000
---- TYPES
DECIMAL, DECIMAL
====
---- QUERY
select histogram(d1) from decimal_tbl;
---- RESULTS
'1234, 2345, 12345, 12345, 132842'
---- TYPES
STRING
====
---- QUERY
select appx_median(c1), appx_median(c2), appx_median(c3) from decimal_tiny
---- RESULTS
5.5550,161.11100,0.5
---- TYPES
DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
select sample(d1) from decimal_tbl;
---- TYPES
# Results are unstable, just check that this doesn't crash
STRING
====
---- QUERY
# IMPALA-1837: Handle loss of precision when implicitly casting a literal to a decimal.
# Here "1.8" will be implicitly cast to a decimal(38,38), losing precision.
# No error is reported because the overflowed expr is never evaluated.
select coalesce(cast(0 as decimal(38,38)), 1.8)
---- TYPES
DECIMAL
---- RESULTS
0.00000000000000000000000000000000000000
---- ERRORS
====
---- QUERY
# IMPALA-2259 - wrong overload chosen for analytic function.
select lag(c1, 1) over (order by c1), c1
from decimal_tiny
order by c1
limit 10
---- RESULTS
NULL,0.0000
0.0000,0.1111
0.1111,0.2222
0.2222,0.3333
0.3333,0.4444
0.4444,0.5555
0.5555,0.6666
0.6666,0.7777
0.7777,0.8888
0.8888,0.9999
---- TYPES
DECIMAL, DECIMAL
====