mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
Address rounding on divide and multiply when results are truncated. Testing: Manually ran some divides that should overflow, then added the results to the test. Made the decimal-test use rounding behavior by default, and now the error margin of the test has decreased. Initial perf results: Multiply is totally uninteresting so far; all implementations return the same values in the same time: +-------------------------+-----------------------------------+ | sum(l_quantity * l_tax) | sum(l_extendedprice * l_discount) | +-------------------------+-----------------------------------+ | 61202493.3700 | 114698450836.4234 | +-------------------------+-----------------------------------+ Fetched 1 row(s) in 1.13s Divide shows no regression from prior with DECIMAL_V2 off: +-----------------------------+-----------------------------------+ | sum(l_quantity / l_tax) | sum(l_extendedprice / l_discount) | +-----------------------------+-----------------------------------+ | 46178777464.523809516381723 | 61076151920731.010714279183910 | +-----------------------------+-----------------------------------+ before: Fetched 1 row(s) in 13.08s after: Fetched 1 row(s) in 13.06s And with DECIMAL_V2 on: +-----------------------------+-----------------------------------+ | sum(l_quantity / l_tax) | sum(l_extendedprice / l_discount) | +-----------------------------+-----------------------------------+ | 46178777464.523809523847285 | 61076151920731.010714285714202 | +-----------------------------+-----------------------------------+ Fetched 1 row(s) in 16.06s So the performance regression is not as bad as expected. Still, divide performance could use some work. Change-Id: Ie6bfcbe37555b74598d409c6f84f06b0ae5c4312 Reviewed-on: http://gerrit.cloudera.org:8080/6132 Reviewed-by: Dan Hecht <dhecht@cloudera.com> Tested-by: Impala Public Jenkins
208 lines
6.6 KiB
Plaintext
208 lines
6.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test DECIMAL V1 divide result type
# With decimal_v2 disabled the expected d3 / d4 value is NULL on every row, and several
# quotients end one digit lower than the DECIMAL V2 expectations for the same query
# (e.g. 0.0473347547 here vs. 0.0473347548 there), which is consistent with truncation.
|
|
set decimal_v2=false;
|
|
select d1 / d2, d2 / d1, d3 / d4, d5 / d3, d3 / d5 from decimal_tbl;
|
|
---- RESULTS
|
|
0.55535553555,1.8006482982,NULL,10000.0891810008,0.000099999108197945064
|
|
21.12612612612,0.0473347547,NULL,0.2544210023,3.930493123209169054441
|
|
37.07207207207,0.0269744835,NULL,0.0000810000,12345.678900000000000000000
|
|
37.07207207207,0.0269744835,NULL,0.0908820008,11.003278877005347593582
|
|
398.92492492492,0.0025067373,NULL,0.0000630900,15850.349728459731155875669
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Verify DECIMAL V2. Differences with V1:
|
|
# * d3/d4 does not overflow
|
|
# * d5/d3 has more scale
# * the last digit is rounded rather than truncated (visible in d1/d2, d2/d1, d3/d5)
|
|
set decimal_v2=true;
|
|
select d1 / d2, d2 / d1, d3 / d4, d5 / d3, d3 / d5 from decimal_tbl;
|
|
---- RESULTS
|
|
0.55535553555,1.8006482982,10.000000,10000.08918100081154710738508,0.000099999108197945065
|
|
21.12612612613,0.0473347548,100.000000,0.25442100231523112106860,3.930493123209169054441
|
|
37.07207207207,0.0269744836,1000.000000,0.09088200082702620752594,11.003278877005347593583
|
|
37.07207207207,0.0269744836,10000.000000,0.00008100000073710000671,12345.678900000000000000000
|
|
398.92492492492,0.0025067373,100000.000000,0.00006309009057411982422,15850.349728459731155875669
|
|
---- TYPES
|
|
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test casting behavior without decimal_v2 query option set.
# Every expected value is one unit lower in the third fractional digit than in the
# decimal_v2=true variant of this query (e.g. 1.234 vs. 1.235), which is consistent
# with the cast dropping the extra digits instead of rounding them.
|
|
set decimal_v2=false;
|
|
select cast(d3 as decimal(20, 3)) from decimal_tbl;
|
|
---- RESULTS
|
|
1.234
|
|
12.345
|
|
123.456
|
|
1234.567
|
|
12345.678
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test casting behavior with decimal_v2 query option set.
# Every expected value is one unit higher in the third fractional digit than in the
# decimal_v2=false variant of this query (e.g. 1.235 vs. 1.234), which is consistent
# with the cast rounding the extra digits instead of dropping them.
|
|
set decimal_v2=true;
|
|
select cast(d3 as decimal(20, 3)) from decimal_tbl;
|
|
---- RESULTS
|
|
1.235
|
|
12.346
|
|
123.457
|
|
1234.568
|
|
12345.679
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test casting behavior without decimal_v2 query option set.
# Unlike the plain cast test, this adds two casts with different scales (2 and 4) and
# aggregates the result with sum(), so any per-row cast difference accumulates:
# the expected total is 26078.2788 here vs. 26078.3189 with decimal_v2=true.
|
|
set decimal_v2=false;
|
|
select sum(cast(d3 as DECIMAL(20,2)) + cast(d5 as DECIMAL(20,4))) from decimal_tbl;
|
|
---- RESULTS
|
|
26078.2788
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test casting behavior with decimal_v2 query option set.
# Unlike the plain cast test, this adds two casts with different scales (2 and 4) and
# aggregates the result with sum(), so any per-row cast difference accumulates:
# the expected total is 26078.3189 here vs. 26078.2788 with decimal_v2=false.
|
|
set decimal_v2=true;
|
|
select sum(cast(d3 as DECIMAL(20,2)) + cast(d5 as DECIMAL(20,4))) from decimal_tbl;
|
|
---- RESULTS
|
|
26078.3189
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test AVG() with DECIMAL_V1
# The expected averages carry fewer fractional digits than in the DECIMAL_V2 variant of
# this query (e.g. 32222 vs. 32222.200000 and 2472.20577 vs. 2472.205778).
|
|
set decimal_v2=false;
|
|
select avg(d1), avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
32222,666,2743.4567651580,0.12345678900000000000000000000000000000,2472.20577,1
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test AVG() with DECIMAL_V2
# The expected averages have at least six fractional digits (32222.200000, 666.400000,
# 2472.205778, 1.000000); the avg(d3) and avg(d4) expectations are the same as in the
# DECIMAL_V1 variant of this query.
|
|
set decimal_v2=true;
|
|
select avg(d1), avg(d2), avg(d3), avg(d4), avg(d5), avg(d6) from decimal_tbl;
|
|
---- RESULTS
|
|
32222.200000,666.400000,2743.4567651580,0.12345678900000000000000000000000000000,2472.205778,1.000000
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test AVG() with DECIMAL_V1
# Per l_tax group, compares avg() over an explicit decimal(38,10) cast with avg() over
# the uncast l_extendedprice column. The expected uncast average has two fractional
# digits (e.g. 38241.59); the cast column has ten.
|
|
set decimal_v2=false;
|
|
select l_tax, avg(cast(l_extendedprice as decimal(38,10))), avg(l_extendedprice)
|
|
from tpch_parquet.lineitem group by l_tax order by 1;
|
|
---- RESULTS
|
|
0.00,38241.5984613546,38241.59
|
|
0.01,38283.5417664599,38283.54
|
|
0.02,38250.4873094187,38250.48
|
|
0.03,38259.2810374789,38259.28
|
|
0.04,38247.1967454731,38247.19
|
|
0.05,38234.8480874721,38234.84
|
|
0.06,38246.4342924027,38246.43
|
|
0.07,38281.1963710003,38281.19
|
|
0.08,38251.6233675941,38251.62
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test AVG() with DECIMAL_V2
# Per l_tax group, compares avg() over an explicit decimal(38,10) cast with avg() over
# the uncast l_extendedprice column. The expected uncast average has six fractional
# digits (e.g. 38241.598461), and some cast-column values differ from the DECIMAL_V1
# expectations in the last digit (e.g. 38283.5417664600 vs. 38283.5417664599).
|
|
set decimal_v2=true;
|
|
select l_tax, avg(cast(l_extendedprice as decimal(38,10))), avg(l_extendedprice)
|
|
from tpch_parquet.lineitem group by l_tax order by 1;
|
|
---- RESULTS
|
|
0.00,38241.5984613546,38241.598461
|
|
0.01,38283.5417664600,38283.541766
|
|
0.02,38250.4873094187,38250.487309
|
|
0.03,38259.2810374789,38259.281037
|
|
0.04,38247.1967454731,38247.196745
|
|
0.05,38234.8480874721,38234.848087
|
|
0.06,38246.4342924027,38246.434292
|
|
0.07,38281.1963710004,38281.196371
|
|
0.08,38251.6233675942,38251.623368
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test AVG() with DECIMAL_V1
# The HAVING threshold 38247.190 equals the DECIMAL_V1 average expected for one group
# (38247.19 for l_tax = 0.04 in the grouped AVG() test), so that group is not expected
# in the output here; the DECIMAL_V2 variant expects it (38247.196745).
|
|
set decimal_v2=false;
|
|
select avg(l_extendedprice) as a from tpch_parquet.lineitem
|
|
group by l_tax having a > 38247.190 order by 1;
|
|
---- RESULTS
|
|
38250.48
|
|
38251.62
|
|
38259.28
|
|
38281.19
|
|
38283.54
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test AVG() with DECIMAL_V2
# With the extra scale, one group's average (38247.196745) is above the 38247.190
# HAVING threshold, so six rows are expected here versus five in the DECIMAL_V1
# variant, where that group's expected average is exactly 38247.19.
|
|
set decimal_v2=true;
|
|
select avg(l_extendedprice) as a from tpch_parquet.lineitem
|
|
group by l_tax having a > 38247.190 order by 1;
|
|
---- RESULTS
|
|
38247.196745
|
|
38250.487309
|
|
38251.623368
|
|
38259.281037
|
|
38281.196371
|
|
38283.541766
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test sum() and avg() analytic fns with start bounds (tests Remove() for decimal)
|
|
# with DECIMAL_V1
# The three frames end at the current row, after it (5 following) and before it
# (2 preceding). Each expected avg() value has the same number of fractional digits as
# the matching sum() column (e.g. 0.0555 next to 0.1111).
|
|
set decimal_v2=false;
|
|
select
|
|
sum(c1) over (order by c1 rows between 5 preceding and current row),
|
|
sum(c2) over (order by c1 rows between 5 preceding and 5 following),
|
|
sum(c3) over (order by c1 rows between 5 preceding and 2 preceding),
|
|
avg(c1) over (order by c1 rows between 5 preceding and current row),
|
|
avg(c2) over (order by c1 rows between 5 preceding and 5 following),
|
|
avg(c3) over (order by c1 rows between 5 preceding and 2 preceding)
|
|
from decimal_tiny where c2 < 112
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0.0000,618.33330,NULL,0.0000,103.05555,NULL
|
|
0.1111,725.66662,NULL,0.0555,103.66666,NULL
|
|
0.3333,834.22216,0.0,0.1111,104.27777,0.0
|
|
0.6666,943.99992,0.1,0.1666,104.88888,0.0
|
|
1.1110,1054.99990,0.3,0.2222,105.49999,0.1
|
|
1.6665,1054.99990,0.6,0.2777,105.49999,0.1
|
|
2.3331,954.99990,1.0,0.3888,106.11110,0.2
|
|
2.9997,853.77768,1.4,0.4999,106.72221,0.3
|
|
3.6663,751.33324,1.8,0.6110,107.33332,0.4
|
|
4.3329,647.66658,2.2,0.7221,107.94443,0.5
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Test sum() and avg() analytic fns with start bounds (tests Remove() for decimal)
|
|
# with DECIMAL_V2
# The three frames end at the current row, after it (5 following) and before it
# (2 preceding). The expected avg() columns carry six fractional digits (e.g. 0.055550,
# 0.050000); the expected sum() columns are the same as in the DECIMAL_V1 variant.
|
|
set decimal_v2=true;
|
|
select
|
|
sum(c1) over (order by c1 rows between 5 preceding and current row),
|
|
sum(c2) over (order by c1 rows between 5 preceding and 5 following),
|
|
sum(c3) over (order by c1 rows between 5 preceding and 2 preceding),
|
|
avg(c1) over (order by c1 rows between 5 preceding and current row),
|
|
avg(c2) over (order by c1 rows between 5 preceding and 5 following),
|
|
avg(c3) over (order by c1 rows between 5 preceding and 2 preceding)
|
|
from decimal_tiny where c2 < 112
|
|
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
|
0.0000,618.33330,NULL,0.000000,103.055550,NULL
|
|
0.1111,725.66662,NULL,0.055550,103.666660,NULL
|
|
0.3333,834.22216,0.0,0.111100,104.277770,0.000000
|
|
0.6666,943.99992,0.1,0.166650,104.888880,0.050000
|
|
1.1110,1054.99990,0.3,0.222200,105.499990,0.100000
|
|
1.6665,1054.99990,0.6,0.277750,105.499990,0.150000
|
|
2.3331,954.99990,1.0,0.388850,106.111100,0.250000
|
|
2.9997,853.77768,1.4,0.499950,106.722210,0.350000
|
|
3.6663,751.33324,1.8,0.611050,107.333320,0.450000
|
|
4.3329,647.66658,2.2,0.722150,107.944430,0.550000
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
==== |