mirror of
https://github.com/apache/impala.git
synced 2026-01-09 06:05:09 -05:00
StringToFloatInternal is used to parse a string into a float. It had logic to make it faster than standard functions like strtod in many cases, but it was not as accurate. We are replacing it with a third-party library named fast_double_parser, which is both fast and doesn't sacrifice accuracy for speed. Benchmarking on more than 1 million rows where a string is cast to double shows that the new patch is on par with the earlier algorithm. Results: W/O library: Fetched 1222386 row(s) in 32.10s With library: Fetched 1222386 row(s) in 31.71s Testing: 1. Added test to check for accuracy improvement. 2. Ran existing Backend tests for correctness. Change-Id: Ic105ad38a2fcbf2fb4e8ae8af6d9a8e251a9c141 Reviewed-on: http://gerrit.cloudera.org:8080/17389 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
143 lines
3.9 KiB
Plaintext
143 lines
3.9 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Single-row VALUES clause mixing integer, arithmetic, decimal and string
# expressions. Per the expected TYPES, 1 is TINYINT, 2+1 is SMALLINT and both
# decimal expressions (1.0 and 5.0 + 1.0) are DECIMAL.
values(1, 2+1, 1.0, 5.0 + 1.0, 'a')
|
|
---- RESULTS
|
|
1,3,1.0,6.0,'a'
|
|
---- TYPES
|
|
TINYINT, SMALLINT, DECIMAL, DECIMAL, STRING
|
|
====
|
|
---- QUERY
|
|
# Single-row VALUES clause followed by ORDER BY on the first column and a LIMIT
# (10) larger than the number of rows, so the one row is expected back.
values(1+1, 2, 5.0, 'a') order by 1 limit 10
|
|
---- RESULTS
|
|
2,2,5.0,'a'
|
|
---- TYPES
|
|
SMALLINT, TINYINT, DECIMAL, STRING
|
|
====
|
|
---- QUERY
|
|
# Multi-row VALUES clause with three parenthesized rows. The first row's 1+8
# shows up as 9 in RESULTS, and the first column is typed SMALLINT while the
# second column of plain integer literals is TINYINT.
values((1+8, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c'))
|
|
---- RESULTS
|
|
9,2,5.0,'a'
|
|
2,3,6.0,'b'
|
|
3,4,7.0,'c'
|
|
---- TYPES
|
|
SMALLINT, TINYINT, DECIMAL, STRING
|
|
====
|
|
---- QUERY
|
|
# Three-row VALUES clause sorted descending on the first column and limited to
# two rows, so only the rows whose first column is 9 and 3 are expected.
values((1+8, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c')) order by 1 desc limit 2
|
|
---- RESULTS
|
|
9,2,5.0,'a'
|
|
3,4,7.0,'c'
|
|
---- TYPES
|
|
SMALLINT, TINYINT, DECIMAL, STRING
|
|
====
|
|
---- QUERY
|
|
# Test literal casts by inserting into a table that requires a float.
# Integer and decimal literals are written into a single FLOAT column and read
# back. Non-integral values are expected as the single-precision value printed
# with 16 significant digits (e.g. 1.1 -> 1.100000023841858), so 0.07 is listed
# as 0.07000000029802322 to stay consistent with the other rows.
|
|
drop table if exists values_test_float_tbl;
|
|
create table values_test_float_tbl(f float);
|
|
insert overwrite values_test_float_tbl values
|
|
(1), (16), (1024), (65536), (1000000), (1.1), (98.6), (0.07), (33.333);
|
|
select * from values_test_float_tbl;
|
|
---- RESULTS
|
|
1
|
|
16
|
|
1024
|
|
65536
|
|
1000000
|
|
1.100000023841858
|
|
98.59999847412109
|
|
0.07000000029802322
|
|
33.33300018310547
|
|
---- TYPES
|
|
float
|
|
====
|
|
---- QUERY
|
|
# Test literal casts by inserting into a table that requires a decimal.
# The column is decimal(20, 4), so every expected value below is listed with
# exactly four fractional digits (e.g. 1 -> 1.0000, 33.333 -> 33.3330).
|
|
drop table if exists values_test_decimal_tbl;
|
|
create table values_test_decimal_tbl(f decimal(20, 4));
|
|
insert overwrite values_test_decimal_tbl values
|
|
(1), (16), (1024), (65536), (1000000), (1.1), (98.6), (0.07), (33.333);
|
|
select * from values_test_decimal_tbl;
|
|
---- RESULTS
|
|
1.0000
|
|
16.0000
|
|
1024.0000
|
|
65536.0000
|
|
1000000.0000
|
|
1.1000
|
|
98.6000
|
|
0.0700
|
|
33.3330
|
|
---- TYPES
|
|
decimal
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2749: Test that multiplying a DOUBLE and a DECIMAL results in a double
|
|
# value and does not overflow.
# This section only (re)creates and populates i_2749 with four rows; it has no
# RESULTS section of its own.
|
|
drop table if exists i_2749;
|
|
create table i_2749 (dbl1 double, `dec` decimal(9,4), dbl2 double);
|
|
insert overwrite table i_2749 values
|
|
(0.0017,90,1.0113),
|
|
(0.0342,90,1.0113),
|
|
(0.0128,90,1.0113),
|
|
(0.0163,90,1.0113);
|
|
====
|
|
---- QUERY
|
|
# Mixed DOUBLE/DECIMAL arithmetic over i_2749. Per the expected TYPES, only the
# three-way product is DOUBLE; the sum, difference and quotient of dbl1 and
# `dec` are DECIMAL.
select dbl1 * `dec` * dbl2, dbl1 + `dec`, dbl1 - `dec`, dbl1 / `dec` from i_2749;
|
|
---- RESULTS
|
|
0.1547289,90.0017000000000000,-89.9983000000000000,0.0000188888889
|
|
3.112781400000001,90.0342000000000000,-89.9658000000000000,0.0003800000000
|
|
1.1650176,90.0128000000000000,-89.9872000000000000,0.0001422222222
|
|
1.4835771,90.0163000000000000,-89.9837000000000000,0.0001811111111
|
|
---- TYPES
|
|
DOUBLE,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Product over i_2749 with the operands ordered as dbl1 * dbl2 * `dec`. The four
# expected values are 0.1547289, 3.112781400000001, 1.1650176 and 1.4835771.
# This case checks values only; it has no TYPES section.
select dbl1 * dbl2 * `dec` from i_2749;
|
|
---- RESULTS
|
|
0.1547289
|
|
3.112781400000001
|
|
1.1650176
|
|
1.4835771
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10350: Test the precision of DOUBLE values during inserts.
|
|
# The write path to text files uses a default precision of 16 digits, a
|
|
# limitation yet to be fixed, so the 17-digit values are not retained exactly.
|
|
drop table if exists i_10350;
|
|
create table i_10350(dbl1 double, dbl2 double);
|
|
# Insert one negative value whose digits taken as an integer (43149576573887316)
|
|
# exceed 2^53 in magnitude and another (231495765738873) that stays below 2^53,
|
|
# and corresponding positive values.
|
|
insert into table i_10350 values
|
|
(-0.43149576573887316, -0.231495765738873),
|
|
(0.43149576573887316, 0.231495765738873);
|
|
select * from i_10350;
|
|
---- RESULTS
|
|
-0.4314957657388732,-0.231495765738873
|
|
0.4314957657388732,0.231495765738873
|
|
---- TYPES
|
|
DOUBLE,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10654: Test accurate conversion from DecimalValue to double.
|
|
# The test below creates DecimalValues for 0.43149576573887316, 0.231495765738873,
|
|
# -0.43149576573887316, -0.231495765738873 and 0.43149576573887315568,
|
|
# whose `value_` is > 2^53, < 2^53, < -2^53, > -2^53 and > 2^64 respectively.
|
|
select cast(1 - 0.56850423426112684 as double),
|
|
cast(1 - 0.768504234261127 as double),
|
|
cast(0.56850423426112684 -1 as double),
|
|
cast(0.768504234261127 -1 as double),
|
|
cast(1 - 0.56850423426112684432 as double)
|
|
---- RESULTS
|
|
0.43149576573887316,0.231495765738873,-0.43149576573887316,-0.231495765738873,0.43149576573887316
|
|
---- TYPES
|
|
DOUBLE,DOUBLE,DOUBLE,DOUBLE,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-10680: Test accurate conversion from string to double.
# The expected result reproduces all 17 significant digits of the input string.
|
|
select cast("0.43149576573887316" as double)
|
|
---- RESULTS
|
|
0.43149576573887316
|
|
---- TYPES
|
|
DOUBLE
|
|
==== |