Files
impala/testdata/workloads/functional-query/queries/QueryTest/distinct-estimate.test
casey fd09294b74 TimestampValue refactor and cleanup (part 2) (IMPALA-1623)
This is preparation for fixing IMPALA-97. These changes are mostly
non-functional to bring the code closer to styling standards.

The biggest functional changes should be:

1) IMPALA-1623 was caused by a misuse of a constructor and that code
   didn't compile after the refactor so the bug was fixed.
2) TimestampValue.Hash() seems to have been hashing the time twice
   instead of the time and date.
3) Timings using TimestampValue.time() would not be accurate when
   crossing midnight (time and date are separate fields).
4) Timings using local time should use UTC to avoid daylight savings
   problems.
5) Use system monotonic clock in util/time.h.

Some timings may still be affected by #3 & 4 above but fixing those
isn't the purpose of this change.

Change-Id: I26056c876c4361e898acc3656aa98abf6f153a6b
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/5779
Reviewed-by: Casey Ching <casey@cloudera.com>
Tested-by: jenkins
2015-02-03 01:49:55 -08:00

142 lines
3.7 KiB
Plaintext

# These computations take a while, so we do not want to add too many test cases.
# They also require a non-trivial amount of data to get reasonable results.
====
---- QUERY
# Exact count(distinct) next to the distinctpc and distinctpcsa results for the
# same column (id) of alltypesagg
select
  count(distinct id),
  distinctpc(id),
  distinctpcsa(id)
from alltypesagg
---- TYPES
bigint, bigint, bigint
---- RESULTS
10000,10590,17429
====
---- QUERY
# distinctpcsa applied to every column type, grouped on tinyint_col.
# Expected output has one row per tinyint_col value, including the NULL group.
select
  tinyint_col,
  distinctpcsa(id),
  distinctpcsa(bool_col),
  distinctpcsa(smallint_col),
  distinctpcsa(int_col),
  distinctpcsa(bigint_col),
  distinctpcsa(float_col),
  distinctpcsa(double_col),
  distinctpcsa(string_col),
  distinctpcsa(timestamp_col)
from alltypesagg
group by tinyint_col
---- TYPES
tinyint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
---- RESULTS
1,1066,82,87,148,128,146,139,163,956
2,906,83,86,151,136,140,160,158,1054
3,1200,82,86,140,131,146,139,163,896
4,1043,83,87,128,126,140,139,161,926
5,1149,82,87,137,130,148,140,163,916
6,998,83,87,137,128,146,142,161,1187
7,1125,82,87,140,140,145,140,163,830
8,977,83,88,139,124,139,142,163,804
9,867,82,88,130,131,139,136,161,840
NULL,1101,83,87,142,131,133,142,158,813
====
---- QUERY
# distinctpcsa applied to every column type over the whole table (no grouping);
# a single result row is expected.
select
  distinctpcsa(id),
  distinctpcsa(bool_col),
  distinctpcsa(tinyint_col),
  distinctpcsa(smallint_col),
  distinctpcsa(int_col),
  distinctpcsa(bigint_col),
  distinctpcsa(float_col),
  distinctpcsa(double_col),
  distinctpcsa(string_col),
  distinctpcsa(timestamp_col)
from alltypesagg
---- TYPES
bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
---- RESULTS
17429,83,87,109,544,1213,867,1043,252,10476
====
---- QUERY
# distinctpc applied to every column type, grouped on tinyint_col.
# Expected output has one row per tinyint_col value, including the NULL group.
select
  tinyint_col,
  distinctpc(id),
  distinctpc(bool_col),
  distinctpc(smallint_col),
  distinctpc(int_col),
  distinctpc(bigint_col),
  distinctpc(float_col),
  distinctpc(double_col),
  distinctpc(string_col),
  distinctpc(timestamp_col)
from alltypesagg
group by tinyint_col
---- TYPES
tinyint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
---- RESULTS
1,896,1,11,20,89,112,88,151,977
2,936,1,11,20,100,153,94,180,1043
3,936,1,11,20,109,118,90,182,896
4,977,1,14,20,81,84,83,190,946
5,977,1,14,20,96,97,100,151,840
6,998,1,13,20,97,128,101,178,1077
7,998,1,13,20,86,117,105,158,858
8,896,1,13,20,88,96,101,155,745
9,896,1,13,20,109,142,109,182,926
NULL,896,1,11,20,91,97,85,176,753
====
---- QUERY
# distinctpc applied to every column type over the whole table (no grouping);
# a single result row is expected.
select
  distinctpc(id),
  distinctpc(bool_col),
  distinctpc(tinyint_col),
  distinctpc(smallint_col),
  distinctpc(int_col),
  distinctpc(bigint_col),
  distinctpc(float_col),
  distinctpc(double_col),
  distinctpc(string_col),
  distinctpc(timestamp_col)
from alltypesagg
---- TYPES
bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
---- RESULTS
10590,3,7,63,20,1295,936,1020,330,9711
====
---- QUERY
# ndv (the hll aggregate function) applied to every column type over the whole
# table (no grouping); a single result row is expected.
select
  ndv(id),
  ndv(bool_col),
  ndv(tinyint_col),
  ndv(smallint_col),
  ndv(int_col),
  ndv(bigint_col),
  ndv(float_col),
  ndv(double_col),
  ndv(string_col),
  ndv(timestamp_col)
from alltypesagg
---- TYPES
bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
---- RESULTS
10549,2,9,104,1881,1342,944,991,1178,9880
====
---- QUERY
# distinctpc over a table with no rows: the expected result is 0
select
  distinctpc(field)
from EmptyTable
---- TYPES
bigint
---- RESULTS
0
====
---- QUERY
# ndv (the hll aggregate function) over a table with no rows: the expected
# result is 0
select
  ndv(field)
from EmptyTable
---- TYPES
bigint
---- RESULTS
0
====