Files
impala/testdata/workloads/functional-query/queries/QueryTest/distinct-estimate.test
Alex Behm 0fb380961c IMPALA-1187: Add appx_count_distinct query option to rewrite COUNT(DISTINCT) to NDV().
This patch also fixes IMPALA-1164: NDV() now returns a BIGINT (and not STRING).

Change-Id: Ia2a3272204938579d61091ee4f7f2d1cbf38ed55
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4338
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: jenkins
2014-09-20 16:11:34 -07:00

142 lines
4.0 KiB
Plaintext

# These computations take a while, so we do not want to add too many test cases.
# They also require a non-trivial amount of data to get reasonable results.
====
---- QUERY
# Compare the exact count(distinct id) with the distinctpc and distinctpcsa
# estimates computed over the same column.
select
  count(distinct id),
  distinctpc(id),
  distinctpcsa(id)
from alltypesagg
---- TYPES
bigint, string, string
---- RESULTS
10000,'10590','17429'
====
---- QUERY
# distinctpcsa applied to every col type, one result row per tinyint_col group
# (including the NULL group).
select
  tinyint_col,
  distinctpcsa(id),
  distinctpcsa(bool_col),
  distinctpcsa(smallint_col),
  distinctpcsa(int_col),
  distinctpcsa(bigint_col),
  distinctpcsa(float_col),
  distinctpcsa(double_col),
  distinctpcsa(string_col),
  distinctpcsa(timestamp_col)
from alltypesagg
group by tinyint_col
---- TYPES
tinyint, string, string, string, string, string, string, string, string, string
---- RESULTS
3,'1200','82','86','140','131','146','139','163','151'
NULL,'1101','83','87','142','131','133','142','158','140'
6,'998','83','87','137','128','146','142','161','143'
9,'867','82','88','130','131','139','136','161','114'
8,'977','83','88','139','124','139','142','163','139'
2,'906','83','86','151','136','140','160','158','128'
5,'1149','82','87','137','130','148','140','163','150'
4,'1043','83','87','128','126','140','139','161','134'
1,'1066','82','87','148','128','146','139','163','143'
7,'1125','82','87','140','140','145','140','163','143'
====
---- QUERY
# distinctpcsa applied to every col type, aggregated over the whole table
# (no grouping), so a single result row is expected.
select
  distinctpcsa(id),
  distinctpcsa(bool_col),
  distinctpcsa(tinyint_col),
  distinctpcsa(smallint_col),
  distinctpcsa(int_col),
  distinctpcsa(bigint_col),
  distinctpcsa(float_col),
  distinctpcsa(double_col),
  distinctpcsa(string_col),
  distinctpcsa(timestamp_col)
from alltypesagg
---- TYPES
string, string, string, string, string, string, string, string, string, string
---- RESULTS
'17429','83','87','109','544','1213','867','1043','252','1031'
====
---- QUERY
# distinctpc applied to every col type, one result row per tinyint_col group
# (including the NULL group).
select
  tinyint_col,
  distinctpc(id),
  distinctpc(bool_col),
  distinctpc(smallint_col),
  distinctpc(int_col),
  distinctpc(bigint_col),
  distinctpc(float_col),
  distinctpc(double_col),
  distinctpc(string_col),
  distinctpc(timestamp_col)
from alltypesagg
group by tinyint_col
---- TYPES
tinyint, string, string, string, string, string, string, string, string, string
---- RESULTS
1,'896','1','11','20','89','112','88','151','108'
2,'936','1','11','20','100','153','94','180','102'
3,'936','1','11','20','109','118','90','182','103'
4,'977','1','14','20','81','84','83','190','101'
5,'977','1','14','20','96','97','100','151','102'
6,'998','1','13','20','97','128','101','178','102'
7,'998','1','13','20','86','117','105','158','97'
8,'896','1','13','20','88','96','101','155','93'
9,'896','1','13','20','109','142','109','182','100'
NULL,'896','1','11','20','91','97','85','176','98'
====
---- QUERY
# distinctpc applied to every col type, aggregated over the whole table
# (no grouping), so a single result row is expected.
select
  distinctpc(id),
  distinctpc(bool_col),
  distinctpc(tinyint_col),
  distinctpc(smallint_col),
  distinctpc(int_col),
  distinctpc(bigint_col),
  distinctpc(float_col),
  distinctpc(double_col),
  distinctpc(string_col),
  distinctpc(timestamp_col)
from alltypesagg
---- TYPES
string, string, string, string, string, string, string, string, string, string
---- RESULTS
'10590','3','7','63','20','1295','936','1020','330','988'
====
---- QUERY
# ndv (the hll aggregate function) applied to every col type, aggregated over
# the whole table (no grouping). Every result column is typed bigint.
select
  ndv(id),
  ndv(bool_col),
  ndv(tinyint_col),
  ndv(smallint_col),
  ndv(int_col),
  ndv(bigint_col),
  ndv(float_col),
  ndv(double_col),
  ndv(string_col),
  ndv(timestamp_col)
from alltypesagg
---- TYPES
bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
---- RESULTS
10549,2,9,104,1881,1342,944,991,1178,9880
====
---- QUERY
# distinctpc over an empty table: the expected result is the string '0'.
select
  distinctpc(field)
from EmptyTable
---- TYPES
string
---- RESULTS
'0'
====
---- QUERY
# ndv (the hll aggregate function) over an empty table: the expected result
# is the bigint 0.
select
  ndv(field)
from EmptyTable
---- TYPES
bigint
---- RESULTS
0
====