# All distributed count(distinct) plans are currently broken, so the
# count(distinct) tests that follow are commented out.
# TODO: re-enable when switching to new planner
# count(distinct)
#select count(distinct smallint_col, tinyint_col)
#from alltypesagg$TABLE
#---- TYPES
#bigint
#---- RESULTS
#100
#====
# count(distinct) w/ grouping and non-distinct count()
#select tinyint_col, count(distinct smallint_col), count(smallint_col)
#from alltypesagg$TABLE group by 1
#---- TYPES
#tinyint, bigint, bigint
#---- RESULTS
#1,10,1000
#2,10,1000
#3,10,1000
#4,10,1000
#5,10,1000
#6,10,1000
#7,10,1000
#8,10,1000
#9,10,1000
#NULL,10,900
#====
## count(distinct) over multiple columns w/ grouping and non-distinct count()
#select tinyint_col, count(distinct int_col, smallint_col), count(smallint_col)
#from alltypesagg$TABLE group by 1
#---- TYPES
#tinyint, bigint, bigint
#---- RESULTS
#1,100,1000
#2,100,1000
#3,100,1000
#4,100,1000
#5,100,1000
#6,100,1000
#7,100,1000
#8,100,1000
#9,100,1000
#NULL,100,900
#====
## count(distinct) and sum(distinct) w/ grouping and non-distinct count()
#select tinyint_col, count(distinct smallint_col), sum(distinct smallint_col),
#count(smallint_col)
#from alltypesagg$TABLE group by 1
#---- TYPES
#tinyint, bigint, bigint, bigint
#---- RESULTS
#1,10,460,1000
#2,10,470,1000
#3,10,480,1000
#4,10,490,1000
#5,10,500,1000
#6,10,510,1000
#7,10,520,1000
#8,10,530,1000
#9,10,540,1000
#NULL,10,450,900
#====
## count(distinct) and sum(distinct) w/ grouping; distinct in min() and max()
## ignored
#select tinyint_col, count(distinct smallint_col), sum(distinct smallint_col),
#count(smallint_col), min(distinct int_col), max(distinct float_col)
#from alltypesagg$TABLE group by 1
#---- TYPES
#tinyint, bigint, bigint, bigint, int, float
#---- RESULTS
#1,10,460,1000,1,1090.099975585938
#2,10,470,1000,2,1091.199951171875
#3,10,480,1000,3,1092.300048828125
#4,10,490,1000,4,1093.400024414062
#5,10,500,1000,5,1094.5
#6,10,510,1000,6,1095.599975585938
#7,10,520,1000,7,1096.699951171875
#8,10,530,1000,8,1097.800048828125
#9,10,540,1000,9,1098.900024414062
#NULL,10,450,900,10,1089
#====
# count distinct order by the same agg expr
#select count(distinct id) as count_id from alltypessmall order by count_id limit 100
#---- TYPES
#bigint
#---- RESULTS
#100
#====
## count distinct order by a diff agg expr
#select count(distinct id) as sum_id from alltypessmall order by max(distinct id) limit 100
#---- TYPES
#bigint
#---- RESULTS
#100
#====
# distinct *
# Applies DISTINCT across every column of the table; the id < 20 predicate
# limits the expected output to the 20 rows listed below.
# NOTE(review): the expected rows are listed in string order (id 10 comes
# before id 2) and the query has no ORDER BY -- presumably the harness sorts
# rows before comparing; confirm before relying on row order.
select distinct *
from alltypesagg$TABLE
where id < 20
---- TYPES
int, int, int, int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp
---- RESULTS
2010,1,1,0,true,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/10','0',2010-01-01 00:00:00
2010,1,1,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/10','1',2010-01-01 00:01:00
2010,1,1,10,true,NULL,10,10,100,11,101,'01/01/10','10',2010-01-01 00:10:00.450000000
2010,1,1,11,false,1,11,11,110,12.10000038146973,111.1,'01/01/10','11',2010-01-01 00:11:00.550000000
2010,1,1,12,true,2,12,12,120,13.19999980926514,121.2,'01/01/10','12',2010-01-01 00:12:00.660000000
2010,1,1,13,false,3,13,13,130,14.30000019073486,131.3,'01/01/10','13',2010-01-01 00:13:00.780000000
2010,1,1,14,true,4,14,14,140,15.39999961853027,141.4,'01/01/10','14',2010-01-01 00:14:00.910000000
2010,1,1,15,false,5,15,15,150,16.5,151.5,'01/01/10','15',2010-01-01 00:15:01.500000000
2010,1,1,16,true,6,16,16,160,17.60000038146973,161.6,'01/01/10','16',2010-01-01 00:16:01.200000000
2010,1,1,17,false,7,17,17,170,18.70000076293945,171.7,'01/01/10','17',2010-01-01 00:17:01.360000000
2010,1,1,18,true,8,18,18,180,19.79999923706055,181.8,'01/01/10','18',2010-01-01 00:18:01.530000000
2010,1,1,19,false,9,19,19,190,20.89999961853027,191.9,'01/01/10','19',2010-01-01 00:19:01.710000000
2010,1,1,2,true,2,2,2,20,2.200000047683716,20.2,'01/01/10','2',2010-01-01 00:02:00.100000000
2010,1,1,3,false,3,3,3,30,3.299999952316284,30.3,'01/01/10','3',2010-01-01 00:03:00.300000000
2010,1,1,4,true,4,4,4,40,4.400000095367432,40.4,'01/01/10','4',2010-01-01 00:04:00.600000000
2010,1,1,5,false,5,5,5,50,5.5,50.5,'01/01/10','5',2010-01-01 00:05:00.100000000
2010,1,1,6,true,6,6,6,60,6.599999904632568,60.6,'01/01/10','6',2010-01-01 00:06:00.150000000
2010,1,1,7,false,7,7,7,70,7.699999809265137,70.7,'01/01/10','7',2010-01-01 00:07:00.210000000
2010,1,1,8,true,8,8,8,80,8.800000190734863,80.8,'01/01/10','8',2010-01-01 00:08:00.280000000
2010,1,1,9,false,9,9,9,90,9.899999618530273,90.90000000000001,'01/01/10','9',2010-01-01 00:09:00.360000000
====
# distinct w/ explicit select list
# Applies DISTINCT to the (bool_col, tinyint_col) pair only; the expected
# output includes a row whose tinyint_col is NULL.
select distinct bool_col, tinyint_col
from alltypesagg$TABLE
---- TYPES
boolean, tinyint
---- RESULTS
false,1
false,3
false,5
false,7
false,9
true,2
true,4
true,6
true,8
true,NULL
====