diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-insert-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-insert-errors.test index 7ad6fa55c..d7503d17f 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-insert-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-insert-errors.test @@ -1,12 +1,12 @@ ==== ---- QUERY -insert into functional_hbase.alltypessmall +insert into alltypessmall values (NULL,80,false,"02/01/09",80.8,8.800000190734863,8,2,8,"8","2009-02-01 00:08:00.280000000",8,2009) ---- ERRORS Cannot insert into HBase with a null row key. ==== ---- QUERY -insert into functional_hbase.alltypessmall +insert into alltypessmall select case when id >= 0 then NULL else 999 end,80,false,"02/01/09",80.8,8.800000190734863,8,2,8,"8","2009-02-01 00:08:00.280000000",8,2009 from functional.alltypessmall ---- ERRORS diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test index 12c12b3ea..613c79bcd 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test @@ -342,13 +342,6 @@ show table stats compute_stats_db.incremental_string_part_value; STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING ==== ---- QUERY -compute incremental stats functional_hbase.alltypes; ----- RESULTS -'Updated 1 partition(s) and 13 column(s).' ----- TYPES -STRING -==== ----- QUERY # Check that dropping a column still allows stats to be computed create table compute_stats_db.incremental_drop_column (a int, b int, c int) partitioned by (d int); diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test index ba6ff358d..317cac24a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test @@ -343,96 +343,6 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY -# test computing stats on an HBase table -create table compute_stats_db.alltypessmall_hbase -like functional_hbase.alltypessmall; -==== ----- QUERY -compute stats compute_stats_db.alltypessmall_hbase ----- RESULTS -'Updated 1 partition(s) and 13 column(s).' ----- TYPES -STRING -==== ----- QUERY -show table stats compute_stats_db.alltypessmall_hbase ----- LABELS -REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ----- RESULTS: VERIFY_IS_EQUAL -regex:.+,'',regex:.+,regex:.+KB -regex:.+,'1',regex:.+,regex:.+KB -regex:.+,'3',regex:.+,regex:.+KB -regex:.+,'5',regex:.+,regex:.+KB -regex:.+,'7',regex:.+,regex:.+KB -regex:.+,'9',regex:.+,regex:.+KB -'Total','',regex:.+,regex:.+KB ----- TYPES -STRING, STRING, BIGINT, STRING -==== ----- QUERY -show column stats compute_stats_db.alltypessmall_hbase ----- LABELS -COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ----- RESULTS -'id','INT',105,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'bool_col','BOOLEAN',2,-1,1,1 -'date_string_col','STRING',12,-1,8,8 -'double_col','DOUBLE',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'int_col','INT',10,-1,4,4 -'month','INT',4,-1,4,4 -'smallint_col','SMALLINT',10,-1,2,2 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',101,-1,16,16 -'tinyint_col','TINYINT',10,-1,1,1 -'year','INT',1,-1,4,4 ----- TYPES -STRING, STRING, BIGINT, BIGINT, INT, DOUBLE -==== ----- QUERY -# test computing stats on an binary HBase table -create table compute_stats_db.alltypessmall_hbase_bin -like functional_hbase.alltypessmallbinary; -==== ----- QUERY -compute stats compute_stats_db.alltypessmall_hbase_bin ----- RESULTS -'Updated 1 partition(s) and 13 column(s).' ----- TYPES -STRING -==== ----- QUERY: VERIFY_IS_EQUAL -show table stats compute_stats_db.alltypessmall_hbase_bin ----- LABELS -REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ----- RESULTS -regex:.+,'',regex:.+,regex:.+ ----- TYPES -STRING, STRING, BIGINT, STRING -==== ----- QUERY -show column stats compute_stats_db.alltypessmall_hbase_bin ----- LABELS -COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ----- RESULTS -'id','INT',105,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'bool_col','BOOLEAN',2,-1,1,1 -'date_string_col','STRING',12,-1,8,8 -'double_col','DOUBLE',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'int_col','INT',10,-1,4,4 -'month','INT',4,-1,4,4 -'smallint_col','SMALLINT',10,-1,2,2 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',101,-1,16,16 -'tinyint_col','TINYINT',10,-1,1,1 -'year','INT',1,-1,4,4 ----- TYPES -STRING, STRING, BIGINT, BIGINT, INT, DOUBLE -==== ----- QUERY # test computing stats on an empty table create table compute_stats_db.alltypes_empty like functional_rc_snap.alltypes ==== @@ -474,51 +384,6 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY -# IMP-1227: Test computing stats on an HBase table that has a -# complex-typed column that Impala does not yet support. -create table compute_stats_db.allcomplextypes -like functional_hbase.allcomplextypes -==== ----- QUERY -compute stats compute_stats_db.allcomplextypes ----- RESULTS -'Updated 1 partition(s) and 3 column(s).' ----- TYPES -STRING -==== ----- QUERY: VERIFY_IS_EQUAL -show table stats compute_stats_db.allcomplextypes ----- LABELS -REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ----- RESULTS -regex:.+,'',regex:.+,regex:.+ ----- TYPES -STRING, STRING, BIGINT, STRING -==== ----- QUERY -show column stats compute_stats_db.allcomplextypes ----- LABELS -COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ----- RESULTS -'id','INT',0,-1,4,4 -'array_array_col','ARRAY>',-1,-1,-1,-1 -'array_map_col','MAP>',-1,-1,-1,-1 -'complex_nested_struct_col','STRUCT>>>>',-1,-1,-1,-1 -'complex_struct_col','STRUCT,f3:MAP>',-1,-1,-1,-1 -'int_array_col','ARRAY',-1,-1,-1,-1 -'int_map_col','MAP',-1,-1,-1,-1 -'int_struct_col','STRUCT',-1,-1,-1,-1 -'map_array_col','ARRAY>',-1,-1,-1,-1 -'map_map_col','MAP>',-1,-1,-1,-1 -'month','INT',0,-1,4,4 -'nested_struct_col','STRUCT>>',-1,-1,-1,-1 -'struct_array_col','ARRAY>',-1,-1,-1,-1 -'struct_map_col','MAP>',-1,-1,-1,-1 -'year','INT',0,-1,4,4 ----- TYPES -STRING, STRING, BIGINT, BIGINT, INT, DOUBLE -==== ----- QUERY # IMPALA-867: Test computing stats on Avro tables created by Hive with # matching/mismatched column definitions and Avro schema. # Clone the used tables here. diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test new file mode 100644 index 000000000..4d536bc60 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test @@ -0,0 +1,7 @@ +==== +---- QUERY +compute incremental stats alltypes; +---- RESULTS +'Updated 1 partition(s) and 13 column(s).' +---- TYPES +STRING diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test new file mode 100644 index 000000000..3376846ed --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test @@ -0,0 +1,142 @@ +==== +---- QUERY +# test computing stats on an HBase table +create table compute_stats_db_hbase.alltypessmall_hbase +like alltypessmall; +==== +---- QUERY +compute stats compute_stats_db_hbase.alltypessmall_hbase +---- RESULTS +'Updated 1 partition(s) and 13 column(s).' +---- TYPES +STRING +==== +---- QUERY +show table stats compute_stats_db_hbase.alltypessmall_hbase +---- LABELS +REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE +---- RESULTS: VERIFY_IS_EQUAL +regex:.+,'',regex:.+,regex:.+KB +regex:.+,'1',regex:.+,regex:.+KB +regex:.+,'3',regex:.+,regex:.+KB +regex:.+,'5',regex:.+,regex:.+KB +regex:.+,'7',regex:.+,regex:.+KB +regex:.+,'9',regex:.+,regex:.+KB +'Total','',regex:.+,regex:.+KB +---- TYPES +STRING, STRING, BIGINT, STRING +==== +---- QUERY +show column stats compute_stats_db_hbase.alltypessmall_hbase +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',105,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'bool_col','BOOLEAN',2,-1,1,1 +'date_string_col','STRING',12,-1,8,8 +'double_col','DOUBLE',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'int_col','INT',10,-1,4,4 +'month','INT',4,-1,4,4 +'smallint_col','SMALLINT',10,-1,2,2 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',101,-1,16,16 +'tinyint_col','TINYINT',10,-1,1,1 +'year','INT',1,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== +---- QUERY +# test computing stats on an binary HBase table +create table compute_stats_db_hbase.alltypessmall_hbase_bin +like alltypessmallbinary; +==== +---- QUERY +compute stats compute_stats_db_hbase.alltypessmall_hbase_bin +---- RESULTS +'Updated 1 partition(s) and 13 column(s).' +---- TYPES +STRING +==== +---- QUERY: VERIFY_IS_EQUAL +show table stats compute_stats_db_hbase.alltypessmall_hbase_bin +---- LABELS +REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE +---- RESULTS +regex:.+,'',regex:.+,regex:.+ +---- TYPES +STRING, STRING, BIGINT, STRING +==== +---- QUERY +show column stats compute_stats_db_hbase.alltypessmall_hbase_bin +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',105,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'bool_col','BOOLEAN',2,-1,1,1 +'date_string_col','STRING',12,-1,8,8 +'double_col','DOUBLE',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'int_col','INT',10,-1,4,4 +'month','INT',4,-1,4,4 +'smallint_col','SMALLINT',10,-1,2,2 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',101,-1,16,16 +'tinyint_col','TINYINT',10,-1,1,1 +'year','INT',1,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== +---- QUERY +# IMP-1227: Test computing stats on an HBase table that has a +# complex-typed column that Impala does not yet support. +create table compute_stats_db_hbase.allcomplextypes +like allcomplextypes +==== +---- QUERY +compute stats compute_stats_db_hbase.allcomplextypes +---- RESULTS +'Updated 1 partition(s) and 3 column(s).' +---- TYPES +STRING +==== +---- QUERY: VERIFY_IS_EQUAL +show table stats compute_stats_db_hbase.allcomplextypes +---- LABELS +REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE +---- RESULTS +regex:.+,'',regex:.+,regex:.+ +---- TYPES +STRING, STRING, BIGINT, STRING +==== +---- QUERY +show column stats compute_stats_db_hbase.allcomplextypes +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',0,-1,4,4 +'array_array_col','ARRAY>',-1,-1,-1,-1 +'array_map_col','MAP>',-1,-1,-1,-1 +'complex_nested_struct_col','STRUCT>>>>',-1,-1,-1,-1 +'complex_struct_col','STRUCT,f3:MAP>',-1,-1,-1,-1 +'int_array_col','ARRAY',-1,-1,-1,-1 +'int_map_col','MAP',-1,-1,-1,-1 +'int_struct_col','STRUCT',-1,-1,-1,-1 +'map_array_col','ARRAY>',-1,-1,-1,-1 +'map_map_col','MAP>',-1,-1,-1,-1 +'month','INT',0,-1,4,4 +'nested_struct_col','STRUCT>>',-1,-1,-1,-1 +'struct_array_col','ARRAY>',-1,-1,-1,-1 +'struct_map_col','MAP>',-1,-1,-1,-1 +'year','INT',0,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== +---- QUERY +compute incremental stats alltypes; +---- RESULTS +'Updated 1 partition(s) and 13 column(s).' +---- TYPES +STRING diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-inline-view.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-inline-view.test new file mode 100644 index 000000000..8bafb34c1 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-inline-view.test @@ -0,0 +1,51 @@ +==== +---- QUERY +# multiple levels of aggregation +select c1, c3, m2 +from ( + select c1, c3, max(c2) m2 + from ( + select c1, c2, c3 + from ( + select int_col c1, tinyint_col c2, max(id) c3 + from alltypessmall + group by 1, 2 + order by 1,2 + limit 5 + ) x + ) x2 + group by c1, c3 + limit 10 + ) t +where c1 > 0 +order by 2, 1 desc +limit 3 +---- RESULTS +1,96,1 +2,97,2 +3,98,3 +---- TYPES +int, int, tinyint +==== +---- QUERY +# do not materialize the agg expr slot +select c1, c2 +from ( + select int_col c1, tinyint_col c2, min(float_col) c3 + from alltypessmall + group by 1, 2 + ) x +---- RESULTS +0,0 +1,1 +2,2 +3,3 +4,4 +5,5 +6,6 +7,7 +8,8 +9,9 +---- TYPES +int, tinyint + diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-limit.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-limit.test new file mode 100644 index 000000000..e20c4a9fd --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-limit.test @@ -0,0 +1,14 @@ +==== +---- QUERY +# limit 0 on hbase table +select * from alltypessmall where string_col = '4' limit 0 +---- RESULTS +---- TYPES +int, bigint, boolean, string, double, float, int, int, smallint, string, timestamp, tinyint, int +==== +---- QUERY +# limit 0 with arithmetic expr +select * from alltypessmall where string_col = '4' limit 3 % 3 +---- RESULTS +---- TYPES +int, bigint, boolean, string, double, float, int, int, smallint, string, timestamp, tinyint, int diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-rowkeys.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-rowkeys.test index 007cdb74f..81ddd2869 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/hbase-rowkeys.test +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-rowkeys.test @@ -2,7 +2,7 @@ ---- QUERY # predicate on row key col is applied to scan if row key is mapped as string col select count(*) -from functional_hbase.stringids +from stringids where id = '5' ---- TYPES bigint @@ -13,7 +13,7 @@ bigint # predicate on row key col is not applied to scan if row key is mapped as non-string col # but the result is still correct select count(*) -from functional_hbase.alltypesagg +from alltypesagg where id = 5 ---- TYPES bigint @@ -24,7 +24,7 @@ bigint # ids are stored in ascii and ordered lexicographically # exclusive upper bound select count(*) -from functional_hbase.stringids +from stringids where id < '5' ---- TYPES bigint @@ -34,7 +34,7 @@ bigint ---- QUERY # inclusive upper bound select count(*) -from functional_hbase.stringids +from stringids where id <= '5' ---- TYPES bigint @@ -44,7 +44,7 @@ bigint ---- QUERY # inclusive lower bound select count(*) -from functional_hbase.stringids +from stringids where id >= '6' ---- TYPES bigint @@ -54,7 +54,7 @@ bigint ---- QUERY # exclusive lower bound select count(*) -from functional_hbase.stringids +from stringids where id > '6' ---- TYPES bigint @@ -64,7 +64,7 @@ bigint ---- QUERY # combinations select count(*) -from functional_hbase.stringids +from stringids where id > concat('', '5') and id < concat('6', '') ---- TYPES @@ -74,7 +74,7 @@ bigint ==== ---- QUERY select count(*) -from functional_hbase.stringids +from stringids where id >= '5' and id < '6' ---- TYPES @@ -84,7 +84,7 @@ bigint ==== ---- QUERY select count(*) -from functional_hbase.stringids +from stringids where id > '5' and id <= '6' ---- TYPES @@ -94,7 +94,7 @@ bigint ==== ---- QUERY select count(*) -from functional_hbase.stringids +from stringids where id >= '5' and id <= '6' ---- TYPES @@ -106,7 +106,7 @@ bigint # predicates on non-key cols are evaluated in the executor # and non-string comparisons work select count(*) -from functional_hbase.stringids +from stringids where id < '5' and smallint_col < 5 ---- TYPES @@ -116,7 +116,7 @@ bigint ==== ---- QUERY # IMP-1188 - row key lower bound is bigger than upper bound. -select count(*) from functional_hbase.stringids where id > 'b' and id < 'a' +select count(*) from stringids where id > 'b' and id < 'a' ---- RESULTS 0 ---- TYPES @@ -124,7 +124,7 @@ BIGINT ==== ---- QUERY # IMP-1188 - row key predicate is null. -select count(*) from functional_hbase.stringids +select count(*) from stringids where id > cast(cast('sdfs' as int) as string) ---- RESULTS 0 @@ -133,7 +133,7 @@ BIGINT ==== ---- QUERY # IMP-1188 - cast string row key to int -select count(*) from functional_hbase.stringids +select count(*) from stringids where cast(id as int) < 5 ---- RESULTS 5 diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test new file mode 100644 index 000000000..806221684 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-show-create-table.test @@ -0,0 +1,24 @@ +==== +---- QUERY +SHOW CREATE TABLE alltypes +---- RESULTS +CREATE EXTERNAL TABLE alltypes ( + id INT COMMENT 'Add a comment', + bigint_col BIGINT, + bool_col BOOLEAN, + date_string_col STRING, + double_col DOUBLE, + float_col FLOAT, + int_col INT, + month INT, + smallint_col SMALLINT, + string_col STRING, + timestamp_col TIMESTAMP, + tinyint_col TINYINT, + year INT +) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,d:bool_col,d:tinyint_col,d:smallint_col,d:int_col,d:bigint_col,d:float_col,d:double_col,d:date_string_col,d:string_col,d:timestamp_col,d:year,d:month', + 'serialization.format'='1') +TBLPROPERTIES ('hbase.table.name'='functional_hbase.alltypes', + 'storage_handler'='org.apache.hadoop.hive.hbase.HBaseStorageHandler') diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-show-stats.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-show-stats.test new file mode 100644 index 000000000..50bc95391 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-show-stats.test @@ -0,0 +1,38 @@ +==== +---- QUERY +# Stats on an HBase table +show table stats alltypesagg +---- LABELS +REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE +---- RESULTS: VERIFY_IS_EQUAL +regex:.+,'',regex:.+,regex:.+KB +regex:.+,'1',regex:.+,regex:.+MB +regex:.+,'3',regex:.+,regex:.+MB +regex:.+,'5',regex:.+,regex:.+MB +regex:.+,'7',regex:.+,regex:.+MB +regex:.+,'9',regex:.+,regex:.+KB +'Total','',regex:.+,regex:.+MB +---- TYPES +STRING, STRING, BIGINT, STRING +==== +---- QUERY +# Column stats on an HBaseTable +show column stats alltypessmall +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',105,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'bool_col','BOOLEAN',2,-1,1,1 +'date_string_col','STRING',12,-1,8,8 +'double_col','DOUBLE',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'int_col','INT',10,-1,4,4 +'month','INT',4,-1,4,4 +'smallint_col','SMALLINT',10,-1,2,2 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',101,-1,16,16 +'tinyint_col','TINYINT',10,-1,1,1 +'year','INT',1,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-top-n.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-top-n.test new file mode 100644 index 000000000..39d45effb --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-top-n.test @@ -0,0 +1,29 @@ +==== +---- QUERY +# multiple levels of aggregation in a subquery +# Taken from subquery.test +select c1, c3, m2 +from ( + select c1, c3, max(c2) m2 + from ( + select c1, c2, c3 + from ( + select int_col c1, tinyint_col c2, max(id) c3 + from alltypessmall + group by 1, 2 + order by 1,2 + limit 5 + ) x + ) x2 + group by c1, c3 + limit 10 + ) t +where c1 > 0 +order by 2, 1 desc +limit 3 +---- RESULTS +1,96,1 +2,97,2 +3,98,3 +---- TYPES +int, int, tinyint diff --git a/testdata/workloads/functional-query/queries/QueryTest/inline-view.test b/testdata/workloads/functional-query/queries/QueryTest/inline-view.test index 9440c02d5..acd8dac99 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/inline-view.test +++ b/testdata/workloads/functional-query/queries/QueryTest/inline-view.test @@ -132,56 +132,6 @@ and c2 > 10 int, bigint, double ==== ---- QUERY -# multiple levels of aggregation -select c1, c3, m2 -from ( - select c1, c3, max(c2) m2 - from ( - select c1, c2, c3 - from ( - select int_col c1, tinyint_col c2, max(id) c3 - from functional_hbase.alltypessmall - group by 1, 2 - order by 1,2 - limit 5 - ) x - ) x2 - group by c1, c3 - limit 10 - ) t -where c1 > 0 -order by 2, 1 desc -limit 3 ----- RESULTS -1,96,1 -2,97,2 -3,98,3 ----- TYPES -int, int, tinyint -==== ----- QUERY -# do not materialize the agg expr slot -select c1, c2 -from ( - select int_col c1, tinyint_col c2, min(float_col) c3 - from functional_hbase.alltypessmall - group by 1, 2 - ) x ----- RESULTS -0,0 -1,1 -2,2 -3,3 -4,4 -5,5 -6,6 -7,7 -8,8 -9,9 ----- TYPES -int, tinyint -==== ----- QUERY # subquery with aggregation and order by/limit, as left-hand side of join; # having clause in subquery is transfered to merge agg step in distrib plan select * diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins-against-hbase.test b/testdata/workloads/functional-query/queries/QueryTest/joins-against-hbase.test new file mode 100644 index 000000000..44f8b2405 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/joins-against-hbase.test @@ -0,0 +1,112 @@ +==== +---- QUERY +# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in +# joins cols and non-equality join predicate +# (alltypesagg.tinyint_col contains nulls instead of 0s) +# Should be same result as the test below +select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col +from alltypesagg a join functional_hbase.alltypessmall b + on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5) +where a.month=1 +and a.day=1 +and a.string_col > '88' +and b.bool_col = false +---- RESULTS +1,1,'881',2 +1,1,'891',2 +1,1,'901',2 +1,1,'91',2 +1,1,'911',2 +1,1,'921',2 +1,1,'931',2 +1,1,'941',2 +1,1,'951',2 +1,1,'961',2 +1,1,'971',2 +1,1,'981',2 +1,1,'991',2 +---- TYPES +tinyint, int, string, smallint +==== +---- QUERY +# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in joins cols +# (alltypesagg.tinyint_col contains nulls instead of 0s) +# Should be same result as the test below +select a.tinyint_col, b.id, a.string_col +from alltypesagg a join functional_hbase.alltypessmall b on (a.tinyint_col = b.id) +where a.month=1 +and a.day=1 +and a.tinyint_col + b.tinyint_col < 5 +and a.string_col > '88' +and b.bool_col = false +---- RESULTS +1,1,'881' +1,1,'891' +1,1,'901' +1,1,'91' +1,1,'911' +1,1,'921' +1,1,'931' +1,1,'941' +1,1,'951' +1,1,'961' +1,1,'971' +1,1,'981' +1,1,'991' +---- TYPES +tinyint, int, string +==== +---- QUERY +# reversing the order of the tables produces the same result +select a.tinyint_col, b.id, a.string_col +from alltypessmall b join functional_hbase.alltypesagg a on (a.tinyint_col = b.id) +where a.month=1 +and a.day=1 +and a.tinyint_col + b.tinyint_col < 5 +and a.string_col > '88' +and b.bool_col = false +---- RESULTS +1,1,'881' +1,1,'891' +1,1,'901' +1,1,'91' +1,1,'911' +1,1,'921' +1,1,'931' +1,1,'941' +1,1,'951' +1,1,'961' +1,1,'971' +1,1,'981' +1,1,'991' +---- TYPES +tinyint, int, string +==== +---- QUERY +# cross join between hdfs and hbase, where predicate, extra scan predicates, nulls +# in joins cols and non-equality join predicate +# (alltypesagg.tinyint_col contains nulls instead of 0s) +select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col +from alltypesagg a cross join functional_hbase.alltypessmall b +where a.tinyint_col = b.id +and a.tinyint_col + b.tinyint_col < 5 +and a.month=1 +and a.day=1 +and a.string_col > '88' +and b.bool_col = false +---- RESULTS +1,1,'91',2 +1,1,'881',2 +1,1,'891',2 +1,1,'901',2 +1,1,'911',2 +1,1,'921',2 +1,1,'931',2 +1,1,'941',2 +1,1,'951',2 +1,1,'961',2 +1,1,'971',2 +1,1,'981',2 +1,1,'991',2 +---- TYPES +tinyint, int, string, smallint diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test index 7c1e86d83..b5bc66054 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/joins.test +++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test @@ -51,117 +51,6 @@ select j.*, d.* from JoinTbl j inner join DimTbl d on 1002,'Name2',94611,5000,1002,'Name2',94611 ---- TYPES bigint, string, int, int, bigint, string, int -==== ----- QUERY -# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in -# joins cols and non-equality join predicate -# (alltypesagg.tinyint_col contains nulls instead of 0s) -# Should be same result as the test below -select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col -from alltypesagg a join functional_hbase.alltypessmall b - on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5) -where a.month=1 -and a.day=1 -and a.string_col > '88' -and b.bool_col = false ----- RESULTS -1,1,'881',2 -1,1,'891',2 -1,1,'901',2 -1,1,'91',2 -1,1,'911',2 -1,1,'921',2 -1,1,'931',2 -1,1,'941',2 -1,1,'951',2 -1,1,'961',2 -1,1,'971',2 -1,1,'981',2 -1,1,'991',2 ----- TYPES -tinyint, int, string, smallint -==== ----- QUERY -# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in joins cols -# (alltypesagg.tinyint_col contains nulls instead of 0s) -# Should be same result as the test below -select a.tinyint_col, b.id, a.string_col -from alltypesagg a join functional_hbase.alltypessmall b on (a.tinyint_col = b.id) -where a.month=1 -and a.day=1 -and a.tinyint_col + b.tinyint_col < 5 -and a.string_col > '88' -and b.bool_col = false ----- RESULTS -1,1,'881' -1,1,'891' -1,1,'901' -1,1,'91' -1,1,'911' -1,1,'921' -1,1,'931' -1,1,'941' -1,1,'951' -1,1,'961' -1,1,'971' -1,1,'981' -1,1,'991' ----- TYPES -tinyint, int, string -==== ----- QUERY -# join between two tables, extra join predicate, extra scan predicates, nulls in joins cols -# (alltypesagg.tinyint_col contains nulls instead of 0s) -select a.tinyint_col, b.id, a.string_col -from alltypesagg a join alltypessmall b on (a.tinyint_col = b.id) -where a.month=1 -and a.day=1 -and a.tinyint_col + b.tinyint_col < 5 -and a.string_col > '88' -and b.bool_col = false ----- RESULTS -1,1,'881' -1,1,'891' -1,1,'901' -1,1,'91' -1,1,'911' -1,1,'921' -1,1,'931' -1,1,'941' -1,1,'951' -1,1,'961' -1,1,'971' -1,1,'981' -1,1,'991' ----- TYPES -tinyint, int, string -==== ----- QUERY -# reversing the order of the tables produces the same result -select a.tinyint_col, b.id, a.string_col -from alltypessmall b join alltypesagg a on (a.tinyint_col = b.id) -where a.month=1 -and a.day=1 -and a.tinyint_col + b.tinyint_col < 5 -and a.string_col > '88' -and b.bool_col = false ----- RESULTS -1,1,'881' -1,1,'891' -1,1,'901' -1,1,'91' -1,1,'911' -1,1,'921' -1,1,'931' -1,1,'941' -1,1,'951' -1,1,'961' -1,1,'971' -1,1,'981' -1,1,'991' ----- TYPES -tinyint, int, string -==== ---- QUERY # join between three tables, extra join predicates, extra scan predicates, nulls in joins cols # (alltypesagg.tinyint_col contains nulls instead of 0s) @@ -396,35 +285,6 @@ limit 100 SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING, INT ==== ---- QUERY -# cross join between hdfs and hbase, where predicate, extra scan predicates, nulls -# in joins cols and non-equality join predicate -# (alltypesagg.tinyint_col contains nulls instead of 0s) -select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col -from alltypesagg a cross join functional_hbase.alltypessmall b -where a.tinyint_col = b.id -and a.tinyint_col + b.tinyint_col < 5 -and a.month=1 -and a.day=1 -and a.string_col > '88' -and b.bool_col = false ----- RESULTS -1,1,'91',2 -1,1,'881',2 -1,1,'891',2 -1,1,'901',2 -1,1,'911',2 -1,1,'921',2 -1,1,'931',2 -1,1,'941',2 -1,1,'951',2 -1,1,'961',2 -1,1,'971',2 -1,1,'981',2 -1,1,'991',2 ----- TYPES -tinyint, int, string, smallint -==== ----- QUERY # FULL OUTER JOIN between two inline views followed by a GROUP BY (IMPALA-964) select a.x FROM (VALUES(1 x, 1 y)) a FULL OUTER JOIN (VALUES(1 x, 1 y)) b ON (a.x = b.y) GROUP BY a.x diff --git a/testdata/workloads/functional-query/queries/QueryTest/limit.test b/testdata/workloads/functional-query/queries/QueryTest/limit.test index 0da57eaad..334ed451a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/limit.test +++ b/testdata/workloads/functional-query/queries/QueryTest/limit.test @@ -7,20 +7,6 @@ select * from alltypesagg where day = 1 limit 0 int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int, int ==== ---- QUERY -# limit 0 on hbase table -select * from functional_hbase.alltypessmall where string_col = '4' limit 0 ----- RESULTS ----- TYPES -int, bigint, boolean, string, double, float, int, int, smallint, string, timestamp, tinyint, int -==== ----- QUERY -# limit 0 with arithmetic expr -select * from functional_hbase.alltypessmall where string_col = '4' limit 3 % 3 ----- RESULTS ----- TYPES -int, bigint, boolean, string, double, float, int, int, smallint, string, timestamp, tinyint, int -==== ----- QUERY # limit 0 on complex subquery join select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col from ( diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test index f28b20693..d5f355653 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test @@ -239,30 +239,6 @@ STORED AS INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat' LOCATION '$$location_uri$$' ==== ---- QUERY -SHOW CREATE TABLE functional_hbase.alltypes ----- RESULTS -CREATE EXTERNAL TABLE functional_hbase.alltypes ( - id INT COMMENT 'Add a comment', - bigint_col BIGINT, - bool_col BOOLEAN, - date_string_col STRING, - double_col DOUBLE, - float_col FLOAT, - int_col INT, - month INT, - smallint_col SMALLINT, - string_col STRING, - timestamp_col TIMESTAMP, - tinyint_col TINYINT, - year INT -) -STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' -WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,d:bool_col,d:tinyint_col,d:smallint_col,d:int_col,d:bigint_col,d:float_col,d:double_col,d:date_string_col,d:string_col,d:timestamp_col,d:year,d:month', - 'serialization.format'='1') -TBLPROPERTIES ('hbase.table.name'='functional_hbase.alltypes', - 'storage_handler'='org.apache.hadoop.hive.hbase.HBaseStorageHandler') -==== ----- QUERY SHOW CREATE TABLE functional.allcomplextypes ---- RESULTS CREATE EXTERNAL TABLE functional.allcomplextypes ( @@ -289,4 +265,4 @@ WITH SERDEPROPERTIES ('field.delim'=',', 'serialization.format'=',', 'escape.del STORED AS TEXTFILE LOCATION '$$location_uri$$' TBLPROPERTIES ('transient_lastDdlTime'='1405990341') -==== \ No newline at end of file +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test index 4dbab6014..2c0eb2dd9 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test @@ -130,22 +130,6 @@ YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCRE STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING ==== ---- QUERY -# Stats on an HBase table -show table stats functional_hbase.alltypesagg ----- LABELS -REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE ----- RESULTS: VERIFY_IS_EQUAL -regex:.+,'',regex:.+,regex:.+KB -regex:.+,'1',regex:.+,regex:.+MB -regex:.+,'3',regex:.+,regex:.+MB -regex:.+,'5',regex:.+,regex:.+MB -regex:.+,'7',regex:.+,regex:.+MB -regex:.+,'9',regex:.+,regex:.+KB -'Total','',regex:.+,regex:.+MB ----- TYPES -STRING, STRING, BIGINT, STRING -==== ----- QUERY # Column stats on an HdfsTable show column stats alltypes ---- LABELS @@ -168,28 +152,6 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY -# Column stats on an HBaseTable -show column stats functional_hbase.alltypessmall ----- LABELS -COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ----- RESULTS -'id','INT',105,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'bool_col','BOOLEAN',2,-1,1,1 -'date_string_col','STRING',12,-1,8,8 -'double_col','DOUBLE',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'int_col','INT',10,-1,4,4 -'month','INT',4,-1,4,4 -'smallint_col','SMALLINT',10,-1,2,2 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',101,-1,16,16 -'tinyint_col','TINYINT',10,-1,1,1 -'year','INT',1,-1,4,4 ----- TYPES -STRING, STRING, BIGINT, BIGINT, INT, DOUBLE -==== ----- QUERY # Column column stats for a table with complex types. show column stats functional.allcomplextypes ---- LABELS diff --git a/testdata/workloads/functional-query/queries/QueryTest/top-n.test b/testdata/workloads/functional-query/queries/QueryTest/top-n.test index 89c9a6149..f054f531b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/top-n.test +++ b/testdata/workloads/functional-query/queries/QueryTest/top-n.test @@ -1187,35 +1187,6 @@ values((1+8, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c')) order by 1 desc l SMALLINT, TINYINT, DECIMAL, STRING ==== ---- QUERY -# multiple levels of aggregation in a subquery -# Taken from subquery.test -select c1, c3, m2 -from ( - select c1, c3, max(c2) m2 - from ( - select c1, c2, c3 - from ( - select int_col c1, tinyint_col c2, max(id) c3 - from functional_hbase.alltypessmall - group by 1, 2 - order by 1,2 - limit 5 - ) x - ) x2 - group by c1, c3 - limit 10 - ) t -where c1 > 0 -order by 2, 1 desc -limit 3 ----- RESULTS -1,96,1 -2,97,2 -3,98,3 ----- TYPES -int, int, tinyint -==== ----- QUERY # Basic test with a single with-clause view that references a virtual view. # Taken from with-clause.test with t as (select abc x, xyz y from functional.complex_view) diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index f02ce0046..c72c3bd8f 100755 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -52,6 +52,7 @@ IMPALAD_HS2_HOST_PORT =\ HIVE_HS2_HOST_PORT = pytest.config.option.hive_server2 WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR'] HDFS_CONF = HdfsConfig(pytest.config.option.minicluster_xml_conf) +TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs" # Base class for Impala tests. All impala test cases should inherit from this class class ImpalaTestSuite(BaseTestSuite): @@ -400,9 +401,16 @@ class ImpalaTestSuite(BaseTestSuite): for tf in pytest.config.option.table_formats.split(','): dataset = get_dataset_from_workload(cls.get_workload()) table_formats.append(TableFormatInfo.create_from_string(dataset, tf)) - return TestDimension('table_format', *table_formats) + tf_dimensions = TestDimension('table_format', *table_formats) else: - return load_table_info_dimension(cls.get_workload(), exploration_strategy) + tf_dimensions = load_table_info_dimension(cls.get_workload(), exploration_strategy) + # If the filesystem is either isilon or s3, we don't need the hbase dimension. + if TARGET_FILESYSTEM.lower() in ['s3', 'isilon']: + for tf_dimension in tf_dimensions: + if tf_dimension.value.file_format == "hbase": + tf_dimensions.remove(tf_dimension) + break + return tf_dimensions @classmethod def __create_exec_option_dimension(cls): diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py index 53a22acfd..b353d9925 100755 --- a/tests/data_errors/test_data_errors.py +++ b/tests/data_errors/test_data_errors.py @@ -74,7 +74,7 @@ class TestHBaseDataErrors(TestDataErrors): def add_test_dimensions(cls): super(TestHBaseDataErrors, cls).add_test_dimensions() - # Only run on delimited text with no compression. + # Only run on hbase. cls.TestMatrix.add_constraint(lambda v:\ v.get_value('table_format').file_format == 'hbase' and\ v.get_value('table_format').compression_codec == 'none') diff --git a/tests/metadata/test_hbase_metadata.py b/tests/metadata/test_hbase_metadata.py new file mode 100755 index 000000000..4d69ae9a2 --- /dev/null +++ b/tests/metadata/test_hbase_metadata.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Cloudera, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from tests.common.test_vector import * +from tests.common.impala_test_suite import * +from tests.common.test_dimensions import create_uncompressed_text_dimension + +# Tests the COMPUTE STATS command for gathering table and column stats. +# TODO: Merge this test file with test_col_stats.py +class TestHbaseMetadata(ImpalaTestSuite): + TEST_DB_NAME = "compute_stats_db_hbase" + + @classmethod + def get_workload(self): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestHbaseMetadata, cls).add_test_dimensions() + cls.TestMatrix.add_dimension(create_single_exec_option_dimension()) + cls.TestMatrix.add_constraint(\ + lambda v: v.get_value('table_format').file_format == 'hbase') + + def setup_method(self, method): + # cleanup and create a fresh test database + self.cleanup_db(self.TEST_DB_NAME) + self.execute_query("create database %s" % (self.TEST_DB_NAME)) + + def teardown_method(self, method): + self.cleanup_db(self.TEST_DB_NAME) + + def test_hbase_compute_stats(self, vector): + self.run_test_case('QueryTest/hbase-compute-stats', vector) + + def test_hbase_compute_stats_incremental(self, vector): + self.run_test_case('QueryTest/hbase-compute-stats-incremental', vector) diff --git a/tests/query_test/test_hbase_queries.py b/tests/query_test/test_hbase_queries.py index 2fe9753e1..b0de23eb4 100644 --- a/tests/query_test/test_hbase_queries.py +++ b/tests/query_test/test_hbase_queries.py @@ -30,6 +30,15 @@ class TestHBaseQueries(ImpalaTestSuite): def test_hbase_subquery(self, vector): self.run_test_case('QueryTest/hbase-subquery', vector) + def test_hbase_inline_views(self, vector): + self.run_test_case('QueryTest/hbase-inline-view', vector) + + def test_hbase_top_n(self, vector): + self.run_test_case('QueryTest/hbase-top-n', vector) + + def test_hbase_limits(self, vector): + self.run_test_case('QueryTest/hbase-limit', vector) + @pytest.mark.execute_serially def test_hbase_inserts(self, vector): self.run_test_case('QueryTest/hbase-inserts', vector) diff --git a/tests/query_test/test_join_queries.py b/tests/query_test/test_join_queries.py index b5a5debf5..fb1ee6183 100644 --- a/tests/query_test/test_join_queries.py +++ b/tests/query_test/test_join_queries.py @@ -34,6 +34,11 @@ class TestJoinQueries(ImpalaTestSuite): new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size') self.run_test_case('QueryTest/joins', new_vector) + def test_joins_against_hbase(self, vector): + new_vector = copy(vector) + new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size') + self.run_test_case('QueryTest/joins-against-hbase', new_vector) + def test_outer_joins(self, vector): new_vector = copy(vector) new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size')