mirror of
https://github.com/apache/impala.git
synced 2026-01-02 21:00:35 -05:00
This change loads the missing tables in TPC-DS. In addition, it fixes up the loading of the partitioned table store_sales so all partitions will be loaded. The existing TPC-DS queries are also updated to use the parameters for qualification runs as noted in the TPC-DS specification. Some hard-coded partition filters were also removed. They were there due to the lack of dynamic partitioning in the past. Some missing TPC-DS queries are also added to this change, including query28 which discovered the infamous IMPALA-5251. Having all tables in TPC-DS available paves the way for us to include all supported TPC-DS queries in our functional testing. Due to the change in the data, planner tests and the E2E tests have different results than before. The results of E2E tests were compared against the run done with Netezza and Vertica. The divergences were all due to the truncation behavior of decimal types in DECIMAL_V1. Change-Id: Ic5277245fd20827c9c09ce5c1a7a37266ca476b9 Reviewed-on: http://gerrit.cloudera.org:8080/6877 Reviewed-by: Michael Brown <mikeb@cloudera.com> Tested-by: Impala Public Jenkins
291 lines
8.2 KiB
Plaintext
291 lines
8.2 KiB
Plaintext
====
---- QUERY
# Create the SequenceFile-backed table (int, string, double) that the insert
# sections in this file write into.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table __seq_write (i int, s string, d double)
  stored as SEQUENCEFILE;
====
---- QUERY
# Write row i=0 with COMPRESSION_CODEC=NONE and SEQ_COMPRESSION_MODE=BLOCK,
# using the insert ... select form.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write select 0, "a", 1.1;
====
---- QUERY
# Write row i=1 with COMPRESSION_CODEC=DEFAULT and SEQ_COMPRESSION_MODE=BLOCK.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (1, "b", 2.2);
====
---- QUERY
# Write row i=2 with COMPRESSION_CODEC=SNAPPY and SEQ_COMPRESSION_MODE=BLOCK.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (2, "c", 3.3);
====
---- QUERY
# Write row i=3 with COMPRESSION_CODEC=SNAPPY_BLOCKED and SEQ_COMPRESSION_MODE=BLOCK.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (3, "d", 4.4);
====
---- QUERY
# Write row i=4 with COMPRESSION_CODEC=GZIP and SEQ_COMPRESSION_MODE=BLOCK.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (4, "e", 5.5);
====
---- QUERY
# Write row i=5 with COMPRESSION_CODEC=NONE and SEQ_COMPRESSION_MODE=RECORD,
# using the insert ... select form.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write select 5, "a", 1.1;
====
---- QUERY
# Write row i=6 with COMPRESSION_CODEC=DEFAULT and SEQ_COMPRESSION_MODE=RECORD.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (6, "b", 2.2);
====
---- QUERY
# Write row i=7 with COMPRESSION_CODEC=SNAPPY and SEQ_COMPRESSION_MODE=RECORD.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (7, "c", 3.3);
====
---- QUERY
# Write row i=8 with COMPRESSION_CODEC=SNAPPY_BLOCKED and SEQ_COMPRESSION_MODE=RECORD.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (8, "d", 4.4);
====
---- QUERY
# Write row i=9 with COMPRESSION_CODEC=GZIP and SEQ_COMPRESSION_MODE=RECORD.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (9, "e", 5.5);
====
---- QUERY
# Negative case: with ALLOW_UNSUPPORTED_FORMATS=0 the insert is expected to fail
# with the error text listed in the CATCH section.
SET ALLOW_UNSUPPORTED_FORMATS=0;
insert into __seq_write values (4, "e", 5.5);
---- CATCH
Writing to table format SEQUENCE_FILE is not supported. Use query option
====
---- QUERY
# Read back the rows written by the ten successful inserts (i = 0..9). Only one
# row with i = 4 is expected, i.e. the insert that is expected to fail added nothing.
# The explicit ORDER BY makes the row order of the assertion deterministic.
select * from __seq_write order by i;
---- RESULTS
0,'a',1.1
1,'b',2.2
2,'c',3.3
3,'d',4.4
4,'e',5.5
5,'a',1.1
6,'b',2.2
7,'c',3.3
8,'d',4.4
9,'e',5.5
---- TYPES
INT,STRING,DOUBLE
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with NONE+RECORD and then read
# it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_none_rec like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_none_rec partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the NONE+RECORD table; 60091 rows are expected.
select count(*) from store_sales_seq_none_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with DEFAULT+RECORD and then
# read it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_def_rec like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_def_rec partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the DEFAULT+RECORD table; 60091 rows are expected.
select count(*) from store_sales_seq_def_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY_BLOCKED+RECORD and
# then read it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snapb_rec like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_snapb_rec partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the SNAPPY_BLOCKED+RECORD table; 60091 rows are expected.
select count(*) from store_sales_seq_snapb_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY+RECORD and then read
# it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snap_rec like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_snap_rec partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the SNAPPY+RECORD table; 60091 rows are expected.
select count(*) from store_sales_seq_snap_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with GZIP+RECORD and then read
# it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_gzip_rec like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_gzip_rec partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the GZIP+RECORD table; 60091 rows are expected.
select count(*) from store_sales_seq_gzip_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with NONE+BLOCK and then read it
# back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_none_block like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_none_block partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the NONE+BLOCK table; 60091 rows are expected.
select count(*) from store_sales_seq_none_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with DEFAULT+BLOCK and then read
# it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_def_block like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_def_block partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the DEFAULT+BLOCK table; 60091 rows are expected.
select count(*) from store_sales_seq_def_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY_BLOCKED+BLOCK and
# then read it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snapb_block like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_snapb_block partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the SNAPPY_BLOCKED+BLOCK table; 60091 rows are expected.
select count(*) from store_sales_seq_snapb_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY+BLOCK and then read
# it back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snap_block like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_snap_block partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the SNAPPY+BLOCK table; 60091 rows are expected.
select count(*) from store_sales_seq_snap_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with GZIP+BLOCK and then read it
# back.
# The table copies the schema of tpcds_parquet.store_sales and is filled, partitioned by
# ss_sold_date_sk, with the rows whose ss_sold_date_sk is in [2451175, 2451200] or is
# null with ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_gzip_block like tpcds_parquet.store_sales
  stored as SEQUENCEFILE;
insert into store_sales_seq_gzip_block partition(ss_sold_date_sk)
  select * from tpcds_parquet.store_sales
  where (ss_sold_date_sk between 2451175 and 2451200) or
    (ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read back the GZIP+BLOCK table; 60091 rows are expected.
select count(*) from store_sales_seq_gzip_block;
---- RESULTS
60091
---- TYPES
BIGINT
====