Files
impala/testdata/workloads/functional-query/queries/QueryTest/seq-writer.test
Attila Jeges bc56d3c48c IMPALA-5407: Fix crash in HdfsSequenceTableWriter
The following use of the sequence file writer can lead to a crash:
> set compression_codec=gzip;
> set seq_compression_mode=record;
> set allow_unsupported_formats=1;
> create table seq_tbl like tbl stored as sequencefile;
> insert into seq_tbl select * from tbl;

This fix removes the MemPool::FreeAll() call from
HdfsSequenceTableWriter::Flush(). Freeing the memory pool in Flush()
is incorrect because a memory pool buffer is cached by the compressor
in the table writer which isn't reset across calls to Flush().

If the file that is being written is big enough,
HdfsSequenceTableWriter::AppendRows() will call Flush() multiple
times causing memory corruption.

Change-Id: Ida0b9f189175358ae54149d0e1af7caa06ae3bec
Reviewed-on: http://gerrit.cloudera.org:8080/7394
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Impala Public Jenkins
2017-07-19 06:48:06 +00:00

309 lines
8.9 KiB
Plaintext

====
---- QUERY
# Create the target table for the small single-row insert tests in this file: three
# columns (INT, STRING, DOUBLE) stored as SEQUENCEFILE.
# NOTE(review): the codec/mode options are also set in this DDL-only section;
# presumably they only matter for the inserts that follow - confirm.
SET COMPRESSION_CODEC=NONE;
SET ALLOW_UNSUPPORTED_FORMATS=1;
SET SEQ_COMPRESSION_MODE=BLOCK;
create table __seq_write (i int, s string, d double)
stored as SEQUENCEFILE;
====
---- QUERY
# BLOCK mode, codec NONE: append the row with i=0. This case uses INSERT ... SELECT with
# constants; the remaining BLOCK-mode cases use INSERT ... VALUES.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write select 0, "a", 1.1;
====
---- QUERY
# BLOCK mode, codec DEFAULT: append the row with i=1.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (1, "b", 2.2);
====
---- QUERY
# BLOCK mode, codec SNAPPY: append the row with i=2.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (2, "c", 3.3);
====
---- QUERY
# BLOCK mode, codec SNAPPY_BLOCKED: append the row with i=3.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (3, "d", 4.4);
====
---- QUERY
# BLOCK mode, codec GZIP: append the row with i=4.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (4, "e", 5.5);
====
---- QUERY
# RECORD mode, codec NONE: append the row with i=5. This case uses INSERT ... SELECT with
# constants; the remaining RECORD-mode cases use INSERT ... VALUES.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write select 5, "a", 1.1;
====
---- QUERY
# RECORD mode, codec DEFAULT: append the row with i=6.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (6, "b", 2.2);
====
---- QUERY
# RECORD mode, codec SNAPPY: append the row with i=7.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (7, "c", 3.3);
====
---- QUERY
# RECORD mode, codec SNAPPY_BLOCKED: append the row with i=8.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (8, "d", 4.4);
====
---- QUERY
# RECORD mode, codec GZIP: append the row with i=9.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
insert into __seq_write values (9, "e", 5.5);
====
---- QUERY
# Negative test: with ALLOW_UNSUPPORTED_FORMATS=0 the insert is expected to fail with the
# error message listed in the CATCH section. The values duplicate the i=4 row, but the
# expected results of the read-back query list that row only once.
SET ALLOW_UNSUPPORTED_FORMATS=0;
insert into __seq_write values (4, "e", 5.5);
---- CATCH
Writing to table format SEQUENCE_FILE is not supported. Use query option
====
---- QUERY
# Read back everything written to __seq_write: one row per successful insert, i.e.
# i=0..4 from the BLOCK-mode cases and i=5..9 from the RECORD-mode cases.
# NOTE(review): there is no ORDER BY; presumably the harness compares the rows
# order-insensitively - confirm.
select * from __seq_write;
---- RESULTS
0,'a',1.1
1,'b',2.2
2,'c',3.3
3,'d',4.4
4,'e',5.5
5,'a',1.1
6,'b',2.2
7,'c',3.3
8,'d',4.4
9,'e',5.5
---- TYPES
INT,STRING,DOUBLE
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with NONE+RECORD and then read
# it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_none_rec like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_none_rec partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the NONE+RECORD table is expected to contain 60091 rows.
select count(*) from store_sales_seq_none_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with DEFAULT+RECORD and then
# read it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_def_rec like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_def_rec partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the DEFAULT+RECORD table is expected to contain 60091 rows.
select count(*) from store_sales_seq_def_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY_BLOCKED+RECORD and
# then read it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snapb_rec like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_snapb_rec partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the SNAPPY_BLOCKED+RECORD table is expected to contain 60091 rows.
select count(*) from store_sales_seq_snapb_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY+RECORD and then read
# it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snap_rec like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_snap_rec partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the SNAPPY+RECORD table is expected to contain 60091 rows.
select count(*) from store_sales_seq_snap_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with GZIP+RECORD and then read
# it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_gzip_rec like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_gzip_rec partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the GZIP+RECORD table is expected to contain 60091 rows.
select count(*) from store_sales_seq_gzip_rec;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with NONE+BLOCK and then read it
# back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=NONE;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_none_block like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_none_block partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the NONE+BLOCK table is expected to contain 60091 rows.
select count(*) from store_sales_seq_none_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with DEFAULT+BLOCK and then read
# it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=DEFAULT;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_def_block like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_def_block partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the DEFAULT+BLOCK table is expected to contain 60091 rows.
select count(*) from store_sales_seq_def_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY_BLOCKED+BLOCK and
# then read it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snapb_block like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_snapb_block partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the SNAPPY_BLOCKED+BLOCK table is expected to contain 60091 rows.
select count(*) from store_sales_seq_snapb_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with SNAPPY+BLOCK and then read
# it back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=SNAPPY;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_snap_block like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_snap_block partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the SNAPPY+BLOCK table is expected to contain 60091 rows.
select count(*) from store_sales_seq_snap_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-3079: Create a table containing larger seq files with GZIP+BLOCK and then read it
# back.
# The table takes its schema from tpcds_parquet.store_sales and is filled by an insert
# with a partition(ss_sold_date_sk) clause. The select keeps rows whose ss_sold_date_sk
# is between 2451175 and 2451200 (inclusive), plus rows with a NULL ss_sold_date_sk and
# ss_sold_time_sk > 60000.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=BLOCK;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table store_sales_seq_gzip_block like tpcds_parquet.store_sales
stored as SEQUENCEFILE;
insert into store_sales_seq_gzip_block partition(ss_sold_date_sk)
select * from tpcds_parquet.store_sales
where (ss_sold_date_sk between 2451175 and 2451200) or
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
====
---- QUERY
# Read-back check: the GZIP+BLOCK table is expected to contain 60091 rows.
select count(*) from store_sales_seq_gzip_block;
---- RESULTS
60091
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-5407: Create a table containing seq files with GZIP+RECORD. If the number of
# impalad workers is three, three files will be created, two of which are large enough
# (> 64MB) to force multiple flushes. Make sure that the files have been created
# successfully.
# The insert copies tpcds.catalog_sales without a WHERE clause, so the whole source
# table is written through the sequence file writer in a single statement.
SET COMPRESSION_CODEC=GZIP;
SET SEQ_COMPRESSION_MODE=RECORD;
SET ALLOW_UNSUPPORTED_FORMATS=1;
create table catalog_sales_seq_gzip_rec like tpcds.catalog_sales stored as SEQUENCEFILE;
insert into catalog_sales_seq_gzip_rec select * from tpcds.catalog_sales;
====
---- QUERY
# Read-back check: the GZIP+RECORD copy is expected to contain 1441548 rows.
select count(*) from catalog_sales_seq_gzip_rec;
---- RESULTS
1441548
---- TYPES
BIGINT
====