mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
The following use of the sequence file writer can lead to a crash:

  > set compression_codec=gzip;
  > set seq_compression_mode=record;
  > set allow_unsupported_formats=1;
  > create table seq_tbl like tbl stored as sequencefile;
  > insert into seq_tbl select * from tbl;

This fix removes the MemPool::FreeAll() call from HdfsSequenceTableWriter::Flush(). Freeing the memory pool in Flush() is incorrect because a memory pool buffer is cached by the compressor in the table writer, which isn't reset across calls to Flush(). If the file that is being written is big enough, HdfsSequenceTableWriter::AppendRows() will call Flush() multiple times, causing memory corruption.

Change-Id: Ida0b9f189175358ae54149d0e1af7caa06ae3bec
Reviewed-on: http://gerrit.cloudera.org:8080/7394
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Impala Public Jenkins
309 lines
8.9 KiB
Plaintext
309 lines
8.9 KiB
Plaintext
====
|
|
---- QUERY
# Setup: create the three-column (int, string, double) table stored as SEQUENCEFILE.
# The insert test cases that follow all write into this table.
|
|
SET COMPRESSION_CODEC=NONE;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
create table __seq_write (i int, s string, d double)
|
|
stored as SEQUENCEFILE;
|
|
====
|
|
---- QUERY
# Write row 0 with COMPRESSION_CODEC=NONE and SEQ_COMPRESSION_MODE=BLOCK.
# This case inserts via a constant SELECT rather than a VALUES clause.
|
|
SET COMPRESSION_CODEC=NONE;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write select 0, "a", 1.1;
|
|
====
|
|
---- QUERY
# Write row 1 with COMPRESSION_CODEC=DEFAULT and SEQ_COMPRESSION_MODE=BLOCK.
|
|
SET COMPRESSION_CODEC=DEFAULT;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (1, "b", 2.2);
|
|
====
|
|
---- QUERY
# Write row 2 with COMPRESSION_CODEC=SNAPPY and SEQ_COMPRESSION_MODE=BLOCK.
|
|
SET COMPRESSION_CODEC=SNAPPY;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (2, "c", 3.3);
|
|
====
|
|
---- QUERY
# Write row 3 with COMPRESSION_CODEC=SNAPPY_BLOCKED and SEQ_COMPRESSION_MODE=BLOCK.
|
|
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (3, "d", 4.4);
|
|
====
|
|
---- QUERY
# Write row 4 with COMPRESSION_CODEC=GZIP and SEQ_COMPRESSION_MODE=BLOCK.
|
|
SET COMPRESSION_CODEC=GZIP;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (4, "e", 5.5);
|
|
====
|
|
---- QUERY
# Write row 5 with COMPRESSION_CODEC=NONE and SEQ_COMPRESSION_MODE=RECORD.
# This case inserts via a constant SELECT rather than a VALUES clause.
|
|
SET COMPRESSION_CODEC=NONE;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write select 5, "a", 1.1;
|
|
====
|
|
---- QUERY
# Write row 6 with COMPRESSION_CODEC=DEFAULT and SEQ_COMPRESSION_MODE=RECORD.
|
|
SET COMPRESSION_CODEC=DEFAULT;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (6, "b", 2.2);
|
|
====
|
|
---- QUERY
# Write row 7 with COMPRESSION_CODEC=SNAPPY and SEQ_COMPRESSION_MODE=RECORD.
|
|
SET COMPRESSION_CODEC=SNAPPY;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (7, "c", 3.3);
|
|
====
|
|
---- QUERY
# Write row 8 with COMPRESSION_CODEC=SNAPPY_BLOCKED and SEQ_COMPRESSION_MODE=RECORD.
|
|
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (8, "d", 4.4);
|
|
====
|
|
---- QUERY
# Write row 9 with COMPRESSION_CODEC=GZIP and SEQ_COMPRESSION_MODE=RECORD.
|
|
SET COMPRESSION_CODEC=GZIP;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
insert into __seq_write values (9, "e", 5.5);
|
|
====
|
|
---- QUERY
# Negative case: with ALLOW_UNSUPPORTED_FORMATS=0 the insert is expected to fail.
# The CATCH section holds the error text expected from the failed insert.
|
|
SET ALLOW_UNSUPPORTED_FORMATS=0;
|
|
insert into __seq_write values (4, "e", 5.5);
|
|
---- CATCH
|
|
Writing to table format SEQUENCE_FILE is not supported. Use query option
|
|
====
|
|
---- QUERY
# Read back everything written to __seq_write: rows 0-9, one per successful insert.
# Row 4 is expected only once, because the second (4, "e", 5.5) insert is the one that
# is expected to fail with ALLOW_UNSUPPORTED_FORMATS=0.
|
|
select * from __seq_write;
|
|
---- RESULTS
|
|
0,'a',1.1
|
|
1,'b',2.2
|
|
2,'c',3.3
|
|
3,'d',4.4
|
|
4,'e',5.5
|
|
5,'a',1.1
|
|
6,'b',2.2
|
|
7,'c',3.3
|
|
8,'d',4.4
|
|
9,'e',5.5
|
|
---- TYPES
|
|
INT,STRING,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with NONE+RECORD and then read
|
|
# it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=NONE;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_none_rec like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_none_rec partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the NONE+RECORD table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_none_rec;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with DEFAULT+RECORD and then
|
|
# read it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=DEFAULT;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_def_rec like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_def_rec partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the DEFAULT+RECORD table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_def_rec;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with SNAPPY_BLOCKED+RECORD and
|
|
# then read it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_snapb_rec like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_snapb_rec partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the SNAPPY_BLOCKED+RECORD table written above; 60091 rows are
# expected.
|
|
select count(*) from store_sales_seq_snapb_rec;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with SNAPPY+RECORD and then read
|
|
# it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=SNAPPY;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_snap_rec like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_snap_rec partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the SNAPPY+RECORD table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_snap_rec;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with GZIP+RECORD and then read
|
|
# it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=GZIP;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_gzip_rec like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_gzip_rec partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the GZIP+RECORD table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_gzip_rec;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with NONE+BLOCK and then read it
|
|
# back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=NONE;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_none_block like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_none_block partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the NONE+BLOCK table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_none_block;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with DEFAULT+BLOCK and then read
|
|
# it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=DEFAULT;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_def_block like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_def_block partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the DEFAULT+BLOCK table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_def_block;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with SNAPPY_BLOCKED+BLOCK and
|
|
# then read it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=SNAPPY_BLOCKED;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_snapb_block like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_snapb_block partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the SNAPPY_BLOCKED+BLOCK table written above; 60091 rows are
# expected.
|
|
select count(*) from store_sales_seq_snapb_block;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with SNAPPY+BLOCK and then read
|
|
# it back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=SNAPPY;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_snap_block like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_snap_block partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the SNAPPY+BLOCK table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_snap_block;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-3079: Create a table containing larger seq files with GZIP+BLOCK and then read it
|
|
# back
# Only a slice of tpcds_parquet.store_sales is copied: ss_sold_date_sk 2451175..2451200
# (inclusive, via BETWEEN) plus NULL-date rows with ss_sold_time_sk > 60000.
|
|
SET COMPRESSION_CODEC=GZIP;
|
|
SET SEQ_COMPRESSION_MODE=BLOCK;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table store_sales_seq_gzip_block like tpcds_parquet.store_sales
|
|
stored as SEQUENCEFILE;
|
|
insert into store_sales_seq_gzip_block partition(ss_sold_date_sk)
|
|
select * from tpcds_parquet.store_sales
|
|
where (ss_sold_date_sk between 2451175 and 2451200) or
|
|
(ss_sold_date_sk is null and ss_sold_time_sk > 60000);
|
|
====
|
|
---- QUERY
# Read-back check for the GZIP+BLOCK table written above; 60091 rows are expected.
|
|
select count(*) from store_sales_seq_gzip_block;
|
|
---- RESULTS
|
|
60091
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-5407: Create a table containing seq files with GZIP+RECORD. If the number of
|
|
# impalad workers is three, three files will be created, two of which are large enough
|
|
# (> 64MB) to force multiple flushes. Make sure that the files have been created
|
|
# successfully.
# Unlike the IMPALA-3079 cases, the insert copies tpcds.catalog_sales with no WHERE
# filter and no PARTITION clause.
|
|
SET COMPRESSION_CODEC=GZIP;
|
|
SET SEQ_COMPRESSION_MODE=RECORD;
|
|
SET ALLOW_UNSUPPORTED_FORMATS=1;
|
|
create table catalog_sales_seq_gzip_rec like tpcds.catalog_sales stored as SEQUENCEFILE;
|
|
insert into catalog_sales_seq_gzip_rec select * from tpcds.catalog_sales;
|
|
====
|
|
---- QUERY
# Read-back check for the GZIP+RECORD copy written above; 1441548 rows are expected.
|
|
select count(*) from catalog_sales_seq_gzip_rec;
|
|
---- RESULTS
|
|
1441548
|
|
---- TYPES
|
|
BIGINT
|
|
====
|