mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
IMPALA-1476: Impala incorrectly handles text data missing a newline on the last line.
I did a local benchmark and there's minimal performance impact (<1%). Change-Id: I8d84a145acad886c52587258b27d33cff96ea399 (cherry picked from commit 7e750ad5d90007cc85ebe493af4dce7a537ad7c0) Reviewed-on: http://gerrit.cloudera.org:8080/189 Reviewed-by: Juan Yu <jyu@cloudera.com> Tested-by: Internal Jenkins
This commit is contained in:
@@ -1470,3 +1470,100 @@ value DECIMAL(5,2)
|
||||
LOAD DATA LOCAL INPATH '${{env:IMPALA_HOME}}/testdata/data/avro_decimal_tbl.avro'
|
||||
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
table_no_newline
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  id INT,
  col_1 BOOLEAN,
  col_2 DOUBLE,
  col_3 TIMESTAMP
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`hadoop fs -mkdir -p /test-warehouse/table_no_newline && \
hadoop fs -put -f ${IMPALA_HOME}/testdata/data/table_no_newline.csv /test-warehouse/table_no_newline
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
testescape_16_lf
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  col STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  ESCAPED BY '\\'
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`${IMPALA_HOME}/testdata/common/text_delims_table.py --table_dir '/tmp/testescape_16_lf' --file_len 16 --only_newline && \
hadoop fs -mkdir -p /test-warehouse/testescape_16_lf && \
hadoop fs -put -f /tmp/testescape_16_lf/* /test-warehouse/testescape_16_lf/
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
testescape_16_crlf
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  col STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  ESCAPED BY '\\'
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`${IMPALA_HOME}/testdata/common/text_delims_table.py --table_dir '/tmp/testescape_16_crlf' --file_len 16 && \
hadoop fs -mkdir -p /test-warehouse/testescape_16_crlf && \
hadoop fs -put -f /tmp/testescape_16_crlf/* /test-warehouse/testescape_16_crlf/
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
testescape_17_lf
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  col STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  ESCAPED BY '\\'
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`${IMPALA_HOME}/testdata/common/text_delims_table.py --table_dir '/tmp/testescape_17_lf' --file_len 17 --only_newline && \
hadoop fs -mkdir -p /test-warehouse/testescape_17_lf && \
hadoop fs -put -f /tmp/testescape_17_lf/* /test-warehouse/testescape_17_lf/
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
testescape_17_crlf
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  col STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  ESCAPED BY '\\'
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`${IMPALA_HOME}/testdata/common/text_delims_table.py --table_dir '/tmp/testescape_17_crlf' --file_len 17 && \
hadoop fs -mkdir -p /test-warehouse/testescape_17_crlf && \
hadoop fs -put -f /tmp/testescape_17_crlf/* /test-warehouse/testescape_17_crlf/
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
testescape_32_lf
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  col STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  ESCAPED BY '\\'
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`${IMPALA_HOME}/testdata/common/text_delims_table.py --table_dir '/tmp/testescape_32_lf' --file_len 32 --only_newline && \
hadoop fs -mkdir -p /test-warehouse/testescape_32_lf && \
hadoop fs -put -f /tmp/testescape_32_lf/* /test-warehouse/testescape_32_lf/
|
||||
====
|
||||
---- DATASET
functional
---- BASE_TABLE_NAME
testescape_32_crlf
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
  col STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  ESCAPED BY '\\'
LOCATION '/test-warehouse/{table_name}';
---- LOAD
`${IMPALA_HOME}/testdata/common/text_delims_table.py --table_dir '/tmp/testescape_32_crlf' --file_len 32 && \
hadoop fs -mkdir -p /test-warehouse/testescape_32_crlf && \
hadoop fs -put -f /tmp/testescape_32_crlf/* /test-warehouse/testescape_32_crlf/
|
||||
====
|
||||
|
||||
Reference in New Issue
Block a user