mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
If the user only queries virtual column INPUT__FILE__NAME from a table backed by text files, and the last row doesn't end with the row delimiter (e.g. '\n') then Impala crashes. In HdfsTextScanner::FinishScanRange() there is specific code to deal with the last row if it doesn't end with the row delimiter, and we fill the last tuple here. This code wasn't active when we only read INPUT__FILE__NAME, which means the last tuple contained garbage which caused a segfault later. The fix is to always fill the last tuple if we have a template tuple as it means we either have partition expressions, or file-level virtual columns like INPUT__FILE__NAME. Other file-level virtual columns only apply to Iceberg tables which don't support text data files, so those are not affected by this bug. Testing * added e2e tests Change-Id: I0ea8e7fed77cbc9ae90a858eafeee9dcfd73d143 Reviewed-on: http://gerrit.cloudera.org:8080/22141 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Gabor Kaszab <gaborkaszab@cloudera.com>
56 lines
2.1 KiB
Plaintext
56 lines
2.1 KiB
Plaintext
====
|
|
---- QUERY
|
|
# User column INPUT__FILE__NAME hides virtual column INPUT__FILE__NAME
|
|
create table i_n_f (input__file__name string);
|
|
insert into table i_n_f values ('impala');
|
|
select input__file__name from i_n_f;
|
|
---- RESULTS
|
|
'impala'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select * from i_n_f;
|
|
---- RESULTS
|
|
'impala'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select input__file__name, * from i_n_f;
|
|
---- RESULTS
|
|
'impala','impala'
|
|
---- TYPES
|
|
STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-13589: Select INPUT__FILE__NAME only
|
|
select input__file__name from functional.table_no_newline;
|
|
---- RESULTS
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline/table_no_newline.csv'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-13589: Select INPUT__FILE__NAME only in partitioned table
|
|
select input__file__name from functional.table_no_newline_part;
|
|
---- RESULTS
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2010/month=3/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2010/month=3/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2010/month=3/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2010/month=3/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2010/month=3/table_no_newline.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2015/month=3/table_missing_columns.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2015/month=3/table_missing_columns.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2015/month=3/table_missing_columns.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2015/month=3/table_missing_columns.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2015/month=3/table_missing_columns.csv'
|
|
regex:'$NAMENODE/test-warehouse/table_no_newline_part/year=2015/month=3/table_missing_columns.csv'
|
|
---- TYPES
|
|
STRING
|
|
====
|