mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
IMPALA-12903: Querying virtual column FILE__POSITION for TEXT and JSON tables crashes Impala
Impala generates segmentation fault when it queries the virtual column FILE__POSITION for TEXT or JSON tables. When the scanners that do not support the FILE__POSITION virtual column detect its presence they try to report an error and close themselves. The segfault is in the scanners' Close() method when they try to dereference a NULL stream object. This patch simply adds NULL-checks in Close(). Alternatively we could detect the presence of FILE__POSITION during planning in the HdfsScanNode, but doing it in the scanners lets us handle more queries, e.g. queries that dynamically prune partitions and the surviving partitions all have file formats that support FILE__POSITION. Testing: * added negative tests to properly report the errors * added tests for mixed file format tables Change-Id: I8e1af8d526f9046aceddb5944da9e6f9c63768b0 Reviewed-on: http://gerrit.cloudera.org:8080/21148 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
This commit is contained in:
@@ -95,8 +95,10 @@ void HdfsJsonScanner::Close(RowBatch* row_batch) {
|
||||
// Verify all resources (if any) have been transferred or freed.
|
||||
DCHECK_EQ(template_tuple_pool_.get()->total_allocated_bytes(), 0);
|
||||
DCHECK_EQ(data_buffer_pool_.get()->total_allocated_bytes(), 0);
|
||||
scan_node_->RangeComplete(THdfsFileFormat::JSON,
|
||||
stream_->file_desc()->file_compression);
|
||||
if (stream_ != nullptr) {
|
||||
scan_node_->RangeComplete(THdfsFileFormat::JSON,
|
||||
stream_->file_desc()->file_compression);
|
||||
}
|
||||
CloseInternal();
|
||||
}
|
||||
|
||||
|
||||
@@ -209,7 +209,7 @@ void HdfsTextScanner::Close(RowBatch* row_batch) {
|
||||
DCHECK_EQ(template_tuple_pool_.get()->total_allocated_bytes(), 0);
|
||||
DCHECK_EQ(data_buffer_pool_.get()->total_allocated_bytes(), 0);
|
||||
DCHECK_EQ(boundary_pool_.get()->total_allocated_bytes(), 0);
|
||||
if (!only_parsing_header_) {
|
||||
if (!only_parsing_header_ && stream_ != nullptr) {
|
||||
scan_node_->RangeComplete(THdfsFileFormat::TEXT,
|
||||
stream_->file_desc()->file_compression);
|
||||
}
|
||||
|
||||
@@ -155,3 +155,34 @@ order by id;
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, INT
|
||||
====
|
||||
---- QUERY
|
||||
# Regression test for IMPALA-12903. The following query uses static pruning. The surviving
|
||||
# partitions have file formats that support virtual column FILE__POSITION.
|
||||
select file__position, year, month, id, date_string_col
|
||||
from functional.alltypesmixedformat
|
||||
where year=2009 and month = 4 and id < 905
|
||||
order by id;
|
||||
---- RESULTS
|
||||
regex:\d+,2009,4,900,'04/01/09'
|
||||
regex:\d+,2009,4,901,'04/01/09'
|
||||
regex:\d+,2009,4,902,'04/01/09'
|
||||
regex:\d+,2009,4,903,'04/01/09'
|
||||
regex:\d+,2009,4,904,'04/01/09'
|
||||
---- TYPES
|
||||
BIGINT,INT,INT,INT,STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Regression test for IMPALA-12903. The following query uses dynamic pruning. The
|
||||
# surviving partitions have file formats that support virtual column FILE__POSITION.
|
||||
select straight_join lhs.file__position, lhs.year, lhs.month, lhs.id
|
||||
from functional.alltypesmixedformat lhs, functional.alltypes rhs
|
||||
where lhs.id = rhs.id and lhs.year = rhs.year and lhs.month = rhs.month and
|
||||
rhs.id > 900 and rhs.id < 900 + rhs.month
|
||||
order by id;
|
||||
---- RESULTS
|
||||
regex:\d+,2009,4,901
|
||||
regex:\d+,2009,4,902
|
||||
regex:\d+,2009,4,903
|
||||
---- TYPES
|
||||
BIGINT,INT,INT,INT
|
||||
====
|
||||
|
||||
41
testdata/workloads/functional-query/queries/QueryTest/virtual-column-file-position-negative.test
vendored
Normal file
41
testdata/workloads/functional-query/queries/QueryTest/virtual-column-file-position-negative.test
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
====
|
||||
---- QUERY
|
||||
select file__position from functional.alltypes
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for TEXT files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position from functional_avro.alltypes
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for AVRO files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position from functional_json.alltypes
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for JSON files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position from functional_seq.alltypes
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for SEQUENCE_FILE files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position from functional_rc.alltypes
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for RC_FILE files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position, year, month, id, date_string_col from functional.alltypesmixedformat where year=2009 and month = 3;
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for RC_FILE files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position, year, month, id, date_string_col from functional.alltypesmixedformat where year=2009 and month = 1;
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported for TEXT files.
|
||||
====
|
||||
---- QUERY
|
||||
select file__position, year, month, id, date_string_col from functional.alltypesmixedformat;
|
||||
---- CATCH
|
||||
Virtual column FILE__POSITION is not supported
|
||||
====
|
||||
@@ -175,6 +175,23 @@ class TestScannersVirtualColumns(ImpalaTestSuite):
|
||||
self.run_test_case('QueryTest/mixing-virtual-columns', vector, unique_database)
|
||||
|
||||
|
||||
class TestScannersVirtualColumnsNegative(ImpalaTestSuite):
|
||||
@classmethod
|
||||
def get_workload(cls):
|
||||
return 'functional-query'
|
||||
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestScannersVirtualColumnsNegative, cls).add_test_dimensions()
|
||||
# In the tests we explicitly refer to the databases, i.e. no need to
|
||||
# run this test with multiple file formats.
|
||||
cls.ImpalaTestMatrix.add_dimension(
|
||||
create_uncompressed_text_dimension(cls.get_workload()))
|
||||
|
||||
def test_virtual_column_file_position_negative(self, vector):
|
||||
self.run_test_case('QueryTest/virtual-column-file-position-negative', vector)
|
||||
|
||||
|
||||
class TestIcebergVirtualColumns(ImpalaTestSuite):
|
||||
|
||||
@classmethod
|
||||
|
||||
Reference in New Issue
Block a user