IMPALA-14569: Fix IllegalStateException in partition pruning on type mismatch

This fixes an IllegalStateException in HdfsPartitionPruner when
evaluating 'IN' predicates whose operands have different but compatible types, for
example DATE and STRING: date_col in (<date as string>).

Previously, 'canEvalUsingPartitionMd' did not check if the slot type
matched the literal type. This caused the frontend to attempt invalid
comparisons via 'LiteralExpr.compareTo', leading to
IllegalStateException or incorrect pruning.

The fix ensures 'canEvalUsingPartitionMd' returns false on type
mismatches, deferring evaluation to the backend where proper casting
occurs.

Testing:
- Added regression test in hdfs-partition-pruning.test.

Change-Id: Idc226a628c8df559329a060cb963b81e27e21eda
Reviewed-on: http://gerrit.cloudera.org:8080/23706
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Peter Rozsa
2025-11-21 15:41:23 +01:00
committed by Impala Public Jenkins
parent 685745f785
commit 6cf21464b4
3 changed files with 24 additions and 0 deletions

View File

@@ -285,6 +285,9 @@ public class HdfsPartitionPruner {
if (slot == null) return false;
for (int i = 1; i < expr.getChildren().size(); ++i) {
if (!Expr.IS_LITERAL.apply(expr.getChild(i))) return false;
// Mismatched types should be evaluated with their respective casts
// in the backend.
if (!slot.getType().equals(expr.getChild(i).getType())) return false;
}
return true;
}

View File

@@ -0,0 +1,17 @@
====
---- QUERY
# IMPALA-14569: failing partition pruning due to mismatching partition types
create table a(id int) partitioned by (date_stored_as_string string);
create table b(id int) partitioned by (date_stored_as_date date);
insert into a(id, date_stored_as_string) values(1, '2025-12-12');
insert into a(id, date_stored_as_string) values(1, '2025-12-10');
insert into b(id, date_stored_as_date) values(1, '2025-12-12');
select * from b
left outer join a on date_stored_as_date = date_stored_as_string
where date_stored_as_date in ( '2025-12-12');
---- RESULTS
1,2025-12-12,1,'2025-12-12'
---- TYPES
INT,DATE,INT,STRING
---- RUNTIME_PROFILE
row_regex:.*HDFS partitions=1/2 files=1 size=2B

View File

@@ -358,6 +358,10 @@ class TestHdfsQueries(ImpalaTestSuite):
def test_file_partitions(self, vector):
# Runs the 'QueryTest/hdfs-partitions' test case with the given test vector.
self.run_test_case('QueryTest/hdfs-partitions', vector)
def test_partition_pruning(self, vector, unique_database):
# Runs the 'QueryTest/hdfs-partition-pruning' test case; unique_database is
# passed through to run_test_case together with the test vector.
self.run_test_case('QueryTest/hdfs-partition-pruning',
vector, unique_database)
class TestPartitionKeyScans(ImpalaTestSuite):
"""Tests for queries that exercise partition key scan optimisation. These