IMPALA-14258: Disable tuple caching for Full Hive ACID tables

TestAcidRowValidation.test_row_validation fails with tuple caching
correctness verification. The test creates a Full Hive ACID table
with a file using valid write ids, mimicking a streaming ingest.
As the valid write ids change, the scan of that file produces
different rows without the file changing. Tuple caching currently
doesn't understand valid write ids, so this produces incorrect
results.

This marks Full Hive ACID tables as ineligible for caching until
valid write ids can be supported properly. Insert-only tables are
still eligible.

Testing:
 - Added test cases to TupleCacheTest
 - Ran TestAcidRowValidation.test_row_validation with correctness
   verification

Change-Id: Icab9613b8e2973aed1d34427c51d2fd8b37a9aba
Reviewed-on: http://gerrit.cloudera.org:8080/23454
Reviewed-by: Yida Wu <wydbaggio000@gmail.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
This commit is contained in:
Joe McDonnell
2025-09-22 17:33:56 -07:00
parent 52b87fcefd
commit e1896d4bf8
2 changed files with 18 additions and 0 deletions

View File

@@ -45,6 +45,7 @@ import org.apache.impala.thrift.TScanRangeSpec;
import org.apache.impala.thrift.TSlotDescriptor;
import org.apache.impala.thrift.TTableName;
import org.apache.impala.thrift.TTupleDescriptor;
import org.apache.impala.util.AcidUtils;
import org.apache.thrift.TBase;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TBinaryProtocol;
@@ -107,6 +108,7 @@ public class TupleCacheInfo {
NONDETERMINISTIC_FN,
MERGING_EXCHANGE,
PARTITIONED_EXCHANGE,
FULL_ACID,
}
private EnumSet<IneligibilityReason> ineligibilityReasons_;
@@ -532,6 +534,14 @@ public class TupleCacheInfo {
"registerTable() only applies to base tables");
Preconditions.checkState(tbl != null, "Invalid null argument to registerTable()");
// IMPALA-14258: Tuple caching does not support Full Hive ACID tables, as it does
// not yet support handling valid write ids.
if (tbl.getMetaStoreTable() != null &&
AcidUtils.isFullAcidTable(tbl.getMetaStoreTable().getParameters())) {
setIneligible(IneligibilityReason.FULL_ACID);
return;
}
// Right now, we only hash the database / table name.
TTableName tblName = tbl.getTableName().toThrift();
hashThrift("Table", tblName);

View File

@@ -252,6 +252,14 @@ public class TupleCacheTest extends PlannerTestBase {
verifyCacheIneligible("select id from functional_kudu.alltypes");
verifyCacheIneligible("select id from functional_hbase.alltypes");
// Caching for Full Hive ACID is not implemented due to complications
// with valid write ids. ORC tables are loaded as Full ACID tables.
verifyCacheIneligible("select count(*) from functional_orc_def.alltypes");
// Hive ACID insert-only tables are eligible
verifyAllEligible(
"select count(*) from functional_parquet.insert_only_major_and_minor_compacted",
/* isDistributedPlan */ false);
// Runtime filter produced by Kudu table is not implemented
verifyCacheIneligible("select a.id from functional.alltypes a, " +
"functional_kudu.alltypes b where a.id = b.id");