mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14258: Disable tuple caching for Full Hive ACID tables
TestAcidRowValidation.test_row_validation fails with tuple caching correctness verification. The test creates a Full Hive ACID table with a file using valid write ids, mimicking a streaming ingest. As the valid write ids change, the scan of that file produces different rows without the file changing. Tuple caching currently doesn't understand valid write ids, so this produces incorrect results.

This marks Full Hive ACID tables as ineligible for caching until valid write ids can be supported properly. Insert-only tables are still eligible.

Testing:
- Added test cases to TupleCacheTest
- Ran TestAcidRowValidation.test_row_validation with correctness verification

Change-Id: Icab9613b8e2973aed1d34427c51d2fd8b37a9aba
Reviewed-on: http://gerrit.cloudera.org:8080/23454
Reviewed-by: Yida Wu <wydbaggio000@gmail.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
This commit is contained in:
@@ -45,6 +45,7 @@ import org.apache.impala.thrift.TScanRangeSpec;
|
||||
import org.apache.impala.thrift.TSlotDescriptor;
|
||||
import org.apache.impala.thrift.TTableName;
|
||||
import org.apache.impala.thrift.TTupleDescriptor;
|
||||
import org.apache.impala.util.AcidUtils;
|
||||
import org.apache.thrift.TBase;
|
||||
import org.apache.thrift.TSerializer;
|
||||
import org.apache.thrift.protocol.TBinaryProtocol;
|
||||
@@ -107,6 +108,7 @@ public class TupleCacheInfo {
|
||||
NONDETERMINISTIC_FN,
|
||||
MERGING_EXCHANGE,
|
||||
PARTITIONED_EXCHANGE,
|
||||
FULL_ACID,
|
||||
}
|
||||
private EnumSet<IneligibilityReason> ineligibilityReasons_;
|
||||
|
||||
@@ -532,6 +534,14 @@ public class TupleCacheInfo {
|
||||
"registerTable() only applies to base tables");
|
||||
Preconditions.checkState(tbl != null, "Invalid null argument to registerTable()");
|
||||
|
||||
// IMPALA-14258: Tuple caching does not support Full Hive ACID tables, as it does
|
||||
// not yet support handling valid write ids.
|
||||
if (tbl.getMetaStoreTable() != null &&
|
||||
AcidUtils.isFullAcidTable(tbl.getMetaStoreTable().getParameters())) {
|
||||
setIneligible(IneligibilityReason.FULL_ACID);
|
||||
return;
|
||||
}
|
||||
|
||||
// Right now, we only hash the database / table name.
|
||||
TTableName tblName = tbl.getTableName().toThrift();
|
||||
hashThrift("Table", tblName);
|
||||
|
||||
@@ -252,6 +252,14 @@ public class TupleCacheTest extends PlannerTestBase {
|
||||
verifyCacheIneligible("select id from functional_kudu.alltypes");
|
||||
verifyCacheIneligible("select id from functional_hbase.alltypes");
|
||||
|
||||
// Caching for Full Hive ACID is not implemented due to complications
|
||||
// with valid write ids. ORC tables are loaded as Full ACID tables.
|
||||
verifyCacheIneligible("select count(*) from functional_orc_def.alltypes");
|
||||
// Hive ACID insert-only tables are eligible
|
||||
verifyAllEligible(
|
||||
"select count(*) from functional_parquet.insert_only_major_and_minor_compacted",
|
||||
/* isDistributedPlan */ false);
|
||||
|
||||
// Runtime filter produced by Kudu table is not implemented
|
||||
verifyCacheIneligible("select a.id from functional.alltypes a, " +
|
||||
"functional_kudu.alltypes b where a.id = b.id");
|
||||
|
||||
Reference in New Issue
Block a user