IMPALA-14473: Fix absolute path logic for sorting scan ranges oldest to newest

When IMPALA-14462 added tie-breaking logic to
ScanRangeOldestToNewestComparator, it relied on absolute path
being unset if the relative path is set. However, the code
always sets absolute path and uses an empty string to indicate
whether it is set. This caused the tie-breaking logic to see
two unrelated scan ranges as equal, triggering a DCHECK when
running query_test/test_tuple_cache_tpc_queries.py.

The fix is to rearrange the logic to check whether the relative
path is not empty rather than checking whether the absolute
path is set.

Testing:
 - Ran query_test/test_tuple_cache_tpc_queries.py
 - Ran custom_cluster/test_tuple_cache.py

Change-Id: I449308f4a0efdca7fc238e3dda24985a2931dd37
Reviewed-on: http://gerrit.cloudera.org:8080/23495
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Yida Wu <wydbaggio000@gmail.com>
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
Joe McDonnell
2025-10-06 12:57:49 -07:00
parent a77fec6391
commit 762fe0a4f5

View File

@@ -194,9 +194,9 @@ static bool ScanRangeOldestToNewestComparator(
// Multiple files (or multiple splits from the same file) can have the same
// modification time, so we need tie-breaking when they are equal.
if (split1.mtime != split2.mtime) return split1.mtime < split2.mtime;
if (!split1.__isset.absolute_path && !split2.__isset.absolute_path) {
// If neither has an absolute path set (the common case), compare the
// partition hash and relative path
if (!split1.relative_path.empty() && !split2.relative_path.empty()) {
// If both have a relative path set (the common case), compare the partition hash
// and relative path
if (split1.partition_path_hash != split2.partition_path_hash) {
return split1.partition_path_hash < split2.partition_path_hash;
}
@@ -204,10 +204,19 @@ static bool ScanRangeOldestToNewestComparator(
return split1.relative_path < split2.relative_path;
}
} else {
// If only one has an absolute path, sort absolute paths ahead of relative paths.
if (split1.__isset.absolute_path && !split2.__isset.absolute_path) return true;
if (!split1.__isset.absolute_path && split2.__isset.absolute_path) return false;
// Both are absolute, so compare them
// If only one has a relative path, sort absolute paths ahead of relative paths.
if (split1.relative_path.empty()) {
DCHECK(split1.__isset.absolute_path && !split1.absolute_path.empty());
return true;
}
if (split2.relative_path.empty()) {
DCHECK(split2.__isset.absolute_path && !split2.absolute_path.empty());
return false;
}
// Both have empty relative paths, so compare the absolute paths (which must be
// non-empty)
DCHECK(split1.__isset.absolute_path && !split1.absolute_path.empty());
DCHECK(split2.__isset.absolute_path && !split2.absolute_path.empty());
if (split1.absolute_path != split2.absolute_path) {
return split1.absolute_path < split2.absolute_path;
}