IMPALA-12668: Enable clang-tidy checks for implicit fallthrough

In switch/case statements, one case can fallthrough to the next case. Sometimes this is intentional, but it is also a common source of bugs (i.e. a missing break/return statement). Clang-Tidy's clang-diagnostic-implicit-fallthrough flags locations where a case falls through to the next case without an explicit fallthrough declaration. This change enables clang-diagnostic-implicit-fallthrough and fixes failing locations. Since Impala uses C++17, this uses C++17's [[fallthrough]] to indicate an explicit fallthrough. This also adjusts clang-tidy's output to suggest [[fallthrough]] as the preferred way to indicate fallthrough. Testing: - Ran core job - Ran clang tidy Change-Id: I6d65c92b442fa0317c3af228997571e124a54092 Reviewed-on: http://gerrit.cloudera.org:8080/20847 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Zihao Ye <eyizoha@163.com> Reviewed-by: Michael Smith <michael.smith@cloudera.com>
2025-12-19 18:12:08 -05:00 · 2023-12-15 16:34:47 -08:00
parent 324a1aa37e
commit dd8ddf77c3
22 changed files with 105 additions and 32 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -48,7 +48,6 @@ Checks: "-*,clang*,\
 -clang-diagnostic-gnu-anonymous-struct,\
 -clang-diagnostic-gnu-zero-variadic-macro-arguments,\
 -clang-diagnostic-header-hygiene,\
 -clang-diagnostic-implicit-fallthrough,\
 -clang-diagnostic-missing-prototypes,\
 -clang-diagnostic-missing-variable-declarations,\
 -clang-diagnostic-nested-anon-types,\
--- a/be/src/benchmarks/atoi-benchmark.cc
+++ b/be/src/benchmarks/atoi-benchmark.cc
@@ -91,7 +91,9 @@ inline int32_t AtoiUnsafe(const char* s, int len) {
  bool negative = false;
  int i = 0;
  switch (*s) {
-    case '-': negative = true;
+    case '-':
      negative = true;
      [[fallthrough]];
    case '+': ++i;
  }
@@ -107,25 +109,34 @@ inline int32_t AtoiUnrolled(const char* s, int len) {
    int32_t val = 0;
    bool negative = false;
    switch (*s) {
-      case '-': negative = true;
+      case '-':
        negative = true;
        [[fallthrough]];
      case '+': --len; ++s;
    }
    switch(len) {
      case 8:
        val += (DIGIT(s[len - 8])) * 10000;
        [[fallthrough]];
      case 7:
        val += (DIGIT(s[len - 7])) * 10000;
        [[fallthrough]];
      case 6:
        val += (DIGIT(s[len - 6])) * 10000;
        [[fallthrough]];
      case 5:
        val += (DIGIT(s[len - 5])) * 10000;
        [[fallthrough]];
      case 4:
        val += (DIGIT(s[len - 4])) * 1000;
        [[fallthrough]];
      case 3:
        val += (DIGIT(s[len - 3])) * 100;
        [[fallthrough]];
      case 2:
        val += (DIGIT(s[len - 2])) * 10;
        [[fallthrough]];
      case 1:
        val += (DIGIT(s[len - 1]));
    }
@@ -140,7 +151,9 @@ inline int32_t AtoiCased(const char* s, int len) {
    int32_t val = 0;
    bool negative = false;
    switch (*s) {
-      case '-': negative = true;
+      case '-':
        negative = true;
        [[fallthrough]];
      case '+': --len; ++s;
    }
--- a/be/src/codegen/codegen-anyval-read-write-info.cc
+++ b/be/src/codegen/codegen-anyval-read-write-info.cc
@@ -134,6 +134,7 @@ void CodegenAnyValReadWriteInfo::CodegenConvertToCanonicalForm() {
      llvm::Value* new_val = CodegenAnyVal::ConvertToCanonicalForm(codegen_, builder_,
          type_, GetSimpleVal());
      SetSimpleVal(new_val);
      break;
    }
    default:
      ;
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -598,6 +598,7 @@ void CodegenAnyVal::ConvertToCanonicalForm() {
      llvm::Value* new_val = ConvertToCanonicalForm(codegen_,
          builder_, type_, GetVal());
      SetVal(new_val);
      break;
    }
    default:
      ;
--- a/be/src/exec/orc/hdfs-orc-scanner.cc
+++ b/be/src/exec/orc/hdfs-orc-scanner.cc
@@ -689,6 +689,7 @@ void HdfsOrcScanner::SetSyntheticAcidFieldForOriginalFile(const SlotDescriptor*
      break;
    case ACID_FIELD_ROWID_INDEX:
      file_position_ = slot_desc;
      break;
    default:
      break;
  }
--- a/be/src/exec/parquet/parquet-column-stats.cc
+++ b/be/src/exec/parquet/parquet-column-stats.cc
@@ -174,6 +174,7 @@ bool ColumnStatsReader::ReadFromString(StatsField stats_field,
              static_cast<Decimal16Value*>(slot));
        }
      DCHECK(false) << "Unknown decimal byte size: " << col_type_.GetByteSize();
      break;
    case TYPE_DATE:
      return ColumnStats<DateValue>::DecodePlainValue(encoded_value, slot, element_.type);
    default:
--- a/be/src/exprs/agg-fn-evaluator.cc
+++ b/be/src/exprs/agg-fn-evaluator.cc
@@ -260,7 +260,9 @@ void AggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_slo
 #endif
          return;
        default:
-          break;
+          DCHECK(false) << "Unknown decimal byte size: "
                        << dst_slot_desc.type().GetByteSize();
          return;
      }
    case TYPE_DATE:
      *reinterpret_cast<DateValue*>(slot) =
--- a/be/src/exprs/anyval-util.h
+++ b/be/src/exprs/anyval-util.h
@@ -330,7 +330,8 @@ class AnyValUtil {
 #endif
            return;
          default:
-            break;
+            DCHECK(false) << "Unknown decimal byte size: " << type.GetByteSize();
            return;
        }
      case TYPE_DATE:
        *reinterpret_cast<DateVal*>(dst) =
--- a/be/src/runtime/datetime-parser-common.cc
+++ b/be/src/runtime/datetime-parser-common.cc
@@ -130,6 +130,7 @@ void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_ty
        break;
      case MISPLACED_FX_MODIFIER_ERROR:
        ss << "PARSE ERROR: FX modifier should be at the beginning of the format string.";
        break;
      case CONFLICTING_DAY_OF_WEEK_TOKENS_ERROR:
        ss << "PARSE ERROR: Multiple day of week tokens provided.";
        break;
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -1011,6 +1011,7 @@ CodegenAnyValReadWriteInfo CodegenAnyValToReadWriteInfo(CodegenAnyVal& any_val,
    case TYPE_STRUCT:
      DCHECK(false) << "Invalid type for this function. "
                    << "Call 'StoreStructToNativePtr()' instead.";
      break;
    default:
      DCHECK(false) << "NYI: " << rwi.type().DebugString();
      break;
@@ -1074,6 +1075,7 @@ void SlotDescriptor::CodegenStoreNonNullAnyVal(
    case TYPE_STRUCT:
      DCHECK(false) << "Invalid type for this function. "
                    << "Call 'StoreStructToNativePtr()' instead.";
      break;
    default:
      DCHECK(false) << "NYI: " << type.DebugString();
      break;
--- a/be/src/runtime/io/disk-file.cc
+++ b/be/src/runtime/io/disk-file.cc
@@ -110,8 +110,10 @@ void MemBlock::Delete(bool* reserved, bool* allocated) {
      free(data_);
      data_ = nullptr;
      *allocated = true;
      [[fallthrough]];
    case MemBlockStatus::RESERVED:
      *reserved = true;
      [[fallthrough]];
    default:
      SetStatusLocked(lock, MemBlockStatus::DISABLED);
      DCHECK(data_ == nullptr);
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -206,6 +206,7 @@ void RawValue::WriteNonNullPrimitive(const void* value, void* dst, const ColumnT
    case TYPE_STRUCT: {
      // Structs should be handled by a different Write() function within this class.
      DCHECK(false);
      break;
    }
    default:
      DCHECK(false) << "RawValue::WriteNonNullPrimitive(): bad type: "
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -147,7 +147,8 @@ inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type)
          return reinterpret_cast<const Decimal16Value*>(v1)->value()
              == reinterpret_cast<const Decimal16Value*>(v2)->value();
        default:
-          break;
+          DCHECK(false) << "Unknown decimal byte size: " << type.GetByteSize();
          return 0;
      }
    default:
      DCHECK(false) << type;
--- a/be/src/runtime/runtime-filter-ir.cc
+++ b/be/src/runtime/runtime-filter-ir.cc
@@ -37,12 +37,14 @@ bool IR_ALWAYS_INLINE RuntimeFilter::Eval(
          return filter->EvalOverlap(col_type, val, val);
        }
      }
      break;
    }
    case TRuntimeFilterType::IN_LIST: {
      InListFilter* filter = get_in_list_filter();
      if (LIKELY(filter && !filter->AlwaysTrue())) {
        return filter->Find(val, col_type);
      }
      break;
    }
  }
  return true;
--- a/be/src/runtime/types.cc
+++ b/be/src/runtime/types.cc
@@ -146,6 +146,7 @@ TPrimitiveType::type ToThrift(PrimitiveType ptype) {
    case TYPE_ARRAY:
    case TYPE_MAP:
      DCHECK(false) << "NYI: " << ptype;
      [[fallthrough]];
    default: return TPrimitiveType::INVALID_TYPE;
  }
 }
--- a/be/src/service/query-result-set.cc
+++ b/be/src/service/query-result-set.cc
@@ -472,6 +472,7 @@ int HS2ColumnarResultSet::AddRows(
        to->binaryVal.values.insert(to->binaryVal.values.end(),
            from->binaryVal.values.begin() + start_idx,
            from->binaryVal.values.begin() + start_idx + rows_added);
        break;
      default:
        DCHECK(false) << "Unsupported type: "
                      << TypeToString(ThriftToType(
--- a/be/src/util/bit-packing.inline.h
+++ b/be/src/util/bit-packing.inline.h
@@ -411,8 +411,10 @@ const uint8_t* BitPacking::UnpackUpTo31Values(const uint8_t* __restrict__ in,
 #pragma push_macro("UNPACK_VALUES_CASE")
 #define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
-  case 31 - i: out[30 - i] = \
+  case 31 - i: \
-      static_cast<OutType>(UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer));
+    out[30 - i] = \
        static_cast<OutType>(UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer)); \
    [[fallthrough]];
  // Use switch with fall-through cases to minimise branching.
  switch (num_values) {
@@ -451,11 +453,14 @@ const uint8_t* BitPacking::UnpackAndDecodeUpTo31Values(const uint8_t* __restrict
 #pragma push_macro("DECODE_VALUES_CASE")
 #define DECODE_VALUES_CASE(ignore1, i, ignore2) \
-  case 31 - i: { \
+  case 31 - i: \
-    uint32_t idx = UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer); \
+    { \
-    uint8_t* out_pos = reinterpret_cast<uint8_t*>(out) + (30 - i) * stride; \
+      uint32_t idx = UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer); \
-    DecodeValue(dict, dict_len, idx, reinterpret_cast<OutType*>(out_pos), decode_error); \
+      uint8_t* out_pos = reinterpret_cast<uint8_t*>(out) + (30 - i) * stride; \
-  }
+      DecodeValue(dict, dict_len, idx, reinterpret_cast<OutType*>(out_pos), \
          decode_error); \
    } \
    [[fallthrough]];
  // Use switch with fall-through cases to minimise branching.
  switch (num_values) {
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -140,14 +140,27 @@ class HashUtil {
    const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
    switch (len & 7) {
-      case 7: h ^= uint64_t(data2[6]) << 48;
+      case 7:
-      case 6: h ^= uint64_t(data2[5]) << 40;
+        h ^= uint64_t(data2[6]) << 48;
-      case 5: h ^= uint64_t(data2[4]) << 32;
+        [[fallthrough]];
-      case 4: h ^= uint64_t(data2[3]) << 24;
+      case 6:
-      case 3: h ^= uint64_t(data2[2]) << 16;
+        h ^= uint64_t(data2[5]) << 40;
-      case 2: h ^= uint64_t(data2[1]) << 8;
+        [[fallthrough]];
-      case 1: h ^= uint64_t(data2[0]);
+      case 5:
-              h *= MURMUR_PRIME;
+        h ^= uint64_t(data2[4]) << 32;
        [[fallthrough]];
      case 4:
        h ^= uint64_t(data2[3]) << 24;
        [[fallthrough]];
      case 3:
        h ^= uint64_t(data2[2]) << 16;
        [[fallthrough]];
      case 2:
        h ^= uint64_t(data2[1]) << 8;
        [[fallthrough]];
      case 1:
        h ^= uint64_t(data2[0]);
        h *= MURMUR_PRIME;
    }
    h ^= h >> MURMUR_R;
@@ -260,13 +273,26 @@ class HashUtil {
    v = 0;
    switch (len & 7) {
-      case 7: v ^= static_cast<uint64_t>(pos2[6]) << 48;
+      case 7:
-      case 6: v ^= static_cast<uint64_t>(pos2[5]) << 40;
+        v ^= static_cast<uint64_t>(pos2[6]) << 48;
-      case 5: v ^= static_cast<uint64_t>(pos2[4]) << 32;
+        [[fallthrough]];
-      case 4: v ^= static_cast<uint64_t>(pos2[3]) << 24;
+      case 6:
-      case 3: v ^= static_cast<uint64_t>(pos2[2]) << 16;
+        v ^= static_cast<uint64_t>(pos2[5]) << 40;
-      case 2: v ^= static_cast<uint64_t>(pos2[1]) << 8;
+        [[fallthrough]];
-      case 1: v ^= static_cast<uint64_t>(pos2[0]);
+      case 5:
        v ^= static_cast<uint64_t>(pos2[4]) << 32;
        [[fallthrough]];
      case 4:
        v ^= static_cast<uint64_t>(pos2[3]) << 24;
        [[fallthrough]];
      case 3:
        v ^= static_cast<uint64_t>(pos2[2]) << 16;
        [[fallthrough]];
      case 2:
        v ^= static_cast<uint64_t>(pos2[1]) << 8;
        [[fallthrough]];
      case 1:
        v ^= static_cast<uint64_t>(pos2[0]);
        h ^= FastHashMix(v);
        h *= m;
    }
--- a/be/src/util/jwt-util.cc
+++ b/be/src/util/jwt-util.cc
@@ -98,6 +98,7 @@ class JWKSetParser {
      case rapidjson::kNumberType:
        if (value.IsInt()) return "Integer";
        if (value.IsDouble()) return "Float";
        [[fallthrough]];
      default:
        DCHECK(false);
        return "Unknown";
@@ -908,4 +909,4 @@ Status JWTHelper::GetCustomClaimUsername(const JWTDecodedToken* decoded_token,
  return status;
 }
-} // namespace impala
+} // namespace impala
--- a/be/src/util/redactor.cc
+++ b/be/src/util/redactor.cc
@@ -63,6 +63,7 @@ string NameOfTypeOfJsonValue(const Value& value) {
    case rapidjson::kNumberType:
      if (value.IsInt()) return "Integer";
      if (value.IsDouble()) return "Float";
      [[fallthrough]];
    default:
      DCHECK(false);
      return "Unknown";
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -154,6 +154,7 @@ class StringParser {
      switch (*s) {
        case '-':
          is_negative = true;
          [[fallthrough]];
        case '+':
          ++s;
          --len;
@@ -310,6 +311,7 @@ class StringParser {
      case '-':
        negative = true;
        max_val = static_cast<UnsignedT>(std::numeric_limits<T>::max()) + 1;
        [[fallthrough]];
      case '+': ++i;
    }
@@ -366,6 +368,7 @@ class StringParser {
      case '-':
        negative = true;
        max_val = static_cast<UnsignedT>(std::numeric_limits<T>::max()) + 1;
        [[fallthrough]];
      case '+': i = 1;
    }
@@ -472,7 +475,9 @@ class StringParser {
    bool negative = false;
    int i = 0;
    switch (*s) {
-      case '-': negative = true;  // Fallthrough is intended.
+      case '-':
        negative = true;
        [[fallthrough]];
      case '+': i = 1;
    }
--- a/bin/run_clang_tidy.sh
+++ b/bin/run_clang_tidy.sh
@@ -62,10 +62,15 @@ TMP_STDERR=$(mktemp)
 STRCAT_MESSAGE="Impala-specific note: This can also be fixed using the StrCat() function \
 from be/src/gutil/strings strcat.h)"
 CLANG_STRING_CONCAT="performance-inefficient-string-concatenation"
 FALLTHROUGH_MESSAGE="Impala-specific note: Impala is a C++ 17 codebase, so the preferred \
 way to indicate intended fallthrough is C++ 17's [[fallthrough]]"
 CLANG_FALLTHROUGH="clang-diagnostic-implicit-fallthrough"
 trap "rm $TMP_STDERR" EXIT
 if ! run-clang-tidy.py -quiet -header-filter "${PIPE_DIRS%?}" \
                       -j"${CORES}" ${DIRS} 2> ${TMP_STDERR} | \
-  sed "/${CLANG_STRING_CONCAT}/ s#\$# ${STRCAT_MESSAGE}#";
+   sed "/${CLANG_STRING_CONCAT}/ s#\$# \n${STRCAT_MESSAGE}#" | \
   sed "/${CLANG_FALLTHROUGH}/ s#\$# \n${FALLTHROUGH_MESSAGE}#" | \
   sed 's#FALLTHROUGH_INTENDED#[[fallthrough]]#';
 then
  echo "run-clang-tidy.py hit an error, dumping stderr output"
  cat ${TMP_STDERR} >&2