IMPALA-12950: Improve error message in case of out-of-range numeric conversions

IMPALA-12035 introduced checks for numeric conversions that are unsafe and can fail (if the target type cannot store the value, the behaviour is undefined): - from floating-point types to integer types - from double to float However, it can be difficult to trace which part of the query caused this based on the error message. This change adds the source type, the destination type and the value to be converted to the error message. Unfortunately, at this point in the BE, the original SQL is not available, so we cannot reference that. Testing: - extended existing tests in expr-test.cc. Change-Id: Ieeed52e25f155818c35c11a8a6821708476ffb32 Reviewed-on: http://gerrit.cloudera.org:8080/21331 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-12-25 02:03:09 -05:00 · 2024-04-17 11:47:43 +02:00
parent f620e5d5c0
commit 4f033c7750
3 changed files with 85 additions and 24 deletions
--- a/be/src/exprs/cast-functions-ir.cc
+++ b/be/src/exprs/cast-functions-ir.cc
@@ -59,6 +59,27 @@ const int MAX_BOOLEAN_CHARS = 1;

 namespace {

+template <class T>
+constexpr const char* TypeToName() {
+  if constexpr (std::is_same_v<T, int8_t>) {
+    return "TINYINT";
+  } else if constexpr (std::is_same_v<T, int16_t>) {
+    return "SMALLINT";
+  } else if constexpr (std::is_same_v<T, int32_t>) {
+    return "INT";
+  } else if constexpr (std::is_same_v<T, int64_t>) {
+    return "BIGINT";
+  } else if constexpr (std::is_same_v<T, float>) {
+    return "FLOAT";
+  } else if constexpr (std::is_same_v<T, double>) {
+    return "DOUBLE";
+  } else {
+    // This function doesn't support other types.
+    static_assert(!std::is_same_v<T, T>);
+    return nullptr;
+  }
+}
+
 // This struct is used as a helper in the validation of casts. For a cast from 'FROM_TYPE'
 // to 'TO_TYPE' it provides compile time constants about what type of conversion it is.
 // These constants are used in the template specifications of the 'Validate()' function to
@@ -158,14 +179,20 @@ bool Validate(FROM_TYPE from, FunctionContext* ctx) {
  }

  if (UNLIKELY(!is_ok)) {
+    constexpr const char* FROM_TYPE_NAME = TypeToName<FROM_TYPE>();
+    constexpr const char* TO_TYPE_NAME = TypeToName<TO_TYPE>();
+    string err;
    if (std::isnan(from)) {
-      ctx->SetError("NaN value cannot be converted to integer type.");
+      err = Substitute("NaN value of type $0 cannot be converted to $1.",
+          FROM_TYPE_NAME, TO_TYPE_NAME);
    } else if (!std::isfinite(from)) {
-      ctx->SetError("Non-finite value cannot be converted to integer type.");
+      err = Substitute("Non-finite value of type $0 cannot be converted to $1.",
+          FROM_TYPE_NAME, TO_TYPE_NAME);
    } else {
-      ctx->SetError("Out-of-range floating point value cannot be converted to "
-          "integer type.");
+      err = Substitute("Converting value $0 of type $1 to $2 failed, "
+          "value out of range for destination type.", from, FROM_TYPE_NAME, TO_TYPE_NAME);
    }
+    ctx->SetError(err.c_str());
  }

  return is_ok;
@@ -194,8 +221,10 @@ bool Validate(FROM_TYPE from, FunctionContext* ctx) {
  const bool is_ok = in_range || std::isnan(from) || !std::isfinite(from);

  if (UNLIKELY(!is_ok)) {
-      ctx->SetError(
-          "Out-of-range double value cannot be converted to float.");
+    const string err = Substitute("Converting value $0 of type $1 to $2 failed, "
+        "value out of range for destination type.", from, TypeToName<FROM_TYPE>(),
+        TypeToName<TO_TYPE>());
+    ctx->SetError(err.c_str());
  }

  return is_ok;
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -704,9 +704,9 @@ class ExprTest : public testing::TestWithParam<std::tuple<bool, bool>> {

  template <class T>
  void TestValueOrError(const string& expr, PrimitiveType expr_type,
-      const T& expected_result, bool expect_error) {
+      const T& expected_result, bool expect_error, const std::string& error) {
    if (expect_error) {
-      TestError(expr);
+      TestErrorString(expr, error);
    } else {
      TestValue(expr, expr_type, expected_result);
    }
@@ -1220,17 +1220,22 @@ class ExprTest : public testing::TestWithParam<std::tuple<bool, bool>> {
    TestStringValue("cast(" + stmt + " as string)", lexical_cast<string>(val));

    TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT,
-        static_cast<int8_t>(val), min_integer_size > sizeof(int8_t));
+        static_cast<int8_t>(val), min_integer_size > sizeof(int8_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT,
-        static_cast<int16_t>(val), min_integer_size > sizeof(int16_t));
+        static_cast<int16_t>(val), min_integer_size > sizeof(int16_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as int)", TYPE_INT,
-        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT,
-        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        static_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT,
-        static_cast<int64_t>(val), min_integer_size > sizeof(int64_t));
-    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT,
-        static_cast<float>(val), float_out_of_range);
+        static_cast<int64_t>(val), min_integer_size > sizeof(int64_t),
+        " value out of range for destination type.\n");
+    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT, static_cast<float>(val),
+        float_out_of_range, "value out of range for destination type.\n");
    if (!timestamp_out_of_range) {
      TestTimestampValue("cast(" + stmt + " as timestamp)", CreateTestTimestamp(val));
    } else {
@@ -1287,17 +1292,22 @@ void ExprTest::TestCast(const string& stmt, const char* val, int min_integer_siz
    TestValue(stmt + " as boolean)", TYPE_BOOLEAN, lexical_cast<bool>(val));
 #endif
    TestValueOrError("cast(" + stmt + " as tinyint)", TYPE_TINYINT,
-        val8, min_integer_size > sizeof(int8_t));
+        val8, min_integer_size > sizeof(int8_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as smallint)", TYPE_SMALLINT,
-        lexical_cast<int16_t>(val), min_integer_size > sizeof(int16_t));
+        lexical_cast<int16_t>(val), min_integer_size > sizeof(int16_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as int)", TYPE_INT,
-        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as integer)", TYPE_INT,
-        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t));
+        lexical_cast<int32_t>(val), min_integer_size > sizeof(int32_t),
+        "value out of range for destination type.\n");
    TestValueOrError("cast(" + stmt + " as bigint)", TYPE_BIGINT,
-        lexical_cast<int64_t>(val), min_integer_size > sizeof(int64_t));
-    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT,
-        lexical_cast<float>(val), float_out_of_range);
+        lexical_cast<int64_t>(val), min_integer_size > sizeof(int64_t),
+        "value out of range for destination type.\n");
+    TestValueOrError("cast(" + stmt + " as float)", TYPE_FLOAT, lexical_cast<float>(val),
+        float_out_of_range, "value out of range for destination type.\n");

    TestValue("cast(" + stmt + " as double)", TYPE_DOUBLE, lexical_cast<double>(val));
    TestValue("cast(" + stmt + " as real)", TYPE_DOUBLE, lexical_cast<double>(val));
@@ -3503,6 +3513,26 @@ TEST_P(ExprTest, CastExprs) {
  TestIsNull("cast(cast(1180591620717411303425 as decimal(38, 0)) as timestamp)",
      TYPE_TIMESTAMP);

+  // Explicitly test the error message of an out-of-range double-to-integer conversion.
+  TestErrorString("cast(cast(500 as DOUBLE) as TINYINT)",
+      "Converting value 500 of type DOUBLE to TINYINT failed, "
+      "value out of range for destination type.\n");
+
+  // Explicitly test the error message of an out-of-range double-to-float conversion.
+  TestErrorString("cast(1e40 as FLOAT)",
+      "Converting value 1e+40 of type DOUBLE to FLOAT failed, "
+      "value out of range for destination type.\n");
+
+  // Nan and non-finite floating-point values converted to int.
+  TestErrorString("cast(cast((1/0) as FLOAT) as INT)",
+      "Non-finite value of type FLOAT cannot be converted to INT.\n");
+  TestErrorString("cast((1/0) as INT)",
+      "Non-finite value of type DOUBLE cannot be converted to INT.\n");
+  TestErrorString("cast(cast((0/0) as FLOAT) as INT)",
+      "NaN value of type FLOAT cannot be converted to INT.\n");
+  TestErrorString("cast((0/0) as INT)",
+      "NaN value of type DOUBLE cannot be converted to INT.\n");
+
  // Out of range String <--> Timestamp - invalid boundary cases.
  TestIsNull("cast('1399-12-31 23:59:59' as timestamp)", TYPE_TIMESTAMP);
  TestIsNull("cast('10000-01-01 00:00:00' as timestamp)", TYPE_TIMESTAMP);
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -162,8 +162,10 @@ class FunctionContext {
  /// Returns the query_id for the current query.
  UniqueId query_id() const;

-  /// Sets an error for this UDF. If this is called, this will trigger the
-  /// query to fail.
+  /// Sets an error for this UDF. The error message is copied and the copy is owned by
+  /// this object.
+  ///
+  /// If this is called, this will trigger the query to fail.
  void SetError(const char* error_msg);

  /// Adds a warning that is returned to the user. This can include things like