On Kudu scans, always build a schema with 0 key columns.

We currently have a bug where SELECT queries with named columns only work if the key columns are declared first. This is because, on scans, we're passing a number of key columns equal to the number of key columns referred to by slot descriptors. The problem is that Kudu expects key columns to come first in the schema if the number of key columns is > 0, and we build a schema that matches the column order in the SlotDescriptors vector, which might not have key columns first. However, Kudu scans don't actually care about key column ordering _if_ the number of key columns is set to 0 (which is weird behavior, filed KUDU-852 for this). This patch just changes the built Kudu schema so that we always pass 0 key columns. It also adds an end-to-end test that makes sure a previously failing projection now works. Change-Id: I0826dabd87493a684cfc18058a4b5aa02f7f6cdc Reviewed-on: http://gerrit.sjc.cloudera.com:8080/7130 Tested-by: jenkins Reviewed-by: Daniel Hecht <dhecht@cloudera.com>
2026-01-07 18:02:33 -05:00 · 2015-07-09 14:44:24 -07:00
parent 2db4256c67
commit af1e1bea15
2 changed files with 14 additions and 5 deletions
--- a/be/src/exec/kudu-util.cc
+++ b/be/src/exec/kudu-util.cc
@@ -112,8 +112,8 @@ Status KuduSchemaFromExpressionList(const std::vector<TExpr>& expressions,
    RETURN_IF_ERROR(ImpalaToKuduType(node.type, &kt));

    // Key columns are not nullable, all others are for now.
-    bool is_key = key_col_names.find(col_name) == key_col_names.end();
-    kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key));
+    bool is_key = key_col_names.find(col_name) != key_col_names.end();
+    kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key));
  }

  schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size()));
@@ -151,11 +151,12 @@ Status KuduSchemaFromTupleDescriptor(const TupleDescriptor& tuple_desc,
    RETURN_IF_ERROR(ImpalaToKuduType(slots[i]->type(), &kt));

    // Key columns are not nullable, all others are for now.
-    bool is_key = key_col_names.find(col_name) == key_col_names.end();
-    kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key));
+    bool is_key = key_col_names.find(col_name) != key_col_names.end();
+    kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key));
  }

-  schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size()));
+  // Scans don't care about key columns so we always pass 0.
+  schema->Reset(kudu_cols, 0);
  return Status::OK();
 }

--- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
@@ -22,4 +22,12 @@ select * from dimtbl order by id limit 1;
 1001,'Name1',94611
 ---- TYPES
 BIGINT, STRING, INT
+====
+---- QUERY
+# Make sure that we can list the columns to be scanned in any order.
+select zip, id from dimtbl order by id limit 1;
+---- RESULTS
+94611,1001
+---- TYPES
+INT, BIGINT
 ====