From af1e1bea15c96a91ec0056a487879dc75742da98 Mon Sep 17 00:00:00 2001
From: David Alves <david.alves@cloudera.com>
Date: Thu, 9 Jul 2015 14:44:24 -0700
Subject: [PATCH] On Kudu scans, always build a schema with 0 key columns.

We currently have a bug where SELECT queries with named columns
only work if the key columns are declared first.

This is because, on scans, we're passing a number of key columns equal
to the number of key columns referred to by slot descriptors. The
problem is that Kudu expects key columns to come first in the schema
if the number of key columns is > 0 and we build a schema that matches
the column order in the SlotDescriptors vector, which might not have
key columns first. However, Kudu scans don't actually care about
key column ordering _if_ the number of key columns is set
to 0 (which is weird behavior; filed KUDU-852 for this).

This patch just changes the built Kudu schema so that we always pass
0 key columns. It also adds an end-to-end test that makes sure a
previously failing projection now works.

Change-Id: I0826dabd87493a684cfc18058a4b5aa02f7f6cdc
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/7130
Tested-by: jenkins
Reviewed-by: Daniel Hecht <dhecht@cloudera.com>
---
 be/src/exec/kudu-util.cc                              | 11 ++++++-----
 .../queries/QueryTest/kudu-scan-node.test             |  8 ++++++++
 2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/be/src/exec/kudu-util.cc b/be/src/exec/kudu-util.cc
index 978b6ea75..2eaebe54c 100644
--- a/be/src/exec/kudu-util.cc
+++ b/be/src/exec/kudu-util.cc
@@ -112,8 +112,8 @@ Status KuduSchemaFromExpressionList(const std::vector<TExpr>& expressions,
     RETURN_IF_ERROR(ImpalaToKuduType(node.type, &kt));
 
     // Key columns are not nullable, all others are for now.
-    bool is_key = key_col_names.find(col_name) == key_col_names.end();
-    kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key));
+    bool is_key = key_col_names.find(col_name) != key_col_names.end();
+    kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key));
   }
 
   schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size()));
@@ -151,11 +151,12 @@ Status KuduSchemaFromTupleDescriptor(const TupleDescriptor& tuple_desc,
     RETURN_IF_ERROR(ImpalaToKuduType(slots[i]->type(), &kt));
 
     // Key columns are not nullable, all others are for now.
-    bool is_key = key_col_names.find(col_name) == key_col_names.end();
-    kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key));
+    bool is_key = key_col_names.find(col_name) != key_col_names.end();
+    kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key));
   }
 
-  schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size()));
+  // Scans don't care about key columns so we always pass 0.
+  schema->Reset(kudu_cols, 0);
   return Status::OK();
 }
 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
index 8fe1dd5a4..5e68a2c5d 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
@@ -22,4 +22,12 @@ select * from dimtbl order by id limit 1;
 1001,'Name1',94611
 ---- TYPES
 BIGINT, STRING, INT
+====
+---- QUERY
+# Make sure that we can list the columns to be scanned in any order.
+select zip, id from dimtbl order by id limit 1;
+---- RESULTS
+94611,1001
+---- TYPES
+INT, BIGINT
 ====
\ No newline at end of file