From af1e1bea15c96a91ec0056a487879dc75742da98 Mon Sep 17 00:00:00 2001 From: David Alves Date: Thu, 9 Jul 2015 14:44:24 -0700 Subject: [PATCH] On Kudu scans, always build a schema with 0 key columns. We currently have a bug where SELECT queries with named columns only work if the key columns are declared first. This is because, on scans, we're passing a number of key columns equal to the number of key columns referred to by slot descriptors. The problem is that Kudu expects key columns to come first in the schema if the number of key columns is > 0 and we build a schema that matches the column order in the SlotDescriptors vector, which might not have key columns first. However, Kudu scans don't actually care about key column ordering _if_ the number of key columns is set to 0 (which is weird behavior, filed KUDU-852 for this). This patch just changes the built Kudu schema so that we always pass 0 key columns. It also adds an end-to-end test that makes sure a previously failing projection now works. Change-Id: I0826dabd87493a684cfc18058a4b5aa02f7f6cdc Reviewed-on: http://gerrit.sjc.cloudera.com:8080/7130 Tested-by: jenkins Reviewed-by: Daniel Hecht --- be/src/exec/kudu-util.cc | 11 ++++++----- .../queries/QueryTest/kudu-scan-node.test | 8 ++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/be/src/exec/kudu-util.cc b/be/src/exec/kudu-util.cc index 978b6ea75..2eaebe54c 100644 --- a/be/src/exec/kudu-util.cc +++ b/be/src/exec/kudu-util.cc @@ -112,8 +112,8 @@ Status KuduSchemaFromExpressionList(const std::vector& expressions, RETURN_IF_ERROR(ImpalaToKuduType(node.type, &kt)); // Key columns are not nullable, all others are for now. 
- bool is_key = key_col_names.find(col_name) == key_col_names.end(); - kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key)); + bool is_key = key_col_names.find(col_name) != key_col_names.end(); + kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key)); } schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size())); @@ -151,11 +151,12 @@ Status KuduSchemaFromTupleDescriptor(const TupleDescriptor& tuple_desc, RETURN_IF_ERROR(ImpalaToKuduType(slots[i]->type(), &kt)); // Key columns are not nullable, all others are for now. - bool is_key = key_col_names.find(col_name) == key_col_names.end(); - kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key)); + bool is_key = key_col_names.find(col_name) != key_col_names.end(); + kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key)); } - schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size())); + // Scans don't care about key columns so we always pass 0. + schema->Reset(kudu_cols, 0); return Status::OK(); } diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test index 8fe1dd5a4..5e68a2c5d 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test @@ -22,4 +22,12 @@ select * from dimtbl order by id limit 1; 1001,'Name1',94611 ---- TYPES BIGINT, STRING, INT +==== +---- QUERY +# Make sure that we can list the columns to be scanned in any order. +select zip, id from dimtbl order by id limit 1; +---- RESULTS +94611,1001 +---- TYPES +INT, BIGINT ==== \ No newline at end of file