On Kudu scans, always build a schema with 0 key columns.

We currently have a bug where SELECT queries with named columns
only work if the key columns are declared first.

This is because, on scans, we're passing a number of key columns equal
to the number of key columns referred to by slot descriptors. The
problem is that Kudu expects key columns to come first in the schema
if the number of key columns is > 0, but we build a schema that matches
the column order in the SlotDescriptors vector, which might not have
key columns first. However, Kudu scans don't actually care about
key column ordering _if_ the number of key columns is set
to 0 (which is weird behavior; filed KUDU-852 for this).

This patch just changes the built Kudu schema so that we always pass
0 key columns. It also adds an end-to-end test that makes sure a
previously failing projection now works.

Change-Id: I0826dabd87493a684cfc18058a4b5aa02f7f6cdc
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/7130
Tested-by: jenkins
Reviewed-by: Daniel Hecht <dhecht@cloudera.com>
This commit is contained in:
David Alves
2015-07-09 14:44:24 -07:00
parent 2db4256c67
commit af1e1bea15
2 changed files with 14 additions and 5 deletions

View File

@@ -112,8 +112,8 @@ Status KuduSchemaFromExpressionList(const std::vector<TExpr>& expressions,
RETURN_IF_ERROR(ImpalaToKuduType(node.type, &kt));
// Key columns are not nullable, all others are for now.
bool is_key = key_col_names.find(col_name) == key_col_names.end();
kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key));
bool is_key = key_col_names.find(col_name) != key_col_names.end();
kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key));
}
schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size()));
@@ -151,11 +151,12 @@ Status KuduSchemaFromTupleDescriptor(const TupleDescriptor& tuple_desc,
RETURN_IF_ERROR(ImpalaToKuduType(slots[i]->type(), &kt));
// Key columns are not nullable, all others are for now.
bool is_key = key_col_names.find(col_name) == key_col_names.end();
kudu_cols.push_back(KuduColumnSchema(col_name, kt, is_key));
bool is_key = key_col_names.find(col_name) != key_col_names.end();
kudu_cols.push_back(KuduColumnSchema(col_name, kt, !is_key));
}
schema->Reset(kudu_cols, std::min(kudu_cols.size(), key_col_names.size()));
// Scans don't care about key columns so we always pass 0.
schema->Reset(kudu_cols, 0);
return Status::OK();
}

View File

@@ -22,4 +22,12 @@ select * from dimtbl order by id limit 1;
1001,'Name1',94611
---- TYPES
BIGINT, STRING, INT
====
---- QUERY
# Make sure that we can list the columns to be scanned in any order.
select zip, id from dimtbl order by id limit 1;
---- RESULTS
94611,1001
---- TYPES
INT, BIGINT
====