mirror of
https://github.com/apache/impala.git
synced 2026-01-08 12:02:54 -05:00
A compute stats command computes the table and column stats for a given table and persists them in the metastore. The table stats consist of the per-partition and per-table row count. The column stats are computed on a per-table basis and consist of the number of distinct values and the number of NULLs per column. This patch introduces a new 'child query' concept that compute stats utilizes. Child queries are cancelled if the parent query is cancelled. A compute stats stmt is executed by the following query hierarchy: parent: compute stats query (DDL) - child: compute table stats query (QUERY) - child: compute column stats query (QUERY) The new child query concept is necessary to decouple child query fetches from parent query fetches, i.e., we could not execute a child query as part of the original compute stats query, because then a client could fetch the results we need for updating the Metastore statistics. The reason why our existing CTAS works without this decoupling is that its insert 'child query' is not fetchable. Change-Id: I560533e3cb09bcbbdb3eea7fcf0b460bc6b36dcd Reviewed-on: http://gerrit.ent.cloudera.com:8080/873 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: jenkins
66 lines
1.8 KiB
Thrift
66 lines
1.8 KiB
Thrift
// Copyright 2012 Cloudera Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
namespace cpp impala
|
|
namespace java com.cloudera.impala.thrift
|
|
|
|
include "Types.thrift"
|
|
include "CatalogObjects.thrift"
|
|
|
|
// Serialized, self-contained version of a RowBatch (in be/src/runtime/row-batch.h).
// Fields 3-5 carry no explicit requiredness keyword, so Thrift's default
// requiredness applies to them.
struct TRowBatch {
  // total number of rows contained in this batch
  1: required i32 num_rows

  // row composition, expressed as a list of tuple ids
  // NOTE(review): presumably one entry per tuple making up a row, i.e. the
  // list length is the num_tuples_per_row referenced below -- confirm.
  2: required list<Types.TTupleId> row_tuples

  // There are a total of num_rows * num_tuples_per_row offsets
  // pointing into tuple_data.
  // An offset of -1 records a NULL.
  3: list<i32> tuple_offsets

  // binary tuple data (the buffer that tuple_offsets points into)
  // TODO: figure out how we can avoid copying the data during TRowBatch construction
  4: string tuple_data

  // Indicates whether tuple_data is snappy-compressed
  5: bool is_compressed
}
|
|
|
|
// this is a union over all possible return types
// It is declared as a struct in which every field is optional, one field per
// supported value type.
// NOTE(review): presumably exactly one field is set for a given value --
// confirm against the code that produces these.
struct TColumnValue {
  // TODO: use <type>_val instead of camelcase
  1: optional bool boolVal
  2: optional i32 intVal
  3: optional i64 longVal
  4: optional double doubleVal
  5: optional string stringVal
}
|
|
|
|
// A single result row, represented as a list of column values.
struct TResultRow {
  // The values of this row.
  // NOTE(review): presumably ordered like the columns of the accompanying
  // TResultSetMetadata -- confirm.
  1: list<TColumnValue> colVals
}
|
|
|
|
// Metadata of a result set: the list of its columns.
struct TResultSetMetadata {
  // Column descriptors, using the TColumn type from CatalogObjects.thrift.
  1: required list<CatalogObjects.TColumn> columns
}
|
|
|
|
// List of rows and metadata describing their columns.
struct TResultSet {
  // The rows of the result set.
  1: required list<TResultRow> rows

  // Metadata describing the columns of the rows.
  2: required TResultSetMetadata schema
}
|
|
|