mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
For files that have a Cloudera copyright (and no other copyright notice), make changes to follow the ASF source file header policy here: http://www.apache.org/legal/src-headers.html#headers Specifically: 1) Remove the Cloudera copyright. 2) Modify NOTICE.txt according to http://www.apache.org/legal/src-headers.html#notice to follow that format and add a line for Cloudera. 3) Replace or add the existing ASF license text with the one given on the website. Much of this change was automatically generated via: git grep -li 'Copyright.*Cloudera' > modified_files.txt cat modified_files.txt | xargs perl -n -i -e 'print unless m#Copyright.*Cloudera#i;' cat modified_files_txt | xargs fix_apache_license.py [1] Some manual fixups were performed following those steps, especially when license text was completely missing from the file. [1] https://gist.github.com/anonymous/ff71292094362fc5c594 with minor modification to ORIG_LICENSE to match Impala's license text. Change-Id: I2e0bd8420945b953e1b806041bea4d72a3943d86 Reviewed-on: http://gerrit.cloudera.org:8080/3779 Reviewed-by: Dan Hecht <dhecht@cloudera.com> Tested-by: Internal Jenkins
161 lines
6.2 KiB
C++
161 lines
6.2 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
|
|
|
|
|
|
#ifndef IMPALA_EXEC_AGGREGATION_NODE_H
|
|
#define IMPALA_EXEC_AGGREGATION_NODE_H
|
|
|
|
#include <boost/scoped_ptr.hpp>
|
|
|
|
#include "exec/exec-node.h"
|
|
#include "exec/old-hash-table.h"
|
|
#include "runtime/descriptors.h" // for TupleId
|
|
#include "runtime/mem-pool.h"
|
|
#include "runtime/string-value.h"
|
|
|
|
/// Forward declaration only: this header names llvm::Function solely through
/// pointers, so the full LLVM definition does not need to be included here.
namespace llvm {
class Function;
}
|
|
|
|
namespace impala {
|
|
|
|
/// Forward declarations of impala types. The ones used by the declarations in this
/// header appear only behind pointers, so incomplete types are sufficient.
/// NOTE(review): LlvmCodeGen and StringValue are not referenced by any declaration
/// visible in this header -- confirm whether they are still needed before removing.
class AggFnEvaluator;
class LlvmCodeGen;
class RowBatch;
class RuntimeState;
struct StringValue;
class Tuple;
class TupleDescriptor;
class SlotDescriptor;
|
|
|
|
/// Node for in-memory hash aggregation.
|
|
/// The node creates a hash set of aggregation intermediate tuples, which
|
|
/// contain slots for all grouping and aggregation exprs (the grouping
|
|
/// slots precede the aggregation expr slots in the output tuple descriptor).
|
|
//
|
|
/// TODO: codegen cross-compiled UDAs and get rid of handcrafted IR.
|
|
/// TODO: investigate high compile time for wide tables
|
|
class AggregationNode : public ExecNode {
|
|
public:
|
|
AggregationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
|
|
|
|
virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
|
|
virtual Status Prepare(RuntimeState* state);
|
|
virtual Status Open(RuntimeState* state);
|
|
virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
|
|
virtual Status Reset(RuntimeState* state);
|
|
virtual void Close(RuntimeState* state);
|
|
|
|
static const char* LLVM_CLASS_NAME;
|
|
|
|
protected:
|
|
virtual void DebugString(int indentation_level, std::stringstream* out) const;
|
|
|
|
private:
|
|
boost::scoped_ptr<OldHashTable> hash_tbl_;
|
|
OldHashTable::Iterator output_iterator_;
|
|
|
|
std::vector<AggFnEvaluator*> aggregate_evaluators_;
|
|
|
|
/// FunctionContext for each agg fn and backing pool.
|
|
std::vector<impala_udf::FunctionContext*> agg_fn_ctxs_;
|
|
boost::scoped_ptr<MemPool> agg_fn_pool_;
|
|
|
|
/// Exprs used to evaluate input rows
|
|
std::vector<ExprContext*> probe_expr_ctxs_;
|
|
/// Exprs used to insert constructed aggregation tuple into the hash table.
|
|
/// All the exprs are simply SlotRefs for the intermediate tuple.
|
|
std::vector<ExprContext*> build_expr_ctxs_;
|
|
|
|
/// Tuple into which Update()/Merge()/Serialize() results are stored.
|
|
TupleId intermediate_tuple_id_;
|
|
TupleDescriptor* intermediate_tuple_desc_;
|
|
|
|
/// Tuple into which Finalize() results are stored. Possibly the same as
|
|
/// the intermediate tuple.
|
|
TupleId output_tuple_id_;
|
|
TupleDescriptor* output_tuple_desc_;
|
|
|
|
/// Intermediate result of aggregation w/o GROUP BY.
|
|
/// Note: can be NULL even if there is no grouping if the result tuple is 0 width
|
|
Tuple* singleton_intermediate_tuple_;
|
|
|
|
boost::scoped_ptr<MemPool> tuple_pool_;
|
|
|
|
/// IR for process row batch. NULL if codegen is disabled.
|
|
llvm::Function* codegen_process_row_batch_fn_;
|
|
|
|
typedef void (*ProcessRowBatchFn)(AggregationNode*, RowBatch*);
|
|
/// Jitted ProcessRowBatch function pointer. Null if codegen is disabled.
|
|
ProcessRowBatchFn process_row_batch_fn_;
|
|
|
|
/// Certain aggregates require a finalize step, which is the final step of the
|
|
/// aggregate after consuming all input rows. The finalize step converts the aggregate
|
|
/// value into its final form. This is true if this node contains aggregate that requires
|
|
/// a finalize step.
|
|
bool needs_finalize_;
|
|
|
|
/// Time spent processing the child rows
|
|
RuntimeProfile::Counter* build_timer_;
|
|
/// Time spent returning the aggregated rows
|
|
RuntimeProfile::Counter* get_results_timer_;
|
|
/// Num buckets in hash table
|
|
RuntimeProfile::Counter* hash_table_buckets_counter_;
|
|
/// Load factor in hash table
|
|
RuntimeProfile::Counter* hash_table_load_factor_counter_;
|
|
|
|
/// Constructs a new aggregation intermediate tuple (allocated from tuple_pool_),
|
|
/// initialized to grouping values computed over 'current_row_'.
|
|
/// Aggregation expr slots are set to their initial values.
|
|
Tuple* ConstructIntermediateTuple();
|
|
|
|
/// Updates the aggregation intermediate tuple 'tuple' with aggregation values
|
|
/// computed over 'row'.
|
|
void UpdateTuple(Tuple* tuple, TupleRow* row);
|
|
|
|
/// Called on the intermediate tuple of each group after all input rows have been
|
|
/// consumed and aggregated. Computes the final aggregate values to be returned in
|
|
/// GetNext() using the agg fn evaluators' Serialize() or Finalize().
|
|
/// For the Finalize() case if the output tuple is different from the intermediate
|
|
/// tuple, then a new tuple is allocated from 'pool' to hold the final result.
|
|
/// Returns the tuple holding the final aggregate values.
|
|
Tuple* FinalizeTuple(Tuple* tuple, MemPool* pool);
|
|
|
|
/// Do the aggregation for all tuple rows in the batch
|
|
void ProcessRowBatchNoGrouping(RowBatch* batch);
|
|
void ProcessRowBatchWithGrouping(RowBatch* batch);
|
|
|
|
/// Codegen the process row batch loop. The loop has already been compiled to
|
|
/// IR and loaded into the codegen object. UpdateAggTuple has also been
|
|
/// codegen'd to IR. This function will modify the loop subsituting the
|
|
/// UpdateAggTuple function call with the (inlined) codegen'd 'update_tuple_fn'.
|
|
llvm::Function* CodegenProcessRowBatch(
|
|
RuntimeState* state, llvm::Function* update_tuple_fn);
|
|
|
|
/// Codegen for updating aggregate_exprs at slot_idx. Returns NULL if unsuccessful.
|
|
/// slot_idx is the idx into aggregate_exprs_ (does not include grouping exprs).
|
|
llvm::Function* CodegenUpdateSlot(
|
|
RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc);
|
|
|
|
/// Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful.
|
|
llvm::Function* CodegenUpdateTuple(RuntimeState* state);
|
|
};
|
|
|
|
}
|
|
|
|
#endif
|