Files
impala/be/src/exec/aggregation-node.h
Dan Hecht ffa7829b70 IMPALA-3918: Remove Cloudera copyrights and add ASF license header
For files that have a Cloudera copyright (and no other copyright
notice), make changes to follow the ASF source file header policy here:

http://www.apache.org/legal/src-headers.html#headers

Specifically:
1) Remove the Cloudera copyright.
2) Modify NOTICE.txt according to
   http://www.apache.org/legal/src-headers.html#notice
   to follow that format and add a line for Cloudera.
3) Replace the existing ASF license text with the one given on the
   website, or add it where it is missing.

Much of this change was automatically generated via:

git grep -li 'Copyright.*Cloudera' > modified_files.txt
cat modified_files.txt | xargs perl -n -i -e 'print unless m#Copyright.*Cloudera#i;'
cat modified_files.txt | xargs fix_apache_license.py [1]

Some manual fixups were performed following those steps, especially when
license text was completely missing from the file.

[1] https://gist.github.com/anonymous/ff71292094362fc5c594 with minor
    modification to ORIG_LICENSE to match Impala's license text.

Change-Id: I2e0bd8420945b953e1b806041bea4d72a3943d86
Reviewed-on: http://gerrit.cloudera.org:8080/3779
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Internal Jenkins
2016-08-09 08:19:41 +00:00

161 lines
6.2 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXEC_AGGREGATION_NODE_H
#define IMPALA_EXEC_AGGREGATION_NODE_H
#include <boost/scoped_ptr.hpp>
#include "exec/exec-node.h"
#include "exec/old-hash-table.h"
#include "runtime/descriptors.h" // for TupleId
#include "runtime/mem-pool.h"
#include "runtime/string-value.h"
/// llvm::Function is only referred to through pointers in this header, so a
/// forward declaration is enough here.
namespace llvm { class Function; }
namespace impala {
/// Forward declarations of types this header only names (kept in
/// alphabetical order). StringValue is declared as a struct; keep the
/// class-key as is.
class AggFnEvaluator;
class LlvmCodeGen;
class RowBatch;
class RuntimeState;
class SlotDescriptor;
struct StringValue;
class Tuple;
class TupleDescriptor;
/// Node for in-memory hash aggregation.
/// The node creates a hash set of aggregation intermediate tuples, which
/// contain slots for all grouping and aggregation exprs (the grouping
/// slots precede the aggregation expr slots in the output tuple descriptor).
///
/// NOTE(review): this class exposes LLVM_CLASS_NAME and holds codegen'd function
/// pointers, so generated code presumably depends on the member layout -- confirm
/// before reordering or inserting data members.
///
/// TODO: codegen cross-compiled UDAs and get rid of handcrafted IR.
/// TODO: investigate high compile time for wide tables
class AggregationNode : public ExecNode {
public:
/// Constructs the node from its thrift plan node 'tnode'. 'pool' and 'descs' are
/// presumably forwarded to the ExecNode base class -- confirm in the .cc file.
AggregationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
/// Execution entry points, all declared virtual. Presumably these override the
/// corresponding ExecNode methods; see exec/exec-node.h for the exact contract
/// of each call (TODO confirm).
virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
virtual Status Prepare(RuntimeState* state);
virtual Status Open(RuntimeState* state);
virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
virtual Status Reset(RuntimeState* state);
virtual void Close(RuntimeState* state);
/// Defined outside this header. Presumably the name under which this class
/// appears in the LLVM IR module -- TODO confirm against the definition.
static const char* LLVM_CLASS_NAME;
protected:
/// Writes a debug description of this node to 'out'. 'indentation_level' is
/// presumably the nesting depth used to indent the output -- TODO confirm.
virtual void DebugString(int indentation_level, std::stringstream* out) const;
private:
/// Hash table owned by this node. Presumably the hash set of intermediate tuples
/// described in the class comment.
boost::scoped_ptr<OldHashTable> hash_tbl_;
/// Iterator over an OldHashTable. Presumably tracks the position in 'hash_tbl_'
/// while results are being returned -- TODO confirm.
OldHashTable::Iterator output_iterator_;
/// Evaluators for the aggregate functions of this node (pointers only; this
/// header does not show who owns them).
std::vector<AggFnEvaluator*> aggregate_evaluators_;
/// FunctionContext for each agg fn and backing pool.
std::vector<impala_udf::FunctionContext*> agg_fn_ctxs_;
boost::scoped_ptr<MemPool> agg_fn_pool_;
/// Exprs used to evaluate input rows
std::vector<ExprContext*> probe_expr_ctxs_;
/// Exprs used to insert constructed aggregation tuple into the hash table.
/// All the exprs are simply SlotRefs for the intermediate tuple.
std::vector<ExprContext*> build_expr_ctxs_;
/// Tuple into which Update()/Merge()/Serialize() results are stored.
TupleId intermediate_tuple_id_;
TupleDescriptor* intermediate_tuple_desc_;
/// Tuple into which Finalize() results are stored. Possibly the same as
/// the intermediate tuple.
TupleId output_tuple_id_;
TupleDescriptor* output_tuple_desc_;
/// Intermediate result of aggregation w/o GROUP BY.
/// Note: can be NULL even if there is no grouping if the result tuple is 0 width
Tuple* singleton_intermediate_tuple_;
/// Pool from which intermediate tuples are allocated (see
/// ConstructIntermediateTuple()).
boost::scoped_ptr<MemPool> tuple_pool_;
/// IR for process row batch. NULL if codegen is disabled.
llvm::Function* codegen_process_row_batch_fn_;
/// Signature of the jitted process-row-batch function: takes the node and the
/// batch to process, returns nothing.
typedef void (*ProcessRowBatchFn)(AggregationNode*, RowBatch*);
/// Jitted ProcessRowBatch function pointer. Null if codegen is disabled.
ProcessRowBatchFn process_row_batch_fn_;
/// Certain aggregates require a finalize step, which is the final step of the
/// aggregate after consuming all input rows. The finalize step converts the aggregate
/// value into its final form. This is true if this node contains an aggregate that
/// requires a finalize step.
bool needs_finalize_;
/// Time spent processing the child rows
RuntimeProfile::Counter* build_timer_;
/// Time spent returning the aggregated rows
RuntimeProfile::Counter* get_results_timer_;
/// Num buckets in hash table
RuntimeProfile::Counter* hash_table_buckets_counter_;
/// Load factor in hash table
RuntimeProfile::Counter* hash_table_load_factor_counter_;
/// Constructs a new aggregation intermediate tuple (allocated from tuple_pool_),
/// initialized to grouping values computed over 'current_row_'.
/// Aggregation expr slots are set to their initial values.
/// NOTE(review): no 'current_row_' member is declared in this header; the
/// reference above may be stale -- confirm against the implementation.
Tuple* ConstructIntermediateTuple();
/// Updates the aggregation intermediate tuple 'tuple' with aggregation values
/// computed over 'row'.
void UpdateTuple(Tuple* tuple, TupleRow* row);
/// Called on the intermediate tuple of each group after all input rows have been
/// consumed and aggregated. Computes the final aggregate values to be returned in
/// GetNext() using the agg fn evaluators' Serialize() or Finalize().
/// For the Finalize() case if the output tuple is different from the intermediate
/// tuple, then a new tuple is allocated from 'pool' to hold the final result.
/// Returns the tuple holding the final aggregate values.
Tuple* FinalizeTuple(Tuple* tuple, MemPool* pool);
/// Do the aggregation for all tuple rows in the batch. One variant for
/// aggregation without grouping and one for aggregation with grouping.
void ProcessRowBatchNoGrouping(RowBatch* batch);
void ProcessRowBatchWithGrouping(RowBatch* batch);
/// Codegen the process row batch loop. The loop has already been compiled to
/// IR and loaded into the codegen object. UpdateAggTuple has also been
/// codegen'd to IR. This function will modify the loop substituting the
/// UpdateAggTuple function call with the (inlined) codegen'd 'update_tuple_fn'.
/// NOTE(review): 'UpdateAggTuple' presumably refers to UpdateTuple() declared
/// above -- confirm and rename in this comment if so.
llvm::Function* CodegenProcessRowBatch(
RuntimeState* state, llvm::Function* update_tuple_fn);
/// Codegen for updating aggregate_exprs at slot_idx. Returns NULL if unsuccessful.
/// slot_idx is the idx into aggregate_exprs_ (does not include grouping exprs).
/// NOTE(review): the signature takes 'evaluator' and 'slot_desc' rather than a
/// slot_idx, and no 'aggregate_exprs_' member is declared in this header; the two
/// lines above look stale -- confirm against the implementation.
llvm::Function* CodegenUpdateSlot(
RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc);
/// Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful.
llvm::Function* CodegenUpdateTuple(RuntimeState* state);
};
}
#endif