This puts all of the thrift-generated Python code into the impala_thrift_gen package. This is similar to what Impyla does for its thrift-generated Python code, except that it uses the impala_thrift_gen package rather than impala._thrift_gen. This is a preparatory patch for fixing the absolute import issues.

This patches all of the Thrift files to add the Python namespace, and adds code to apply the same patching to the third-party Thrift files (hive_metastore.thrift, fb303.thrift). Putting all the generated Python into a package makes it easier to understand where the imports are getting code. When the subsequent change rearranges the shell code, the thrift-generated code can stay in a separate directory.

This uses isort, with the provided .isort.cfg file, to sort the imports of the affected Python files. This also adds an impala-isort shell script to make it easy to run.

Testing:
- Ran a core job

Change-Id: Ie2927f22c7257aa38a78084efe5bd76d566493c0
Reviewed-on: http://gerrit.cloudera.org:8080/20169
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
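With the 'namespace py impala_thrift_gen.Planner' declaration added below, the standard Thrift Python generator emits this file's modules under the impala_thrift_gen.Planner package. A minimal sketch of what an import looks like under the new layout (assuming the default gen-py output directory is on sys.path; not taken from this patch):

  # Sketch only: assumes the default Thrift Python generator layout.
  from impala_thrift_gen.Planner import ttypes

  # Generated struct classes (e.g. the TPlanFragment defined below) are attributes of ttypes.
  fragment_cls = ttypes.TPlanFragment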
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//
// This file contains structures produced by the planner.

namespace py impala_thrift_gen.Planner
namespace cpp impala
namespace java org.apache.impala.thrift

include "Types.thrift"
include "Exprs.thrift"
include "DataSinks.thrift"
include "PlanNodes.thrift"
include "Partitions.thrift"

// TPlanFragment encapsulates info needed to execute a particular
// plan fragment, including how to produce and how to partition its output.
// It leaves out node-specific parameters needed for the actual execution.
struct TPlanFragment {
  // Ordinal number of fragment within a query; range: 0..<total # fragments>
  1: required Types.TFragmentIdx idx

  // display name to be shown in the runtime profile; unique within a query
  2: required string display_name

  // no plan or descriptor table: query without From clause
  3: optional PlanNodes.TPlan plan

  // Specifies the destination of this plan fragment's output rows.
  // For example, the destination could be a stream sink which forwards
  // the data to a remote plan fragment, or a sink which writes to a table (for
  // insert stmts).
  5: optional DataSinks.TDataSink output_sink

  // Partitioning of the data created by all instances of this plan fragment;
  // partitioning.type has the following meaning:
  // - UNPARTITIONED: there is only one instance of the plan fragment
  // - RANDOM: a particular output row is randomly assigned to any of the instances
  // - HASH_PARTITIONED: output row r is produced by
  //   hash_value(partitioning.partitioning_exprs(r)) % #partitions
  // - RANGE_PARTITIONING: currently not supported
  // This is distinct from the partitioning of each plan fragment's
  // output, which is specified by output_sink.output_partitioning.
  6: required Partitions.TDataPartition partition

  // The minimum memory reservation (in bytes) required for this plan fragment to execute
  // on a single host. Split between the per-backend resources shared across all
  // instances on a backend and the per-instance resources required for each instance.
  7: optional i64 backend_min_mem_reservation_bytes
  12: optional i64 instance_min_mem_reservation_bytes

  // Total of the initial memory reservations that we expect to be claimed by this
  // fragment. I.e. the sum of the min reservations over all operators (including the
  // sink) in a single instance of this fragment. This is used for an optimization in
  // InitialReservation. Measured in bytes. Required in V1.
  // Split between the per-backend resources shared across all instances on a backend
  // and the per-instance resources required for each instance.
  8: optional i64 instance_initial_mem_reservation_total_claims
  13: optional i64 backend_initial_mem_reservation_total_claims

  // The total memory (in bytes) required for the runtime filters produced by the plan
  // nodes managed by this fragment. Is included in instance_min_mem_reservation_bytes.
  9: optional i64 produced_runtime_filters_reservation_bytes

  // The total memory (in bytes) required for the runtime filters consumed by the plan
  // nodes managed by this fragment. Is included in backend_min_mem_reservation_bytes.
  11: optional i64 consumed_runtime_filters_reservation_bytes

  // Maximum number of required threads that will be executing concurrently for this plan
  // fragment, i.e. the number of threads that this query needs to execute successfully.
  10: optional i64 thread_reservation

  // The effective degree of parallelism for this fragment, as dictated by the frontend
  // planner. If the frontend planner sets this to a positive number, the backend
  // scheduler must schedule no more than this many instances of the fragment. Must be
  // greater than 0 if query option COMPUTE_PROCESSING_COST=true. Currently not enforced
  // when the fragment needs to exceed the max_fs_writers query option (see IMPALA-8125).
  14: optional i32 effective_instance_count

  // If true, the fragment must be scheduled on the coordinator. In this case 'partition'
  // must be UNPARTITIONED.
  15: required bool is_coordinator_only

  // Marks whether this is a dominant fragment. Only possible to be true if
  // COMPUTE_PROCESSING_COST=true. Otherwise, always false.
  // See PlanFragment.java for the definition of a dominant fragment.
  16: optional bool is_dominant = false
}
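The HASH_PARTITIONED rule documented on the 'partition' field above boils down to a modulo assignment over the hashed partitioning exprs. A minimal Python sketch of that rule for illustration only (the function name and the hash used here are placeholders, not Impala's actual backend hashing):

  # Illustrative model of: hash_value(partitioning.partitioning_exprs(r)) % #partitions
  def target_instance(partitioning_expr_values, num_instances):
      # Placeholder hash; the backend uses its own hash over the evaluated exprs.
      return hash(tuple(partitioning_expr_values)) % num_instances

  # A row whose partitioning exprs evaluate to ("us-east", 42) lands on one of the
  # num_instances fragment instances (some index in 0..3 here).
  target_instance(("us-east", 42), num_instances=4)
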
// location information for a single scan range
struct TScanRangeLocation {
  // Index into TQueryExecRequest.host_list.
  1: required i32 host_idx;

  // disk volume identifier of a particular scan range at 'server';
  // -1 indicates an unknown volume id;
  // only set for TScanRange.hdfs_file_split
  2: optional i32 volume_id = -1

  // If true, this block is cached on this server.
  3: optional bool is_cached = false
}

// A single scan range plus the hosts that serve it
struct TScanRangeLocationList {
  1: required PlanNodes.TScanRange scan_range
  // non-empty list
  2: list<TScanRangeLocation> locations
}

// A specification for scan ranges. Scan ranges can be
// concrete or specs, which are used to generate concrete ranges.
// Each type is stored in a separate list.
struct TScanRangeSpec {
  1: optional list<TScanRangeLocationList> concrete_ranges
  2: optional list<PlanNodes.TFileSplitGeneratorSpec> split_specs
}

// A plan: tree of plan fragments that materializes either a query result or the build
// side of a join used by another plan; it consists of a sequence of plan fragments.
// TODO: rename both this and PlanNodes.TPlan (TPlan should be something like TExecPlan
// or TExecTree)
struct TPlanFragmentTree {
  1: required i32 cohort_id

  2: required list<TPlanFragment> fragments
}
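For readers working with the generated Python bindings, a minimal sketch of how these structs might be instantiated. It assumes the default Thrift Python generator output for this file (a 'ttypes' module under the impala_thrift_gen.Planner package, per the namespace above); the field values are invented for illustration:

  # Sketch only: the module path follows 'namespace py impala_thrift_gen.Planner';
  # the values are placeholders, not taken from a real plan.
  from impala_thrift_gen.Planner.ttypes import TPlanFragment, TPlanFragmentTree

  fragment = TPlanFragment(
      idx=0,                      # first fragment of the query
      display_name="F00",         # name shown in the runtime profile
      is_coordinator_only=False,
  )
  # The required 'partition' field (a Partitions.TDataPartition) is omitted here for
  # brevity; generated Python constructors do not enforce required fields.
  tree = TPlanFragmentTree(cohort_id=0, fragments=[fragment])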