Files
impala/fe/pom.xml
Matthew Jacobs 157c80056c IMPALA-3481: Use Kudu ScanToken API for scan ranges
Switches the planner and KuduScanNode to use Kudu's new
ScanToken API instead of explicitly constructing scan ranges
for all tablets of a table, regardless of whether they were
needed. The ScanToken API allows Impala to specify the
projected columns and predicates during planning, and Kudu
returns a set of 'scan tokens' that represent a scanner for
each tablet that needs to be scanned. The scan tokens can
be serialized and distributed to the scan nodes, which can
then deserialize them into Kudu scanner objects. Upon
deserialization, the scan token has all scan parameters
already, including the 'pushed down' predicates. Impala no
longer needs to send the Kudu predicates to the BE and
convert them at the scan node.

This change also fixes:
1) IMPALA-4016: Avoid materializing slots only referenced
                by Kudu conjuncts
2) IMPALA-3874: Predicates are not always pushed to Kudu

TODO: Consider additional planning improvements.

Testing: Updated the existing tests, verified everything
works as expected. Some BE tests no longer make sense and
they were removed.

TODO: When KUDU-1065 is resolved, add tests that demonstrate pruning.

Change-Id: I160e5849d372755748ff5ba3c90a4651c804b220
Reviewed-on: http://gerrit.cloudera.org:8080/4120
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: Internal Jenkins
2016-09-08 01:50:51 +00:00

753 lines
24 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.cloudera.impala</groupId>
<artifactId>impala-frontend</artifactId>
<version>0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Cloudera Impala Query Engine Frontend</name>
<properties>
<surefire.reports.dir>${env.IMPALA_LOGS_DIR}/fe_tests</surefire.reports.dir>
<jacoco.skip>true</jacoco.skip>
<jacoco.data.file>${env.IMPALA_FE_TEST_COVERAGE_DIR}/jacoco.exec</jacoco.data.file>
<jacoco.report.dir>${env.IMPALA_FE_TEST_COVERAGE_DIR}</jacoco.report.dir>
<test.hive.testdata>${project.basedir}/../testdata/target/AllTypes.txt</test.hive.testdata>
<backend.library.path>${env.IMPALA_HOME}/be/build/debug/service:${env.IMPALA_HOME}/be/build/release/service</backend.library.path>
<beeswax_port>21000</beeswax_port>
<impalad>localhost</impalad>
<testExecutionMode>reduced</testExecutionMode>
<hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
<hive.version>${env.IMPALA_HIVE_VERSION}</hive.version>
<sentry.version>${env.IMPALA_SENTRY_VERSION}</sentry.version>
<hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
<parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
<impala.extdatasrc.api.version>1.0-SNAPSHOT</impala.extdatasrc.api.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<kudu.version>${env.KUDU_JAVA_VERSION}</kudu.version>
</properties>
<dependencies>
<dependency>
<groupId>org.htrace</groupId>
<artifactId>htrace-core</artifactId>
<version>3.0.4</version>
</dependency>
<dependency>
<groupId>com.cloudera.impala</groupId>
<artifactId>impala-data-source-api</artifactId>
<version>${impala.extdatasrc.api.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-core-common</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-core-model-db</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-common</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-file</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-cache</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-policy-common</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-policy-db</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-binding-hive</artifactId>
<version>${sentry.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-db</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-db</artifactId>
<version>${sentry.version}</version>
<classifier>sh</classifier>
</dependency>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>parquet-hadoop-bundle</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>net.sourceforge.czt.dev</groupId>
<artifactId>java-cup</artifactId>
<version>0.11-a-czt02-cdh</version>
</dependency>
<!-- Moved above Hive, because Hive bundles its own Thrift version
which supercedes this one if it comes first in the dependency
tree -->
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${env.IMPALA_THRIFT_JAVA_VERSION}</version>
<!-- libthrift depends httpcore 4.1.3 which does not work with KMS. To workaround
this problem the dependency is excluded here and we explicitly add a newer
httpcore dependency version. See CDH-22024. TODO: Find a better fix. -->
<exclusions>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
<version>${env.IMPALA_THRIFT_JAVA_VERSION}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-hbase-handler</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client</artifactId>
<version>${kudu.version}</version>
</dependency>
<dependency>
<groupId>com.stumbleupon</groupId>
<artifactId>async</artifactId>
<version>1.3.1</version>
</dependency>
<!-- This driver supports PostgreSQL 7.2 and newer -->
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>9.0-801.jdbc4</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>javax.jdo</groupId>
<artifactId>jdo-api</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr-runtime</artifactId>
<version>3.3</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.json</artifactId>
<version>1.0.2</version>
</dependency>
</dependencies>
<reporting>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<version>2.5.2</version>
</plugin>
</plugins>
</reporting>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<!-- Build fe/tests/ to jar -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.6</version>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>net.sourceforge.czt.dev</groupId>
<artifactId>cup-maven-plugin</artifactId>
<version>1.6-cdh</version>
<executions>
<execution>
<id>cup</id>
<phase>generate-sources</phase>
<goals>
<goal>generate</goal>
</goals>
</execution>
</executions>
<configuration>
<cupDefinition>sql-parser.cup</cupDefinition>
<className>SqlParser</className>
<symbolsName>SqlParserSymbols</symbolsName>
<outputDirectory>${project.build.directory}/generated-sources/cup</outputDirectory>
</configuration>
</plugin>
<plugin>
<groupId>de.jflex</groupId>
<artifactId>maven-jflex-plugin</artifactId>
<version>1.4.3</version>
<executions>
<execution>
<id>jflex</id>
<phase>generate-sources</phase>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<backup>false</backup>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<excludeTypes>pom</excludeTypes>
<includeScope>runtime</includeScope>
<silent>true</silent>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.18</version>
<configuration>
<reportsDirectory>${surefire.reports.dir}</reportsDirectory>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
<argLine>-Djava.library.path=${java.library.path}:${backend.library.path} ${surefireJacocoArg}</argLine>
<systemProperties>
<property>
<name>testExecutionMode</name>
<value>${testExecutionMode}</value>
</property>
<property>
<name>beeswax_port</name>
<value>${beeswax_port}</value>
<name>impalad</name>
<value>${impalad}</value>
<name>use_external_impalad</name>
<value>${use_external_impalad}</value>
</property>
</systemProperties>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.5</version>
<executions>
<!-- Tell maven about our generated files -->
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<!-- Make sure Eclipse knows where to find generated sources.
Note that the flex plugin appears to do this for you, but we have
to do this manually for the CUP and Thrift generated code
-->
<source>${project.basedir}/generated-sources/gen-java</source>
<source>${project.build.directory}/generated-sources/cup</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.4.0</version>
<executions>
<execution>
<goals>
<goal>java</goal>
</goals>
</execution>
</executions>
<configuration>
<systemProperties>
<systemProperty>
<key>java.library.path</key>
<value>${java.library.path}:${backend.library.path}</value>
</systemProperty>
<systemProperty>
<key>test.hive.testdata</key>
<value>${project.basedir}/../testdata/target/AllTypes.txt</value>
</systemProperty>
</systemProperties>
</configuration>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.7.6.201602180812</version>
<executions>
<execution>
<id>prepare-jacoco-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
<configuration>
<destFile>${jacoco.data.file}</destFile>
<propertyName>surefireJacocoArg</propertyName>
</configuration>
</execution>
<execution>
<id>jacoco-report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
<configuration>
<dataFile>${jacoco.data.file}</dataFile>
<outputDirectory>${jacoco.report.dir}</outputDirectory>
</configuration>
</execution>
</executions>
<configuration>
<!-- Code coverage reporting is disabled by default and must be enabled
when running tests like this: mvn test -DcodeCoverage -->
<skip>${jacoco.skip}</skip>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>net.sourceforge.czt</groupId>
<artifactId>maven-cup-plugin</artifactId>
<versionRange>[1.6.4,)</versionRange>
<goals>
<goal>generate</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<versionRange>[1.6,)</versionRange>
<goals>
<goal>run</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<versionRange>[2.0,)</versionRange>
<goals>
<goal>copy-dependencies</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<repositories>
<repository>
<id>apache.snapshots</id>
<name>Apache Development Snapshot Repository</name>
<url>https://repository.apache.org/content/repositories/snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.rcs.releases.repo</id>
<url>https://repository.cloudera.com/content/groups/cdh-releases-rcs</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.releases.repo</id>
<url>https://repository.cloudera.com/content/repositories/releases</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.snapshots.repo</id>
<url>https://repository.cloudera.com/content/repositories/snapshots</url>
<name>CDH Snapshots Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cloudera.thirdparty.repo</id>
<url>https://repository.cloudera.com/content/repositories/third-party</url>
<name>Cloudera Third Party Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<!-- This is needed for java-cup. TODO add the plugin to our maven repo -->
<repository>
<id>sonatype-nexus-snapshots</id>
<name>Sonatype Nexus Snapshots</name>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>cloudera.thirdparty.repo</id>
<url>https://repository.cloudera.com/content/repositories/third-party</url>
<name>Cloudera Third Party Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</pluginRepository>
<pluginRepository>
<id>cloudera.snapshot.repo</id>
<url>https://repository.cloudera.com/content/repositories/snapshots</url>
<name>Cloudera Snapshot Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</pluginRepository>
<pluginRepository>
<id>dtrott</id>
<url>http://maven.davidtrott.com/repository</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</pluginRepository>
<!-- This is needed for the cup maven plugin. TODO add the plugin to our maven repo -->
<pluginRepository>
<id>sonatype-nexus-snapshots</id>
<name>Sonatype Nexus Snapshots</name>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</pluginRepository>
</pluginRepositories>
<profiles>
<profile>
<id>thrift-home-defined</id>
<activation>
<file>
<exists>${env.THRIFT_HOME}/bin/thrift</exists>
</file>
</activation>
<properties>
<thrift.executable>${env.THRIFT_HOME}/bin/thrift</thrift.executable>
</properties>
</profile>
<profile>
<id>jacoco-code-coverage</id>
<activation>
<property>
<!-- Enable code coverage if the 'codeCoverage' property is set.
Usage: mvn test -DcodeCoverage -->
<name>codeCoverage</name>
</property>
</activation>
<properties>
<jacoco.skip>false</jacoco.skip>
<!-- Due to Jacoco's runtime instrumentation some tests could fail.
In order to still produce a coverage report it is recommended
to ignore test failures. -->
<maven.test.failure.ignore>true</maven.test.failure.ignore>
</properties>
</profile>
</profiles>
</project>