Files
impala/fe/pom.xml
Sailesh Mukil 50bd015f2d IMPALA-5333: Add support for Impala to work with ADLS
This patch leverages the AdlFileSystem in Hadoop to allow
Impala to talk to the Azure Data Lake Store. This patch has
functional changes as well as adds test infrastructure for
testing Impala over ADLS.

We do not support ACLs on ADLS since the Hadoop ADLS
connector does not integrate ADLS ACLs with Hadoop users/groups.

For testing, we use the azure-data-lake-store-python client
from Microsoft. This client seems to have some consistency
issues. For example, a drop table through Impala will delete
the files in ADLS, however, listing that directory through
the python client immediately after the drop, will still show
the files. This behavior is unexpected since ADLS claims to be
strongly consistent. Some tests have been skipped due to this
limitation with the tag SkipIfADLS.slow_client. Tracked by
IMPALA-5335.

The azure-data-lake-store-python client also only works on CentOS 6.6
and over, so the python dependencies for Azure will not be downloaded
when the TARGET_FILESYSTEM is not "adls". While running ADLS tests,
the expectation will be that it runs on a machine that is at least
running CentOS 6.6.
Note: This is only a test limitation, not a functional one. Clusters
with older OSes like CentOS 6.4 will still work with ADLS.

Added another dependency to bootstrap_build.sh for the ADLS Python
client.

Testing: Ran core tests with and without TARGET_FILESYSTEM as
'adls' to make sure that all tests pass and that nothing breaks.

Change-Id: Ic56b9988b32a330443f24c44f9cb2c80842f7542
Reviewed-on: http://gerrit.cloudera.org:8080/6910
Tested-by: Impala Public Jenkins
Reviewed-by: Sailesh Mukil <sailesh@cloudera.com>
2017-05-25 19:35:24 +00:00

767 lines
24 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.impala</groupId>
<artifactId>impala-frontend</artifactId>
<version>0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Apache Impala (Incubating) Query Engine Frontend</name>
<properties>
<surefire.reports.dir>${env.IMPALA_LOGS_DIR}/fe_tests</surefire.reports.dir>
<jacoco.skip>true</jacoco.skip>
<jacoco.data.file>${env.IMPALA_FE_TEST_COVERAGE_DIR}/jacoco.exec</jacoco.data.file>
<jacoco.report.dir>${env.IMPALA_FE_TEST_COVERAGE_DIR}</jacoco.report.dir>
<test.hive.testdata>${project.basedir}/../testdata/target/AllTypes.txt</test.hive.testdata>
<backend.library.path>${env.IMPALA_HOME}/be/build/debug/service:${env.IMPALA_HOME}/be/build/release/service</backend.library.path>
<beeswax_port>21000</beeswax_port>
<impalad>localhost</impalad>
<testExecutionMode>reduced</testExecutionMode>
<hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
<hive.version>${env.IMPALA_HIVE_VERSION}</hive.version>
<hive.major.version>${env.IMPALA_HIVE_MAJOR_VERSION}</hive.major.version>
<sentry.version>${env.IMPALA_SENTRY_VERSION}</sentry.version>
<hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>
<parquet.version>${env.IMPALA_PARQUET_VERSION}</parquet.version>
<impala.extdatasrc.api.version>1.0-SNAPSHOT</impala.extdatasrc.api.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<kudu.version>${env.KUDU_JAVA_VERSION}</kudu.version>
</properties>
<dependencies>
<dependency>
<groupId>org.htrace</groupId>
<artifactId>htrace-core</artifactId>
<version>3.0.4</version>
</dependency>
<dependency>
<groupId>org.apache.impala</groupId>
<artifactId>impala-data-source-api</artifactId>
<version>${impala.extdatasrc.api.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-azure-datalake</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-core-common</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-core-model-db</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-common</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-file</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-cache</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-policy-common</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-policy-db</artifactId>
<version>${sentry.version}</version>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-binding-hive</artifactId>
<version>${sentry.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-db</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.sentry</groupId>
<artifactId>sentry-provider-db</artifactId>
<version>${sentry.version}</version>
<classifier>sh</classifier>
</dependency>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>parquet-hadoop-bundle</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>net.sourceforge.czt.dev</groupId>
<artifactId>java-cup</artifactId>
<version>0.11-a-czt02-cdh</version>
</dependency>
<!-- Moved above Hive, because Hive bundles its own Thrift version
which supercedes this one if it comes first in the dependency
tree -->
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${env.IMPALA_THRIFT_JAVA_VERSION}</version>
<!-- libthrift depends httpcore 4.1.3 which does not work with KMS. To workaround
this problem the dependency is excluded here and we explicitly add a newer
httpcore dependency version. See IMPALA-4210. TODO: Find a better fix. -->
<exclusions>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libfb303</artifactId>
<version>${env.IMPALA_THRIFT_JAVA_VERSION}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-hbase-handler</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kudu</groupId>
<artifactId>kudu-client</artifactId>
<version>${kudu.version}</version>
</dependency>
<dependency>
<groupId>com.stumbleupon</groupId>
<artifactId>async</artifactId>
<version>1.3.1</version>
</dependency>
<!-- This driver supports PostgreSQL 7.2 and newer -->
<dependency>
<groupId>postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>9.0-801.jdbc4</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>javax.jdo</groupId>
<artifactId>jdo-api</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr-runtime</artifactId>
<version>3.3</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.json</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>com.github.davidmoten</groupId>
<artifactId>flatbuffers-java</artifactId>
<version>1.6.0.1</version>
</dependency>
</dependencies>
<reporting>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<version>2.5.2</version>
</plugin>
</plugins>
</reporting>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<!-- Build fe/tests/ to jar -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.6</version>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>net.sourceforge.czt.dev</groupId>
<artifactId>cup-maven-plugin</artifactId>
<version>1.6-cdh</version>
<executions>
<execution>
<id>cup</id>
<phase>generate-sources</phase>
<goals>
<goal>generate</goal>
</goals>
</execution>
</executions>
<configuration>
<cupDefinition>sql-parser.cup</cupDefinition>
<className>SqlParser</className>
<symbolsName>SqlParserSymbols</symbolsName>
<outputDirectory>${project.build.directory}/generated-sources/cup</outputDirectory>
</configuration>
</plugin>
<plugin>
<groupId>de.jflex</groupId>
<artifactId>maven-jflex-plugin</artifactId>
<version>1.4.3</version>
<executions>
<execution>
<id>jflex</id>
<phase>generate-sources</phase>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<backup>false</backup>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<excludeTypes>pom</excludeTypes>
<includeScope>runtime</includeScope>
<silent>true</silent>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.18</version>
<configuration>
<reportsDirectory>${surefire.reports.dir}</reportsDirectory>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
<argLine>-Djava.library.path=${java.library.path}:${backend.library.path} ${surefireJacocoArg}</argLine>
<systemProperties>
<property>
<name>testExecutionMode</name>
<value>${testExecutionMode}</value>
</property>
<property>
<name>beeswax_port</name>
<value>${beeswax_port}</value>
<name>impalad</name>
<value>${impalad}</value>
<name>use_external_impalad</name>
<value>${use_external_impalad}</value>
</property>
</systemProperties>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.5</version>
<executions>
<!-- Tell maven about our generated files -->
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<!-- Make sure Eclipse knows where to find generated sources.
Note that the flex plugin appears to do this for you, but we have
to do this manually for the CUP and Thrift generated code
-->
<source>${project.basedir}/generated-sources/gen-java</source>
<source>${project.build.directory}/generated-sources/cup</source>
<source>${project.basedir}/src/compat-hive-${hive.major.version}/java</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.4.0</version>
<executions>
<execution>
<goals>
<goal>java</goal>
</goals>
</execution>
</executions>
<configuration>
<systemProperties>
<systemProperty>
<key>java.library.path</key>
<value>${java.library.path}:${backend.library.path}</value>
</systemProperty>
<systemProperty>
<key>test.hive.testdata</key>
<value>${project.basedir}/../testdata/target/AllTypes.txt</value>
</systemProperty>
</systemProperties>
</configuration>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.7.6.201602180812</version>
<executions>
<execution>
<id>prepare-jacoco-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
<configuration>
<destFile>${jacoco.data.file}</destFile>
<propertyName>surefireJacocoArg</propertyName>
</configuration>
</execution>
<execution>
<id>jacoco-report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
<configuration>
<dataFile>${jacoco.data.file}</dataFile>
<outputDirectory>${jacoco.report.dir}</outputDirectory>
</configuration>
</execution>
</executions>
<configuration>
<!-- Code coverage reporting is disabled by default and must be enabled
when running tests like this: mvn test -DcodeCoverage -->
<skip>${jacoco.skip}</skip>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>net.sourceforge.czt</groupId>
<artifactId>maven-cup-plugin</artifactId>
<versionRange>[1.6.4,)</versionRange>
<goals>
<goal>generate</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<versionRange>[1.6,)</versionRange>
<goals>
<goal>run</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<versionRange>[2.0,)</versionRange>
<goals>
<goal>copy-dependencies</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore></ignore>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<repositories>
<repository>
<id>apache.snapshots</id>
<name>Apache Development Snapshot Repository</name>
<url>https://repository.apache.org/content/repositories/snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.rcs.releases.repo</id>
<url>https://repository.cloudera.com/content/groups/cdh-releases-rcs</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.releases.repo</id>
<url>https://repository.cloudera.com/content/repositories/releases</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.snapshots.repo</id>
<url>https://repository.cloudera.com/content/repositories/snapshots</url>
<name>CDH Snapshots Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cloudera.thirdparty.repo</id>
<url>https://repository.cloudera.com/content/repositories/third-party</url>
<name>Cloudera Third Party Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<!-- This is needed for java-cup. TODO add the plugin to our maven repo -->
<repository>
<id>sonatype-nexus-snapshots</id>
<name>Sonatype Nexus Snapshots</name>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>cloudera.thirdparty.repo</id>
<url>https://repository.cloudera.com/content/repositories/third-party</url>
<name>Cloudera Third Party Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</pluginRepository>
<pluginRepository>
<id>cloudera.snapshot.repo</id>
<url>https://repository.cloudera.com/content/repositories/snapshots</url>
<name>Cloudera Snapshot Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</pluginRepository>
<pluginRepository>
<id>dtrott</id>
<url>http://maven.davidtrott.com/repository</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</pluginRepository>
<!-- This is needed for the cup maven plugin. TODO add the plugin to our maven repo -->
<pluginRepository>
<id>sonatype-nexus-snapshots</id>
<name>Sonatype Nexus Snapshots</name>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</pluginRepository>
</pluginRepositories>
<profiles>
<profile>
<id>thrift-home-defined</id>
<activation>
<file>
<exists>${env.THRIFT_HOME}/bin/thrift</exists>
</file>
</activation>
<properties>
<thrift.executable>${env.THRIFT_HOME}/bin/thrift</thrift.executable>
</properties>
</profile>
<profile>
<id>jacoco-code-coverage</id>
<activation>
<property>
<!-- Enable code coverage if the 'codeCoverage' property is set.
Usage: mvn test -DcodeCoverage -->
<name>codeCoverage</name>
</property>
</activation>
<properties>
<jacoco.skip>false</jacoco.skip>
<!-- Due to Jacoco's runtime instrumentation some tests could fail.
In order to still produce a coverage report it is recommended
to ignore test failures. -->
<maven.test.failure.ignore>true</maven.test.failure.ignore>
</properties>
</profile>
</profiles>
</project>