Files
impala/java/executor-deps/pom.xml
stiga-huang 2dfc68d852 IMPALA-7712: Support Google Cloud Storage
This patch adds support for GCS(Google Cloud Storage). Using the
gcs-connector, the implementation is similar to other remote
FileSystems.

New flags for GCS:
 - num_gcs_io_threads: Number of GCS I/O threads. Defaults to be 16.

Follow-up:
 - Support for spilling to GCS will be addressed in IMPALA-10561.
 - Support for caching GCS file handles will be addressed in
   IMPALA-10568.
 - test_concurrent_inserts and test_failing_inserts in
   test_acid_stress.py are skipped due to slow file listing on
   GCS (IMPALA-10562).
 - Some tests are skipped due to issues introduced by /etc/hosts setting
   on GCE instances (IMPALA-10563).

Tests:
 - Compile and create hdfs test data on a GCE instance. Upload test data
   to a GCS bucket. Modify all locations in HMS DB to point to the GCS
   bucket. Remove some hdfs caching params. Run CORE tests.
 - Compile and load snapshot data to a GCS bucket. Run CORE tests.

Change-Id: Ia91ec956de3b620cccf6a1244b56b7da7a45b32b
Reviewed-on: http://gerrit.cloudera.org:8080/17121
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-03-13 11:20:08 +00:00

236 lines
7.7 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
This pom contains all dependencies required to run an Impala Executor. fe/pom.xml
has a dependency on the pom file produced by this pom file. If a dependency is
listed here it will be included in the Impala coordinator, executor, and
coordinator-executor Docker images. If a jar is not listed here, it will not be
included in the executor Docker images. This allows Impala to create executor
images with fewer jar files, which decreases the overall size of the executor
Docker images.
-->
<parent>
<groupId>org.apache.impala</groupId>
<artifactId>impala-parent</artifactId>
<version>4.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.impala</groupId>
<artifactId>impala-executor-deps</artifactId>
<packaging>pom</packaging>
<name>Impala Executor Dependencies</name>
<dependencies>
<!-- Pin json-smart dependency to version 2.3 -->
<dependency>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
<version>2.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<!-- Exclude json-smart to pin the version -->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<!-- IMPALA-9468: Avoid pulling in netty for security reasons -->
<groupId>io.netty</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-servlet</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
<!-- Exclude the aws-java-sdk-bundle dependency because the Impala minimal
version of this dependency is used instead. -->
<exclusions>
<exclusion>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-bundle</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.impala</groupId>
<artifactId>impala-minimal-s3a-aws-sdk</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-azure</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-azure-datalake</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
<version>${gcs.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<exclusions>
<!-- Exclude json-smart to pin the version -->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<exclusions>
<!-- Exclude json-smart to pin the version -->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
</exclusions>
<version>${hbase.version}</version>
</dependency>
<!-- This reduced dependency is derived from hive-exec and only contains classes needed
by Impala. See shaded-deps/pom.xml for more details -->
<dependency>
<groupId>org.apache.impala</groupId>
<artifactId>impala-minimal-hive-exec</artifactId>
<version>${project.version}</version>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- IMPALA-8766: Include Knox jars on the classpath -->
<dependency>
<groupId>org.apache.knox</groupId>
<artifactId>gateway-cloud-bindings</artifactId>
<version>${knox.version}</version>
<exclusions>
<!-- Impala currently doesn't support GCS, so exclude those jars -->
<exclusion>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-ozone-filesystem-hadoop3</artifactId>
<version>${ozone.version}</version>
<!-- Remove all transitive dependencies from the Apache Ozone dependency.
hadoop-ozone-filesystem-hadoop3 is a shaded-jar, which already includes
all required transitive dependencies. For some reason, Ozone still pulls
in some transitive dependencies even though they are not needed. -->
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<id>write-executor-classpath</id>
<goals>
<goal>build-classpath</goal>
</goals>
<configuration>
<outputFile>${project.build.directory}/build-executor-deps-classpath.txt</outputFile>
<includeScope>runtime</includeScope>
<excludeTypes>pom</excludeTypes>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>