IMPALA-10218: Remove impala.cdh.repo Maven repository

This removes the impala.cdh.repo Maven repository (i.e.
the repository for the CDH_BUILD_NUMBER). It also removes
the code associated with CDH_BUILD_NUMBER.

The only remaining dependency for the CDH_BUILD_NUMBER
repository was Apache Kite in some of our test code.
This transitions that code to use the public version
of Apache Kite.

The testdata/TableFlattener Java project is intended
to be used manually and is not used for any tests.
It had bitrotted and did not build before this change. I
verified that it now builds, but I did not verify its
functionality.

Testing:
 - Ran a core job
 - Built testdata/TableFlattener Java project

Change-Id: I44b587f936ae20c207c74a9800cf98baa464164a
Reviewed-on: http://gerrit.cloudera.org:8080/16543
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Csaba Ringhofer <csringhofer@cloudera.com>
This commit is contained in:
Joe McDonnell
2020-10-03 14:08:03 -07:00
parent a5019eb12e
commit 04ce57dcaf
6 changed files with 55 additions and 116 deletions

View File

@@ -171,9 +171,6 @@ fi
: ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
export IMPALA_TOOLCHAIN_HOST
export CDH_BUILD_NUMBER=1814051
export CDH_MAVEN_REPOSITORY=\
"https://${IMPALA_TOOLCHAIN_HOST}/build/cdh_components/${CDH_BUILD_NUMBER}/maven"
export CDP_BUILD_NUMBER=4493826
export CDP_MAVEN_REPOSITORY=\
@@ -191,7 +188,7 @@ export CDP_TEZ_VERSION=0.9.1.7.2.1.0-287
export ARCH_NAME=$(uname -p)
export IMPALA_HUDI_VERSION=0.5.0-incubating
export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT
export IMPALA_KITE_VERSION=1.1.0
export IMPALA_ORC_JAVA_VERSION=1.6.2
export IMPALA_ICEBERG_VERSION=0.8.0-incubating
@@ -721,7 +718,6 @@ echo "METASTORE_DB = $METASTORE_DB"
echo "DOWNLOAD_CDH_COMPONENTS = $DOWNLOAD_CDH_COMPONENTS"
echo "IMPALA_MAVEN_OPTIONS = $IMPALA_MAVEN_OPTIONS"
echo "IMPALA_TOOLCHAIN_HOST = $IMPALA_TOOLCHAIN_HOST"
echo "CDH_BUILD_NUMBER = $CDH_BUILD_NUMBER"
echo "CDP_BUILD_NUMBER = $CDP_BUILD_NUMBER"
echo "CDP_COMPONENTS_HOME = $CDP_COMPONENTS_HOME"
echo "IMPALA_HADOOP_VERSION = $IMPALA_HADOOP_VERSION"

View File

@@ -582,18 +582,14 @@ create_log_dirs
bootstrap_dependencies
# Create .cdh and .cdp files that contains the CDH_BUILD_NUMBER and CDP_BUILD_NUMBER
# respectively. If the content of the files are different than the ones in the
# environment variable, append -U into IMPALA_MAVEN_OPTION to force Maven to update its
# local cache.
CDH_FILE="${IMPALA_HOME}/.cdh"
# Create a .cdp file that contains the CDP_BUILD_NUMBER. If the content of the file
# differs from the value of the environment variable, append -U to
# IMPALA_MAVEN_OPTIONS to force Maven to update its local cache.
# TODO: Look into removing this. The CDP components do not use SNAPSHOT versions.
CDP_FILE="${IMPALA_HOME}/.cdp"
if [[ ! -f ${CDH_FILE} || ! -f ${CDP_FILE} || \
$(cat ${CDH_FILE}) != ${CDH_BUILD_NUMBER} || \
$(cat ${CDP_FILE}) != ${CDP_BUILD_NUMBER} ]]; then
if [[ ! -f ${CDP_FILE} || $(cat ${CDP_FILE}) != ${CDP_BUILD_NUMBER} ]]; then
export IMPALA_MAVEN_OPTIONS="${IMPALA_MAVEN_OPTIONS} -U"
fi
echo "${CDH_BUILD_NUMBER}" > ${CDH_FILE}
echo "${CDP_BUILD_NUMBER}" > ${CDP_FILE}
if [[ "$BUILD_FE_ONLY" -eq 1 ]]; then

View File

@@ -150,14 +150,6 @@ under the License.
<id>cdh.rcs.releases.repo</id>
<url>https://repository.cloudera.com/content/groups/cdh-releases-rcs</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.releases.repo</id>
<url>https://repository.cloudera.com/content/repositories/releases</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
@@ -174,18 +166,6 @@ under the License.
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<!--
The Maven repository for the CDH build identified by CDH_BUILD_NUMBER.
CDH uses maven SNAPSHOT versions for non-released versions of components.
-->
<id>impala.cdh.repo</id>
<url>${env.CDH_MAVEN_REPOSITORY}</url>
<name>Impala CDH Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>impala.toolchain.kudu.repo</id>
<url>${env.IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY}</url>
@@ -208,10 +188,11 @@ under the License.
<repository>
<!--
HWX Nexus is disabled. This is a tombstone to list out why:
1. Snapshots are disabled because HWX Nexus contains snapshots of CDH artifacts
that can conflict with the artifacts in the maven repository associated with
the CDH_BUILD_NUMBER. Maven can end up downloading a mix of artifacts that are
mutually incompatible. Snapshots are not necessary at this time.
1. Snapshots are disabled because HWX Nexus contains snapshots of artifacts
that can conflict with the artifacts in any of the other repositories with
SNAPSHOT versions. We don't want any conflicting sources for SNAPSHOT versions,
so it is better to keep this disabled. In the past, this was a particular
problem for CDH Hadoop components that used SNAPSHOT versions.
2. In a previous change, we depended on the hadoop-cloud-storage artifact from
the impala.cdp.repo. This had the odd property that it referenced versions of
artifacts that were not in the impala.cdp.repo. For example, artifact A at

View File

@@ -52,10 +52,30 @@
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>org.kitesdk</groupId>
<artifactId>kite-data-core</artifactId>
<version>${kite.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>com.twitter</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>

View File

@@ -36,9 +36,9 @@ import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Format;
import org.kitesdk.data.Formats;
import parquet.avro.AvroSchemaConverter;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import java.io.IOException;
import java.io.PrintWriter;

96
testdata/pom.xml vendored
View File

@@ -44,26 +44,12 @@ under the License.
<version>2.3</version>
</dependency>
<!-- Force javax-el dependency -->
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.el</artifactId>
<version>3.0.1-b06</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
@@ -74,76 +60,26 @@ under the License.
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<exclusions>
<!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<exclusions>
<!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
<groupId>org.eclipse.jetty</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>org.glassfish</groupId>
<artifactId>javax.el</artifactId>
<!-- IMPALA-9468: Avoid pulling in netty for security reasons -->
<groupId>io.netty</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
<exclusions>
<!-- https://issues.apache.org/jira/browse/HADOOP-14903 -->
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-servlet</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-protocol</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.cloudera.htrace</groupId>
<artifactId>htrace-core</artifactId>
<version>2.00</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>${commons-io.version}</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
@@ -166,6 +102,16 @@ under the License.
<groupId>org.kitesdk</groupId>
<artifactId>kite-data-core</artifactId>
<version>${kite.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>com.twitter</groupId>
<artifactId>parquet-avro</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>