diff --git a/README-build.md b/README-build.md index 1297b8698..c60471640 100644 --- a/README-build.md +++ b/README-build.md @@ -29,7 +29,7 @@ can do so through the environment variables and scripts listed below. | SKIP_TOOLCHAIN_BOOTSTRAP | "false" | Skips downloading the toolchain any python dependencies if "true" | | CDH_BUILD_NUMBER | | Identifier to indicate the CDH build number | CDH_COMPONENTS_HOME | "${IMPALA_HOME}/toolchain/cdh_components-${CDH_BUILD_NUMBER}" | Location of the CDH components within the toolchain. | -| CDH_MAJOR_VERSION | "5" | Identifier used to uniqueify paths for potentially incompatible component builds. | +| CDH_MAJOR_VERSION | "7" | Identifier used to uniqueify paths for potentially incompatible component builds. | | IMPALA_CONFIG_SOURCED | "1" | Set by ${IMPALA_HOME}/bin/impala-config.sh (internal use) | | JAVA_HOME | "/usr/lib/jvm/${JAVA_VERSION}" | Used to locate Java | | JAVA_VERSION | "java-7-oracle-amd64" | Can override to set a local Java version. | @@ -59,11 +59,11 @@ can do so through the environment variables and scripts listed below. ## Dependencies | Environment variable | Default value | Description | |----------------------|---------------|-------------| -| HADOOP_HOME | "${CDH_COMPONENTS_HOME}/hadoop-${IMPALA_HADOOP_VERSION}/" | Used to locate Hadoop | +| HADOOP_HOME | "${CDP_COMPONENTS_HOME}/hadoop-${IMPALA_HADOOP_VERSION}/" | Used to locate Hadoop | | HADOOP_INCLUDE_DIR | "${HADOOP_HOME}/include" | For 'hdfs.h' | | HADOOP_LIB_DIR | "${HADOOP_HOME}/lib" | For 'libhdfs.a' or 'libhdfs.so' | -| HIVE_HOME | "${CDH_COMPONENTS_HOME}/{hive-${IMPALA_HIVE_VERSION}/" | | -| HBASE_HOME | "${CDH_COMPONENTS_HOME}/hbase-${IMPALA_HBASE_VERSION}/" | | -| SENTRY_HOME | "${CDH_COMPONENTS_HOME}/sentry-${IMPALA_SENTRY_VERSION}/" | Used to setup test data | +| HIVE_HOME | "${CDP_COMPONENTS_HOME}/apache-hive-${IMPALA_HIVE_VERSION}-bin/" | | +| HBASE_HOME | "${CDP_COMPONENTS_HOME}/hbase-${IMPALA_HBASE_VERSION}/" | | +| SENTRY_HOME | "${CDP_COMPONENTS_HOME}/sentry-${IMPALA_SENTRY_VERSION}/" | Used to setup test data | | THRIFT_HOME | "${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_VERSION}" | | diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py index 47207277b..5b79d458d 100755 --- a/bin/bootstrap_toolchain.py +++ b/bin/bootstrap_toolchain.py @@ -35,14 +35,9 @@ # other. The way to specify a single consistent set of components is via a build # number. This determines the location in s3 to get the artifacts. # CDP_BUILD_NUMBER - The CDP equivalent of a CDH_BUILD_NUMBER. -# USE_CDP_HIVE - If false, this will use the CDH version of all Hadoop components -# (except Ranger, which is CDP only). If true, this will use the CDP version of all -# Hadoop components (except Sentry, which is CDH only). # DOWNLOAD_CDH_COMPONENTS - When set to true, this script will also download and extract # the CDH/CDP Hadoop components (i.e. Hadoop, Hive, HBase, Sentry, Ranger, etc) into # CDH_COMPONENTS_HOME/CDP_COMPONENTS_HOME as appropriate. -# USE_CDH_KUDU - Kudu can be downloaded either from the toolchain or as a CDH component, -# depending on the value of USE_CDH_KUDU. # KUDU_IS_SUPPORTED - If KUDU_IS_SUPPORTED is false, Kudu is disabled and we download # the toolchain Kudu and use the symbols to compile a non-functional stub library so # that Impala has something to link against.
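With USE_CDP_HIVE and USE_CDH_KUDU removed, DOWNLOAD_CDH_COMPONENTS and KUDU_IS_SUPPORTED are the only environment variables left that gate what bootstrap_toolchain.py fetches. The snippet below is a minimal illustrative sketch of that simplified flow, not the script's actual driver: it borrows helper names that appear elsewhere in this patch (get_toolchain_downloads, get_hadoop_downloads, get_kudu_downloads), while plan_downloads and the exact environment checks are assumptions for illustration.

    import os

    def plan_downloads():
        # Toolchain packages are always fetched.
        packages = get_toolchain_downloads()
        # When Kudu is unsupported, only the stub-building toolchain Kudu is needed.
        use_kudu_stub = os.environ.get("KUDU_IS_SUPPORTED") != "true"
        packages += get_kudu_downloads(use_kudu_stub)
        if os.environ.get("DOWNLOAD_CDH_COMPONENTS") == "true":
            # CDP Hadoop, Hive, HBase and Tez, plus CDH Sentry and CDP Ranger.
            packages += get_hadoop_downloads()
        return packages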
@@ -360,49 +355,6 @@ class ToolchainKudu(ToolchainPackage): return False -class CdhKudu(CdhComponent): - def __init__(self, platform_label): - kudu_archive_tmpl = "kudu-${version}-" + platform_label - # IMPALA_KUDU_URL can contain '%(platform_label)', which needs to be replaced - # with the platform. We override this in os.environ so that it is picked up - # in EnvVersionedPackage. - kudu_url = os.environ.get("IMPALA_KUDU_URL") - if kudu_url: - kudu_url = kudu_url.replace("%(platform_label)", platform_label) - os.environ["IMPALA_KUDU_URL"] = kudu_url - super(CdhKudu, self).__init__('kudu', - archive_basename_tmpl=kudu_archive_tmpl, - unpack_directory_tmpl="kudu-${version}") - - def needs_download(self): - # This verifies that the unpack directory exists - if super(CdhKudu, self).needs_download(): - return True - # Additional check to distinguish this from the Kudu Java package - # Regardless of the actual build type, the 'kudu' tarball will always contain a - # 'debug' and a 'release' directory. - if not os.path.exists(os.path.join(self.pkg_directory(), "debug")): - return True - # Both the pkg_directory and the debug directory exist - return False - - -class CdhKuduJava(CdhComponent): - def __init__(self): - super(CdhKuduJava, self).__init__('kudu-java', - archive_basename_tmpl="kudu-${version}") - - def needs_download(self): - # This verify that the unpack directory exists - if super(CdhKuduJava, self).needs_download(): - return True - # Additional check to distinguish this from the Kudu package - # There should be jars under the kudu directory. - if len(glob.glob("{0}/*jar".format(self.pkg_directory()))) == 0: - return True - return False - - def try_get_platform_release_label(): """Gets the right package label from the OS version. Returns an OsMapping with both 'toolchain' and 'cdh' labels. Return None if not found. @@ -629,21 +581,17 @@ def get_toolchain_downloads(): def get_hadoop_downloads(): cluster_components = [] - use_cdp_hive = os.environ["USE_CDP_HIVE"] == "true" - if use_cdp_hive: - hadoop = CdpComponent("hadoop") - hbase = CdpComponent("hbase", archive_basename_tmpl="hbase-${version}-bin", - unpack_directory_tmpl="hbase-${version}") - hive = CdpComponent("hive", archive_basename_tmpl="apache-hive-${version}-bin") - hive_src = CdpComponent("hive-source", - explicit_version=os.environ.get("IMPALA_HIVE_VERSION"), - archive_basename_tmpl="hive-${version}-source", - unpack_directory_tmpl="hive-${version}") - tez = CdpComponent("tez", archive_basename_tmpl="tez-${version}-minimal", - makedir=True) - cluster_components.extend([hadoop, hbase, hive, hive_src, tez]) - else: - cluster_components.extend(map(CdhComponent, ["hadoop", "hbase", "hive"])) + hadoop = CdpComponent("hadoop") + hbase = CdpComponent("hbase", archive_basename_tmpl="hbase-${version}-bin", + unpack_directory_tmpl="hbase-${version}") + hive = CdpComponent("hive", archive_basename_tmpl="apache-hive-${version}-bin") + hive_src = CdpComponent("hive-source", + explicit_version=os.environ.get("IMPALA_HIVE_VERSION"), + archive_basename_tmpl="hive-${version}-source", + unpack_directory_tmpl="hive-${version}") + tez = CdpComponent("tez", archive_basename_tmpl="tez-${version}-minimal", + makedir=True) + cluster_components.extend([hadoop, hbase, hive, hive_src, tez]) # Sentry is always CDH cluster_components.append(CdhComponent("sentry")) # Ranger is always CDP @@ -654,24 +602,12 @@ def get_hadoop_downloads(): def get_kudu_downloads(use_kudu_stub): # If Kudu is not supported, we download centos7 kudu to build the kudu stub. 
- # TODO: Should this be from toolchain or CDH? Does it matter? kudu_downloads = [] if use_kudu_stub: kudu_downloads += [ToolchainKudu("centos7")] else: - use_cdh_kudu = os.getenv("USE_CDH_KUDU") == "true" - if use_cdh_kudu: - if not try_get_platform_release_label() \ - or not try_get_platform_release_label().cdh: - logging.error("CDH Kudu is not supported on this platform. Set " - "USE_CDH_KUDU=false to use the toolchain Kudu.") - sys.exit(1) - kudu_downloads += [CdhKudu(get_platform_release_label().cdh)] - # There is also a Kudu Java package. - kudu_downloads += [CdhKuduJava()] - else: - # Toolchain Kudu includes Java artifacts. - kudu_downloads += [ToolchainKudu()] + # Toolchain Kudu includes Java artifacts. + kudu_downloads += [ToolchainKudu()] return kudu_downloads @@ -690,14 +626,10 @@ def main(): and CDP_BUILD_NUMBER). Hadoop component packages are only downloaded if $DOWNLOAD_CDH_COMPONENTS is true. CDH Hadoop packages are downloaded into $CDH_COMPONENTS_HOME. CDP Hadoop packages are downloaded into $CDP_COMPONENTS_HOME. - The versions used for Hadoop components depend on whether USE_CDP_HIVE is true or - false. If true, most components get the CDP versions based on the $CDP_BUILD_NUMBER. - If false, most components get the CDH versions based on the $CDH_BUILD_NUMBER. - The exceptions are: + The versions used for Hadoop components are the CDP versions based on the + $CDP_BUILD_NUMBER. + The exception is: - sentry (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDH_BUILD_NUMBER) - - ranger (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDP_BUILD_NUMBER) - - kudu (currently always downloaded from $IMPALA_TOOLCHAIN_HOST for a given - $CDH_BUILD_NUMBER) If Kudu is not supported on this platform (or KUDU_IS_SUPPORTED=false), then this builds a Kudu stub to allow for compilation without Kudu support. """ diff --git a/bin/impala-config.sh b/bin/impala-config.sh index ab13665ee..f44c0ae9b 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -171,9 +171,6 @@ export IMPALA_TOOLCHAIN_HOST export CDH_BUILD_NUMBER=1814051 export CDH_MAVEN_REPOSITORY=\ "https://${IMPALA_TOOLCHAIN_HOST}/build/cdh_components/${CDH_BUILD_NUMBER}/maven" -export CDH_HADOOP_VERSION=3.0.0-cdh6.x-SNAPSHOT -export CDH_HBASE_VERSION=2.1.0-cdh6.x-SNAPSHOT -export CDH_HIVE_VERSION=2.1.1-cdh6.x-SNAPSHOT export CDH_SENTRY_VERSION=2.1.0-cdh6.x-SNAPSHOT export CDP_BUILD_NUMBER=2523282 @@ -193,8 +190,8 @@ export IMPALA_HUDI_VERSION=0.5.0-incubating export IMPALA_KITE_VERSION=1.0.0-cdh6.x-SNAPSHOT export IMPALA_ORC_JAVA_VERSION=1.6.2 -# When IMPALA_(CDH_COMPONENT)_URL are overridden, they may contain '$(platform_label)' -# which will be substituted for the CDH platform label in bootstrap_toolchain.py +# When IMPALA_(CDP_COMPONENT)_URL are overridden, they may contain '$(platform_label)' +# which will be substituted for the CDP platform label in bootstrap_toolchain.py unset IMPALA_HADOOP_URL unset IMPALA_HBASE_URL unset IMPALA_HIVE_URL @@ -216,9 +213,6 @@ if [ -f "$IMPALA_HOME/bin/impala-config-local.sh" ]; then . 
"$IMPALA_HOME/bin/impala-config-local.sh" fi -export CDH_HIVE_URL=${CDH_HIVE_URL-} -export CDH_HADOOP_URL=${CDH_HADOOP_URL-} -export CDH_HBASE_URL=${CDH_HBASE_URL-} export CDH_SENTRY_URL=${CDH_SENTRY_URL-} export CDP_HIVE_URL=${CDP_HIVE_URL-} @@ -230,35 +224,19 @@ export CDP_RANGER_URL=${CDP_RANGER_URL-} export CDH_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdh_components-$CDH_BUILD_NUMBER" export CDP_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdp_components-$CDP_BUILD_NUMBER" -export USE_CDP_HIVE=${USE_CDP_HIVE-true} export DISABLE_SENTRY=${DISABLE_SENTRY_OVERRIDE:-"true"} -if $USE_CDP_HIVE; then - # When USE_CDP_HIVE is set we use the CDP hive version to build as well as deploy in - # the minicluster - export CDH_MAJOR_VERSION=7 - export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION} - export IMPALA_HIVE_URL=${CDP_HIVE_URL-} - export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-} - export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION} - export IMPALA_HADOOP_URL=${CDP_HADOOP_URL-} - export IMPALA_HBASE_VERSION=${CDP_HBASE_VERSION} - export IMPALA_HBASE_URL=${CDP_HBASE_URL-} - export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION} - export IMPALA_TEZ_URL=${CDP_TEZ_URL-} - export IMPALA_KNOX_VERSION=${CDP_KNOX_VERSION} - export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/" -else - # CDH hive version is used to build and deploy in minicluster when USE_CDP_HIVE is - # false - export CDH_MAJOR_VERSION=6 - export IMPALA_HIVE_VERSION=${CDH_HIVE_VERSION} - export IMPALA_HIVE_URL=${CDH_HIVE_URL-} - export IMPALA_HADOOP_VERSION=${CDH_HADOOP_VERSION} - export IMPALA_HADOOP_URL=${CDH_HADOOP_URL-} - export IMPALA_HBASE_VERSION=${CDH_HBASE_VERSION} - export IMPALA_HBASE_URL=${CDH_HBASE_URL-} - export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/" -fi +export CDH_MAJOR_VERSION=7 +export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION} +export IMPALA_HIVE_URL=${CDP_HIVE_URL-} +export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-} +export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION} +export IMPALA_HADOOP_URL=${CDP_HADOOP_URL-} +export IMPALA_HBASE_VERSION=${CDP_HBASE_VERSION} +export IMPALA_HBASE_URL=${CDP_HBASE_URL-} +export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION} +export IMPALA_TEZ_URL=${CDP_TEZ_URL-} +export IMPALA_KNOX_VERSION=${CDP_KNOX_VERSION} +export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/" # Ozone always uses the CDP version export IMPALA_OZONE_VERSION=${CDP_OZONE_VERSION} @@ -273,10 +251,17 @@ export IMPALA_SENTRY_URL=${CDH_SENTRY_URL-} # Extract the first component of the hive version. # Allow overriding of Hive source location in case we want to build Impala without -# a complete Hive build. This is used by fe/pom.xml to activate compatibility shims -# for Hive-2 or Hive-3 +# a complete Hive build. This is used by various tests and scripts to enable and +# disable tests and functionality. export IMPALA_HIVE_MAJOR_VERSION=$(echo "$IMPALA_HIVE_VERSION" | cut -d . -f 1) +# Hive 1 and 2 are no longer supported. +if [[ "${IMPALA_HIVE_MAJOR_VERSION}" == "1" || + "${IMPALA_HIVE_MAJOR_VERSION}" == "2" ]]; then + echo "Hive 1 and 2 are no longer supported" + return 1 +fi + # It is important to have a coherent view of the JAVA_HOME and JAVA executable. # The JAVA_HOME should be determined first, then the JAVA executable should be # derived from JAVA_HOME. 
bin/bootstrap_development.sh adds code to @@ -376,27 +361,16 @@ export LOCAL_FS="file:${WAREHOUSE_LOCATION_PREFIX}" export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION}" ESCAPED_IMPALA_HOME=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$IMPALA_HOME") -if $USE_CDP_HIVE; then - export HIVE_HOME="$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin" - export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-"${CDP_COMPONENTS_HOME}/hive-\ +export HIVE_HOME="$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin" +export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-"${CDP_COMPONENTS_HOME}/hive-\ ${IMPALA_HIVE_VERSION}"} - # Set the path to the hive_metastore.thrift which is used to build thrift code - export HIVE_METASTORE_THRIFT_DIR=$HIVE_SRC_DIR/standalone-metastore/src/main/thrift - export TEZ_HOME="$CDP_COMPONENTS_HOME/tez-${IMPALA_TEZ_VERSION}-minimal" - export HBASE_HOME="$CDP_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}/" - # It is likely that devs will want to work with both the versions of metastore - # if cdp hive is being used change the metastore db name, so we don't have to - # format the metastore db everytime we switch between hive versions - export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_IMPALA_HOME)_cdp"} -else - export HIVE_HOME="$CDH_COMPONENTS_HOME/hive-${IMPALA_HIVE_VERSION}" - # Allow overriding of Hive source location in case we want to build Impala without -# a complete Hive build. - export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-"${HIVE_HOME}/src"} - export HIVE_METASTORE_THRIFT_DIR=$HIVE_SRC_DIR/metastore/if - export HBASE_HOME="$CDH_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}/" - export METASTORE_DB=${METASTORE_DB-$(cut -c-63 <<< HMS$ESCAPED_IMPALA_HOME)} -fi +# Set the path to the hive_metastore.thrift which is used to build thrift code +export HIVE_METASTORE_THRIFT_DIR=$HIVE_SRC_DIR/standalone-metastore/src/main/thrift +export TEZ_HOME="$CDP_COMPONENTS_HOME/tez-${IMPALA_TEZ_VERSION}-minimal" +export HBASE_HOME="$CDP_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}/" +# Previously, there were multiple configurations and the "_cdp" included below +# allowed the two to be distinct. We keep this "_cdp" for historical reasons. +export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_IMPALA_HOME)_cdp"} # Set the Hive binaries in the path export PATH="$HIVE_HOME/bin:$PATH" @@ -692,56 +666,25 @@ fi export USE_KUDU_DEBUG_BUILD=${USE_KUDU_DEBUG_BUILD-false} # Kudu doesn't compile on some old Linux distros. KUDU_IS_SUPPORTED enables building Kudu -# into the backend. We prefer to pull Kudu in from the toolchain, but will fall back to -# using the CDH Kudu by setting USE_CDH_KUDU to true. -export USE_CDH_KUDU=${USE_CDH_KUDU-false} +# into the backend. if [[ -z "${KUDU_IS_SUPPORTED-}" ]]; then if [[ -n "$KUDU_BUILD_DIR" ]]; then KUDU_IS_SUPPORTED=true elif $IS_OSX; then - USE_CDH_KUDU=false KUDU_IS_SUPPORTED=false else KUDU_IS_SUPPORTED=true - if $USE_CDH_KUDU; then - if ! which lsb_release &>/dev/null; then - echo Unable to find the 'lsb_release' command. \ - Please ensure it is available in your PATH. 1>&2 - return 1 - fi - DISTRO_VERSION="$(lsb_release -sir 2>&1)" - if [[ $? -ne 0 ]]; then - echo lsb_release command failed, output was: "$DISTRO_VERSION" 1>&2 - return 1 - fi - # Remove spaces, trim minor versions, and convert to lowercase. - DISTRO_VERSION="$(tr -d ' \n' <<< "$DISTRO_VERSION" | cut -d. 
-f1 | tr "A-Z" "a-z")" - if [[ "$DISTRO_VERSION" == "ubuntu14" ]]; then - USE_CDH_KUDU=false - fi - fi fi fi export KUDU_IS_SUPPORTED -if $USE_CDH_KUDU; then - export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"1.11.0-cdh6.x-SNAPSHOT"} - export IMPALA_KUDU_JAVA_VERSION=${IMPALA_KUDU_JAVA_VERSION-"1.11.0-cdh6.x-SNAPSHOT"} - export IMPALA_KUDU_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION - export IMPALA_KUDU_JAVA_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION - # If USE_CDH_KUDU is true, Toolchain Kudu maven repository should be disabled. - # We get Kudu Java artifacts from CDH. - export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY="file:///non/existing/repo" - export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=false -else - export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"4ed0dbbd1"} - export IMPALA_KUDU_JAVA_VERSION=${IMPALA_KUDU_JAVA_VERSION-"1.12.0-SNAPSHOT"} - export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION - export IMPALA_KUDU_JAVA_HOME=${IMPALA_TOOLCHAIN}/kudu-${IMPALA_KUDU_VERSION}/java - export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY=\ +export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"4ed0dbbd1"} +export IMPALA_KUDU_JAVA_VERSION=${IMPALA_KUDU_JAVA_VERSION-"1.12.0-SNAPSHOT"} +export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION +export IMPALA_KUDU_JAVA_HOME=${IMPALA_TOOLCHAIN}/kudu-${IMPALA_KUDU_VERSION}/java +export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY=\ "file://${IMPALA_KUDU_JAVA_HOME}/repository" - export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true -fi +export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true # Set $THRIFT_HOME to the Thrift directory in toolchain. export THRIFT_HOME="${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_VERSION}" diff --git a/bin/jenkins/build-all-flag-combinations.sh b/bin/jenkins/build-all-flag-combinations.sh index 19381464c..a6a0d2cd4 100755 --- a/bin/jenkins/build-all-flag-combinations.sh +++ b/bin/jenkins/build-all-flag-combinations.sh @@ -42,8 +42,6 @@ CONFIGS=( "-skiptests -noclean -asan" "-skiptests -noclean -tsan" "-skiptests -noclean -ubsan -so -ninja" - # USE_CDP_HIVE=true build: - "-skiptests -noclean -use_cdh_hive" ) FAILED="" @@ -58,14 +56,7 @@ trap onexit EXIT mkdir -p ${TMP_DIR} for CONFIG in "${CONFIGS[@]}"; do - CONFIG2=${CONFIG/-use_cdh_hive/} - if [[ "$CONFIG" != "$CONFIG2" ]]; then - CONFIG=$CONFIG2 - export USE_CDP_HIVE=false - else - export USE_CDP_HIVE=true - fi - DESCRIPTION="Options $CONFIG USE_CDP_HIVE=$USE_CDP_HIVE" + DESCRIPTION="Options $CONFIG" if [[ $# == 1 && $1 == "--dryrun" ]]; then echo $DESCRIPTION diff --git a/fe/pom.xml b/fe/pom.xml index 24903de6a..71320d3cf 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -965,245 +965,6 @@ under the License. 
- - - hive-2 - - - env.IMPALA_HIVE_MAJOR_VERSION - 2 - - - - - org.apache.hive - hive-service - ${hive.version} - - - - org.apache.hive - hive-llap-server - - - - net.minidev - json-smart - - - org.apache.calcite.avatica - avatica - - - org.eclipse.jetty - * - - - ant - * - - - - - - org.apache.hive - hive-serde - ${hive.version} - - - - net.minidev - json-smart - - - org.eclipse.jetty - * - - - - org.fusesource.leveldbjni - * - - - - - - org.apache.hive - hive-exec - ${hive.version} - - - - org.apache.logging.log4j - log4j-slf4j-impl - - - - org.apache.logging.log4j - log4j-1.2-api - - - - net.minidev - json-smart - - - org.apache.calcite.avatica - avatica - - - ant - * - - - org.apache.ant - * - - - orc - * - - - org.apache.orc - * - - - - - - org.apache.hive - hive-common - ${hive.version} - - - - org.apache.logging.log4j - log4j-slf4j-impl - - - - org.apache.logging.log4j - log4j-1.2-api - - - - net.minidev - json-smart - - - org.eclipse.jetty - * - - - org.apache.ant - * - - - - - - org.apache.hive - hive-jdbc - ${hive.version} - test - - - - org.apache.logging.log4j - log4j-slf4j-impl - - - net.minidev - json-smart - - - org.apache.ant - * - - - org.eclipse.jetty - * - - - - io.netty - * - - - - - - org.apache.hive - hive-hbase-handler - ${hive.version} - - - - org.apache.logging.log4j - log4j-slf4j-impl - - - - net.minidev - json-smart - - - org.apache.calcite.avatica - avatica - - - org.eclipse.jetty - * - - - - io.netty - * - - - - - - org.apache.hive - hive-metastore - ${hive.version} - - - - org.apache.logging.log4j - log4j-slf4j-impl - - - - net.minidev - json-smart - - - - - - org.apache.hive.shims - hive-shims-common - ${hive.version} - - - - org.apache.logging.log4j - log4j-slf4j-impl - - - - org.fusesource.leveldbjni - * - - - - - - hive-3 diff --git a/fe/src/compat-hive-2/java/org/apache/hadoop/hive/common/ValidWriteIdList.java b/fe/src/compat-hive-2/java/org/apache/hadoop/hive/common/ValidWriteIdList.java deleted file mode 100644 index 70399a440..000000000 --- a/fe/src/compat-hive-2/java/org/apache/hadoop/hive/common/ValidWriteIdList.java +++ /dev/null @@ -1,74 +0,0 @@ -// // Licensed to the Apache Software Foundation (ASF) under one -// // or more contributor license agreements. See the NOTICE file -// // distributed with this work for additional information -// // regarding copyright ownership. The ASF licenses this file -// // to you under the Apache License, Version 2.0 (the -// // "License"); you may not use this file except in compliance -// // with the License. You may obtain a copy of the License at -// // -// // http://www.apache.org/licenses/LICENSE-2.0 -// // -// // Unless required by applicable law or agreed to in writing, -// // software distributed under the License is distributed on an -// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// // KIND, either express or implied. See the License for the -// // specific language governing permissions and limitations -// // under the License. 
-package org.apache.hadoop.hive.common; - -/** - * ValidWriteIdList is not supported in Hive 2 - */ -public class ValidWriteIdList { - public enum RangeResponse {NONE, SOME, ALL}; - - public boolean isWriteIdValid(long writeId) { - throw new UnsupportedOperationException("isWriteIdValid not supported for " - + getClass().getName()); - } - - public boolean isValidBase(long writeId) { - throw new UnsupportedOperationException("isValidBase not supported for " - + getClass().getName()); - } - - public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) { - throw new UnsupportedOperationException("isWriteIdRangeValid not supported for " - + getClass().getName()); - } - - public String writeToString() { - throw new UnsupportedOperationException("writeToStringd not supported for " - + getClass().getName()); - } - - public void readFromString(String src) { - throw new UnsupportedOperationException("readFromString not supported for " - + getClass().getName()); - } - - public long getHighWatermark() { - throw new UnsupportedOperationException("getHighWatermark not supported for " - + getClass().getName()); - } - - public long[] getInvalidWriteIds() { - throw new UnsupportedOperationException("getInvalidWriteIds not supported for " - + getClass().getName()); - } - - public boolean isWriteIdAborted(long writeId) { - throw new UnsupportedOperationException("isWriteIdAborted not supported for " - + getClass().getName()); - } - - public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) { - throw new UnsupportedOperationException( - "isWriteIdRangeAborted not supported for " + getClass().getName()); - } - - public Long getMinOpenWriteId() { - throw new UnsupportedOperationException("getMinOpenWriteId not supported for " - + getClass().getName()); - } -} diff --git a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java b/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java deleted file mode 100644 index 39cd4ffa3..000000000 --- a/fe/src/compat-hive-2/java/org/apache/impala/compat/MetastoreShim.java +++ /dev/null @@ -1,556 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.impala.compat; - -import static org.apache.impala.service.MetadataOp.TABLE_TYPE_TABLE; -import static org.apache.impala.service.MetadataOp.TABLE_TYPE_VIEW; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.EnumSet; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.common.ValidTxnList; -import org.apache.hadoop.hive.common.ValidWriteIdList; -import org.apache.hadoop.hive.metastore.api.SQLForeignKey; -import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; -import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.InvalidInputException; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.LockComponent; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.FireEventRequest; -import org.apache.hadoop.hive.metastore.api.FireEventRequestData; -import org.apache.hadoop.hive.metastore.api.InsertEventRequestData; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.metastore.messaging.AlterTableMessage; -import org.apache.hadoop.hive.metastore.messaging.EventMessage; -import org.apache.hadoop.hive.metastore.messaging.InsertMessage; -import org.apache.hadoop.hive.metastore.messaging.MessageDeserializer; -import org.apache.hadoop.hive.metastore.messaging.json.ExtendedJSONMessageFactory; -import org.apache.hive.service.rpc.thrift.TGetColumnsReq; -import org.apache.hive.service.rpc.thrift.TGetFunctionsReq; -import org.apache.hive.service.rpc.thrift.TGetSchemasReq; -import org.apache.hive.service.rpc.thrift.TGetTablesReq; -import org.apache.impala.authorization.User; -import org.apache.impala.catalog.CatalogServiceCatalog; -import org.apache.impala.catalog.HdfsPartition; -import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient; -import org.apache.impala.common.ImpalaException; -import org.apache.impala.common.ImpalaRuntimeException; -import org.apache.impala.common.Pair; -import org.apache.impala.common.TransactionException; -import org.apache.impala.service.Frontend; -import org.apache.impala.service.MetadataOp; -import org.apache.impala.thrift.TMetadataOpRequest; -import org.apache.impala.thrift.TResultSet; -import 
org.apache.impala.util.AcidUtils.TblTransaction; -import org.apache.impala.util.MetaStoreUtil.InsertEventInfo; -import org.apache.log4j.Logger; -import org.apache.thrift.TException; - -/** - * A wrapper around some of Hive's Metastore API's to abstract away differences - * between major versions of Hive. This implements the shimmed methods for Hive 2. - */ -public class MetastoreShim { - private static final Logger LOG = Logger.getLogger(MetastoreShim.class); - - public static TblTransaction createTblTransaction( - IMetaStoreClient client, Table tbl, long txnId) { - throw new UnsupportedOperationException("createTblTransaction"); - } - - public static void commitTblTransactionIfNeeded(IMetaStoreClient client, - TblTransaction tblTxn) throws TransactionException { - throw new UnsupportedOperationException("commitTblTransactionIfNeeded"); - } - - public static void abortTblTransactionIfNeeded(IMetaStoreClient client, - TblTransaction tblTxn) { - throw new UnsupportedOperationException("abortTblTransactionIfNeeded"); - } - - /** - * Wrapper around MetaStoreUtils.validateName() to deal with added arguments. - */ - public static boolean validateName(String name) { - return MetaStoreUtils.validateName(name, null); - } - - /** - * Hive-3 only function - */ - public static void alterTableWithTransaction(IMetaStoreClient client, - Table tbl, TblTransaction tblTxn) { - throw new UnsupportedOperationException("alterTableWithTransaction"); - } - - /** - * Wrapper around IMetaStoreClient.alter_partition() to deal with added - * arguments. - */ - public static void alterPartition(IMetaStoreClient client, Partition partition) - throws InvalidOperationException, MetaException, TException { - client.alter_partition( - partition.getDbName(), partition.getTableName(), partition, null); - } - - /** - * Wrapper around IMetaStoreClient.alter_partitions() to deal with added - * arguments. - */ - public static void alterPartitions(IMetaStoreClient client, String dbName, - String tableName, List partitions) - throws InvalidOperationException, MetaException, TException { - client.alter_partitions(dbName, tableName, partitions, null); - } - - /** - * Wrapper around IMetaStoreClient.createTableWithConstraints() to deal with added - * arguments. - */ - public static void createTableWithConstraints(IMetaStoreClient client, - Table newTbl, List primaryKeys, List foreignKeys) - throws InvalidOperationException, MetaException, TException { - client.createTableWithConstraints(newTbl, primaryKeys, foreignKeys); - } - - /** - * Hive-3 only function - */ - public static void alterPartitionsWithTransaction(IMetaStoreClient client, - String dbName, String tblName, List partitions, TblTransaction tblTxn) { - throw new UnsupportedOperationException("alterTableWithTransaction"); - } - - /** - * Wrapper around IMetaStoreClient.getTableColumnStatistics() to deal with added - * arguments. - */ - public static List getTableColumnStatistics( - IMetaStoreClient client, String dbName, String tableName, List colNames) - throws NoSuchObjectException, MetaException, TException { - return client.getTableColumnStatistics(dbName, tableName, colNames); - } - - /** - * Wrapper around IMetaStoreClient.deleteTableColumnStatistics() to deal with added - * arguments. 
- */ - public static boolean deleteTableColumnStatistics(IMetaStoreClient client, - String dbName, String tableName, String colName) - throws NoSuchObjectException, MetaException, InvalidObjectException, TException, - InvalidInputException { - return client.deleteTableColumnStatistics(dbName, tableName, colName); - } - - /** - * Wrapper around ColumnStatistics c'tor to deal with the added engine property. - */ - public static ColumnStatistics createNewHiveColStats() { - return new ColumnStatistics(); - } - - /** - * Wrapper around MetaStoreUtils.updatePartitionStatsFast() to deal with added - * arguments. - */ - public static void updatePartitionStatsFast(Partition partition, Table tbl, - Warehouse warehouse) throws MetaException { - MetaStoreUtils.updatePartitionStatsFast(partition, warehouse, null); - } - - /** - * Return the maximum number of Metastore objects that should be retrieved in - * a batch. - */ - public static String metastoreBatchRetrieveObjectsMaxConfigKey() { - return HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX.toString(); - } - - /** - * Return the key and value that should be set in the partition parameters to - * mark that the stats were generated automatically by a stats task. - */ - public static Pair statsGeneratedViaStatsTaskParam() { - return Pair.create(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK); - } - - public static TResultSet execGetFunctions( - Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException { - TGetFunctionsReq req = request.getGet_functions_req(); - return MetadataOp.getFunctions( - frontend, req.getCatalogName(), req.getSchemaName(), req.getFunctionName(), user); - } - - public static TResultSet execGetColumns( - Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException { - TGetColumnsReq req = request.getGet_columns_req(); - return MetadataOp.getColumns(frontend, req.getCatalogName(), req.getSchemaName(), - req.getTableName(), req.getColumnName(), user); - } - - public static TResultSet execGetTables( - Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException { - TGetTablesReq req = request.getGet_tables_req(); - return MetadataOp.getTables(frontend, req.getCatalogName(), req.getSchemaName(), - req.getTableName(), req.getTableTypes(), user); - } - - public static TResultSet execGetSchemas( - Frontend frontend, TMetadataOpRequest request, User user) throws ImpalaException { - TGetSchemasReq req = request.getGet_schemas_req(); - return MetadataOp.getSchemas( - frontend, req.getCatalogName(), req.getSchemaName(), user); - } - - /** - * Supported HMS-2 types - */ - public static final EnumSet IMPALA_SUPPORTED_TABLE_TYPES = EnumSet - .of(TableType.EXTERNAL_TABLE, TableType.MANAGED_TABLE, TableType.VIRTUAL_VIEW); - - /** - * mapping between the HMS-2 type the Impala types - */ - public static final ImmutableMap HMS_TO_IMPALA_TYPE = - new ImmutableMap.Builder() - .put("EXTERNAL_TABLE", TABLE_TYPE_TABLE) - .put("MANAGED_TABLE", TABLE_TYPE_TABLE) - .put("INDEX_TABLE", TABLE_TYPE_TABLE) - .put("VIRTUAL_VIEW", TABLE_TYPE_VIEW).build(); - - public static String mapToInternalTableType(String typeStr) { - String defaultTableType = TABLE_TYPE_TABLE; - - TableType tType; - - if (typeStr == null) return defaultTableType; - try { - tType = TableType.valueOf(typeStr.toUpperCase()); - } catch (Exception e) { - return defaultTableType; - } - switch (tType) { - case EXTERNAL_TABLE: - case MANAGED_TABLE: - case INDEX_TABLE: - return TABLE_TYPE_TABLE; - case VIRTUAL_VIEW: - 
return TABLE_TYPE_VIEW; - default: - return defaultTableType; - } - - } - - /** - * Wrapper method which returns ExtendedJSONMessageFactory in case Impala is - * building against Hive-2 to keep compatibility with Sentry - */ - public static MessageDeserializer getMessageDeserializer() { - return ExtendedJSONMessageFactory.getInstance().getDeserializer(); - } - - /** - * Wrapper around FileUtils.makePartName to deal with package relocation in Hive 3 - * @param partitionColNames - * @param values - * @return - */ - public static String makePartName(List partitionColNames, List values) { - return FileUtils.makePartName(partitionColNames, values); - } - - /** - * Wrapper method around message factory's build alter table message due to added - * arguments in hive 3. - */ - @VisibleForTesting - public static AlterTableMessage buildAlterTableMessage(Table before, Table after, - boolean isTruncateOp, long writeId) { - Preconditions.checkArgument(writeId < 0, "Write ids are not supported in Hive-2 " - + "compatible build"); - Preconditions.checkArgument(!isTruncateOp, "Truncate operation is not supported in " - + "alter table messages in Hive-2 compatible build"); - return ExtendedJSONMessageFactory.getInstance().buildAlterTableMessage(before, after); - } - - /** - * Wrapper around HMS-2 message serializer - * @param message - * @return serialized string to use used in the NotificationEvent's message field - */ - @VisibleForTesting - public static String serializeEventMessage(EventMessage message) { - return message.toString(); - } - - public static String getAllColumnsInformation(List tabCols, - List partitionCols, boolean printHeader, boolean isOutputPadded, - boolean showPartColsSeparately) { - return MetaDataFormatUtils - .getAllColumnsInformation(tabCols, partitionCols, printHeader, isOutputPadded, - showPartColsSeparately); - } - - /** - * Wrapper method around Hive's MetadataFormatUtils.getTableInformation which has - * changed significantly in Hive-3 - * @return - */ - public static String getTableInformation( - org.apache.hadoop.hive.ql.metadata.Table table) { - return MetaDataFormatUtils.getTableInformation(table); - } - - /** - * Wrapper method around BaseSemanticAnalyzer's unespaceSQLString to be compatibility - * with Hive. 
Takes in a normalized value of the string surrounded by single quotes - */ - public static String unescapeSQLString(String normalizedStringLiteral) { - return BaseSemanticAnalyzer.unescapeSQLString(normalizedStringLiteral); - } - - /** - * This is Hive-3 only function - */ - public static ValidWriteIdList fetchValidWriteIds(IMetaStoreClient client, - String tableFullName) { - throw new UnsupportedOperationException("fetchValidWriteIds not supported"); - } - - /** - * Hive-3 only function - */ - public static ValidWriteIdList getValidWriteIdListFromString(String validWriteIds) { - throw new UnsupportedOperationException( - "getValidWriteIdListFromString not supported"); - } - - /** - * Hive-3 only function - */ - public static ValidTxnList getValidTxns(IMetaStoreClient client) throws TException { - throw new UnsupportedOperationException("getValidTxns not supported"); - } - - /** - * Hive-3 only function - * -1 means undefined - */ - public static long getWriteIdFromMSPartition(Partition partition) { - return -1L; - } - - /** - * Hive-3 only function - */ - public static void setWriteIdForMSPartition(Partition partition, long writeId) { - } - - /** - * Hive-3 only function - * -1 means undefined - */ - public static long getWriteIdFromMSTable(Table msTbl) { - return -1L; - } - - public static boolean hasTableCapability(Table msTbl, byte requiredCapability) { - throw new UnsupportedOperationException("hasTableCapability not supported"); - } - - public static String getTableAccessType(Table msTbl) { - throw new UnsupportedOperationException("getTableAccessType not supported"); - } - - public static void setTableAccessType(Table msTbl, byte accessType) { - throw new UnsupportedOperationException("setTableAccessType not supported"); - } - - public static void setHiveClientCapabilities() { - throw new UnsupportedOperationException("setHiveClientCapabilities not supported"); - } - - /** - * Hive-3 only function - */ - public static long openTransaction(IMetaStoreClient client) - throws TransactionException { - throw new UnsupportedOperationException("openTransaction is not supported."); - } - - /** - * Hive-3 only function - */ - public static void commitTransaction(IMetaStoreClient client, long txnId) - throws TransactionException { - throw new UnsupportedOperationException("commitTransaction is not supported."); - } - - /** - * Hive-3 only function - */ - public static void abortTransaction(IMetaStoreClient client, long txnId) - throws TransactionException { - throw new UnsupportedOperationException("abortTransaction is not supported."); - } - - /** - * Hive-3 only function - */ - public static void releaseLock(IMetaStoreClient client, long lockId) - throws TransactionException { - throw new UnsupportedOperationException("releaseLock is not supported."); - } - - /** - * Hive-3 only function - */ - public static boolean heartbeat(IMetaStoreClient client, - long txnId, long lockId) throws TransactionException { - throw new UnsupportedOperationException("heartbeat is not supported."); - } - - /** - * Hive-3 only function - */ - public static long acquireLock(IMetaStoreClient client, long txnId, - List lockComponents) - throws TransactionException { - throw new UnsupportedOperationException("acquireLock is not supported."); - } - - /** - * Hive-3 only function - */ - public static long allocateTableWriteId(IMetaStoreClient client, long txnId, - String dbName, String tableName) throws TransactionException { - throw new UnsupportedOperationException("allocateTableWriteId is not supported."); - } - - 
/** - * Hive-3 only function - */ - public static void setTableColumnStatsTransactional(IMetaStoreClient client, - Table msTbl, ColumnStatistics colStats, TblTransaction tblTxn) - throws ImpalaRuntimeException { - throw new UnsupportedOperationException( - "setTableColumnStatsTransactional is not supported."); - } - - /** - * @return the shim version. - */ - public static long getMajorVersion() { - return 2; - } - - /** - * Return the default table path for a new table. - * - * Hive-3 doesn't allow managed table to be non transactional after HIVE-22158. - * Creating a non transactional managed table will finally result in an external table - * with table property "external.table.purge" set to true. As the table type become - * EXTERNAL, the location will be under "metastore.warehouse.external.dir" (HIVE-19837, - * introduces in hive-2.7, not in hive-2.1.x-cdh6.x yet). - */ - public static String getPathForNewTable(Database db, Table tbl) - throws MetaException { - return new Path(db.getLocationUri(), tbl.getTableName().toLowerCase()).toString(); - } - - /** - * Fire insert events asynchronously. This creates a single thread to execute the - * fireInsertEvent method and shuts down the thread after it has finished. - * In case of any exception, we just log the failure of firing insert events. - */ - public static List fireInsertEvents(MetaStoreClient msClient, - List insertEventInfos, String dbName, String tableName) { - ExecutorService fireInsertEventThread = Executors.newSingleThreadExecutor(); - CompletableFuture.runAsync(() -> { - try { - fireInsertEventHelper(msClient.getHiveClient(), insertEventInfos, dbName, tableName); - } catch (Exception e) { - LOG.error("Failed to fire insert event. Some tables might not be" - + " refreshed on other impala clusters.", e); - } finally { - msClient.close(); - } - }, Executors.newSingleThreadExecutor()).thenRun(() -> - fireInsertEventThread.shutdown()); - return Collections.emptyList(); - } - - /** - * Fires an insert event to HMS notification log. In Hive-2 for partitioned table, - * each existing partition touched by the insert will fire a separate insert event. 
- * @param msClient Metastore client, - * @param insertEventInfos A list of insert event encapsulating the information needed - * to fire insert - * @param dbName - * @param tableName - */ - @VisibleForTesting - public static void fireInsertEventHelper(IMetaStoreClient msClient, - List insertEventInfos, String dbName, String tableName) - throws TException { - Preconditions.checkNotNull(msClient); - Preconditions.checkNotNull(dbName); - Preconditions.checkNotNull(tableName); - for (InsertEventInfo info : insertEventInfos) { - Preconditions.checkNotNull(info.getNewFiles()); - LOG.debug("Firing an insert event for " + tableName); - FireEventRequestData data = new FireEventRequestData(); - InsertEventRequestData insertData = new InsertEventRequestData(); - data.setInsertData(insertData); - FireEventRequest rqst = new FireEventRequest(true, data); - rqst.setDbName(dbName); - rqst.setTableName(tableName); - insertData.setFilesAdded(new ArrayList<>(info.getNewFiles())); - insertData.setReplace(info.isOverwrite()); - if (info.getPartVals() != null) rqst.setPartitionVals(info.getPartVals()); - msClient.fireListenerEvent(rqst); - } - } -} diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh index 054f7c3e3..8d20b833c 100755 --- a/testdata/bin/create-load-data.sh +++ b/testdata/bin/create-load-data.sh @@ -427,11 +427,6 @@ function copy-and-load-dependent-tables { # TODO: Find a good way to integrate this with the normal data loading scripts beeline -n $USER -u "${JDBC_URL}" -f\ ${IMPALA_HOME}/testdata/bin/load-dependent-tables.sql - - if [[ "$IMPALA_HIVE_MAJOR_VERSION" == "2" ]]; then - beeline -n $USER -u "${JDBC_URL}" -f\ - ${IMPALA_HOME}/testdata/bin/load-dependent-tables-hive2.sql - fi } function create-internal-hbase-table { diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh index 47b47c23d..ff089d4d2 100755 --- a/testdata/bin/run-hive-server.sh +++ b/testdata/bin/run-hive-server.sh @@ -50,10 +50,6 @@ do ONLY_METASTORE=1 ;; -with_ranger) - if [[ "$USE_CDP_HIVE" = "false" ]]; then - echo "Ranger authorization is not supported in Hive 2." - exit 1 - fi ENABLE_RANGER_AUTH=1 echo "Starting Hive with Ranger authorization." ;; @@ -78,7 +74,7 @@ suspend=n,address=30010" # CDH Hive metastore scripts do not do so. This is currently to make sure that we can run # all the tests including sentry tests # TODO: This can be removed when we move to Ranger completely -if [[ "$USE_CDP_HIVE" = "true" && -n "$SENTRY_HOME" ]]; then +if [[ -n "$SENTRY_HOME" ]]; then for f in ${SENTRY_HOME}/lib/sentry-binding-hive*.jar; do FILE_NAME=$(basename $f) # exclude all the hive jars from being included in the classpath since Sentry @@ -109,19 +105,17 @@ fi # but compactions are initiated from the HMS in Hive 3. This may change at # some point in the future, in which case we can add this to only the # HS2 classpath. -if ${USE_CDP_HIVE} ; then - export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${TEZ_HOME}/* - # This is a little hacky, but Tez bundles a bunch of junk into lib/, such - # as extra copies of the hadoop libraries, etc, and we want to avoid conflicts. - # So, we'll be a bit choosy about what we add to the classpath here. 
- for jar in $TEZ_HOME/lib/* ; do - case $(basename $jar) in - commons-*|RoaringBitmap*) - export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$jar - ;; - esac - done -fi +export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${TEZ_HOME}/* +# This is a little hacky, but Tez bundles a bunch of junk into lib/, such +# as extra copies of the hadoop libraries, etc, and we want to avoid conflicts. +# So, we'll be a bit choosy about what we add to the classpath here. +for jar in $TEZ_HOME/lib/* ; do + case $(basename $jar) in + commons-*|RoaringBitmap*) + export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$jar + ;; + esac +done # Add kudu-hive.jar to the Hive Metastore classpath, so that Kudu's HMS # plugin can be loaded. diff --git a/testdata/cluster/admin b/testdata/cluster/admin index d777133d7..825b7d2fa 100755 --- a/testdata/cluster/admin +++ b/testdata/cluster/admin @@ -31,18 +31,10 @@ set -euo pipefail . $IMPALA_HOME/bin/report_build_error.sh setup_report_build_error -: ${INCLUDE_YARN=} - -# For Hive 3, we require Yarn for Tez support. -if "$USE_CDP_HIVE"; then - INCLUDE_YARN=1 -fi - while getopts vy OPT; do case $OPT in v) set -x;; - y) export INCLUDE_YARN=1;; - ?) echo "Usage: $0 [-v (verbose) -k (kerberize) -y (yarn)] ACTION (see source...)"; exit 1;; + ?) echo "Usage: $0 [-v (verbose) -k (kerberize)] ACTION (see source...)"; exit 1;; esac done shift $(($OPTIND-1)) @@ -63,10 +55,7 @@ export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster if [[ "$TARGET_FILESYSTEM" == "hdfs" ]]; then # The check above indicates that the regular mini-cluster is in use. - SUPPORTED_SERVICES=(hdfs kms) - if [ -n "${INCLUDE_YARN}" ]; then - SUPPORTED_SERVICES+=(yarn) - fi + SUPPORTED_SERVICES=(hdfs kms yarn) else # Either a remote distributed file system or a local non-distributed file system is # in use. Currently the only service that is expected to work is Kudu, though in theory diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py index 29ae1c67f..035795c3a 100644 --- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py +++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py @@ -22,7 +22,6 @@ import sys kerberize = os.environ.get('IMPALA_KERBERIZE') == 'true' target_filesystem = os.environ.get('TARGET_FILESYSTEM') -use_cdp_components = os.environ.get('USE_CDP_HIVE') == 'true' compression_codecs = [ 'org.apache.hadoop.io.compress.GzipCodec', @@ -90,6 +89,12 @@ CONFIG = { # This property can be used in tests to ascertain that this core-site.xml from # the classpath has been loaded. (Ex: TestRequestPoolService) 'impala.core-site.overridden': 'true', + + # Hadoop changed behaviors for S3AFilesystem to check permissions for the bucket + # on initialization (see HADOOP-16711). Some frontend tests access non-existent + # buckets and rely on the old behavior. This also means that the tests do not + # require AWS credentials. + 'fs.s3a.bucket.probe': '1', } if target_filesystem == 's3': @@ -110,10 +115,3 @@ if kerberize: 'hadoop.proxyuser.hive.hosts': '*', 'hadoop.proxyuser.hive.groups': '*', }) - -if use_cdp_components: - # Hadoop changed behaviors for S3AFilesystem to check permissions for the bucket - # on initialization (see HADOOP-16711). Some frontend tests access non-existent - # buckets and rely on the old behavior. This also means that the tests do not - # require AWS credentials. - CONFIG.update({'fs.s3a.bucket.probe': '1'})