IMPALA-9218: Add support for locally compiled Hive

- Add HIVE_VERSION_OVERRIDE, HIVE_STORAGE_API_VERSION_OVERRIDE,
  HIVE_METASTORE_THRIFT_DIR_OVERRIDE, HIVE_HOME_OVERRIDE environment
  variable support to impala-config.sh
- When used together with HIVE_SRC_DIR_OVERRIDE, these variables allow a user
  to specify a locally compiled version of Hive for development and the
  minicluster
- Hive jars are expected to have been installed into the local maven
  repository
- Currently only version 3 of Hive is supported due to the absence of
  API shims for Hive 4.0
Example:
  ~/hive $ mvn package install -Pdist -DskipTests

Example configuration:
export HIVE_VERSION_OVERRIDE=3.1.0-SNAPSHOT
export HIVE_STORAGE_API_VERSION_OVERRIDE=2.6.0
export HIVE_HOME_OVERRIDE=\
~/hive/packaging/target/apache-hive-3.1.0-SNAPSHOT-bin/apache-hive-3.1.0-SNAPSHOT-bin
export HIVE_SRC_DIR_OVERRIDE=~/hive
export HIVE_METASTORE_THRIFT_DIR_OVERRIDE=~/hive/standalone-metastore/src/main/thrift/

Change-Id: I21892c153c445e3a5d93f2bc8f5e0b799929dd34
Reviewed-on: http://gerrit.cloudera.org:8080/17094
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
John Sherman
2021-02-20 17:04:24 +00:00
committed by Impala Public Jenkins
parent 2039746ebe
commit a29d06db53
4 changed files with 26 additions and 7 deletions

View File

@@ -66,3 +66,14 @@ can do so through the environment variables and scripts listed below.
| HBASE_HOME | "${CDP_COMPONENTS_HOME}/hbase-${IMPALA_HBASE_VERSION}/" | |
| THRIFT_HOME | "${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_VERSION}" | |
## Hive Dependency Overrides
These variables are typically used together to specify a local build of Apache Hive.
Care should be taken when using them: they take precedence over the defaults in
impala-config.sh, so they may cause confusion when switching between branches or
versions of Apache Impala.
| Environment variable | Description |
|----------------------|-------------|
| HIVE_VERSION_OVERRIDE | Specifies a Hive version different from the default |
| HIVE_STORAGE_API_VERSION_OVERRIDE | Specifies a Hive Storage API version different from the default |
| HIVE_METASTORE_THRIFT_DIR_OVERRIDE | Specifies the location of the metastore Thrift files to use during Thrift compilation |
| HIVE_HOME_OVERRIDE | Specifies the location of the Hive installation |

View File

@@ -466,6 +466,13 @@ def get_hadoop_downloads():
unpack_directory_tmpl="hive-${version}")
tez = CdpComponent("tez", archive_basename_tmpl="tez-${version}-minimal",
makedir=True)
use_override_hive = \
"HIVE_VERSION_OVERRIDE" in os.environ and os.environ["HIVE_VERSION_OVERRIDE"] != ""
# If we are using a locally built Hive we do not have a need to pull hive as a
# dependency
if use_override_hive:
cluster_components.extend([hadoop, hbase, tez])
else:
cluster_components.extend([hadoop, hbase, hive, hive_src, tez])
# Ranger is always CDP
cluster_components.append(CdpComponent("ranger",

View File

@@ -239,7 +239,7 @@ export IMPALA_HADOOP_URL=${CDP_HADOOP_URL-}
export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
export IMPALA_HBASE_VERSION=${CDP_HBASE_VERSION}
export IMPALA_HBASE_URL=${CDP_HBASE_URL-}
export IMPALA_HIVE_VERSION=${CDP_HIVE_VERSION}
export IMPALA_HIVE_VERSION=${HIVE_VERSION_OVERRIDE:-"$CDP_HIVE_VERSION"}
export IMPALA_HIVE_URL=${CDP_HIVE_URL-}
export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-}
export IMPALA_ICEBERG_VERSION=${CDP_ICEBERG_VERSION}
@@ -251,6 +251,7 @@ export IMPALA_RANGER_VERSION=${CDP_RANGER_VERSION}
export IMPALA_RANGER_URL=${CDP_RANGER_URL-}
export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION}
export IMPALA_TEZ_URL=${CDP_TEZ_URL-}
export IMPALA_HIVE_STORAGE_API_VERSION=${HIVE_STORAGE_API_VERSION_OVERRIDE:-"2.3.0.$IMPALA_HIVE_VERSION"}
# Extract the first component of the hive version.
# Allow overriding of Hive source location in case we want to build Impala without
@@ -362,11 +363,11 @@ export LOCAL_FS="file:${WAREHOUSE_LOCATION_PREFIX}"
export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION}"
ESCAPED_IMPALA_HOME=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$IMPALA_HOME")
export HIVE_HOME="$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"
export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-"${CDP_COMPONENTS_HOME}/hive-\
${IMPALA_HIVE_VERSION}"}
export HIVE_HOME=${HIVE_HOME_OVERRIDE:-"$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"}
export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-"${CDP_COMPONENTS_HOME}/hive-${IMPALA_HIVE_VERSION}"}
# Set the path to the hive_metastore.thrift which is used to build thrift code
export HIVE_METASTORE_THRIFT_DIR=$HIVE_SRC_DIR/standalone-metastore/src/main/thrift
export HIVE_METASTORE_THRIFT_DIR=${HIVE_METASTORE_THRIFT_DIR_OVERRIDE:-\
"$HIVE_SRC_DIR/standalone-metastore/src/main/thrift"}
export TEZ_HOME="$CDP_COMPONENTS_HOME/tez-${IMPALA_TEZ_VERSION}-minimal"
export HBASE_HOME="$CDP_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}/"
# Previously, there were multiple configurations and the "_cdp" included below

View File

@@ -32,7 +32,7 @@ under the License.
<jacoco.report.dir>${env.IMPALA_FE_TEST_COVERAGE_DIR}</jacoco.report.dir>
<hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
<hive.version>${env.IMPALA_HIVE_VERSION}</hive.version>
<hive.storage.api.version>2.3.0.${env.IMPALA_HIVE_VERSION}</hive.storage.api.version>
<hive.storage.api.version>${env.IMPALA_HIVE_STORAGE_API_VERSION}</hive.storage.api.version>
<hive.major.version>${env.IMPALA_HIVE_MAJOR_VERSION}</hive.major.version>
<hudi.version>${env.IMPALA_HUDI_VERSION}</hudi.version>
<ranger.version>${env.IMPALA_RANGER_VERSION}</ranger.version>