mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-9731: Remove USE_CDP_HIVE=false and Hive 2 support
Impala 4 moved to using CDP versions for components, which involves adopting Hive 3. This removes the old code supporting CDH components and Hive 2. Specifically, it does the following: 1. Remove USE_CDP_HIVE and default to the values from USE_CDP_HIVE=true. USE_CDP_HIVE now has no effect on the Impala environment. This also means that bin/jenkins/build-all-flag-combinations.sh no longer include USE_CDP_HIVE=false as a configuration. 2. Remove USE_CDH_KUDU and default to getting Impala from the native toolchain. 3. Ban IMPALA_HIVE_MAJOR_VERSION<3 and remove related code, including the IMPALA_HIVE_MAJOR_VERSION=2 maven profile in fe/pom.xml. There is a fair amount of code that still references the Hive major version. Upstream Hive is now working on Hive 4, so there is a high likelihood that we'll need some code to deal with that transition. This leaves some code (such as maven profiles) and test logic in place. Change-Id: Id85e849beaf4e19dda4092874185462abd2ec608 Reviewed-on: http://gerrit.cloudera.org:8080/15869 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
c43c03c5ee
commit
f241fd08ac
@@ -35,14 +35,9 @@
|
||||
# other. The way to specify a single consistent set of components is via a build
|
||||
# number. This determines the location in s3 to get the artifacts.
|
||||
# CDP_BUILD_NUMBER - The CDP equivalent of a CDH_BUILD_NUMBER.
|
||||
# USE_CDP_HIVE - If false, this will use the CDH version of all Hadoop components
|
||||
# (except Ranger, which is CDP only). If true, this will use the CDP version of all
|
||||
# Hadoop components (except Sentry, which is CDH only).
|
||||
# DOWNLOAD_CDH_COMPONENTS - When set to true, this script will also download and extract
|
||||
# the CDH/CDP Hadoop components (i.e. Hadoop, Hive, HBase, Sentry, Ranger, etc) into
|
||||
# CDH_COMPONENTS_HOME/CDP_COMPONENTS_HOME as appropriate.
|
||||
# USE_CDH_KUDU - Kudu can be downloaded either from the toolchain or as a CDH component,
|
||||
# depending on the value of USE_CDH_KUDU.
|
||||
# KUDU_IS_SUPPORTED - If KUDU_IS_SUPPORTED is false, Kudu is disabled and we download
|
||||
# the toolchain Kudu and use the symbols to compile a non-functional stub library so
|
||||
# that Impala has something to link against.
|
||||
@@ -360,49 +355,6 @@ class ToolchainKudu(ToolchainPackage):
|
||||
return False
|
||||
|
||||
|
||||
class CdhKudu(CdhComponent):
|
||||
def __init__(self, platform_label):
|
||||
kudu_archive_tmpl = "kudu-${version}-" + platform_label
|
||||
# IMPALA_KUDU_URL can contain '%(platform_label)', which needs to be replaced
|
||||
# with the platform. We override this in os.environ so that it is picked up
|
||||
# in EnvVersionedPackage.
|
||||
kudu_url = os.environ.get("IMPALA_KUDU_URL")
|
||||
if kudu_url:
|
||||
kudu_url = kudu_url.replace("%(platform_label)", platform_label)
|
||||
os.environ["IMPALA_KUDU_URL"] = kudu_url
|
||||
super(CdhKudu, self).__init__('kudu',
|
||||
archive_basename_tmpl=kudu_archive_tmpl,
|
||||
unpack_directory_tmpl="kudu-${version}")
|
||||
|
||||
def needs_download(self):
|
||||
# This verifies that the unpack directory exists
|
||||
if super(CdhKudu, self).needs_download():
|
||||
return True
|
||||
# Additional check to distinguish this from the Kudu Java package
|
||||
# Regardless of the actual build type, the 'kudu' tarball will always contain a
|
||||
# 'debug' and a 'release' directory.
|
||||
if not os.path.exists(os.path.join(self.pkg_directory(), "debug")):
|
||||
return True
|
||||
# Both the pkg_directory and the debug directory exist
|
||||
return False
|
||||
|
||||
|
||||
class CdhKuduJava(CdhComponent):
|
||||
def __init__(self):
|
||||
super(CdhKuduJava, self).__init__('kudu-java',
|
||||
archive_basename_tmpl="kudu-${version}")
|
||||
|
||||
def needs_download(self):
|
||||
# This verify that the unpack directory exists
|
||||
if super(CdhKuduJava, self).needs_download():
|
||||
return True
|
||||
# Additional check to distinguish this from the Kudu package
|
||||
# There should be jars under the kudu directory.
|
||||
if len(glob.glob("{0}/*jar".format(self.pkg_directory()))) == 0:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def try_get_platform_release_label():
|
||||
"""Gets the right package label from the OS version. Returns an OsMapping with both
|
||||
'toolchain' and 'cdh' labels. Return None if not found.
|
||||
@@ -629,21 +581,17 @@ def get_toolchain_downloads():
|
||||
|
||||
def get_hadoop_downloads():
|
||||
cluster_components = []
|
||||
use_cdp_hive = os.environ["USE_CDP_HIVE"] == "true"
|
||||
if use_cdp_hive:
|
||||
hadoop = CdpComponent("hadoop")
|
||||
hbase = CdpComponent("hbase", archive_basename_tmpl="hbase-${version}-bin",
|
||||
unpack_directory_tmpl="hbase-${version}")
|
||||
hive = CdpComponent("hive", archive_basename_tmpl="apache-hive-${version}-bin")
|
||||
hive_src = CdpComponent("hive-source",
|
||||
explicit_version=os.environ.get("IMPALA_HIVE_VERSION"),
|
||||
archive_basename_tmpl="hive-${version}-source",
|
||||
unpack_directory_tmpl="hive-${version}")
|
||||
tez = CdpComponent("tez", archive_basename_tmpl="tez-${version}-minimal",
|
||||
makedir=True)
|
||||
cluster_components.extend([hadoop, hbase, hive, hive_src, tez])
|
||||
else:
|
||||
cluster_components.extend(map(CdhComponent, ["hadoop", "hbase", "hive"]))
|
||||
hadoop = CdpComponent("hadoop")
|
||||
hbase = CdpComponent("hbase", archive_basename_tmpl="hbase-${version}-bin",
|
||||
unpack_directory_tmpl="hbase-${version}")
|
||||
hive = CdpComponent("hive", archive_basename_tmpl="apache-hive-${version}-bin")
|
||||
hive_src = CdpComponent("hive-source",
|
||||
explicit_version=os.environ.get("IMPALA_HIVE_VERSION"),
|
||||
archive_basename_tmpl="hive-${version}-source",
|
||||
unpack_directory_tmpl="hive-${version}")
|
||||
tez = CdpComponent("tez", archive_basename_tmpl="tez-${version}-minimal",
|
||||
makedir=True)
|
||||
cluster_components.extend([hadoop, hbase, hive, hive_src, tez])
|
||||
# Sentry is always CDH
|
||||
cluster_components.append(CdhComponent("sentry"))
|
||||
# Ranger is always CDP
|
||||
@@ -654,24 +602,12 @@ def get_hadoop_downloads():
|
||||
|
||||
def get_kudu_downloads(use_kudu_stub):
|
||||
# If Kudu is not supported, we download centos7 kudu to build the kudu stub.
|
||||
# TODO: Should this be from toolchain or CDH? Does it matter?
|
||||
kudu_downloads = []
|
||||
if use_kudu_stub:
|
||||
kudu_downloads += [ToolchainKudu("centos7")]
|
||||
else:
|
||||
use_cdh_kudu = os.getenv("USE_CDH_KUDU") == "true"
|
||||
if use_cdh_kudu:
|
||||
if not try_get_platform_release_label() \
|
||||
or not try_get_platform_release_label().cdh:
|
||||
logging.error("CDH Kudu is not supported on this platform. Set "
|
||||
"USE_CDH_KUDU=false to use the toolchain Kudu.")
|
||||
sys.exit(1)
|
||||
kudu_downloads += [CdhKudu(get_platform_release_label().cdh)]
|
||||
# There is also a Kudu Java package.
|
||||
kudu_downloads += [CdhKuduJava()]
|
||||
else:
|
||||
# Toolchain Kudu includes Java artifacts.
|
||||
kudu_downloads += [ToolchainKudu()]
|
||||
# Toolchain Kudu includes Java artifacts.
|
||||
kudu_downloads += [ToolchainKudu()]
|
||||
|
||||
return kudu_downloads
|
||||
|
||||
@@ -690,14 +626,10 @@ def main():
|
||||
and CDP_BUILD_NUMBER). Hadoop component packages are only downloaded if
|
||||
$DOWNLOAD_CDH_COMPONENTS is true. CDH Hadoop packages are downloaded into
|
||||
$CDH_COMPONENTS_HOME. CDP Hadoop packages are downloaded into $CDP_COMPONENTS_HOME.
|
||||
The versions used for Hadoop components depend on whether USE_CDP_HIVE is true or
|
||||
false. If true, most components get the CDP versions based on the $CDP_BUILD_NUMBER.
|
||||
If false, most components get the CDH versions based on the $CDH_BUILD_NUMBER.
|
||||
The exceptions are:
|
||||
The versions used for Hadoop components come from the CDP versions based on the
|
||||
$CDP_BUILD_NUMBER.
|
||||
The exceptions is:
|
||||
- sentry (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDH_BUILD_NUMBER)
|
||||
- ranger (always downloaded from $IMPALA_TOOLCHAIN_HOST for a given $CDP_BUILD_NUMBER)
|
||||
- kudu (currently always downloaded from $IMPALA_TOOLCHAIN_HOST for a given
|
||||
$CDH_BUILD_NUMBER)
|
||||
If Kudu is not supported on this platform (or KUDU_IS_SUPPORTED=false), then this
|
||||
builds a Kudu stub to allow for compilation without Kudu support.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user