Mirror of https://github.com/apache/impala.git
IMPALA-9709: Remove Impala-lzo from the development environment
This removes Impala-lzo from the Impala development environment.
Impala-lzo is no longer built as part of the Impala build, the LZO
plugin is no longer loaded, LZO tables are not loaded during dataload,
and LZO is no longer tested. This also removes some obsolete scan APIs
that were only used by Impala-lzo, so with this commit Impala-lzo would
require code changes to build against Impala.

The plugin infrastructure is not removed, and this leaves some LZO
support code in place. If someone were to decide to revive Impala-lzo,
they would still be able to load it as a plugin and get the same
functionality as before. This plugin support may be removed later.

Testing:
- Dry run of GVO
- Modified TestPartitionMetadataUncompressedTextOnly's
  test_unsupported_text_compression() to add an LZO case

Change-Id: I3a4f12247d8872b7e14c9feb4b2c58cfd60d4c0e
Reviewed-on: http://gerrit.cloudera.org:8080/15814
Reviewed-by: Bikramjeet Vig <bikramjeet.vig@cloudera.com>
Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
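If Impala-lzo were revived, re-enabling it in a dev environment would roughly require two
manual steps, since the automatic LD_LIBRARY_PATH entry is removed below and the plugin
whitelist now defaults to empty. A minimal sketch, assuming Impala-lzo has first been
patched to build against current Impala (flag name and build path are taken from the
changes below):

    # Hypothetical steps; not part of this commit.
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_LZO}/build"   # plugin library on the search path
    bin/start-impala-cluster.py --impalad_args="--enabled_hdfs_text_scanner_plugins=LZO"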
@@ -438,17 +438,6 @@ add_custom_target(cscope ALL DEPENDS gen-deps
  COMMAND "${CMAKE_SOURCE_DIR}/bin/gen-cscope.sh"
)

# This call is passing IMPALA_TOOLCHAIN_PACKAGES_HOME into Impala-lzo's build.sh,
# but this is known not to work with the current version of Impala-lzo when
# IMPALA_TOOLCHAIN_PACKAGES_HOME is a subdirectory of IMPALA_TOOLCHAIN. Either
# Impala-lzo will need to be fixed or it will need to be removed.
if (DEFINED ENV{IMPALA_LZO} AND EXISTS $ENV{IMPALA_LZO})
  add_custom_target(impala-lzo ALL DEPENDS gen-deps
    COMMAND $ENV{IMPALA_LZO}/build.sh ${CMAKE_BUILD_TYPE} ${CMAKE_SOURCE_DIR}
      $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}
  )
endif()

# Dump include paths to a file
if (DUMP_INCLUDE_PATHS)
  file(REMOVE "${DUMP_INCLUDE_PATHS}")

@@ -42,9 +42,9 @@ using boost::upgrade_lock;
using boost::upgrade_to_unique_lock;
using std::find;

// Allow LZO by default to maintain backwards compatibility. We can add more options
// if we determine that the plugins are well-maintained and generally stable.
DEFINE_string(enabled_hdfs_text_scanner_plugins, "LZO", "(Advanced) whitelist of HDFS "
// LZO is no longer supported, so there are no plugins enabled by default. This is
// likely to be removed.
DEFINE_string(enabled_hdfs_text_scanner_plugins, "", "(Advanced) whitelist of HDFS "
    "text scanner plugins that Impala will try to dynamically load. Must be a "
    "comma-separated list of upper-case compression codec names. Each plugin implements "
    "support for decompression and hands off the decompressed bytes to Impala's builtin "

@@ -824,14 +824,6 @@ ScanRange* HdfsScanNodeBase::AllocateScanRange(hdfsFS fs, const char* file, int6
      buffer_opts);
}

ScanRange* HdfsScanNodeBase::AllocateScanRange(hdfsFS fs, const char* file,
    int64_t len, int64_t offset, int64_t partition_id, int disk_id,
    int cache_options, bool expected_local, int64_t mtime,
    bool is_erasure_coded, const ScanRange* original_split) {
  return AllocateScanRange(fs, file, len, offset, partition_id, disk_id, expected_local,
      is_erasure_coded, mtime, BufferOpts(cache_options), original_split);
}

void* HdfsScanNodeBase::GetCodegenFn(THdfsFileFormat::type type) {
  auto it = codegend_fn_map_.find(type);
  if (it == codegend_fn_map_.end()) return NULL;

@@ -1166,7 +1158,7 @@ void HdfsScanNodeBase::UpdateBytesRead(
  }
}

HdfsFileDesc* ScanRangeSharedState::GetFileDesc(
const HdfsFileDesc* ScanRangeSharedState::GetFileDesc(
    int64_t partition_id, const std::string& filename) {
  auto file_desc_map_key = make_pair(partition_id, filename);
  DCHECK(file_descs_.find(file_desc_map_key) != file_descs_.end());

@@ -130,9 +130,7 @@ class ScanRangeSharedState {
 public:
  /// Given a partition_id and filename returns the related file descriptor DCHECK ensures
  /// there is always file descriptor returned.
  /// TODO: The LZO scanner expects a non const object so switch to returning a const once
  /// support for LZO scanner is removed.
  HdfsFileDesc* GetFileDesc(int64_t partition_id, const std::string& filename);
  const HdfsFileDesc* GetFileDesc(int64_t partition_id, const std::string& filename);

  /// Sets the scanner specific metadata for 'partition_id' and 'filename'.
  /// Scanners can use this to store file header information. Thread safe.

@@ -497,12 +495,6 @@ class HdfsScanNodeBase : public ScanNode {
      ScanRangeMetadata* metadata, int disk_id, bool expected_local,
      bool is_erasure_coded, int64_t mtime, const io::BufferOpts& buffer_opts);

  /// Old API for compatibility with text scanners (e.g. LZO text scanner).
  io::ScanRange* AllocateScanRange(hdfsFS fs, const char* file, int64_t len,
      int64_t offset, int64_t partition_id, int disk_id, int cache_options,
      bool expected_local, int64_t mtime, bool is_erasure_coded = false,
      const io::ScanRange* original_split = nullptr);

  /// Adds ranges to be read later by scanners. Must not be called once
  /// remaining_scan_range_submissions_ is 0. The enqueue_location specifies whether the
  /// scan ranges are added to the head or tail of the queue. Implemented by child classes

@@ -525,7 +517,7 @@ class HdfsScanNodeBase : public ScanNode {

  /// Given a partition_id and filename returns the related file descriptor
  /// DCHECK ensures there is always file descriptor returned
  inline HdfsFileDesc* GetFileDesc(
  inline const HdfsFileDesc* GetFileDesc(
      int64_t partition_id, const std::string& filename) {
    return shared_state_->GetFileDesc(partition_id, filename);
  }

@@ -48,7 +48,7 @@ const char* const Codec::ZSTD_COMPRESSION =
const char* const Codec::UNKNOWN_CODEC_ERROR =
    "This compression codec is currently unsupported: ";
const char* const NO_LZO_MSG = "LZO codecs may not be created via the Codec interface. "
    "Instead the LZO library is directly invoked.";
    "Instead LZO is decoded by an optional text scanner plugin.";

const Codec::CodecMap Codec::CODEC_MAP = {{"", THdfsCompression::NONE},
    {DEFAULT_COMPRESSION, THdfsCompression::DEFAULT},

@@ -199,7 +199,7 @@ function apt-get {

echo ">>> Installing build tools"
ubuntu apt-get update
ubuntu apt-get --yes install ccache curl gawk g++ gcc libffi-dev liblzo2-dev \
ubuntu apt-get --yes install ccache curl gawk g++ gcc libffi-dev \
    libkrb5-dev krb5-admin-server krb5-kdc krb5-user libsasl2-dev \
    libsasl2-modules libsasl2-modules-gssapi-mit libssl-dev make ninja-build \
    python-dev python-setuptools postgresql ssh wget vim-common psmisc \

@@ -240,7 +240,7 @@ redhat sudo yum install -y curl gawk gcc gcc-c++ git krb5-devel krb5-server \
    krb5-workstation libevent-devel libffi-devel make openssl-devel cyrus-sasl \
    cyrus-sasl-gssapi cyrus-sasl-devel cyrus-sasl-plain \
    postgresql postgresql-server \
    wget vim-common nscd cmake lzo-devel fuse-devel zlib-devel \
    wget vim-common nscd cmake fuse-devel zlib-devel \
    psmisc lsof openssh-server redhat-lsb java-1.8.0-openjdk-devel \
    java-1.8.0-openjdk-src

@@ -453,25 +453,6 @@ eval "$SET_JAVA_HOME"
# Assert that we have a java available
test -f $JAVA_HOME/bin/java

# LZO is not needed to compile or run Impala, but it is needed for the data load
echo ">>> Checking out Impala-lzo"
: ${IMPALA_LZO_HOME:="${IMPALA_HOME}/../Impala-lzo"}
if ! [[ -d "$IMPALA_LZO_HOME" ]]
then
  git clone --branch master https://github.com/cloudera/impala-lzo.git "$IMPALA_LZO_HOME"
fi

echo ">>> Checking out and building hadoop-lzo"

: ${HADOOP_LZO_HOME:="${IMPALA_HOME}/../hadoop-lzo"}
if ! [[ -d "$HADOOP_LZO_HOME" ]]
then
  git clone https://github.com/cloudera/hadoop-lzo.git "$HADOOP_LZO_HOME"
fi
cd "$HADOOP_LZO_HOME"
time -p ant package
cd "$IMPALA_HOME"

# Try to prepopulate the m2 directory to save time
if ! bin/jenkins/populate_m2_directory.py ; then
  echo "Failed to prepopulate the m2 directory. Continuing..."

@@ -67,12 +67,5 @@ popd
rm -f "${IMPALA_HOME}/llvm-ir/"impala*.ll
rm -f "${IMPALA_HOME}/be/generated-sources/impala-ir/"*

# Cleanup Impala-lzo
if [ -e "${IMPALA_LZO}" ]; then
  pushd "${IMPALA_LZO}"
  git rev-parse 2>/dev/null && git clean -fdx
  popd
fi

# When switching to and from toolchain, make sure to remove all CMake generated files
"${IMPALA_HOME}/bin/clean-cmake.sh"

@@ -325,8 +325,6 @@ export DOWNLOAD_CDH_COMPONENTS=${DOWNLOAD_CDH_COMPONENTS-true}

export IS_OSX="$(if [[ "$OSTYPE" == "darwin"* ]]; then echo true; else echo false; fi)"

export HADOOP_LZO="${HADOOP_LZO-$IMPALA_HOME/../hadoop-lzo}"
export IMPALA_LZO="${IMPALA_LZO-$IMPALA_HOME/../Impala-lzo}"
export IMPALA_AUX_TEST_HOME="${IMPALA_AUX_TEST_HOME-$IMPALA_HOME/../Impala-auxiliary-tests}"
export TARGET_FILESYSTEM="${TARGET_FILESYSTEM-hdfs}"
export ERASURE_CODING="${ERASURE_CODING-false}"

@@ -568,18 +566,13 @@ export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR_OVERRIDE:-"${HADOOP_HOME}/include"}
export HADOOP_LIB_DIR=${HADOOP_LIB_DIR_OVERRIDE:-"${HADOOP_HOME}/lib"}

# Please note that the * is inside quotes, thus it won't get expanded by bash but
# by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH-}:${HADOOP_HOME}/share/hadoop/tools/lib/*"
# YARN is configured to use LZO so the LZO jar needs to be in the hadoop classpath.
export LZO_JAR_PATH="$HADOOP_LZO/build/hadoop-lzo-0.4.15.jar"
HADOOP_CLASSPATH+=":$LZO_JAR_PATH"

# Beware of adding entries from $HADOOP_HOME here, because they can change
# the order of the classpath, leading to configuration not showing up first.
HADOOP_CLASSPATH="$LZO_JAR_PATH"
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH-}"
# Add the path containing the hadoop-aws jar, which is required to access AWS from the
# minicluster.
# Please note that the * is inside quotes, thus it won't get expanded by bash but
# by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft
HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/*"

export PATH="$HADOOP_HOME/bin:$PATH"

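A quick way to confirm the classpath simplification above would be to source the config and
check that the hadoop-lzo jar no longer appears; a small sketch (the grep pattern is only
illustrative, not part of the commit):

    # Sketch: after this change, no hadoop-lzo entry is expected on the Hadoop classpath.
    source bin/impala-config.sh
    echo "$HADOOP_CLASSPATH" | grep -c hadoop-lzo    # expected: 0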
@@ -610,7 +603,7 @@ export HIVE_CONF_DIR="$IMPALA_FE_DIR/./src/test/resources"
export POSTGRES_JDBC_DRIVER="${IMPALA_FE_DIR}/target/dependency/postgresql-${IMPALA_POSTGRES_JDBC_DRIVER_VERSION}.jar"

export HIVE_AUX_JARS_PATH="$POSTGRES_JDBC_DRIVER"
export AUX_CLASSPATH="${LZO_JAR_PATH}"
export AUX_CLASSPATH=""
### Tell hive not to use jline
export HADOOP_USER_CLASSPATH_FIRST=true

@@ -707,7 +700,6 @@ LIBHDFS_OPTS="${LIBHDFS_OPTS} -XX:MaxPermSize=128m"
export CLASSPATH="$IMPALA_FE_DIR/target/dependency:${CLASSPATH:+:${CLASSPATH}}"
CLASSPATH="$IMPALA_FE_DIR/target/classes:$CLASSPATH"
CLASSPATH="$IMPALA_FE_DIR/src/test/resources:$CLASSPATH"
CLASSPATH="$LZO_JAR_PATH:$CLASSPATH"

# A marker in the environment to prove that we really did source this file
export IMPALA_CONFIG_SOURCED=1

@@ -726,8 +718,6 @@ echo "HBASE_CONF_DIR = $HBASE_CONF_DIR"
echo "RANGER_HOME = $RANGER_HOME"
echo "RANGER_CONF_DIR = $RANGER_CONF_DIR "
echo "THRIFT_HOME = $THRIFT_HOME"
echo "HADOOP_LZO = $HADOOP_LZO"
echo "IMPALA_LZO = $IMPALA_LZO"
echo "CLASSPATH = $CLASSPATH"
echo "LIBHDFS_OPTS = $LIBHDFS_OPTS"
echo "JAVA_HOME = $JAVA_HOME"

@@ -21,9 +21,6 @@
# run Impala binaries in the context of a dev environment.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}"

# Impala-lzo is loaded at runtime, so needs to be on the search path.
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_LZO}/build"

# We built against toolchain GCC so we need to dynamically link against matching
# library versions. (the rpath isn't baked into the binaries)
IMPALA_TOOLCHAIN_GCC_LIB=\

@@ -139,7 +139,6 @@ options, args = parser.parse_args()
IMPALA_HOME = os.environ["IMPALA_HOME"]
CORE_SITE_PATH = os.path.join(IMPALA_HOME, "fe/src/test/resources/core-site.xml")
KNOWN_BUILD_TYPES = ["debug", "release", "latest"]
IMPALA_LZO = os.environ["IMPALA_LZO"]
# The location in the container where the cache is always mounted.
DATA_CACHE_CONTAINER_PATH = "/opt/impala/cache"

@@ -623,12 +622,6 @@ class DockerMiniClusterOperations(object):
    # Run the container as the current user.
    user_args = ["--user", "{0}:{1}".format(os.getuid(), os.getgid())]

    # Allow loading LZO plugin, if built.
    lzo_lib_dir = os.path.join(IMPALA_LZO, "build")
    if os.path.isdir(lzo_lib_dir):
      mount_args += ["--mount",
          "type=bind,src={0},dst=/opt/impala/lib/plugins".format(lzo_lib_dir)]

    mem_limit_args = []
    if mem_limit is not None:
      mem_limit_args = ["--memory", str(mem_limit)]

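The plugin-mount logic removed above is what mapped a built Impala-lzo into the dockerised
minicluster. For illustration, the equivalent manual docker option would look roughly like
the following sketch, derived from those mount arguments (not something this commit adds):

    # Sketch: bind-mounting a built plugin into a container's plugin directory.
    docker run \
      --mount "type=bind,src=${IMPALA_LZO}/build,dst=/opt/impala/lib/plugins" \
      ...   # remaining minicluster arguments omitted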
10 buildall.sh
@@ -434,9 +434,6 @@ build_all_components() {
    if (( build_independent_targets )); then
      MAKE_TARGETS+=" cscope fe tarballs"
    fi
    if [[ -e "$IMPALA_LZO" ]]; then
      MAKE_TARGETS+=" impala-lzo"
    fi
  fi
  ${MAKE_CMD} -j${IMPALA_BUILD_THREADS:-4} ${IMPALA_MAKE_FLAGS} ${MAKE_TARGETS}
}

@@ -518,13 +515,6 @@ reconfigure_test_cluster() {

  # Generate the Hadoop configs needed by Impala
  "${IMPALA_HOME}/bin/create-test-configuration.sh" ${CREATE_TEST_CONFIG_ARGS}

  # Copy Hadoop-lzo dependencies if available (required to generate Lzo data).
  if stat "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* > /dev/null ; then
    cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_LIB_DIR/native"
  else
    echo "No hadoop-lzo found"
  fi
}

# Starts the test cluster processes except for Impala.

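With the impala-lzo make target gone from buildall.sh (and the corresponding rule removed
from CMakeLists.txt above), a developer reviving the plugin would have to drive its build.sh
directly. A sketch mirroring the removed CMake rule, assuming Impala-lzo has been fixed to
build against current Impala:

    # Sketch: out-of-tree plugin build (arguments: build type, Impala source dir, toolchain packages dir).
    "$IMPALA_LZO"/build.sh Debug "$IMPALA_HOME" "$IMPALA_TOOLCHAIN_PACKAGES_HOME"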
@@ -250,14 +250,6 @@ function build_impdev() {
|
||||
git fetch /git_common_dir --no-tags "$GIT_HEAD_REV"
|
||||
git checkout -b test-with-docker FETCH_HEAD
|
||||
|
||||
# Checkout impala-lzo too
|
||||
mkdir /home/impdev/Impala-lzo
|
||||
pushd /home/impdev/Impala-lzo
|
||||
git init
|
||||
git fetch $IMPALA_LZO_REPO --no-tags "$IMPALA_LZO_REF"
|
||||
git checkout -b test-with-docker FETCH_HEAD
|
||||
popd
|
||||
|
||||
# Link in logs. Logs are on the host since that's the most important thing to
|
||||
# look at after the tests are run.
|
||||
ln -sf /logs logs
|
||||
|
||||
@@ -18,13 +18,11 @@ ARG BASE_IMAGE=ubuntu:16.04
|
||||
FROM ${BASE_IMAGE}
|
||||
|
||||
# Install minimal dependencies required for Impala services to run.
|
||||
# liblzo2-2 may be needed by the Impala-lzo plugin, which is used in tests.
|
||||
# We install it in the base image for convenience.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y openjdk-8-jre-headless \
|
||||
libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
|
||||
sudo netcat-openbsd less curl iproute2 vim iputils-ping \
|
||||
tzdata liblzo2-2 krb5-user && \
|
||||
tzdata krb5-user && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
@@ -184,11 +184,6 @@ def main():
|
||||
default=os.path.expanduser("~/.ccache"))
|
||||
parser.add_argument('--tail', action="store_true",
|
||||
help="Run tail on all container log files.")
|
||||
parser.add_argument('--impala-lzo-repo',
|
||||
default="https://github.com/cloudera/impala-lzo.git",
|
||||
help="Git repo for Impala-lzo repo")
|
||||
parser.add_argument('--impala-lzo-ref', default='master',
|
||||
help="Branch name for Impala-lzo repo.")
|
||||
parser.add_argument('--env', metavar='K=V', default=[], action='append',
|
||||
help="""Passes given environment variables (expressed as KEY=VALUE)
|
||||
through containers.
|
||||
@@ -210,8 +205,6 @@ def main():
|
||||
suite_concurrency=args.suite_concurrency,
|
||||
impalad_mem_limit_bytes=args.impalad_mem_limit_bytes,
|
||||
tail=args.tail,
|
||||
impala_lzo_repo=args.impala_lzo_repo,
|
||||
impala_lzo_ref=args.impala_lzo_ref,
|
||||
env=args.env, base_image=args.base_image)
|
||||
|
||||
fh = logging.FileHandler(os.path.join(_make_dir_if_not_exist(t.log_dir), "log.txt"))
|
||||
@@ -449,7 +442,7 @@ class TestWithDocker(object):
|
||||
cleanup_image, ccache_dir, test_mode,
|
||||
suite_concurrency, parallel_test_concurrency,
|
||||
impalad_mem_limit_bytes, tail,
|
||||
impala_lzo_repo, impala_lzo_ref, env, base_image):
|
||||
env, base_image):
|
||||
self.build_image = build_image
|
||||
self.name = name
|
||||
self.containers = []
|
||||
@@ -485,8 +478,6 @@ class TestWithDocker(object):
|
||||
self.parallel_test_concurrency = parallel_test_concurrency
|
||||
self.impalad_mem_limit_bytes = impalad_mem_limit_bytes
|
||||
self.tail = tail
|
||||
self.impala_lzo_repo = impala_lzo_repo
|
||||
self.impala_lzo_ref = impala_lzo_ref
|
||||
self.env = env
|
||||
self.base_image = base_image
|
||||
|
||||
@@ -571,8 +562,6 @@ class TestWithDocker(object):
|
||||
"-v", self.git_root + ":/repo:ro",
|
||||
"-v", self.git_common_dir + ":/git_common_dir:ro",
|
||||
"-e", "GIT_HEAD_REV=" + self.git_head_rev,
|
||||
"-e", "IMPALA_LZO_REPO=" + self.impala_lzo_repo,
|
||||
"-e", "IMPALA_LZO_REF=" + self.impala_lzo_ref,
|
||||
# Share timezone between host and container
|
||||
"-e", "LOCALTIME_LINK_TARGET=" + localtime_link_target,
|
||||
"-v", self.ccache_dir + ":/ccache",
|
||||
|
||||
@@ -468,8 +468,7 @@ public class ToSqlUtils {
    }

    if (storageHandlerClass == null) {
      // TODO: Remove this special case when we have the LZO_TEXT writer
      // We must handle LZO_TEXT specially because Impala does not yet support creating
      // We must handle LZO_TEXT specially because Impala does not support creating
      // tables with this row format. In this case, we cannot output "WITH
      // SERDEPROPERTIES" because Hive does not support it with "STORED AS". For any
      // other HdfsFileFormat we want to output the serdeproperties because it is

@@ -27,10 +27,12 @@ import com.google.common.collect.ImmutableMap;
 * Support for recognizing compression suffixes on data files.
 * Compression of a file is recognized in mapreduce by looking for suffixes of
 * supported codecs.
 * For now Impala supports LZO, GZIP, SNAPPY, BZIP2 and some additional formats if plugins
 * For now Impala supports GZIP, SNAPPY, BZIP2 and some additional formats if plugins
 * are available. Even if a plugin is available, we need to add the file suffixes here so
 * that we can resolve the compression type from the file name. LZO can use the specific
 * HIVE input class.
 * Some compression types here are detected even though they are not supported. This
 * allows for better error messages (e.g. LZ4, LZO).
 */
public enum HdfsCompression {
  NONE,

@@ -49,6 +49,9 @@ public enum HdfsFileFormat {
      "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
      "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
      false, false, true),
  // LZO_TEXT is never used as an actual HdfsFileFormat. It is used only to store the
  // input format class and match against it (e.g. in HdfsCompression). Outside of this
  // file, tables that use the LZO input format class use HdfsFileFormat.TEXT.
  LZO_TEXT("com.hadoop.mapred.DeprecatedLzoTextInputFormat",
      "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
      "", false, false, true),

@@ -194,8 +197,7 @@ public enum HdfsFileFormat {
      case TEXT:
        if (compressionType == HdfsCompression.LZO ||
            compressionType == HdfsCompression.LZO_INDEX) {
          // TODO: Update this when we can write LZO text.
          // It is not currently possible to create a table with LZO compressed text files
          // It is not possible to create a table with LZO compressed text files
          // in Impala, but this is valid in Hive.
          return String.format("INPUTFORMAT '%s' OUTPUTFORMAT '%s'",
              LZO_TEXT.inputFormat(), LZO_TEXT.outputFormat());

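Because Impala's CREATE TABLE does not accept an explicit INPUTFORMAT, a table matching
LZO_TEXT's input format class has to be created through Hive. A hedged sketch using the
format classes shown above (the beeline URL and table name are placeholders, not part of
this commit):

    # Sketch: Hive DDL producing a table that Impala maps to HdfsFileFormat.TEXT with LZO compression.
    beeline -u "jdbc:hive2://localhost:11050" -e "
      CREATE TABLE lzo_text_demo (s STRING)
      STORED AS
        INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
        OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';"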
@@ -179,7 +179,6 @@ public class HdfsScanNode extends ScanNode {
      ImmutableSet.<HdfsFileFormat>builder()
          .add(HdfsFileFormat.RC_FILE)
          .add(HdfsFileFormat.TEXT)
          .add(HdfsFileFormat.LZO_TEXT)
          .add(HdfsFileFormat.SEQUENCE_FILE)
          .add(HdfsFileFormat.AVRO)
          .build();

@@ -62,8 +62,8 @@ public class HdfsTableSink extends TableSink {
  protected final boolean inputIsClustered_;

  private static final Set<HdfsFileFormat> SUPPORTED_FILE_FORMATS = ImmutableSet.of(
      HdfsFileFormat.PARQUET, HdfsFileFormat.TEXT, HdfsFileFormat.LZO_TEXT,
      HdfsFileFormat.RC_FILE, HdfsFileFormat.SEQUENCE_FILE, HdfsFileFormat.AVRO);
      HdfsFileFormat.PARQUET, HdfsFileFormat.TEXT, HdfsFileFormat.RC_FILE,
      HdfsFileFormat.SEQUENCE_FILE, HdfsFileFormat.AVRO);

  // Stores the indices into the list of non-clustering columns of the target table that
  // are stored in the 'sort.columns' table property. This is sent to the backend to

@@ -150,7 +150,7 @@ public class HdfsTableSink extends TableSink {
      return 1024L * 1024L * 1024L;
    }

    // For all other supported formats (TEXT, LZO_TEXT, RC_FILE, SEQUENCE_FILE & AVRO)
    // For all other supported formats (TEXT, RC_FILE, SEQUENCE_FILE & AVRO)
    // 100KB is a very approximate estimate of the amount of data buffered.
    return 100L * 1024L;
  }

@@ -3385,15 +3385,15 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
|
||||
// File type / table type mismatch.
|
||||
AnalyzesOk(String.format("load data inpath '%s' %s into table " +
|
||||
"tpch.lineitem",
|
||||
"/test-warehouse/alltypes_text_lzo/year=2009/month=4", overwrite));
|
||||
"/test-warehouse/alltypes_text_gzip/year=2009/month=4", overwrite));
|
||||
// When table type matches, analysis passes for partitioned and unpartitioned
|
||||
// tables.
|
||||
AnalyzesOk(String.format("load data inpath '%s' %s into table " +
|
||||
"functional_text_lzo.alltypes partition(year=2009, month=4)",
|
||||
"/test-warehouse/alltypes_text_lzo/year=2009/month=4", overwrite));
|
||||
"functional_text_gzip.alltypes partition(year=2009, month=4)",
|
||||
"/test-warehouse/alltypes_text_gzip/year=2009/month=4", overwrite));
|
||||
AnalyzesOk(String.format("load data inpath '%s' %s into table " +
|
||||
"functional_text_lzo.jointbl",
|
||||
"/test-warehouse/alltypes_text_lzo/year=2009/month=4", overwrite));
|
||||
"functional_text_gzip.jointbl",
|
||||
"/test-warehouse/alltypes_text_gzip/year=2009/month=4", overwrite));
|
||||
|
||||
// Verify with a read-only table
|
||||
AnalysisError(String.format("load data inpath '%s' into table " +
|
||||
|
||||
@@ -101,12 +101,10 @@ public class AnalyzerTest extends FrontendTestBase {
|
||||
|
||||
@Test
|
||||
public void TestCompressedText() throws AnalysisException {
|
||||
AnalyzesOk("SELECT count(*) FROM functional_text_lzo.tinyinttable");
|
||||
// TODO: Disabling the text/{gzip,bzip,snap} analysis test until the corresponding
|
||||
// databases are loaded.
|
||||
// AnalyzesOk("SELECT count(*) FROM functional_text_gzip.tinyinttable");
|
||||
// AnalyzesOk("SELECT count(*) FROM functional_text_snap.tinyinttable");
|
||||
// AnalyzesOk("SELECT count(*) FROM functional_text_bzip.tinyinttable");
|
||||
AnalyzesOk("SELECT count(*) FROM functional_text_bzip.tinyinttable");
|
||||
AnalyzesOk("SELECT count(*) FROM functional_text_def.tinyinttable");
|
||||
AnalyzesOk("SELECT count(*) FROM functional_text_gzip.tinyinttable");
|
||||
AnalyzesOk("SELECT count(*) FROM functional_text_snap.tinyinttable");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
BIN testdata/bad_text_lzo/bad_text.lzo vendored (binary file not shown)
BIN testdata/bad_text_lzo/bad_text.lzo.index vendored (binary file not shown)
22 testdata/bin/create-load-data.sh vendored
@@ -453,28 +453,6 @@ EOF
|
||||
}
|
||||
|
||||
function load-custom-data {
|
||||
# Load the index files for corrupted lzo data.
|
||||
hadoop fs -mkdir -p /test-warehouse/bad_text_lzo_text_lzo
|
||||
hadoop fs -rm -f /test-warehouse/bad_text_lzo_text_lzo/bad_text.lzo.index
|
||||
hadoop fs -put ${IMPALA_HOME}/testdata/bad_text_lzo/bad_text.lzo.index \
|
||||
/test-warehouse/bad_text_lzo_text_lzo/
|
||||
|
||||
hadoop fs -rm -r -f /bad_text_lzo_text_lzo/
|
||||
hadoop fs -mv /test-warehouse/bad_text_lzo_text_lzo/ /
|
||||
# Cleanup the old bad_text_lzo files, if they exist.
|
||||
hadoop fs -rm -r -f /test-warehouse/bad_text_lzo/
|
||||
|
||||
# TODO: Why is there a REMOTE_LOAD condition? See IMPALA-4347
|
||||
if [[ -z $REMOTE_LOAD ]]; then
|
||||
# Index all lzo files in HDFS under /test-warehouse
|
||||
${IMPALA_HOME}/testdata/bin/lzo_indexer.sh /test-warehouse
|
||||
fi
|
||||
|
||||
hadoop fs -mv /bad_text_lzo_text_lzo/ /test-warehouse/
|
||||
|
||||
# Remove all index files in this partition.
|
||||
hadoop fs -rm -f /test-warehouse/alltypes_text_lzo/year=2009/month=1/*.lzo.index
|
||||
|
||||
# Add a sequence file that only contains a header (see IMPALA-362)
|
||||
hadoop fs -put -f ${IMPALA_HOME}/testdata/tinytable_seq_snap/tinytable_seq_snap_header_only \
|
||||
/test-warehouse/tinytable_seq_snap
|
||||
|
||||
31 testdata/bin/generate-schema-statements.py vendored
@@ -171,7 +171,6 @@ COMPRESSION_MAP = {'def': 'org.apache.hadoop.io.compress.DefaultCodec',
|
||||
'gzip': 'org.apache.hadoop.io.compress.GzipCodec',
|
||||
'bzip': 'org.apache.hadoop.io.compress.BZip2Codec',
|
||||
'snap': 'org.apache.hadoop.io.compress.SnappyCodec',
|
||||
'lzo': 'com.hadoop.compression.lzo.LzopCodec',
|
||||
'none': ''
|
||||
}
|
||||
|
||||
@@ -188,9 +187,6 @@ FILE_FORMAT_MAP = {
|
||||
'orc': 'ORC',
|
||||
'parquet': 'PARQUET',
|
||||
'hudiparquet': 'HUDIPARQUET',
|
||||
'text_lzo':
|
||||
"\nINPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'" +
|
||||
"\nOUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'",
|
||||
'avro': 'AVRO',
|
||||
'hbase': "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'",
|
||||
'kudu': "KUDU",
|
||||
@@ -224,7 +220,7 @@ WITH SERDEPROPERTIES (
|
||||
"{hbase_column_mapping}")
|
||||
{tbl_properties}{{hdfs_location}}"""
|
||||
|
||||
KNOWN_EXPLORATION_STRATEGIES = ['core', 'pairwise', 'exhaustive', 'lzo']
|
||||
KNOWN_EXPLORATION_STRATEGIES = ['core', 'pairwise', 'exhaustive']
|
||||
|
||||
def build_create_statement(table_template, table_name, db_name, db_suffix,
|
||||
file_format, compression, hdfs_location,
|
||||
@@ -232,8 +228,6 @@ def build_create_statement(table_template, table_name, db_name, db_suffix,
|
||||
create_stmt = ''
|
||||
if (force_reload):
|
||||
create_stmt += 'DROP TABLE IF EXISTS %s%s.%s;\n' % (db_name, db_suffix, table_name)
|
||||
if compression == 'lzo':
|
||||
file_format = '%s_%s' % (file_format, compression)
|
||||
# hbase / kudu tables are external, and not read from hdfs. We don't need an
|
||||
# hdfs_location.
|
||||
if file_format in ['hbase', 'kudu']:
|
||||
@@ -454,9 +448,9 @@ def build_insert_into_statement(insert, db_name, db_suffix, table_name, file_for
|
||||
statement += "set hive.auto.convert.join=true;\n"
|
||||
|
||||
# For some reason (hive bug?) we need to have the CombineHiveInputFormat set
|
||||
# for cases where we are compressing in bzip or lzo on certain tables that
|
||||
# for cases where we are compressing in bzip on certain tables that
|
||||
# have multiple files.
|
||||
if 'multi' in table_name and ('bzip' in db_suffix or 'lzo' in db_suffix):
|
||||
if 'multi' in table_name and ('bzip' in db_suffix):
|
||||
statement += SET_HIVE_INPUT_FORMAT % "CombineHiveInputFormat"
|
||||
else:
|
||||
statement += SET_HIVE_INPUT_FORMAT % "HiveInputFormat"
|
||||
@@ -682,9 +676,6 @@ def generate_statements(output_name, test_vectors, sections,
|
||||
output = impala_create
|
||||
if create_hive or file_format == 'hbase':
|
||||
output = hive_output
|
||||
elif codec == 'lzo':
|
||||
# Impala CREATE TABLE doesn't allow INPUTFORMAT.
|
||||
output = hive_output
|
||||
|
||||
# TODO: Currently, Kudu does not support partitioned tables via Impala.
|
||||
# If a CREATE_KUDU section was provided, assume it handles the partition columns
|
||||
@@ -748,21 +739,7 @@ def generate_statements(output_name, test_vectors, sections,
|
||||
# moment, it assumes we're only using ALTER for partitioning the table.
|
||||
if alter and file_format not in ("hbase", "kudu"):
|
||||
use_db = 'USE {db_name};\n'.format(db_name=db)
|
||||
if output == hive_output and codec == 'lzo':
|
||||
# Hive ALTER TABLE ADD PARTITION doesn't handle null partitions, so
|
||||
# we can't run the ALTER section in this case.
|
||||
if options.force_reload:
|
||||
# IMPALA-2278: Hive INSERT OVERWRITE won't clear out partition directories
|
||||
# that weren't already added to the table. So, for force reload, manually
|
||||
# delete the partition directories.
|
||||
output.create.append(("DFS -rm -R {data_path};").format(
|
||||
data_path=data_path))
|
||||
else:
|
||||
# If this is not a force reload use msck repair to add the partitions
|
||||
# into the table.
|
||||
output.create.append(use_db + 'msck repair table %s;' % (table_name))
|
||||
else:
|
||||
output.create.append(use_db + alter.format(table_name=table_name))
|
||||
output.create.append(use_db + alter.format(table_name=table_name))
|
||||
|
||||
# If the directory already exists in HDFS, assume that data files already exist
|
||||
# and skip loading the data. Otherwise, the data is generated using either an
|
||||
|
||||
1 testdata/bin/generate-test-vectors.py vendored
@@ -92,7 +92,6 @@ def is_valid_combination(vector):
|
||||
if len(vector) == 4:
|
||||
return not (
|
||||
(vector[FILE_FORMAT_IDX] == 'text' and vector[COMPRESSION_IDX] in ['def']) or
|
||||
(vector[FILE_FORMAT_IDX] != 'text' and vector[COMPRESSION_IDX] == 'lzo') or
|
||||
(vector[COMPRESSION_IDX] == 'none' and vector[COMPRESSION_TYPE_IDX] != 'none') or
|
||||
(vector[COMPRESSION_IDX] != 'none' and vector[COMPRESSION_TYPE_IDX] == 'none') or
|
||||
(vector[FILE_FORMAT_IDX] != 'seq' and vector[COMPRESSION_TYPE_IDX] == 'record') or
|
||||
|
||||
5 testdata/bin/load_nested.py vendored
@@ -44,8 +44,7 @@ COMPRESSION_VALUES_MAP = {
|
||||
"parquet": {
|
||||
"none": "SNAPPY",
|
||||
"snap": "SNAPPY",
|
||||
"gzip": "GZIP",
|
||||
"lzo": "LZO"
|
||||
"gzip": "GZIP"
|
||||
},
|
||||
# Currently, only three codecs are supported in Hive for ORC. See Hive codes in
|
||||
# org.apache.orc.impl.WriterImpl#createCodec (in module hive-orc)
|
||||
@@ -397,7 +396,7 @@ if __name__ == "__main__":
|
||||
source_db = args.source_db
|
||||
target_db = args.target_db
|
||||
file_format, compression_value = args.table_format.split("/")
|
||||
# 'compression_value' is one of [none,def,gzip,bzip,snap,lzo]. We should translate it
|
||||
# 'compression_value' is one of [none,def,gzip,bzip,snap]. We should translate it
|
||||
# into values that can be set to Hive.
|
||||
if file_format not in COMPRESSION_KEYS_MAP:
|
||||
raise Exception("Nested types in file format %s are not supported" % file_format)
|
||||
|
||||
20 testdata/bin/lzo_indexer.sh vendored
@@ -1,20 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
hadoop jar ${HADOOP_LZO}/build/hadoop-lzo-0.4.15.jar com.hadoop.compression.lzo.DistributedLzoIndexer $*
|
||||
@@ -26,8 +26,6 @@ target_filesystem = os.environ.get('TARGET_FILESYSTEM')
|
||||
compression_codecs = [
|
||||
'org.apache.hadoop.io.compress.GzipCodec',
|
||||
'org.apache.hadoop.io.compress.DefaultCodec',
|
||||
'com.hadoop.compression.lzo.LzoCodec',
|
||||
'com.hadoop.compression.lzo.LzopCodec',
|
||||
'org.apache.hadoop.io.compress.BZip2Codec'
|
||||
]
|
||||
|
||||
@@ -44,7 +42,6 @@ CONFIG = {
|
||||
|
||||
# Compression codecs
|
||||
'io.compression.codecs': ",".join(compression_codecs),
|
||||
'io.compression.deoc.lzo.class': 'com.hadoop.compression.lzo.LzoCodec',
|
||||
|
||||
# Set up proxyuser
|
||||
'hadoop.proxyuser.${USER}.hosts': '*',
|
||||
|
||||
@@ -76,9 +76,7 @@ app_classpath = [
|
||||
'$HADOOP_HDFS_HOME/share/hadoop/hdfs/*',
|
||||
'$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*',
|
||||
'$HADOOP_YARN_HOME/share/hadoop/yarn/*',
|
||||
'$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*',
|
||||
# Append the LZO jar for LZO-compressed file support.
|
||||
'${LZO_JAR_PATH}']
|
||||
'$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*']
|
||||
|
||||
# Hive 3 needs Tez on the classpath.
|
||||
if hive_major_version == 3:
|
||||
|
||||
@@ -1563,17 +1563,6 @@ OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
bad_text_lzo
|
||||
---- COLUMNS
|
||||
field STRING
|
||||
---- DEPENDENT_LOAD_HIVE
|
||||
-- Error recovery test data for LZO compression.
|
||||
LOAD DATA LOCAL INPATH '{impala_home}/testdata/bad_text_lzo/bad_text.lzo'
|
||||
OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
|
||||
====
|
||||
---- DATASET
|
||||
functional
|
||||
---- BASE_TABLE_NAME
|
||||
bad_text_gzip
|
||||
---- COLUMNS
|
||||
s STRING
|
||||
|
||||
@@ -33,7 +33,6 @@ table_name:insert_overwrite_partitioned, constraint:restrict_to, table_format:pa
|
||||
table_name:insert_string_partitioned, constraint:restrict_to, table_format:parquet/none/none
|
||||
|
||||
table_name:old_rcfile_table, constraint:restrict_to, table_format:rc/none/none
|
||||
table_name:bad_text_lzo, constraint:restrict_to, table_format:text/lzo/block
|
||||
table_name:bad_text_gzip, constraint:restrict_to, table_format:text/gzip/block
|
||||
table_name:bad_seq_snap, constraint:restrict_to, table_format:seq/snap/block
|
||||
table_name:bad_avro_snap_strings, constraint:restrict_to, table_format:avro/snap/block
|
||||
@@ -242,13 +241,11 @@ table_name:date_tbl, constraint:restrict_to, table_format:avro/snap/block
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:orc/def/block
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:hbase/none/none
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:text/lzo/block
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:text/bzip/block
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:text/gzip/block
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:text/snap/block
|
||||
table_name:date_tbl, constraint:restrict_to, table_format:text/def/block
|
||||
table_name:date_tbl_error, constraint:restrict_to, table_format:text/none/none
|
||||
table_name:date_tbl_error, constraint:restrict_to, table_format:text/lzo/block
|
||||
table_name:date_tbl_error, constraint:restrict_to, table_format:text/bzip/block
|
||||
table_name:date_tbl_error, constraint:restrict_to, table_format:text/gzip/block
|
||||
table_name:date_tbl_error, constraint:restrict_to, table_format:text/snap/block
|
||||
@@ -280,7 +277,6 @@ table_name:bucketed_ext_table, constraint:exclude, table_format:hbase/none/none
|
||||
table_name:bucketed_ext_table, constraint:exclude, table_format:kudu/none/none
|
||||
table_name:bucketed_table, constraint:exclude, table_format:hbase/none/none
|
||||
table_name:bucketed_table, constraint:exclude, table_format:kudu/none/none
|
||||
table_name:bucketed_table, constraint:exclude, table_format:text/lzo/block
|
||||
|
||||
# The uncompressed ORC tables are mainly used in test_scanners_fuzz.py to avoid creating
|
||||
# them each time when running the test. Developers may run this test many times locally.
|
||||
|
||||
|
@@ -531,8 +531,8 @@ PLAN-ROOT SINK
|
||||
# join involving tables with no table stats
|
||||
# one of the tables (alltypes) is a compressed text file
|
||||
# tests that the default join strategy is broadcast
|
||||
select * from functional_text_lzo.emptytable a inner join
|
||||
functional_text_lzo.alltypes b on a.f2 = b.int_col
|
||||
select * from functional_text_gzip.emptytable a inner join
|
||||
functional_text_gzip.alltypes b on a.f2 = b.int_col
|
||||
---- PLAN
|
||||
PLAN-ROOT SINK
|
||||
|
|
||||
@@ -541,11 +541,11 @@ PLAN-ROOT SINK
|
||||
| runtime filters: RF000 <- a.f2
|
||||
| row-size=96B cardinality=5.65K
|
||||
|
|
||||
|--00:SCAN HDFS [functional_text_lzo.emptytable a]
|
||||
|--00:SCAN HDFS [functional_text_gzip.emptytable a]
|
||||
| partitions=0/0 files=0 size=0B
|
||||
| row-size=16B cardinality=0
|
||||
|
|
||||
01:SCAN HDFS [functional_text_lzo.alltypes b]
|
||||
01:SCAN HDFS [functional_text_gzip.alltypes b]
|
||||
HDFS partitions=24/24 files=24 size=123.32KB
|
||||
runtime filters: RF000 -> b.int_col
|
||||
row-size=80B cardinality=5.65K
|
||||
|
||||
@@ -3056,8 +3056,8 @@ PLAN-ROOT SINK
|
||||
# join involving tables with no table stats
|
||||
# one of the tables (alltypes) is a compressed text file
|
||||
# tests that the default join strategy is broadcast
|
||||
select * from functional_text_lzo.emptytable a inner join
|
||||
functional_text_lzo.alltypes b on a.f2 = b.int_col
|
||||
select * from functional_text_gzip.emptytable a inner join
|
||||
functional_text_gzip.alltypes b on a.f2 = b.int_col
|
||||
---- PLAN
|
||||
PLAN-ROOT SINK
|
||||
|
|
||||
@@ -3066,11 +3066,11 @@ PLAN-ROOT SINK
|
||||
| runtime filters: RF000 <- b.int_col
|
||||
| row-size=96B cardinality=0
|
||||
|
|
||||
|--01:SCAN HDFS [functional_text_lzo.alltypes b]
|
||||
|--01:SCAN HDFS [functional_text_gzip.alltypes b]
|
||||
| HDFS partitions=24/24 files=24 size=123.32KB
|
||||
| row-size=80B cardinality=unavailable
|
||||
|
|
||||
00:SCAN HDFS [functional_text_lzo.emptytable a]
|
||||
00:SCAN HDFS [functional_text_gzip.emptytable a]
|
||||
partitions=0/0 files=0 size=0B
|
||||
runtime filters: RF000 -> a.f2
|
||||
row-size=16B cardinality=0
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq,rc,avro,parquet,orc,hbase,kudu
|
||||
dataset: functional
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
@@ -4,7 +4,6 @@ file_format: text, dataset: functional, compression_codec: def, compression_type
|
||||
file_format: text, dataset: functional, compression_codec: gzip, compression_type: block
|
||||
file_format: text, dataset: functional, compression_codec: bzip, compression_type: block
|
||||
file_format: text, dataset: functional, compression_codec: snap, compression_type: block
|
||||
file_format: text, dataset: functional, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: functional, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: functional, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: functional, compression_codec: def, compression_type: record
|
||||
|
||||
|
@@ -107,24 +107,6 @@ row_regex: .*Error parsing row: file: $NAMENODE/.* before offset: \d+
|
||||
INT, DATE, DATE
|
||||
====
|
||||
---- QUERY
|
||||
select count(*) from functional_text_lzo.bad_text_lzo
|
||||
---- ERRORS
|
||||
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
|
||||
---- RESULTS
|
||||
5141
|
||||
---- TYPES
|
||||
bigint
|
||||
====
|
||||
---- QUERY
|
||||
select count(field) from functional_text_lzo.bad_text_lzo
|
||||
---- ERRORS
|
||||
Blocksize: 536870911 is greater than LZO_MAX_BLOCK_SIZE: 67108864
|
||||
---- RESULTS
|
||||
5141
|
||||
---- TYPES
|
||||
bigint
|
||||
====
|
||||
---- QUERY
|
||||
select * from alltypeserrornonulls
|
||||
---- ERRORS
|
||||
Error converting column: 3 to SMALLINT
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
====
|
||||
---- QUERY
|
||||
# Test that running with plugin disabled fails gracefully.
|
||||
select * from functional_text_lzo.alltypes
|
||||
---- CATCH
|
||||
Scanner plugin 'LZO' is not one of the enabled plugins: ''
|
||||
====
|
||||
@@ -371,18 +371,6 @@ LOCATION '$$location_uri$$'
|
||||
TBLPROPERTIES ('external.table.purge'='TRUE')
|
||||
====
|
||||
---- QUERY
|
||||
SHOW CREATE TABLE functional_text_lzo.tinytable
|
||||
---- RESULTS-HIVE
|
||||
CREATE EXTERNAL TABLE functional_text_lzo.tinytable (
|
||||
a STRING,
|
||||
b STRING
|
||||
)
|
||||
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
|
||||
STORED AS INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
|
||||
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
|
||||
LOCATION '$$location_uri$$'
|
||||
====
|
||||
---- QUERY
|
||||
SHOW CREATE TABLE functional.allcomplextypes
|
||||
---- RESULTS-HIVE
|
||||
CREATE EXTERNAL TABLE functional.allcomplextypes (
|
||||
|
||||
@@ -13,7 +13,7 @@ BIGINT
|
||||
select count(*)
|
||||
from multi_text_compression where month <= 3
|
||||
---- CATCH
|
||||
Scanner plugin 'LZ4' is not one of the enabled plugins: 'LZO'
|
||||
Scanner plugin 'LZ4' is not one of the enabled plugins: ''
|
||||
====
|
||||
---- QUERY
|
||||
# Unknown compression suffix is treated as uncompressed text.
|
||||
@@ -26,3 +26,10 @@ INT
|
||||
Error converting column: 0 to INT
|
||||
Error parsing row: file: __HDFS_FILENAME__, before offset: 16
|
||||
====
|
||||
---- QUERY
|
||||
# Test that querying partition with unsupported plugin fails gracefully.
|
||||
select count(*)
|
||||
from multi_text_compression where month = 5
|
||||
---- CATCH
|
||||
Scanner plugin 'LZO' is not one of the enabled plugins: ''
|
||||
====
|
||||
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq
|
||||
dataset: tpch
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# Generated File.
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: record
|
||||
|
||||
|
@@ -2,4 +2,3 @@
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: gzip, compression_type: record
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq,rc,avro,parquet,kudu
|
||||
dataset: tpch
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# Generated File.
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: record
|
||||
|
||||
|
@@ -2,4 +2,3 @@
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: gzip, compression_type: record
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq, parquet
|
||||
dataset: tpch
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# Generated File.
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: record
|
||||
|
||||
|
@@ -2,4 +2,3 @@
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: gzip, compression_type: record
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq,rc,avro,parquet
|
||||
dataset: tpcds
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# Generated File.
|
||||
file_format: text, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
file_format: text, dataset: tpcds, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpcds, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpcds, compression_codec: def, compression_type: record
|
||||
|
||||
|
@@ -8,7 +8,6 @@ file_format: parquet, dataset: tpcds, compression_codec: def, compression_type:
|
||||
file_format: avro, dataset: tpcds, compression_codec: def, compression_type: block
|
||||
file_format: rc, dataset: tpcds, compression_codec: bzip, compression_type: block
|
||||
file_format: seq, dataset: tpcds, compression_codec: snap, compression_type: record
|
||||
file_format: text, dataset: tpcds, compression_codec: lzo, compression_type: block
|
||||
file_format: rc, dataset: tpcds, compression_codec: def, compression_type: block
|
||||
file_format: avro, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
file_format: parquet, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq,rc,avro,parquet,orc
|
||||
dataset: tpcds
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# Generated File.
|
||||
file_format: text, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
file_format: text, dataset: tpcds, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpcds, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpcds, compression_codec: def, compression_type: record
|
||||
|
||||
|
1 testdata/workloads/tpcds/tpcds_pairwise.csv vendored
@@ -8,7 +8,6 @@ file_format: parquet, dataset: tpcds, compression_codec: def, compression_type:
|
||||
file_format: avro, dataset: tpcds, compression_codec: def, compression_type: block
|
||||
file_format: rc, dataset: tpcds, compression_codec: bzip, compression_type: block
|
||||
file_format: seq, dataset: tpcds, compression_codec: snap, compression_type: record
|
||||
file_format: text, dataset: tpcds, compression_codec: lzo, compression_type: block
|
||||
file_format: rc, dataset: tpcds, compression_codec: def, compression_type: block
|
||||
file_format: avro, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
file_format: parquet, dataset: tpcds, compression_codec: none, compression_type: none
|
||||
|
||||
|
2 testdata/workloads/tpch/tpch_dimensions.csv vendored
@@ -1,4 +1,4 @@
|
||||
file_format: text,seq,rc,avro,parquet,orc,kudu
|
||||
dataset: tpch
|
||||
compression_codec: none,def,gzip,bzip,snap,lzo
|
||||
compression_codec: none,def,gzip,bzip,snap
|
||||
compression_type: none,block,record
|
||||
|
||||
|
1 testdata/workloads/tpch/tpch_exhaustive.csv vendored
@@ -1,7 +1,6 @@
|
||||
# Generated File.
|
||||
file_format: text, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: text, dataset: tpch, compression_codec: gzip, compression_type: block
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: def, compression_type: record
|
||||
|
||||
|
1 testdata/workloads/tpch/tpch_pairwise.csv vendored
@@ -8,7 +8,6 @@ file_format: parquet, dataset: tpch, compression_codec: def, compression_type: b
|
||||
file_format: avro, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: rc, dataset: tpch, compression_codec: bzip, compression_type: block
|
||||
file_format: seq, dataset: tpch, compression_codec: snap, compression_type: record
|
||||
file_format: text, dataset: tpch, compression_codec: lzo, compression_type: block
|
||||
file_format: rc, dataset: tpch, compression_codec: def, compression_type: block
|
||||
file_format: avro, dataset: tpch, compression_codec: none, compression_type: none
|
||||
file_format: parquet, dataset: tpch, compression_codec: none, compression_type: none
|
||||
|
||||
|
@@ -32,7 +32,7 @@ class TableFormatInfo(object):
|
||||
KNOWN_FILE_FORMATS = ['text', 'seq', 'rc', 'parquet', 'orc', 'avro', 'hbase']
|
||||
if os.environ['KUDU_IS_SUPPORTED'] == 'true':
|
||||
KNOWN_FILE_FORMATS.append('kudu')
|
||||
KNOWN_COMPRESSION_CODECS = ['none', 'snap', 'gzip', 'bzip', 'def', 'lzo', 'zstd', 'lz4']
|
||||
KNOWN_COMPRESSION_CODECS = ['none', 'snap', 'gzip', 'bzip', 'def', 'zstd', 'lz4']
|
||||
KNOWN_COMPRESSION_TYPES = ['none', 'block', 'record']
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
|
||||
@@ -26,7 +26,7 @@ from tests.common.test_dimensions import create_exec_option_dimension
|
||||
from tests.common.test_result_verifier import verify_query_result_is_equal
|
||||
|
||||
# compression codecs impala support reading in text file type
|
||||
TEXT_CODECS = ['snappy', 'gzip', 'zstd', 'lzo', 'bzip2', 'deflate', 'default']
|
||||
TEXT_CODECS = ['snappy', 'gzip', 'zstd', 'bzip2', 'deflate', 'default']
|
||||
|
||||
|
||||
class TestTextInterop(CustomClusterTestSuite):
|
||||
@@ -84,7 +84,6 @@ class TestTextInterop(CustomClusterTestSuite):
|
||||
'snappy': 'org.apache.hadoop.io.compress.SnappyCodec',
|
||||
'gzip': 'org.apache.hadoop.io.compress.GzipCodec',
|
||||
'zstd': 'org.apache.hadoop.io.compress.ZStandardCodec',
|
||||
'lzo': 'com.hadoop.compression.lzo.LzopCodec',
|
||||
'bzip2': 'org.apache.hadoop.io.compress.BZip2Codec',
|
||||
'deflate': 'org.apache.hadoop.io.compress.DeflateCodec',
|
||||
'default': 'org.apache.hadoop.io.compress.DefaultCodec'
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
|
||||
|
||||
class TestScannerPlugin(CustomClusterTestSuite):
|
||||
"""Tests that involve changing the scanner plugin option."""
|
||||
|
||||
@classmethod
|
||||
def get_workload(self):
|
||||
return 'functional-query'
|
||||
|
||||
@pytest.mark.execute_serially
|
||||
@CustomClusterTestSuite.with_args("--enabled_hdfs_text_scanner_plugins=")
|
||||
def test_disable_lzo_plugin(self, vector):
|
||||
"""Test that we can gracefully handle a disabled plugin."""
|
||||
# Should be able to query valid partitions only.
|
||||
self.run_test_case('QueryTest/disable-lzo-plugin', vector)
|
||||
@@ -102,7 +102,7 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
|
||||
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes",
|
||||
compare=compare_describe_formatted)
|
||||
self.exec_and_compare_hive_and_impala_hs2(
|
||||
"describe formatted functional_text_lzo.alltypes",
|
||||
"describe formatted functional_text_gzip.alltypes",
|
||||
compare=compare_describe_formatted)
|
||||
|
||||
# Describe an unpartitioned table.
|
||||
|
||||
@@ -181,7 +181,7 @@ class TestPartitionMetadataUncompressedTextOnly(ImpalaTestSuite):
|
||||
FQ_TBL_NAME, TBL_LOCATION))
|
||||
|
||||
self.__add_alltypes_partition(vector, FQ_TBL_NAME, "functional", 2009, 1)
|
||||
self.__add_alltypes_partition(vector, FQ_TBL_NAME, "functional_text_lzo", 2009, 2)
|
||||
self.__add_alltypes_partition(vector, FQ_TBL_NAME, "functional_text_gzip", 2009, 2)
|
||||
|
||||
# Create a new partition with a bogus file with the unsupported LZ4 suffix.
|
||||
lz4_year = 2009
|
||||
@@ -204,8 +204,18 @@ class TestPartitionMetadataUncompressedTextOnly(ImpalaTestSuite):
      "alter table {0} add partition (year={1}, month={2}) location '{3}'".format(
          FQ_TBL_NAME, fake_comp_year, fake_comp_month, fake_comp_ym_partition_loc))

    # Create a new partition with a bogus file with the now-unsupported LZO suffix
    lzo_year = 2009
    lzo_month = 5
    lzo_ym_partition_loc = self.__make_ym_partition_dir(TBL_LOCATION, lzo_year, lzo_month)
    self.filesystem_client.create_file("{0}/fake.lzo".format(lzo_ym_partition_loc)[1:],
        "some test data")
    self.client.execute(
        "alter table {0} add partition (year={1}, month={2}) location '{3}'".format(
            FQ_TBL_NAME, lzo_year, lzo_month, lzo_ym_partition_loc))

    show_files_result = self.client.execute("show files in {0}".format(FQ_TBL_NAME))
    assert len(show_files_result.data) == 4, "Expected one file per partition dir"
    assert len(show_files_result.data) == 5, "Expected one file per partition dir"

    self.run_test_case('QueryTest/unsupported-compression-partitions', vector,
        unique_database)

@@ -222,8 +232,11 @@ class TestPartitionMetadataUncompressedTextOnly(ImpalaTestSuite):
|
||||
"""Create the year/month partition directory and return the path."""
|
||||
y_partition_loc = "{0}/year={1}".format(tbl_location, year)
|
||||
ym_partition_loc = "{0}/month={1}".format(y_partition_loc, month)
|
||||
self.filesystem_client.delete_file_dir(tbl_location[1:], recursive=True)
|
||||
self.filesystem_client.make_dir(tbl_location[1:])
|
||||
self.filesystem_client.make_dir(y_partition_loc[1:])
|
||||
if not self.filesystem_client.exists(tbl_location[1:]):
|
||||
self.filesystem_client.make_dir(tbl_location[1:])
|
||||
if not self.filesystem_client.exists(y_partition_loc[1:]):
|
||||
self.filesystem_client.make_dir(y_partition_loc[1:])
|
||||
if self.filesystem_client.exists(ym_partition_loc[1:]):
|
||||
self.filesystem_client.delete_file_dir(ym_partition_loc[1:], recursive=True)
|
||||
self.filesystem_client.make_dir(ym_partition_loc[1:])
|
||||
return ym_partition_loc
|
||||
|
||||
@@ -75,7 +75,6 @@ class TestCompressedFormats(ImpalaTestSuite):
|
||||
file_format = vector.get_value('file_format')
|
||||
extension, suffix = vector.get_value('compression_format')
|
||||
if file_format in ['rc', 'seq']:
|
||||
# TODO: How about LZO?
|
||||
# Test that {gzip,snappy,bzip,deflate}-compressed
|
||||
# {RC,sequence,text} files are supported.
|
||||
db_suffix = '_%s_%s' % (file_format, suffix)
|
||||
|
||||
@@ -72,7 +72,7 @@ class TestScannersFuzzing(ImpalaTestSuite):
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('table_format').file_format in ('avro', 'parquet', 'orc') or
|
||||
(v.get_value('table_format').file_format == 'text' and
|
||||
v.get_value('table_format').compression_codec in ('none', 'lzo')))
|
||||
v.get_value('table_format').compression_codec in ('none')))
|
||||
|
||||
|
||||
def test_fuzz_alltypes(self, vector, unique_database):
|
||||
@@ -247,8 +247,7 @@ class TestScannersFuzzing(ImpalaTestSuite):
|
||||
msg = "Should not throw error when abort_on_error=0: '{0}'".format(e)
|
||||
LOG.error(msg)
|
||||
# Parquet and compressed text can fail the query for some parse errors.
|
||||
# E.g. corrupt Parquet footer (IMPALA-3773) or a corrupt LZO index file
|
||||
# (IMPALA-4013).
|
||||
# E.g. corrupt Parquet footer (IMPALA-3773)
|
||||
table_format = vector.get_value('table_format')
|
||||
if table_format.file_format not in ['parquet', 'orc', 'rc', 'seq'] \
|
||||
and not (table_format.file_format == 'text' and
|
||||
|
||||