IMPALA-11528: Catalogd should start up with a corrupt Hive function.

This commit handles a specific kind of corrupt function in the Hive
Metastore, which arises in the following situation:

A valid Hive SQL function is created in HMS. The UDF is written in
Java and must derive from Hive's "UDF" class. After the function has
been created, the underlying jar file is replaced with one containing
a class that does NOT derive from the "UDF" class.

In this scenario, catalogd should reject the function and still start
up gracefully. Before this commit, catalogd failed to start: the Hive
method FunctionUtils.getUDFClassType() references the UDAF class,
which was missing from catalogd's shaded jar, so loading the corrupt
function threw a LinkageError that was not handled. The fix is to
include the UDAF class in the shaded jar and to also catch
LinkageError when loading functions.
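
For background, here is a minimal, hand-written sketch (not Hive's actual
code; the class and method names are invented for illustration) of the kind
of class-hierarchy check that FunctionUtils.getUDFClassType() performs. It
suggests why only a corrupt function exposed the problem: a class that
really extends UDF is recognized by the first test, while a class that does
not falls through to the UDAF test, and resolving a UDAF reference that is
absent from the shaded jar surfaces as a NoClassDefFoundError, a subclass
of LinkageError.

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDF;

// Illustrative sketch only: a simplified stand-in for the kind of check
// that Hive's FunctionUtils.getUDFClassType() performs on a loaded class.
public class UdfTypeCheckSketch {
  enum UdfType { UDF_CLASS, UDAF_CLASS, UNKNOWN }

  static UdfType classify(Class<?> candidate) {
    // A class that really extends UDF is recognized here, so the UDAF
    // reference below is never linked.
    if (UDF.class.isAssignableFrom(candidate)) return UdfType.UDF_CLASS;
    // For a class that does NOT extend UDF, execution reaches this line.
    // Resolving UDAF.class forces the JVM to load UDAF; if UDAF is missing
    // from catalogd's shaded jar, this throws NoClassDefFoundError, which
    // extends LinkageError (an Error, not an Exception).
    if (UDAF.class.isAssignableFrom(candidate)) return UdfType.UDAF_CLASS;
    return UdfType.UNKNOWN;
  }
}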

Change-Id: I54e7a1df6d018ba6cf5ecf32dc9946edf86e2112
Reviewed-on: http://gerrit.cloudera.org:8080/18927
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Tamas Mate <tmater@apache.org>
Steve Carlin
2022-08-29 14:57:19 -07:00
committed by Csaba Ringhofer
parent 04b5319e6e
commit 4e813b7085
8 changed files with 153 additions and 1 deletion

View File

@@ -1764,7 +1764,7 @@ public class CatalogServiceCatalog extends Catalog {
             db.addFunction(fn);
             fn.setCatalogVersion(incrementAndGetCatalogVersion());
           }
-        } catch (Exception e) {
+        } catch (Exception | LinkageError e) {
           LOG.error("Skipping function load: " + function.getFunctionName(), e);
         }
       }
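
This wider catch is needed because LinkageError descends from
java.lang.Error rather than java.lang.Exception, so the original
catch (Exception e) never saw the failure and catalogd startup aborted.
A tiny standalone demonstration (an assumed example, not Impala code; the
class and method names are invented):

// Standalone demo: a LinkageError escapes catch (Exception e), but is
// handled once the clause is widened to catch (Exception | LinkageError e).
public class CatchLinkageErrorDemo {
  private static void loadFunction() {
    // Stand-in for a function load that fails to link a missing class.
    throw new NoClassDefFoundError("org/apache/hadoop/hive/ql/exec/UDAF");
  }

  public static void main(String[] args) {
    try {
      loadFunction();
    } catch (Exception | LinkageError e) {
      // With only catch (Exception e), the NoClassDefFoundError would
      // propagate out of this block and abort startup.
      System.out.println("Skipping function load: " + e);
    }
  }
}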

View File

@@ -261,6 +261,7 @@ under the License.
     <module>shaded-deps/s3a-aws-sdk</module>
     <module>TableFlattener</module>
     <module>test-hive-udfs</module>
+    <module>test-corrupt-hive-udfs</module>
     <module>yarn-extras</module>
   </modules>

View File

@@ -100,6 +100,7 @@ the same dependencies
         <include>org/apache/hadoop/hive/ql/parse/SemanticException.class</include>
         <!-- Needed to support Hive udfs -->
         <include>org/apache/hadoop/hive/ql/exec/*UDF*</include>
+        <include>org/apache/hadoop/hive/ql/exec/UDAF.class</include>
         <include>org/apache/hadoop/hive/ql/exec/MapredContext.class</include>
         <include>org/apache/hadoop/hive/ql/exec/FunctionUtils*</include>
         <include>org/apache/hadoop/hive/ql/parse/GenericHiveLexer*</include>

View File

@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <parent>
    <groupId>org.apache.impala</groupId>
    <artifactId>impala-parent</artifactId>
    <version>4.2.0-SNAPSHOT</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>
  <artifactId>test-corrupt-hive-udfs</artifactId>
  <version>1.0</version>
  <packaging>jar</packaging>
  <name>test-corrupt-hive-udfs</name>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.3</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>

View File

@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala;

/*
 * This class is one of two classes used to test for a corrupt UDF loaded
 * into the Hive MetaStore. This class contains an invalid UDF. The jar file
 * built from this class replaces the jar created from the test-hive-udfs
 * directory. The catalogd executable should still be able to start correctly
 * even with this corrupt function.
 */
public class CorruptUdf {
}

View File

@@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;

/*
 * This class is one of two classes used to test for a corrupt UDF loaded
 * into the Hive MetaStore. This class contains a valid UDF. This file is
 * used to create the function through SQL within the Hive MetaStore.
 * Later, this class is replaced by a corrupt Java class (under the
 * test-corrupt-hive-udfs directory). The test ensures that catalogd is
 * still able to start up fine (though the function will be disabled).
 */
public class CorruptUdf extends UDF {
  public IntWritable evaluate(IntWritable a) {
    return a;
  }
}

View File

@@ -51,6 +51,10 @@ then
   pushd "${IMPALA_HOME}"
   "${MAKE_CMD:-make}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \
     TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir uda-sample-ir
+  cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs"
+  "${IMPALA_HOME}/bin/mvn-quiet.sh" package
+  cp target/test-corrupt-hive-udfs-1.0.jar \
+    "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar"
   cd "${IMPALA_HOME}/java/test-hive-udfs"
   "${IMPALA_HOME}/bin/mvn-quiet.sh" package
   cp target/test-hive-udfs-1.0.jar "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar"
@@ -97,6 +101,8 @@ ln -s "${IMPALA_HOME}/be/build/latest/testutil/libTestUdfs.so" "${UDF_TMP_DIR}/u
 ln -s "${HIVE_HOME}/lib/hive-exec-"*.jar "${UDF_TMP_DIR}/hive-exec.jar"
 ln -s "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar" \
   "${UDF_TMP_DIR}/impala-hive-udfs.jar"
+ln -s "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar" \
+  "${UDF_TMP_DIR}/impala-corrupt-hive-udfs.jar"
 ln -s "${IMPALA_HOME}/be/build/latest/testutil/test-udfs.ll" "${UDF_TMP_DIR}"
 ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/libudfsample.so" "${UDF_TMP_DIR}"
 ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/udf-sample.ll" "${UDF_TMP_DIR}"

View File

@@ -176,6 +176,34 @@ class TestUdfPersistence(CustomClusterTestSuite):
     self.verify_function_count(
         "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
+
+  @SkipIfFS.hive
+  @pytest.mark.execute_serially
+  def test_corrupt_java_bad_function(self):
+    """IMPALA-11528: Tests that catalogd starts up when a corrupt function which
+    does not derive from UDF exists in Hive. The way we do this is to create a
+    valid function in Hive which does derive from UDF, then switch the underlying
+    jar to one whose class does not derive from the UDF class."""
+    if self.exploration_strategy() != 'exhaustive': pytest.skip()
+    CORRUPT_JAR = "test-warehouse/test_corrupt.jar"
+    self.filesystem_client.delete_file_dir(CORRUPT_JAR)
+    # impala-hive-udfs.jar contains a CorruptUdf class that derives from UDF,
+    # i.e. a valid function.
+    self.filesystem_client.copy("/test-warehouse/impala-hive-udfs.jar",
+        "/" + CORRUPT_JAR)
+    self.run_stmt_in_hive("create function %s.corrupt_bad_function_udf as "
+        "'org.apache.impala.CorruptUdf' using jar '%s/%s'"
+        % (self.JAVA_FN_TEST_DB, os.getenv('DEFAULT_FS'), CORRUPT_JAR))
+    # Now copy in the CorruptUdf class from impala-corrupt-hive-udfs.jar, which
+    # does not derive from UDF, making the registered function invalid.
+    self.filesystem_client.delete_file_dir(CORRUPT_JAR)
+    self.filesystem_client.copy("/test-warehouse/impala-corrupt-hive-udfs.jar",
+        "/" + CORRUPT_JAR)
+    self.__restart_cluster()
+    # Make sure the function count is 0.
+    self.verify_function_count(
+        "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
 
   @SkipIfFS.hive
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(