mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-11528: Catalogd should start up with a corrupt Hive function.
This commit handles a specific kind of corrupt function within the Hive Metastore, which arises in the following situation: A valid Hive SQL function gets created in HMS. This UDF is written in Java and must derive from the "UDF" class. After creating this function in Impala, we then replace the underlying jar file with a class that does NOT derive from the "UDF" class. In this scenario, catalogd should reject the function and still start up gracefully. Before this commit, catalogd wasn't coming up. This was because the Hive function FunctionUtils.getUDFClassType() has a dependency on UDAF and was throwing a LinkageError, so we need to include the UDAF class in the shaded jar. Change-Id: I54e7a1df6d018ba6cf5ecf32dc9946edf86e2112 Reviewed-on: http://gerrit.cloudera.org:8080/18927 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Tamas Mate <tmater@apache.org>
This commit is contained in:
committed by
Csaba Ringhofer
parent
04b5319e6e
commit
4e813b7085
@@ -1764,7 +1764,7 @@ public class CatalogServiceCatalog extends Catalog {
|
||||
db.addFunction(fn);
|
||||
fn.setCatalogVersion(incrementAndGetCatalogVersion());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
} catch (Exception | LinkageError e) {
|
||||
LOG.error("Skipping function load: " + function.getFunctionName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -261,6 +261,7 @@ under the License.
|
||||
<module>shaded-deps/s3a-aws-sdk</module>
|
||||
<module>TableFlattener</module>
|
||||
<module>test-hive-udfs</module>
|
||||
<module>test-corrupt-hive-udfs</module>
|
||||
<module>yarn-extras</module>
|
||||
</modules>
|
||||
|
||||
|
||||
@@ -100,6 +100,7 @@ the same dependencies
|
||||
<include>org/apache/hadoop/hive/ql/parse/SemanticException.class</include>
|
||||
<!-- Needed to support Hive udfs -->
|
||||
<include>org/apache/hadoop/hive/ql/exec/*UDF*</include>
|
||||
<include>org/apache/hadoop/hive/ql/exec/UDAF.class</include>
|
||||
<include>org/apache/hadoop/hive/ql/exec/MapredContext.class</include>
|
||||
<include>org/apache/hadoop/hive/ql/exec/FunctionUtils*</include>
|
||||
<include>org/apache/hadoop/hive/ql/parse/GenericHiveLexer*</include>
|
||||
|
||||
53
java/test-corrupt-hive-udfs/pom.xml
Normal file
53
java/test-corrupt-hive-udfs/pom.xml
Normal file
@@ -0,0 +1,53 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<groupId>org.apache.impala</groupId>
|
||||
<artifactId>impala-parent</artifactId>
|
||||
<version>4.2.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>test-corrupt-hive-udfs</artifactId>
|
||||
<version>1.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>test-corrupt-hive-udfs</name>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.3</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,28 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.impala;
|
||||
|
||||
/*
|
||||
* This class is one of two classes used to test for a corrupt UDF loaded
|
||||
* into the Hive MetaStore. This class contains an invalid UDF. The jar file with this
|
||||
* class will replace the class in the jar created from the test-hive-udfs
|
||||
* directory. The catalogd executable should still be able to start correctly
|
||||
* even with this corrupt function.
|
||||
*/
|
||||
// Deliberately empty and deliberately NOT a subclass of Hive's UDF: the missing
// superclass is what makes this class an invalid UDF for the corrupt-function test.
public class CorruptUdf {
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
package org.apache.impala;
|
||||
|
||||
import org.apache.hadoop.hive.ql.exec.UDF;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
|
||||
/*
|
||||
* This class is one of two classes used to test for a corrupt UDF loaded
|
||||
* into the Hive MetaStore. This class contains a valid UDF. This file will
|
||||
* be used to create the function through SQL within the Hive MetaStore.
|
||||
* Later, this object will be replaced by a corrupt Java object (under
|
||||
* the test-corrupt-hive-udfs directory). The test will ensure that catalogd
|
||||
* is still able to start up fine (though the function will be disabled).
|
||||
*/
|
||||
// Valid variant of CorruptUdf: it extends Hive's UDF, so the function can be
// created through SQL before its jar is swapped for the invalid variant.
public class CorruptUdf extends UDF {
|
||||
// Identity function: returns the argument unchanged.
public IntWritable evaluate(IntWritable a) {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
6
testdata/bin/copy-udfs-udas.sh
vendored
6
testdata/bin/copy-udfs-udas.sh
vendored
@@ -51,6 +51,10 @@ then
|
||||
pushd "${IMPALA_HOME}"
|
||||
"${MAKE_CMD:-make}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \
|
||||
TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir uda-sample-ir
|
||||
cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs"
|
||||
"${IMPALA_HOME}/bin/mvn-quiet.sh" package
|
||||
cp target/test-corrupt-hive-udfs-1.0.jar \
|
||||
"${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar"
|
||||
cd "${IMPALA_HOME}/java/test-hive-udfs"
|
||||
"${IMPALA_HOME}/bin/mvn-quiet.sh" package
|
||||
cp target/test-hive-udfs-1.0.jar "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar"
|
||||
@@ -97,6 +101,8 @@ ln -s "${IMPALA_HOME}/be/build/latest/testutil/libTestUdfs.so" "${UDF_TMP_DIR}/u
|
||||
ln -s "${HIVE_HOME}/lib/hive-exec-"*.jar "${UDF_TMP_DIR}/hive-exec.jar"
|
||||
ln -s "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar" \
|
||||
"${UDF_TMP_DIR}/impala-hive-udfs.jar"
|
||||
ln -s "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar" \
|
||||
"${UDF_TMP_DIR}/impala-corrupt-hive-udfs.jar"
|
||||
ln -s "${IMPALA_HOME}/be/build/latest/testutil/test-udfs.ll" "${UDF_TMP_DIR}"
|
||||
ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/libudfsample.so" "${UDF_TMP_DIR}"
|
||||
ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/udf-sample.ll" "${UDF_TMP_DIR}"
|
||||
|
||||
@@ -176,6 +176,34 @@ class TestUdfPersistence(CustomClusterTestSuite):
|
||||
self.verify_function_count(
|
||||
"SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
|
||||
|
||||
@SkipIfFS.hive
@pytest.mark.execute_serially
def test_corrupt_java_bad_function(self):
  """IMPALA-11528: Tests catalogd startup when Hive holds a corrupt function whose
  class does not derive from UDF.

  A valid function (class derives from UDF) is first created in Hive. The jar it
  points at is then replaced with one whose class of the same name does not derive
  from UDF. After a cluster restart the function must not be listed, and the
  restart itself must succeed.

  Only runs under the 'exhaustive' exploration strategy; skipped otherwise.
  """
  # The docstring must be the first statement of the function; placed after the
  # skip check it would be a discarded string expression, not a docstring.
  if self.exploration_strategy() != 'exhaustive':
    pytest.skip()

  CORRUPT_JAR = "test-warehouse/test_corrupt.jar"
  self.filesystem_client.delete_file_dir(CORRUPT_JAR)
  # impala-hive-udfs.jar contains the class CorruptUdf which derives from UDF,
  # which makes it a valid function.
  self.filesystem_client.copy("/test-warehouse/impala-hive-udfs.jar",
      "/" + CORRUPT_JAR)
  # Adjacent string literals are used instead of a backslash continuation inside
  # the literal so that source indentation never leaks into the SQL statement.
  self.run_stmt_in_hive(
      "create function %s.corrupt_bad_function_udf as "
      "'org.apache.impala.CorruptUdf' using jar '%s/%s'"
      % (self.JAVA_FN_TEST_DB, os.getenv('DEFAULT_FS'), CORRUPT_JAR))
  # Now swap in the CorruptUdf class from impala-corrupt-hive-udfs.jar, which does
  # not derive from UDF, making the already-registered function invalid.
  self.filesystem_client.delete_file_dir(CORRUPT_JAR)
  self.filesystem_client.copy("/test-warehouse/impala-corrupt-hive-udfs.jar",
      "/" + CORRUPT_JAR)
  self.__restart_cluster()
  # The corrupt function must have been skipped: the function count is 0.
  self.verify_function_count(
      "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
|
||||
|
||||
@SkipIfFS.hive
|
||||
@pytest.mark.execute_serially
|
||||
@CustomClusterTestSuite.with_args(
|
||||
|
||||
Reference in New Issue
Block a user