diff --git a/fe/src/main/java/org/apache/impala/hive/executor/HiveGenericJavaFunction.java b/fe/src/main/java/org/apache/impala/hive/executor/HiveGenericJavaFunction.java new file mode 100644 index 000000000..8491bb53b --- /dev/null +++ b/fe/src/main/java/org/apache/impala/hive/executor/HiveGenericJavaFunction.java @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.hive.executor; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.FunctionType; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.ResourceType; +import org.apache.hadoop.hive.metastore.api.ResourceUri; +import org.apache.hadoop.hive.ql.exec.FunctionUtils.UDFClassType; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.impala.analysis.FunctionName; +import org.apache.impala.analysis.HdfsUri; +import org.apache.impala.catalog.CatalogException; +import org.apache.impala.catalog.ScalarFunction; +import org.apache.impala.catalog.PrimitiveType; +import org.apache.impala.catalog.ScalarType; +import org.apache.impala.catalog.Type; +import org.apache.impala.common.AnalysisException; +import org.apache.impala.common.FileSystemUtil; +import org.apache.impala.service.BackendConfig; +import org.apache.impala.thrift.TFunction; +import org.apache.impala.thrift.TFunctionBinaryType; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +import org.apache.log4j.Logger; + +/** + * HiveGenericJavaFunction generates the instance of the GenericUDF object given + * a className. 
+ */ +public class HiveGenericJavaFunction implements HiveJavaFunction { + private static final Logger LOG = Logger.getLogger(HiveGenericJavaFunction.class); + + private final Function hiveFn_; + + private final Type retType_; + + private final Type[] parameterTypes_; + + private final GenericUDF genericUDF_; + + public HiveGenericJavaFunction(Class<?> udfClass, + Function hiveFn, Type retType, Type[] parameterTypes) + throws CatalogException { + try { + hiveFn_ = hiveFn; + retType_ = retType; + parameterTypes_ = parameterTypes; + genericUDF_ = createGenericUDFInstance(udfClass); + checkValidFunction(); + } catch (CatalogException e) { + String errorMsg = "Error retrieving class " + udfClass + ": " + e.getMessage(); + throw new CatalogException(errorMsg, e); + } + } + + public HiveGenericJavaFunction(Class<?> udfClass, + Type retType, Type[] parameterTypes) throws CatalogException { + this(udfClass, null, retType, parameterTypes); + } + + @Override + public Function getHiveFunction() { + return hiveFn_; + } + + /** + * GenericUDF does not currently support extracting the parameter and + * return types from the method; this cannot be done via reflection. + * It could potentially be supported by adding annotations to the class. + */ + @Override + public List<ScalarFunction> extract() throws CatalogException { + // Return an empty list because extraction cannot be done. + return new ArrayList<>(); + } + + public GenericUDF getGenericUDFInstance() { + return genericUDF_; + } + + public Type getRetType() { + return retType_; + } + + public Type[] getParameterTypes() { + return parameterTypes_; + } + + private GenericUDF createGenericUDFInstance(Class<?> udfClass) + throws CatalogException { + try { + Constructor<?> ctor = udfClass.getConstructor(); + return (GenericUDF) ctor.newInstance(); + } catch (NoSuchMethodException e) { + throw new CatalogException( + "Unable to find constructor with no arguments.", e); + } catch (IllegalArgumentException e) { + throw new CatalogException( + "Unable to call UDF constructor with no arguments.", e); + } catch (InstantiationException|IllegalAccessException|InvocationTargetException e) { + throw new CatalogException("Unable to create UDF instance.", e); + } + } + + private void checkValidFunction() throws CatalogException { + try { + ObjectInspector[] parameterOIs = getInspectors(parameterTypes_); + // Call the initialize method which will give us the return type that + // the GenericUDF produces. Then we check if it matches what we expect. + ObjectInspector returnOI = genericUDF_.initialize(parameterOIs); + if (returnOI != getInspector(retType_) && !returnOI.getTypeName().equals("void")) { + throw new CatalogException("Function expected return type " + + returnOI.getTypeName() + " but was created with " + retType_); + } + } catch (UDFArgumentException e) { + LOG.error(e.getMessage()); + throw new CatalogException("Function cannot be created with the following " + + "parameters: (" + Joiner.on(",").join(parameterTypes_) + "). 
"); + } + } + + private ObjectInspector[] getInspectors(Type[] typeArray) + throws CatalogException { + ObjectInspector[] OIArray = new ObjectInspector[typeArray.length]; + for (int i = 0; i < typeArray.length; ++i) { + OIArray[i] = getInspector(typeArray[i]); + } + return OIArray; + } + + private ObjectInspector getInspector(Type t) throws CatalogException { + switch (t.getPrimitiveType().toThrift()) { + case BOOLEAN: + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + case TINYINT: + return PrimitiveObjectInspectorFactory.writableByteObjectInspector; + case SMALLINT: + return PrimitiveObjectInspectorFactory.writableShortObjectInspector; + case INT: + return PrimitiveObjectInspectorFactory.writableIntObjectInspector; + case BIGINT: + return PrimitiveObjectInspectorFactory.writableLongObjectInspector; + case FLOAT: + return PrimitiveObjectInspectorFactory.writableFloatObjectInspector; + case DOUBLE: + return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + case STRING: + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + default: + throw new CatalogException("Unsupported type: " + t); + } + } +} diff --git a/fe/src/main/java/org/apache/impala/hive/executor/HiveJavaFunctionFactoryImpl.java b/fe/src/main/java/org/apache/impala/hive/executor/HiveJavaFunctionFactoryImpl.java index c90b09bbe..25f5d9e76 100644 --- a/fe/src/main/java/org/apache/impala/hive/executor/HiveJavaFunctionFactoryImpl.java +++ b/fe/src/main/java/org/apache/impala/hive/executor/HiveJavaFunctionFactoryImpl.java @@ -49,6 +49,9 @@ public class HiveJavaFunctionFactoryImpl implements HiveJavaFunctionFactory { case UDF: return new HiveLegacyJavaFunction(javaClass.getUDFClass(), hiveFn, retType, paramTypes); + case GENERIC_UDF: + return new HiveGenericJavaFunction(javaClass.getUDFClass(), hiveFn, retType, + paramTypes); default: throw new CatalogException("Function " + fnName + ": The class " + jarUri + " does not derive " @@ -58,6 +61,9 @@ public class HiveJavaFunctionFactoryImpl implements HiveJavaFunctionFactory { public HiveJavaFunction create(String localLibPath, ScalarFunction fn) throws CatalogException { + if (fn.hasVarArgs()) { + throw new CatalogException("Variable arguments not supported in Hive UDFs."); + } return create(localLibPath, HiveJavaFunction.toHiveFunction((ScalarFunction) fn), fn.getReturnType(), fn.getArgs()); } diff --git a/fe/src/main/java/org/apache/impala/hive/executor/HiveUdfExecutorGeneric.java b/fe/src/main/java/org/apache/impala/hive/executor/HiveUdfExecutorGeneric.java new file mode 100644 index 000000000..de26ccd27 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/hive/executor/HiveUdfExecutorGeneric.java @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.impala.hive.executor; + +import sun.misc.Unsafe; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.impala.catalog.Type; +import org.apache.impala.common.ImpalaException; +import org.apache.impala.common.ImpalaRuntimeException; +import org.apache.impala.common.JniUtil; +import org.apache.impala.thrift.THiveUdfExecutorCtorParams; +import org.apache.impala.thrift.TPrimitiveType; +import org.apache.impala.util.UnsafeUtil; +import org.apache.log4j.Logger; +import org.apache.thrift.protocol.TBinaryProtocol; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Wrapper object to run Hive GenericUDFs. This class works with UdfCallExpr in the + * backend to marshal data back and forth between the execution engine and + * the Java UDF class. + * See the comments in be/src/exprs/hive-udf-call.h for more details. + */ +public class HiveUdfExecutorGeneric extends HiveUdfExecutor { + private static final Logger LOG = Logger.getLogger(HiveUdfExecutorGeneric.class); + + private GenericUDF genericUDF_; + + // Hive Generic UDFs expect a DeferredObject for each parameter passed in. However, + // if the ith parameter is NULL, then Hive expects the ith DeferredObject to be null. + // The deferredParameters_ array is populated at initialization time. The + // runtimeDeferredParameters_ array is what is passed to the Hive Generic UDF at + // runtime; runtimeDeferredParameters_[i] contains either deferredParameters_[i] or null. + private DeferredObject[] deferredParameters_; + private DeferredObject[] runtimeDeferredParameters_; + + /** + * Create a UdfExecutor, using parameters from a serialized thrift object. Used by + * the backend. + */ + public HiveUdfExecutorGeneric(THiveUdfExecutorCtorParams request, + HiveGenericJavaFunction hiveJavaFn) throws ImpalaRuntimeException { + super(request, JavaUdfDataType.getType(hiveJavaFn.getRetType()), + JavaUdfDataType.getTypes(hiveJavaFn.getParameterTypes())); + genericUDF_ = hiveJavaFn.getGenericUDFInstance(); + deferredParameters_ = createDeferredObjects(); + runtimeDeferredParameters_ = new DeferredObject[getNumParams()]; + } + + @Override + public void closeDerived() { + } + + /** + * Evaluates the UDF with the given arguments as input. 
+ */ + @Override + protected Object evaluateDerived(JavaUdfDataType[] argTypes, + long inputNullsPtr, Object[] inputObjectArgs) throws ImpalaRuntimeException { + try { + for (int i = 0; i < runtimeDeferredParameters_.length; ++i) { + runtimeDeferredParameters_[i] = + (UnsafeUtil.UNSAFE.getByte(inputNullsPtr + i) == 0) + ? deferredParameters_[i] + : null; + } + return genericUDF_.evaluate(runtimeDeferredParameters_); + } catch (HiveException e) { + throw new ImpalaRuntimeException("UDF failed to evaluate", e); + } catch (IllegalArgumentException e) { + throw new ImpalaRuntimeException("UDF failed to evaluate", e); + } + } + + /** + * Special method for unit testing. In the Java unit tests, the arguments + * will change on every iteration, forcing us to create a new DeferredJavaObject + * each time. In the Python E2E tests, the normal pathway is exercised. + */ + @Override + public long evaluateForTesting(Object... args) throws ImpalaRuntimeException { + Preconditions.checkState(args.length == deferredParameters_.length); + for (int i = 0; i < deferredParameters_.length; ++i) { + deferredParameters_[i] = new DeferredJavaObject(args[i]); + } + return evaluate(); + } + + @Override + public Method getMethod() { + return null; + } + + private DeferredObject[] createDeferredObjects() { + DeferredObject[] deferredObjects = new DeferredObject[getNumParams()]; + for (int i = 0; i < deferredObjects.length; ++i) { + deferredObjects[i] = new DeferredJavaObject(getInputObject(i)); + } + return deferredObjects; + } +} diff --git a/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java b/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java index 79f225e70..81345a71f 100644 --- a/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java +++ b/fe/src/main/java/org/apache/impala/hive/executor/UdfExecutor.java @@ -120,10 +120,17 @@ public class UdfExecutor { HiveUdfExecutor.getParameterTypes(request)); return new HiveUdfExecutorLegacy(request, function); } + case GENERIC_UDF: { + HiveGenericJavaFunction function = + new HiveGenericJavaFunction(udfLoader.getUDFClass(), + HiveUdfExecutor.getRetType(request), + HiveUdfExecutor.getParameterTypes(request)); + return new HiveUdfExecutorGeneric(request, function); + } default: throw new ImpalaRuntimeException("The class " + request.fn.scalar_fn.symbol + - " does not derive " + "from a known supported Hive UDF class " + - "(UDF)."); + " does not derive from a known supported Hive UDF class " + + "(UDF or GenericUDF)."); } } catch (CatalogException e) { throw new ImpalaRuntimeException(e.getMessage(), e); diff --git a/fe/src/test/java/org/apache/impala/hive/executor/TestGenericUdf.java b/fe/src/test/java/org/apache/impala/hive/executor/TestGenericUdf.java new file mode 100644 index 000000000..a732ec8e6 --- /dev/null +++ b/fe/src/test/java/org/apache/impala/hive/executor/TestGenericUdf.java @@ -0,0 +1,312 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.hive.executor; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * Simple Generic UDFs for testing. + * + * Udf that takes a variable number of arguments of the same type and applies + * the "+" operator to them. The "+" is a concatenation for string types. For + * boolean types, it applies the OR operation. If only one argument is provided, + * it returns that argument. + * + * This class is duplicated in fe and java/test-hive-udfs. We need this class in a + * separate project so we can test loading UDF jars that are not already on the + * classpath, and we can't delete the FE's class because UdfExecutorTest depends + * on it. + * + * The jar for this file can be built by running "mvn clean package" in + * tests/test-hive-udfs. This is run in testdata/bin/create-load-data.sh, and + * copied to HDFS in testdata/bin/copy-udfs-uda.sh. 
+ * + */ +public class TestGenericUdf extends GenericUDF { + + private List inputTypes_ = new ArrayList<>(); + private PrimitiveObjectInspector retTypeOI_; + private PrimitiveCategory argAndRetType_; + + private static final Set SUPPORTED_ARG_TYPES = + new ImmutableSet.Builder() + .add(PrimitiveCategory.BOOLEAN) + .add(PrimitiveCategory.BYTE) + .add(PrimitiveCategory.SHORT) + .add(PrimitiveCategory.INT) + .add(PrimitiveCategory.LONG) + .add(PrimitiveCategory.FLOAT) + .add(PrimitiveCategory.DOUBLE) + .add(PrimitiveCategory.STRING) + .build(); + + public TestGenericUdf() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + if (arguments.length == 0) { + throw new UDFArgumentException("No arguments provided."); + } + + for (ObjectInspector oi : arguments) { + if (!(oi instanceof PrimitiveObjectInspector)) { + throw new UDFArgumentException("Found an input that is not a primitive."); + } + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + inputTypes_.add(poi.getPrimitiveCategory()); + } + + // return type is always same as last argument + retTypeOI_ = (PrimitiveObjectInspector) arguments[0]; + + argAndRetType_ = retTypeOI_.getPrimitiveCategory(); + + verifyArgs(argAndRetType_, inputTypes_); + return retTypeOI_; + } + + @Override + public Object evaluate(DeferredObject[] arguments) + throws HiveException { + if (arguments.length != inputTypes_.size()) { + throw new HiveException("Number of arguments passed in did not match number of " + + "arguments expected."); + } + switch (argAndRetType_) { + case BOOLEAN: + return evaluateBoolean(arguments); + case BYTE: + return evaluateByte(arguments); + case SHORT: + return evaluateShort(arguments); + case INT: + return evaluateInt(arguments); + case LONG: + return evaluateLong(arguments); + case FLOAT: + return evaluateFloat(arguments); + case DOUBLE: + return evaluateDouble(arguments); + case STRING: + return evaluateString(arguments); + case DATE: + case TIMESTAMP: + default: + throw new HiveException("Unsupported argument type " + argAndRetType_); + } + } + + @Override + public String getDisplayString(String[] children) { + return "TestGenericUdf"; + } + + private void verifyArgs(PrimitiveCategory argAndRetType, + List inputTypes) throws UDFArgumentException { + + if (!SUPPORTED_ARG_TYPES.contains(argAndRetType)) { + throw new UDFArgumentException("Unsupported argument type " + argAndRetType_); + } + + for (PrimitiveCategory inputType : inputTypes) { + if (inputType != argAndRetType) { + throw new UDFArgumentException("Invalid function for " + + getSignatureString(argAndRetType, inputTypes)); + } + } + } + + public BooleanWritable evaluateBoolean(DeferredObject[] inputs) throws HiveException { + List booleanInputs = new ArrayList<>(); + boolean finalBoolean = false; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof BooleanWritable)) { + throw new HiveException("Expected BooleanWritable but got " + input.getClass()); + } + boolean currentBool = ((BooleanWritable) input.get()).get(); + finalBoolean |= currentBool; + } + BooleanWritable resultBool = new BooleanWritable(); + resultBool.set(finalBoolean); + return resultBool; + } + + public ByteWritable evaluateByte(DeferredObject[] inputs) throws HiveException { + List byteInputs = new ArrayList<>(); + byte finalByte = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof ByteWritable)) { + throw new 
HiveException("Expected ByteWritable but got " + input.getClass()); + } + byte currentByte = ((ByteWritable) input.get()).get(); + finalByte += currentByte; + } + ByteWritable resultByte = new ByteWritable(); + resultByte.set(finalByte); + return resultByte; + } + + public ShortWritable evaluateShort(DeferredObject[] inputs) throws HiveException { + List shortInputs = new ArrayList<>(); + short finalShort = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof ShortWritable)) { + throw new HiveException("Expected ShortWritable but got " + input.getClass()); + } + short currentShort = ((ShortWritable) input.get()).get(); + finalShort += currentShort; + } + ShortWritable resultShort = new ShortWritable(); + resultShort.set(finalShort); + return resultShort; + } + + public IntWritable evaluateInt(DeferredObject[] inputs) throws HiveException { + List intInputs = new ArrayList<>(); + int finalInt = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof IntWritable)) { + throw new HiveException("Expected IntWritable but got " + input.getClass()); + } + int currentInt = ((IntWritable) input.get()).get(); + finalInt += currentInt; + } + IntWritable resultInt = new IntWritable(); + resultInt.set(finalInt); + return resultInt; + } + + public LongWritable evaluateLong(DeferredObject[] inputs) throws HiveException { + List longInputs = new ArrayList<>(); + long finalLong = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof LongWritable)) { + throw new HiveException("Expected LongWritable but got " + input.getClass()); + } + long currentLong = ((LongWritable) input.get()).get(); + finalLong += currentLong; + } + LongWritable resultLong = new LongWritable(); + resultLong.set(finalLong); + return resultLong; + } + + public FloatWritable evaluateFloat(DeferredObject[] inputs) throws HiveException { + List floatInputs = new ArrayList<>(); + float finalFloat = 0.0F; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof FloatWritable)) { + throw new HiveException("Expected FloatWritable but got " + input.getClass()); + } + float currentFloat = ((FloatWritable) input.get()).get(); + finalFloat += currentFloat; + } + FloatWritable resultFloat = new FloatWritable(); + resultFloat.set(finalFloat); + return resultFloat; + } + + public DoubleWritable evaluateDouble(DeferredObject[] inputs) throws HiveException { + List doubleInputs = new ArrayList<>(); + double finalDouble = 0.0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof DoubleWritable)) { + throw new HiveException("Expected DoubleWritable but got " + input.getClass()); + } + double currentDouble = ((DoubleWritable) input.get()).get(); + finalDouble += currentDouble; + } + DoubleWritable resultDouble = new DoubleWritable(); + resultDouble.set(finalDouble); + return resultDouble; + } + + public Text evaluateString(DeferredObject[] inputs) throws HiveException { + List stringInputs = new ArrayList<>(); + String finalString = ""; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof Text)) { + throw new HiveException("Expected String but got " + input.get().getClass()); + } + String currentString = ((Text) input.get()).toString(); + finalString += currentString; + } + Text resultString = new 
Text(); + resultString.set(finalString); + return resultString; + } + + private String getSignatureString(PrimitiveCategory argAndRetType_, + List inputTypes_) { + return argAndRetType_ + "TestGenericUdf(" + Joiner.on(",").join(inputTypes_) + ")"; + } +} diff --git a/fe/src/test/java/org/apache/impala/hive/executor/UdfExecutorTest.java b/fe/src/test/java/org/apache/impala/hive/executor/UdfExecutorTest.java index a0883e269..ecde05c30 100644 --- a/fe/src/test/java/org/apache/impala/hive/executor/UdfExecutorTest.java +++ b/fe/src/test/java/org/apache/impala/hive/executor/UdfExecutorTest.java @@ -53,6 +53,8 @@ import org.apache.hadoop.hive.ql.udf.UDFSqrt; import org.apache.hadoop.hive.ql.udf.UDFSubstr; import org.apache.hadoop.hive.ql.udf.UDFTan; import org.apache.hadoop.hive.ql.udf.UDFUnhex; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBRound; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -199,6 +201,7 @@ public class UdfExecutorTest { // are allowed. void validateArgType(Object w) { if (w instanceof String || + w instanceof Text || w instanceof ImpalaIntWritable || w instanceof ImpalaFloatWritable || w instanceof ImpalaBigIntWritable || @@ -265,7 +268,7 @@ public class UdfExecutorTest { if (args[i] instanceof String) { // For authoring the test, we'll just pass string and make the proper // object here. - if (method.getParameterTypes()[i] == Text.class) { + if (method != null && method.getParameterTypes()[i] == Text.class) { inputArgs[i] = createText((String)args[i]); } else { inputArgs[i] = createBytes((String)args[i]); @@ -375,7 +378,9 @@ public class UdfExecutorTest { List argTypeStrs = Lists.newArrayList(); for (Object arg: args) argTypeStrs.add(arg.getClass().getSimpleName()); errMsgs.add("Argument types: " + Joiner.on(",").join(argTypeStrs)); - errMsgs.add("Resolved method: " + e.getMethod().toGenericString()); + if (e.getMethod() != null) { + errMsgs.add("Resolved method: " + e.getMethod().toGenericString()); + } Assert.fail("\n" + Joiner.on("\n").join(errMsgs)); } } @@ -458,6 +463,53 @@ public class UdfExecutorTest { freeAllocations(); } + @Test + public void HiveGenericTest() + throws ImpalaException, MalformedURLException, TException { + TestHiveUdf(GenericUDFBRound.class, createInt(1), createInt(1)); + TestHiveUdf(GenericUDFBRound.class, createDouble(1.0), createDouble(1.1)); + TestHiveUdf(GenericUDFUpper.class, createText("HELLO"), createText("Hello")); + } + + @Test + // Test GenericUDF for all supported types + public void BasicGenericTest() + throws ImpalaException, MalformedURLException, TException { + TestUdf(null, TestGenericUdf.class, createBoolean(true), createBoolean(true)); + TestUdf(null, TestGenericUdf.class, createTinyInt(1), createTinyInt(1)); + TestUdf(null, TestGenericUdf.class, createSmallInt(1), createSmallInt(1)); + TestUdf(null, TestGenericUdf.class, createInt(1), createInt(1)); + TestUdf(null, TestGenericUdf.class, createBigInt(1), createBigInt(1)); + TestUdf(null, TestGenericUdf.class, createFloat(1.1f), createFloat(1.1f)); + TestUdf(null, TestGenericUdf.class, createDouble(1.1), createDouble(1.1)); + TestUdf(null, TestGenericUdf.class, createText("ABCD"), createText("ABCD")); + TestUdf(null, TestGenericUdf.class, createDouble(3), + createDouble(1), createDouble(2)); + TestUdf(null, TestGenericUdf.class, createText("ABCXYZ"), createText("ABC"), + createText("XYZ")); + TestUdf(null, TestGenericUdf.class, 
createInt(3), createInt(1), createInt(2)); + TestUdf(null, TestGenericUdf.class, createFloat(1.1f + 1.2f), + createFloat(1.1f), createFloat(1.2f)); + TestUdf(null, TestGenericUdf.class, createDouble(1.1 + 1.2 + 1.3), + createDouble(1.1), createDouble(1.2), createDouble(1.3)); + TestUdf(null, TestGenericUdf.class, createSmallInt(1 + 2), createSmallInt(1), + createSmallInt(2)); + TestUdf(null, TestGenericUdf.class, createBoolean(true), + createBoolean(true), createBoolean(false)); + TestUdf(null, TestGenericUdf.class, createInt(5 + 6 + 7), createInt(5), + createInt(6), createInt(7)); + TestUdf(null, TestGenericUdf.class, createBoolean(true), + createBoolean(false), createBoolean(false), createBoolean(true)); + TestUdf(null, TestGenericUdf.class, createFloat(1.1f + 1.2f + 1.3f), + createFloat(1.1f), createFloat(1.2f), createFloat(1.3f)); + TestUdf(null, TestGenericUdf.class, createInt(5 + 6 + 7 + 8), createInt(5), + createInt(6), createInt(7), createInt(8)); + TestUdf(null, TestGenericUdf.class, createBoolean(true), + createBoolean(true), createBoolean(true), createBoolean(true), + createBoolean(true)); + freeAllocations(); + } + @Test // Test identity for all types public void BasicTest() diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml index 642ba3e6e..e0a577f63 100644 --- a/java/shaded-deps/hive-exec/pom.xml +++ b/java/shaded-deps/hive-exec/pom.xml @@ -100,6 +100,7 @@ the same dependencies org/apache/hadoop/hive/ql/parse/SemanticException.class org/apache/hadoop/hive/ql/exec/*UDF* + org/apache/hadoop/hive/ql/exec/MapredContext.class org/apache/hadoop/hive/ql/exec/FunctionUtils* org/apache/hadoop/hive/ql/parse/GenericHiveLexer* org/apache/hadoop/hive/ql/parse/HiveLexer* diff --git a/java/test-hive-udfs/src/main/java/org/apache/impala/GenericImportsNearbyClassesUdf.java b/java/test-hive-udfs/src/main/java/org/apache/impala/GenericImportsNearbyClassesUdf.java new file mode 100644 index 000000000..af9747fa7 --- /dev/null +++ b/java/test-hive-udfs/src/main/java/org/apache/impala/GenericImportsNearbyClassesUdf.java @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.impala; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; + +public class GenericImportsNearbyClassesUdf extends GenericUDF { + public GenericImportsNearbyClassesUdf() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentException("GenericImports takes one argument."); + } + if (!(arguments[0] instanceof PrimitiveObjectInspector)) { + throw new UDFArgumentException("Found an input that is not a primitive."); + } + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) arguments[0]; + if (poi.getPrimitiveCategory() != PrimitiveCategory.STRING) { + throw new UDFArgumentException("GenericImports needs one STRING arg."); + } + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) + throws HiveException { + return new Text(UtilForUdf.getHello()); + } + + @Override + public String getDisplayString(String[] children) { + return "GenericImports"; + } +} diff --git a/java/test-hive-udfs/src/main/java/org/apache/impala/GenericReplaceStringUdf.java b/java/test-hive-udfs/src/main/java/org/apache/impala/GenericReplaceStringUdf.java new file mode 100644 index 000000000..f6a78cac1 --- /dev/null +++ b/java/test-hive-udfs/src/main/java/org/apache/impala/GenericReplaceStringUdf.java @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.impala; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; + +public class GenericReplaceStringUdf extends GenericUDF { + public GenericReplaceStringUdf() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentException("GenericReplaceStringUdf takes one argument."); + } + if (!(arguments[0] instanceof PrimitiveObjectInspector)) { + throw new UDFArgumentException("Found an input that is not a primitive."); + } + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) arguments[0]; + if (poi.getPrimitiveCategory() != PrimitiveCategory.STRING) { + throw new UDFArgumentException("GenericReplaceStringUdf needs one STRING arg."); + } + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) + throws HiveException { + if (arguments.length != 1) { + throw new RuntimeException("Number of expected args did not match."); + } + DeferredObject input = arguments[0]; + if (input == null) { + return new Text(""); + } + + if (!(input.get() instanceof Text)) { + throw new RuntimeException("Expected String but got " + input.get().getClass()); + } + String currentString = ((Text) input.get()).toString(); + Text resultString = new Text(); + resultString.set(currentString.replace("s", "ss")); + return resultString; + } + + @Override + public String getDisplayString(String[] children) { + return "GenericReplaceStringUdf"; + } +} diff --git a/java/test-hive-udfs/src/main/java/org/apache/impala/TestGenericUdf.java b/java/test-hive-udfs/src/main/java/org/apache/impala/TestGenericUdf.java new file mode 100644 index 000000000..70052caf5 --- /dev/null +++ b/java/test-hive-udfs/src/main/java/org/apache/impala/TestGenericUdf.java @@ -0,0 +1,312 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.impala; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * Simple Generic UDFs for testing. + * + * Udf that takes a variable number of arguments of the same type and applies + * the "+" operator to them. The "+" is a concatenation for string types. For + * boolean types, it applies the OR operation. If only one argument is provided, + * it returns that argument. + * + * This class is a copy of the TestGenericUdf class in the FE. We need this class in a + * separate project so we can test loading UDF jars that are not already on the + * classpath, and we can't delete the FE's class because UdfExecutorTest depends + * on it. + * + * The jar for this file can be built by running "mvn clean package" in + * tests/test-hive-udfs. This is run in testdata/bin/create-load-data.sh, and + * copied to HDFS in testdata/bin/copy-udfs-uda.sh. 
+ * + */ +public class TestGenericUdf extends GenericUDF { + + private List inputTypes_ = new ArrayList<>(); + private PrimitiveObjectInspector retTypeOI_; + private PrimitiveCategory argAndRetType_; + + private static final Set SUPPORTED_ARG_TYPES = + new ImmutableSet.Builder() + .add(PrimitiveCategory.BOOLEAN) + .add(PrimitiveCategory.BYTE) + .add(PrimitiveCategory.SHORT) + .add(PrimitiveCategory.INT) + .add(PrimitiveCategory.LONG) + .add(PrimitiveCategory.FLOAT) + .add(PrimitiveCategory.DOUBLE) + .add(PrimitiveCategory.STRING) + .build(); + + public TestGenericUdf() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + + if (arguments.length == 0) { + throw new UDFArgumentException("No arguments provided."); + } + + for (ObjectInspector oi : arguments) { + if (!(oi instanceof PrimitiveObjectInspector)) { + throw new UDFArgumentException("Found an input that is not a primitive."); + } + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + inputTypes_.add(poi.getPrimitiveCategory()); + } + + // return type is always same as last argument + retTypeOI_ = (PrimitiveObjectInspector) arguments[0]; + + argAndRetType_ = retTypeOI_.getPrimitiveCategory(); + + verifyArgs(argAndRetType_, inputTypes_); + return retTypeOI_; + } + + @Override + public Object evaluate(DeferredObject[] arguments) + throws HiveException { + if (arguments.length != inputTypes_.size()) { + throw new HiveException("Number of arguments passed in did not match number of " + + "arguments expected."); + } + switch (argAndRetType_) { + case BOOLEAN: + return evaluateBoolean(arguments); + case BYTE: + return evaluateByte(arguments); + case SHORT: + return evaluateShort(arguments); + case INT: + return evaluateInt(arguments); + case LONG: + return evaluateLong(arguments); + case FLOAT: + return evaluateFloat(arguments); + case DOUBLE: + return evaluateDouble(arguments); + case STRING: + return evaluateString(arguments); + case DATE: + case TIMESTAMP: + default: + throw new HiveException("Unsupported argument type " + argAndRetType_); + } + } + + @Override + public String getDisplayString(String[] children) { + return "TestGenericUdf"; + } + + private void verifyArgs(PrimitiveCategory argAndRetType, + List inputTypes) throws UDFArgumentException { + + if (!SUPPORTED_ARG_TYPES.contains(argAndRetType)) { + throw new UDFArgumentException("Unsupported argument type " + argAndRetType_); + } + + for (PrimitiveCategory inputType : inputTypes) { + if (inputType != argAndRetType) { + throw new UDFArgumentException("Invalid function for " + + getSignatureString(argAndRetType, inputTypes)); + } + } + } + + public BooleanWritable evaluateBoolean(DeferredObject[] inputs) throws HiveException { + List booleanInputs = new ArrayList<>(); + boolean finalBoolean = false; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof BooleanWritable)) { + throw new HiveException("Expected BooleanWritable but got " + input.getClass()); + } + boolean currentBool = ((BooleanWritable) input.get()).get(); + finalBoolean |= currentBool; + } + BooleanWritable resultBool = new BooleanWritable(); + resultBool.set(finalBoolean); + return resultBool; + } + + public ByteWritable evaluateByte(DeferredObject[] inputs) throws HiveException { + List byteInputs = new ArrayList<>(); + byte finalByte = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof ByteWritable)) { + throw new 
HiveException("Expected ByteWritable but got " + input.getClass()); + } + byte currentByte = ((ByteWritable) input.get()).get(); + finalByte += currentByte; + } + ByteWritable resultByte = new ByteWritable(); + resultByte.set(finalByte); + return resultByte; + } + + public ShortWritable evaluateShort(DeferredObject[] inputs) throws HiveException { + List shortInputs = new ArrayList<>(); + short finalShort = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof ShortWritable)) { + throw new HiveException("Expected ShortWritable but got " + input.getClass()); + } + short currentShort = ((ShortWritable) input.get()).get(); + finalShort += currentShort; + } + ShortWritable resultShort = new ShortWritable(); + resultShort.set(finalShort); + return resultShort; + } + + public IntWritable evaluateInt(DeferredObject[] inputs) throws HiveException { + List intInputs = new ArrayList<>(); + int finalInt = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof IntWritable)) { + throw new HiveException("Expected IntWritable but got " + input.getClass()); + } + int currentInt = ((IntWritable) input.get()).get(); + finalInt += currentInt; + } + IntWritable resultInt = new IntWritable(); + resultInt.set(finalInt); + return resultInt; + } + + public LongWritable evaluateLong(DeferredObject[] inputs) throws HiveException { + List longInputs = new ArrayList<>(); + long finalLong = 0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof LongWritable)) { + throw new HiveException("Expected LongWritable but got " + input.getClass()); + } + long currentLong = ((LongWritable) input.get()).get(); + finalLong += currentLong; + } + LongWritable resultLong = new LongWritable(); + resultLong.set(finalLong); + return resultLong; + } + + public FloatWritable evaluateFloat(DeferredObject[] inputs) throws HiveException { + List floatInputs = new ArrayList<>(); + float finalFloat = 0.0F; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof FloatWritable)) { + throw new HiveException("Expected FloatWritable but got " + input.getClass()); + } + float currentFloat = ((FloatWritable) input.get()).get(); + finalFloat += currentFloat; + } + FloatWritable resultFloat = new FloatWritable(); + resultFloat.set(finalFloat); + return resultFloat; + } + + public DoubleWritable evaluateDouble(DeferredObject[] inputs) throws HiveException { + List doubleInputs = new ArrayList<>(); + double finalDouble = 0.0; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof DoubleWritable)) { + throw new HiveException("Expected DoubleWritable but got " + input.getClass()); + } + double currentDouble = ((DoubleWritable) input.get()).get(); + finalDouble += currentDouble; + } + DoubleWritable resultDouble = new DoubleWritable(); + resultDouble.set(finalDouble); + return resultDouble; + } + + public Text evaluateString(DeferredObject[] inputs) throws HiveException { + List stringInputs = new ArrayList<>(); + String finalString = ""; + for (DeferredObject input : inputs) { + if (input == null) { + return null; + } + if (!(input.get() instanceof Text)) { + throw new HiveException("Expected String but got " + input.get().getClass()); + } + String currentString = ((Text) input.get()).toString(); + finalString += currentString; + } + Text resultString = new 
Text(); + resultString.set(finalString); + return resultString; + } + + private String getSignatureString(PrimitiveCategory argAndRetType_, + List inputTypes_) { + return argAndRetType_ + "TestGenericUdf(" + Joiner.on(",").join(inputTypes_) + ")"; + } +} diff --git a/java/test-hive-udfs/src/main/java/org/apache/impala/TestGenericUdfException.java b/java/test-hive-udfs/src/main/java/org/apache/impala/TestGenericUdfException.java new file mode 100644 index 000000000..79f546c09 --- /dev/null +++ b/java/test-hive-udfs/src/main/java/org/apache/impala/TestGenericUdfException.java @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * Simple Generic UDFs that always throw an exception. 
+ */ +public class TestGenericUdfException extends GenericUDF { + + public TestGenericUdfException() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) + throws HiveException { + throw new RuntimeException("Test exception"); + } + + @Override + public String getDisplayString(String[] children) { + return "TestGenericUdfException"; + } +} diff --git a/testdata/workloads/functional-query/queries/QueryTest/generic-java-udf.test b/testdata/workloads/functional-query/queries/QueryTest/generic-java-udf.test new file mode 100644 index 000000000..2bf2407b3 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/generic-java-udf.test @@ -0,0 +1,306 @@ +==== +---- QUERY +select hive_bround(cast(3.14 as double)) +---- RESULTS +3.0 +---- TYPES +DOUBLE +==== +---- QUERY +select hive_bround(cast(3.14 as int)) +---- RESULTS +3 +---- TYPES +INT +==== +---- QUERY +select hive_upper('hello') +---- RESULTS +'HELLO' +---- TYPES +STRING +==== +---- QUERY +#Test GenericUDF functions +select generic_identity(true), generic_identity(cast(NULL as boolean)); +---- TYPES +boolean, boolean +---- RESULTS +true,NULL +==== +---- QUERY +select generic_identity(cast(10 as tinyint)), generic_identity(cast(NULL as tinyint)); +---- TYPES +tinyint, tinyint +---- RESULTS +10,NULL +==== +---- QUERY +select generic_identity(cast(10 as smallint)), generic_identity(cast(NULL as smallint)); +---- TYPES +smallint, smallint +---- RESULTS +10,NULL +==== +---- QUERY +select generic_identity(cast(10 as int)), generic_identity(cast(NULL as int)); +---- TYPES +int, int +---- RESULTS +10,NULL +==== +---- QUERY +select generic_identity(cast(10 as bigint)), generic_identity(cast(NULL as bigint)); +---- TYPES +bigint, bigint +---- RESULTS +10,NULL +==== +---- QUERY +select generic_identity(cast(10.0 as float)), generic_identity(cast(NULL as float)); +---- TYPES +float, float +---- RESULTS +10,NULL +==== +---- QUERY +select generic_identity(cast(10.0 as double)), generic_identity(cast(NULL as double)); +---- TYPES +double, double +---- RESULTS +10,NULL +==== +---- QUERY +# IMPALA-1134. 
Tests that strings are copied correctly +select length(generic_identity("0123456789")), + length(generic_add("0123456789", "0123456789")), + length(generic_add("0123456789", "0123456789", "0123456789")); +---- TYPES +int, int, int +---- RESULTS +10,20,30 +==== +---- QUERY +# IMPALA-1392: Hive UDFs that throw exceptions should return NULL +select generic_throws_exception(); +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select generic_throws_exception() from functional.alltypestiny; +---- TYPES +boolean +---- RESULTS +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +==== +---- QUERY +select generic_add(cast(1 as int), cast(2 as int)); +---- TYPES +int +---- RESULTS +3 +==== +---- QUERY +select generic_add(generic_add(cast(1 as int), cast(2 as int)), cast(2 as int)); +---- TYPES +int +---- RESULTS +5 +==== +---- QUERY +select generic_add(cast(generic_add(cast(1 as int), cast(2 as int)) - generic_add(cast(2 as int), cast(1 as int)) as int), cast(2 as int)); +---- TYPES +int +---- RESULTS +2 +==== +---- QUERY +select generic_add(cast(1 as smallint), cast(2 as smallint)); +---- TYPES +smallint +---- RESULTS +3 +==== +---- QUERY +select generic_add(cast(3.0 as float), cast(4.0 as float)); +---- TYPES +float +---- RESULTS +7.0 +==== +---- QUERY +select generic_add(cast(1.0 as double), cast(2.0 as double)); +---- TYPES +double +---- RESULTS +3.0 +==== +---- QUERY +select generic_add(cast(1 as boolean), cast(0 as boolean)); +---- TYPES +boolean +---- RESULTS +true +==== +---- QUERY +select generic_add(cast(1 as boolean), cast(1 as boolean)); +---- TYPES +boolean +---- RESULTS +true +==== +---- QUERY +# IMPALA-3378: test many Java UDFs being opened and run concurrently +select * from +(select max(int_col) from functional.alltypesagg + where generic_identity(bool_col) union all +(select max(int_col) from functional.alltypesagg + where generic_identity(tinyint_col) > 1 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(smallint_col) > 1 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(int_col) > 1 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(bigint_col) > 1 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(float_col) > 1.0 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(double_col) > 1.0 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(string_col) > '1' union all +(select max(int_col) from functional.alltypesagg + where not generic_identity(bool_col) union all +(select max(int_col) from functional.alltypesagg + where generic_identity(tinyint_col) > 2 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(smallint_col) > 2 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(int_col) > 2 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(bigint_col) > 2 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(float_col) > 2.0 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(double_col) > 2.0 union all +(select max(int_col) from functional.alltypesagg + where generic_identity(string_col) > '2' +)))))))))))))))) v +---- TYPES +INT +---- RESULTS +998 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +999 +==== +---- QUERY +drop table if exists replace_string_input +==== +---- QUERY +create table 
replace_string_input as +values('toast'), ('scone'), ('stuff'), ('sssss'), ('yes'), ('scone'), ('stuff'); +==== +---- QUERY +# Regression test for IMPALA-4266: memory management bugs with output strings from +# Java UDFS, exposed by using the UDF as a grouping key in an aggregation. +# The UDF replaces "s" with "ss" in the strings. +select distinct generic_replace_string(_c0) as es +from replace_string_input +order by 1; +---- TYPES +string +---- RESULTS +'sscone' +'ssssssssss' +'sstuff' +'toasst' +'yess' +==== +---- QUERY +# Regression test for IMPALA-8016; this UDF loads another class in the same jar. +select generic_import_nearby_classes("placeholder"); +---- TYPES +string +---- RESULTS +'Hello' +==== +---- QUERY +# Java Generic UDFs for DATE are not allowed yet +create function identity(Date) returns Date +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +AnalysisException: Type DATE is not supported for Java UDFs. +==== +---- QUERY +# Java Generic UDFs for DECIMAL are not allowed yet +create function identity(decimal(5,0)) returns decimal(5,0) +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +AnalysisException: Type DECIMAL(5,0) is not supported for Java UDFs. +==== +---- QUERY +# Java Generic UDFs for TIMESTAMP are not allowed yet +create function identity(Timestamp) returns Timestamp +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +AnalysisException: Type TIMESTAMP is not supported for Java UDFs. +==== +---- QUERY +create function identity(ARRAY) returns INT +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +AnalysisException: Type 'ARRAY' is not supported in UDFs/UDAs. +==== +---- QUERY +create function identity(MAP) returns INT +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +AnalysisException: Type 'MAP' is not supported in UDFs/UDAs. +==== +---- QUERY +create function identity(STRUCT) returns INT +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +AnalysisException: Type 'STRUCT' is not supported in UDFs/UDAs. +==== +---- QUERY +create function generic_add_fail(smallint, smallint) returns int +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; +---- CATCH +CatalogException: Function expected return type smallint but was created with INT +==== +---- QUERY +create function var_args_func(int...) returns int +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestUdf'; +---- CATCH +CatalogException: Variable arguments not supported in Hive UDFs. +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test index 82a7fe96c..e23139de1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/java-udf.test +++ b/testdata/workloads/functional-query/queries/QueryTest/java-udf.test @@ -346,3 +346,10 @@ symbol='org.apache.impala.TestUdf'; ---- CATCH AnalysisException: Type TIMESTAMP is not supported for Java UDFs. ==== +---- QUERY +create function var_args_func(int...) 
returns int +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestUdf'; +---- CATCH +CatalogException: Variable arguments not supported in Hive UDFs. +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/load-generic-java-udfs.test b/testdata/workloads/functional-query/queries/QueryTest/load-generic-java-udfs.test new file mode 100644 index 000000000..41ae36c24 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/load-generic-java-udfs.test @@ -0,0 +1,86 @@ +==== +---- QUERY +create function hive_bround(int) returns int +location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' +symbol='org.apache.hadoop.hive.ql.udf.generic.GenericUDFBRound'; + +create function hive_bround(double) returns double +location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' +symbol='org.apache.hadoop.hive.ql.udf.generic.GenericUDFBRound'; + +create function hive_upper(string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/hive-exec.jar' +symbol='org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper'; + +create function generic_identity(boolean) returns boolean +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(tinyint) returns tinyint +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(smallint) returns smallint +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(int) returns int +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(bigint) returns bigint +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(boolean, boolean) returns boolean +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(float) returns float +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(double) returns double +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_identity(string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(string, string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(string, string, string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(smallint, smallint) returns smallint +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(int, int) returns int +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(float, float) returns float +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_add(double, double) returns 
double +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdf'; + +create function generic_throws_exception() returns boolean +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.TestGenericUdfException'; + +create function generic_replace_string(string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.GenericReplaceStringUdf'; + +create function generic_import_nearby_classes(string) returns string +location '$FILESYSTEM_PREFIX/test-warehouse/impala-hive-udfs.jar' +symbol='org.apache.impala.GenericImportsNearbyClassesUdf'; +==== diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py index 685dfc9ab..1e0b38443 100644 --- a/tests/query_test/test_udfs.py +++ b/tests/query_test/test_udfs.py @@ -330,6 +330,10 @@ class TestUdfExecution(TestUdfBase): self.run_test_case('QueryTest/load-java-udfs-fail', vector, use_db=unique_database) self.run_test_case('QueryTest/java-udf', vector, use_db=unique_database) + def test_generic_java_udfs(self, vector, unique_database): + self.run_test_case('QueryTest/load-generic-java-udfs', vector, use_db=unique_database) + self.run_test_case('QueryTest/generic-java-udf', vector, use_db=unique_database) + def test_udf_errors(self, vector, unique_database): # Only run with codegen disabled to force interpretation path to be taken. # Aim to exercise two failure cases:
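Note on the classes referenced above: every CREATE FUNCTION in these tests binds its symbol to a Hive GenericUDF subclass packaged in the test jars (org.apache.impala.TestGenericUdf, TestGenericUdfException, GenericReplaceStringUdf, GenericImportsNearbyClassesUdf), and those implementations are not part of this diff. The sketch below is only a minimal, hypothetical illustration of the shape such a class takes; the class name, package, and single-STRING behavior are invented for this example and are not the actual test UDF. It shows the GenericUDF contract the tests rely on: initialize() declares the output ObjectInspector, which must agree with the return type named in CREATE FUNCTION (the mismatch case is the generic_add_fail error above), and evaluate() produces the per-row value.

// Hypothetical sketch only; not the org.apache.impala.TestGenericUdf shipped in the test jar.
// A minimal GenericUDF that echoes its single STRING argument.
package org.apache.impala.examples;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class MinimalIdentityGenericUdf extends GenericUDF {
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
      throw new UDFArgumentException("minimal_identity() expects exactly one argument");
    }
    // Declare a STRING result. The return type named in CREATE FUNCTION has to match
    // this ObjectInspector, otherwise registration fails with the
    // "Function expected return type ..." error exercised by generic_add_fail above.
    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
  }

  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    Object arg = arguments[0].get();
    // Simplified: a full UDF would read the value through the argument's ObjectInspector.
    return arg == null ? null : new Text(arg.toString());
  }

  @Override
  public String getDisplayString(String[] children) {
    return "minimal_identity(" + children[0] + ")";
  }
}

Per the IMPALA-1392 queries above, a variant of this class whose evaluate() throws an exception is expected to surface as NULL rather than fail the query.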