diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java b/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java index 15f5ebae9..2432be650 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TypeDef.java @@ -43,6 +43,13 @@ public class TypeDef implements ParseNode { @Override public void analyze(Analyzer analyzer) throws AnalysisException { if (isAnalyzed_) return; + // Check the max nesting depth before calling the recursive analyze() to avoid + // a stack overflow. + if (parsedType_.exceedsMaxNestingDepth()) { + throw new AnalysisException(String.format( + "Type exceeds the maximum nesting depth of %s:\n%s", + Type.MAX_NESTING_DEPTH, parsedType_.toSql())); + } analyze(parsedType_, analyzer); isAnalyzed_ = true; } diff --git a/fe/src/main/java/com/cloudera/impala/catalog/ArrayType.java b/fe/src/main/java/com/cloudera/impala/catalog/ArrayType.java index f39dad637..404751572 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/ArrayType.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/ArrayType.java @@ -20,7 +20,10 @@ public class ArrayType extends Type { public Type getItemType() { return itemType_; } @Override - public String toSql() { return String.format("ARRAY<%s>", itemType_.toSql()); } + public String toSql(int depth) { + if (depth >= MAX_NESTING_DEPTH) return "ARRAY<...>"; + return String.format("ARRAY<%s>", itemType_.toSql(depth + 1)); + } @Override public boolean equals(Object other) { diff --git a/fe/src/main/java/com/cloudera/impala/catalog/MapType.java b/fe/src/main/java/com/cloudera/impala/catalog/MapType.java index 41e9b97ae..61c43419a 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/MapType.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/MapType.java @@ -33,8 +33,10 @@ public class MapType extends Type { } @Override - public String toSql() { - return String.format("MAP<%s,%s>", keyType_.toSql(), valueType_.toSql()); + public 
String toSql(int depth) { + if (depth >= MAX_NESTING_DEPTH) return "MAP<...>"; + return String.format("MAP<%s,%s>", + keyType_.toSql(depth + 1), valueType_.toSql(depth + 1)); } @Override diff --git a/fe/src/main/java/com/cloudera/impala/catalog/ScalarType.java b/fe/src/main/java/com/cloudera/impala/catalog/ScalarType.java index 2f738b5f2..20b97dafd 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/ScalarType.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/ScalarType.java @@ -150,7 +150,8 @@ public class ScalarType extends Type { } @Override - public String toSql() { + public String toSql(int depth) { + if (depth >= MAX_NESTING_DEPTH) return "..."; switch(type_) { case BINARY: return type_.toString(); case VARCHAR: diff --git a/fe/src/main/java/com/cloudera/impala/catalog/StructField.java b/fe/src/main/java/com/cloudera/impala/catalog/StructField.java index 977568d38..c57f19876 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/StructField.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/StructField.java @@ -46,9 +46,11 @@ public class StructField { public int getPosition() { return position_; } public void setPosition(int position) { position_ = position; } - public String toSql() { + public String toSql(int depth) { + // type_ may be null (see the null check below); only dereference it when set. + String typeSql = (type_ != null && depth < Type.MAX_NESTING_DEPTH) ? 
type_.toSql(depth) : "..."; StringBuilder sb = new StringBuilder(name_); - if (type_ != null) sb.append(":" + type_.toSql()); + if (type_ != null) sb.append(":" + typeSql); if (comment_ != null) sb.append(String.format(" COMMENT '%s'", comment_)); return sb.toString(); } diff --git a/fe/src/main/java/com/cloudera/impala/catalog/StructType.java b/fe/src/main/java/com/cloudera/impala/catalog/StructType.java index a9f89c8f5..5099cc5a5 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/StructType.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/StructType.java @@ -35,9 +35,10 @@ public class StructType extends Type { } @Override - public String toSql() { + public String toSql(int depth) { + if (depth >= MAX_NESTING_DEPTH) return "STRUCT<...>"; ArrayList fieldsSql = Lists.newArrayList(); - for (StructField f: fields_) fieldsSql.add(f.toSql()); + for (StructField f: fields_) fieldsSql.add(f.toSql(depth + 1)); return String.format("STRUCT<%s>", Joiner.on(",").join(fieldsSql)); } diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Table.java b/fe/src/main/java/com/cloudera/impala/catalog/Table.java index b778f1488..1943ecbab 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Table.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Table.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.log4j.Logger; import com.cloudera.impala.analysis.TableName; +import com.cloudera.impala.common.AnalysisException; import com.cloudera.impala.thrift.TAccessLevel; import com.cloudera.impala.thrift.TCatalogObject; import com.cloudera.impala.thrift.TCatalogObjectType; @@ -321,6 +322,7 @@ public abstract class Table implements CatalogObject { * - Supported by Impala, in which case the type is returned. * - A type Impala understands but is not yet implemented (e.g. date), the type is * returned but type.IsSupported() returns false. + * - A supported type that exceeds an Impala limit, e.g., on the nesting depth. 
* - A type Impala can't understand at all, and a TableLoadingException is thrown. */ protected Type parseColumnType(FieldSchema fs) throws TableLoadingException { @@ -330,6 +332,11 @@ public abstract class Table implements CatalogObject { "Unsupported type '%s' in column '%s' of table '%s'", fs.getType(), fs.getName(), getName())); } + if (type.exceedsMaxNestingDepth()) { + throw new TableLoadingException(String.format( + "Type exceeds the maximum nesting depth of %s:\n%s", + Type.MAX_NESTING_DEPTH, type.toSql())); + } return type; } diff --git a/fe/src/main/java/com/cloudera/impala/catalog/Type.java b/fe/src/main/java/com/cloudera/impala/catalog/Type.java index 9a54b7383..9e1027c3d 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/Type.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/Type.java @@ -38,6 +38,12 @@ import com.google.common.collect.Lists; * as abstract methods that subclasses must implement. */ public abstract class Type { + // Maximum nesting depth of a type. This limit was determined experimentally by + // generating and scanning deeply nested Parquet and Avro files. In those experiments, + // we exceeded the stack space in the scanner (which uses recursion for dealing with + // nested types) at a nesting depth between 200 and 300 (200 worked, 300 crashed). + public static final int MAX_NESTING_DEPTH = 100; + // Static constant types for scalar types that don't require additional information. public static final ScalarType INVALID = new ScalarType(PrimitiveType.INVALID_TYPE); public static final ScalarType NULL = new ScalarType(PrimitiveType.NULL_TYPE); @@ -112,7 +118,13 @@ public abstract class Type { * The output of this is stored directly in the hive metastore as the column type. * The string must match exactly. */ - public abstract String toSql(); + public final String toSql() { return toSql(0); } + + /** + * Recursive helper for toSql() to be implemented by subclasses. 
Keeps track of the + * nesting depth and terminates the recursion if MAX_NESTING_DEPTH is reached. + */ + protected abstract String toSql(int depth); /** * Same as toSql() but adds newlines and spaces for better readability of nested types. @@ -292,6 +304,44 @@ public abstract class Type { return ScalarType.INVALID; } + /** + * Returns true if this type exceeds the MAX_NESTING_DEPTH, false otherwise. + */ + public boolean exceedsMaxNestingDepth() { return exceedsMaxNestingDepth(0); } + + /** + * Helper for exceedsMaxNestingDepth(). Recursively computes the max nesting depth, + * terminating early if MAX_NESTING_DEPTH is reached. Returns true if this type + * exceeds the MAX_NESTING_DEPTH, false otherwise. + * + * Examples of types and their nesting depth: + * INT --> 1 + * STRUCT<f1:INT> --> 2 + * STRUCT<f1:STRUCT<f2:INT>> --> 3 + * ARRAY<INT> --> 2 + * ARRAY<ARRAY<INT>> --> 3 + * MAP<STRING,INT> --> 2 + * MAP<STRING,MAP<STRING,INT>> --> 3 + */ + private boolean exceedsMaxNestingDepth(int d) { + if (d >= MAX_NESTING_DEPTH) return true; + if (isStructType()) { + StructType structType = (StructType) this; + for (StructField f: structType.getFields()) { + if (f.getType().exceedsMaxNestingDepth(d + 1)) return true; + } + } else if (isArrayType()) { + ArrayType arrayType = (ArrayType) this; + if (arrayType.getItemType().exceedsMaxNestingDepth(d + 1)) return true; + } else if (isMapType()) { + MapType mapType = (MapType) this; + if (mapType.getValueType().exceedsMaxNestingDepth(d + 1)) return true; + } else { + Preconditions.checkState(isScalarType()); + } + return false; + } + public static List toThrift(Type[] types) { return toThrift(Lists.newArrayList(types)); } diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java index 9b12bf970..c301df058 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java @@ -18,6 +18,7 @@ import static org.junit.Assert.assertTrue; 
import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.UUID; import org.apache.commons.lang3.StringUtils; @@ -30,6 +31,7 @@ import org.junit.Assert; import org.junit.Test; import com.cloudera.impala.catalog.ArrayType; +import com.cloudera.impala.catalog.Catalog; import com.cloudera.impala.catalog.CatalogException; import com.cloudera.impala.catalog.DataSource; import com.cloudera.impala.catalog.DataSourceTable; @@ -2120,6 +2122,16 @@ public class AnalyzeDDLTest extends AnalyzerTest { } } + /** + * Wraps the given typeDef in a CREATE TABLE stmt and runs AnalyzesOk(). + * Returns the analyzed type. + */ + private Type TypeDefAnalyzeOk(String typeDef) { + ParseNode stmt = AnalyzesOk(String.format("CREATE TABLE t (i %s)", typeDef)); + CreateTableStmt createTableStmt = (CreateTableStmt) stmt; + return createTableStmt.getColumnDefs().get(0).getType(); + } + /** * Wraps the given typeDefs in a CREATE TABLE stmt and asserts that the type def * failed to analyze with the given error message. @@ -2175,6 +2187,34 @@ public class AnalyzeDDLTest extends AnalyzerTest { // Invalid struct-field name. TypeDefAnalysisError("STRUCT<`???`: int>", "Invalid struct field name: ???"); + + // Test maximum nesting depth with all complex types. + for (String prefix: Arrays.asList("struct** + * with exactly depth-1 repetitions of prefix and suffix + */ + private String genTypeSql(int depth, String prefix, String middle, String suffix) { + return StringUtils.repeat(prefix, depth - 1) + + middle + StringUtils.repeat(suffix, depth - 1); } @Test diff --git a/testdata/max_nesting_depth/README b/testdata/max_nesting_depth/README new file mode 100644 index 000000000..a44fd42cb --- /dev/null +++ b/testdata/max_nesting_depth/README @@ -0,0 +1,22 @@ +This folder contains Parquet files for testing the maximum nesting depth +of complex types. The maximum nesting depth is Type.MAX_NESTING_DEPTH (100). 
+All Parquet files contain the integer 42 as the only data value in a single +Parquet column. + +The folders contain files for the following complex types with nesting +depth of exactly 100. + +struct/ +- struct...>> + +int_array/ +- array...>> + +int_map/ +- map...>> + +struct_array/ +- array...>>>> + +struct_map/ +- map...>>>> diff --git a/testdata/max_nesting_depth/int_array/file.parq b/testdata/max_nesting_depth/int_array/file.parq new file mode 100644 index 000000000..dcd4f679b Binary files /dev/null and b/testdata/max_nesting_depth/int_array/file.parq differ diff --git a/testdata/max_nesting_depth/int_map/file.parq b/testdata/max_nesting_depth/int_map/file.parq new file mode 100644 index 000000000..6f86147c9 Binary files /dev/null and b/testdata/max_nesting_depth/int_map/file.parq differ diff --git a/testdata/max_nesting_depth/struct/file.parq b/testdata/max_nesting_depth/struct/file.parq new file mode 100644 index 000000000..54eece54f Binary files /dev/null and b/testdata/max_nesting_depth/struct/file.parq differ diff --git a/testdata/max_nesting_depth/struct_array/file.parq b/testdata/max_nesting_depth/struct_array/file.parq new file mode 100644 index 000000000..a5ea2c819 Binary files /dev/null and b/testdata/max_nesting_depth/struct_array/file.parq differ diff --git a/testdata/max_nesting_depth/struct_map/file.parq b/testdata/max_nesting_depth/struct_map/file.parq new file mode 100644 index 000000000..344f7f102 Binary files /dev/null and b/testdata/max_nesting_depth/struct_map/file.parq differ diff --git a/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test b/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test new file mode 100644 index 000000000..1bbd34d1a --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/max-nesting-depth.test @@ -0,0 +1,199 @@ +==== +---- QUERY +# Test maximally nested struct. 
+create external table max_nesting_depth.struct_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct/file.parq' +stored as parquet +location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct/' +==== +---- QUERY +select f. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0 +from max_nesting_depth.struct_tbl +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test maximally nested array. +create external table max_nesting_depth.int_array_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_array/file.parq' +stored as parquet +location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_array/' +==== +---- QUERY +# Test absolute table ref executed with a single scan. +select * from max_nesting_depth.int_array_tbl.f. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item.item.item. + item.item.item.item.item.item.item.item +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test relative ref executed with deeply nested subplans. 
+select * from max_nesting_depth.int_array_tbl.f t0, + t0.item t1, t1.item t2, t2.item t3, t3.item t4, t4.item t5, + t5.item t6, t6.item t7, t7.item t8, t8.item t9, t9.item t10, + t10.item t11, t11.item t12, t12.item t13, t13.item t14, t14.item t15, + t15.item t16, t16.item t17, t17.item t18, t18.item t19, t19.item t20, + t20.item t21, t21.item t22, t22.item t23, t23.item t24, t24.item t25, + t25.item t26, t26.item t27, t27.item t28, t28.item t29, t29.item t30, + t30.item t31, t31.item t32, t32.item t33, t33.item t34, t34.item t35, + t35.item t36, t36.item t37, t37.item t38, t38.item t39, t39.item t40, + t40.item t41, t41.item t42, t42.item t43, t43.item t44, t44.item t45, + t45.item t46, t46.item t47, t47.item t48, t48.item t49, t49.item t50, + t50.item t51, t51.item t52, t52.item t53, t53.item t54, t54.item t55, + t55.item t56, t56.item t57, t57.item t58, t58.item t59, t59.item t60, + t60.item t61, t61.item t62, t62.item t63, t63.item t64, t64.item t65, + t65.item t66, t66.item t67, t67.item t68, t68.item t69, t69.item t70, + t70.item t71, t71.item t72, t72.item t73, t73.item t74, t74.item t75, + t75.item t76, t76.item t77, t77.item t78, t78.item t79, t79.item t80, + t80.item t81, t81.item t82, t82.item t83, t83.item t84, t84.item t85, + t85.item t86, t86.item t87, t87.item t88, t88.item t89, t89.item t90, + t90.item t91, t91.item t92, t92.item t93, t93.item t94, t94.item t95, + t95.item t96, t96.item t97, t97.item t98 +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test maximally nested array of struct. +create external table max_nesting_depth.struct_array_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_array/file.parq' +stored as parquet +location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_array/' +==== +---- QUERY +# Test absolute table ref executed with a single scan. +select * from max_nesting_depth.struct_array_tbl.f. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. 
+ f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0 +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test relative ref executed with deeply nested subplans. +select * from max_nesting_depth.struct_array_tbl.f t0, + t0.f0 t1, t1.f0 t2, t2.f0 t3, t3.f0 t4, t4.f0 t5, + t5.f0 t6, t6.f0 t7, t7.f0 t8, t8.f0 t9, t9.f0 t10, + t10.f0 t11, t11.f0 t12, t12.f0 t13, t13.f0 t14, t14.f0 t15, + t15.f0 t16, t16.f0 t17, t17.f0 t18, t18.f0 t19, t19.f0 t20, + t20.f0 t21, t21.f0 t22, t22.f0 t23, t23.f0 t24, t24.f0 t25, + t25.f0 t26, t26.f0 t27, t27.f0 t28, t28.f0 t29, t29.f0 t30, + t30.f0 t31, t31.f0 t32, t32.f0 t33, t33.f0 t34, t34.f0 t35, + t35.f0 t36, t36.f0 t37, t37.f0 t38, t38.f0 t39, t39.f0 t40, + t40.f0 t41, t41.f0 t42, t42.f0 t43, t43.f0 t44, t44.f0 t45, + t45.f0 t46, t46.f0 t47, t47.f0 t48, t48.f0 t49 +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test maximally nested map. +create external table max_nesting_depth.int_map_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_map/file.parq' +stored as parquet +location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/int_map/' +==== +---- QUERY +# Test absolute table ref executed with a single scan. +select t.value from max_nesting_depth.int_map_tbl.f. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. + value.value.value.value.value.value.value.value.value.value. 
+ value.value.value.value.value.value.value.value t +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test relative ref executed with deeply nested subplans. +select t98.value from max_nesting_depth.int_map_tbl.f t0, + t0.value t1, t1.value t2, t2.value t3, t3.value t4, t4.value t5, + t5.value t6, t6.value t7, t7.value t8, t8.value t9, t9.value t10, + t10.value t11, t11.value t12, t12.value t13, t13.value t14, t14.value t15, + t15.value t16, t16.value t17, t17.value t18, t18.value t19, t19.value t20, + t20.value t21, t21.value t22, t22.value t23, t23.value t24, t24.value t25, + t25.value t26, t26.value t27, t27.value t28, t28.value t29, t29.value t30, + t30.value t31, t31.value t32, t32.value t33, t33.value t34, t34.value t35, + t35.value t36, t36.value t37, t37.value t38, t38.value t39, t39.value t40, + t40.value t41, t41.value t42, t42.value t43, t43.value t44, t44.value t45, + t45.value t46, t46.value t47, t47.value t48, t48.value t49, t49.value t50, + t50.value t51, t51.value t52, t52.value t53, t53.value t54, t54.value t55, + t55.value t56, t56.value t57, t57.value t58, t58.value t59, t59.value t60, + t60.value t61, t61.value t62, t62.value t63, t63.value t64, t64.value t65, + t65.value t66, t66.value t67, t67.value t68, t68.value t69, t69.value t70, + t70.value t71, t71.value t72, t72.value t73, t73.value t74, t74.value t75, + t75.value t76, t76.value t77, t77.value t78, t78.value t79, t79.value t80, + t80.value t81, t81.value t82, t82.value t83, t83.value t84, t84.value t85, + t85.value t86, t86.value t87, t87.value t88, t88.value t89, t89.value t90, + t90.value t91, t91.value t92, t92.value t93, t93.value t94, t94.value t95, + t95.value t96, t96.value t97, t97.value t98 +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test maximally nested map of struct. 
+create external table max_nesting_depth.struct_map_tbl +like parquet '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_map/file.parq' +stored as parquet +location '$FILESYSTEM_PREFIX/test-warehouse/max_nesting_depth/struct_map/' +==== +---- QUERY +# Test absolute table ref executed with a single scan. +select t.value from max_nesting_depth.struct_map_tbl.f. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0.f0. + f0.f0.f0.f0.f0.f0.f0.f0.f0 t +---- RESULTS +42 +---- TYPES +int +==== +---- QUERY +# Test relative ref executed with deeply nested subplans. +select t49.value from max_nesting_depth.struct_map_tbl.f t0, + t0.f0 t1, t1.f0 t2, t2.f0 t3, t3.f0 t4, t4.f0 t5, + t5.f0 t6, t6.f0 t7, t7.f0 t8, t8.f0 t9, t9.f0 t10, + t10.f0 t11, t11.f0 t12, t12.f0 t13, t13.f0 t14, t14.f0 t15, + t15.f0 t16, t16.f0 t17, t17.f0 t18, t18.f0 t19, t19.f0 t20, + t20.f0 t21, t21.f0 t22, t22.f0 t23, t23.f0 t24, t24.f0 t25, + t25.f0 t26, t26.f0 t27, t27.f0 t28, t28.f0 t29, t29.f0 t30, + t30.f0 t31, t31.f0 t32, t32.f0 t33, t33.f0 t34, t34.f0 t35, + t35.f0 t36, t36.f0 t37, t37.f0 t38, t38.f0 t39, t39.f0 t40, + t40.f0 t41, t41.f0 t42, t42.f0 t43, t43.f0 t44, t44.f0 t45, + t45.f0 t46, t46.f0 t47, t47.f0 t48, t48.f0 t49 +---- RESULTS +42 +---- TYPES +int +==== diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py index 8e68617b4..f26886ed7 100644 --- a/tests/query_test/test_nested_types.py +++ b/tests/query_test/test_nested_types.py @@ -6,10 +6,12 @@ import random from subprocess import check_call import pytest +from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.test_vector import * from tests.common.impala_test_suite import * from tests.common.skip import SkipIfOldAggsJoins -from tests.util.filesystem_utils import get_fs_path +from tests.util.filesystem_utils import WAREHOUSE, get_fs_path +from subprocess import call, check_call 
@SkipIfOldAggsJoins.nested_types class TestNestedTypes(ImpalaTestSuite): @@ -434,3 +436,68 @@ class TestParquetArrayEncodings(ImpalaTestSuite): check_call(["hadoop", "fs", "-put", local_path, location], shell=False) self.client.execute("invalidate metadata %s.%s" % (self.DATABASE, tablename)) return tablename + +@SkipIfOldAggsJoins.nested_types +class TestMaxNestingDepth(ImpalaTestSuite): + TEST_DB = 'max_nesting_depth' + TEST_HDFS_ROOT_DIR = "%s/max_nesting_depth/" % WAREHOUSE + + # Should be kept in sync with the FE's Type.MAX_NESTING_DEPTH + MAX_NESTING_DEPTH = 100 + + @classmethod + def get_workload(self): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestMaxNestingDepth, cls).add_test_dimensions() + cls.TestMatrix.add_constraint(lambda v: + v.get_value('table_format').file_format == 'parquet') + + def setup_method(self, method): + self.cleanup_db(TestMaxNestingDepth.TEST_DB) + self.client.execute("create database %s location '%s/%s.db'" % + (TestMaxNestingDepth.TEST_DB, WAREHOUSE, + TestMaxNestingDepth.TEST_DB)) + if method.__name__ == "test_max_nesting_depth": + # Clean up HDFS directory and copy the test files. 
+ self.__remove_test_hdfs_dir() + self.__copy_test_hdfs_dir() + + def teardown_method(self, method): + self.cleanup_db(TestMaxNestingDepth.TEST_DB) + if method.__name__ == "test_max_nesting_depth": + self.__remove_test_hdfs_dir() + + def __remove_test_hdfs_dir(self): + call(["hdfs", "dfs", "-rm", "-r", + TestMaxNestingDepth.TEST_HDFS_ROOT_DIR], shell=False) + + def __copy_test_hdfs_dir(self): + check_call(["hdfs", "dfs", "-copyFromLocal", + "%s/testdata/max_nesting_depth" % os.environ['IMPALA_HOME'], + WAREHOUSE], shell=False) + + @pytest.mark.execute_serially + def test_max_nesting_depth(self, vector): + """Tests that Impala can scan Parquet files having complex types of + the maximum nesting depth.""" + self.run_test_case('QueryTest/max-nesting-depth', vector) + + @pytest.mark.execute_serially + def test_load_hive_table(self, vector): + """Tests that Impala rejects Hive-created tables with complex types that exceed + the maximum nesting depth.""" + # Type with a nesting depth of MAX_NESTING_DEPTH + 1 + type_sql = ("array<" * self.MAX_NESTING_DEPTH) + "int" +\ + (">" * self.MAX_NESTING_DEPTH) + create_table_sql = "CREATE TABLE %s.above_max_depth (f %s) STORED AS PARQUET" %\ + (self.TEST_DB, type_sql) + check_call(["hive", "-e", create_table_sql], shell=False) + self.client.execute("invalidate metadata %s.above_max_depth" % self.TEST_DB) + try: + self.client.execute("explain select 1 from %s.above_max_depth" % self.TEST_DB) + assert False, "Expected table loading to fail." + except ImpalaBeeswaxException, e: + assert "Type exceeds the maximum nesting depth" in str(e)