mirror of
https://github.com/apache/impala.git
synced 2026-01-10 18:00:14 -05:00
IMPALA-5037: Default PARQUET_ARRAY_RESOLUTION=THREE_LEVEL
Changes the default value for the PARQUET_ARRAY_RESOLUTION query option to conform to the Parquet standard. Before: TWO_LEVEL_THEN_THREE_LEVEL After: THREE_LEVEL For more information see: * IMPALA-4725 * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md Testing: - expands and cleans up the existing tests for more coverage over the different resolution policies - private core/hdfs run passed Cherry-picks: not for 2.x. Change-Id: Ib8f7e9010c4354d667305d9df7b78862efb23fe1 Reviewed-on: http://gerrit.cloudera.org:8080/9210 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Impala Public Jenkins
This commit is contained in:
committed by
Impala Public Jenkins
parent
32bf54dfd0
commit
dedff5e25c
@@ -225,7 +225,7 @@ struct TQueryOptions {
|
||||
|
||||
// Policy for resolving nested array fields in Parquet files.
|
||||
54: optional TParquetArrayResolution parquet_array_resolution =
|
||||
TParquetArrayResolution.TWO_LEVEL_THEN_THREE_LEVEL
|
||||
TParquetArrayResolution.THREE_LEVEL
|
||||
|
||||
// Indicates whether to read statistics from Parquet files and use them during query
|
||||
// processing. This includes skipping data based on the statistics and computing query
|
||||
|
||||
@@ -247,10 +247,6 @@ enum TImpalaQueryOptions {
|
||||
// between the two and three level encodings with index-based field resolution.
|
||||
// The ambiguity can manually be resolved using this query option, or by using
|
||||
// PARQUET_FALLBACK_SCHEMA_RESOLUTION=name.
|
||||
// The value TWO_LEVEL_THEN_THREE_LEVEL was the default mode since Impala 2.3.
|
||||
// It is preserved as the default for compatibility.
|
||||
// TODO: Remove the TWO_LEVEL_THEN_THREE_LEVEL mode completely or at least make
|
||||
// it non-default in a compatibility breaking release.
|
||||
PARQUET_ARRAY_RESOLUTION,
|
||||
|
||||
// Indicates whether to read statistics from Parquet files and use them during query
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
|
||||
import os
|
||||
from subprocess import check_call
|
||||
from pytest import skip
|
||||
|
||||
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
@@ -27,6 +28,7 @@ from tests.common.skip import (
|
||||
SkipIfS3,
|
||||
SkipIfADLS,
|
||||
SkipIfLocal)
|
||||
from tests.common.test_vector import ImpalaTestDimension
|
||||
from tests.util.filesystem_utils import WAREHOUSE, get_fs_path
|
||||
|
||||
class TestNestedTypes(ImpalaTestSuite):
|
||||
@@ -130,6 +132,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],
|
||||
"testdata/parquet_nested_types_encodings")
|
||||
|
||||
ARRAY_RESOLUTION_POLICIES = ["three_level", "two_level", "two_level_then_three_level"]
|
||||
|
||||
@classmethod
|
||||
def get_workload(self):
|
||||
return 'functional-query'
|
||||
@@ -137,9 +141,17 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
@classmethod
|
||||
def add_test_dimensions(cls):
|
||||
super(TestParquetArrayEncodings, cls).add_test_dimensions()
|
||||
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
|
||||
'parquet_array_resolution', *TestParquetArrayEncodings.ARRAY_RESOLUTION_POLICIES))
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
v.get_value('table_format').file_format == 'parquet')
|
||||
|
||||
def __init_arr_res(self, vector):
|
||||
arr_res = vector.get_value('parquet_array_resolution')
|
||||
qopts = vector.get_value('exec_option')
|
||||
qopts['parquet_array_resolution'] = arr_res
|
||||
return (arr_res, qopts)
|
||||
|
||||
# $ parquet-tools schema SingleFieldGroupInList.parquet
|
||||
# message SingleFieldGroupInList {
|
||||
# optional group single_element_groups (LIST) {
|
||||
@@ -156,54 +168,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
# .single_element_group:
|
||||
# ..count = 2345
|
||||
def test_single_field_group_in_list(self, vector, unique_database):
|
||||
tablename = "SingleFieldGroupInList"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename, "SingleFieldGroupInList.parquet",
|
||||
"col1 array<struct<count: bigint>>")
|
||||
|
||||
result = self.client.execute("select item.count from %s.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
assert result.data == ['1234', '2345']
|
||||
|
||||
result = self.client.execute("select item.count from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
assert result.data == ['1234', '2345']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
assert result.data == ['2']
|
||||
|
||||
# $ parquet-tools schema AvroPrimitiveInList.parquet
|
||||
# message AvroPrimitiveInList {
|
||||
# required group list_of_ints (LIST) {
|
||||
# repeated int32 array;
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# $ parquet-tools cat AvroPrimitiveInList.parquet
|
||||
# list_of_ints:
|
||||
# .array = 34
|
||||
# .array = 35
|
||||
# .array = 36
|
||||
def test_avro_primitive_in_list(self, vector, unique_database):
|
||||
tablename = "AvroPrimitiveInList"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename, "AvroPrimitiveInList.parquet",
|
||||
"col1 array<int>")
|
||||
|
||||
result = self.client.execute("select item from %s.col1" % full_name)
|
||||
assert len(result.data) == 3
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
result = self.client.execute("select item from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 3
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
assert result.data == ['3']
|
||||
self.__test_single_field_group_in_list(unique_database, "SingleFieldGroupInList",
|
||||
"SingleFieldGroupInList.parquet", vector)
|
||||
|
||||
# $ parquet-tools schema AvroSingleFieldGroupInList.parquet
|
||||
# message AvroSingleFieldGroupInList {
|
||||
@@ -221,25 +187,102 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
# .array:
|
||||
# ..count = 2345
|
||||
def test_avro_single_field_group_in_list(self, vector, unique_database):
|
||||
tablename = "AvroSingleFieldGroupInList"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
# Note that the field name does not match the field name in the file schema.
|
||||
self._create_test_table(unique_database, tablename,
|
||||
"AvroSingleFieldGroupInList.parquet", "col1 array<struct<f1: bigint>>")
|
||||
self.__test_single_field_group_in_list(unique_database, "AvroSingleFieldGroupInList",
|
||||
"AvroSingleFieldGroupInList.parquet", vector)
|
||||
|
||||
result = self.client.execute("select item.f1 from %s.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
assert result.data == ['1234', '2345']
|
||||
# $ parquet-tools schema ThriftSingleFieldGroupInList.parquet
|
||||
# message ThriftSingleFieldGroupInList {
|
||||
# optional group single_element_groups (LIST) {
|
||||
# repeated group single_element_groups_tuple {
|
||||
# required int64 count;
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# $ parquet-tools cat ThriftSingleFieldGroupInList.parquet
|
||||
# single_element_groups:
|
||||
# .single_element_groups_tuple:
|
||||
# ..count = 1234
|
||||
# .single_element_groups_tuple:
|
||||
# ..count = 2345
|
||||
def test_thrift_single_field_group_in_list(self, vector, unique_database):
|
||||
self.__test_single_field_group_in_list(unique_database,
|
||||
"ThriftSingleFieldGroupInList", "ThriftSingleFieldGroupInList.parquet", vector)
|
||||
|
||||
result = self.client.execute("select item.f1 from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
assert result.data == ['1234', '2345']
|
||||
def __test_single_field_group_in_list(self, db, tbl, parq_file, vector):
|
||||
arr_res, qopts = self.__init_arr_res(vector)
|
||||
full_name = "%s.%s" % (db, tbl)
|
||||
if arr_res == "two_level" or arr_res == "two_level_then_three_level":
|
||||
self._create_test_table(db, tbl, parq_file, "col1 array<struct<f1: bigint>>")
|
||||
result = self.execute_query("select item.f1 from %s.col1" % full_name, qopts)
|
||||
assert result.data == ['1234', '2345']
|
||||
result = self.execute_query("select item.f1 from %s t, t.col1" % full_name, qopts)
|
||||
assert result.data == ['1234', '2345']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
if arr_res == "three_level":
|
||||
self._create_test_table(db, tbl, parq_file, "col1 array<bigint>")
|
||||
result = self.execute_query("select item from %s.col1" % full_name, qopts)
|
||||
assert result.data == ['1234', '2345']
|
||||
result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
|
||||
assert result.data == ['1234', '2345']
|
||||
|
||||
result = self.execute_query(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
|
||||
assert result.data == ['2']
|
||||
|
||||
# $ parquet-tools schema AvroPrimitiveInList.parquet
|
||||
# message AvroPrimitiveInList {
|
||||
# required group list_of_ints (LIST) {
|
||||
# repeated int32 array;
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# $ parquet-tools cat AvroPrimitiveInList.parquet
|
||||
# list_of_ints:
|
||||
# .array = 34
|
||||
# .array = 35
|
||||
# .array = 36
|
||||
def test_avro_primitive_in_list(self, vector, unique_database):
|
||||
self.__test_primitive_in_list(unique_database, "AvroPrimitiveInList",
|
||||
"AvroPrimitiveInList.parquet", vector)
|
||||
|
||||
# $ parquet-tools schema ThriftPrimitiveInList.parquet
|
||||
# message ThriftPrimitiveInList {
|
||||
# required group list_of_ints (LIST) {
|
||||
# repeated int32 list_of_ints_tuple;
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# $ parquet-tools cat ThriftPrimitiveInList.parquet
|
||||
# list_of_ints:
|
||||
# .list_of_ints_tuple = 34
|
||||
# .list_of_ints_tuple = 35
|
||||
# .list_of_ints_tuple = 36
|
||||
def test_thrift_primitive_in_list(self, vector, unique_database):
|
||||
self.__test_primitive_in_list(unique_database, "ThriftPrimitiveInList",
|
||||
"ThriftPrimitiveInList.parquet", vector)
|
||||
|
||||
def __test_primitive_in_list(self, db, tbl, parq_file, vector):
|
||||
arr_res, qopts = self.__init_arr_res(vector)
|
||||
full_name = "%s.%s" % (db, tbl)
|
||||
self._create_test_table(db, tbl, parq_file, "col1 array<int>")
|
||||
|
||||
if arr_res == "two_level" or arr_res == "two_level_then_three_level":
|
||||
result = self.execute_query("select item from %s.col1" % full_name, qopts)
|
||||
assert result.data == ['34', '35', '36']
|
||||
result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
if arr_res == "three_level":
|
||||
result = self.execute_query("select item from %s.col1" % full_name, qopts)
|
||||
assert result.data == ['NULL', 'NULL', 'NULL']
|
||||
result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
|
||||
assert result.data == ['NULL', 'NULL', 'NULL']
|
||||
|
||||
result = self.execute_query(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
|
||||
assert result.data == ['3']
|
||||
|
||||
# $ parquet-tools schema bad-avro.parquet
|
||||
# message org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray {
|
||||
# required group int_arrays_column (LIST) {
|
||||
@@ -280,94 +323,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
#
|
||||
# [Same int_arrays_column repeated 8x more]
|
||||
def test_avro_array_of_arrays(self, vector, unique_database):
|
||||
tablename = "AvroArrayOfArrays"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename, "bad-avro.parquet",
|
||||
"col1 array<array<int>>")
|
||||
|
||||
result = self.client.execute("select item from %s.col1.item" % full_name)
|
||||
assert len(result.data) == 9 * 10
|
||||
assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * 10
|
||||
|
||||
result = self.client.execute(
|
||||
"select a2.item from %s t, t.col1 a1, a1.item a2" % full_name)
|
||||
assert len(result.data) == 9 * 10
|
||||
assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * 10
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 10
|
||||
assert result.data == ['3'] * 10
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v" % full_name)
|
||||
assert len(result.data) == 3 * 10
|
||||
assert result.data == ['3', '3', '3'] * 10
|
||||
|
||||
# $ parquet-tools schema ThriftPrimitiveInList.parquet
|
||||
# message ThriftPrimitiveInList {
|
||||
# required group list_of_ints (LIST) {
|
||||
# repeated int32 list_of_ints_tuple;
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# $ parquet-tools cat ThriftPrimitiveInList.parquet
|
||||
# list_of_ints:
|
||||
# .list_of_ints_tuple = 34
|
||||
# .list_of_ints_tuple = 35
|
||||
# .list_of_ints_tuple = 36
|
||||
def test_thrift_primitive_in_list(self, vector, unique_database):
|
||||
tablename = "ThriftPrimitiveInList"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename,
|
||||
"ThriftPrimitiveInList.parquet", "col1 array<int>")
|
||||
|
||||
result = self.client.execute("select item from %s.col1" % full_name)
|
||||
assert len(result.data) == 3
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
result = self.client.execute("select item from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 3
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
assert result.data == ['3']
|
||||
|
||||
# $ parquet-tools schema ThriftSingleFieldGroupInList.parquet
|
||||
# message ThriftSingleFieldGroupInList {
|
||||
# optional group single_element_groups (LIST) {
|
||||
# repeated group single_element_groups_tuple {
|
||||
# required int64 count;
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# $ parquet-tools cat ThriftSingleFieldGroupInList.parquet
|
||||
# single_element_groups:
|
||||
# .single_element_groups_tuple:
|
||||
# ..count = 1234
|
||||
# .single_element_groups_tuple:
|
||||
# ..count = 2345
|
||||
def test_thrift_single_field_group_in_list(self, vector, unique_database):
|
||||
tablename = "ThriftSingleFieldGroupInList"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename,
|
||||
"ThriftSingleFieldGroupInList.parquet", "col1 array<struct<f1: bigint>>")
|
||||
|
||||
result = self.client.execute("select item.f1 from %s.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
assert result.data == ['1234', '2345']
|
||||
|
||||
result = self.client.execute("select item.f1 from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
assert result.data == ['1234', '2345']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
assert result.data == ['2']
|
||||
self.__test_array_of_arrays(unique_database, "AvroArrayOfArrays",
|
||||
"bad-avro.parquet", vector, 10)
|
||||
|
||||
# $ parquet-tools schema bad-thrift.parquet
|
||||
# message ParquetSchema {
|
||||
@@ -393,29 +350,39 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
# ..intListsColumn_tuple_tuple = 7
|
||||
# ..intListsColumn_tuple_tuple = 8
|
||||
def test_thrift_array_of_arrays(self, vector, unique_database):
|
||||
tablename = "ThriftArrayOfArrays"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename, "bad-thrift.parquet",
|
||||
"col1 array<array<int>>")
|
||||
self.__test_array_of_arrays(unique_database, "ThriftArrayOfArrays",
|
||||
"bad-thrift.parquet", vector, 1)
|
||||
|
||||
result = self.client.execute("select item from %s.col1.item" % full_name)
|
||||
assert len(result.data) == 9
|
||||
assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8']
|
||||
def __test_array_of_arrays(self, db, tbl, parq_file, vector, mult):
|
||||
arr_res, qopts = self.__init_arr_res(vector)
|
||||
full_name = "%s.%s" % (db, tbl)
|
||||
self._create_test_table(db, tbl, parq_file, "col1 array<array<int>>")
|
||||
|
||||
result = self.client.execute(
|
||||
"select a2.item from %s t, t.col1 a1, a1.item a2" % full_name)
|
||||
assert len(result.data) == 9
|
||||
assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8']
|
||||
if arr_res == "two_level" or arr_res == "two_level_then_three_level":
|
||||
result = self.execute_query("select item from %s.col1.item" % full_name, qopts)
|
||||
assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * mult
|
||||
result = self.execute_query(
|
||||
"select a2.item from %s t, t.col1 a1, a1.item a2" % full_name)
|
||||
assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * mult
|
||||
result = self.execute_query(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
|
||||
assert result.data == ['3'] * mult
|
||||
result = self.execute_query(
|
||||
"select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v"\
|
||||
% full_name, qopts)
|
||||
assert result.data == ['3', '3', '3'] * mult
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
assert result.data == ['3']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v" % full_name)
|
||||
assert len(result.data) == 3
|
||||
assert result.data == ['3', '3', '3']
|
||||
if arr_res == "three_level":
|
||||
expected_err = "has an incompatible Parquet schema"
|
||||
try:
|
||||
self.execute_query("select item from %s.col1.item" % full_name, qopts)
|
||||
except Exception, e:
|
||||
assert expected_err in str(e)
|
||||
try:
|
||||
self.execute_query("select cnt from %s t, (select count(*) cnt from t.col1) v"\
|
||||
% full_name, qopts)
|
||||
except Exception, e:
|
||||
assert expected_err in str(e)
|
||||
|
||||
# $ parquet-tools schema UnannotatedListOfPrimitives.parquet
|
||||
# message UnannotatedListOfPrimitives {
|
||||
@@ -427,22 +394,18 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
# list_of_ints = 35
|
||||
# list_of_ints = 36
|
||||
def test_unannotated_list_of_primitives(self, vector, unique_database):
|
||||
arr_res, qopts = self.__init_arr_res(vector)
|
||||
tablename = "UnannotatedListOfPrimitives"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename,
|
||||
"UnannotatedListOfPrimitives.parquet", "col1 array<int>")
|
||||
|
||||
result = self.client.execute("select item from %s.col1" % full_name)
|
||||
assert len(result.data) == 3
|
||||
result = self.execute_query("select item from %s.col1" % full_name, qopts)
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
result = self.client.execute("select item from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 3
|
||||
result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
|
||||
assert result.data == ['34', '35', '36']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
result = self.execute_query(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
|
||||
assert result.data == ['3']
|
||||
|
||||
# $ parquet-tools schema UnannotatedListOfGroups.parquet
|
||||
@@ -461,22 +424,18 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
# .x = 2.0
|
||||
# .y = 2.0
|
||||
def test_unannotated_list_of_groups(self, vector, unique_database):
|
||||
arr_res, qopts = self.__init_arr_res(vector)
|
||||
tablename = "UnannotatedListOfGroups"
|
||||
full_name = "%s.%s" % (unique_database, tablename)
|
||||
self._create_test_table(unique_database, tablename,
|
||||
"UnannotatedListOfGroups.parquet", "col1 array<struct<f1: float, f2: float>>")
|
||||
|
||||
result = self.client.execute("select f1, f2 from %s.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
result = self.execute_query("select f1, f2 from %s.col1" % full_name, qopts)
|
||||
assert result.data == ['1\t1', '2\t2']
|
||||
|
||||
result = self.client.execute("select f1, f2 from %s t, t.col1" % full_name)
|
||||
assert len(result.data) == 2
|
||||
result = self.execute_query("select f1, f2 from %s t, t.col1" % full_name, qopts)
|
||||
assert result.data == ['1\t1', '2\t2']
|
||||
|
||||
result = self.client.execute(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
|
||||
assert len(result.data) == 1
|
||||
result = self.execute_query(
|
||||
"select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
|
||||
assert result.data == ['2']
|
||||
|
||||
# $ parquet-tools schema AmbiguousList_Modern.parquet
|
||||
@@ -548,6 +507,11 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
|
||||
not exactly match the data files'. The name-based policy generally does not have
|
||||
this problem because it avoids traversing incorrect schema paths.
|
||||
"""
|
||||
|
||||
# The Parquet resolution policy is manually set in the .test files.
|
||||
if vector.get_value('parquet_array_resolution') != "three_level":
|
||||
skip("Test only run with three_level")
|
||||
|
||||
ambig_modern_tbl = "ambig_modern"
|
||||
self._create_test_table(unique_database, ambig_modern_tbl,
|
||||
"AmbiguousList_Modern.parquet",
|
||||
|
||||
Reference in New Issue
Block a user