mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-13211: Add negative test for Parquet Byte Stream Split encoding
This change adds EE tests in test_parquet_byte_stream_split_encoding.py that check that Impala returns the correct error message when it encounters a table that contains a parquet file with Byte Stream Split encoding. To regenerate the test files, run the parquet_files_generator.py script in the testdata/parquet_byte_stream_split_encoding/ folder. Change-Id: If5eff8bf51fe246a9d0250e38c470b821fec75d9 Reviewed-on: http://gerrit.cloudera.org:8080/22124 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
19110b490d
commit
8aea57fc77
6
testdata/parquet_byte_stream_split_encoding/README
vendored
Normal file
6
testdata/parquet_byte_stream_split_encoding/README
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
The doubles_byte_stream_split.parquet and floats_byte_stream_split.parquet files were
|
||||
generated with the parquet_files_generator.py script. The script uses PyArrow
|
||||
(https://arrow.apache.org/docs/python).
|
||||
|
||||
To regenerate the files, run:
|
||||
python3 parquet_files_generator.py
|
||||
BIN
testdata/parquet_byte_stream_split_encoding/doubles_byte_stream_split.parquet
vendored
Normal file
BIN
testdata/parquet_byte_stream_split_encoding/doubles_byte_stream_split.parquet
vendored
Normal file
Binary file not shown.
BIN
testdata/parquet_byte_stream_split_encoding/floats_byte_stream_split.parquet
vendored
Normal file
BIN
testdata/parquet_byte_stream_split_encoding/floats_byte_stream_split.parquet
vendored
Normal file
Binary file not shown.
40
testdata/parquet_byte_stream_split_encoding/parquet_files_generator.py
vendored
Normal file
40
testdata/parquet_byte_stream_split_encoding/parquet_files_generator.py
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
# Generates the Parquet test files used by the BYTE_STREAM_SPLIT negative tests.
#
# The output directory is the directory containing this script, so the files are
# written to the same place regardless of the current working directory. The README
# instructs running "python3 parquet_files_generator.py" from inside this folder; a
# path relative to the repository root would not resolve from there.
test_file_dir = os.path.dirname(os.path.abspath(__file__))

# The same values are written to both files, once as 32-bit floats and once as 64-bit
# doubles.
nums_to_encode = [1.45, 4.256, 6.3573, 4.235, 7.5198463, 10.57956, 100.68491,
                  0.54987623514, 1.0]

# Dictionary encoding is switched off together with use_byte_stream_split=True so that
# the column is written with BYTE_STREAM_SPLIT (per the PyArrow documentation,
# dictionary encoding is preferred when both are enabled).
floats = pa.array(nums_to_encode, type=pa.float32())
floats_table = pa.table([floats], names=["floats"])
pq.write_table(floats_table,
               os.path.join(test_file_dir, 'floats_byte_stream_split.parquet'),
               use_dictionary=False,
               use_byte_stream_split=True)

doubles = pa.array(nums_to_encode, type=pa.float64())
doubles_table = pa.table([doubles], names=["doubles"])
pq.write_table(doubles_table,
               os.path.join(test_file_dir, 'doubles_byte_stream_split.parquet'),
               use_dictionary=False,
               use_byte_stream_split=True)
|
||||
57
tests/query_test/test_parquet_byte_stream_split_encoding.py
Normal file
57
tests/query_test/test_parquet_byte_stream_split_encoding.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import os
|
||||
|
||||
from tests.common.file_utils import create_table_and_copy_files
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
|
||||
|
||||
class TestParquetEncodings(ImpalaTestSuite):
  """Negative tests for Parquet files written with BYTE_STREAM_SPLIT encoding.

  Each test creates a Parquet table backed by a pre-generated file from
  TEST_FILE_DIRECTORY and asserts that selecting from it fails with an
  "unsupported encoding: BYTE_STREAM_SPLIT" error.
  """

  # Location of the pre-generated .parquet files (relative path; presumably resolved
  # against the Impala source root by the file-copy helper - confirm if this changes).
  TEST_FILE_DIRECTORY = "testdata/parquet_byte_stream_split_encoding"

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests belong to."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Restrict the inherited test matrix to the parquet table format."""
    super(TestParquetEncodings, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_constraint(
        lambda vec: vec.get_value('table_format').file_format == 'parquet')

  def test_parquet_byte_stream_split_encoding_float(self, vector, unique_database):
    """Querying a FLOAT column stored with BYTE_STREAM_SPLIT must fail."""
    parquet_file = os.path.join(
        self.TEST_FILE_DIRECTORY, "floats_byte_stream_split.parquet")
    self._parquet_byte_stream_split_encoding_helper(
        vector, unique_database, "float", parquet_file)

  def test_parquet_byte_stream_split_encoding_double(self, vector, unique_database):
    """Querying a DOUBLE column stored with BYTE_STREAM_SPLIT must fail."""
    parquet_file = os.path.join(
        self.TEST_FILE_DIRECTORY, "doubles_byte_stream_split.parquet")
    self._parquet_byte_stream_split_encoding_helper(
        vector, unique_database, "double", parquet_file)

  def _parquet_byte_stream_split_encoding_helper(self, vector, unique_database, col_type,
      filename):
    """Create a one-column table over 'filename' and expect a query on it to fail.

    'col_type' is the SQL type of the single 'numbers' column. 'vector' is accepted
    for symmetry with the test methods but is not used here.
    """
    tbl = "parquet_byte_stream_split_negative_test"
    ddl = "create table {}.{} (numbers {}) stored as parquet".format(
        unique_database, tbl, col_type)
    create_table_and_copy_files(self.client, ddl, unique_database, tbl, [filename])

    select_sql = "select * from {}.{}".format(unique_database, tbl)
    failure = self.execute_query_expect_failure(self.client, select_sql)
    # The error text must name the unsupported encoding.
    assert "unsupported encoding: BYTE_STREAM_SPLIT" in str(failure)
|
||||
Reference in New Issue
Block a user