mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
Partition metadata tests were marked as xfail because of IMPALA-624. Additionally, we had to invoke hive to insert into two partitions pointing to the same location (this limitation is now removed). This patch changes the test to use Impala exclusively, removes the xfail tag and adds a teardown method to the test class. Change-Id: I15fa97bef4f8714d0873a9c713627a198f3388ad Reviewed-on: http://gerrit.ent.cloudera.com:8080/2086 Reviewed-by: Ishaan Joshi <ishaan@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/2215
88 lines
3.4 KiB
Python
88 lines
3.4 KiB
Python
#!/usr/bin/env python
|
|
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import logging
|
|
import pytest
|
|
import shlex
|
|
import time
|
|
from tests.common.test_result_verifier import *
|
|
from tests.util.shell_util import exec_process
|
|
from tests.common.test_vector import *
|
|
from tests.common.test_dimensions import ALL_NODES_ONLY
|
|
from tests.common.impala_test_suite import *
|
|
|
|
# Tests specific to partition metadata.
|
|
# TODO: Split up the DDL tests and move some of the partition-specific tests
|
|
# here.
|
|
class TestPartitionMetadata(ImpalaTestSuite):
  """Tests specific to partition metadata.

  Each test runs inside the TEST_DB database: setup_method() cleans up any
  leftover copy and creates the database, and teardown_method() cleans it up
  again once the test has finished.
  """

  # Database that is created before, and cleaned up after, every test method.
  TEST_DB = 'partition_md'
  # Table created by the test; also the last path component of its location.
  TEST_TBL = 'bulk_part'

  @classmethod
  def get_workload(cls):
    """Return the name of the workload these tests are associated with."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Extend the inherited test matrix and narrow it to text/uncompressed.

    Adds the dimension produced by create_single_exec_option_dimension() and
    then constrains the matrix to vectors whose table format is 'text' with
    compression codec 'none'.
    """
    super(TestPartitionMetadata, cls).add_test_dimensions()
    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())

    # There is no reason to run these tests using all dimensions.
    cls.TestMatrix.add_constraint(
        lambda v: (v.get_value('table_format').file_format == 'text' and
                   v.get_value('table_format').compression_codec == 'none'))

  def setup_method(self, method):
    """Clean up any leftover TEST_DB and create it anew before each test.

    `method` is the test method about to run (pytest xunit-style hook); it is
    not used here.
    """
    self.cleanup_db(self.TEST_DB)
    self.client.execute("create database %s" % self.TEST_DB)

  def teardown_method(self, method):
    """Clean up TEST_DB after each test.

    `method` is the test method that just ran (pytest xunit-style hook); it is
    not used here.
    """
    self.cleanup_db(self.TEST_DB)

  @pytest.mark.execute_serially
  def test_multiple_partitions_same_location(self, vector):
    """Regression test for IMPALA-597. Verifies Impala is able to properly read
    tables that have multiple partitions pointing to the same location.
    """
    self.client.execute("use %s" % self.TEST_DB)
    location = '/test-warehouse/%s' % self.TEST_TBL
    # The HDFS client calls below are handed the path without its leading '/'.
    hdfs_path = location[1:]

    # Cleanup any existing data in the table directory.
    self.hdfs_client.delete_file_dir(hdfs_path, recursive=True)

    # Create the table. The two literals are concatenated before formatting,
    # so a single statement is sent.
    self.client.execute(
        "create table %s(i int) partitioned by(j int)"
        "location '%s'" % (self.TEST_TBL, location))

    # Point multiple partitions to the same location and use partition locations that
    # do not contain a key=value path.
    self.hdfs_client.make_dir(hdfs_path + '/p')

    # Point both partitions (j=1 and j=2) to the same location.
    for j in (1, 2):
      self.client.execute("alter table %s add partition (j=%d) location '%s/p'" %
                          (self.TEST_TBL, j, location))

    # Insert some data.
    self.client.execute("insert into table %s partition(j=1) select 1" % self.TEST_TBL)

    # The data will be read twice because each partition points to the same location:
    # the single row i=1 is seen once as j=1 and once as j=2, so sum(i) = 1 + 1 = 2
    # and sum(j) = 1 + 2 = 3.
    data = self.execute_scalar("select sum(i), sum(j) from %s" % self.TEST_TBL)
    assert data.split('\t') == ['2', '3']

    # Insert one more row through each partition, this time naming the partition
    # dynamically (the last select expression supplies j).
    self.client.execute("insert into %s partition(j) select 1, 1" % self.TEST_TBL)
    self.client.execute("insert into %s partition(j) select 1, 2" % self.TEST_TBL)

    # Three rows with i=1 are now expected to be visible through both partitions:
    # sum(i) = 3 * 2 = 6 and sum(j) = 3 * (1 + 2) = 9.
    data = self.execute_scalar("select sum(i), sum(j) from %s" % self.TEST_TBL)
    assert data.split('\t') == ['6', '9']
|