#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import pytest
import shlex
import time
from tests.common.test_result_verifier import *
from tests.util.shell_util import exec_process
from tests.common.test_vector import *
from tests.common.test_dimensions import (ALL_NODES_ONLY,
    create_single_exec_option_dimension)
from tests.common.impala_test_suite import *

# Tests specific to partition metadata.
# TODO: Split up the DDL tests and move some of the partition-specific tests here.
class TestPartitionMetadata(ImpalaTestSuite):
  TEST_DB = 'partition_md'
  TEST_TBL = 'bulk_part'

  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestPartitionMetadata, cls).add_test_dimensions()
    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
    # There is no reason to run these tests using all dimensions.
    cls.TestMatrix.add_constraint(lambda v:
        v.get_value('table_format').file_format == 'text' and
        v.get_value('table_format').compression_codec == 'none')

  def setup_method(self, method):
    self.cleanup_db(self.TEST_DB)
    self.client.execute("create database %s" % self.TEST_DB)

  def teardown_method(self, method):
    self.cleanup_db(self.TEST_DB)

  @pytest.mark.execute_serially
  def test_multiple_partitions_same_location(self, vector):
    """Regression test for IMPALA-597. Verifies Impala is able to properly read
    tables that have multiple partitions pointing to the same location.
    """
    self.client.execute("use %s" % self.TEST_DB)
    location = '/test-warehouse/%s' % self.TEST_TBL
    # Clean up any existing data in the table directory.
    self.hdfs_client.delete_file_dir(location[1:], recursive=True)
    # Create the table.
    self.client.execute("create table %s(i int) partitioned by(j int) "
        "location '%s'" % (self.TEST_TBL, location))

    # Use a partition location that does not contain a key=value path.
    self.hdfs_client.make_dir(location[1:] + '/p')
    # Point both partitions to the same location.
    self.client.execute("alter table %s add partition (j=1) location '%s/p'"
        % (self.TEST_TBL, location))
    self.client.execute("alter table %s add partition (j=2) location '%s/p'"
        % (self.TEST_TBL, location))

    # Insert some data. The file lands under '/p', so it is visible to both
    # partitions.
    self.client.execute("insert into table %s partition(j=1) select 1" % self.TEST_TBL)

    # The data will be read twice because each partition points to the same
    # location: i sums to 1+1=2 and j to 1+2=3.
    data = self.execute_scalar("select sum(i), sum(j) from %s" % self.TEST_TBL)
    assert data.split('\t') == ['2', '3']

    self.client.execute("insert into %s partition(j) select 1, 1" % self.TEST_TBL)
    self.client.execute("insert into %s partition(j) select 1, 2" % self.TEST_TBL)
    # Now three rows share the directory, each read once per partition:
    # sum(i) = 6*1 = 6 and sum(j) = 3*1 + 3*2 = 9.
    data = self.execute_scalar("select sum(i), sum(j) from %s" % self.TEST_TBL)
    assert data.split('\t') == ['6', '9']
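
# Usage sketch (an assumption about the local checkout, not part of the suite):
# with the Impala test environment configured, this module can typically be run
# on its own through py.test, e.g.:
#
#   py.test tests/metadata/test_partition_metadata.py -k test_multiple_partitions
#
# The module path above is hypothetical; substitute the file's actual location
# in the repository.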