mirror of https://github.com/apache/impala.git
Some tests have constraints that were there only to help reduce runtime, which reduces coverage when running in exhaustive mode. The majority of the constraints are because it adds no value to run the test across additional dimensions (or it is invalid to run with those dimensions).

Updates the tests that have legitimate constraints to use two new helper methods for constraining the table format dimension:
  - create_uncompressed_text_dimension()
  - create_parquet_dimension()

These will create a dimension that will produce a single test vector, either uncompressed text or parquet respectively.

Change-Id: Id85387c1efd5d192f8059ef89934933389bfe247
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2149
Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
Tested-by: jenkins
(cherry picked from commit e02acbd469bc48c684b2089405b4a20552802481)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2290
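The commit message names the two helpers without showing them. Below is a minimal, hypothetical sketch of what they might look like, assuming the TestDimension, TableFormatInfo, and get_dataset_from_workload utilities from Impala's test framework; the import paths and the 'text/none' / 'parquet/none' format strings are assumptions, not taken from this page.

# Hypothetical sketch of the two table-format helpers named in the commit
# message above. The real implementations live elsewhere in Impala's test
# framework and may differ in detail.
from tests.common.test_vector import TestDimension
from tests.common.test_dimensions import TableFormatInfo, get_dataset_from_workload

def create_uncompressed_text_dimension(workload):
  # Produces a single test vector: uncompressed text tables only.
  dataset = get_dataset_from_workload(workload)
  return TestDimension('table_format',
      TableFormatInfo.create_from_string(dataset, 'text/none'))

def create_parquet_dimension(workload):
  # Produces a single test vector: parquet tables only.
  dataset = get_dataset_from_workload(workload)
  return TestDimension('table_format',
      TableFormatInfo.create_from_string(dataset, 'parquet/none'))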
60 lines
2.3 KiB
Python
#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Functional tests for LOAD DATA statements.

import pytest
from tests.common.test_vector import *
from tests.common.impala_test_suite import *
from subprocess import call
class TestLoadData(ImpalaTestSuite):
  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestLoadData, cls).add_test_dimensions()
    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
    # Constrain the table format dimension to a single test vector
    # (uncompressed text); running across other formats adds no coverage here.
    cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
  def setup_method(self, method):
    # Clean up any existing files in the test tables and staging directories.
    call(["hadoop", "fs", "-rm", "-r", "-f", "/test-warehouse/test_load*"], shell=False)
    call(["hadoop", "fs", "-rm", "-r", "-f", "/tmp/load_data/"], shell=False)

    # Create staging directories.
    for i in range(1, 6):
      call(["hadoop", "fs", "-mkdir", "-p", "/tmp/load_data/%d" % i], shell=False)

    # Copy some data files from existing tables to validate load.
    for i in range(1, 4):
      call(["hadoop", "fs", "-cp",
            "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
            "/tmp/load_data/%d" % i], shell=False)

    # Each partition in alltypesaggmultifiles should have 4 data files.
    for i in range(4, 6):
      call(["hadoop", "fs", "-cp",
            "/test-warehouse/alltypesaggmultifiles/year=2010/month=1/day=1/*",
            "/tmp/load_data/%d/" % i], shell=False)

    # Make some hidden files (names starting with '.' or '_').
    call(["hadoop", "fs", "-cp",
          "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
          "/tmp/load_data/3/.100101.txt"], shell=False)
    call(["hadoop", "fs", "-cp",
          "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
          "/tmp/load_data/3/_100101.txt"], shell=False)
  @classmethod
  def __assert_hdfs_path_exists(cls, path):
    assert 0 == call(["hadoop", "fs", "-test", "-e", path], shell=False),\
        "Path does not exist: %s" % path
  def test_load(self, vector):
    self.run_test_case('QueryTest/load', vector)
    # The hidden files should not have been moved as part of the load operation.
    self.__assert_hdfs_path_exists("/tmp/load_data/3/.100101.txt")
    self.__assert_hdfs_path_exists("/tmp/load_data/3/_100101.txt")
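For comparison, the commit message implies that a suite meant to run only against Parquet would constrain its table format dimension the same way, with a one-line change inside add_test_dimensions(); a hypothetical sketch:

  # Hypothetical: constrain the table format dimension to a single
  # Parquet test vector instead of uncompressed text.
  cls.TestMatrix.add_dimension(create_parquet_dimension(cls.get_workload()))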