Files
impala/tests/query_test/test_load.py
Lenni Kuff c2cfc7e2a3 IMPALA-373: Add support for 'LOAD DATA' statements
This change adds Impala support for LOAD DATA statements. This allows the user
to load one or more files into a table or partition from a given HDFS location. The
load operation only moves files, it does not convert data to match the target
table/partition's file format.
2014-01-08 10:51:02 -08:00

64 lines
2.4 KiB
Python

#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# Functional tests for LOAD DATA statements.
import pytest
from tests.common.test_vector import *
from tests.common.impala_test_suite import *
from subprocess import call
class TestLoadData(ImpalaTestSuite):
  """Functional tests for LOAD DATA statements (IMPALA-373).

  Stages data files under /tmp/load_data in HDFS before each test, then runs
  the QueryTest/load workload and verifies that hidden files (names starting
  with '.' or '_') are left behind by the load operation.
  """

  @classmethod
  def get_workload(cls):
    # First parameter renamed self -> cls: this is a @classmethod, so it
    # receives the class, not an instance.
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestLoadData, cls).add_test_dimensions()
    # LOAD DATA only moves files, so exercising every exec/format combination
    # is redundant. Constrain to a single representative configuration:
    # text format, default batch size, codegen enabled, more than one node.
    cls.TestMatrix.add_constraint(lambda v:
        v.get_value('table_format').file_format == 'text' and
        v.get_value('exec_option')['batch_size'] == 0 and
        v.get_value('exec_option')['disable_codegen'] == False and
        v.get_value('exec_option')['num_nodes'] != 1)

  def setup_method(self, method):
    """Resets and repopulates the HDFS staging directories before each test."""
    # Cleanup any existing files in the test tables and staging directories.
    call(["hadoop", "fs", "-rm", "-r", "-f", "/test-warehouse/test_load*"], shell=False)
    call(["hadoop", "fs", "-rm", "-r", "-f", "/tmp/load_data/"], shell=False)
    # Create staging directories.
    for i in range(1, 6):
      call(["hadoop", "fs", "-mkdir", "-p", "/tmp/load_data/%d" % i], shell=False)
    # Copy some data files from existing tables to validate load.
    for i in range(1, 4):
      call(["hadoop", "fs", "-cp",
            "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
            "/tmp/load_data/%d" % i], shell=False)
    # Each partition in alltypesaggmultifiles should have 4 data files.
    for i in range(4, 6):
      call(["hadoop", "fs", "-cp",
            '/test-warehouse/alltypesaggmultifiles/year=2010/month=1/day=1/*',
            '/tmp/load_data/%d/' % i], shell=False)
    # Make some hidden files ('.'/'_' prefixes). LOAD DATA must skip these,
    # which test_load verifies after running the workload.
    call(["hadoop", "fs", "-cp",
          "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
          "/tmp/load_data/3/.100101.txt"], shell=False)
    call(["hadoop", "fs", "-cp",
          "/test-warehouse/alltypes/year=2010/month=1/100101.txt",
          "/tmp/load_data/3/_100101.txt"], shell=False)

  @classmethod
  def __assert_hdfs_path_exists(cls, path):
    # 'hadoop fs -test -e <path>' exits with 0 iff the path exists.
    assert 0 == call(["hadoop", "fs", "-test", "-e", path], shell=False), \
        "Path does not exist."

  @pytest.mark.execute_serially
  def test_load(self, vector):
    self.run_test_case('QueryTest/load', vector)
    # The hidden files should not have been moved as part of the load operation.
    self.__assert_hdfs_path_exists("/tmp/load_data/3/.100101.txt")
    self.__assert_hdfs_path_exists("/tmp/load_data/3/_100101.txt")