mirror of
https://github.com/apache/impala.git
synced 2025-12-31 15:00:10 -05:00
Change-Id: Ie0c4c458b0919978b03ebaba28bf37950dd34643 Reviewed-on: http://gerrit.ent.cloudera.com:8080/3009 Tested-by: jenkins Reviewed-by: Anusha Dasarakothapalli <anusha.dasarakothapalli@cloudera.com> Reviewed-on: http://gerrit.ent.cloudera.com:8080/3091
295 lines
12 KiB
Python
295 lines
12 KiB
Python
#!/usr/bin/env python
|
|
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import shlex
|
|
from subprocess import call
|
|
from tests.common.test_vector import *
|
|
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
|
|
from tests.common.impala_test_suite import *
|
|
|
|
# The purpose of view compatibility testing is to check whether views created in Hive
|
|
# can be queried in Impala and vice versa. A test typically consists of
|
|
# the following actions specified in different test sections.
|
|
# 1. create a view with a certain definition using Hive and Impala
|
|
# 2. explain a "select *" query on the view created by Hive using Hive and Impala
|
|
# 3. explain a "select *" query on the view created by Impala using Hive and Impala
|
|
# For each of the steps above its corresponding test section specifies our expectations
|
|
# on whether Impala and Hive will succeed or fail.
|
|
#
|
|
# Impala and Hive's SQL dialects are not fully compatible. We intentionally rely
|
|
# on the view creation mechanism instead of just testing various SQL statements in
|
|
# Impala and Hive, because view creation transforms the original view definition into
|
|
# a so-called "extended view definition". As this process of transformation could
|
|
# potentially change in Impala and/or Hive, simply testing various SQL statements
|
|
# in Impala and Hive would be insufficient.
|
|
class TestViewCompatibility(ImpalaTestSuite):
  """Checks that views created by Hive and by Impala can be used by both engines.

  For every test section the same view definition is created once through Hive and
  once through Impala. An "explain select *" on each of the two views is then run in
  the engines named by the test section, and every success or failure is compared
  against the expectations listed in that section.
  """
  TEST_DB_NAME = "view_compat_test_db"
  VALID_SECTION_NAMES = ["CREATE_VIEW", "CREATE_VIEW_RESULTS",
                         "QUERY_HIVE_VIEW_RESULTS", "QUERY_IMPALA_VIEW_RESULTS"]

  @classmethod
  def get_workload(cls):
    """Returns the name of the workload these tests belong to."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Restricts the test matrix to a single uncompressed-text dimension."""
    super(TestViewCompatibility, cls).add_test_dimensions()
    # don't use any exec options, running exactly once is fine
    cls.TestMatrix.clear_dimension('exec_option')
    # There is no reason to run these tests using all dimensions.
    cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))

  def setup_method(self, method):
    """Cleans up and re-creates the test database before each test method."""
    self.cleanup_db(self.TEST_DB_NAME)
    self.execute_query("create database %s" % (self.TEST_DB_NAME))

  def teardown_method(self, method):
    """Cleans up the test database after each test method."""
    self.cleanup_db(self.TEST_DB_NAME)

  def test_view_compatibility(self, vector):
    """Runs all test cases of the views-compatibility test file."""
    self.__run_view_compat_test_case('QueryTest/views-compatibility', vector)

  def __run_view_compat_test_case(self, test_file_name, vector):
    """
    Runs a view-compatibility test file, containing the following sections:

    ---- CREATE_VIEW
    contains a view creation statement to be executed in Impala and Hive
    ---- CREATE_VIEW_RESULTS
    whether we expect the view creation in Impala/Hive to succeed/fail
    ---- QUERY_HIVE_VIEW_RESULTS
    whether we expect to be able to query the view created by Hive in Hive/Impala
    ---- QUERY_IMPALA_VIEW_RESULTS
    whether we expect to be able to query the view created by Impala in Hive/Impala

    The 'vector' argument is accepted for symmetry with other tests but is not used.
    """
    sections = self.load_query_test_file(self.get_workload(), test_file_name,
                                         self.VALID_SECTION_NAMES)

    for test_section in sections:
      # Constructing the test case validates the test section.
      test_case = ViewCompatTestCase(test_section, test_file_name, self.TEST_DB_NAME)
      hive_create_sql = test_case.get_create_view_sql('HIVE')
      impala_create_sql = test_case.get_create_view_sql('IMPALA')
      create_exp_res = test_case.get_create_exp_res()

      # create views in Hive and Impala checking against the expected results
      self.__exec_in_hive(hive_create_sql, hive_create_sql, create_exp_res)
      self.__invalidate_hive_view(test_case)
      self.__exec_in_impala(impala_create_sql, impala_create_sql, create_exp_res)

      # explain a simple query on the view created by Hive in Hive and Impala
      if test_case.has_query_hive_section():
        self.__explain_view(test_case, 'HIVE', hive_create_sql)

      # explain a simple query on the view created by Impala in Hive and Impala
      if test_case.has_query_impala_section():
        self.__explain_view(test_case, 'IMPALA', impala_create_sql)

      # drop the views without checking success or failure
      self.__exec_in_hive(test_case.get_drop_view_sql('HIVE'), hive_create_sql, None)
      self.__invalidate_hive_view(test_case)
      self.__exec_in_impala(test_case.get_drop_view_sql('IMPALA'), impala_create_sql,
                            None)

  def __invalidate_hive_view(self, test_case):
    """Invalidates Impala's metadata for the Hive-created view of test_case.

    The view may or may not exist in Hive at this point, so an exception mentioning
    TableNotFoundException is tolerated. Any other ImpalaBeeswaxException fails the
    test.
    """
    try:
      self.client.invalidate_table(test_case.hive_view_name)
    except ImpalaBeeswaxException as e:
      assert "TableNotFoundException" in str(e)

  def __explain_view(self, test_case, creator, create_view_sql):
    """Explains a query on the view created by 'creator' in Hive and/or Impala.

    creator: 'HIVE' or 'IMPALA', the engine that created the view being queried.
    create_view_sql: the statement that created the view, used in failure messages.
    Only the engines that have an entry in the expected results are exercised.
    """
    exp_res = test_case.get_query_exp_res(creator)
    query_sql = test_case.get_query_view_sql(creator)
    if 'HIVE' in exp_res:
      self.__exec_in_hive(query_sql, create_view_sql, exp_res)
    if 'IMPALA' in exp_res:
      self.__exec_in_impala(query_sql, create_view_sql, exp_res)

  def __exec_in_hive(self, sql_str, create_view_sql, exp_res):
    """Runs sql_str via 'hive -e' and compares the outcome against exp_res.

    A zero exit status of the hive command counts as success.
    """
    hive_ret = call(['hive', '-e', sql_str])
    self.__cmp_expected(sql_str, create_view_sql, exp_res, "HIVE", hive_ret == 0)

  def __exec_in_impala(self, sql_str, create_view_sql, exp_res):
    """Runs sql_str in Impala and compares the outcome against exp_res."""
    try:
      success = self.execute_query(sql_str).success
    except Exception:
      # Consider any query exception a failure, but let KeyboardInterrupt and
      # SystemExit propagate so that a test run can still be aborted.
      success = False
    self.__cmp_expected(sql_str, create_view_sql, exp_res, "IMPALA", success)

  def __cmp_expected(self, sql_str, create_view_sql, exp_res, engine, success):
    """Fails the test if 'success' does not match the expectation for 'engine'.

    sql_str: the statement that was executed.
    create_view_sql: the statement that created the view under test.
    exp_res: dict mapping engine name to the expected success (True/False), or None
        to skip the comparison altogether.
    engine: 'HIVE' or 'IMPALA'.
    success: whether executing sql_str actually succeeded.
    Raises AssertionError on a mismatch or if exp_res has no entry for 'engine'.
    """
    if exp_res is None:
      return
    if engine not in exp_res:
      # Report an incomplete results section explicitly instead of with a KeyError.
      msg = ('No expected result specified for %s when executing\n%s\n'
             'while testing a view created as\n%s')
      raise AssertionError(msg % (engine, sql_str, create_view_sql))

    expected_success = exp_res[engine]
    if expected_success and not success:
      msg = '%s failed to execute\n%s\nwhile testing a view created as\n%s'
    elif success and not expected_success:
      msg = ('%s unexpectedly succeeded in executing\n%s\nwhile testing '
             'a view created as\n%s')
    else:
      return
    raise AssertionError(msg % (engine, sql_str, create_view_sql))
|
|
|
|
# Represents one view-compatibility test case. Performs validation of the test sections
|
|
# and provides SQL to execute for each section.
|
|
class ViewCompatTestCase(object):
  """Represents one view-compatibility test case.

  Validates the sections of a test case and provides the SQL to execute for each
  section. The view of the test case is created twice, once per engine, under the
  names <db>.<view>_hive and <db>.<view>_impala.
  """
  RESULT_KEYS = ["IMPALA", "HIVE"]

  def __init__(self, test_section, test_file_name, test_db_name):
    """Validates test_section and derives all SQL statements of the test case.

    test_section: dict mapping section names to their text.
    test_file_name: name of the test file, only used in error messages.
    test_db_name: database in which the views are created.
    Raises AssertionError if a required section is missing, if a results section
    has no IMPALA/HIVE entry, or if the CREATE VIEW statement is malformed or uses a
    database-qualified view name.
    """
    if 'CREATE_VIEW' not in test_section:
      self.__raise_missing_section(test_file_name, test_section, 'CREATE_VIEW')
    if 'CREATE_VIEW_RESULTS' not in test_section:
      self.__raise_missing_section(test_file_name, test_section, 'CREATE_VIEW_RESULTS')

    # get map of expected results from test sections
    self.create_exp_res = self.__get_expected_results(
        test_section['CREATE_VIEW_RESULTS'])
    self.query_hive_exp_res = self.__get_optional_results(
        test_section, 'QUERY_HIVE_VIEW_RESULTS')
    self.query_impala_exp_res = self.__get_optional_results(
        test_section, 'QUERY_IMPALA_VIEW_RESULTS')
    if self.query_hive_exp_res is None and self.query_impala_exp_res is None:
      self.__raise_missing_section(
          test_file_name, test_section,
          'QUERY_HIVE_VIEW_RESULTS or QUERY_IMPALA_VIEW_RESULTS')

    # clean test section, remove comments etc.
    self.create_view_sql = QueryTestSectionReader.build_query(test_section['CREATE_VIEW'])

    view_name, name_offset = self.__locate_view_name(self.create_view_sql)
    if "." in view_name:
      msg = ('Error in test file %s. Found unexpected view name %s that is '
             'qualified with a database')
      raise AssertionError(msg % (test_file_name, view_name))

    # add db prefix and suffixes to indicate which engine created the view
    self.hive_view_name = test_db_name + '.' + view_name + '_hive'
    self.impala_view_name = test_db_name + '.' + view_name + '_impala'

    # Splice the new names in at the exact position of the original name. A plain
    # str.replace() could hit an earlier occurrence of the name, e.g. inside the
    # "view" keyword for a view that is called "v".
    sql_before_name = self.create_view_sql[:name_offset]
    sql_after_name = self.create_view_sql[name_offset + len(view_name):]
    self.hive_create_view_sql = sql_before_name + self.hive_view_name + sql_after_name
    self.impala_create_view_sql =\
        sql_before_name + self.impala_view_name + sql_after_name

    # The query attributes are only set if the corresponding section exists;
    # has_query_hive_section()/has_query_impala_section() rely on that.
    # SQL to explain a simple query on the view created by Hive in Hive and Impala
    if self.query_hive_exp_res is not None:
      self.query_hive_view_sql = 'explain select * from %s' % (self.hive_view_name)

    # SQL to explain a simple query on the view created by Impala in Hive and Impala
    if self.query_impala_exp_res is not None:
      self.query_impala_view_sql = 'explain select * from %s' % (self.impala_view_name)

    self.drop_hive_view_sql = "drop view %s" % (self.hive_view_name)
    self.drop_impala_view_sql = "drop view %s" % (self.impala_view_name)

  def __raise_missing_section(self, test_file_name, test_section, required):
    """Raises an AssertionError reporting that a 'required' section is missing."""
    raise AssertionError('Error in test file %s. Test cases require a %s section.\n%s'
                         % (test_file_name, required, pprint.pformat(test_section)))

  def __get_optional_results(self, test_section, section_name):
    """Returns the parsed expected results of section_name, or None if absent."""
    if section_name not in test_section:
      return None
    return self.__get_expected_results(test_section[section_name])

  def __locate_view_name(self, create_view_sql):
    """Returns a (view name, offset of the name in create_view_sql) tuple.

    Accepts "create view <name> ..." and "create view if not exists <name> ...".
    Raises AssertionError for anything else.
    """
    lexer = shlex.shlex(create_view_sql)
    # Keep a database-qualified name such as "db.v" together as one token, so that
    # callers are able to detect (and reject) the qualification.
    lexer.wordchars += '.'
    tokens = list(lexer)

    invalid_msg = 'Error in test. Invalid CREATE VIEW statement: %s' % (create_view_sql)
    # sanity check the create view statement
    if len(tokens) < 3:
      raise AssertionError(invalid_msg)
    if tokens[0].lower() != "create" or tokens[1].lower() != "view":
      raise AssertionError(invalid_msg)

    name_index = 2
    if tokens[2].lower() == "if":
      # expect an "if not exists" clause, which puts the name at the sixth token
      name_index = 5
      if len(tokens) <= name_index:
        raise AssertionError(invalid_msg)

    # Step over the leading keywords in the statement text to find where the name
    # token itself starts.
    offset = 0
    for keyword in tokens[:name_index]:
      offset = create_view_sql.index(keyword, offset) + len(keyword)
    view_name = tokens[name_index]
    return view_name, create_view_sql.index(view_name, offset)

  def __get_view_name(self, create_view_sql):
    """Returns the view name used in the given CREATE VIEW statement."""
    return self.__locate_view_name(create_view_sql)[0]

  def __get_expected_results(self, section_text):
    """Parses a results section into a dict mapping engine name to a bool.

    Every non-blank line is expected to look like ENGINE=SUCCESS or ENGINE=<other>.
    SUCCESS (case-insensitive) maps to True, everything else to False. Whitespace
    around the engine name and the value is ignored.
    Raises AssertionError if the section names neither IMPALA nor HIVE.
    """
    exp_res = dict()
    for line in section_text.splitlines():
      engine, _, outcome = line.partition("=")
      engine = engine.strip()
      if not engine:
        continue
      exp_res[engine] = outcome.strip().lower() == 'success'

    # check that the results section contains at least one entry
    if not any(key in exp_res for key in self.RESULT_KEYS):
      raise AssertionError('No valid entry in expected-results section. '
                           'Expected an IMPALA or HIVE entry.')
    return exp_res

  def __engine_attr(self, engine, hive_attr, impala_attr):
    """Returns the attribute that belongs to 'engine' (case-insensitive).

    The attribute is looked up lazily by name, because the query attributes only
    exist if the test case has the corresponding section.
    Raises AssertionError for an engine other than HIVE or IMPALA.
    """
    engine = engine.upper()
    if engine == "HIVE":
      return getattr(self, hive_attr)
    if engine == "IMPALA":
      return getattr(self, impala_attr)
    raise AssertionError("Unknown execution engine %s" % (engine))

  def get_create_view_sql(self, engine):
    """Returns the CREATE VIEW statement to run in the given engine."""
    return self.__engine_attr(engine, 'hive_create_view_sql', 'impala_create_view_sql')

  def get_create_exp_res(self):
    """Returns the expected results of the view creation, keyed by engine name."""
    return self.create_exp_res

  def get_drop_view_sql(self, engine):
    """Returns the DROP VIEW statement for the view created by the given engine."""
    return self.__engine_attr(engine, 'drop_hive_view_sql', 'drop_impala_view_sql')

  def get_query_exp_res(self, engine):
    """Returns the expected results of querying the view created by 'engine'.

    The result is None if the test case has no such section.
    """
    return self.__engine_attr(engine, 'query_hive_exp_res', 'query_impala_exp_res')

  def get_query_view_sql(self, engine):
    """Returns the explain statement for the view created by the given engine.

    Raises AttributeError if the test case has no query section for that view.
    """
    return self.__engine_attr(engine, 'query_hive_view_sql', 'query_impala_view_sql')

  def has_query_hive_section(self):
    """Returns True if the view created by Hive is to be queried."""
    return hasattr(self, 'query_hive_view_sql')

  def has_query_impala_section(self):
    """Returns True if the view created by Impala is to be queried."""
    return hasattr(self, 'query_impala_view_sql')
|