mirror of
https://github.com/apache/impala.git
synced 2026-01-03 06:00:52 -05:00
Some tests have constraints that were there only to help reduce runtime which reduces coverage when running in exhaustive mode. The majority of the constraints are because it adds no value to run the test across additional dimensions (or it is invalid to run with those dimensions). Updates the tests that have legitimate constraints to use two new helper methods for constraining the table format dimension: create_uncompressed_text_dimension() create_parquet_dimension() These will create a dimension that will produce a single test vector, either uncompressed text or parquet respectively. Change-Id: Id85387c1efd5d192f8059ef89934933389bfe247 Reviewed-on: http://gerrit.ent.cloudera.com:8080/2149 Reviewed-by: Lenni Kuff <lskuff@cloudera.com> Tested-by: jenkins (cherry picked from commit e02acbd469bc48c684b2089405b4a20552802481) Reviewed-on: http://gerrit.ent.cloudera.com:8080/2290
171 lines
7.1 KiB
Python
Executable File
171 lines
7.1 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
# Impala tests for queries that query metadata and set session settings
|
|
import logging
|
|
import pytest
|
|
from subprocess import call
|
|
from tests.common.test_vector import *
|
|
from tests.common.impala_test_suite import *
|
|
|
|
# TODO: For these tests to pass, all table metadata must be created exhaustively.
|
|
# the tests should be modified to remove that requirement.
|
|
class TestMetadataQueryStatements(ImpalaTestSuite):
|
|
@classmethod
|
|
def get_workload(self):
|
|
return 'functional-query'
|
|
|
|
@classmethod
|
|
def add_test_dimensions(cls):
|
|
super(TestMetadataQueryStatements, cls).add_test_dimensions()
|
|
cls.TestMatrix.add_dimension(create_exec_option_dimension(
|
|
cluster_sizes=ALL_NODES_ONLY,
|
|
disable_codegen_options=[False],
|
|
batch_sizes=[0],
|
|
sync_ddl=[0, 1]))
|
|
cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
|
|
|
|
def setup_method(self, method):
|
|
self.cleanup_db('hive_test_db')
|
|
|
|
def teardown_method(self, method):
|
|
self.cleanup_db('hive_test_db')
|
|
|
|
def test_show(self, vector):
|
|
self.run_test_case('QueryTest/show', vector)
|
|
|
|
def test_show_stats(self, vector):
|
|
self.run_test_case('QueryTest/show-stats', vector)
|
|
|
|
def test_describe_table(self, vector):
|
|
self.run_test_case('QueryTest/describe', vector)
|
|
|
|
def test_describe_formatted(self, vector):
|
|
# Describe a partitioned table.
|
|
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes")
|
|
self.exec_and_compare_hive_and_impala_hs2(
|
|
"describe formatted functional_text_lzo.alltypes")
|
|
# Describe an unpartitioned table.
|
|
self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem")
|
|
self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl")
|
|
|
|
try:
|
|
# Describe a view
|
|
self.exec_and_compare_hive_and_impala_hs2(\
|
|
"describe formatted functional.alltypes_view_sub")
|
|
except AssertionError:
|
|
pytest.xfail("Investigate minor difference in displaying null vs empty values")
|
|
|
|
def test_use_table(self, vector):
|
|
self.run_test_case('QueryTest/use', vector)
|
|
|
|
@pytest.mark.execute_serially
|
|
def test_impala_sees_hive_created_tables_and_databases(self, vector):
|
|
self.client.set_configuration(vector.get_value('exec_option'))
|
|
db_name = 'hive_test_db'
|
|
tbl_name = 'testtbl'
|
|
call(["hive", "-e", "DROP DATABASE IF EXISTS %s CASCADE" % db_name])
|
|
self.client.execute("invalidate metadata")
|
|
|
|
result = self.client.execute("show databases")
|
|
assert db_name not in result.data
|
|
|
|
call(["hive", "-e", "CREATE DATABASE %s" % db_name])
|
|
|
|
# Run 'invalidate metadata <table name>' when the parent database does not exist.
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name))
|
|
result = self.client.execute("show databases")
|
|
assert db_name not in result.data
|
|
|
|
# Create a table external to Impala.
|
|
call(["hive", "-e", "CREATE TABLE %s.%s (i int)" % (db_name, tbl_name)])
|
|
|
|
# Impala does not know about this database or table.
|
|
result = self.client.execute("show databases")
|
|
assert db_name not in result.data
|
|
|
|
# Run 'invalidate metadata <table name>'. It should add the database and table
|
|
# in to Impala's catalog.
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name))
|
|
result = self.client.execute("show databases")
|
|
assert db_name in result.data
|
|
|
|
result = self.client.execute("show tables in %s" % db_name)
|
|
assert tbl_name in result.data
|
|
assert len(result.data) == 1
|
|
|
|
self.client.execute("create table %s.%s (j int)" % (db_name, tbl_name + "_test"))
|
|
call(["hive", "-e", "drop table %s.%s" % (db_name, tbl_name + "_test")])
|
|
|
|
# Re-create the table in Hive. Use the same name, but different casing.
|
|
call(["hive", "-e", "CREATE TABLE %s.%s (i bigint)" % (db_name, tbl_name + "_TEST")])
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name + "_Test"))
|
|
result = self.client.execute("show tables in %s" % db_name)
|
|
assert tbl_name + "_test" in result.data
|
|
assert tbl_name + "_Test" not in result.data
|
|
assert tbl_name + "_TEST" not in result.data
|
|
|
|
# Verify this table is the version created in Hive (the column should be BIGINT)
|
|
result = self.client.execute("describe %s.%s" % (db_name, tbl_name + '_test'))
|
|
assert 'bigint' in result.data[0]
|
|
|
|
self.client.execute("drop table %s.%s" % (db_name, tbl_name + "_TEST"))
|
|
|
|
# Make sure we can actually use the table
|
|
self.client.execute(("insert overwrite table %s.%s "
|
|
"select 1 from functional.alltypes limit 5"
|
|
% (db_name, tbl_name)))
|
|
result = self.execute_scalar("select count(*) from %s.%s" % (db_name, tbl_name))
|
|
assert int(result) == 5
|
|
|
|
# Should be able to call invalidate metadata multiple times on the same table.
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name))
|
|
self.client.execute("refresh %s.%s" % (db_name, tbl_name))
|
|
result = self.client.execute("show tables in %s" % db_name)
|
|
assert tbl_name in result.data
|
|
|
|
# Can still use the table.
|
|
result = self.execute_scalar("select count(*) from %s.%s" % (db_name, tbl_name))
|
|
assert int(result) == 5
|
|
|
|
# Run 'invalidate metadata <table name>' when no table exists with that name.
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name + '2'))
|
|
result = self.client.execute("show tables in %s" % db_name);
|
|
assert len(result.data) == 1
|
|
assert tbl_name in result.data
|
|
|
|
# Create another table
|
|
call(["hive", "-e", "CREATE TABLE %s.%s (i int)" % (db_name, tbl_name + '2')])
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name + '2'))
|
|
result = self.client.execute("show tables in %s" % db_name)
|
|
assert tbl_name + '2' in result.data
|
|
assert tbl_name in result.data
|
|
|
|
# Drop the table, and then verify invalidate metadata <table name> removes the
|
|
# table from the catalog.
|
|
call(["hive", "-e", "DROP TABLE %s.%s " % (db_name, tbl_name)])
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name))
|
|
result = self.client.execute("show tables in %s" % db_name)
|
|
assert tbl_name + '2' in result.data
|
|
assert tbl_name not in result.data
|
|
|
|
# Should be able to call invalidate multiple times on the same table when the table
|
|
# does not exist.
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name))
|
|
result = self.client.execute("show tables in %s" % db_name)
|
|
assert tbl_name + '2' in result.data
|
|
assert tbl_name not in result.data
|
|
|
|
# Drop the parent database (this will drop all tables). Then invalidate the table
|
|
call(["hive", "-e", "DROP DATABASE %s CASCADE" % db_name])
|
|
self.client.execute("invalidate metadata %s.%s" % (db_name, tbl_name + '2'))
|
|
result = self.client.execute("show tables in %s" % db_name);
|
|
assert len(result.data) == 0
|
|
|
|
# Requires a refresh to see the dropped database
|
|
result = self.client.execute("show databases");
|
|
assert db_name in result.data
|
|
|
|
self.client.execute("invalidate metadata")
|
|
result = self.client.execute("show databases");
|
|
assert db_name not in result.data
|