mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
Change-Id: I19900ae029876d8f74169eda0f08f5be3509fbaf Reviewed-on: http://gerrit.ent.cloudera.com:8080/2946 Reviewed-by: Nong Li <nong@cloudera.com> Tested-by: jenkins
126 lines
5.1 KiB
Python
126 lines
5.1 KiB
Python
#!/usr/bin/env python
|
|
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
# Validates limit on scan nodes
|
|
#
|
|
import logging
|
|
import pytest
|
|
from copy import copy
|
|
from subprocess import call
|
|
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
|
|
from tests.common.impala_test_suite import *
|
|
from tests.common.test_vector import *
|
|
from tests.common.impala_cluster import ImpalaCluster
|
|
from tests.common.test_dimensions import create_exec_option_dimension
|
|
from tests.util.shell_util import exec_process
|
|
|
|
# End to end test that hdfs caching is working.
|
|
class TestHdfsCaching(ImpalaTestSuite):
|
|
@classmethod
|
|
def get_workload(self):
|
|
return 'tpch'
|
|
|
|
@classmethod
|
|
def add_test_dimensions(cls):
|
|
super(TestHdfsCaching, cls).add_test_dimensions()
|
|
cls.TestMatrix.add_constraint(lambda v:\
|
|
v.get_value('exec_option')['batch_size'] == 0)
|
|
cls.TestMatrix.add_constraint(lambda v:\
|
|
v.get_value('table_format').file_format == "text")
|
|
|
|
# The tpch nation table is cached as part of data loading. We'll issue a query
|
|
# against this table and verify the metric is updated correctly.
|
|
@pytest.mark.execute_serially
|
|
def test_table_is_cached(self, vector):
|
|
cached_read_metric = "impala-server.io-mgr.cached-bytes-read"
|
|
query_string = "select count(*) from tpch.nation"
|
|
expected_bytes_delta = 2199
|
|
impala_cluster = ImpalaCluster()
|
|
|
|
# Collect the cached read metric on all the impalads before running the query
|
|
cached_bytes_before = list()
|
|
for impalad in impala_cluster.impalads:
|
|
cached_bytes_before.append(impalad.service.get_metric_value(cached_read_metric))
|
|
|
|
# Execute the query.
|
|
result = self.execute_query(query_string)
|
|
assert(len(result.data) == 1)
|
|
assert(result.data[0] == '25')
|
|
|
|
# Read the metrics again.
|
|
cached_bytes_after = list()
|
|
for impalad in impala_cluster.impalads:
|
|
cached_bytes_after.append(impalad.service.get_metric_value(cached_read_metric))
|
|
|
|
# Verify that the cached bytes increased by the expected number on exactly one of
|
|
# the impalads.
|
|
num_metrics_increased = 0
|
|
assert(len(cached_bytes_before) == len(cached_bytes_after))
|
|
for i in range(0, len(cached_bytes_before)):
|
|
assert(cached_bytes_before[i] == cached_bytes_after[i] or\
|
|
cached_bytes_before[i] + expected_bytes_delta == cached_bytes_after[i])
|
|
if cached_bytes_after[i] > cached_bytes_before[i]:
|
|
num_metrics_increased = num_metrics_increased + 1
|
|
|
|
if num_metrics_increased != 1:
|
|
# Test failed, print the metrics
|
|
for i in range(0, len(cached_bytes_before)):
|
|
print "%d %d" % (cached_bytes_before[i], cached_bytes_after[i])
|
|
assert(False)
|
|
|
|
def test_cache_cancellation(self, vector):
|
|
""" This query runs on some mix of cached and not cached tables. The query has
|
|
a limit so it exercises the cancellation paths. Regression test for
|
|
IMPALA-1019. """
|
|
num_iters = 100
|
|
query_string = """
|
|
with t1 as (select int_col x, bigint_col y from functional.alltypes limit 2),
|
|
t2 as (select int_col x, bigint_col y from functional.alltypestiny limit 2),
|
|
t3 as (select int_col x, bigint_col y from functional.alltypessmall limit 2)
|
|
select * from t1, t2, t3 where t1.x = t2.x and t2.x = t3.x """
|
|
|
|
# Run this query for some iterations since it is timing dependent.
|
|
for x in xrange(1, num_iters):
|
|
result = self.execute_query(query_string)
|
|
assert(len(result.data) == 2)
|
|
|
|
class TestHdfsCachingDdl(ImpalaTestSuite):
|
|
@classmethod
|
|
def get_workload(self):
|
|
return 'functional-query'
|
|
|
|
@classmethod
|
|
def add_test_dimensions(cls):
|
|
super(TestHdfsCachingDdl, cls).add_test_dimensions()
|
|
cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
|
|
|
|
cls.TestMatrix.add_constraint(lambda v:\
|
|
v.get_value('table_format').file_format == 'text' and \
|
|
v.get_value('table_format').compression_codec == 'none')
|
|
|
|
@pytest.mark.execute_serially
|
|
@pytest.mark.xfail(run=False, reason="IMPALA-1037. This test is flaky")
|
|
def test_caching_ddl(self, vector):
|
|
self.client.execute("drop table if exists functional.cached_tbl_part")
|
|
self.client.execute("drop table if exists functional.cached_tbl_nopart")
|
|
|
|
# Get the number of cache requests before starting the test
|
|
num_entries_pre = get_num_cache_requests()
|
|
self.run_test_case('QueryTest/hdfs-caching', vector)
|
|
|
|
# After running this test case we should be left with 6 cache requests.
|
|
# In this case, 1 for each table + 4 more for each cached partition.
|
|
assert num_entries_pre == get_num_cache_requests() - 6
|
|
|
|
self.client.execute("drop table functional.cached_tbl_part")
|
|
self.client.execute("drop table functional.cached_tbl_nopart")
|
|
|
|
# Dropping the tables should cleanup cache entries leaving us with the same
|
|
# total number of entries
|
|
assert num_entries_pre == get_num_cache_requests()
|
|
|
|
def get_num_cache_requests():
|
|
"""Returns the number of outstanding cache requests"""
|
|
rc, stdout, stderr = exec_process("hdfs cacheadmin -listDirectives -stats")
|
|
assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout, stderr)
|
|
return len(stdout.split('\n'))
|