# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest
import os
import time
import threading

from subprocess import check_call
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.custom_cluster_test_suite import (
    CustomClusterTestSuite,
    DEFAULT_CLUSTER_SIZE)
from tests.util.filesystem_utils import IS_ISILON, IS_LOCAL

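# Every impalad in the cluster plus the catalogd registers as a statestore
# subscriber, so a fully healthy cluster has DEFAULT_CLUSTER_SIZE + 1 subscribers.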
NUM_SUBSCRIBERS = DEFAULT_CLUSTER_SIZE + 1


class TestHiveMetaStoreFailure(CustomClusterTestSuite):
  """Tests to validate that the Catalog Service continues to function even if the HMS
  fails."""

  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def setup_class(cls):
    if cls.exploration_strategy() != 'exhaustive':
      pytest.skip('These tests only run in exhaustive')
    super(TestHiveMetaStoreFailure, cls).setup_class()

  @classmethod
  def run_hive_server(cls):
    script = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/run-hive-server.sh')
    run_cmd = [script]
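    # Local-filesystem and Isilon runs restart only the metastore, not HiveServer2,
    # which these tests do not need in those configurations.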
    if IS_LOCAL or IS_ISILON:
      run_cmd.append('-only_metastore')
    check_call(run_cmd, close_fds=True)

  @classmethod
  def teardown_class(cls):
    # Make sure the metastore is running even if the test aborts somewhere unexpected
    # before restarting the metastore itself.
    cls.run_hive_server()
    super(TestHiveMetaStoreFailure, cls).teardown_class()

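  # The tests below run with the on-demand local catalog: --use_local_catalog makes
  # the impalads fetch metadata as needed, and --catalog_topic_mode=minimal makes the
  # catalogd publish only minimal catalog topic updates.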
  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args='--use_local_catalog --catalog_topic_mode=minimal',
      catalogd_args='--catalog_topic_mode=minimal')
  def test_hms_service_dies(self):
    """Regression test for IMPALA-823 to verify that the catalog service works
    properly when HMS connections fail."""
    # Force the table to be uncached and then kill the Hive metastore.
    tbl_name = "functional.alltypes"
    self.client.execute("invalidate metadata %s" % tbl_name)
    kill_cmd = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/kill-hive-server.sh')
    check_call([kill_cmd], close_fds=True)

    try:
      self.client.execute("describe %s" % tbl_name)
    except ImpalaBeeswaxException as e:
      print(str(e))
      assert "Failed to load metadata for table: %s. Running 'invalidate metadata %s' "\
          "may resolve this problem." % (tbl_name, tbl_name) in str(e)
    self.run_hive_server()

    self.client.execute("invalidate metadata %s" % tbl_name)
    self.client.execute("describe %s" % tbl_name)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args='--use_local_catalog --catalog_topic_mode=minimal',
      catalogd_args='--catalog_topic_mode=minimal')
  def test_hms_client_retries(self):
    """Tests that a running query triggers the retry logic in
    RetryingMetaStoreClient."""
    # Force the table to be uncached and then kill the Hive metastore.
    tbl_name = "functional.alltypes"
    self.client.execute("invalidate metadata %s" % tbl_name)
    kill_cmd = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/kill-hive-server.sh')
    check_call([kill_cmd], close_fds=True)

    # Run a query asynchronously.
    query = "select * from {0} limit 1".format(tbl_name)
    thread = threading.Thread(target=lambda:
        self.execute_query_expect_success(self.client, query))
    thread.start()

    # Wait 1 second for the catalogd to start contacting HMS, then start HMS.
    time.sleep(1)
    self.run_hive_server()

    # Wait for the query to complete, then assert that the HMS client retried the
    # connection.
    thread.join()
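    # expected_count=-1 relaxes the exact-count check, since the number of reconnect
    # attempts logged here is timing-dependent.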
    self.assert_catalogd_log_contains("INFO",
        "MetaStoreClient lost connection. Attempting to reconnect", expected_count=-1)


class TestCatalogHMSFailures(CustomClusterTestSuite):
  """Tests Catalog behavior when the HMS is not present."""

  @classmethod
  def setup_class(cls):
    if cls.exploration_strategy() != 'exhaustive':
      pytest.skip('These tests only run in exhaustive')
    super(TestCatalogHMSFailures, cls).setup_class()

  @classmethod
  def run_hive_server(cls):
    script = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/run-hive-server.sh')
    run_cmd = [script]
    # Local-filesystem and Isilon runs restart only the metastore, not HiveServer2.
    if IS_LOCAL or IS_ISILON:
      run_cmd.append('-only_metastore')
    check_call(run_cmd, close_fds=True)

  @classmethod
  def cleanup_process(cls, proc):
    """Kills the given process and waits for it to exit, ignoring errors if it has
    already terminated."""
    try:
      proc.kill()
    except Exception:
      pass
    try:
      proc.wait()
    except Exception:
      pass

  @classmethod
  def teardown_class(cls):
    # Make sure the metastore is running even if the test aborts somewhere unexpected
    # before restarting the metastore itself.
    cls.run_hive_server()
    super(TestCatalogHMSFailures, cls).teardown_class()

  @classmethod
  def reload_metadata(cls, client):
    """Issues a global INVALIDATE METADATA, then runs a statement that requires the
    metadata to be loaded again."""
    client.execute('invalidate metadata')
    client.execute('show databases')

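  # initial_hms_cnxn_timeout_s bounds how long the catalogd keeps retrying its
  # initial connection to the HMS at startup before giving up.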
  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args='--use_local_catalog --catalog_topic_mode=minimal',
      catalogd_args='--initial_hms_cnxn_timeout_s=120 --catalog_topic_mode=minimal')
  def test_kill_hms_after_catalog_init(self):
    """IMPALA-4278: If the HMS dies after catalogd initialization, SQL statements that
    force a metadata load should fail quickly. After an HMS restart, metadata loading
    should work again."""
    # Make sure that catalogd is connected to HMS.
    impalad = self.cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    self.reload_metadata(client)

    # Kill Hive.
    kill_cmd = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/kill-hive-server.sh')
    check_call([kill_cmd], close_fds=True)

    # Metadata load should fail quickly.
    start = time.time()
    try:
      self.reload_metadata(client)
    except ImpalaBeeswaxException as e:
      assert "Connection refused" in str(e)
    else:
      assert False, "Metadata load should have failed"
    end = time.time()
    assert end - start < 30, "Metadata load did not fail quickly enough"

    # Start Hive.
    self.run_hive_server()

    # Metadata load should work now.
    self.reload_metadata(client)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args='--use_local_catalog --catalog_topic_mode=minimal',
      catalogd_args='--initial_hms_cnxn_timeout_s=120 --catalog_topic_mode=minimal')
  def test_start_catalog_before_hms(self):
    """IMPALA-4278: If catalogd is started with initial_hms_cnxn_timeout_s set to a
    value greater than the HMS startup time, it still manages to establish a
    connection to the HMS even if the HMS is started a little later."""
    # Make sure that catalogd is connected to HMS.
    impalad = self.cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    self.reload_metadata(client)

    # Kill Hive.
    kill_cmd = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/kill-hive-server.sh')
    check_call([kill_cmd], close_fds=True)

    # Kill the catalogd.
    catalogd = self.cluster.catalogd
    catalogd.kill()

    # The statestore should detect that the catalog service has gone down.
    statestored = self.cluster.statestored
    statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS - 1, timeout=60)

    try:
      # Start the catalog service asynchronously.
      catalogd.start(wait_until_ready=False)
      # Wait 10s to be sure that the catalogd is in the 'trying to connect' phase of
      # its startup.
      time.sleep(10)

      # Start Hive and wait for the catalogd to come up.
      self.run_hive_server()
      statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS, timeout=60)
      impalad.service.wait_for_metric_value('catalog.ready', True, timeout=60)

      # Metadata load should work now.
      self.reload_metadata(client)
    finally:
      # Make sure to clean up the catalogd process that we started.
      self.cleanup_process(catalogd)

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args='--use_local_catalog --catalog_topic_mode=minimal',
      catalogd_args='--initial_hms_cnxn_timeout_s=30 --catalog_topic_mode=minimal')
  def test_catalogd_fails_if_hms_started_late(self):
    """IMPALA-4278: If the HMS is not started within initial_hms_cnxn_timeout_s, then
    the catalogd fails."""
    # Make sure that catalogd is connected to HMS.
    impalad = self.cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    self.reload_metadata(client)

    # Kill Hive.
    kill_cmd = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/kill-hive-server.sh')
    check_call([kill_cmd], close_fds=True)

    # Kill the catalogd.
    catalogd = self.cluster.catalogd
    catalogd.kill()

    # The statestore should detect that the catalog service has gone down.
    statestored = self.cluster.statestored
    statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS - 1, timeout=60)

    try:
      # Start the catalog service asynchronously.
      catalogd.start(wait_until_ready=False)
      # Wait 40s to be sure that the catalogd has been trying to connect to the HMS
      # for longer than initial_hms_cnxn_timeout_s.
      time.sleep(40)

      # Start Hive.
      self.run_hive_server()

      # The catalogd should have terminated by now.
      assert not catalogd.get_pid(), "catalogd should have terminated"
    finally:
      # Make sure to clean up the catalogd process that we started.
      self.cleanup_process(catalogd)

    try:
      # Start the catalog service again and wait for it to come up.
      catalogd.start()
      statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS, timeout=60)
      impalad.service.wait_for_metric_value('catalog.ready', True, timeout=60)

      # Metadata load should work now.
      self.reload_metadata(client)
    finally:
      # Make sure to clean up the catalogd process that we started.
      self.cleanup_process(catalogd)