Files
impala/tests/query_test/test_lifecycle.py
Tim Armstrong 2ca7f8e7c0 IMPALA-7995: part 1: fixes for e2e dockerised impala tests
This fixes all core e2e tests running on my local dockerised
minicluster build. I do not yet have a CI job or script running
but I wanted to get feedback on these changes sooner. The second
part of the change will include the CI script and any follow-on
fixes required for the exhaustive tests.

The following fixes were required:
* Detect docker_network from TEST_START_CLUSTER_ARGS
* get_webserver_port() does not depend on the caller passing in
  the default webserver port. It failed previously because it
  relied on start-impala-cluster.py setting -webserver_port
  for *all* processes.
* Add SkipIf markers for tests that don't make sense or are
  non-trivial to fix for containerised Impala.
* Support loading Impala-lzo plugin from host for tests that depend on
  it.
* Fix some tests that had 'localhost' hardcoded - instead it should
  be $INTERNAL_LISTEN_HOST, which defaults to localhost.
* Fix bug with sorting impala daemons by backend port, which is
  the same for all dockerised impalads.

Testing:
I ran tests locally as follows after having set up a docker network and
starting other services:

  ./buildall.sh -noclean -notests -ninja
  ninja -j $IMPALA_BUILD_THREADS docker_images
  export TEST_START_CLUSTER_ARGS="--docker_network=impala-cluster"
  export FE_TEST=false
  export BE_TEST=false
  export JDBC_TEST=false
  export CLUSTER_TEST=false
  ./bin/run-all-tests.sh

Change-Id: Iee86cbd2c4631a014af1e8cef8e1cd523a812755
Reviewed-on: http://gerrit.cloudera.org:8080/12639
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2019-04-13 02:42:32 +00:00

97 lines
4.2 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
import time
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.impala_cluster import ImpalaCluster
from tests.verifiers.metric_verifier import MetricVerifier
# TODO: Debug actions leak into other tests in the same suite (if not explicitly
# unset). Ensure they get unset between tests.
class TestFragmentLifecycleWithDebugActions(ImpalaTestSuite):
  """Using the debug action interface, check that failed queries correctly clean up *all*
  fragments."""

  # Name of the metric that each test waits on, per impalad, after the failed query.
  IN_FLIGHT_FRAGMENTS = "impala-server.num-fragments-in-flight"

  @classmethod
  def get_workload(self):
    """Returns the name of the workload these tests run against."""
    return 'functional'

  @pytest.mark.execute_serially
  def test_failure_in_prepare(self):
    """Injects a failure via DEBUG_ACTION, runs a simple scan query that is expected to
    fail, then waits for the in-flight fragment metric on every impalad to reach 0."""
    # Build one verifier per impalad up front so that every daemon is checked afterwards.
    verifiers = [MetricVerifier(i.service)
                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
    # Fail the scan node
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes")
      # Reaching this line means the injected failure did not surface. This must be
      # 'assert False': asserting on a bare non-empty string is always true and would
      # silently pass. The AssertionError is not caught by the handler below.
      assert False, "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass

    for v in verifiers:
      v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0)

  @pytest.mark.execute_serially
  def test_failure_in_prepare_multi_fragment(self):
    """Tests that if one fragment fails, the others are cleaned up during the ensuing
    cancellation."""
    verifiers = [MetricVerifier(i.service)
                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
    # Fail the scan node
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")

    # Force a query plan that will have three fragments or more.
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes a JOIN [SHUFFLE] "
                          "functional.alltypes b on a.id = b.id")
      # See test_failure_in_prepare(): must be 'assert False' so that an unexpectedly
      # successful query fails the test.
      assert False, "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass

    for v in verifiers:
      # Long timeout required because fragments may be blocked while sending data. The
      # default value of --datastream_sender_timeout_ms is 120s before they wake up and
      # cancel themselves.
      #
      # TODO: Fix when we have cancellable RPCs.
      v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0, timeout=125)
class TestFragmentLifecycle(ImpalaTestSuite):
  """Fragment instance lifecycle tests that do not rely on debug actions."""

  def test_finst_cancel_when_query_complete(self):
    """Regression test for IMPALA-4295: if a query returns all its rows before all its
    finsts have completed, it should cancel the finsts and complete promptly."""
    start = time.time()

    # Query designed to produce 1024 (the limit) rows very quickly from the first union
    # child, but the second one takes a very long time to complete. Without fix for
    # IMPALA-4295, the whole query waits for the second child to complete.

    # Due to IMPALA-5671, the limit must be a multiple of the row batch size - if it's
    # reached during production of a row batch, processing moves to the second child, and
    # the query will take a long time to complete.
    self.client.execute("with l as (select 1 from functional.alltypes), r as"
      " (select count(*) from tpch_parquet.lineitem a cross join tpch_parquet.lineitem b)"
      "select * from l union all (select * from r) LIMIT 1024")
    duration = time.time() - start

    # Query typically completes in < 2s, but if cross join is fully evaluated, will take >
    # 10 minutes. Pick 2 minutes as a reasonable midpoint to avoid false negatives.
    # The elapsed time is a float, so it is converted explicitly when building the
    # failure message.
    assert duration < 120, "Query took too long to complete: " + str(duration) + "s"