Files
impala/tests/custom_cluster/test_result_spooling.py
Sahil Takiar 34d132c513 IMPALA-8825: Add additional counters to PlanRootSink
Adds the counters RowsSent and RowsSentRate to the PLAN_ROOT_SINK
section of the profile:

  PLAN_ROOT_SINK:
     - PeakMemoryUsage: 4.01 MB (4202496)
     - RowBatchGetWaitTime: 0.000ns
     - RowBatchSendWaitTime: 0.000ns
     - RowsSent: 10 (10)
     - RowsSentRate: 416.00 /sec

RowsSent tracks the number of rows sent to the PlanRootSink via
PlanRootSink::Send. RowsSentRate tracks the rate that rows are sent to
the PlanRootSink.

Adds the counters NumRowsFetched, NumRowsFetchedFromCache, and
RowMaterializationRate to the ImpalaServer section of the profile.

  ImpalaServer:
     - ClientFetchWaitTimer: 11.999ms
     - NumRowsFetched: 10 (10)
     - NumRowsFetchedFromCache: 10 (10)
     - RowMaterializationRate: 9.00 /sec
     - RowMaterializationTimer: 1s007ms

NumRowsFetched tracks the number of rows fetched directly from the
PlanRootSink, excluding any rows served from the query results cache.
NumRowsFetchedFromCache tracks the number of rows fetched from the query
results cache.
RowMaterializationRate tracks the rate at which rows are materialized.
RowMaterializationTimer already existed and tracks how much time is
spent materializing rows.

Testing:
* Added tests to test_fetch_first.py and query_test/test_fetch.py
* Enabled some tests in test_fetch_first.py that were pending
the completion of IMPALA-8819
* Ran core tests

Change-Id: Id9e101e2f3e2bf8324e149c780d35825ceecc036
Reviewed-on: http://gerrit.cloudera.org:8080/14180
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Sahil Takiar <stakiar@cloudera.com>
2019-09-16 19:36:11 +00:00

87 lines
3.8 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
import re
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import (
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
from tests.util.web_pages_util import (
get_num_completed_backends,
get_mem_admitted_backends_debug_page)
class TestDedicatedCoordinator(CustomClusterTestSuite):
  """Result-spooling tests run against a cluster that has a dedicated coordinator
  (one exclusive coordinator plus one executor)."""

  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestDedicatedCoordinator, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
    # There's no reason to test this on other file formats/compression codecs right now
    cls.ImpalaTestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(cluster_size=2, num_exclusive_coordinators=1)
  def test_dedicated_coordinator(self, vector):
    """With result spooling enabled on a dedicated-coordinator cluster, verify that
    once all results have been spooled (1) every backend except the coordinator is
    shut down and its admitted memory released, and (2) after the client drains all
    rows the coordinator backend completes and releases its memory as well."""
    expected_rows = 2000
    sql = "select id from functional_parquet.alltypes order by id limit {0}".format(
        expected_rows)
    exec_options = vector.get_value('exec_option')
    exec_options['spool_query_results'] = 'true'
    # Seconds to wait for the query to reach the FINISHED state before raising a
    # Timeout exception.
    finish_timeout = 10
    query_handle = self.execute_query_async(sql, exec_options)
    try:
      # FINISHED means all rows have been spooled, so the executor backend should
      # already be done and its admitted memory returned.
      self.wait_for_state(
          query_handle, self.client.QUERY_STATES['FINISHED'], finish_timeout)

      def all_rows_sent():
        # The RowsSent counter reaching the row limit confirms spooling completed.
        profile = self.client.get_runtime_profile(query_handle)
        return re.search("RowsSent:.*({0})".format(expected_rows), profile)

      self.assert_eventually(finish_timeout, 0.5, all_rows_sent)
      assert "NumCompletedBackends: 1 (1)" in \
          self.client.get_runtime_profile(query_handle)
      admitted = get_mem_admitted_backends_debug_page(self.cluster)
      assert admitted['executor'][0] == 0
      assert admitted['coordinator'] > 0
      coord_service = self.cluster.impalads[0].service
      assert get_num_completed_backends(
          coord_service, query_handle.get_handle().id) == 1

      # Draining every row should complete the coordinator backend too, releasing
      # the rest of the admitted memory.
      self.client.fetch(sql, query_handle)
      assert "NumCompletedBackends: 2 (2)" in \
          self.client.get_runtime_profile(query_handle)
      admitted = get_mem_admitted_backends_debug_page(self.cluster)
      assert admitted['executor'][0] == 0
      assert admitted['coordinator'] == 0
      assert get_num_completed_backends(
          coord_service, query_handle.get_handle().id) == 2
    finally:
      self.client.close_query(query_handle)