Mirror of https://github.com/apache/impala.git
This commit redoes some of the self-event detection logic, specifically for partition
events. Before the patch, the self-event identifiers for a partition were stored at the
table level when the partition events were generated. This was problematic because,
unlike ADD_PARTITION and DROP_PARTITION events, one ALTER_PARTITION event is generated
per partition. As a result, when multiple ALTER_PARTITION events are generated, only the
first one is identified as a self-event and the rest are processed. This patch fixes this
by adding the self-event identifiers to each partition, so that when an ALTER_PARTITION
event is later received it is evaluated against the state stored in the HdfsPartition.
The patch also makes sure that the event processor takes a table lock during self-event
evaluation to avoid races with other parts of the code that try to modify the table at
the same time.

Additionally, this patch changes the event processor to refresh a loaded table
(incomplete tables are not refreshed) when an ALTER_TABLE event is received, instead of
invalidating the table. This makes the event processor consistent with all the other
event types. In the future, we should add a flag to choose the preferred behavior
(invalidate or refresh).

This patch also fixes the following related issues:
1. The self-event logic was not triggered for alter database events when the user
   modifies the comment on the database.
2. For queries like "alter table add if not exists partition ...", the partition is not
   added when it already exists. The self-event identifiers should not be added in such
   cases since no event is expected from such queries.
3. Changed the wait_for_event_processing test util method in EventProcessorUtils to use
   a more deterministic way of determining whether the catalog updates have propagated
   to the impalads, instead of waiting for an arbitrary duration of time. This also
   speeds up the event processing tests significantly.

Testing Done:
1. Added an e2e self-events test which runs multiple Impala queries and makes sure the
   resulting events are skipped during processing.
2. Ran MetastoreEventsProcessorTest.
3. Ran core tests on CDH and CDP builds.

Change-Id: I9b4148f6be0f9f946c8ad8f314d64b095731744c
Reviewed-on: http://gerrit.cloudera.org:8080/14799
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
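To make the partition-level change concrete, here is a minimal, self-contained Python
sketch of the idea described above. It is not the real catalogd implementation (which is
Java and stores the identifiers on HdfsPartition objects); the names Partition, Table,
pending_self_events, record_self_event and is_self_event are hypothetical. It only
illustrates why identifiers kept per partition let every ALTER_PARTITION event generated
by the same Impala DDL be recognized as a self-event, rather than only the first one.

class Partition(object):
  def __init__(self, name):
    self.name = name
    # Hypothetical per-partition store of self-event identifiers (the fix). Before the
    # patch, a single table-level store meant the first incoming ALTER_PARTITION event
    # consumed the identifier and the remaining events were processed as external ones.
    self.pending_self_events = set()


class Table(object):
  def __init__(self, partitions):
    self.partitions = dict((p.name, p) for p in partitions)

  def record_self_event(self, partition_name, event_id):
    # Called when Impala itself alters a partition: remember, on that partition, the
    # identifier that will later appear in the corresponding ALTER_PARTITION event.
    self.partitions[partition_name].pending_self_events.add(event_id)

  def is_self_event(self, partition_name, event_id):
    # Called when an ALTER_PARTITION event is received: evaluate it against the state
    # of its own partition, so every self-generated event is skipped.
    pending = self.partitions[partition_name].pending_self_events
    if event_id in pending:
      pending.discard(event_id)
      return True
    return False


# One Impala DDL touching two partitions produces two ALTER_PARTITION events.
table = Table([Partition("p=1"), Partition("p=2")])
table.record_self_event("p=1", "version-42")
table.record_self_event("p=2", "version-42")
assert table.is_self_event("p=1", "version-42")
assert table.is_self_event("p=2", "version-42")  # also skipped, unlike before the fix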
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Impala tests for Hive Metastore, covering the expected propagation
# of metadata from Hive to Impala or Impala to Hive. Each test
# modifies the metadata via Hive and checks that the modification
# succeeded by querying Impala, or vice versa.

import json
import logging
import time

import requests

LOG = logging.getLogger('event_processor_utils')
LOG.setLevel(level=logging.DEBUG)


class EventProcessorUtils(object):

  DEFAULT_CATALOG_URL = "http://localhost:25020"

  @staticmethod
  def wait_for_event_processing(test_suite, timeout=10):
    """Waits until the event processor has synced to the latest event id from the
    metastore or the timeout value in seconds is reached, whichever is earlier."""
    success = False
    assert timeout > 0
    assert test_suite.hive_client is not None
    current_event_id = EventProcessorUtils.get_current_notification_id(
        test_suite.hive_client)
    LOG.info("Waiting until events processor syncs to event id: " + str(current_event_id))
    end_time = time.time() + timeout
    while time.time() < end_time:
      last_synced_id = EventProcessorUtils.get_last_synced_event_id()
      if last_synced_id >= current_event_id:
        LOG.debug("Metric last-synced-event-id has reached the desired value: "
                  + str(last_synced_id))
        success = True
        break
      time.sleep(0.1)
    if not success:
      raise Exception(
          "Event processor did not sync till last known event id {0} "
          "within {1} seconds".format(current_event_id, timeout))
    # Wait until the impalad catalog versions agree with the catalogd's version so that
    # the catalog update triggered by the events is visible on every impalad.
    catalogd_version = test_suite.cluster.catalogd.service.get_catalog_version()
    for impalad in test_suite.cluster.impalads:
      impalad.service.wait_for_metric_value("catalog.curr-version", catalogd_version)
    return success

  @staticmethod
  def get_event_processor_metrics():
    """Scrapes the catalog's /events webpage and returns a dictionary with the event
    processor metrics."""
    response = requests.get("%s/events?json" % EventProcessorUtils.DEFAULT_CATALOG_URL)
    assert response.status_code == requests.codes.ok
    varz_json = json.loads(response.text)
    metrics = varz_json["event_processor_metrics"].strip().splitlines()

    # Helper to strip whitespace from both halves of a "key: value" line.
    def strip_pair(p):
      return (p[0].strip(), p[1].strip())

    # Split only on the first ':' so that values containing colons (e.g. timestamps)
    # are not truncated.
    pairs = [strip_pair(kv.split(':', 1)) for kv in metrics if kv]
    return dict(pairs)

  @staticmethod
  def get_event_processor_metric(metric_key, default_val=None):
    """Returns the event processor metric from the /events catalog debug page, or
    default_val if the metric is not present."""
    metrics = EventProcessorUtils.get_event_processor_metrics()
    if metric_key not in metrics:
      return default_val
    return metrics[metric_key]

  @staticmethod
  def get_last_synced_event_id():
    """Returns the last-synced-event-id metric as an integer."""
    metrics = EventProcessorUtils.get_event_processor_metrics()
    assert 'last-synced-event-id' in metrics
    return int(metrics['last-synced-event-id'])

  @staticmethod
  def get_event_processor_status():
    """Returns the current status of the EventsProcessor."""
    metrics = EventProcessorUtils.get_event_processor_metrics()
    assert 'status' in metrics
    return metrics['status']

  @staticmethod
  def get_current_notification_id(hive_client):
    """Returns the current notification event id from the metastore."""
    assert hive_client is not None
    return int(hive_client.get_current_notificationEventId().eventId)
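For context, here is a hedged sketch of how an end-to-end test might rely on this module
to check Hive-to-Impala propagation deterministically. The surrounding test-framework
pieces (the ImpalaTestSuite-style run_stmt_in_hive and client.execute helpers, the
unique_database fixture, and the "ACTIVE" status string) are assumptions, not guaranteed
by this file.

class TestHiveEventPropagation(object):
  """Hypothetical test illustrating the intended usage of EventProcessorUtils."""

  def test_partition_added_in_hive_is_visible(self, unique_database):
    tbl = "%s.event_test" % unique_database
    self.client.execute("create table %s (i int) partitioned by (p int)" % tbl)
    # Modify metadata on the Hive side; the metastore generates an ADD_PARTITION event.
    self.run_stmt_in_hive("alter table %s add partition (p=1)" % tbl)
    # Block until catalogd has processed all pending metastore events and every impalad
    # has received the resulting catalog update, instead of sleeping for an arbitrary
    # amount of time.
    EventProcessorUtils.wait_for_event_processing(self)
    # The processor should still be healthy, and the new partition visible in Impala.
    assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
    result = self.client.execute("show partitions %s" % tbl)
    assert "p=1" in str(result.get_data())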