If the leopard front end is run on a pristine system, the front end will raise
exceptions because the directories containing reports and schedules do not yet exist.
Since it's the controller's responsibility to create and populate these filesystem
artifacts, the front end should just protect against the exception and log a warning.

Testing: Before the fix, the reload_reports thread would immediately raise an exception
and the thread would die; loading the Web app in a browser would cause an internal
server error. After the fix, the reload_reports thread logs a warning but continues to
run, and loading the Web app in a browser presents an appropriately empty listing of
run names and schedules.

Change-Id: I186174db136694d3df04d9159362c6eeaa70b5b8
Reviewed-on: http://gerrit.cloudera.org:8080/2164
Reviewed-by: Michael Brown <mikeb@cloudera.com>
Tested-by: Internal Jenkins
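The guard amounts to the following pattern (a minimal sketch of what reload_reports()
and front_page() below do; PATH_TO_REPORTS and LOG are the module-level names defined
in this file):

    try:
      report_ids = os.listdir(PATH_TO_REPORTS)
    except EnvironmentError as e:
      # The controller has not created the directory yet; fall back to an empty
      # listing and log a warning instead of letting the thread or request die.
      report_ids = []
      LOG.warn('{0}: {1}'.format(e.filename, e.strerror))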
#!/usr/bin/env impala-python
# Copyright (c) 2015 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import pickle
import stat
import time
from time import sleep
from flask import Flask, render_template, request
from schedule_item import ScheduleItem
from controller import PATH_TO_REPORTS, PATH_TO_SCHEDULE
from threading import Thread
from tests.comparison.query_profile import DefaultProfile
from tests.comparison.db_types import (
    Boolean,
    Char,
    Decimal,
    Float,
    Int,
    TYPES,
    Timestamp)

MAX_REPORT_AGE = 21 * 24 * 3600  # 21 days
SLEEP_LENGTH = 20 * 60  # 20 min

LOG = logging.getLogger('leopard.front_end')

app = Flask(__name__)
app.reports = {}


ASSETS = {'bootstrap_css': 'css/bootstrap.min.css',
          'hljs_css': 'css/default.css',
          'favicon': 'favicon.ico',
          'bootstrap_js': 'js/bootstrap.min.js',
          'hljs_js': 'js/highlight.pack.js'}


@app.route('/reports/<report_id>')
def show_report(report_id):
  '''Renders a report as HTML.'''

  if report_id not in app.reports:
    with open(os.path.join(PATH_TO_REPORTS, report_id), 'r') as f:
      app.reports[report_id] = pickle.load(f)

  report = app.reports[report_id]

  def get_next_id():
    '''Generates all natural numbers.'''
    i = 0
    while True:
      yield str(i)
      i += 1

  gen = get_next_id()

  # Generate HTML for displaying the crashes
  outer_crashes_list = []
  for first_impala_frame in report.grouped_stacks:
    crashes_list = []
    # results are sorted on the length of the query SQL
    for result in sorted(report.grouped_stacks[first_impala_frame],
                         key=lambda result: len(result['test_sql'])):
      inner_id = next(gen)
      inner_title = 'Lines in Stack: {0}'.format(
          len(result['formatted_stack'].split('\n')))
      content = ('<h4>Impala Query:</h4><pre><code>{0}</code></pre>'
                 '<h4>Stack:</h4><pre>{1}</pre>').format(
                     result['test_sql'], result['formatted_stack'][:50000])
      crashes_list.append((inner_id, inner_title, content))
    id = next(gen)
    title = first_impala_frame
    outer_crashes_list.append((id, title, crashes_list))

  # Generate HTML for displaying result row count mismatches
  row_count_list = []
  for result in sorted(report.grouped_results['row_counts'],
                       key=lambda result: len(result['test_sql'])):
    id = next(gen)
    title = 'Impala Rows: {0}, Postgres Rows: {1}'.format(
        result['test_row_count'], result['ref_row_count'])
    content = ('<h4>Impala Query:</h4><pre><code>{0}</code></pre>'
               '<h4>Postgres Query:</h4><pre><code>{1}</code></pre>').format(
                   result['test_sql'], result['ref_sql'])
    row_count_list.append((id, title, content))

  # Generate HTML for displaying result content mismatches
  mismatch_list = []
  for result in sorted(report.grouped_results['mismatch'],
                       key=lambda result: len(result['test_sql'])):
    id = next(gen)
    title = 'Query Length: {0}'.format(len(result['test_sql']))
    content = ('<h4>Impala Query:</h4><pre><code>{0}</code></pre>'
               '<h4>Postgres Query:</h4><pre><code>{1}</code></pre>'
               '<h4>Mismatch Impala Row:</h4><pre><code>{2}</code></pre>'
               '<h4>Mismatch Postgres Row:</h4><pre><code>{3}</code></pre>').format(
                   result['test_sql'],
                   result['ref_sql'],
                   result['mismatch_test_row'],
                   result['mismatch_ref_row'])
    mismatch_list.append((id, title, content))

  return render_template(
      'report.template',
      assets=ASSETS,
      report=report,
      outer_crashes_list=outer_crashes_list,
      row_count_list=row_count_list,
      mismatch_list=mismatch_list)


@app.route('/start_run', methods=['POST', 'GET'])
def start_run():
  '''Method that receives POST requests and generates a schedule item.'''

  if request.method != 'POST': return 'fail'

  if 'time_limit' in request.form:
    # This is a custom run because the time_limit item is present only in the
    # custom_run form. Values will be extracted from the form and a new profile will
    # be generated.

    new_profile = DefaultProfile()

    # Bounds
    new_profile._bounds['MAX_NESTED_QUERY_COUNT'] = (
        int(request.form['max_nested_query_count_from']),
        int(request.form['max_nested_query_count_to']))
    new_profile._bounds['MAX_NESTED_EXPR_COUNT'] = (
        int(request.form['max_nested_expr_count_from']),
        int(request.form['max_nested_expr_count_to']))
    new_profile._bounds['SELECT_ITEM_COUNT'] = (
        int(request.form['select_item_count_from']),
        int(request.form['select_item_count_to']))
    new_profile._bounds['WITH_TABLE_COUNT'] = (
        int(request.form['with_table_count_from']),
        int(request.form['with_table_count_to']))
    new_profile._bounds['TABLE_COUNT'] = (
        int(request.form['table_count_from']),
        int(request.form['table_count_to']))
    new_profile._bounds['ANALYTIC_LEAD_LAG_OFFSET'] = (
        int(request.form['analytic_lead_lag_offset_from']),
        int(request.form['analytic_lead_lag_offset_to']))
    new_profile._bounds['ANALYTIC_WINDOW_OFFSET'] = (
        int(request.form['analytic_window_offset_from']),
        int(request.form['analytic_window_offset_to']))

    # Select Item Category
    new_profile._weights['SELECT_ITEM_CATEGORY']['AGG'] = int(
        request.form['select_agg'])
    new_profile._weights['SELECT_ITEM_CATEGORY']['ANALYTIC'] = int(
        request.form['select_analytic'])
    new_profile._weights['SELECT_ITEM_CATEGORY']['BASIC'] = int(
        request.form['select_basic'])

    # Types
    new_profile._weights['TYPES'][Boolean] = int(request.form['types_boolean'])
    new_profile._weights['TYPES'][Char] = int(request.form['types_char'])
    new_profile._weights['TYPES'][Decimal] = int(request.form['types_decimal'])
    new_profile._weights['TYPES'][Float] = int(request.form['types_float'])
    new_profile._weights['TYPES'][Int] = int(request.form['types_int'])
    new_profile._weights['TYPES'][Timestamp] = int(request.form['types_timestamp'])

    # Join
    new_profile._weights['JOIN']['INNER'] = int(request.form['join_inner'])
    new_profile._weights['JOIN']['LEFT'] = int(request.form['join_left'])
    new_profile._weights['JOIN']['RIGHT'] = int(request.form['join_right'])
    new_profile._weights['JOIN']['FULL_OUTER'] = int(request.form['join_full_outer'])
    new_profile._weights['JOIN']['CROSS'] = int(request.form['join_cross'])

    # Optional Query Clauses Probabilities
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = float(
        request.form['optional_with'])
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['FROM'] = float(
        request.form['optional_from'])
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['WHERE'] = float(
        request.form['optional_where'])
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['GROUP_BY'] = float(
        request.form['optional_group_by'])
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['HAVING'] = float(
        request.form['optional_having'])
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['UNION'] = float(
        request.form['optional_union'])
    new_profile._probabilities['OPTIONAL_QUERY_CLAUSES']['ORDER_BY'] = float(
        request.form['optional_order_by'])

    # Optional Analytic Clauses Probabilities
    new_profile._probabilities['OPTIONAL_ANALYTIC_CLAUSES']['PARTITION_BY'] = float(
        request.form['optional_analytic_partition_by'])
    new_profile._probabilities['OPTIONAL_ANALYTIC_CLAUSES']['ORDER_BY'] = float(
        request.form['optional_analytic_order_by'])
    new_profile._probabilities['OPTIONAL_ANALYTIC_CLAUSES']['WINDOW'] = float(
        request.form['optional_analytic_window'])

    # Misc Probabilities
    new_profile._probabilities['MISC']['INLINE_VIEW'] = float(
        request.form['misc_inline_view'])
    new_profile._probabilities['MISC']['SELECT_DISTINCT'] = float(
        request.form['misc_select_distinct'])
    new_profile._probabilities['MISC']['SCALAR_SUBQUERY'] = float(
        request.form['misc_scalar_subquery'])
    new_profile._probabilities['MISC']['UNION_ALL'] = float(
        request.form['misc_union_all'])

    # Analytic Designs
    new_profile._flags['ANALYTIC_DESIGNS']['TOP_LEVEL_QUERY_WITHOUT_LIMIT'] = \
        'analytic_designs_top_level_no_limit' in request.form
    new_profile._flags['ANALYTIC_DESIGNS']['DETERMINISTIC_ORDER_BY'] = \
        'analytic_designs_deterministic_order_by' in request.form
    new_profile._flags['ANALYTIC_DESIGNS']['NO_ORDER_BY'] = \
        'analytic_designs_no_order_by' in request.form
    new_profile._flags['ANALYTIC_DESIGNS']['ONLY_SELECT_ITEM'] = \
        'analytic_designs_only_select_item' in request.form
    new_profile._flags['ANALYTIC_DESIGNS']['UNBOUNDED_WINDOW'] = \
        'analytic_designs_unbounded_window' in request.form
    new_profile._flags['ANALYTIC_DESIGNS']['RANK_FUNC'] = \
        'analytic_designs_rank_func' in request.form

    schedule_item = ScheduleItem(
        run_name=request.form['run_name'],
        query_profile=new_profile,
        time_limit_sec=int(request.form['time_limit']),
        git_command=request.form['git_command'],
        parent_job='')
  else:
    # Run based on previous run
    schedule_item = ScheduleItem(
        run_name=request.form['run_name'],
        query_profile=DefaultProfile(),
        time_limit_sec=24 * 3600,  # Default time limit is 24 hours
        git_command=request.form['git_command'],
        parent_job=request.form['report_id'])

  schedule_item.save_pickle()

  return 'success'


@app.route("/custom_run")
|
|
def custom_run():
|
|
'''Render the custom run page.
|
|
'''
|
|
return render_template(
|
|
'custom_run.template',
|
|
assets=ASSETS)
|
|
|
|
def reload_reports():
  '''Reload reports in the reports directory every 20 minutes. Loaded reports are placed
  into app.reports. This allows new reports to appear on the front page. Only reports
  from the past 21 days (MAX_REPORT_AGE) are loaded. This method should be run in a
  separate thread.
  '''
  while True:
    new_reports = {}
    try:
      report_ids = os.listdir(PATH_TO_REPORTS)
    except EnvironmentError as e:
      report_ids = []
      LOG.warn('{0}: {1}'.format(e.filename, e.strerror))
    for report_id in report_ids:
      file_age = time.time() - os.stat(
          os.path.join(PATH_TO_REPORTS, report_id))[stat.ST_MTIME]
      if file_age < MAX_REPORT_AGE:
        # We want this report
        if report_id in app.reports:
          new_reports[report_id] = app.reports[report_id]
        else:
          with open(os.path.join(PATH_TO_REPORTS, report_id), 'r') as f:
            new_reports[report_id] = pickle.load(f)
    app.reports = new_reports
    sleep(SLEEP_LENGTH)


@app.route("/")
|
|
def front_page():
|
|
'''Renders the front page as HTML.
|
|
'''
|
|
|
|
try:
|
|
schedule_item_ids = os.listdir(PATH_TO_SCHEDULE)
|
|
except EnvironmentError as e:
|
|
schedule_item_ids = []
|
|
LOG.warn('{0}: {1}'.format(e.filename, e.strerror))
|
|
|
|
schedule_items = []
|
|
|
|
for schedule_item_id in schedule_item_ids:
|
|
schedule_items.append(pickle.load(
|
|
open(os.path.join(PATH_TO_SCHEDULE, schedule_item_id), 'r')))
|
|
|
|
return render_template(
|
|
'index.template',
|
|
assets=ASSETS,
|
|
reports=sorted(
|
|
app.reports.items(), key=lambda (k, report): report.run_date, reverse=True),
|
|
schedule_items=schedule_items)
|
|
|
|
if __name__ == '__main__':
  logging.basicConfig(
      format='%(asctime)s %(levelname)s [%(name)s.%(threadName)s:%(lineno)s]: '
             '%(message)s',
      level=logging.INFO)
  thread = Thread(name='reload_reports', target=reload_reports)
  thread.daemon = True
  thread.start()
  app.run(host='0.0.0.0', debug=False)