mirror of
https://github.com/apache/impala.git
synced 2025-12-19 09:58:28 -05:00
To remove the dependency on Python 2, existing scripts need to use python3 rather than python. These commands find those locations (for impala-python and regular python): git grep impala-python | grep -v impala-python3 | grep -v impala-python-common | grep -v init-impala-python git grep bin/python | grep -v python3 This removes or switches most of these locations by various means: 1. If a python file has a #!/bin/env impala-python (or python) but doesn't have a main function, it removes the hash-bang and makes sure that the file is not executable. 2. Most scripts can simply switch from impala-python to impala-python3 (or python to python3) with minimal changes. 3. The cm-api pypi package (which doesn't support Python 3) has been replaced by the cm-client pypi package and interfaces have changed. Rather than migrating the code (which hasn't been used in years), this deletes the old code and stops installing cm-api into the virtualenv. The code can be restored and revamped if there is any interest in interacting with CM clusters. 4. This switches tests/comparison over to impala-python3, but this code has bit-rotted. Some pieces can be run manually, but it can't be fully verified with Python 3. It shouldn't hold back the migration on its own. 5. This also replaces locations of impala-python in comments / documentation / READMEs. 6. kazoo (used for interacting with HBase) needed to be upgraded to a version that supports Python 3. The newest version of kazoo requires upgrades of other component versions, so this uses kazoo 2.8.0 to avoid needing other upgrades. The two remaining uses of impala-python are: - bin/cmake_aux/create_virtualenv.sh - bin/impala-env-versioned-python These will be removed separately when we drop Python 2 support completely. In particular, these are useful for testing impala-shell with Python 2 until we stop supporting Python 2 for impala-shell. 
The docker-based tests still use /usr/bin/python, but this can be switched over independently (and doesn't impact impala-python) Testing: - Ran core job - Ran build + dataload on Centos 7, Redhat 8 - Manual testing of individual scripts (except some bitrotted areas like the random query generator) Change-Id: If209b761290bc7e7c716c312ea757da3e3bca6dc Reviewed-on: http://gerrit.cloudera.org:8080/23468 Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Michael Smith <michael.smith@cloudera.com>
504 lines
22 KiB
Python
Executable File
504 lines
22 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# Usage: critique-gerrit-review.py [--dryrun]
|
|
#
|
|
# This script is meant to run on a jenkins.impala.io build slave and post back comments
|
|
# to a code review. It does not need to run on all supported platforms so we use system
|
|
# python instead of the full Impala virtualenv.
|
|
#
|
|
# This script runs in the context of a source checkout. It posts comments for issues
|
|
# introduced between HEAD^ and HEAD. It picks up metadata from environment variables
|
|
# set by the jenkins gerrit trigger: GERRIT_CHANGE_NUMBER, GERRIT_PATCHSET_NUMBER, etc.
|
|
#
|
|
# It uses the gerrit ssh interface to post the review, connecting as
|
|
# impala-public-jenkins.
|
|
# Ref: https://gerrit-review.googlesource.com/Documentation/cmd-review.html
|
|
#
|
|
# Dependencies:
|
|
# ssh, pip, venv
|
|
#
|
|
# TODO: generalise to other warnings
|
|
# * clang-tidy
|
|
|
|
import json
import os
import os.path
import re
import sys
import venv

from argparse import ArgumentParser
from collections import defaultdict
from os import environ
from subprocess import (CalledProcessError, check_call, check_output, DEVNULL, Popen,
                        PIPE)
|
|
|
|
|
|
# Versions of the lint tooling installed into the critic virtualenv by
# setup_virtualenv().
FLAKE8_VERSION = "7.1.1"
FLAKE8_DIFF_VERSION = "0.2.2"
# pyparsing is required by thrift_parser, which is imported lazily after the
# virtualenv has been created.
PYPARSING_VERSION = "3.1.4"
FLAKE8_UNUSED_ARG_VERSION = "0.0.13"

# Layout of the virtualenv created by setup_virtualenv().
VENV_PATH = "gerrit_critic_venv"
VENV_BIN = os.path.join(VENV_PATH, "bin")
PIP_PATH = os.path.join(VENV_BIN, "pip3")
FLAKE8_DIFF_PATH = os.path.join(VENV_BIN, "flake8-diff")

# Limit on length of lines in source files.
LINE_LIMIT = 90

# Source file extensions that we should apply our line limit and whitespace rules to.
SOURCE_EXTENSIONS = {".cc", ".h", ".java", ".py", ".sh", ".thrift"}

# Source file patterns that we exclude from our checks.
EXCLUDE_FILE_PATTERNS = [
    re.compile(r".*be/src/kudu.*"),  # Kudu source code may have different rules.
    re.compile(r".*-benchmark.cc"),  # Benchmark files tend to have long lines.
    re.compile(r".*/function-registry/impala_functions.py"),  # Many long strings.
    re.compile(r".*/catalog/BuiltinsDb.java"),  # Many long strings.
    re.compile(r".*/codegen/gen_ir_descriptions.py"),  # Many long strings.
    re.compile(r".*shell/ext-py/.*"),  # Third-party code.
    re.compile(r".*be/src/thirdparty/.*"),  # Third-party code.
    re.compile(r".*/.*\.xml\.py")  # Long lines in config template files.
]

# Thrift files that are not used in communication between impalad and catalogd/statestore
EXCLUDE_THRIFT_FILES = {
    "BackendGflags.thrift",  # Only used between FE and BE
    "beeswax.thrift",  # Only used between client and impalad
    "DataSinks.thrift",  # Only used in impalads
    "Descriptors.thrift",  # Only used in impalads
    "ExecStats.thrift",  # Only used in impalads
    "LineageGraph.thrift",  # Only used in impalads
    "NetworkTest.thrift",  # Unused in production
    "Planner.thrift",  # Only used in impalads
    "PlanNodes.thrift",  # Only used in impalads
    "parquet.thrift",  # Only used in impalads
    "ResourceProfile.thrift",  # Only used in impalads
    "SystemTables.thrift",  # Only used in impalads
    "Zip.thrift",  # Unused
}

# Canned review messages posted for the various classes of problems detected below.
THRIFT_FILE_COMMENT = (
    "This file is used in communication between impalad and catalogd/statestore. "
    "Please make sure impalads can still work with new/old versions of catalogd and "
    "statestore. Basically only new optional fields can be added.")
FBS_FILE_COMMENT = (
    "This file is used in communication between impalad and catalogd/statestore. "
    "Please make sure impalads can still work with new/old versions of catalogd and "
    "statestore. Basically only new fields can be added and should be added at the end "
    "of a table definition.\n"
    "https://flatbuffers.dev/flatbuffers_guide_writing_schema.html")
PLANNER_TEST_TABLE_STATS = (
    "Table numRows and totalSize is changed in this planner test. It is unnecessary to "
    "change them. Consider running testdata/bin/restore-stats-on-planner-tests.py "
    "script to restore them to consistent values.")
UNCOMMITTED_CHANGE = "Uncommitted change detected!"
# Values for the "side" field of a gerrit CommentInput: whether the comment is placed
# on the new patchset or on its parent commit.
COMMENT_REVISION_SIDE = "REVISION"
COMMENT_PARENT_SIDE = "PARENT"

# Matches range information like:
# @@ -229,0 +230,2 @@ struct TColumnStats {
RANGE_RE = re.compile(r"^@@ -([0-9]*).* \+([0-9]*).*@@\s*(.*)$")
# Matches required/optional fields like:
# 7: required i64 num_trues
REQUIRED_FIELD_RE = re.compile(r"[0-9]*: required \w* \w*")
OPTIONAL_FIELD_RE = re.compile(r"[0-9]*: optional \w* \w*")
# Matches include statements like:
# include "Exprs.thrift"
INCLUDE_FILE_RE = re.compile(r"include \"(\w+\.thrift)\"")
# Suffix appended to all thrift/flatbuffers compatibility warnings.
THRIFT_WARNING_SUFFIX = (" might break the compatibility between impalad and "
                         "catalogd/statestore during upgrade")
|
|
|
|
|
|
def setup_virtualenv():
  """Create the critic virtualenv and install the flake8 tooling into it.

  Also appends the venv's site-packages directory to sys.path so that pyparsing
  (needed to import thrift_parser later) is importable from this process."""
  venv.create(VENV_PATH, with_pip=True, system_site_packages=True)
  packages = [
      "wheel",
      f"flake8=={FLAKE8_VERSION}",
      f"flake8-diff=={FLAKE8_DIFF_VERSION}",
      f"pyparsing=={PYPARSING_VERSION}",
      f"flake8-unused-arguments=={FLAKE8_UNUSED_ARG_VERSION}",
  ]
  check_call([PIP_PATH, "install"] + packages)
  # Add the libpath of the installed venv to import pyparsing
  site_packages = os.path.join(
      VENV_PATH,
      f"lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/")
  sys.path.append(site_packages)
|
|
|
|
|
|
def get_comment_input(message, line_number=0, side=COMMENT_REVISION_SIDE,
                      context_line="", dryrun=False):
  """Build a gerrit CommentInput dict for 'message' at 'line_number'.

  'side' selects whether the comment attaches to the patchset (REVISION) or its
  parent (PARENT). In dry runs the offending source line is included so console
  output is readable without opening the file."""
  result = {
      "message": message,
      "line": line_number,
      "side": side,
  }
  if dryrun:
    result["context_line"] = context_line
  return result
|
|
|
|
|
|
def get_flake8_comments(base_revision, revision):
  """Run flake8-diff over the changes between 'base_revision' and 'revision' and
  convert every reported violation into a gerrit comment.

  Returns a dict with file path as keys and a list of CommentInput objects. See
  https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#review-input
  for information on the format."""
  comments = defaultdict(list)
  # flake8 needs to be on the path so flake8-diff can invoke it.
  env = os.environ.copy()
  env["PATH"] = "{0}:{1}".format(VENV_BIN, env["PATH"])

  proc = Popen(
      [FLAKE8_DIFF_PATH, "--standard-flake8-output", "--color", "off", base_revision,
       revision],
      stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env, universal_newlines=True)
  out, err = proc.communicate()
  # The return code is non-zero whenever violations are found, which is the expected
  # case here, so it is deliberately ignored. stderr is the real error signal.
  if err:
    raise Exception("Did not expect flake8-diff to write to stderr:\n{0}".format(err))

  # Match output lines like:
  # bin/jenkins/flake8-gerrit-review.py:25:1: F401 'json' imported but unused
  violation_re = re.compile(r"^([^:]*):([0-9]*):([0-9]*): (.*)$")

  for raw_line in out.splitlines():
    match = violation_re.match(raw_line)
    if match is None:
      raise Exception("Pattern did not match line:\n{0}".format(raw_line))
    path, line_num, col_num, details = match.groups()
    line_num = int(line_num)
    col_num = int(col_num)
    if any(pattern.match(path) for pattern in EXCLUDE_FILE_PATTERNS):
      continue
    comment = {"message": "flake8: {0}".format(details)}
    if col_num == 1:
      # Heuristic: an error on the first column applies to the whole line.
      comment["line"] = line_num
    else:
      comment["range"] = {"start_line": line_num, "end_line": line_num,
                          "start_character": col_num - 1, "end_character": col_num}
    comments[path].append(comment)
  return comments
|
|
|
|
|
|
def get_misc_comments(base_revision, revision, dryrun=False):
  """Generate miscellaneous warnings (e.g. long lines and trailing whitespace) for
  code changes made between 'base_revision' and 'revision'. These warnings are
  produced by parsing "git diff" output directly rather than by an external tool."""
  comments = defaultdict(list)
  # Matches range information like:
  # @@ -128 +133,2 @@ if __name__ == "__main__":
  hunk_re = re.compile(r"^@@ -[0-9,]* \+([0-9]*).*$")

  diff_text = check_output(
      ["git", "diff", "-U0", "{0}..{1}".format(base_revision, revision)],
      universal_newlines=True)
  current_file = None
  should_check = False
  line_num = 0
  for diff_line in diff_text.splitlines():
    if diff_line.startswith("+++ "):
      # Start of diff for a file. Strip off "+++ b/" to get the file path.
      current_file = diff_line[6:]
      should_check = os.path.splitext(current_file)[1] in SOURCE_EXTENSIONS
      if should_check and any(
          p.match(current_file) for p in EXCLUDE_FILE_PATTERNS):
        should_check = False
    elif diff_line.startswith("@@ "):
      # Hunk header; extract the first line number of the added lines. Format is:
      # @@ -128 +133,2 @@ if __name__ == "__main__":
      match = hunk_re.match(diff_line)
      if match is None:
        raise Exception("Pattern did not match diff line:\n{0}".format(diff_line))
      line_num = int(match.group(1))
    elif diff_line.startswith("+") and should_check:
      # An added or modified line - check it to see if we should generate warnings.
      add_misc_comments_for_line(comments, diff_line[1:], current_file, line_num,
                                 dryrun)
      line_num += 1
  return comments
|
|
|
|
|
|
def add_misc_comments_for_line(comments, line, curr_file, curr_line_num, dryrun=False):
  """Helper for get_misc_comments: append warnings for 'line' (found at
  'curr_line_num' in 'curr_file') to the 'comments' dict."""
  def add(message):
    # All warnings for this line share the same location and context.
    comments[curr_file].append(get_comment_input(
        message, curr_line_num, context_line=line, dryrun=dryrun))

  # Check for trailing whitespace.
  if line != line.rstrip():
    add("line has trailing whitespace")

  # Check for long lines. Skip .py files since flake8 already flags long lines.
  is_python = os.path.splitext(curr_file)[1] == ".py"
  if not is_python and len(line) > LINE_LIMIT:
    add("line too long ({0} > {1})".format(len(line), LINE_LIMIT))

  if '\t' in line:
    add("tab used for whitespace")

  # Thrift debug output in the backend may leak sensitive data.
  if curr_file.startswith("be/src/") and 'ThriftDebugString' in line:
    add("Please make sure you don't output sensitive data with ThriftDebugString(). "
        "If so, use impala::RedactedDebugString() instead.")
|
|
|
|
|
|
def compare_thrift_structs(curr_file, old_structs, curr_structs, comments):
  """Compare thrift struct definitions between two revisions of 'curr_file' and
  append upgrade-compatibility warnings to 'comments'.

  'old_structs' and 'curr_structs' map struct name -> {field id -> field}, as
  produced by thrift_parser.extract_thrift_defs(); either may be None when the file
  does not exist in that revision. Fields are compared by field id. Unlike the
  previous implementation this does not mutate the caller's 'curr_structs' (the old
  code popped matched fields out of the current-revision dicts as a side effect)."""
  # Skip if the file is removed or new
  if old_structs is None or curr_structs is None:
    return
  # Adding new structs is OK. Compare fields of existing structs.
  for struct_name, old_fields in old_structs.items():
    if struct_name not in curr_structs:
      print(f"Removed struct {struct_name}")
      continue
    curr_fields = curr_structs[struct_name]
    for fid, old_field in old_fields.items():
      if fid not in curr_fields:
        # Dropping a required field breaks peers that still write/expect it.
        if old_field.qualifier == 'required':
          comments[curr_file].append(get_comment_input(
              f"Deleting required field '{old_field.name}' of {struct_name}"
              + THRIFT_WARNING_SUFFIX,
              old_field.line_num, COMMENT_PARENT_SIDE))
        continue
      curr_field = curr_fields[fid]
      if curr_field.name != old_field.name:
        comments[curr_file].append(get_comment_input(
            f"Renaming field '{old_field.name}' to '{curr_field.name}' in {struct_name}"
            + THRIFT_WARNING_SUFFIX,
            curr_field.line_num))
        continue
      if curr_field.qualifier != old_field.qualifier:
        comments[curr_file].append(get_comment_input(
            f"Changing field '{old_field.name}' from {old_field.qualifier} to "
            f"{curr_field.qualifier} in {struct_name}" + THRIFT_WARNING_SUFFIX,
            curr_field.line_num))
        continue
      if curr_field.type != old_field.type:
        comments[curr_file].append(get_comment_input(
            f"Changing type of field '{old_field.name}' from {old_field.type} to "
            f"{curr_field.type} in {struct_name}" + THRIFT_WARNING_SUFFIX,
            curr_field.line_num))
    # Field ids only present in the new revision are additions. Adding a field as
    # 'required' breaks processes that still write the old struct version.
    for fid, new_field in curr_fields.items():
      if fid in old_fields:
        continue
      if new_field.qualifier == 'required':
        comments[curr_file].append(get_comment_input(
            f"Adding a required field '{new_field.name}' in {struct_name}"
            + THRIFT_WARNING_SUFFIX,
            new_field.line_num))
  new_struct_names = curr_structs.keys() - old_structs.keys()
  if len(new_struct_names) > 0:
    print(f"New structs {new_struct_names}")
|
|
|
|
|
|
def compare_thrift_enums(curr_file, old_enums, curr_enums, comments):
  """Compare thrift enum definitions between two revisions of 'curr_file' and append
  upgrade-compatibility warnings to 'comments'. 'old_enums'/'curr_enums' map enum
  name -> {numeric value -> item}; either may be None for a removed/new file."""
  # Skip if the file is removed or new
  if old_enums is None or curr_enums is None:
    return
  for enum_name, old_items in old_enums.items():
    if enum_name not in curr_enums:
      print(f"Removed enum {enum_name}")
      continue
    curr_items = curr_enums[enum_name]
    for value, old_item in old_items.items():
      if value not in curr_items:
        # The numeric value disappeared entirely: old peers may still send it.
        comments[curr_file].append(get_comment_input(
            f"Removing the enum item {enum_name}.{old_item.name}={value}"
            + THRIFT_WARNING_SUFFIX,
            old_item.line_num, COMMENT_PARENT_SIDE))
      elif curr_items[value].name != old_item.name:
        # The same numeric value now means something different.
        comments[curr_file].append(get_comment_input(
            f"Enum item {old_item.name}={value} changed to "
            f"{curr_items[value].name}={value} in {enum_name}. This"
            + THRIFT_WARNING_SUFFIX,
            curr_items[value].line_num))
|
|
|
|
|
|
def extract_thrift_defs_of_revision(revision, file_name):
  """Parse 'file_name' as it exists in git 'revision' and return its thrift
  definitions as a (structs, enums) pair. Returns (None, None) when the file cannot
  be read, which usually means it does not exist in that revision."""
  # Importing thrift_parser depends on pyparsing being installed in setup_virtualenv().
  from thrift_parser import extract_thrift_defs
  git_cmd = ["git", "show", f"{revision}:{file_name}"]
  try:
    contents = check_output(git_cmd, universal_newlines=True)
  except Exception as e:
    # Usually this means the file doesn't exist in that revision.
    print(f"Failed to read {file_name} of revision {revision}: {e}")
    return None, None
  else:
    return extract_thrift_defs(contents)
|
|
|
|
|
|
def get_catalog_compatibility_comments(base_revision, revision):
  """Get comments on Thrift/FlatBuffers changes that might break the communication
  between impalad and catalogd/statestore."""
  comments = defaultdict(list)

  changed_files = check_output(
      ["git", "diff", "--name-only", "{0}..{1}".format(base_revision, revision),
       "--", "common/thrift/*.thrift"],
      universal_newlines=True).splitlines()
  for curr_file in changed_files:
    # Files that never cross the impalad <-> catalogd/statestore boundary are exempt.
    if os.path.basename(curr_file) in EXCLUDE_THRIFT_FILES:
      continue
    print(f"Parsing {curr_file}")
    curr_structs, curr_enums = extract_thrift_defs_of_revision(revision, curr_file)
    old_structs, old_enums = extract_thrift_defs_of_revision(base_revision, curr_file)
    compare_thrift_structs(curr_file, old_structs, curr_structs, comments)
    compare_thrift_enums(curr_file, old_enums, curr_enums, comments)

  # FlatBuffers schemas get a blanket warning rather than field-level analysis.
  merge_comments(
      comments, get_flatbuffers_compatibility_comments(base_revision, revision))
  return comments
|
|
|
|
|
|
def get_flatbuffers_compatibility_comments(base_revision, revision):
  """Attach a compatibility warning to every FlatBuffers schema (.fbs) under
  common/fbs changed between 'base_revision' and 'revision'."""
  comments = defaultdict(lambda: [])
  diff = check_output(
      ["git", "diff", "--numstat", "{0}..{1}".format(base_revision, revision),
       "--", "common/fbs"],
      universal_newlines=True)
  for diff_line in diff.splitlines():
    # --numstat lines have the form "<added>\t<deleted>\t<path>". Split on tabs with
    # a limit so paths containing spaces don't break the unpacking (the previous
    # whitespace split raised ValueError for such paths).
    _, _, path = diff_line.split("\t", 2)
    if os.path.splitext(path)[1] == ".fbs":
      comments[path].append(get_comment_input(FBS_FILE_COMMENT))
  return comments
|
|
|
|
|
|
def get_planner_tests_comments():
  """Detect planner test files whose table stats (numRows/totalSize) were changed
  unnecessarily. Requires a clean testdata/ tree: flags any uncommitted changes
  first, otherwise runs restore-stats-on-planner-tests.py and reports the .test
  files it modifies, then restores the tree with "git checkout"."""
  comments = defaultdict(lambda: [])
  # First, check that testdata/ dir is clean.
  diff = check_output(
      ["git", "diff", "--numstat", "--", "testdata"],
      universal_newlines=True)
  for diff_line in diff.splitlines():
    # --numstat lines are "<added>\t<deleted>\t<path>". Split on tabs with a limit so
    # paths containing spaces don't break the unpacking (a whitespace split would
    # raise ValueError for such paths).
    _, _, path = diff_line.split("\t", 2)
    comments[path].append(get_comment_input(UNCOMMITTED_CHANGE))
  if len(comments) > 0:
    return comments

  # All clean. Run script and comment any changes.
  check_call(['./testdata/bin/restore-stats-on-planner-tests.py'], stdout=DEVNULL)
  diff = check_output(
      ["git", "diff", "--numstat", "--", "testdata"],
      universal_newlines=True)
  for diff_line in diff.splitlines():
    _, _, path = diff_line.split("\t", 2)
    if os.path.splitext(path)[1] == ".test":
      comments[path].append(get_comment_input(PLANNER_TEST_TABLE_STATS))

  # Restore testdata/ dir.
  check_call(['git', 'checkout', 'testdata'])
  return comments
|
|
|
|
|
|
def get_eslint_comments(base_revision, revision):
  """Get comments based on JS code analysis and styling rules for
  all scripts present in the git commit 'revision'.

  Runs eslint (via the project's nodejs setup) over the .js files changed between
  'base_revision' and 'revision'. The custom formatter emits the violations as JSON
  on stdout, which is returned as a dict of file path -> list of CommentInput."""
  filenames = check_output(["git", "diff", "{0}..{1}".format(base_revision, revision),
                            "--name-only", "--", "*.js"], universal_newlines=True)
  filenames = filenames.strip().splitlines()
  comments = {}
  if len(filenames) == 0:
    return comments

  node_env = {}
  # IMPALA_SCRIPTS_HOME takes precedence over IMPALA_HOME when set.
  if os.environ.get("IMPALA_SCRIPTS_HOME"):
    node_env["IMPALA_HOME"] = os.environ.get("IMPALA_SCRIPTS_HOME")
  else:
    node_env["IMPALA_HOME"] = os.environ.get("IMPALA_HOME")
  node_env["HOME"] = os.environ.get("HOME")

  NODEJS_SETUP = os.path.join(node_env["IMPALA_HOME"], "bin/nodejs/setup_nodejs.sh")
  # The setup script prints the nodejs bin directory on its last output line.
  NODEJS_BIN = check_output([NODEJS_SETUP], env=node_env).decode("utf-8")
  NODEJS_BIN = NODEJS_BIN.splitlines()[-1]
  NODE = os.path.join(NODEJS_BIN, "node")
  NPM = os.path.join(NODEJS_BIN, "npm")
  LINTING_TESTS_HOME = os.path.join(node_env["IMPALA_HOME"], "tests/webui/linting")
  check_output([NODE, NPM, "--prefix", LINTING_TESTS_HOME, "install"])
  filenames = [os.path.join(node_env["IMPALA_HOME"], filename) for filename in
               filenames]
  try:
    node_env["IMPALA_NODEJS"] = NODE
    node_env["REVIEW_JS_FILES"] = ' '.join(filenames)
    LINTING_BIN = os.path.join(LINTING_TESTS_HOME, "node_modules/.bin/eslint")
    LINTING_FORMATTER = os.path.join(LINTING_TESTS_HOME,
                                     "eslint/eslint_custom_output_formatter.js")
    LINTING_CONFIG = os.path.join(LINTING_TESTS_HOME, "eslint.config.js")
    check_output([NODE, LINTING_BIN, "--config", LINTING_CONFIG, *filenames, "-f",
                  LINTING_FORMATTER], env=node_env).decode("utf-8")
  except CalledProcessError as e:
    # eslint exits non-zero when violations are found; the formatter prints them as
    # JSON on stdout. Only CalledProcessError carries .output - the previous broad
    # "except Exception" risked an AttributeError on e.output for other failures
    # (e.g. a missing node binary), which would mask the real error.
    comments = json.loads(e.output)
  return comments
|
|
|
|
def post_review_to_gerrit(review_input):
  """Post a review to the gerrit patchset. 'review_input' is a ReviewInput JSON object
  containing the review comments. The gerrit change and patchset are picked up from
  environment variables set by the gerrit jenkins trigger."""
  change_num = environ["GERRIT_CHANGE_NUMBER"]
  patch_num = environ["GERRIT_PATCHSET_NUMBER"]
  ssh_cmd = ["ssh", "-p", environ["GERRIT_PORT"],
             "impala-public-jenkins@" + environ["GERRIT_HOST"]]
  review_cmd = ["gerrit", "review", "--project", environ["GERRIT_PROJECT"], "--json",
                "{0},{1}".format(change_num, patch_num)]
  proc = Popen(ssh_cmd + review_cmd, stdin=PIPE, universal_newlines=True)
  # "gerrit review --json" reads the ReviewInput object from stdin.
  proc.communicate(json.dumps(review_input))
  if proc.returncode != 0:
    raise Exception("Error posting review to gerrit.")
|
|
|
|
|
|
def merge_comments(a, b):
  """Merge comment dict 'b' into comment dict 'a' in place: the per-file comment
  lists of 'b' are appended to the corresponding lists in 'a'. 'a' is expected to be
  a defaultdict(list) so that files new to 'b' are created automatically."""
  for path in b:
    a[path].extend(b[path])
|
|
|
|
|
|
if __name__ == "__main__":
  # Parse options. --dryrun is used to reproduce failures locally without posting.
  parser = ArgumentParser(description="Generate and post gerrit comments")
  parser.add_argument("--dryrun", action='store_true',
                      help="Don't post comments back to gerrit. Also shows the context "
                           "lines if possible.")
  parser.add_argument("--revision", default="HEAD",
                      help="The revision to check. Defaults to HEAD. Note that "
                           "flake8-diff only actually works correctly on HEAD. So "
                           "specifying other commits might miss the results of "
                           "flake8-diff.")
  parser.add_argument("--base-revision",
                      help="The base revision to check. Defaults to the parent commit of"
                           " revision")
  args = parser.parse_args()

  setup_virtualenv()
  revision = args.revision
  # Default the base revision to the parent commit of the revision under review.
  base_revision = args.base_revision or "{0}^".format(revision)

  # Run every checker and merge their per-file comment lists together.
  comments = get_flake8_comments(base_revision, revision)
  for extra_comments in (get_misc_comments(base_revision, revision, args.dryrun),
                         get_catalog_compatibility_comments(base_revision, revision),
                         get_planner_tests_comments(),
                         get_eslint_comments(base_revision, revision)):
    merge_comments(comments, extra_comments)

  review_input = {"comments": comments}
  if comments:
    review_input["message"] = (
        "gerrit-auto-critic failed. You can reproduce it locally using command:\n\n"
        "  python3 bin/jenkins/critique-gerrit-review.py --dryrun\n\n"
        "To run it, you might need a virtual env with Python3's venv installed.")
  print(json.dumps(review_input, indent=True))
  if not args.dryrun:
    post_review_to_gerrit(review_input)
|