IMPALA-13305: Better thrift compatibility checks based on pyparsing

There are some false positive warnings reported by
critique-gerrit-review.py when adding a new thrift struct that has
required fields. This patch leverages pyparsing to analyze the
thrift file changes. So we can identify whether the new required field
is added in an existing struct.

thrift_parser.py adds a simple thrift grammar parser to parse a thrift
file into an AST. It basically consists of pyparsing.ParseResults and
some customized classes to inject the line number, i.e.
thrift_parser.ThriftField and thrift_parser.ThriftEnumItem.

Import thrift_parser to parse the current version of a thrift file and
the old version of it before the commit. critique-gerrit-review.py
then compares the structs and enums to report these warnings:
 - A required field is deleted in an existing struct.
 - A new required field is added in an existing struct.
 - An existing field is renamed.
 - The qualifier (required/optional) of a field is changed.
 - The type of a field is changed.
 - An enum item is removed.
 - Enum items are reordered.

Only thrift files used in both catalogd and impalad are checked. This is
the same as the current version. We can further improve this by
analyzing all RPCs used between impalad and catalogd to get all thrift
struct/enums used in them.

Warning examples for commit e48af8c04:
  "common/thrift/StatestoreService.thrift": [
   {
    "message": "Renaming field 'sequence' to 'catalogd_version' in TUpdateCatalogdRequest might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 345,
    "side": "REVISION"
   }
  ]

Warning examples for commit 595212b4e:
  "common/thrift/CatalogObjects.thrift": [
   {
    "message": "Adding a required field 'type' in TIcebergPartitionField might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 612,
    "side": "REVISION"
   }
  ]

Warning examples for commit c57921225:
  "common/thrift/CatalogObjects.thrift": [
   {
    "message": "Renaming field 'partition_id' to 'spec_id' in TIcebergPartitionSpec might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 606,
    "side": "REVISION"
   }
  ],
  "common/thrift/CatalogService.thrift": [
   {
    "message": "Changing field 'iceberg_data_files_fb' from required to optional in TIcebergOperationParam might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 215,
    "side": "REVISION"
   },
   {
    "message": "Adding a required field 'operation' in TIcebergOperationParam might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 209,
    "side": "REVISION"
   }
  ],
  "common/thrift/Query.thrift": [
   {
    "message": "Renaming field 'spec_id' to 'iceberg_params' in TFinalizeParams might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 876,
    "side": "REVISION"
   }
  ]

Warning example for commit 2b2cf8d96:
  "common/thrift/CatalogService.thrift": [
   {
    "message": "Enum item FUNCTION_NOT_FOUND=3 changed to TABLE_NOT_LOADED=3 in CatalogLookupStatus. This might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 381,
    "side": "REVISION"
   }
  ]

Warning example for commit c01efd096:
  "common/thrift/JniCatalog.thrift": [
   {
    "message": "Removing the enum item TAlterTableType.SET_OWNER=15 might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 107,
    "side": "PARENT"
   }
  ]

Warning example for commit 374783c55:
"common/thrift/Query.thrift": [
   {
    "message": "Changing type of field 'enabled_runtime_filter_types' from PlanNodes.TEnabledRuntimeFilterTypes to set<PlanNodes.TRuntimeFilterType> in TQueryOptions might break the compatibility between impalad and catalogd/statestore during upgrade",
    "line": 449,
    "side": "REVISION"
   }

Tests
 - Add tests in tests/infra/test_thrift_parser.py
 - Verified the script with all(1260) commits of common/thrift.

Change-Id: Ia1dc4112404d0e7c5df94ee9f59a4fe2084b360d
Reviewed-on: http://gerrit.cloudera.org:8080/22264
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
stiga-huang
2024-12-27 16:41:11 +08:00
committed by Impala Public Jenkins
parent ce6be49c08
commit 777ae104bb
4 changed files with 403 additions and 95 deletions

View File

@@ -44,10 +44,13 @@ from os import environ
import os.path
import re
from subprocess import check_call, check_output, DEVNULL, Popen, PIPE
import sys
import venv
FLAKE8_VERSION = "7.1.1"
FLAKE8_DIFF_VERSION = "0.2.2"
PYPARSING_VERSION = "3.1.4"
VENV_PATH = "gerrit_critic_venv"
VENV_BIN = os.path.join(VENV_PATH, "bin")
@@ -126,8 +129,12 @@ def setup_virtualenv():
venv.create(VENV_PATH, with_pip=True, system_site_packages=True)
check_call([PIP_PATH, "install",
"wheel",
"flake8=={0}".format(FLAKE8_VERSION),
"flake8-diff=={0}".format(FLAKE8_DIFF_VERSION)])
f"flake8=={FLAKE8_VERSION}",
f"flake8-diff=={FLAKE8_DIFF_VERSION}",
f"pyparsing=={PYPARSING_VERSION}"])
# Add the libpath of the installed venv to import pyparsing
sys.path.append(os.path.join(VENV_PATH, f"lib/python{sys.version_info.major}."
f"{sys.version_info.minor}/site-packages/"))
def get_comment_input(message, line_number=0, side=COMMENT_REVISION_SIDE,
@@ -253,106 +260,110 @@ def add_misc_comments_for_line(comments, line, curr_file, curr_line_num, dryrun=
curr_line_num, context_line=line, dryrun=dryrun))
def compare_thrift_structs(curr_file, old_structs, curr_structs, comments):
# Skip if the file is removed or new
if old_structs is None or curr_structs is None:
return
# Adding new structs is OK. Compare fields of existing structs.
for struct_name, old_fields in old_structs.items():
if struct_name not in curr_structs:
print(f"Removed struct {struct_name}")
continue
curr_fields = curr_structs[struct_name]
for fid, old_field in old_fields.items():
if fid not in curr_fields:
if old_field.qualifier == 'required':
comments[curr_file].append(get_comment_input(
f"Deleting required field '{old_field.name}' of {struct_name}"
+ THRIFT_WARNING_SUFFIX,
old_field.line_num, COMMENT_PARENT_SIDE))
continue
curr_field = curr_fields.pop(fid)
if curr_field.name != old_field.name:
comments[curr_file].append(get_comment_input(
f"Renaming field '{old_field.name}' to '{curr_field.name}' in {struct_name}"
+ THRIFT_WARNING_SUFFIX,
curr_field.line_num))
continue
if curr_field.qualifier != old_field.qualifier:
comments[curr_file].append(get_comment_input(
f"Changing field '{old_field.name}' from {old_field.qualifier} to "
f"{curr_field.qualifier} in {struct_name}" + THRIFT_WARNING_SUFFIX,
curr_field.line_num))
continue
if curr_field.type != old_field.type:
comments[curr_file].append(get_comment_input(
f"Changing type of field '{old_field.name}' from {old_field.type} to "
f"{curr_field.type} in {struct_name}" + THRIFT_WARNING_SUFFIX,
curr_field.line_num))
if len(curr_fields) > 0:
for new_field in curr_fields.values():
if new_field.qualifier == 'required':
comments[curr_file].append(get_comment_input(
f"Adding a required field '{new_field.name}' in {struct_name}"
+ THRIFT_WARNING_SUFFIX,
new_field.line_num))
new_struct_names = curr_structs.keys() - old_structs.keys()
if len(new_struct_names) > 0:
print(f"New structs {new_struct_names}")
def compare_thrift_enums(curr_file, old_enums, curr_enums, comments):
# Skip if the file is removed or new
if old_enums is None or curr_enums is None:
return
for enum_name, old_items in old_enums.items():
if enum_name not in curr_enums:
print(f"Removed enum {enum_name}")
continue
curr_items = curr_enums[enum_name]
for value, old_item in old_items.items():
if value not in curr_items:
comments[curr_file].append(get_comment_input(
f"Removing the enum item {enum_name}.{old_item.name}={value}"
+ THRIFT_WARNING_SUFFIX,
old_item.line_num, COMMENT_PARENT_SIDE))
continue
if curr_items[value].name != old_item.name:
comments[curr_file].append(get_comment_input(
f"Enum item {old_item.name}={value} changed to "
f"{curr_items[value].name}={value} in {enum_name}. This"
+ THRIFT_WARNING_SUFFIX,
curr_items[value].line_num))
def extract_thrift_defs_of_revision(revision, file_name):
"""Extract a dict of thrift structs from pyparsing.ParseResults"""
# Importing thrift_parser depends on pyparsing being installed in setup_virtualenv().
from thrift_parser import extract_thrift_defs
try:
contents = check_output(["git", "show", f"{revision}:{file_name}"],
universal_newlines=True)
except Exception as e:
# Usually it's due to file doesn't exist in that revision
print(f"Failed to read {file_name} of revision {revision}: {e}")
return None, None
return extract_thrift_defs(contents)
def get_catalog_compatibility_comments(base_revision, revision, dryrun=False):
"""Get comments on Thrift/FlatBuffers changes that might break the communication
between impalad and catalogd/statestore"""
comments = defaultdict(lambda: [])
diff = check_output(
["git", "diff", "-U0", "{0}..{1}".format(base_revision, revision),
"--", "common/thrift"],
["git", "diff", "--name-only", "{0}..{1}".format(base_revision, revision),
"--", "common/thrift/*.thrift"],
universal_newlines=True)
curr_file = None
check_source_file = False
has_concerns = False
in_enum_clause = False
is_thrift_file = False
# Line numbers in the old file and in the new file
old_line_num = 0
new_line_num = 0
for diff_line in diff.splitlines():
if diff_line.startswith("--- "):
for curr_file in diff.splitlines():
if os.path.basename(curr_file) in EXCLUDE_THRIFT_FILES:
continue
elif diff_line.startswith("+++ "):
# Start of diff for a file. Add a comment for the previous file if has concerns.
if curr_file and check_source_file and has_concerns:
comments[curr_file].append(get_comment_input(THRIFT_FILE_COMMENT))
# Strip off "+++ b/" to get the file path.
curr_file = diff_line[6:]
check_source_file = False
has_concerns = False
in_enum_clause = False
is_thrift_file = os.path.splitext(curr_file)[1] == ".thrift"
if is_thrift_file and os.path.basename(curr_file) not in EXCLUDE_THRIFT_FILES:
check_source_file = True
elif check_source_file and diff_line.startswith("@@ "):
# Figure out the starting line of the hunk. Examples:
# @@ -932,0 +933,5 @@ enum TImpalaQueryOptions {
# @@ -55,0 +56 @@ enum TPlanNodeType {
# @@ -109 +109 @@ struct THdfsTableSink {
# We want to extract the start line for the added lines
match = RANGE_RE.match(diff_line)
if not match:
raise Exception("Pattern did not match diff line:\n{0}".format(diff_line))
old_line_num = int(match.group(1))
new_line_num = int(match.group(2))
in_enum_clause = match.group(3).startswith("enum ")
elif check_source_file and diff_line.startswith("-"):
# Check if deleting/modifying a required field
change = diff_line[1:].strip()
if in_enum_clause and not change.startswith("//"):
has_concerns = True
comments[curr_file].append(get_comment_input(
"Modifying/deleting this enum item" + THRIFT_WARNING_SUFFIX,
old_line_num, COMMENT_PARENT_SIDE, diff_line, dryrun))
elif REQUIRED_FIELD_RE.match(change):
has_concerns = True
comments[curr_file].append(get_comment_input(
"Modifying/deleting this required field" + THRIFT_WARNING_SUFFIX,
old_line_num, COMMENT_PARENT_SIDE, diff_line, dryrun))
elif OPTIONAL_FIELD_RE.match(change):
# Removing an optional field should be OK unless the field number is reused by
# a new optional field. Add a general comment on the file.
has_concerns = True
old_line_num += 1
elif is_thrift_file and diff_line.startswith("+"):
change = diff_line[1:].strip()
# Check includes in Thrift files
match = INCLUDE_FILE_RE.match(change)
# Case 1: a target file includes a whitelist file, E.g. Frontend.thrift includes
# LineageGraph.thrift. Future changes in LineageGraph.thrift might impact
# Frontend.thrift so
# - LineageGraph.thrift should be removed from the whitelist (i.e.
# EXCLUDE_THRIFT_FILES) if it will be used in impalad and catalogd.
# - Or developers should make sure the included new fields are read/write only
# in impalads or only in catalogd.
if match and check_source_file and match.group(1) in EXCLUDE_THRIFT_FILES:
comments[curr_file].append(get_comment_input(
"Future changes in {0} might break the compatibility between impalad and "
"catalogd/statestore. Please remove {0} from EXCLUDE_THRIFT_FILES in "
"bin/jenkins/critique-gerrit-review.py or make sure the new fields are "
"read/write only in impalads or only in catalogd".format(match.group(1)),
new_line_num, context_line=diff_line, dryrun=dryrun))
# Case 2: A whitelist file includes a target file, e.g. PlanNodes.thrift includes
# Data.thrift. Note that PlanNodes.thrift is supposed to be used inside impalad.
# Data.thrift is used in both impalad and catalogd. We should ensure new fields
# included from Data.thrift are not set from catalogd.
elif (match and not check_source_file
and match.group(1) not in EXCLUDE_THRIFT_FILES):
comments[curr_file].append(get_comment_input(
"Thrift objects in the current file are supposed to be used inside "
"impalads. Please make sure new fields includes from {} are not set by "
"catalogd.".format(match.group(1)),
new_line_num, context_line=diff_line, dryrun=dryrun))
elif check_source_file:
if REQUIRED_FIELD_RE.match(change):
has_concerns = True
comments[curr_file].append(get_comment_input(
"Modifying/adding this required field" + THRIFT_WARNING_SUFFIX,
new_line_num, context_line=diff_line, dryrun=dryrun))
new_line_num += 1
if curr_file and check_source_file and has_concerns:
comments[curr_file].append(get_comment_input(THRIFT_FILE_COMMENT))
print(f"Parsing {curr_file}")
curr_structs, curr_enums = extract_thrift_defs_of_revision(revision, curr_file)
old_structs, old_enums = extract_thrift_defs_of_revision(base_revision, curr_file)
compare_thrift_structs(curr_file, old_structs, curr_structs, comments)
compare_thrift_enums(curr_file, old_enums, curr_enums, comments)
merge_comments(
comments, get_flatbuffers_compatibility_comments(base_revision, revision))
return comments

259
bin/jenkins/thrift_parser.py Executable file
View File

@@ -0,0 +1,259 @@
#!/usr/bin/python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Developed based on example at:
# https://github.com/pyparsing/pyparsing/blob/c4cf4a5/examples/protobuf_parser.py
#
# TODO: Consider using thriftpy2.parser. Need it to expose lineno.
#
from collections import defaultdict
import os
import sys
from pyparsing import (
Forward,
Group,
Keyword,
OneOrMore,
Optional,
ParseBaseException,
ParseResults,
Suppress,
Word,
ZeroOrMore,
alphanums,
alphas,
cppStyleComment,
delimitedList,
line,
lineno,
oneOf,
pyparsing_common,
pythonStyleComment,
quotedString,
restOfLine,
)
# Simply treating strings like 'CatalogServiceVersion.V2' as an identifier by allowing
# '.' in the bodyChars
ident = Word(alphas + "_", bodyChars=alphanums + "_.").setName("identifier")
integer = pyparsing_common.integer
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI, COMMA, COLON, LT, GT =\
map(Suppress, "{}[]()=;,:<>")
optionalCommaOrSemi = Optional(COMMA | SEMI)
# Ref: https://github.com/apache/thrift/blob/154d154/compiler/cpp/src/thrift/thriftl.ll
TYPEDEF_ = Keyword('typedef')
NAMESPACE_ = Keyword('namespace')
EXCEPTION_ = Keyword('exception')
STRUCT_ = Keyword('struct')
UNION_ = Keyword('union')
REQUIRED_ = Keyword('required')
OPTIONAL_ = Keyword('optional')
ENUM_ = Keyword('enum')
EXTENDS_ = Keyword('extends')
SERVICE_ = Keyword('service')
TRUE_ = Keyword('true')
FALSE_ = Keyword('false')
THROWS_ = Keyword('throws')
LIST_ = Keyword('list')
MAP_ = Keyword('map')
SET_ = Keyword('set')
VOID_ = Keyword('void')
CONST_ = Keyword('const')
typespec = Forward()
# Scalar types and customized types
simple_types = oneOf("void bool byte i8 i16 i32 i64 double string binary") | ident
list_type = (LIST_ | SET_) + LT + Group(typespec) + GT
map_type = MAP_ + LT + Group(typespec + COMMA + typespec) + GT
typespec << (list_type | map_type | simple_types)
typespec.setName("typespec")
list_value = LBRACK + ZeroOrMore(delimitedList(ident)) + RBRACK
rvalue = pyparsing_common.fnumber | TRUE_ | FALSE_ | ident | quotedString | list_value
class ThriftField:
def __init__(self, line_num, field_id, qualifier, ts, name, default_value):
self.id = field_id
self.qualifier = qualifier
self.type = ts
self.name = name
self.default_value = default_value
self.line_num = line_num
def __repr__(self):
return f"Field{self.id} {self.qualifier} {self.type} {self.name} L{self.line_num}"
def create_thrift_field(src_string, locn, toks):
# 'qualifier' and 'default_value' are optional. Use getattr() to set them as None
# when they are missing
qualifier = getattr(toks, "qualifier")
default_value = getattr(toks, "defaultValue")
return ThriftField(lineno(locn, src_string), toks["id"], qualifier, toks["ts"],
toks["name"], default_value)
def gen_type_string(toks):
"""Generate type strings from the nested tokens, e.g.
[['set', ['PlanNodes.TRuntimeFilterType']]] -> 'set<PlanNodes.TRuntimeFilterType>',
[['map', ['int', 'string']]] -> 'map<int,string>'.
"""
if not isinstance(toks, ParseResults):
return str(toks)
if len(toks) == 1:
# Somehow the first level of 'toks' has two nested levels. E.g. for list<string>,
# 'toks' is ParseResults([ParseResults(['list', ParseResults(['string'])])]).
return gen_type_string(toks[0])
assert len(toks) == 2
if toks[0] == 'set' or toks[0] == 'list':
return f"{toks[0]}<{gen_type_string(toks[1])}>"
assert toks[0] == 'map'
return f"map<{gen_type_string(toks[1][0])},{gen_type_string(toks[1][1])}>"
fieldDefn = (
integer("id")
+ COLON
+ Optional(REQUIRED_ | OPTIONAL_, "optional")("qualifier")
+ Group(typespec)("ts").setParseAction(gen_type_string)
+ ident("name")
+ Optional(EQ + rvalue("defaultValue"))
+ optionalCommaOrSemi
).setParseAction(create_thrift_field)
fieldsDefBody = Group(ZeroOrMore(fieldDefn))("body")
structDefn = STRUCT_ - ident("name") + LBRACE + fieldsDefBody + RBRACE
unionDefn = UNION_ - ident("name") + LBRACE + fieldsDefBody + RBRACE
class ThriftEnumItem:
def __init__(self, name, value, line_num):
self.name = name
self.value = value
self.line_num = line_num
def __repr__(self):
return f"EnumItem {self.name} L{self.line_num}"
def create_thrift_enum_item(src_string, locn, toks):
# toks[0] is in the form of [name] or [name, value]
value = toks[0][1] if len(toks[0]) > 1 else None
return ThriftEnumItem(toks[0][0], value, lineno(locn, src_string))
enumDefn = (
ENUM_ - ident("name") + LBRACE
+ OneOrMore(
Group(ident + Optional(EQ + integer)).setParseAction(create_thrift_enum_item)
+ optionalCommaOrSemi
)("evalues")
+ RBRACE
)
exceptionsDefn = LPAR + OneOrMore(fieldDefn) + RPAR
methodDefn = (
typespec("returnType")
- ident("methodName")
+ LPAR
+ fieldsDefBody
+ RPAR
+ Optional(THROWS_ + Group(exceptionsDefn))
+ optionalCommaOrSemi
)
serviceDefn = (
SERVICE_ - ident("serviceName") + Optional(EXTENDS_ + ident)
+ LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE
)
typeDefn = TYPEDEF_ - typespec("typespec") + ident("ident")
topLevelStatement = Group(
structDefn("struct")
| unionDefn("union")
| enumDefn("enum")
| serviceDefn("service")
)
thrift_parser = ZeroOrMore(topLevelStatement)
thrift_parser.ignore(cppStyleComment)
thrift_parser.ignore(pythonStyleComment)
# TODO: Add syntax for const variables. Currently they are all defined in a single line
# so we can ignore them like this.
thrift_parser.ignore("const " + restOfLine)
thrift_parser.ignore("namespace " + restOfLine)
thrift_parser.ignore("include " + restOfLine)
def extract_thrift_defs(thrift_file_content):
"""Extract a dict of thrift structs from pyparsing.ParseResults"""
parse_results = thrift_parser.parseString(thrift_file_content)
structs = defaultdict()
enums = defaultdict()
for top_level_item in parse_results:
if top_level_item.getName() == 'struct':
# A dict from field id to thrift_parser.ThriftField
struct_fields = defaultdict()
for field in top_level_item.body:
struct_fields[field.id] = field
structs[top_level_item.name] = struct_fields
elif top_level_item.getName() == 'enum':
# A dict from enum value (i32) to thrift_parser.ThriftEnumItem
enum_items = defaultdict()
index = 0
for item in top_level_item.evalues:
if item.value is not None:
enum_items[item.value] = item
else:
# Deal with enum items without value(index), e.g. items of TQueryTableColumn
enum_items[index] = item
index += 1
enums[top_level_item.name] = enum_items
return structs, enums
if __name__ == '__main__':
# thrift_dir = os.environ["IMPALA_HOME"] + "/common/thrift"
thrift_dir = sys.argv[1]
for entry in os.scandir(thrift_dir):
if entry.is_file() and entry.name.endswith(".thrift"):
print("parsing " + entry.path)
with open(entry.path, 'r') as in_file:
contents = in_file.read()
try:
structs, enums = extract_thrift_defs(contents)
for struct_name, struct_fields in structs.items():
print(f"struct {struct_name}")
for field in struct_fields.values():
print(field)
for enum_name, enum_items in enums.items():
print(f"enum {enum_name}")
for item in enum_items.values():
print(item)
except ParseBaseException as e:
print("Failed to parse line:\n" + line(e.loc, contents))
raise e

View File

@@ -39,7 +39,7 @@ pg8000 == 1.10.2
prettytable == 0.7.2
prometheus-client == 0.12.0
psutil == 5.6.3
pyparsing == 2.0.3
pyparsing == 2.4.7
pytest == 2.9.2
py == 1.4.32
pytest-forked == 0.2

View File

@@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import os
from subprocess import check_output
def test_thrift_parser():
"""Make sure thrift_parser can parse all the thrift files"""
impala_home = os.getenv("IMPALA_HOME")
# Only supports python3
output = check_output([os.path.join(impala_home, "bin/impala-python3"),
os.path.join(impala_home, "bin/jenkins/thrift_parser.py"),
os.path.join(impala_home, "common/thrift")],
universal_newlines=True)
assert "TStatus" in output
assert "Field1 required ErrorCodes.TErrorCode status_code" in output
assert "Field2 optional list<string> error_msgs" in output
assert "TErrorCode" in output
assert "EnumItem OK" in output
assert "EnumItem UNUSED" in output
assert "EnumItem GENERAL" in output
assert "EnumItem CANCELLED" in output