IMPALA-12517: Decode binary data with Python 3

When impala-shell receives binary data with the HS2 protocol, it uses a
stringifier to decode it. In Python 3, 'str' on binary data wraps it in
"b'...'"; to get equivalent output to 'str' in Python 2, we need to
decode as UTF-8 and handle errors.

Adds a test case for how impala-shell formats binary data.

Change-Id: I9222cd1ac081a38ab2b37d58628faac0812695ec
Reviewed-on: http://gerrit.cloudera.org:8080/20624
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Michael Smith
2023-10-25 16:41:38 -07:00
committed by Impala Public Jenkins
parent d9f1271c96
commit 09f15eea78
2 changed files with 33 additions and 1 deletions

View File

@@ -1185,6 +1185,31 @@ class TestImpalaShell(ImpalaTestSuite):
assert "| a | b |" in result.stdout, result.stdout
assert "| true | false |" in result.stdout, result.stdout
def test_binary_display(self, vector):
    """Check the cells impala-shell prints for binary_col of functional.binary_tbl.

    Runs a single '-q' query through the shell and requires every expected
    table cell to appear somewhere in the captured stdout.
    """
    shell_result = run_impala_shell_cmd(
        vector, ['-q', "select binary_col from functional.binary_tbl"])
    expected_cells = (
        "| binary1 |",
        "| NULL |",
        "| |",
        "| árvíztűrőtükörfúró |",
        "| 你好hello |",
        # \xef\xbf\xbd is the UTF-8 encoding of U+FFFD (replacement character);
        # presumably it marks bytes that could not be decoded as UTF-8.
        "| \x00\xef\xbf\xbd\x00\xef\xbf\xbd |",
        '| \xef\xbf\xbdD3"\x11\x00 |',
    )
    for cell in expected_cells:
        # On failure the whole shell output is reported to aid debugging.
        assert cell in shell_result.stdout, shell_result.stdout
def test_binary_as_string(self, vector):
    """Check the cells impala-shell prints when binary_col is cast to string.

    Rows whose string_col equals "invalid utf8" are filtered out by the query.
    """
    sql = (
        'select cast(binary_col as string) from functional.binary_tbl\n'
        'where string_col != "invalid utf8" '
    )
    shell_result = run_impala_shell_cmd(vector, ['-q', sql])
    # Each expected cell stops before the closing column border: the column
    # width follows the header, which is reported as "binary_col" in some
    # protocol modes (strict HS2) and as "cast(binary_col as string)" in others.
    expected_prefixes = (
        "| binary1 ",
        "| NULL ",
        "| ",
        "| árvíztűrőtükörfúró ",
        "| 你好hello ",
    )
    for prefix in expected_prefixes:
        # On failure the whole shell output is reported to aid debugging.
        assert prefix in shell_result.stdout, shell_result.stdout
def test_null_values(self, vector):
"""Test that null values are displayed correctly."""
if vector.get_value('strict_hs2_protocol'):