From 9ad9a1624a3ccc3caadab6d7dd4eb7de96e110dd Mon Sep 17 00:00:00 2001 From: Andrew Sherman Date: Wed, 20 Mar 2019 09:33:31 -0700 Subject: [PATCH] IMPALA-8325: Leading Unicode comments cause Impala Shell failure. This change fixes a regression introduced by "IMPALA-2195 Improper handling of comments in queries." The Impala Shell parses input text into several strings using the sqlparse library. One of the returned strings is the sql command, which is used to determine the correct do_ method to call. Another of the returned strings is the leading comment, which is a comment that appears before legal sql text. Python2 has strings with multiple encodings. The strings returned from the sqlparse library have the Unicode encoding. Impala Shell converts the sql command string to utf-8 encoding before using it. If the Impala Shell needs to send the sql command to an Impala Coordinator then it (re)constructs the query out of the strings returned by the sqlparse library. This query is sent to the Coordinator via Beeswax protocol. The query is converted to an ascii string before being sent. The conversion can fail if the leading comment string contains non-ASCII Unicode characters, which can't be directly converted to ascii. So the trigger for the bug is that the leading comment contains non-ASCII Unicode characters. The fix is that the leading comment string should be converted to utf-8 in the same way as the sql command. TESTING: Ran all end-to-end tests. 
Added two test cases to tests/shell/test_shell_interactive.py Change-Id: I8633935b6e0ca33594afd32ad242779555e09944 Reviewed-on: http://gerrit.cloudera.org:8080/12812 Reviewed-by: Impala Public Jenkins Tested-by: Impala Public Jenkins --- shell/impala_shell.py | 2 ++ tests/shell/test_shell_interactive.py | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/shell/impala_shell.py b/shell/impala_shell.py index 5fb2e6c48..d382c8b23 100755 --- a/shell/impala_shell.py +++ b/shell/impala_shell.py @@ -1336,6 +1336,8 @@ class ImpalaShell(object, cmd.Cmd): """ leading_comment, line = ImpalaShell.strip_leading_comment(line.strip()) line = line.encode('utf-8') + if leading_comment: + leading_comment = leading_comment.encode('utf-8') if line and line[0] == '@': line = 'rerun ' + line[1:] return super(ImpalaShell, self).parseline(line) + (leading_comment,) diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py index 353e07677..670550119 100755 --- a/tests/shell/test_shell_interactive.py +++ b/tests/shell/test_shell_interactive.py @@ -538,7 +538,12 @@ class TestImpalaShellInteractive(object): result = run_impala_shell_interactive('-- comment\n' 'select * from leading_comment;') assert 'Fetched 1 row(s)' in result.stderr - + result = run_impala_shell_interactive('--한글\n' + 'select * from leading_comment;') + assert 'Fetched 1 row(s)' in result.stderr + result = run_impala_shell_interactive('/* 한글 */\n' + 'select * from leading_comment;') + assert 'Fetched 1 row(s)' in result.stderr result = run_impala_shell_interactive('/* comment */\n' 'select * from leading_comment;') assert 'Fetched 1 row(s)' in result.stderr