#!/usr/bin/env python
# encoding=utf-8
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import shlex
import sys
from subprocess import Popen, PIPE, call
from time import sleep

import pytest

from tests.common.impala_cluster import ImpalaCluster

SHELL_CMD = "%s/bin/impala-shell.sh" % os.environ['IMPALA_HOME']
DEFAULT_QUERY = 'select 1'
TEST_DB = "tmp_shell"
TEST_TBL = "tbl1"
QUERY_FILE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'tests', 'shell')


class TestImpalaShell(object):
    """A set of sanity tests for the Impala shell command-line parameters.

    The tests need to maintain Python 2.4 compatibility as a sub-goal of having
    shell tests is to ensure that it's not broken in systems running Python 2.4.
    The tests need a running impalad instance in order to execute queries.

    TODO:
      * Test individual modules.
      * Test the shell in interactive mode.
      * Add a test for a kerberized impala.
    """

    @classmethod
    def setup_class(cls):
        """Create the scratch database and table used by the tests."""
        cls.__create_shell_data()

    @classmethod
    def teardown_class(cls):
        """Drop the scratch table and then the scratch database."""
        run_impala_shell_cmd('-q "drop table if exists %s.%s"' % (TEST_DB, TEST_TBL))
        run_impala_shell_cmd('-q "drop database if exists %s"' % TEST_DB)

    @classmethod
    def __create_shell_data(cls):
        """Create TEST_DB.TEST_TBL and insert three rows in one shell invocation."""
        # Create a temporary table and populate it with test data.
        stmts = [
            'create database if not exists %s' % TEST_DB,
            'create table if not exists %s.%s (i integer, s string)' % (TEST_DB, TEST_TBL),
            "insert into %s.%s values (1, 'a'),(1, 'b'),(3, 'b')" % (TEST_DB, TEST_TBL),
        ]
        args = '-q "%s"' % (';'.join(stmts))
        run_impala_shell_cmd(args)

    @pytest.mark.execute_serially
    def test_no_args(self):
        """Run the default query with only the -q option."""
        args = '-q "%s"' % DEFAULT_QUERY
        run_impala_shell_cmd(args)

    @pytest.mark.execute_serially
    def test_multiple_queries(self):
        """Run three semicolon-separated queries passed through a single -q."""
        queries = ';'.join([DEFAULT_QUERY] * 3)
        args = '-q "%s" -B' % queries
        run_impala_shell_cmd(args)

    @pytest.mark.execute_serially
    def test_default_db(self):
        """Check that -d selects the database and backticks its argument."""
        args = '-d %s -q "describe %s" --quiet' % (TEST_DB, TEST_TBL)
        run_impala_shell_cmd(args)
        # Without -d the unqualified table name is expected not to resolve.
        args = '-q "describe %s"' % TEST_TBL
        run_impala_shell_cmd(args, expect_success=False)
        # test keyword parquet is interpreted as an identifier
        # when passed as an argument to -d
        args = '-d parquet'
        result = run_impala_shell_cmd(args)
        assert "Query: use `parquet`" in result.stderr, result.stderr
        # test if backticking is idempotent
        args = "-d '```parquet```'"
        result = run_impala_shell_cmd(args)
        assert "Query: use `parquet`" in result.stderr, result.stderr

    @pytest.mark.execute_serially
    def test_refresh_on_connect(self):
        """Check that -r reports a metadata invalidation on stderr."""
        args = '-r -q "%s"' % DEFAULT_QUERY
        result = run_impala_shell_cmd(args)
        assert 'Invalidating Metadata' in result.stderr, result.stderr

    @pytest.mark.execute_serially
    def test_unsecure_message(self):
        """Check the startup message printed when no arguments are given."""
        results = run_impala_shell_cmd("")
        assert "Starting Impala Shell without Kerberos authentication" in results.stderr

    @pytest.mark.execute_serially
    def test_print_header(self):
        """Check that --print_header adds exactly one header row to the output."""
        args = ('--print_header -B --output_delim="," -q "select * from %s.%s"'
                % (TEST_DB, TEST_TBL))
        result = run_impala_shell_cmd(args)
        result_rows = result.stdout.strip().split('\n')
        # Three data rows plus the header.
        assert len(result_rows) == 4
        assert result_rows[0].split(',') == ['i', 's']

        args = '-B --output_delim="," -q "select * from %s.%s"' % (TEST_DB, TEST_TBL)
        result = run_impala_shell_cmd(args)
        result_rows = result.stdout.strip().split('\n')
        assert len(result_rows) == 3

    @pytest.mark.execute_serially
    def test_kerberos_option(self):
        """Check the messages printed for -k and the -s service name override."""
        args = "-k"
        # The command will fail because we're trying to connect to a kerberized impalad.
        results = run_impala_shell_cmd(args, expect_success=False)
        # Check that impala is using the right service name.
        assert "Using service name 'impala'" in results.stderr
        assert "Starting Impala Shell using Kerberos authentication" in results.stderr
        # Check that Impala warns the user if klist does not exist on the system, or if
        # no kerberos tickets are initialized.
        try:
            # Only the call itself can raise OSError (klist binary missing).
            call(["klist"])
        except OSError:
            assert 'klist not found on the system' in results.stderr
        else:
            expected_error_msg = ("-k requires a valid kerberos ticket but no valid kerberos "
                                  "ticket found.")
            assert expected_error_msg in results.stderr
        # Make sure we don't try to re-connect
        assert "retrying the connection with a secure transport" not in results.stderr
        # Change the service name
        args += " -s foobar"
        results = run_impala_shell_cmd(args, expect_success=False)
        assert "Using service name 'foobar'" in results.stderr

    @pytest.mark.execute_serially
    def test_continue_on_error(self):
        """Check that -c makes the shell exit successfully despite failing queries."""
        args = '-c -q "select foo; select bar;"'
        run_impala_shell_cmd(args)
        # Should fail
        args = '-q "select foo; select bar;"'
        run_impala_shell_cmd(args, expect_success=False)

    @pytest.mark.execute_serially
    def test_execute_queries_from_file(self):
        """Check that a query file with comments yields the same output as one without."""
        args = '-f %s/test_file_comments.sql --quiet -B' % QUERY_FILE_PATH
        result = run_impala_shell_cmd(args)
        output = result.stdout
        args = '-f %s/test_file_no_comments.sql --quiet -B' % QUERY_FILE_PATH
        result = run_impala_shell_cmd(args)
        assert output == result.stdout, "Queries with comments not parsed correctly"

    @pytest.mark.execute_serially
    def test_completed_query_errors(self):
        """Check that warnings of a completed query are printed on stderr."""
        args = ('-q "set abort_on_error=false;'
                ' select count(*) from functional_seq_snap.bad_seq_snap" --quiet')
        result = run_impala_shell_cmd(args)
        assert 'WARNINGS:' in result.stderr
        assert 'Bad synchronization marker' in result.stderr
        assert 'Expected: ' in result.stderr
        assert 'Actual: ' in result.stderr
        assert 'Problem parsing file' in result.stderr

    @pytest.mark.execute_serially
    def test_output_format(self):
        """Check the default and custom delimiters used with -B."""
        expected_output = ['1'] * 3
        args = '-q "select 1,1,1" -B --quiet'
        result = run_impala_shell_cmd(args)
        actual_output = [r.strip() for r in result.stdout.split('\t')]
        assert actual_output == expected_output
        result = run_impala_shell_cmd(args + ' --output_delim="|"')
        actual_output = [r.strip() for r in result.stdout.split('|')]
        assert actual_output == expected_output
        # A two-character delimiter is expected to be rejected.
        result = run_impala_shell_cmd(args + ' --output_delim="||"', expect_success=False)
        assert "Illegal delimiter" in result.stderr

    @pytest.mark.execute_serially
    def test_do_methods(self):
        """Ensure that the do_ methods in the shell work.

        Some of the do_ methods are implicitly tested in other tests, and as part of the
        test setup.
        """
        # explain
        args = '-q "explain select 1"'
        run_impala_shell_cmd(args)
        # show
        args = '-q "show tables"'
        run_impala_shell_cmd(args)
        # with
        args = '-q "with t1 as (select 1) select * from t1"'
        run_impala_shell_cmd(args)
        # set
        # spaces around the = sign
        args = '-q "set default_order_by_limit = 10"'
        run_impala_shell_cmd(args)
        # no spaces around the = sign
        args = '-q "set default_order_by_limit=10"'
        run_impala_shell_cmd(args)
        # test query options displayed
        args = '-q "set"'
        result_set = run_impala_shell_cmd(args)
        assert 'MEM_LIMIT: [0]' in result_set.stdout
        # test to check that explain_level is 1
        assert 'EXPLAIN_LEVEL: [1]' in result_set.stdout
        # test values displayed after setting value
        args = '-q "set mem_limit=1g;set"'
        result_set = run_impala_shell_cmd(args)
        # single list means one instance of mem_limit in displayed output
        assert 'MEM_LIMIT: 1g' in result_set.stdout
        assert 'MEM_LIMIT: [0]' not in result_set.stdout
        # Negative tests for set
        # use : instead of =
        args = '-q "set default_order_by_limit:10"'
        run_impala_shell_cmd(args, expect_success=False)
        # use 2 = signs
        args = '-q "set default_order_by_limit=10=50"'
        run_impala_shell_cmd(args, expect_success=False)
        # describe and desc should return the same result.
        args = '-q "describe %s.%s" -B' % (TEST_DB, TEST_TBL)
        result_describe = run_impala_shell_cmd(args)
        args = '-q "desc %s.%s" -B' % (TEST_DB, TEST_TBL)
        result_desc = run_impala_shell_cmd(args)
        assert result_describe.stdout == result_desc.stdout

    @pytest.mark.execute_serially
    def test_summary(self):
        """Check the output of the summary command in several situations."""
        args = "-q 'select count(*) from functional.alltypes; summary;'"
        result_set = run_impala_shell_cmd(args)
        assert "03:AGGREGATE" in result_set.stdout

        # No query has been run before asking for a summary.
        args = "-q 'summary;'"
        result_set = run_impala_shell_cmd(args)
        assert "Could not retrieve summary for query" in result_set.stderr

        args = "-q 'show tables; summary;'"
        result_set = run_impala_shell_cmd(args)
        assert "Summary not available" in result_set.stderr

        # Test queries without an exchange
        args = "-q 'select 1; summary;'"
        result_set = run_impala_shell_cmd(args)
        assert "00:UNION" in result_set.stdout

    @pytest.mark.execute_serially
    def test_queries_closed(self):
        """Regression test for IMPALA-897"""
        args = '-f %s/test_close_queries.sql --quiet -B' % QUERY_FILE_PATH
        cmd = "%s %s" % (SHELL_CMD, args)
        # Execute the shell command async
        p = Popen(shlex.split(cmd), shell=False, stdout=PIPE, stderr=PIPE)
        try:
            impala_cluster = ImpalaCluster()
            impalad = impala_cluster.impalads[0].service
            # The last query in the test SQL script will sleep for 10 seconds, so sleep
            # here for 5 seconds and verify the number of in-flight queries is 1.
            sleep(5)
            assert 1 == impalad.get_num_in_flight_queries()
        finally:
            # Always wait for the shell and drain its pipes, so that a failure above
            # does not leave a running child process and open pipes behind.
            result = get_shell_cmd_result(p)
        assert result.rc == 0
        assert 0 == impalad.get_num_in_flight_queries()

    @pytest.mark.execute_serially
    def test_get_log_once(self):
        """Test that get_log() is always called exactly once."""
        # Query with fetch
        args = '-q "select * from functional.alltypeserror"'
        result = run_impala_shell_cmd(args)
        assert result.stderr.count('WARNINGS') == 1

        # Insert query (doesn't fetch)
        INSERT_TBL = "alltypes_get_log"
        DROP_ARGS = '-q "drop table if exists %s.%s"' % (TEST_DB, INSERT_TBL)
        run_impala_shell_cmd(DROP_ARGS)
        try:
            args = ('-q "create table %s.%s like functional.alltypeserror"'
                    % (TEST_DB, INSERT_TBL))
            run_impala_shell_cmd(args)
            args = '-q "insert overwrite %s.%s partition(year, month)' \
                   'select * from functional.alltypeserror"' % (TEST_DB, INSERT_TBL)
            result = run_impala_shell_cmd(args)
            assert result.stderr.count('WARNINGS') == 1
        finally:
            # Drop the table even when a step above fails; teardown_class only drops
            # TEST_TBL before dropping the database.
            run_impala_shell_cmd(DROP_ARGS)

    @pytest.mark.execute_serially
    def test_international_characters(self):
        """Sanity test to ensure that the shell can read international characters."""
        RUSSIAN_CHARS = (u"А, Б, В, Г, Д, Е, Ё, Ж, З, И, Й, К, Л, М, Н, О, П, Р,"
                         u"С, Т, У, Ф, Х, Ц,Ч, Ш, Щ, Ъ, Ы, Ь, Э, Ю, Я")
        args = """-B -q "select '%s'" """ % RUSSIAN_CHARS
        # The command line is passed to the shell as UTF-8 encoded bytes.
        result = run_impala_shell_cmd(args.encode('utf-8'))
        assert 'UnicodeDecodeError' not in result.stderr
        assert RUSSIAN_CHARS.encode('utf-8') in result.stdout


class ImpalaShellResult(object):
    """Return code, stdout and stderr collected from one impala-shell process."""

    def __init__(self):
        self.rc = 0
        self.stdout = str()
        self.stderr = str()


def run_impala_shell_cmd(shell_args, expect_success=True):
    """Runs the Impala shell on the commandline.

    'shell_args' is a string which represents the commandline options. It is appended
    to SHELL_CMD and split with shlex before the process is started.

    If 'expect_success' is true, asserts that the shell exited with return code 0;
    otherwise asserts that the return code is non-zero.

    Returns a ImpalaShellResult.
    """
    cmd = "%s %s" % (SHELL_CMD, shell_args)
    p = Popen(shlex.split(cmd), shell=False, stdout=PIPE, stderr=PIPE)
    result = get_shell_cmd_result(p)
    if expect_success:
        assert result.rc == 0, "Cmd %s was expected to succeed: %s" % (cmd, result.stderr)
    else:
        assert result.rc != 0, "Cmd %s was expected to fail" % cmd
    return result


def get_shell_cmd_result(process):
    """Waits for 'process' (a Popen object) and returns its ImpalaShellResult.

    communicate() is used to read stdout and stderr, after which the return code is
    taken from the process.
    """
    result = ImpalaShellResult()
    result.stdout, result.stderr = process.communicate()
    result.rc = process.returncode
    return result