#!/usr/bin/env python # Copyright 2012 Cloudera Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Impala's shell import cmd import errno import getpass import os import prettytable import re import shlex import signal import socket import sqlparse import sys import time from impala_client import (ImpalaClient, DisconnectedException, QueryStateException, RPCException, TApplicationException) from impala_shell_config_defaults import impala_shell_defaults from option_parser import get_option_parser, get_config_from_file from shell_output import DelimitedOutputFormatter, OutputStream, PrettyOutputFormatter from subprocess import call VERSION_FORMAT = "Impala Shell v%(version)s (%(git_hash)s) built on %(build_date)s" VERSION_STRING = "build version not available" HISTORY_LENGTH = 100 # Tarball / packaging build makes impala_build_version available try: from impala_build_version import get_git_hash, get_build_date, get_version VERSION_STRING = VERSION_FORMAT % {'version': get_version(), 'git_hash': get_git_hash()[:7], 'build_date': get_build_date()} except Exception: pass class CmdStatus: """Values indicate the execution status of a command to the cmd shell driver module SUCCESS and ERROR continue running the shell and ABORT exits the shell Since SUCCESS == None, successful commands do not need to explicitly return anything on completion """ SUCCESS = None ABORT = True ERROR = False class ImpalaPrettyTable(prettytable.PrettyTable): """Patched version of PrettyTable that TODO""" def _unicode(self, value): if not isinstance(value, basestring): value = str(value) if not isinstance(value, unicode): # If a value cannot be encoded, replace it with a placeholder. value = unicode(value, self.encoding, "replace") return value class ImpalaShell(cmd.Cmd): """ Simple Impala Shell. Basic usage: type connect to connect to an impalad Then issue queries or other commands. Tab-completion should show the set of available commands. Methods that implement shell commands return a boolean tuple (stop, status) stop is a flag the command loop uses to continue/discontinue the prompt. Status tells the caller that the command completed successfully. """ # If not connected to an impalad, the server version is unknown. UNKNOWN_SERVER_VERSION = "Not Connected" DISCONNECTED_PROMPT = "[Not connected] > " # Error and warning that is printed by cancel_query CANCELLATION_ERROR = 'Cancelled' # Message to display in shell when cancelling a query CANCELLATION_MESSAGE = ' Cancelling Query' # Commands are terminated with the following delimiter. CMD_DELIM = ';' DEFAULT_DB = 'default' # Regex applied to all tokens of a query to detect the query type. INSERT_REGEX = re.compile("^insert$", re.I) # Seperator for queries in the history file. HISTORY_FILE_QUERY_DELIM = '_IMP_DELIM_' def __init__(self, options): cmd.Cmd.__init__(self) self.is_alive = True self.impalad = None self.use_kerberos = options.use_kerberos self.kerberos_service_name = options.kerberos_service_name self.use_ssl = options.ssl self.ca_cert = options.ca_cert self.user = options.user self.ldap_password = None; self.use_ldap = options.use_ldap self.verbose = options.verbose self.prompt = ImpalaShell.DISCONNECTED_PROMPT self.server_version = ImpalaShell.UNKNOWN_SERVER_VERSION self.refresh_after_connect = options.refresh_after_connect self.current_db = options.default_db self.history_file = os.path.expanduser("~/.impalahistory") # Stores the state of user input until a delimiter is seen. self.partial_cmd = str() # Stores the old prompt while the user input is incomplete. self.cached_prompt = str() self.show_profiles = options.show_profiles # Output formatting flags/options self.output_file = options.output_file self.output_delimiter = options.output_delimiter self.write_delimited = options.write_delimited self.print_header = options.print_header self.set_query_options = {} self._populate_command_list() self.imp_client = None; # Tracks query handle of the last query executed. Used by the 'profile' command. self.last_query_handle = None; self.query_handle_closed = None try: self.readline = __import__('readline') self.readline.set_history_length(HISTORY_LENGTH) except ImportError: self._disable_readline() if options.use_ldap: self.ldap_password = getpass.getpass("LDAP password for %s:" % self.user) if options.impalad != None: self.do_connect(options.impalad) # We handle Ctrl-C ourselves, using an Event object to signal cancellation # requests between the handler and the main shell thread. signal.signal(signal.SIGINT, self._signal_handler) def _populate_command_list(self): """Populate a list of commands in the shell. Each command has its own method of the form do_, and can be extracted by introspecting the class directory. """ # Slice the command method name to get the name of the command. self.commands = [cmd[3:] for cmd in dir(self.__class__) if cmd.startswith('do_')] def _disable_readline(self): """Disables the readline module. The readline module is responsible for keeping track of command history. """ self.readline = None def _print_options(self, default_options, set_options): # Prints the current query options # with default values distinguished from set values by brackets [] if not default_options and not set_options: print '\tNo options available.' else: for k in sorted(default_options.keys()): if k in set_options.keys() and set_options[k] != default_options[k]: print '\n'.join(["\t%s: %s" % (k, set_options[k])]) else: print '\n'.join(["\t%s: [%s]" % (k, default_options[k])]) def do_shell(self, args): """Run a command on the shell Usage: shell ! """ try: start_time = time.time() os.system(args) self._print_if_verbose("--------\nExecuted in %2.2fs" % (time.time() - start_time)) except Exception, e: print_to_stderr('Error running command : %s' % e) return CmdStatus.ERROR def sanitise_input(self, args, interactive=True): """Convert the command to lower case, so it's recognized""" # A command terminated by a semi-colon is legal. Check for the trailing # semi-colons and strip them from the end of the command. args = args.strip() tokens = args.split(' ') if not interactive: tokens[0] = tokens[0].lower() # Strip all the non-interactive commands of the delimiter. return ' '.join(tokens).rstrip(ImpalaShell.CMD_DELIM) # The first token is converted into lower case to route it to the # appropriate command handler. This only applies to the first line of user input. # Modifying tokens in subsequent lines may change the semantics of the command, # so do not modify the text. if not self.partial_cmd: # The first token is the command. # If it's EOF, call do_quit() if tokens[0] == 'EOF': return 'quit' else: tokens[0] = tokens[0].lower() elif tokens[0] == "EOF": # If a command is in progress and the user hits a Ctrl-D, clear its state # and reset the prompt. self.prompt = self.cached_prompt self.partial_cmd = str() # The print statement makes the new prompt appear in a new line. # Also print an extra newline to indicate that the current command has # been cancelled. print '\n' return str() args = self._check_for_command_completion(' '.join(tokens).strip()) return args.rstrip(ImpalaShell.CMD_DELIM) def _shlex_split(self, line): """Reimplement shlex.split() so that escaped single quotes are actually escaped. shlex.split() only escapes double quotes by default. This method will throw a ValueError if an open quotation (either single or double) is found. """ my_split = shlex.shlex(line, posix=True) my_split.escapedquotes = '"\'' my_split.whitespace_split = True my_split.commenters = '' return list(my_split) def _cmd_ends_with_delim(self, line): """Check if the input command ends with a command delimiter. A command ending with the delimiter and containing an open quotation character is not considered terminated. If no open quotation is found, it's considered terminated. """ if line.endswith(ImpalaShell.CMD_DELIM): try: # Look for an open quotation in the entire command, and not just the # current line. if self.partial_cmd: line = '%s %s' % (self.partial_cmd, line) self._shlex_split(line) return True # If the command ends with a delimiter, check if it has an open quotation. # shlex in self._split() throws a ValueError iff an open quotation is found. # A quotation can either be a single quote or a double quote. except ValueError: pass # This checks to see if there are any backslashed quotes # outside of quotes, since backslashed quotes # outside of single or double quotes should not be escaped. # Ex. 'abc\'xyz' -> closed because \' is escaped # \'abcxyz -> open because \' is not escaped # \'abcxyz' -> closed # Iterate through the line and switch the state if a single or double quote is found # and ignore escaped single and double quotes if the line is considered open (meaning # a previous single or double quote has not been closed yet) state_closed = True; opener = None; for i, char in enumerate(line): if state_closed and (char in ['\'', '\"']): state_closed = False opener = char elif not state_closed and opener == char: if line[i - 1] != '\\': state_closed = True opener = None; return state_closed return False def _check_for_command_completion(self, cmd): """Check for a delimiter at the end of user input. The end of the user input is scanned for a legal delimiter. If a delimiter is not found: - Input is not send to onecmd() - onecmd() is a method in Cmd which routes the user input to the appropriate method. An empty string results in a no-op. - Input is removed from history. - Input is appended to partial_cmd If a delimiter is found: - The contents of partial_cmd are put in history, as they represent a completed command. - The contents are passed to the appropriate method for execution. - partial_cmd is reset to an empty string. """ if self.readline: current_history_len = self.readline.get_current_history_length() # Input is incomplete, store the contents and do nothing. if not self._cmd_ends_with_delim(cmd): # The user input is incomplete, change the prompt to reflect this. if not self.partial_cmd and cmd: self.cached_prompt = self.prompt self.prompt = '> '.rjust(len(self.cached_prompt)) # partial_cmd is already populated, add the current input after a newline. if self.partial_cmd and cmd: self.partial_cmd = "%s\n%s" % (self.partial_cmd, cmd) else: # If the input string is empty or partial_cmd is empty. self.partial_cmd = "%s%s" % (self.partial_cmd, cmd) # Remove the most recent item from history if: # -- The current state of user input in incomplete. # -- The most recent user input is not an empty string if self.readline and current_history_len > 0 and cmd: self.readline.remove_history_item(current_history_len - 1) # An empty string results in a no-op. Look at emptyline() return str() elif self.partial_cmd: # input ends with a delimiter and partial_cmd is not empty if cmd != ImpalaShell.CMD_DELIM: completed_cmd = "%s\n%s" % (self.partial_cmd, cmd) else: completed_cmd = "%s%s" % (self.partial_cmd, cmd) # Reset partial_cmd to an empty string self.partial_cmd = str() # Replace the most recent history item with the completed command. completed_cmd = sqlparse.format(completed_cmd) if self.readline and current_history_len > 0: self.readline.replace_history_item(current_history_len - 1, completed_cmd.encode('utf-8')) # Revert the prompt to its earlier state self.prompt = self.cached_prompt else: # Input has a delimiter and partial_cmd is empty completed_cmd = sqlparse.format(cmd) return completed_cmd def _signal_handler(self, signal, frame): """Handles query cancellation on a Ctrl+C event""" if self.last_query_handle is None or self.query_handle_closed: return # Create a new connection to the impalad and cancel the query. try: self.query_handle_closed = True print_to_stderr(ImpalaShell.CANCELLATION_MESSAGE) new_imp_client = ImpalaClient(self.impalad) new_imp_client.connect() new_imp_client.cancel_query(self.last_query_handle, False) self._validate_database() except Exception, e: print_to_stderr("Failed to reconnect and close: %s" % str(e)) # TODO: Add a retry here def precmd(self, args): args = self.sanitise_input(args) if not args: return args # Split args using sqlparse. If there are multiple queries present in user input, # the length of the returned query list will be greater than one. parsed_cmds = sqlparse.split(args) if len(parsed_cmds) > 1: # The last command needs a delimiter to be successfully executed. parsed_cmds[-1] += ImpalaShell.CMD_DELIM self.cmdqueue.extend(parsed_cmds) # If cmdqueue is populated, then commands are executed from the cmdqueue, and user # input is ignored. Send an empty string as the user input just to be safe. return str() return args.encode('utf-8') def postcmd(self, status, args): # status conveys to shell how the shell should continue execution # should always be a CmdStatus return status def do_summary(self, args): summary = None try: summary = self.imp_client.get_summary(self.last_query_handle) except RPCException: pass if summary is None: print_to_stderr("Could not retrieve summary for query.") return CmdStatus.ERROR if summary.nodes is None: print_to_stderr("Summary not available") return CmdStatus.SUCCESS output = [] table = self.construct_table_header(["Operator", "#Hosts", "Avg Time", "Max Time", "#Rows", "Est. #Rows", "Peak Mem", "Est. Peak Mem", "Detail"]) self.imp_client.build_summary_table(summary, 0, False, 0, False, output) formatter = PrettyOutputFormatter(table) self.output_stream = OutputStream(formatter, filename=self.output_file) self.output_stream.write(output) def do_set(self, args): """Set or display query options. Display query options: Usage: SET Set query options: Usage: SET