#!/usr/bin/env python
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Impala's shell
import cmd
import prettytable
import time
import sys
import os
import signal
import threading
from optparse import OptionParser

from beeswaxd import BeeswaxService
from beeswaxd.BeeswaxService import QueryState
from ImpalaService import ImpalaService
from ImpalaService.ImpalaService import TImpalaQueryOptions
from ImpalaService.constants import DEFAULT_QUERY_OPTIONS
from Status.ttypes import TStatus, TStatusCode
from thrift.transport.TSocket import TSocket
from thrift.transport.TTransport import TBufferedTransport, TTransportException
from thrift.protocol import TBinaryProtocol
from thrift.Thrift import TApplicationException

# Template for the version banner; filled in from impala_build_version below.
VERSION_FORMAT = "Impala v%(version)s (%(git_hash)s) built on %(build_date)s"
# Token that introduces a comment.
COMMENT_TOKEN = '--'
# Fallback banner, kept when no build information can be loaded.
VERSION_STRING = "build version not available"
# Passed to readline.set_history_length() by the shell.
HISTORY_LENGTH = 100

# impala_build_version only exists in tarball / packaged builds. The lookup is
# deliberately best-effort: on any failure the fallback VERSION_STRING stays.
try:
    from impala_build_version import get_git_hash, get_build_date, get_version
    VERSION_STRING = VERSION_FORMAT % dict(version=get_version(),
                                           git_hash=get_git_hash()[:7],
                                           build_date=get_build_date())
except Exception:
    pass


class RpcStatus:
    """Symbolic names for the two possible outcomes of an rpc."""
    OK, ERROR = 0, 1


# Simple Impala shell. Can issue queries (with configurable options)
# Basic usage: type connect to connect to an impalad
# Then issue queries or other commands.
# Tab-completion should show the set of
# available commands.
# Methods that implement shell commands return a boolean tuple (stop, status)
# stop is a flag the command loop uses to continue/discontinue the prompt.
# Status tells the caller that the command completed successfully.
# TODO: (amongst others)
#   - Column headers / metadata support
#   - Report profiles
#   - A lot of rpcs return a verbose TStatus from thrift/Status.thrift
#     This will be useful for better error handling. The next iteration
#     of the shell should handle this return parameter.
class ImpalaShell(cmd.Cmd):
    # Prompt shown while no impalad connection is established.
    DISCONNECTED_PROMPT = "[Not connected] > "

    def __init__(self, options):
        """Initialise shell state from the parsed command-line options.

        `options` must provide use_kerberos, verbose, kerberos_service_name,
        refresh_after_connect, default_db, show_profiles and impalad. When
        options.impalad is set, a connection is attempted immediately via
        do_connect().
        """
        cmd.Cmd.__init__(self)
        self.is_alive = True
        self.use_kerberos = options.use_kerberos
        self.verbose = options.verbose
        self.kerberos_service_name = options.kerberos_service_name
        self.impalad = None
        self.prompt = ImpalaShell.DISCONNECTED_PROMPT
        self.connected = False
        self.imp_service = None
        self.transport = None
        self.fetch_batch_size = 1024
        self.query_options = {}
        self.__make_default_options()
        self.query_state = QueryState._NAMES_TO_VALUES
        self.refresh_after_connect = options.refresh_after_connect
        self.default_db = options.default_db
        self.history_file = os.path.expanduser("~/.impalahistory")
        self.show_profiles = options.show_profiles
        # Tracks query handle of the last query executed. Used by the 'profile' command.
        self.last_query_handle = None
        # readline is optional; without it the shell runs with no history support.
        try:
            self.readline = __import__('readline')
            self.readline.set_history_length(HISTORY_LENGTH)
        except ImportError:
            self.readline = None

        # Event used to signal cancellation requests between the SIGINT handler
        # and the main shell thread. It is created before do_connect() runs so
        # that anything triggered by the connect already finds the attribute.
        # NOTE(review): do_connect() is not visible in this part of the file.
        self.is_interrupted = threading.Event()

        if options.impalad is not None:
            self.do_connect(options.impalad)

        # We handle Ctrl-C ourselves. The handler is installed only after the
        # initial connect attempt, as before.
        signal.signal(signal.SIGINT, self.__signal_handler)

    def __get_option_name(self, option):
        """Return the name TImpalaQueryOptions maps the value `option` to."""
        return TImpalaQueryOptions._VALUES_TO_NAMES[option]

    def __make_default_options(self):
        """Reset self.query_options to DEFAULT_QUERY_OPTIONS, keyed by option name."""
        self.query_options = {}
        for option, default in DEFAULT_QUERY_OPTIONS.items():
            self.query_options[self.__get_option_name(option)] = default

    def __print_options(self):
        """Print every current query option as a tab-indented 'name: value' line."""
        print('\n'.join(["\t%s: %s" % (k, v)
                         for (k, v) in self.query_options.items()]))

    def __options_to_string_list(self):
        """Return the current query options as a list of 'name=value' strings."""
        return ["%s=%s" % (k, v) for (k, v) in self.query_options.items()]

    def do_shell(self, args):
        """Run a command on the shell

        Usage: shell <cmd>
               ! <cmd>

        """
        try:
            os.system(args)
        except Exception as e:
            print('Error running command : %s' % e)
            # Commands report False on error (see the contract in postcmd).
            return False
        return True

    def sanitise_input(self, args):
        """Normalise a raw input line so that cmd can dispatch it.

        The line is stripped of surrounding whitespace, its first token (the
        command name) is lower-cased and trailing semi-colons are removed.
        The rest of the line keeps its case. The special token EOF is mapped
        to 'quit'.
        """
        # A command terminated by a semi-colon is legal. Check for the trailing
        # semi-colons and strip them from the end of the command.
        command, separator, remainder = args.strip().partition(' ')
        # The first token should be the command
        # If it's EOF, call do_quit()
        if command == 'EOF':
            return 'quit'
        return (command.lower() + separator + remainder).rstrip(';')

    def __signal_handler(self, signum, frame):
        """SIGINT handler: flag the interruption for the main shell thread."""
        self.is_interrupted.set()

    def precmd(self, args):
        """Clear any pending interrupt and return the sanitised command line."""
        self.is_interrupted.clear()
        return self.sanitise_input(args)

    def postcmd(self, status, args):
        """Hack to make non interactive mode work"""
        self.is_interrupted.clear()
        # cmd expects return of False to keep going, and True to quit.
        # Shell commands return True on success, False on error, and None to quit, so
        # translate between them.
# TODO : Remove in the future once shell and Impala query processing can be separated. if status == None: return True else: return False def do_set(self, args): """Set or display query options. Display query options: Usage: SET Set query options: Usage: SET