mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Upgrades the impala-shell's bundled version of sqlparse to 0.3.1. There were some API changes in 0.2.0+ that required a re-write of the StripLeadingCommentFilter in impala_shell.py. A slight perf optimization was also added to avoid using the filter altogether if no leading comment is readily discernible. As 0.1.19 was the last version of sqlparse to support python 2.6, this patch also breaks Impala's compatibility with python 2.6. No new tests were added, but all existing tests passed without modification. Change-Id: I77a1fd5ae311634a18ee04b8c389d8a3f3a6e001 Reviewed-on: http://gerrit.cloudera.org:8080/15642 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
109 lines
3.6 KiB
Python
109 lines
3.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
#
|
|
# Copyright (C) 2009-2018 the sqlparse authors and contributors
|
|
# <see AUTHORS file>
|
|
#
|
|
# This module is part of python-sqlparse and is released under
|
|
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
|
|
|
|
from sqlparse import sql, tokens as T
|
|
|
|
|
|
class StatementSplitter(object):
|
|
"""Filter that split stream at individual statements"""
|
|
|
|
def __init__(self):
|
|
self._reset()
|
|
|
|
def _reset(self):
|
|
"""Set the filter attributes to its default values"""
|
|
self._in_declare = False
|
|
self._is_create = False
|
|
self._begin_depth = 0
|
|
|
|
self.consume_ws = False
|
|
self.tokens = []
|
|
self.level = 0
|
|
|
|
def _change_splitlevel(self, ttype, value):
|
|
"""Get the new split level (increase, decrease or remain equal)"""
|
|
|
|
# parenthesis increase/decrease a level
|
|
if ttype is T.Punctuation and value == '(':
|
|
return 1
|
|
elif ttype is T.Punctuation and value == ')':
|
|
return -1
|
|
elif ttype not in T.Keyword: # if normal token return
|
|
return 0
|
|
|
|
# Everything after here is ttype = T.Keyword
|
|
# Also to note, once entered an If statement you are done and basically
|
|
# returning
|
|
unified = value.upper()
|
|
|
|
# three keywords begin with CREATE, but only one of them is DDL
|
|
# DDL Create though can contain more words such as "or replace"
|
|
if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
|
|
self._is_create = True
|
|
return 0
|
|
|
|
# can have nested declare inside of being...
|
|
if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
|
|
self._in_declare = True
|
|
return 1
|
|
|
|
if unified == 'BEGIN':
|
|
self._begin_depth += 1
|
|
if self._is_create:
|
|
# FIXME(andi): This makes no sense.
|
|
return 1
|
|
return 0
|
|
|
|
# Should this respect a preceding BEGIN?
|
|
# In CASE ... WHEN ... END this results in a split level -1.
|
|
# Would having multiple CASE WHEN END and a Assignment Operator
|
|
# cause the statement to cut off prematurely?
|
|
if unified == 'END':
|
|
self._begin_depth = max(0, self._begin_depth - 1)
|
|
return -1
|
|
|
|
if (unified in ('IF', 'FOR', 'WHILE')
|
|
and self._is_create and self._begin_depth > 0):
|
|
return 1
|
|
|
|
if unified in ('END IF', 'END FOR', 'END WHILE'):
|
|
return -1
|
|
|
|
# Default
|
|
return 0
|
|
|
|
def process(self, stream):
|
|
"""Process the stream"""
|
|
EOS_TTYPE = T.Whitespace, T.Comment.Single
|
|
|
|
# Run over all stream tokens
|
|
for ttype, value in stream:
|
|
# Yield token if we finished a statement and there's no whitespaces
|
|
# It will count newline token as a non whitespace. In this context
|
|
# whitespace ignores newlines.
|
|
# why don't multi line comments also count?
|
|
if self.consume_ws and ttype not in EOS_TTYPE:
|
|
yield sql.Statement(self.tokens)
|
|
|
|
# Reset filter and prepare to process next statement
|
|
self._reset()
|
|
|
|
# Change current split level (increase, decrease or remain equal)
|
|
self.level += self._change_splitlevel(ttype, value)
|
|
|
|
# Append the token to the current statement
|
|
self.tokens.append(sql.Token(ttype, value))
|
|
|
|
# Check if we get the end of a statement
|
|
if self.level <= 0 and ttype is T.Punctuation and value == ';':
|
|
self.consume_ws = True
|
|
|
|
# Yield pending statement (if any)
|
|
if self.tokens:
|
|
yield sql.Statement(self.tokens)
|