Files
impala/shell/ext-py/sqlparse-0.3.1/sqlparse/engine/statement_splitter.py
David Knupp c26e3db4bd IMPALA-9362: Upgrade sqlparse 0.1.19 -> 0.3.1
Upgrades the impala-shell's bundled version of sqlparse to 0.3.1.
There were some API changes in 0.2.0+ that required a re-write of
the StripLeadingCommentFilter in impala_shell.py. A slight perf
optimization was also added to avoid using the filter altogether
if no leading comment is readily discernible.

As 0.1.19 was the last version of sqlparse to support python 2.6,
this patch also breaks Impala's compatibility with python 2.6.

No new tests were added, but all existing tests passed without
modification.

Change-Id: I77a1fd5ae311634a18ee04b8c389d8a3f3a6e001
Reviewed-on: http://gerrit.cloudera.org:8080/15642
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-04-17 05:04:23 +00:00

109 lines
3.6 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql, tokens as T
class StatementSplitter(object):
    """Filter that cuts a token stream into individual SQL statements.

    The splitter keeps a running nesting ``level``.  A ``;`` seen while the
    level is zero or below marks the end of the current statement; the
    statement is emitted once the next token that is neither whitespace nor
    a single-line comment shows up.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Restore every piece of per-statement state to its default."""
        # Accumulated output and nesting bookkeeping.
        self.tokens = []
        self.level = 0
        self.consume_ws = False

        # Keyword-driven state used by _change_splitlevel().
        self._is_create = False
        self._begin_depth = 0
        # Set when a DECLARE opens a level; not read anywhere in this class.
        self._in_declare = False

    def _change_splitlevel(self, ttype, value):
        """Return how much the split level changes for one token.

        :param ttype: token type of the current token.
        :param value: raw text of the current token.
        :returns: ``1`` to go one level deeper, ``-1`` to go one level up,
            ``0`` to stay on the current level.

        Besides computing the delta this also updates ``_is_create``,
        ``_in_declare`` and ``_begin_depth``.
        """
        # Parentheses open and close a level.
        if ttype is T.Punctuation:
            if value == '(':
                return 1
            if value == ')':
                return -1

        # Anything that is not a keyword leaves the level untouched.
        if ttype not in T.Keyword:
            return 0

        # From here on the token is a keyword.  The first rule that matches
        # decides the result, so the order of the checks matters.
        keyword = value.upper()

        # Several keywords start with CREATE but only the DDL one counts.
        # The DDL token may carry extra words such as "OR REPLACE".
        if ttype is T.Keyword.DDL and keyword.startswith('CREATE'):
            self._is_create = True
            return 0

        # A DECLARE only opens a level inside a CREATE and outside of any
        # BEGIN block (declares can also be nested inside of BEGIN...).
        if keyword == 'DECLARE' and self._is_create \
                and self._begin_depth == 0:
            self._in_declare = True
            return 1

        if keyword == 'BEGIN':
            self._begin_depth += 1
            # FIXME(andi): This makes no sense.
            return 1 if self._is_create else 0

        # NOTE(review): END always closes a level, even when it terminates a
        # CASE ... WHEN ... END expression that never opened one.  Whether
        # that can cut a statement off prematurely is an open question
        # inherited from the original comments -- confirm before relying on
        # CASE inside BEGIN blocks.
        if keyword == 'END':
            self._begin_depth = max(0, self._begin_depth - 1)
            return -1

        # Control-flow openers only count inside a BEGIN block of a CREATE.
        opens_block = keyword in ('IF', 'FOR', 'WHILE')
        if opens_block and self._is_create and self._begin_depth > 0:
            return 1

        if keyword in ('END IF', 'END FOR', 'END WHILE'):
            return -1

        # Every other keyword keeps the level as it is.
        return 0

    def process(self, stream):
        """Split ``stream`` and yield one ``sql.Statement`` per statement.

        :param stream: iterable of ``(ttype, value)`` pairs.
        :returns: generator of ``sql.Statement`` objects.
        """
        # Token types that may still trail a finished statement.  This is a
        # plain tuple membership test; per the original notes a newline
        # token does not count as whitespace here, and multi-line comments
        # are not included either.
        trailing_ttypes = (T.Whitespace, T.Comment.Single)

        for ttype, value in stream:
            # A terminator was seen earlier and this token is not trailing
            # whitespace/comment: emit the finished statement and start
            # over with fresh state before handling the token itself.
            if self.consume_ws and ttype not in trailing_ttypes:
                yield sql.Statement(self.tokens)
                self._reset()

            # Adjust the nesting level, then record the token.
            self.level += self._change_splitlevel(ttype, value)
            self.tokens.append(sql.Token(ttype, value))

            # A semicolon outside of any nesting ends the statement.
            is_semicolon = ttype is T.Punctuation and value == ';'
            if is_semicolon and self.level <= 0:
                self.consume_ws = True

        # Emit whatever is left once the stream is exhausted.
        if self.tokens:
            yield sql.Statement(self.tokens)