IMPALA-9362: Upgrade sqlparse 0.1.19 -> 0.3.1

Upgrades the impala-shell's bundled version of sqlparse to 0.3.1.
API changes in sqlparse 0.2.0+ required a rewrite of the
StripLeadingCommentFilter in impala_shell.py. A small performance
optimization was also added: the filter is skipped entirely when no
leading comment is readily discernible.
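
For illustration, a minimal sketch of what a leading-comment filter
looks like under the sqlparse 0.2.0+/0.3.x API, where statement filters
implement process(stmt) against a parsed statement rather than the old
0.1.x process(stack, stream) hook. The class body, the fast-path check,
and strip_leading_comment() are assumptions for the sketch, not the
actual patch:

    # Illustrative sketch only -- not the code in impala_shell.py.
    import sqlparse
    from sqlparse import sql as sql_types
    from sqlparse import tokens as T

    class StripLeadingCommentFilter(object):
        # sqlparse 0.2.0+ calls statement filters as process(stmt) and
        # expects the parsed statement to be mutated in place.
        def process(self, stmt):
            while stmt.tokens and (
                    isinstance(stmt.tokens[0], sql_types.Comment)
                    or stmt.tokens[0].ttype in (T.Comment.Single,
                                                T.Comment.Multiline,
                                                T.Whitespace, T.Newline)):
                stmt.tokens.pop(0)

    def strip_leading_comment(query):
        # Perf fast path: only run the filter stack when the query can
        # actually begin with a comment.
        if not query.lstrip().startswith(('--', '/*')):
            return query
        stack = sqlparse.engine.FilterStack()
        stack.stmtprocess.append(StripLeadingCommentFilter())
        stack.postprocess.append(sqlparse.filters.SerializerUnicode())
        return ''.join(stack.run(query))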

As 0.1.19 was the last version of sqlparse to support Python 2.6,
this patch also breaks Impala's compatibility with Python 2.6.

No new tests were added, but all existing tests passed without
modification.

Change-Id: I77a1fd5ae311634a18ee04b8c389d8a3f3a6e001
Reviewed-on: http://gerrit.cloudera.org:8080/15642
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>

Author: David Knupp
Date: 2020-04-03 00:27:14 -07:00
Committed-by: Impala Public Jenkins
Parent: 327ec29c48
Commit: c26e3db4bd
99 changed files with 6831 additions and 6179 deletions

@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

"""SQL Lexer"""

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
from sqlparse.compat import text_type, file_types
from sqlparse.utils import consume


class Lexer(object):
    """Lexer
    Empty class. Leaving for backwards-compatibility
    """

    @staticmethod
    def get_tokens(text, encoding=None):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted and applies registered filters.

        Split ``text`` into (tokentype, text) pairs.

        ``stack`` is the initial stack (default: ``['root']``)
        """
        if isinstance(text, file_types):
            text = text.read()

        if isinstance(text, text_type):
            pass
        elif isinstance(text, bytes):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError(u"Expected text or file-like object, got {!r}".
                            format(type(text)))

        iterable = enumerate(text)
        for pos, char in iterable:
            for rexmatch, action in SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    yield action, m.group()
                elif callable(action):
                    yield action(m.group())

                consume(iterable, m.end() - pos - 1)
                break
            else:
                yield tokens.Error, char


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
    return Lexer().get_tokens(sql, encoding)
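
For reference, a quick usage sketch of the tokenize() entry point above;
the token names in the comments come from sqlparse's token hierarchy and
the output shape is illustrative:

    from sqlparse.lexer import tokenize

    # tokenize() lazily yields (tokentype, value) pairs.
    for ttype, value in tokenize("SELECT 1 -- trailing comment"):
        print(ttype, repr(value))
    # Expected shape of the output:
    #   Token.Keyword.DML             'SELECT'
    #   Token.Text.Whitespace         ' '
    #   Token.Literal.Number.Integer  '1'
    #   Token.Text.Whitespace         ' '
    #   Token.Comment.Single          '-- trailing comment'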