mirror of https://github.com/apache/impala.git
Upgrades the impala-shell's bundled version of sqlparse to 0.3.1.

There were some API changes in 0.2.0+ that required a rewrite of the
StripLeadingCommentFilter in impala_shell.py. A slight performance
optimization was also added to avoid using the filter altogether if no
leading comment is readily discernible. As 0.1.19 was the last version
of sqlparse to support Python 2.6, this patch also breaks Impala's
compatibility with Python 2.6.

No new tests were added, but all existing tests passed without
modification.

Change-Id: I77a1fd5ae311634a18ee04b8c389d8a3f3a6e001
Reviewed-on: http://gerrit.cloudera.org:8080/15642
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
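For context, the filter-API difference the commit message refers to appears to be that, from sqlparse 0.2.0 onward, a preprocess filter's process() receives only the token stream (the 0.1.x signature also received the FilterStack). The snippet below is a minimal, hypothetical sketch of a 0.2.0+-style leading-comment filter; the class name and logic are illustrative only and are not Impala's actual StripLeadingCommentFilter.

# Hypothetical sketch of a sqlparse >= 0.2.0 style token filter; illustrative
# only, not the StripLeadingCommentFilter shipped in impala_shell.py.
from sqlparse import tokens as sql_tokens
from sqlparse.engine import FilterStack


class LeadingCommentStripper(object):
    def process(self, stream):
        # In 0.2.0+ process() takes just the (tokentype, value) stream;
        # the 0.1.x signature also took the FilterStack.
        body_started = False
        for ttype, value in stream:
            if not body_started and (ttype in sql_tokens.Comment
                                     or ttype in sql_tokens.Whitespace):
                continue  # drop comments/whitespace before the first real token
            body_started = True
            yield ttype, value


stack = FilterStack()
stack.preprocess.append(LeadingCommentStripper())
stripped = ''.join(str(stmt) for stmt in stack.run("/* note */ SELECT 1"))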
83 lines
2.5 KiB
Python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

"""SQL Lexer"""

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
from sqlparse.compat import text_type, file_types
from sqlparse.utils import consume


class Lexer(object):
    """Lexer
    Empty class. Leaving for backwards-compatibility
    """

    @staticmethod
    def get_tokens(text, encoding=None):
        """
        Return an iterable of (tokentype, value) pairs generated from
        `text`. If `unfiltered` is set to `True`, the filtering mechanism
        is bypassed even if filters are defined.

        Also preprocess the text, i.e. expand tabs and strip it if
        wanted and applies registered filters.

        Split ``text`` into (tokentype, text) pairs.

        ``stack`` is the initial stack (default: ``['root']``)
        """
        if isinstance(text, file_types):
            text = text.read()

        if isinstance(text, text_type):
            pass
        elif isinstance(text, bytes):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError(u"Expected text or file-like object, got {!r}".
                            format(type(text)))

        iterable = enumerate(text)
        for pos, char in iterable:
            for rexmatch, action in SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    yield action, m.group()
                elif callable(action):
                    yield action(m.group())

                consume(iterable, m.end() - pos - 1)
                break
            else:
                yield tokens.Error, char


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
    return Lexer().get_tokens(sql, encoding)
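For reference, a minimal usage sketch of the module-level tokenize() helper defined above; the token types named in the comment are illustrative of typical lexer output.

from sqlparse.lexer import tokenize

# tokenize() yields (tokentype, value) pairs straight from the lexer, e.g.
# Token.Keyword.DML for 'SELECT' and Token.Comment.Single for the comment.
for ttype, value in tokenize("SELECT 1  -- trailing comment"):
    print(ttype, repr(value))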