impala/shell/ext-py/sqlparse-0.3.1/sqlparse/lexer.py
David Knupp c26e3db4bd IMPALA-9362: Upgrade sqlparse 0.1.19 -> 0.3.1
Upgrades the impala-shell's bundled version of sqlparse to 0.3.1.
There were some API changes in 0.2.0+ that required a re-write of
the StripLeadingCommentFilter in impala_shell.py. A slight perf
optimization was also added to avoid using the filter altogether
if no leading comment is readily discernible.

As 0.1.19 was the last version of sqlparse to support python 2.6,
this patch also breaks Impala's compatibility with python 2.6.

No new tests were added, but all existing tests passed without
modification.

Change-Id: I77a1fd5ae311634a18ee04b8c389d8a3f3a6e001
Reviewed-on: http://gerrit.cloudera.org:8080/15642
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-04-17 05:04:23 +00:00
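
The rewritten StripLeadingCommentFilter itself lives in impala_shell.py and is not shown on this page. As a rough illustration of the fast path the commit message describes, a cheap check like the following could decide whether the filter needs to run at all (a minimal sketch; the helper name and logic are assumptions, not the actual Impala code):

def has_leading_comment(sql):
    # '--' opens a single-line comment, '/*' a block comment; if the
    # statement starts with neither, the comment-stripping filter can
    # be skipped entirely.
    stripped = sql.lstrip()
    return stripped.startswith('--') or stripped.startswith('/*')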

83 lines
2.5 KiB
Python

# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

"""SQL Lexer"""

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
from sqlparse.compat import text_type, file_types
from sqlparse.utils import consume


class Lexer(object):
    """Lexer

    Empty class. Kept for backwards compatibility.
    """

    @staticmethod
    def get_tokens(text, encoding=None):
        """
        Return an iterable of (tokentype, value) pairs generated from
        ``text``.

        ``text`` may be a string or a file-like object. Byte strings are
        decoded with ``encoding`` if given, otherwise as UTF-8, falling
        back to 'unicode-escape' on decode errors.
        """
        if isinstance(text, file_types):
            text = text.read()

        if isinstance(text, text_type):
            pass
        elif isinstance(text, bytes):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError(u"Expected text or file-like object, got {!r}".
                            format(type(text)))

        iterable = enumerate(text)
        for pos, char in iterable:
            # Try each lexer rule at the current position; the first
            # match wins.
            for rexmatch, action in SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    yield action, m.group()
                elif callable(action):
                    yield action(m.group())

                # Advance the iterator past the matched text so the
                # next rule lookup starts after it.
                consume(iterable, m.end() - pos - 1)
                break
            else:
                # No rule matched at this position.
                yield tokens.Error, char


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
    return Lexer().get_tokens(sql, encoding)
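
For reference, tokenize() can be driven directly. A short example follows; the token type names match sqlparse 0.3.x, and the output is shown approximately:

from sqlparse import lexer

for ttype, value in lexer.tokenize("SELECT 1;"):
    print(ttype, repr(value))

# Prints something like:
#   Token.Keyword.DML 'SELECT'
#   Token.Text.Whitespace ' '
#   Token.Literal.Number.Integer '1'
#   Token.Punctuation ';'

# Byte strings are decoded transparently, UTF-8 by default:
tokens_from_bytes = list(lexer.tokenize(b"SELECT 1;"))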