Files
impala/shell/ext-py/sqlparse-0.1.7/sqlparse/engine/__init__.py
ishaan d3ffdbea15 IMPALA-644 The shell takes too long to parse a query file.
The shell uses an external module called sqlparse to strip the comments from a query file.
When sqlparse.format() is invoked, it runs several grouping functions on the
tokenized query text; some of these methods are very slow, and not needed for comment
removal. This change restricts sqlparse to only invoke the grouping function for removing
comments.

Change-Id: I3a067187667fcd3cd331156a325960a3de2db9c2
Reviewed-on: http://gerrit.ent.cloudera.com:8080/944
Reviewed-by: Ishaan Joshi <ishaan@cloudera.com>
Tested-by: jenkins
2014-01-08 10:54:11 -08:00

90 lines
2.8 KiB
Python

# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
"""filter"""
from sqlparse import lexer
from sqlparse.engine import grouping
from sqlparse.engine.filter import StatementFilter
# XXX remove this when cleanup is complete
Filter = object
class FilterStack(object):
def __init__(self):
self.preprocess = []
self.stmtprocess = []
self.postprocess = []
self.split_statements = False
self._grouping = False
def _flatten(self, stream):
for token in stream:
if token.is_group():
for t in self._flatten(token.tokens):
yield t
else:
yield token
def enable_grouping(self):
self._grouping = True
def full_analyze(self):
self.enable_grouping()
def run(self, sql, encoding=None):
stream = lexer.tokenize(sql, encoding)
# Process token stream
if self.preprocess:
for filter_ in self.preprocess:
stream = filter_.process(self, stream)
if (self.stmtprocess or self.postprocess or self.split_statements
or self._grouping):
splitter = StatementFilter()
stream = splitter.process(self, stream)
# import StripCommentsFilter in the run() method to avoid a circular dependency.
# For stripping comments, the only grouping method we want to invoke is
# grouping.group(), this considerably improves performance.
strip_comments_only = False
if self.stmtprocess and len(self.stmtprocess) == 1:
from sqlparse.filters import StripCommentsFilter
strip_comments_only = isinstance(self.stmtprocess[0], StripCommentsFilter)
if self._grouping:
def _group(stream):
for stmt in stream:
if strip_comments_only:
grouping.group_comments(stmt)
else:
grouping.group(stmt)
yield stmt
stream = _group(stream)
if self.stmtprocess:
def _run1(stream):
ret = []
for stmt in stream:
for filter_ in self.stmtprocess:
filter_.process(self, stmt)
ret.append(stmt)
return ret
stream = _run1(stream)
if self.postprocess:
def _run2(stream):
for stmt in stream:
stmt.tokens = list(self._flatten(stmt.tokens))
for filter_ in self.postprocess:
stmt = filter_.process(self, stmt)
yield stmt
stream = _run2(stream)
return stream