IMPALA-644 The shell takes too long to parse a query file.

The shell uses an external module called sqlparse to strip the comments from a query file. When sqlparse.format() is invoked, it runs several grouping functions on the tokenized query text; some of these functions are very slow and are not needed for comment removal. This change restricts sqlparse to invoking only the comment-grouping function when stripping comments is the only statement-level processing requested.

Change-Id: I3a067187667fcd3cd331156a325960a3de2db9c2
Reviewed-on: http://gerrit.ent.cloudera.com:8080/944
Reviewed-by: Ishaan Joshi <ishaan@cloudera.com>
Tested-by: jenkins
2025-12-31 06:02:51 -05:00 · 2013-11-05 10:28:09 -08:00
parent 81b80c702c
commit d3ffdbea15
1 changed file with 12 additions and 3 deletions
--- a/shell/ext-py/sqlparse-0.1.7/sqlparse/engine/__init__.py
+++ b/shell/ext-py/sqlparse-0.1.7/sqlparse/engine/__init__.py
@@ -48,16 +48,25 @@ class FilterStack(object):
            splitter = StatementFilter()
            stream = splitter.process(self, stream)

-        if self._grouping:
+        # Import StripCommentsFilter in the run() method to avoid a circular dependency.
+        # For stripping comments, the only grouping method we want to invoke is
+        # grouping.group_comments(); this considerably improves performance.
+        strip_comments_only = False
+        if self.stmtprocess and len(self.stmtprocess) == 1:
+          from sqlparse.filters import StripCommentsFilter
+          strip_comments_only = isinstance(self.stmtprocess[0], StripCommentsFilter)

+        if self._grouping:
            def _group(stream):
                for stmt in stream:
-                    grouping.group(stmt)
+                    if strip_comments_only:
+                        grouping.group_comments(stmt)
+                    else:
+                        grouping.group(stmt)
                    yield stmt
            stream = _group(stream)

        if self.stmtprocess:
-
            def _run1(stream):
                ret = []
                for stmt in stream: