IMPALA-9362: Upgrade sqlparse 0.1.19 -> 0.3.1

Upgrades the impala-shell's bundled version of sqlparse to 0.3.1.
There were some API changes in 0.2.0+ that required a rewrite of
the StripLeadingCommentFilter in impala_shell.py. A slight performance
optimization was also added to avoid using the filter altogether
when no leading comment is readily discernible.
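
A minimal sketch of what the rewritten logic can look like against the
sqlparse 0.3.1 API follows. strip_leading_comment is a hypothetical
helper named here for illustration, not the actual impala_shell.py
code; it also shows the fast-path check that skips parsing when the
text cannot start with a comment.

    import sqlparse
    from sqlparse import sql, tokens as T

    def strip_leading_comment(query):
        """Return (leading_comment, remainder) for query. Sketch only."""
        # Fast path: if the text does not begin with a comment
        # delimiter, skip the comparatively expensive parse entirely.
        stripped = query.lstrip()
        if not (stripped.startswith('--') or stripped.startswith('/*')):
            return None, query
        statement = sqlparse.parse(query)[0]
        consumed = 0
        for token in statement.tokens:
            # sqlparse 0.2.0+ groups leading comments into sql.Comment,
            # and Token.is_whitespace became a property (it was a
            # method, is_whitespace(), in 0.1.19).
            if (isinstance(token, sql.Comment) or token.ttype in T.Comment
                    or token.is_whitespace):
                consumed += len(token.value)
            else:
                break
        return (query[:consumed] or None), query[consumed:]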

As 0.1.19 was the last version of sqlparse to support Python 2.6,
this patch also breaks Impala's compatibility with Python 2.6.

No new tests were added, but all existing tests passed without
modification.

Change-Id: I77a1fd5ae311634a18ee04b8c389d8a3f3a6e001
Reviewed-on: http://gerrit.cloudera.org:8080/15642
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Author: David Knupp
Date: 2020-04-03 00:27:14 -07:00
Committed-by: Impala Public Jenkins
Parent: 327ec29c48
Commit: c26e3db4bd
99 changed files with 6831 additions and 6179 deletions

@@ -613,30 +613,33 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
shell/ext-py/sqlparse-0.1.19 (most parts): 3-clause BSD
shell/ext-py/sqlparse-0.3.1: 3-clause BSD
Copyright (c) 2009, Andi Albrecht <albrecht.andi@gmail.com>
Copyright (c) 2016, Andi Albrecht <albrecht.andi@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
* Neither the name of the authors nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
@@ -665,34 +668,6 @@ THE SOFTWARE.
--------------------------------------------------------------------------------
shell/ext-py/sqlparse-0.1.19/sqlparse/pipeline.py:
Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
be/src/thirdparty/mustache: Apache 2.0 license
be/src/thirdparty/pcg-cpp-0.98: Apache 2.0 license
be/src/expr/hll-bias.h: Apache 2.0 license

@@ -72,7 +72,7 @@ can do so through the environment variables and scripts listed below.
| JAVA_VERSION | "java-7-oracle-amd64" | Can override to set a local Java version. |
| JAVA | "${JAVA_HOME}/bin/java" | Java binary location. |
| CLASSPATH | | See bin/set-classpath.sh for details. |
| PYTHONPATH | Will be changed to include: "${IMPALA_HOME}/shell/gen-py" "${IMPALA_HOME}/testdata" "${THRIFT_HOME}/python/lib/python2.7/site-packages" "${HIVE_HOME}/lib/py" "${IMPALA_HOME}/shell/ext-py/prettytable-0.7.1/dist/prettytable-0.7.1" "${IMPALA_HOME}/shell/ext-py/sasl-0.1.1/dist/sasl-0.1.1-py2.7-linux-x "${IMPALA_HOME}/shell/ext-py/sqlparse-0.1.19/dist/sqlparse-0.1.19-py2 |
| PYTHONPATH | Will be changed to include: "${IMPALA_HOME}/shell/gen-py" "${IMPALA_HOME}/testdata" "${THRIFT_HOME}/python/lib/python2.7/site-packages" "${HIVE_HOME}/lib/py" |
##### Source Directories for Impala

@@ -45,7 +45,7 @@ shell/ext-py/bitarray-0.9.0/*
shell/ext-py/prettytable-0.7.1/*
shell/ext-py/sasl-0.1.1/*
shell/ext-py/six-1.14.0/*
shell/ext-py/sqlparse-0.1.19/*
shell/ext-py/sqlparse-0.3.1/*
shell/ext-py/thrift_sasl-0.4.2/*
www/d3.v3.min.js
www/jquery/jquery-3.4.1.min.js

@@ -56,10 +56,7 @@ setuptools == 36.8.0
setuptools-scm == 1.15.4
sh == 1.11
six == 1.14.0
# Note: This version for sqlparse is not what is used for the shell. The shell uses
# a checked-in version of sqlparse (see shell/ext-py). This version is used primarily
# for dataload.
sqlparse == 0.1.19
sqlparse == 0.3.1
texttable == 0.8.3
# For dev purposes, not used in scripting. Version 1.2.1 is the latest that supports 2.6.

shell/.gitignore (9 changed lines)
@@ -8,9 +8,9 @@ ext-py/six-1.14.0/six.egg-info/
ext-py/prettytable-0.7.1/dist/
ext-py/prettytable-0.7.1/build
ext-py/prettytable-0.7.1/prettytable.egg-info
ext-py/sqlparse-0.1.19/dist/
ext-py/sqlparse-0.1.19/build/
ext-py/sqlparse-0.1.19/sqlparse.egg-info/
ext-py/sqlparse-0.3.1/dist/
ext-py/sqlparse-0.3.1/build/
ext-py/sqlparse-0.3.1/sqlparse.egg-info/
ext-py/bitarray-0.9.0/bitarray.egg-info/
ext-py/bitarray-0.9.0/dist/
ext-py/thrift_sasl-0.4.2/dist/
@@ -21,4 +21,7 @@ ext-py/thrift_sasl-0.4.2/six.egg-info/
# clean phase. Previous version of deps should be kept here for cleaning otherwise they
# may cause a build failure. Also the ignore path must be changed to the root folder
# so "git clean -Xdf" will work.
ext-py/six-1.11.0/
ext-py/sqlparse-0.1.7/
ext-py/sqlparse-0.1.19/
ext-py/thrift_sasl-0.4.1/

@@ -1,14 +0,0 @@
language: python
python: 2.7
env:
- TOX_ENV=py26
- TOX_ENV=py27
- TOX_ENV=py33
- TOX_ENV=py34
- TOX_ENV=pypy
before_install:
- sudo apt-get install pypy
install:
- pip install tox
script:
- tox -e $TOX_ENV

@@ -1,31 +0,0 @@
python-sqlparse is written and maintained by Andi Albrecht <albrecht.andi@gmail.com>.
This module contains code (namely the lexer and filter mechanism) from
the pygments project that was written by Georg Brandl.
Alphabetical list of contributors:
* Alexander Beedie <ayembee@gmail.com>
* Alexey Malyshev <nostrict@gmail.com>
* casey <casey@cloudera.com>
* Cristian Orellana <cristiano@groupon.com>
* Darik Gamble <darik.gamble@gmail.com>
* Florian Bauer <florian.bauer@zmdi.com>
* Gavin Wahl <gwahl@fusionbox.com>
* JacekPliszka <Jacek.Pliszka@gmail.com>
* Jesús Leganés Combarro "Piranna" <piranna@gmail.com>
* Kevin Jing Qiu <kevin.jing.qiu@gmail.com>
* Michael Schuller <chick@mschuller.net>
* Mike Amy <cocoade@googlemail.com>
* mulos <daniel.strackbein@gmail.com>
* Piet Delport <pjdelport@gmail.com>
* Prudhvi Vatala <pvatala@gmail.com>
* quest <quest@wonky.windwards.net>
* Robert Nix <com.github@rnix.org>
* Rocky Meza <rmeza@fusionbox.com>
* Ryan Wooden <rygwdn@gmail.com>
* spigwitmer <itgpmc@gmail.com>
* Tim Graham <timograham@gmail.com>
* Victor Hahn <info@victor-hahn.de>
* vthriller <farreva232@yandex.ru>
* wayne.wuw <wayne.wuw@alibaba-inc.com>
* Yago Riveiro <yago.riveiro@gmail.com>

@@ -1,302 +0,0 @@
Release 0.1.19 (Mar 07, 2015)
-----------------------------
Bug Fixes
* Fix IndexError when statement contains WITH clauses (issue205).
Release 0.1.18 (Oct 25, 2015)
-----------------------------
Bug Fixes
* Remove universal wheel support, added in 0.1.17 by mistake.
Release 0.1.17 (Oct 24, 2015)
-----------------------------
Enhancements
* Speed up parsing of large SQL statements (pull request: issue201, fixes the
following issues: issue199, issue135, issue62, issue41, by Ryan Wooden).
Bug Fixes
* Fix another splitter bug regarding DECLARE (issue194).
Misc
* Packages on PyPI are signed from now on.
Release 0.1.16 (Jul 26, 2015)
-----------------------------
Bug Fixes
* Fix a regression in get_alias() introduced in 0.1.15 (issue185).
* Fix a bug in the splitter regarding DECLARE (issue193).
* sqlformat command line tool doesn't duplicat newlines anymore (issue191).
* Don't mix up MySQL comments starting with hash and MSSQL
temp tables (issue192).
* Statement.get_type() now ignores comments at the beginning of
a statement (issue186).
Release 0.1.15 (Apr 15, 2015)
-----------------------------
Bug Fixes
* Fix a regression for identifiers with square bracktes
notation (issue153, by darikg).
* Add missing SQL types (issue154, issue155, issue156, by jukebox).
* Fix parsing of multi-line comments (issue172, by JacekPliszka).
* Fix parsing of escaped backslashes (issue174, by caseyching).
* Fix parsing of identifiers starting with underscore (issue175).
* Fix misinterpretation of IN keyword (issue183).
Enhancements
* Improve formatting of HAVING statements.
* Improve parsing of inline comments (issue163).
* Group comments to parent object (issue128, issue160).
* Add double precision builtin (issue169, by darikg).
* Add support for square bracket array indexing (issue170, issue176,
issue177 by darikg).
* Improve grouping of aliased elements (issue167, by darikg).
* Support comments starting with '#' character (issue178).
Release 0.1.14 (Nov 30, 2014)
-----------------------------
Bug Fixes
* Floats in UPDATE statements are now handled correctly (issue145).
* Properly handle string literals in comparisons (issue148, change proposed
by aadis).
* Fix indentation when using tabs (issue146).
Enhancements
* Improved formatting in list when newlines precede commas (issue140).
Release 0.1.13 (Oct 09, 2014)
-----------------------------
Bug Fixes
* Fix a regression in handling of NULL keywords introduced in 0.1.12.
Release 0.1.12 (Sep 20, 2014)
-----------------------------
Bug Fixes
* Fix handling of NULL keywords in aliased identifiers.
* Fix SerializerUnicode to split unquoted newlines (issue131, by Michael Schuller).
* Fix handling of modulo operators without spaces (by gavinwahl).
Enhancements
* Improve parsing of identifier lists containing placeholders.
* Speed up query parsing of unquoted lines (by Michael Schuller).
Release 0.1.11 (Feb 07, 2014)
-----------------------------
Bug Fixes
* Fix incorrect parsing of string literals containing line breaks (issue118).
* Fix typo in keywords, add MERGE, COLLECT keywords (issue122/124,
by Cristian Orellana).
* Improve parsing of string literals in columns.
* Fix parsing and formatting of statements containing EXCEPT keyword.
* Fix Function.get_parameters() (issue126/127, by spigwitmer).
Enhancements
* Classify DML keywords (issue116, by Victor Hahn).
* Add missing FOREACH keyword.
* Grouping of BEGIN/END blocks.
Other
* Python 2.5 isn't automatically tested anymore, neither Travis nor Tox
still support it out of the box.
Release 0.1.10 (Nov 02, 2013)
-----------------------------
Bug Fixes
* Removed buffered reading again, it obviously causes wrong parsing in some rare
cases (issue114).
* Fix regression in setup.py introduced 10 months ago (issue115).
Enhancements
* Improved support for JOINs, by Alexander Beedie.
Release 0.1.9 (Sep 28, 2013)
----------------------------
Bug Fixes
* Fix an regression introduced in 0.1.5 where sqlparse didn't properly
distinguished between single and double quoted strings when tagging
identifier (issue111).
Enhancements
* New option to truncate long string literals when formatting.
* Scientific numbers are pares correctly (issue107).
* Support for arithmetic expressions (issue109, issue106; by prudhvi).
Release 0.1.8 (Jun 29, 2013)
----------------------------
Bug Fixes
* Whitespaces within certain keywords are now allowed (issue97, patch proposed
by xcombelle).
Enhancements
* Improve parsing of assignments in UPDATE statements (issue90).
* Add STRAIGHT_JOIN statement (by Yago Riveiro).
* Function.get_parameters() now returns the parameter if only one parameter is
given (issue94, by wayne.wuw).
* sqlparse.split() now removes leading and trailing whitespaces from splitted
statements.
* Add USE as keyword token (by mulos).
* Improve parsing of PEP249-style placeholders (issue103).
Release 0.1.7 (Apr 06, 2013)
----------------------------
Bug Fixes
* Fix Python 3 compatibility of sqlformat script (by Piet Delport).
* Fix parsing of SQL statements that contain binary data (by Alexey
Malyshev).
* Fix a bug where keywords were identified as aliased identifiers in
invalid SQL statements.
* Fix parsing of identifier lists where identifiers are keywords too
(issue10).
Enhancements
* Top-level API functions now accept encoding keyword to parse
statements in certain encodings more reliable (issue20).
* Improve parsing speed when SQL contains CLOBs or BLOBs (issue86).
* Improve formatting of ORDER BY clauses (issue89).
* Formatter now tries to detect runaway indentations caused by
parsing errors or invalid SQL statements. When re-indenting such
statements the formatter flips back to column 0 before going crazy.
Other
* Documentation updates.
Release 0.1.6 (Jan 01, 2013)
----------------------------
sqlparse is now compatible with Python 3 without any patches. The
Python 3 version is generated during install by 2to3. You'll need
distribute to install sqlparse for Python 3.
Bug Fixes
* Fix parsing error with dollar-quoted procedure bodies (issue83).
Other
* Documentation updates.
* Test suite now uses tox and py.test.
* py3k fixes (by vthriller).
* py3k fixes in setup.py (by Florian Bauer).
* setup.py now requires distribute (by Florian Bauer).
Release 0.1.5 (Nov 13, 2012)
----------------------------
Bug Fixes
* Improve handling of quoted identifiers (issue78).
* Improve grouping and formatting of identifiers with operators (issue53).
* Improve grouping and formatting of concatenated strings (issue53).
* Improve handling of varchar() (by Mike Amy).
* Clean up handling of various SQL elements.
* Switch to py.test and clean up tests.
* Several minor fixes.
Other
* Deprecate sqlparse.SQLParseError. Please use
sqlparse.exceptions.SQLParseError instead.
* Add caching to speed up processing.
* Add experimental filters for token processing.
* Add sqlformat.parsestream (by quest).
Release 0.1.4 (Apr 20, 2012)
----------------------------
Bug Fixes
* Avoid "stair case" effects when identifiers, functions,
placeholders or keywords are mixed in identifier lists (issue45,
issue49, issue52) and when asterisks are used as operators
(issue58).
* Make keyword detection more restrict (issue47).
* Improve handling of CASE statements (issue46).
* Fix statement splitting when parsing recursive statements (issue57,
thanks to piranna).
* Fix for negative numbers (issue56, thanks to kevinjqiu).
* Pretty format comments in identifier lists (issue59).
* Several minor bug fixes and improvements.
Release 0.1.3 (Jul 29, 2011)
----------------------------
Bug Fixes
* Improve parsing of floats (thanks to Kris).
* When formatting a statement a space before LIMIT was removed (issue35).
* Fix strip_comments flag (issue38, reported by ooberm...@gmail.com).
* Avoid parsing names as keywords (issue39, reported by djo...@taket.org).
* Make sure identifier lists in subselects are grouped (issue40,
reported by djo...@taket.org).
* Split statements with IF as functions correctly (issue33 and
issue29, reported by charles....@unige.ch).
* Relax detection of keywords, esp. when used as function names
(issue36, nyuhu...@gmail.com).
* Don't treat single characters as keywords (issue32).
* Improve parsing of stand-alone comments (issue26).
* Detection of placeholders in paramterized queries (issue22,
reported by Glyph Lefkowitz).
* Add parsing of MS Access column names with braces (issue27,
reported by frankz...@gmail.com).
Other
* Replace Django by Flask in App Engine frontend (issue11).
Release 0.1.2 (Nov 23, 2010)
----------------------------
Bug Fixes
* Fixed incorrect detection of keyword fragments embed in names (issue7,
reported and initial patch by andyboyko).
* Stricter detection of identfier aliases (issue8, reported by estama).
* WHERE grouping consumed closing parenthesis (issue9, reported by estama).
* Fixed an issue with trailing whitespaces (reported by Kris).
* Better detection of escaped single quotes (issue13, reported by
Martin Brochhaus, patch by bluemaro with test case by Dan Carley).
* Ignore identifier in double-quotes when changing cases (issue 21).
* Lots of minor fixes targeting encoding, indentation, statement
parsing and more (issues 12, 14, 15, 16, 18, 19).
* Code cleanup with a pinch of refactoring.
Release 0.1.1 (May 6, 2009)
---------------------------
Bug Fixes
* Lexers preserves original line breaks (issue1).
* Improved identifier parsing: backtick quotes, wildcards, T-SQL variables
prefixed with @.
* Improved parsing of identifier lists (issue2).
* Recursive recognition of AS (issue4) and CASE.
* Improved support for UPDATE statements.
Other
* Code cleanup and better test coverage.
Release 0.1.0 (Apr 8, 2009)
---------------------------
* Initial release.

@@ -1,56 +0,0 @@
python-sqlparse - Parse SQL statements
======================================
sqlparse is a non-validating SQL parser module for Python.
|buildstatus|_
Install
-------
Run::
python setup.py install
to install python-sqlparse on your system.
python-sqlparse is compatible with Python 2 (>= 2.5) and Python 3 (>= 3.2).
Run Tests
---------
To run the test suite run::
tox
Note, you'll need tox installed, of course.
Links
-----
Project Page
https://github.com/andialbrecht/sqlparse
Documentation
http://readthedocs.org/docs/sqlparse/en/latest/
Discussions
http://groups.google.com/group/sqlparse
Issues/Bugs
https://github.com/andialbrecht/sqlparse/issues
Online Demo
http://sqlformat.org
python-sqlparse is licensed under the BSD license.
Parts of the code are based on pygments written by Georg Brandl and others.
pygments-Homepage: http://pygments.org/
.. |buildstatus| image:: https://secure.travis-ci.org/andialbrecht/sqlparse.png?branch=master
.. _buildstatus: http://travis-ci.org/#!/andialbrecht/sqlparse

@@ -1,109 +0,0 @@
#!/usr/bin/env python
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
import optparse
import os
import sys
import sqlparse
from sqlparse.exceptions import SQLParseError
_CASE_CHOICES = ['upper', 'lower', 'capitalize']
parser = optparse.OptionParser(usage='%prog [OPTIONS] FILE, ...',
version='%%prog %s' % sqlparse.__version__)
parser.set_description(('Format FILE according to OPTIONS. Use "-" as FILE '
'to read from stdin.'))
parser.add_option('-v', '--verbose', dest='verbose', action='store_true')
parser.add_option('-o', '--outfile', dest='outfile', metavar='FILE',
help='write output to FILE (defaults to stdout)')
group = parser.add_option_group('Formatting Options')
group.add_option('-k', '--keywords', metavar='CHOICE',
dest='keyword_case', choices=_CASE_CHOICES,
help=('change case of keywords, CHOICE is one of %s'
% ', '.join('"%s"' % x for x in _CASE_CHOICES)))
group.add_option('-i', '--identifiers', metavar='CHOICE',
dest='identifier_case', choices=_CASE_CHOICES,
help=('change case of identifiers, CHOICE is one of %s'
% ', '.join('"%s"' % x for x in _CASE_CHOICES)))
group.add_option('-l', '--language', metavar='LANG',
dest='output_format', choices=['python', 'php'],
help=('output a snippet in programming language LANG, '
'choices are "python", "php"'))
group.add_option('--strip-comments', dest='strip_comments',
action='store_true', default=False,
help='remove comments')
group.add_option('-r', '--reindent', dest='reindent',
action='store_true', default=False,
help='reindent statements')
group.add_option('--indent_width', dest='indent_width', default=2,
help='indentation width (defaults to 2 spaces)')
_FORMATTING_GROUP = group
def _error(msg, exit_=None):
"""Print msg and optionally exit with return code exit_."""
sys.stderr.write('[ERROR] %s\n' % msg)
if exit_ is not None:
sys.exit(exit_)
def _build_formatter_opts(options):
"""Convert command line options to dictionary."""
d = {}
for option in _FORMATTING_GROUP.option_list:
d[option.dest] = getattr(options, option.dest)
return d
def main():
options, args = parser.parse_args()
if options.verbose:
sys.stderr.write('Verbose mode\n')
if len(args) != 1:
_error('No input data.')
parser.print_usage()
sys.exit(1)
if '-' in args: # read from stdin
data = sys.stdin.read()
else:
try:
data = ''.join(open(args[0]).readlines())
except OSError:
err = sys.exc_info()[1] # Python 2.5 compatibility
_error('Failed to read %s: %s' % (args[0], err), exit_=1)
if options.outfile:
try:
stream = open(options.outfile, 'w')
except OSError:
err = sys.exc_info()[1] # Python 2.5 compatibility
_error('Failed to open %s: %s' % (options.outfile, err), exit_=1)
else:
stream = sys.stdout
formatter_opts = _build_formatter_opts(options)
try:
formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
except SQLParseError:
err = sys.exc_info()[1] # Python 2.5 compatibility
_error('Invalid options: %s' % err, exit_=1)
s = sqlparse.format(data, **formatter_opts)
if sys.version_info < (3,):
s = s.encode('utf-8', 'replace')
stream.write(s)
stream.flush()
if __name__ == '__main__':
main()

@@ -1,64 +0,0 @@
.. _analyze:
Analyzing the Parsed Statement
==============================
When the :meth:`~sqlparse.parse` function is called the returned value
is a tree-ish representation of the analyzed statements. The returned
objects can be used by applications to retrieve further information about
the parsed SQL.
Base Classes
------------
All returned objects inherit from these base classes.
The :class:`~sqlparse.sql.Token` class represents a single token and
:class:`~sqlparse.sql.TokenList` class is a group of tokens.
The latter provides methods for inspecting it's child tokens.
.. autoclass:: sqlparse.sql.Token
:members:
.. autoclass:: sqlparse.sql.TokenList
:members:
SQL Representing Classes
------------------------
The following classes represent distinct parts of a SQL statement.
.. autoclass:: sqlparse.sql.Statement
:members:
.. autoclass:: sqlparse.sql.Comment
:members:
.. autoclass:: sqlparse.sql.Identifier
:members:
.. autoclass:: sqlparse.sql.IdentifierList
:members:
.. autoclass:: sqlparse.sql.Where
:members:
.. autoclass:: sqlparse.sql.Case
:members:
.. autoclass:: sqlparse.sql.Parenthesis
:members:
.. autoclass:: sqlparse.sql.If
:members:
.. autoclass:: sqlparse.sql.For
:members:
.. autoclass:: sqlparse.sql.Assignment
:members:
.. autoclass:: sqlparse.sql.Comparison
:members:

@@ -1,57 +0,0 @@
:mod:`sqlparse` -- Parse SQL statements
=======================================
.. module:: sqlparse
:synopsis: Parse SQL statements.
The :mod:`sqlparse` module provides the following functions on module-level.
.. autofunction:: sqlparse.split
.. autofunction:: sqlparse.format
.. autofunction:: sqlparse.parse
In most cases there's no need to set the `encoding` parameter. If
`encoding` is not set, sqlparse assumes that the given SQL statement
is encoded either in utf-8 or latin-1.
.. _formatting:
Formatting of SQL Statements
----------------------------
The :meth:`~sqlparse.format` function accepts the following keyword arguments.
``keyword_case``
Changes how keywords are formatted. Allowed values are "upper", "lower"
and "capitalize".
``identifier_case``
Changes how identifiers are formatted. Allowed values are "upper", "lower",
and "capitalize".
``strip_comments``
If ``True`` comments are removed from the statements.
``truncate_strings``
If ``truncate_strings`` is a positive integer, string literals longer than
the given value will be truncated.
``truncate_char`` (default: "[...]")
If long string literals are truncated (see above) this value will be append
to the truncated string.
``reindent``
If ``True`` the indentations of the statements are changed.
``indent_tabs``
If ``True`` tabs instead of spaces are used for indentation.
``indent_width``
The width of the indentation, defaults to 2.
``output_format``
If given the output is additionally formatted to be used as a variable
in a programming language. Allowed values are "python" and "php".

@@ -1,13 +0,0 @@
.. _changes:
Changes in python-sqlparse
==========================
Upcoming Deprecations
---------------------
* ``sqlparse.SQLParseError`` is deprecated (version 0.1.5), use
``sqlparse.exceptions.SQLParseError`` instead.
.. include:: ../../CHANGES

@@ -1,200 +0,0 @@
# -*- coding: utf-8 -*-
#
# python-sqlparse documentation build configuration file, created by
# sphinx-quickstart on Thu Feb 26 08:19:28 2009.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import datetime
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.append(os.path.abspath('.'))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../'))
import sqlparse
# -- General configuration -----------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage',
'sphinx.ext.autosummary']
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'python-sqlparse'
copyright = u'%s, Andi Albrecht' % datetime.date.today().strftime('%Y')
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = sqlparse.__version__
# The full version, including alpha/beta/rc tags.
release = sqlparse.__version__
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of documents that shouldn't be included in the build.
#unused_docs = []
# List of directories, relative to source directory, that shouldn't be searched
# for source files.
exclude_trees = []
# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'tango'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. Major themes that come with
# Sphinx are currently 'default' and 'sphinxdoc'.
#html_theme = 'agogo'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = [os.path.abspath('../')]
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
#html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_use_modindex = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = ''
# Output file base name for HTML help builder.
htmlhelp_basename = 'python-sqlparsedoc'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'python-sqlparse.tex', ur'python-sqlparse Documentation',
ur'Andi Albrecht', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_use_modindex = True
todo_include_todos = True

@@ -1,61 +0,0 @@
.. python-sqlparse documentation master file, created by
sphinx-quickstart on Thu Feb 26 08:19:28 2009.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
python-sqlparse
===============
:mod:`sqlparse` is a non-validating SQL parser for Python.
It provides support for parsing, splitting and formatting SQL statements.
The module is compatible with Python 2 (>= 2.5) and Python 3 (>= 3.2)
and released under the terms of the `New BSD license
<http://www.opensource.org/licenses/bsd-license.php>`_.
Visit the project page at https://github.com/andialbrecht/sqlparse for
further information about this project.
tl;dr
-----
.. code-block:: bash
$ pip install sqlparse
$ python
>>> import sqlparse
>>> print(sqlparse.format('select * from foo', reindent=True))
select *
from foo
>>> parsed = sqlparse.parse('select * from foo')[0]
>>> parsed.tokens
[<DML 'select' at 0x7f22c5e15368>, <Whitespace ' ' at 0x7f22c5e153b0>, <Wildcard '*']
>>>
Contents
--------
.. toctree::
:maxdepth: 2
intro
api
analyzing
ui
changes
indices
Resources
---------
Project page
https://github.com/andialbrecht/sqlparse
Bug tracker
https://github.com/andialbrecht/sqlparse/issues
Documentation
http://sqlparse.readthedocs.org/

@@ -1,7 +0,0 @@
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

@@ -1,143 +0,0 @@
Introduction
============
Download & Installation
-----------------------
The latest released version can be obtained from the `Python Package
Index (PyPI) <http://pypi.python.org/pypi/sqlparse/>`_. To extract the
install the module system-wide run
.. code-block:: bash
$ tar cvfz python-sqlparse-VERSION.tar.gz
$ cd python-sqlparse/
$ sudo python setup.py install
Alternatively you can install :mod:`sqlparse` using :command:`pip`:
.. code-block:: bash
$ pip install sqlparse
Getting Started
---------------
The :mod:`sqlparse` module provides three simple functions on module level
to achieve some common tasks when working with SQL statements.
This section shows some simple usage examples of these functions.
Let's get started with splitting a string containing one or more SQL
statements into a list of single statements using :meth:`~sqlparse.split`:
.. code-block:: python
>>> import sqlparse
>>> sql = 'select * from foo; select * from bar;'
>>> sqlparse.split(sql)
[u'select * from foo; ', u'select * from bar;']
The end of a statement is identified by the occurrence of a semicolon.
Semicolons within certain SQL constructs like ``BEGIN ... END`` blocks
are handled correctly by the splitting mechanism.
SQL statements can be beautified by using the :meth:`~sqlarse.format` function.
.. code-block:: python
>>> sql = 'select * from foo where id in (select id from bar);'
>>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
SELECT *
FROM foo
WHERE id IN
(SELECT id
FROM bar);
In this case all keywords in the given SQL are uppercased and the
indentation is changed to make it more readable. Read :ref:`formatting` for
a full reference of supported options given as keyword arguments
to that function.
Before proceeding with a closer look at the internal representation of
SQL statements, you should be aware that this SQL parser is intentionally
non-validating. It assumes that the given input is at least some kind
of SQL and then it tries to analyze as much as possible without making
too much assumptions about the concrete dialect or the actual statement.
At least it's up to the user of this API to interpret the results right.
When using the :meth:`~sqlparse.parse` function a tuple of
:class:`~sqlparse.sql.Statement` instances is returned:
.. code-block:: python
>>> sql = 'select * from "someschema"."mytable" where id = 1'
>>> parsed = sqlparse.parse(sql)
>>> parsed
(<Statement 'select...' at 0x9ad08ec>,)
Each item of the tuple is a single statement as identified by the above
mentioned :meth:`~sqlparse.split` function. So let's grab the only element
from that list and have a look at the ``tokens`` attribute.
Sub-tokens are stored in this attribute.
.. code-block:: python
>>> stmt = parsed[0] # grab the Statement object
>>> stmt.tokens
(<DML 'select' at 0x9b63c34>,
<Whitespace ' ' at 0x9b63e8c>,
<Operator '*' at 0x9b63e64>,
<Whitespace ' ' at 0x9b63c5c>,
<Keyword 'from' at 0x9b63c84>,
<Whitespace ' ' at 0x9b63cd4>,
<Identifier '"somes...' at 0x9b5c62c>,
<Whitespace ' ' at 0x9b63f04>,
<Where 'where ...' at 0x9b5caac>)
Each object can be converted back to a string at any time:
.. code-block:: python
>>> unicode(stmt) # str(stmt) for Python 3
u'select * from "someschema"."mytable" where id = 1'
>>> unicode(stmt.tokens[-1]) # or just the WHERE part
u'where id = 1'
Details of the returned objects are described in :ref:`analyze`.
Development & Contributing
--------------------------
To check out the latest sources of this module run
.. code-block:: bash
$ git clone git://github.com/andialbrecht/sqlparse.git
to check out the latest sources from the repository.
:mod:`sqlparse` is currently tested under Python 2.5, 2.6, 2.7, 3.2 and
pypy. Tests are automatically run on each commit and for each pull
request on Travis: https://travis-ci.org/andialbrecht/sqlparse
Make sure to run the test suite before sending a pull request by running
.. code-block:: bash
$ tox
It's ok, if :command:`tox` doesn't find all interpreters listed
above. Ideally a Python 2 and a Python 3 version should be tested
locally.
Please file bug reports and feature requests on the project site at
https://github.com/andialbrecht/sqlparse/issues/new or if you have
code to contribute upload it to http://codereview.appspot.com and
add albrecht.andi@googlemail.com as reviewer.
For more information about the review tool and how to use it visit
it's project page: http://code.google.com/p/rietveld.

@@ -1,15 +0,0 @@
User Interfaces
===============
``sqlformat``
The ``sqlformat`` command line script ist distributed with the module.
Run :command:`sqlformat --help` to list available options and for usage
hints.
``sqlformat.appspot.com``
An example `Google App Engine <http://code.google.com/appengine/>`_
application that exposes the formatting features using a web front-end.
See http://sqlformat.appspot.com for details.
The source for this application is available from a source code check out
of the :mod:`sqlparse` module (see :file:`extras/appengine`).

@@ -1,65 +0,0 @@
.\" Based on template /usr/share/man-db/examples/manpage.example provided by
.\" Tom Christiansen <tchrist@jhereg.perl.com>.
.TH SQLFORMAT "1" "December 2010" "python-sqlparse version: 0.1.2" "User Commands"
.SH NAME
sqlformat \- reformat SQL
.SH SYNOPSIS
.PP
.B sqlformat
[
.I "OPTION"
] ... [
.I "FILE"
] ...
.SH DESCRIPTION
.\" Putting a newline after each sentence can generate better output.
The `sqlformat' command-line tool can be used to reformat SQL file according to
specified options or prepare a snippet in in some programming language (only
Python and PHP currently supported).
Use "-" for
.I FILE
to read from stdin.
.SH OPTIONS
.TP
\fB\-i\fR \fICHOICE\fR|\fB\-\-identifiers\fR=\fIFORMAT\fR
Change case of identifiers.
.I FORMAT
is one of "upper", "lower", "capitalize".
.TP
\fB\-k\fR \fICHOICE\fR|\fB\-\-keywords\fR=\fIFORMAT\fR
Change case of keywords.
.I FORMAT
is one of "upper", "lower", "capitalize".
.TP
\fB\-l\fR \fICHOICE\fR|\fB\-\-language\fR=\fILANG\fR
Output a snippet in programming language LANG.
.I LANG
can be "python", "php".
.TP
\fB\-o\fR \fIFILE\fR|\fB\-\-outfile\fR=\fIFILE\fR
Write output to
.I FILE
(defaults to stdout).
.TP
.BR \-r | \-\-reindent
Reindent statements.
.TP
\fB\-\-indent_width\fR=\fIINDENT_WIDTH\fR
Set indent width to
.IR INDENT_WIDTH .
Default is 2 spaces.
.TP
\fB\-\-strip\-comments
Remove comments.
.TP
.BR \-h | \-\-help
Print a short help message and exit.
All subsequent options are ignored.
.TP
.BR --verbose
Verbose output.
.TP
.BR \-\-version
Print program's version number and exit.
.SH AUTHORS
This man page was written by Andriy Senkovych <jolly_roger@itblog.org.ua>

@@ -1,6 +0,0 @@
[pytest]
pep8ignore =
extras/* ALL
examples/* ALL
docs/* ALL
* E125 E127

@@ -1,80 +0,0 @@
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
"""filter"""
from sqlparse import lexer
from sqlparse.engine import grouping
from sqlparse.engine.filter import StatementFilter
# XXX remove this when cleanup is complete
Filter = object
class FilterStack(object):
def __init__(self):
self.preprocess = []
self.stmtprocess = []
self.postprocess = []
self.split_statements = False
self._grouping = False
def _flatten(self, stream):
for token in stream:
if token.is_group():
for t in self._flatten(token.tokens):
yield t
else:
yield token
def enable_grouping(self):
self._grouping = True
def full_analyze(self):
self.enable_grouping()
def run(self, sql, encoding=None):
stream = lexer.tokenize(sql, encoding)
# Process token stream
if self.preprocess:
for filter_ in self.preprocess:
stream = filter_.process(self, stream)
if (self.stmtprocess or self.postprocess or self.split_statements
or self._grouping):
splitter = StatementFilter()
stream = splitter.process(self, stream)
if self._grouping:
def _group(stream):
for stmt in stream:
grouping.group(stmt)
yield stmt
stream = _group(stream)
if self.stmtprocess:
def _run1(stream):
ret = []
for stmt in stream:
for filter_ in self.stmtprocess:
filter_.process(self, stmt)
ret.append(stmt)
return ret
stream = _run1(stream)
if self.postprocess:
def _run2(stream):
for stmt in stream:
stmt.tokens = list(self._flatten(stmt.tokens))
for filter_ in self.postprocess:
stmt = filter_.process(self, stmt)
yield stmt
stream = _run2(stream)
return stream

@@ -1,112 +0,0 @@
# -*- coding: utf-8 -*-
from sqlparse.sql import Statement, Token
from sqlparse import tokens as T
class StatementFilter:
"Filter that split stream at individual statements"
def __init__(self):
self._in_declare = False
self._in_dbldollar = False
self._is_create = False
self._begin_depth = 0
def _reset(self):
"Set the filter attributes to its default values"
self._in_declare = False
self._in_dbldollar = False
self._is_create = False
self._begin_depth = 0
def _change_splitlevel(self, ttype, value):
"Get the new split level (increase, decrease or remain equal)"
# PostgreSQL
if (ttype == T.Name.Builtin
and value.startswith('$') and value.endswith('$')):
if self._in_dbldollar:
self._in_dbldollar = False
return -1
else:
self._in_dbldollar = True
return 1
elif self._in_dbldollar:
return 0
# ANSI
if ttype not in T.Keyword:
return 0
unified = value.upper()
if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
self._in_declare = True
return 1
if unified == 'BEGIN':
self._begin_depth += 1
if self._in_declare or self._is_create:
# FIXME(andi): This makes no sense.
return 1
return 0
if unified in ('END IF', 'END FOR'):
return -1
if unified == 'END':
# Should this respect a preceeding BEGIN?
# In CASE ... WHEN ... END this results in a split level -1.
self._begin_depth = max(0, self._begin_depth - 1)
return -1
if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
self._is_create = True
return 0
if (unified in ('IF', 'FOR')
and self._is_create and self._begin_depth > 0):
return 1
# Default
return 0
def process(self, stack, stream):
"Process the stream"
consume_ws = False
splitlevel = 0
stmt = None
stmt_tokens = []
# Run over all stream tokens
for ttype, value in stream:
# Yield token if we finished a statement and there's no whitespaces
if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
stmt.tokens = stmt_tokens
yield stmt
# Reset filter and prepare to process next statement
self._reset()
consume_ws = False
splitlevel = 0
stmt = None
# Create a new statement if we are not currently in one of them
if stmt is None:
stmt = Statement()
stmt_tokens = []
# Change current split level (increase, decrease or remain equal)
splitlevel += self._change_splitlevel(ttype, value)
# Append the token to the current statement
stmt_tokens.append(Token(ttype, value))
# Check if we get the end of a statement
if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
consume_ws = True
# Yield pending statement (if any)
if stmt is not None:
stmt.tokens = stmt_tokens
yield stmt

@@ -1,461 +0,0 @@
# -*- coding: utf-8 -*-
import itertools
from sqlparse import sql
from sqlparse import tokens as T
try:
next
except NameError: # Python < 2.6
next = lambda i: i.next()
def _group_left_right(tlist, ttype, value, cls,
check_right=lambda t: True,
check_left=lambda t: True,
include_semicolon=False):
[_group_left_right(sgroup, ttype, value, cls, check_right, check_left,
include_semicolon) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, cls)]
idx = 0
token = tlist.token_next_match(idx, ttype, value)
while token:
right = tlist.token_next(tlist.token_index(token))
left = tlist.token_prev(tlist.token_index(token))
if right is None or not check_right(right):
token = tlist.token_next_match(tlist.token_index(token) + 1,
ttype, value)
elif left is None or not check_left(left):
token = tlist.token_next_match(tlist.token_index(token) + 1,
ttype, value)
else:
if include_semicolon:
sright = tlist.token_next_match(tlist.token_index(right),
T.Punctuation, ';')
if sright is not None:
# only overwrite "right" if a semicolon is actually
# present.
right = sright
tokens = tlist.tokens_between(left, right)[1:]
if not isinstance(left, cls):
new = cls([left])
new_idx = tlist.token_index(left)
tlist.tokens.remove(left)
tlist.tokens.insert(new_idx, new)
left = new
left.tokens.extend(tokens)
for t in tokens:
tlist.tokens.remove(t)
token = tlist.token_next_match(tlist.token_index(left) + 1,
ttype, value)
def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value):
depth = 1
for tok in tlist.tokens[idx:]:
if tok.match(start_ttype, start_value):
depth += 1
elif tok.match(end_ttype, end_value):
depth -= 1
if depth == 1:
return tok
return None
def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
cls, include_semicolon=False, recurse=False):
[_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
cls, include_semicolon) for sgroup in tlist.get_sublists()
if recurse]
if isinstance(tlist, cls):
idx = 1
else:
idx = 0
token = tlist.token_next_match(idx, start_ttype, start_value)
while token:
tidx = tlist.token_index(token)
end = _find_matching(tidx, tlist, start_ttype, start_value,
end_ttype, end_value)
if end is None:
idx = tidx + 1
else:
if include_semicolon:
next_ = tlist.token_next(tlist.token_index(end))
if next_ and next_.match(T.Punctuation, ';'):
end = next_
group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
_group_matching(group, start_ttype, start_value,
end_ttype, end_value, cls, include_semicolon)
idx = tlist.token_index(group) + 1
token = tlist.token_next_match(idx, start_ttype, start_value)
def group_if(tlist):
_group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True)
def group_for(tlist):
_group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP',
sql.For, True)
def group_foreach(tlist):
_group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP',
sql.For, True)
def group_begin(tlist):
_group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END',
sql.Begin, True)
def group_as(tlist):
def _right_valid(token):
# Currently limited to DML/DDL. Maybe additional more non SQL reserved
# keywords should appear here (see issue8).
return not token.ttype in (T.DML, T.DDL)
def _left_valid(token):
if token.ttype is T.Keyword and token.value in ('NULL',):
return True
return token.ttype is not T.Keyword
_group_left_right(tlist, T.Keyword, 'AS', sql.Identifier,
check_right=_right_valid,
check_left=_left_valid)
def group_assignment(tlist):
_group_left_right(tlist, T.Assignment, ':=', sql.Assignment,
include_semicolon=True)
def group_comparison(tlist):
def _parts_valid(token):
return (token.ttype in (T.String.Symbol, T.String.Single,
T.Name, T.Number, T.Number.Float,
T.Number.Integer, T.Literal,
T.Literal.Number.Integer, T.Name.Placeholder)
or isinstance(token, (sql.Identifier, sql.Parenthesis))
or (token.ttype is T.Keyword
and token.value.upper() in ['NULL', ]))
_group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
check_left=_parts_valid, check_right=_parts_valid)
def group_case(tlist):
_group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case,
include_semicolon=True, recurse=True)
def group_identifier(tlist):
def _consume_cycle(tl, i):
# TODO: Usage of Wildcard token is ambivalent here.
x = itertools.cycle((
lambda y: (y.match(T.Punctuation, '.')
or y.ttype in (T.Operator,
T.Wildcard,
T.Name)
or isinstance(y, sql.SquareBrackets)),
lambda y: (y.ttype in (T.String.Symbol,
T.Name,
T.Wildcard,
T.Literal.String.Single,
T.Literal.Number.Integer,
T.Literal.Number.Float)
or isinstance(y, (sql.Parenthesis,
sql.SquareBrackets,
sql.Function)))))
for t in tl.tokens[i:]:
# Don't take whitespaces into account.
if t.ttype is T.Whitespace:
yield t
continue
if next(x)(t):
yield t
else:
if isinstance(t, sql.Comment) and t.is_multiline():
yield t
return
def _next_token(tl, i):
# chooses the next token. if two tokens are found then the
# first is returned.
t1 = tl.token_next_by_type(
i, (T.String.Symbol, T.Name, T.Literal.Number.Integer,
T.Literal.Number.Float))
i1 = tl.token_index(t1, start=i) if t1 else None
t2_end = None if i1 is None else i1 + 1
t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis), end=t2_end)
if t1 and t2:
i2 = tl.token_index(t2, start=i)
if i1 > i2:
return t2
else:
return t1
elif t1:
return t1
else:
return t2
# bottom up approach: group subgroups first
[group_identifier(sgroup) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, sql.Identifier)]
# real processing
idx = 0
token = _next_token(tlist, idx)
while token:
identifier_tokens = [token] + list(
_consume_cycle(tlist,
tlist.token_index(token, start=idx) + 1))
# remove trailing whitespace
if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace:
identifier_tokens = identifier_tokens[:-1]
if not (len(identifier_tokens) == 1
and (isinstance(identifier_tokens[0], (sql.Function, sql.Parenthesis))
or identifier_tokens[0].ttype in (T.Literal.Number.Integer,
T.Literal.Number.Float))):
group = tlist.group_tokens(sql.Identifier, identifier_tokens)
idx = tlist.token_index(group, start=idx) + 1
else:
idx += 1
token = _next_token(tlist, idx)
def group_identifier_list(tlist):
[group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, sql.IdentifierList)]
# Allowed list items
fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
sql.Case)),
lambda t: t.is_whitespace(),
lambda t: t.ttype == T.Name,
lambda t: t.ttype == T.Wildcard,
lambda t: t.match(T.Keyword, 'null'),
lambda t: t.match(T.Keyword, 'role'),
lambda t: t.ttype == T.Number.Integer,
lambda t: t.ttype == T.String.Single,
lambda t: t.ttype == T.Name.Placeholder,
lambda t: t.ttype == T.Keyword,
lambda t: isinstance(t, sql.Comparison),
lambda t: isinstance(t, sql.Comment),
lambda t: t.ttype == T.Comment.Multiline,
]
tcomma = tlist.token_next_match(0, T.Punctuation, ',')
start = None
while tcomma is not None:
# Go back one idx to make sure to find the correct tcomma
idx = tlist.token_index(tcomma)
before = tlist.token_prev(idx)
after = tlist.token_next(idx)
# Check if the tokens around tcomma belong to a list
bpassed = apassed = False
for func in fend1_funcs:
if before is not None and func(before):
bpassed = True
if after is not None and func(after):
apassed = True
if not bpassed or not apassed:
# Something's wrong here, skip ahead to next ","
start = None
tcomma = tlist.token_next_match(idx + 1,
T.Punctuation, ',')
else:
if start is None:
start = before
after_idx = tlist.token_index(after, start=idx)
next_ = tlist.token_next(after_idx)
if next_ is None or not next_.match(T.Punctuation, ','):
# Reached the end of the list
tokens = tlist.tokens_between(start, after)
group = tlist.group_tokens(sql.IdentifierList, tokens)
start = None
tcomma = tlist.token_next_match(tlist.token_index(group) + 1,
T.Punctuation, ',')
else:
tcomma = next_
def group_brackets(tlist):
"""Group parentheses () or square brackets []
This is just like _group_matching, but complicated by the fact that
round brackets can contain square bracket groups and vice versa
"""
if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)):
idx = 1
else:
idx = 0
# Find the first opening bracket
token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
while token:
start_val = token.value # either '(' or '['
if start_val == '(':
end_val = ')'
group_class = sql.Parenthesis
else:
end_val = ']'
group_class = sql.SquareBrackets
tidx = tlist.token_index(token)
# Find the corresponding closing bracket
end = _find_matching(tidx, tlist, T.Punctuation, start_val,
T.Punctuation, end_val)
if end is None:
idx = tidx + 1
else:
group = tlist.group_tokens(group_class,
tlist.tokens_between(token, end))
# Check for nested bracket groups within this group
group_brackets(group)
idx = tlist.token_index(group) + 1
# Find the next opening bracket
token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
def group_comments(tlist):
[group_comments(sgroup) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, sql.Comment)]
idx = 0
token = tlist.token_next_by_type(idx, T.Comment)
while token:
tidx = tlist.token_index(token)
end = tlist.token_not_matching(tidx + 1,
[lambda t: t.ttype in T.Comment,
lambda t: t.is_whitespace()])
if end is None:
idx = tidx + 1
else:
eidx = tlist.token_index(end)
grp_tokens = tlist.tokens_between(token,
tlist.token_prev(eidx, False))
group = tlist.group_tokens(sql.Comment, grp_tokens)
idx = tlist.token_index(group)
token = tlist.token_next_by_type(idx, T.Comment)
def group_where(tlist):
[group_where(sgroup) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, sql.Where)]
idx = 0
token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
stopwords = ('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING')
while token:
tidx = tlist.token_index(token)
end = tlist.token_next_match(tidx + 1, T.Keyword, stopwords)
if end is None:
end = tlist._groupable_tokens[-1]
else:
end = tlist.tokens[tlist.token_index(end) - 1]
group = tlist.group_tokens(sql.Where,
tlist.tokens_between(token, end),
ignore_ws=True)
idx = tlist.token_index(group)
token = tlist.token_next_match(idx, T.Keyword, 'WHERE')
def group_aliased(tlist):
clss = (sql.Identifier, sql.Function, sql.Case)
[group_aliased(sgroup) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, clss)]
idx = 0
token = tlist.token_next_by_instance(idx, clss)
while token:
next_ = tlist.token_next(tlist.token_index(token))
if next_ is not None and isinstance(next_, clss):
if not next_.value.upper().startswith('VARCHAR'):
grp = tlist.tokens_between(token, next_)[1:]
token.tokens.extend(grp)
for t in grp:
tlist.tokens.remove(t)
idx = tlist.token_index(token) + 1
token = tlist.token_next_by_instance(idx, clss)
def group_typecasts(tlist):
_group_left_right(tlist, T.Punctuation, '::', sql.Identifier)
def group_functions(tlist):
[group_functions(sgroup) for sgroup in tlist.get_sublists()
if not isinstance(sgroup, sql.Function)]
idx = 0
token = tlist.token_next_by_type(idx, T.Name)
while token:
next_ = tlist.token_next(token)
if not isinstance(next_, sql.Parenthesis):
idx = tlist.token_index(token) + 1
else:
func = tlist.group_tokens(sql.Function,
tlist.tokens_between(token, next_))
idx = tlist.token_index(func) + 1
token = tlist.token_next_by_type(idx, T.Name)
def group_order(tlist):
idx = 0
token = tlist.token_next_by_type(idx, T.Keyword.Order)
while token:
prev = tlist.token_prev(token)
if isinstance(prev, sql.Identifier):
ido = tlist.group_tokens(sql.Identifier,
tlist.tokens_between(prev, token))
idx = tlist.token_index(ido) + 1
else:
idx = tlist.token_index(token) + 1
token = tlist.token_next_by_type(idx, T.Keyword.Order)
def align_comments(tlist):
[align_comments(sgroup) for sgroup in tlist.get_sublists()]
idx = 0
token = tlist.token_next_by_instance(idx, sql.Comment)
while token:
before = tlist.token_prev(tlist.token_index(token))
if isinstance(before, sql.TokenList):
grp = tlist.tokens_between(before, token)[1:]
before.tokens.extend(grp)
for t in grp:
tlist.tokens.remove(t)
idx = tlist.token_index(before) + 1
else:
idx = tlist.token_index(token) + 1
token = tlist.token_next_by_instance(idx, sql.Comment)
def group(tlist):
for func in [
group_comments,
group_brackets,
group_functions,
group_where,
group_case,
group_identifier,
group_order,
group_typecasts,
group_as,
group_aliased,
group_assignment,
group_comparison,
align_comments,
group_identifier_list,
group_if,
group_for,
group_foreach,
group_begin,
]:
func(tlist)
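# A minimal sketch of the pipeline above, assuming the bundled package is
# importable: after group() has run, a comma-separated select list is a
# single IdentifierList group:
if __name__ == '__main__':
    import sqlparse
    stmt = sqlparse.parse('SELECT a, b, c FROM t')[0]
    idlist = stmt.token_next_by_instance(0, sql.IdentifierList)
    print [unicode(tok) for tok in idlist.get_identifiers()]  # -> [u'a', u'b', u'c']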


@@ -1,728 +0,0 @@
# -*- coding: utf-8 -*-
import re
from os.path import abspath, join
from sqlparse import sql, tokens as T
from sqlparse.engine import FilterStack
from sqlparse.lexer import tokenize
from sqlparse.pipeline import Pipeline
from sqlparse.tokens import (Comment, Comparison, Keyword, Name, Punctuation,
String, Whitespace)
from sqlparse.utils import memoize_generator
from sqlparse.utils import split_unquoted_newlines
# --------------------------
# token process
class _CaseFilter:
ttype = None
def __init__(self, case=None):
if case is None:
case = 'upper'
assert case in ['lower', 'upper', 'capitalize']
self.convert = getattr(unicode, case)
def process(self, stack, stream):
for ttype, value in stream:
if ttype in self.ttype:
value = self.convert(value)
yield ttype, value
class KeywordCaseFilter(_CaseFilter):
ttype = T.Keyword
class IdentifierCaseFilter(_CaseFilter):
ttype = (T.Name, T.String.Symbol)
def process(self, stack, stream):
for ttype, value in stream:
if ttype in self.ttype and not value.strip()[0] == '"':
value = self.convert(value)
yield ttype, value
class TruncateStringFilter:
def __init__(self, width, char):
self.width = max(width, 1)
self.char = unicode(char)
def process(self, stack, stream):
for ttype, value in stream:
if ttype is T.Literal.String.Single:
if value[:2] == '\'\'':
inner = value[2:-2]
quote = u'\'\''
else:
inner = value[1:-1]
quote = u'\''
if len(inner) > self.width:
value = u''.join((quote, inner[:self.width], self.char,
quote))
yield ttype, value
class GetComments:
"""Get the comments from a stack"""
def process(self, stack, stream):
for token_type, value in stream:
if token_type in Comment:
yield token_type, value
class StripComments:
"""Strip the comments from a stack"""
def process(self, stack, stream):
for token_type, value in stream:
if token_type not in Comment:
yield token_type, value
def StripWhitespace(stream):
"Strip the useless whitespaces from a stream leaving only the minimal ones"
last_type = None
has_space = False
ignore_group = frozenset((Comparison, Punctuation))
for token_type, value in stream:
# We got a previous token (not empty first ones)
if last_type:
if token_type in Whitespace:
has_space = True
continue
# Ignore leading whitespace and semicolons
elif token_type in (Whitespace, Whitespace.Newline, ignore_group):
continue
# Yield a whitespace if it can't be ignored
if has_space:
if not ignore_group.intersection((last_type, token_type)):
yield Whitespace, ' '
has_space = False
# Yield the token and set its type for checking with the next one
yield token_type, value
last_type = token_type
class IncludeStatement:
"""Filter that enable a INCLUDE statement"""
def __init__(self, dirpath=".", maxrecursive=10, raiseexceptions=False):
if maxrecursive <= 0:
raise ValueError('Max recursion limit reached')
self.dirpath = abspath(dirpath)
self.maxRecursive = maxrecursive
self.raiseexceptions = raiseexceptions
self.detected = False
@memoize_generator
def process(self, stack, stream):
# Run over all tokens in the stream
for token_type, value in stream:
# INCLUDE statement found, set detected mode
if token_type in Name and value.upper() == 'INCLUDE':
self.detected = True
continue
# INCLUDE statement was found, parse it
elif self.detected:
# Omit whitespaces
if token_type in Whitespace:
continue
# Found file path to include
if token_type in String.Symbol:
# if token_type in tokens.String.Symbol:
# Get path of file to include
path = join(self.dirpath, value[1:-1])
try:
f = open(path)
raw_sql = f.read()
f.close()
# There was a problem loading the include file
except IOError, err:
# Raise the exception to the interpreter
if self.raiseexceptions:
raise
# Put the exception as a comment on the SQL code
yield Comment, u'-- IOError: %s\n' % err
else:
# Create a new FilterStack to parse the included file
# and add all its tokens to the main stack recursively
try:
filtr = IncludeStatement(self.dirpath,
self.maxRecursive - 1,
self.raiseexceptions)
# Max recursion limit reached
except ValueError, err:
# Raise the exception to the interpreter
if self.raiseexceptions:
raise
# Put the exception as a comment on the SQL code
yield Comment, u'-- ValueError: %s\n' % err
stack = FilterStack()
stack.preprocess.append(filtr)
for tv in stack.run(raw_sql):
yield tv
# Set normal mode
self.detected = False
# Don't include any token while in detected mode
continue
# Normal token
yield token_type, value
# ----------------------
# statement process
class StripCommentsFilter:
def _get_next_comment(self, tlist):
# TODO(andi) Comment types should be unified, see related issue38
token = tlist.token_next_by_instance(0, sql.Comment)
if token is None:
token = tlist.token_next_by_type(0, T.Comment)
return token
def _process(self, tlist):
token = self._get_next_comment(tlist)
while token:
tidx = tlist.token_index(token)
prev = tlist.token_prev(tidx, False)
next_ = tlist.token_next(tidx, False)
# Replace by whitespace if prev and next exist and if they're not
# whitespaces. This doesn't apply if prev or next is a parenthesis.
if (prev is not None and next_ is not None
and not prev.is_whitespace() and not next_.is_whitespace()
and not (prev.match(T.Punctuation, '(')
or next_.match(T.Punctuation, ')'))):
tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
else:
tlist.tokens.pop(tidx)
token = self._get_next_comment(tlist)
def process(self, stack, stmt):
[self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
self._process(stmt)
class StripWhitespaceFilter:
def _stripws(self, tlist):
func_name = '_stripws_%s' % tlist.__class__.__name__.lower()
func = getattr(self, func_name, self._stripws_default)
func(tlist)
def _stripws_default(self, tlist):
last_was_ws = False
for token in tlist.tokens:
if token.is_whitespace():
if last_was_ws:
token.value = ''
else:
token.value = ' '
last_was_ws = token.is_whitespace()
def _stripws_identifierlist(self, tlist):
# Removes newlines before commas, see issue140
last_nl = None
for token in tlist.tokens[:]:
if (token.ttype is T.Punctuation
and token.value == ','
and last_nl is not None):
tlist.tokens.remove(last_nl)
if token.is_whitespace():
last_nl = token
else:
last_nl = None
return self._stripws_default(tlist)
def _stripws_parenthesis(self, tlist):
if tlist.tokens[1].is_whitespace():
tlist.tokens.pop(1)
if tlist.tokens[-2].is_whitespace():
tlist.tokens.pop(-2)
self._stripws_default(tlist)
def process(self, stack, stmt, depth=0):
[self.process(stack, sgroup, depth + 1)
for sgroup in stmt.get_sublists()]
self._stripws(stmt)
if (
depth == 0
and stmt.tokens
and stmt.tokens[-1].is_whitespace()
):
stmt.tokens.pop(-1)
class ReindentFilter:
def __init__(self, width=2, char=' ', line_width=None):
self.width = width
self.char = char
self.indent = 0
self.offset = 0
self.line_width = line_width
self._curr_stmt = None
self._last_stmt = None
def _flatten_up_to_token(self, token):
"""Yields all tokens up to token plus the next one."""
# helper for _get_offset
iterator = self._curr_stmt.flatten()
for t in iterator:
yield t
if t == token:
raise StopIteration
def _get_offset(self, token):
raw = ''.join(map(unicode, self._flatten_up_to_token(token)))
line = raw.splitlines()[-1]
# Now take current offset into account and return relative offset.
full_offset = len(line) - len(self.char * (self.width * self.indent))
return full_offset - self.offset
def nl(self):
# TODO: newline character should be configurable
space = (self.char * ((self.indent * self.width) + self.offset))
# Detect runaway indenting due to parsing errors
if len(space) > 200:
# something seems to be wrong, flip back
self.indent = self.offset = 0
space = (self.char * ((self.indent * self.width) + self.offset))
ws = '\n' + space
return sql.Token(T.Whitespace, ws)
def _split_kwds(self, tlist):
split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
'GROUP', 'ORDER', 'UNION', 'VALUES',
'SET', 'BETWEEN', 'EXCEPT', 'HAVING')
def _next_token(i):
t = tlist.token_next_match(i, T.Keyword, split_words,
regex=True)
if t and t.value.upper() == 'BETWEEN':
t = _next_token(tlist.token_index(t) + 1)
if t and t.value.upper() == 'AND':
t = _next_token(tlist.token_index(t) + 1)
return t
idx = 0
token = _next_token(idx)
added = set()
while token:
prev = tlist.token_prev(tlist.token_index(token), False)
offset = 1
if prev and prev.is_whitespace() and prev not in added:
tlist.tokens.pop(tlist.token_index(prev))
offset += 1
uprev = unicode(prev)
if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))):
nl = tlist.token_next(token)
else:
nl = self.nl()
added.add(nl)
tlist.insert_before(token, nl)
offset += 1
token = _next_token(tlist.token_index(nl) + offset)
def _split_statements(self, tlist):
idx = 0
token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
while token:
prev = tlist.token_prev(tlist.token_index(token), False)
if prev and prev.is_whitespace():
tlist.tokens.pop(tlist.token_index(prev))
# only break if it's not the first token
if prev:
nl = self.nl()
tlist.insert_before(token, nl)
token = tlist.token_next_by_type(tlist.token_index(token) + 1,
(T.Keyword.DDL, T.Keyword.DML))
def _process(self, tlist):
func_name = '_process_%s' % tlist.__class__.__name__.lower()
func = getattr(self, func_name, self._process_default)
func(tlist)
def _process_where(self, tlist):
token = tlist.token_next_match(0, T.Keyword, 'WHERE')
try:
tlist.insert_before(token, self.nl())
except ValueError: # issue121, errors in statement
pass
self.indent += 1
self._process_default(tlist)
self.indent -= 1
def _process_having(self, tlist):
token = tlist.token_next_match(0, T.Keyword, 'HAVING')
try:
tlist.insert_before(token, self.nl())
except ValueError: # issue121, errors in statement
pass
self.indent += 1
self._process_default(tlist)
self.indent -= 1
def _process_parenthesis(self, tlist):
first = tlist.token_next(0)
indented = False
if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
self.indent += 1
tlist.tokens.insert(0, self.nl())
indented = True
num_offset = self._get_offset(
tlist.token_next_match(0, T.Punctuation, '('))
self.offset += num_offset
self._process_default(tlist, stmts=not indented)
if indented:
self.indent -= 1
self.offset -= num_offset
def _process_identifierlist(self, tlist):
identifiers = list(tlist.get_identifiers())
if len(identifiers) > 1 and not tlist.within(sql.Function):
first = list(identifiers[0].flatten())[0]
if self.char == '\t':
# when using tabs we don't count the actual word length
# in spaces.
num_offset = 1
else:
num_offset = self._get_offset(first) - len(first.value)
self.offset += num_offset
for token in identifiers[1:]:
tlist.insert_before(token, self.nl())
self.offset -= num_offset
self._process_default(tlist)
def _process_case(self, tlist):
is_first = True
num_offset = None
case = tlist.tokens[0]
outer_offset = self._get_offset(case) - len(case.value)
self.offset += outer_offset
for cond, value in tlist.get_cases():
if is_first:
tcond = list(cond[0].flatten())[0]
is_first = False
num_offset = self._get_offset(tcond) - len(tcond.value)
self.offset += num_offset
continue
if cond is None:
token = value[0]
else:
token = cond[0]
tlist.insert_before(token, self.nl())
# Line breaks on group level are done. Now let's add an offset of
# 5 (=length of "when", "then", "else") and process subgroups.
self.offset += 5
self._process_default(tlist)
self.offset -= 5
if num_offset is not None:
self.offset -= num_offset
end = tlist.token_next_match(0, T.Keyword, 'END')
tlist.insert_before(end, self.nl())
self.offset -= outer_offset
def _process_default(self, tlist, stmts=True, kwds=True):
if stmts:
self._split_statements(tlist)
if kwds:
self._split_kwds(tlist)
[self._process(sgroup) for sgroup in tlist.get_sublists()]
def process(self, stack, stmt):
if isinstance(stmt, sql.Statement):
self._curr_stmt = stmt
self._process(stmt)
if isinstance(stmt, sql.Statement):
if self._last_stmt is not None:
if unicode(self._last_stmt).endswith('\n'):
nl = '\n'
else:
nl = '\n\n'
stmt.tokens.insert(
0, sql.Token(T.Whitespace, nl))
if self._last_stmt != stmt:
self._last_stmt = stmt
# FIXME: Doesn't work ;)
class RightMarginFilter:
keep_together = (
# sql.TypeCast, sql.Identifier, sql.Alias,
)
def __init__(self, width=79):
self.width = width
self.line = ''
def _process(self, stack, group, stream):
for token in stream:
if token.is_whitespace() and '\n' in token.value:
if token.value.endswith('\n'):
self.line = ''
else:
self.line = token.value.splitlines()[-1]
elif (token.is_group()
and not token.__class__ in self.keep_together):
token.tokens = self._process(stack, token, token.tokens)
else:
val = unicode(token)
if len(self.line) + len(val) > self.width:
match = re.search('^ +', self.line)
if match is not None:
indent = match.group()
else:
indent = ''
yield sql.Token(T.Whitespace, '\n%s' % indent)
self.line = indent
self.line += val
yield token
def process(self, stack, group):
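# NOTE: the unconditional return below intentionally disables this filter
# (see the FIXME above); the assignment after it is unreachable.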
return
group.tokens = self._process(stack, group, group.tokens)
class ColumnsSelect:
"""Get the columns names of a SELECT query"""
def process(self, stack, stream):
mode = 0
oldValue = ""
parenthesis = 0
for token_type, value in stream:
# Ignore comments
if token_type in Comment:
continue
# We have not detected a SELECT statement
if mode == 0:
if token_type in Keyword and value == 'SELECT':
mode = 1
# We have detected a SELECT statement
elif mode == 1:
if value == 'FROM':
if oldValue:
yield oldValue
mode = 3 # Columns have been checked
elif value == 'AS':
oldValue = ""
mode = 2
elif (token_type == Punctuation
and value == ',' and not parenthesis):
if oldValue:
yield oldValue
oldValue = ""
elif token_type not in Whitespace:
if value == '(':
parenthesis += 1
elif value == ')':
parenthesis -= 1
oldValue += value
# We are processing an AS keyword
elif mode == 2:
# Also check for Keywords because of a bug in SQLParse
if token_type == Name or token_type == Keyword:
yield value
mode = 1
# ---------------------------
# postprocess
class SerializerUnicode:
def process(self, stack, stmt):
raw = unicode(stmt)
lines = split_unquoted_newlines(raw)
res = '\n'.join(line.rstrip() for line in lines)
return res
def Tokens2Unicode(stream):
result = ""
for _, value in stream:
result += unicode(value)
return result
class OutputFilter:
varname_prefix = ''
def __init__(self, varname='sql'):
self.varname = self.varname_prefix + varname
self.count = 0
def _process(self, stream, varname, has_nl):
raise NotImplementedError
def process(self, stack, stmt):
self.count += 1
if self.count > 1:
varname = '%s%d' % (self.varname, self.count)
else:
varname = self.varname
has_nl = len(unicode(stmt).strip().splitlines()) > 1
stmt.tokens = self._process(stmt.tokens, varname, has_nl)
return stmt
class OutputPythonFilter(OutputFilter):
def _process(self, stream, varname, has_nl):
# SQL query assignment to varname
if self.count > 1:
yield sql.Token(T.Whitespace, '\n')
yield sql.Token(T.Name, varname)
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Operator, '=')
yield sql.Token(T.Whitespace, ' ')
if has_nl:
yield sql.Token(T.Operator, '(')
yield sql.Token(T.Text, "'")
# Print the tokens inside the quoted string
for token in stream:
# Token is a new line separator
if token.is_whitespace() and '\n' in token.value:
# Close quote and add a new line
yield sql.Token(T.Text, " '")
yield sql.Token(T.Whitespace, '\n')
# Quote header on secondary lines
yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
yield sql.Token(T.Text, "'")
# Indentation
after_lb = token.value.split('\n', 1)[1]
if after_lb:
yield sql.Token(T.Whitespace, after_lb)
continue
# Token has escape chars
elif "'" in token.value:
token.value = token.value.replace("'", "\\'")
# Put the token
yield sql.Token(T.Text, token.value)
# Close quote
yield sql.Token(T.Text, "'")
if has_nl:
yield sql.Token(T.Operator, ')')
class OutputPHPFilter(OutputFilter):
varname_prefix = '$'
def _process(self, stream, varname, has_nl):
# SQL query assignment to varname (quote header)
if self.count > 1:
yield sql.Token(T.Whitespace, '\n')
yield sql.Token(T.Name, varname)
yield sql.Token(T.Whitespace, ' ')
if has_nl:
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Operator, '=')
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Text, '"')
# Print the tokens inside the quoted string
for token in stream:
# Token is a new line separator
if token.is_whitespace() and '\n' in token.value:
# Close quote and add a new line
yield sql.Token(T.Text, ' ";')
yield sql.Token(T.Whitespace, '\n')
# Quote header on secondary lines
yield sql.Token(T.Name, varname)
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Operator, '.=')
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Text, '"')
# Indentation
after_lb = token.value.split('\n', 1)[1]
if after_lb:
yield sql.Token(T.Whitespace, after_lb)
continue
# Token has escape chars
elif '"' in token.value:
token.value = token.value.replace('"', '\\"')
# Put the token
yield sql.Token(T.Text, token.value)
# Close quote
yield sql.Token(T.Text, '"')
yield sql.Token(T.Punctuation, ';')
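# A minimal usage sketch, assuming the bundled package is importable; both
# output filters are reached through sqlparse.format()'s output_format option:
if __name__ == '__main__':
    import sqlparse
    print sqlparse.format('select * from foo', output_format='python')
    # -> sql = 'select * from foo'
    print sqlparse.format('select * from foo;', output_format='php')
    # -> $sql = "select * from foo;";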
class Limit:
"""Get the LIMIT of a query.
If no LIMIT is defined, return -1 (the convention for a query without LIMIT)
"""
def process(self, stack, stream):
index = 7
stream = list(stream)
stream.reverse()
# Run over all tokens in the stream from the end
for token_type, value in stream:
index -= 1
# if index and token_type in Keyword:
if index and token_type in Keyword and value == 'LIMIT':
return stream[4 - index][1]
return -1
def compact(stream):
"""Function that return a compacted version of the stream"""
pipe = Pipeline()
pipe.append(StripComments())
pipe.append(StripWhitespace)
return pipe(stream)
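# A minimal usage sketch (Python 2), assuming the bundled modules are
# importable:
if __name__ == '__main__':
    from sqlparse.lexer import tokenize
    print Tokens2Unicode(compact(tokenize('SELECT *\n-- a comment\nFROM foo')))
    # -> SELECT * FROM foo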


@@ -1,137 +0,0 @@
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
"""SQL formatter"""
from sqlparse import filters
from sqlparse.exceptions import SQLParseError
def validate_options(options):
"""Validates options."""
kwcase = options.get('keyword_case', None)
if kwcase not in [None, 'upper', 'lower', 'capitalize']:
raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
idcase = options.get('identifier_case', None)
if idcase not in [None, 'upper', 'lower', 'capitalize']:
raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
ofrmt = options.get('output_format', None)
if ofrmt not in [None, 'sql', 'python', 'php']:
raise SQLParseError('Unknown output format: %r' % ofrmt)
strip_comments = options.get('strip_comments', False)
if strip_comments not in [True, False]:
raise SQLParseError('Invalid value for strip_comments: %r'
% strip_comments)
strip_ws = options.get('strip_whitespace', False)
if strip_ws not in [True, False]:
raise SQLParseError('Invalid value for strip_whitespace: %r'
% strip_ws)
truncate_strings = options.get('truncate_strings', None)
if truncate_strings is not None:
try:
truncate_strings = int(truncate_strings)
except (ValueError, TypeError):
raise SQLParseError('Invalid value for truncate_strings: %r'
% truncate_strings)
if truncate_strings <= 1:
raise SQLParseError('Invalid value for truncate_strings: %r'
% truncate_strings)
options['truncate_strings'] = truncate_strings
options['truncate_char'] = options.get('truncate_char', '[...]')
reindent = options.get('reindent', False)
if reindent not in [True, False]:
raise SQLParseError('Invalid value for reindent: %r'
% reindent)
elif reindent:
options['strip_whitespace'] = True
indent_tabs = options.get('indent_tabs', False)
if indent_tabs not in [True, False]:
raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
elif indent_tabs:
options['indent_char'] = '\t'
else:
options['indent_char'] = ' '
indent_width = options.get('indent_width', 2)
try:
indent_width = int(indent_width)
except (TypeError, ValueError):
raise SQLParseError('indent_width requires an integer')
if indent_width < 1:
raise SQLParseError('indent_width requires a positive integer')
options['indent_width'] = indent_width
right_margin = options.get('right_margin', None)
if right_margin is not None:
try:
right_margin = int(right_margin)
except (TypeError, ValueError):
raise SQLParseError('right_margin requires an integer')
if right_margin < 10:
raise SQLParseError('right_margin requires an integer >= 10')
options['right_margin'] = right_margin
return options
def build_filter_stack(stack, options):
"""Setup and return a filter stack.
Args:
stack: :class:`~sqlparse.filters.FilterStack` instance
options: Dictionary with options validated by validate_options.
"""
# Token filter
if options.get('keyword_case', None):
stack.preprocess.append(
filters.KeywordCaseFilter(options['keyword_case']))
if options.get('identifier_case', None):
stack.preprocess.append(
filters.IdentifierCaseFilter(options['identifier_case']))
if options.get('truncate_strings', None) is not None:
stack.preprocess.append(filters.TruncateStringFilter(
width=options['truncate_strings'], char=options['truncate_char']))
# After grouping
if options.get('strip_comments', False):
stack.enable_grouping()
stack.stmtprocess.append(filters.StripCommentsFilter())
if (options.get('strip_whitespace', False)
or options.get('reindent', False)):
stack.enable_grouping()
stack.stmtprocess.append(filters.StripWhitespaceFilter())
if options.get('reindent', False):
stack.enable_grouping()
stack.stmtprocess.append(
filters.ReindentFilter(char=options['indent_char'],
width=options['indent_width']))
if options.get('right_margin', False):
stack.enable_grouping()
stack.stmtprocess.append(
filters.RightMarginFilter(width=options['right_margin']))
# Serializer
if options.get('output_format'):
frmt = options['output_format']
if frmt.lower() == 'php':
fltr = filters.OutputPHPFilter()
elif frmt.lower() == 'python':
fltr = filters.OutputPythonFilter()
else:
fltr = None
if fltr is not None:
stack.postprocess.append(fltr)
return stack
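# A minimal usage sketch, assuming the bundled package is importable; these
# options flow through validate_options() and build_filter_stack() via the
# public sqlparse.format() entry point:
if __name__ == '__main__':
    import sqlparse
    print sqlparse.format('select * from foo where a = 1',
                          reindent=True, keyword_case='upper')
    # prints roughly:
    #   SELECT *
    #   FROM foo
    #   WHERE a = 1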


@@ -1,44 +0,0 @@
'''
Created on 17/05/2012
@author: piranna
Several utility functions to extract info from SQL statements
'''
from sqlparse.filters import ColumnsSelect, Limit
from sqlparse.pipeline import Pipeline
from sqlparse.tokens import Keyword, Whitespace
def getlimit(stream):
"""Function that return the LIMIT of a input SQL """
pipe = Pipeline()
pipe.append(Limit())
result = pipe(stream)
try:
return int(result)
except ValueError:
return result
def getcolumns(stream):
"""Function that return the colums of a SELECT query"""
pipe = Pipeline()
pipe.append(ColumnsSelect())
return pipe(stream)
class IsType(object):
"""Functor that return is the statement is of a specific type"""
def __init__(self, type):
self.type = type
def __call__(self, stream):
for token_type, value in stream:
if token_type not in Whitespace:
return token_type in Keyword and value == self.type
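# A minimal usage sketch (Python 2), assuming the bundled modules are
# importable; note that Limit and IsType compare raw token values, so the
# keywords must be upper-case in the input:
if __name__ == '__main__':
    from sqlparse.lexer import tokenize
    print getlimit(tokenize('SELECT * FROM foo LIMIT 10'))  # -> 10
    print getcolumns(tokenize('SELECT a, b AS c FROM t'))   # -> [u'a', u'c']
    print IsType('SELECT')(tokenize('SELECT * FROM foo'))   # -> True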


@@ -1,363 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
"""SQL Lexer"""
# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.
import re
import sys
from sqlparse import tokens
from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
from cStringIO import StringIO
class include(str):
pass
class combined(tuple):
"""Indicates a state combined from multiple states."""
def __new__(cls, *args):
return tuple.__new__(cls, args)
def __init__(self, *args):
# tuple.__init__ doesn't do anything
pass
def is_keyword(value):
test = value.upper()
return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
def apply_filters(stream, filters, lexer=None):
"""
Use this method to apply an iterable of filters to
a stream. If lexer is given it's forwarded to the
filter, otherwise the filter receives `None`.
"""
def _apply(filter_, stream):
for token in filter_.filter(lexer, stream):
yield token
for filter_ in filters:
stream = _apply(filter_, stream)
return stream
class LexerMeta(type):
"""
Metaclass for Lexer, creates the self._tokens attribute from
self.tokens on the first instantiation.
"""
def _process_state(cls, unprocessed, processed, state):
assert type(state) is str, "wrong state name %r" % state
assert state[0] != '#', "invalid state name %r" % state
if state in processed:
return processed[state]
tokenlist = processed[state] = []
rflags = cls.flags
for tdef in unprocessed[state]:
if isinstance(tdef, include):
# it's a state reference
assert tdef != state, "circular state reference %r" % state
tokenlist.extend(cls._process_state(
unprocessed, processed, str(tdef)))
continue
assert type(tdef) is tuple, "wrong rule def %r" % tdef
try:
rex = re.compile(tdef[0], rflags).match
except Exception, err:
raise ValueError(("uncompilable regex %r in state"
" %r of %r: %s"
% (tdef[0], state, cls, err)))
assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
('token type must be simple type or callable, not %r'
% (tdef[1],))
if len(tdef) == 2:
new_state = None
else:
tdef2 = tdef[2]
if isinstance(tdef2, str):
# an existing state
if tdef2 == '#pop':
new_state = -1
elif tdef2 in unprocessed:
new_state = (tdef2,)
elif tdef2 == '#push':
new_state = tdef2
elif tdef2[:5] == '#pop:':
new_state = -int(tdef2[5:])
else:
assert False, 'unknown new state %r' % tdef2
elif isinstance(tdef2, combined):
# combine a new state from existing ones
new_state = '_tmp_%d' % cls._tmpname
cls._tmpname += 1
itokens = []
for istate in tdef2:
assert istate != state, \
'circular state ref %r' % istate
itokens.extend(cls._process_state(unprocessed,
processed, istate))
processed[new_state] = itokens
new_state = (new_state,)
elif isinstance(tdef2, tuple):
# push more than one state
for state in tdef2:
assert (state in unprocessed or
state in ('#pop', '#push')), \
'unknown new state ' + state
new_state = tdef2
else:
assert False, 'unknown new state def %r' % tdef2
tokenlist.append((rex, tdef[1], new_state))
return tokenlist
def process_tokendef(cls):
cls._all_tokens = {}
cls._tmpname = 0
processed = cls._all_tokens[cls.__name__] = {}
#tokendefs = tokendefs or cls.tokens[name]
for state in cls.tokens.keys():
cls._process_state(cls.tokens, processed, state)
return processed
def __call__(cls, *args, **kwds):
if not hasattr(cls, '_tokens'):
cls._all_tokens = {}
cls._tmpname = 0
if hasattr(cls, 'token_variants') and cls.token_variants:
# don't process yet
pass
else:
cls._tokens = cls.process_tokendef()
return type.__call__(cls, *args, **kwds)
class Lexer(object):
__metaclass__ = LexerMeta
encoding = 'utf-8'
stripall = False
stripnl = False
tabsize = 0
flags = re.IGNORECASE | re.UNICODE
tokens = {
'root': [
(r'(--|# ).*?(\r\n|\r|\n)', tokens.Comment.Single),
# $ matches *before* newline, therefore we have two patterns
# to match Comment.Single
(r'(--|# ).*?$', tokens.Comment.Single),
(r'(\r\n|\r|\n)', tokens.Newline),
(r'\s+', tokens.Whitespace),
(r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
(r':=', tokens.Assignment),
(r'::', tokens.Punctuation),
(r'[*]', tokens.Wildcard),
(r'CASE\b', tokens.Keyword), # extended CASE(foo)
(r"`(``|[^`])*`", tokens.Name),
(r"´(´´|[^´])*´", tokens.Name),
(r'\$([^\W\d]\w*)?\$', tokens.Name.Builtin),
(r'\?{1}', tokens.Name.Placeholder),
(r'%\(\w+\)s', tokens.Name.Placeholder),
(r'%s', tokens.Name.Placeholder),
(r'[$:?]\w+', tokens.Name.Placeholder),
# FIXME(andi): VALUES shouldn't be listed here
# see https://github.com/andialbrecht/sqlparse/pull/64
(r'VALUES', tokens.Keyword),
(r'(@|##|#)[^\W\d_]\w+', tokens.Name),
# IN is special, it may be followed by a parenthesis, but
# is never a function, see issue183
(r'in\b(?=[ (])?', tokens.Keyword),
(r'[^\W\d_]\w*(?=[.(])', tokens.Name), # see issue39
(r'[-]?0x[0-9a-fA-F]+', tokens.Number.Hexadecimal),
(r'[-]?[0-9]*(\.[0-9]+)?[eE][-]?[0-9]+', tokens.Number.Float),
(r'[-]?[0-9]*\.[0-9]+', tokens.Number.Float),
(r'[-]?[0-9]+', tokens.Number.Integer),
(r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
# not a real string literal in ANSI SQL:
# A patch based on: https://github.com/andialbrecht/sqlparse/pull/396
(r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
# sqlite names can be escaped with [square brackets]. left bracket
# cannot be preceded by a word character or a right bracket --
# otherwise it's probably an array index
(r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
(r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
(r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
(r'NOT NULL\b', tokens.Keyword),
(r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
(r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
(r'(?<=\.)[^\W\d_]\w*', tokens.Name),
(r'[^\W\d]\w*', is_keyword),
(r'[;:()\[\],\.]', tokens.Punctuation),
(r'[<>=~!]+', tokens.Operator.Comparison),
(r'[+/@#%^&|`?^-]+', tokens.Operator),
],
'multiline-comments': [
(r'/\*', tokens.Comment.Multiline, 'multiline-comments'),
(r'\*/', tokens.Comment.Multiline, '#pop'),
(r'[^/\*]+', tokens.Comment.Multiline),
(r'[/*]', tokens.Comment.Multiline),
]}
def __init__(self):
self.filters = []
def add_filter(self, filter_, **options):
from sqlparse.filters import Filter
if not isinstance(filter_, Filter):
filter_ = filter_(**options)
self.filters.append(filter_)
def _decode(self, text):
if sys.version_info[0] == 3:
if isinstance(text, str):
return text
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
if text.startswith(u'\ufeff'):
text = text[len(u'\ufeff'):]
except UnicodeDecodeError:
text = text.decode('latin1')
else:
try:
text = text.decode(self.encoding)
except UnicodeDecodeError:
text = text.decode('unicode-escape')
if self.tabsize > 0:
text = text.expandtabs(self.tabsize)
return text
def get_tokens(self, text, unfiltered=False):
"""
Return an iterable of (tokentype, value) pairs generated from
`text`. If `unfiltered` is set to `True`, the filtering mechanism
is bypassed even if filters are defined.
Also preprocesses the text, i.e. expands tabs and strips it if
wanted, and applies registered filters.
"""
if isinstance(text, basestring):
if self.stripall:
text = text.strip()
elif self.stripnl:
text = text.strip('\n')
if sys.version_info[0] < 3 and isinstance(text, unicode):
text = StringIO(text.encode('utf-8'))
self.encoding = 'utf-8'
else:
text = StringIO(text)
def streamer():
for i, t, v in self.get_tokens_unprocessed(text):
yield t, v
stream = streamer()
if not unfiltered:
stream = apply_filters(stream, self.filters, self)
return stream
def get_tokens_unprocessed(self, stream, stack=('root',)):
"""
Split ``text`` into (tokentype, text) pairs.
``stack`` is the initial stack (default: ``['root']``)
"""
pos = 0
tokendefs = self._tokens # see __call__, pylint:disable=E1101
statestack = list(stack)
statetokens = tokendefs[statestack[-1]]
known_names = {}
text = stream.read()
text = self._decode(text)
while 1:
for rexmatch, action, new_state in statetokens:
m = rexmatch(text, pos)
if m:
value = m.group()
if value in known_names:
yield pos, known_names[value], value
elif type(action) is tokens._TokenType:
yield pos, action, value
elif hasattr(action, '__call__'):
ttype, value = action(value)
known_names[value] = ttype
yield pos, ttype, value
else:
for item in action(self, m):
yield item
pos = m.end()
if new_state is not None:
# state transition
if isinstance(new_state, tuple):
for state in new_state:
if state == '#pop':
statestack.pop()
elif state == '#push':
statestack.append(statestack[-1])
elif (
# Ugly hack - multiline-comments
# are not stackable
state != 'multiline-comments'
or not statestack
or statestack[-1] != 'multiline-comments'
):
statestack.append(state)
elif isinstance(new_state, int):
# pop
del statestack[new_state:]
elif new_state == '#push':
statestack.append(statestack[-1])
else:
assert False, "wrong state def: %r" % new_state
statetokens = tokendefs[statestack[-1]]
break
else:
try:
if text[pos] == '\n':
# at EOL, reset state to "root"
pos += 1
statestack = ['root']
statetokens = tokendefs['root']
yield pos, tokens.Text, u'\n'
continue
yield pos, tokens.Error, text[pos]
pos += 1
except IndexError:
break
def tokenize(sql, encoding=None):
"""Tokenize sql.
Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
of ``(token type, value)`` items.
"""
lexer = Lexer()
if encoding is not None:
lexer.encoding = encoding
return lexer.get_tokens(sql)
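# A minimal usage sketch (Python 2): tokenize() yields plain
# (token type, unicode value) pairs:
if __name__ == '__main__':
    for ttype, value in tokenize(u'SELECT 1'):
        print ttype, repr(value)
    # -> Token.Keyword.DML u'SELECT'
    #    Token.Text.Whitespace u' '
    #    Token.Literal.Number.Integer u'1'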


@@ -1,31 +0,0 @@
# Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
from types import GeneratorType
class Pipeline(list):
"""Pipeline to process filters sequentially"""
def __call__(self, stream):
"""Run the pipeline
Return a static (non-generator) version of the result
"""
# Run the stream over all the filters on the pipeline
for filter in self:
# Functions and callable objects (objects with '__call__' method)
if callable(filter):
stream = filter(stream)
# Normal filters (objects with 'process' method)
else:
stream = filter.process(None, stream)
# If the last filter returned a generator, materialize it into a list
if isinstance(stream, GeneratorType):
return list(stream)
return stream


@@ -1,684 +0,0 @@
# -*- coding: utf-8 -*-
"""This module contains classes representing syntactical elements of SQL."""
import re
import sys
from sqlparse import tokens as T
class Token(object):
"""Base class for all other classes in this module.
It represents a single token and has two instance attributes:
``value`` is the unchanged value of the token and ``ttype`` is
the type of the token.
"""
__slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword')
def __init__(self, ttype, value):
self.value = value
if ttype in T.Keyword:
self.normalized = value.upper()
else:
self.normalized = value
self.ttype = ttype
self.is_keyword = ttype in T.Keyword
self.parent = None
def __str__(self):
if sys.version_info[0] == 3:
return self.value
else:
return unicode(self).encode('utf-8')
def __repr__(self):
short = self._get_repr_value()
if sys.version_info[0] < 3:
short = short.encode('utf-8')
return '<%s \'%s\' at 0x%07x>' % (self._get_repr_name(),
short, id(self))
def __unicode__(self):
"""Returns a unicode representation of this object."""
return self.value or ''
def to_unicode(self):
"""Returns a unicode representation of this object.
.. deprecated:: 0.1.5
Use ``unicode(token)`` (for Python 3: ``str(token)``) instead.
"""
return unicode(self)
def _get_repr_name(self):
return str(self.ttype).split('.')[-1]
def _get_repr_value(self):
raw = unicode(self)
if len(raw) > 7:
raw = raw[:6] + u'...'
return re.sub('\s+', ' ', raw)
def flatten(self):
"""Resolve subgroups."""
yield self
def match(self, ttype, values, regex=False):
"""Checks whether the token matches the given arguments.
*ttype* is a token type. If this token doesn't match the given token
type, ``False`` is returned.
*values* is a list of possible values for this token. The values
are OR'ed together so if only one of the values matches ``True``
is returned. Except for keyword tokens the comparison is
case-sensitive. For convenience it's ok to pass in a single string.
If *regex* is ``True`` (default is ``False``) the given values are
treated as regular expressions.
"""
type_matched = self.ttype is ttype
if not type_matched or values is None:
return type_matched
if regex:
if isinstance(values, basestring):
values = set([values])
if self.ttype is T.Keyword:
values = set(re.compile(v, re.IGNORECASE) for v in values)
else:
values = set(re.compile(v) for v in values)
for pattern in values:
if pattern.search(self.value):
return True
return False
if isinstance(values, basestring):
if self.is_keyword:
return values.upper() == self.normalized
return values == self.value
if self.is_keyword:
for v in values:
if v.upper() == self.normalized:
return True
return False
return self.value in values
def is_group(self):
"""Returns ``True`` if this object has children."""
return False
def is_whitespace(self):
"""Return ``True`` if this token is a whitespace token."""
return self.ttype and self.ttype in T.Whitespace
def within(self, group_cls):
"""Returns ``True`` if this token is within *group_cls*.
Use this method for example to check if an identifier is within
a function: ``t.within(sql.Function)``.
"""
parent = self.parent
while parent:
if isinstance(parent, group_cls):
return True
parent = parent.parent
return False
def is_child_of(self, other):
"""Returns ``True`` if this token is a direct child of *other*."""
return self.parent == other
def has_ancestor(self, other):
"""Returns ``True`` if *other* is in this tokens ancestry."""
parent = self.parent
while parent:
if parent == other:
return True
parent = parent.parent
return False
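# A minimal usage sketch: match() compares keyword values case-insensitively,
# everything else literally:
if __name__ == '__main__':
    tok = Token(T.Keyword, 'where')
    print tok.match(T.Keyword, 'WHERE')             # -> True
    print tok.match(T.Keyword, ['FROM', 'WHERE'])   # -> True
    print tok.match(T.Punctuation, 'WHERE')         # -> False (ttype differs)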
class TokenList(Token):
"""A group of tokens.
It has an additional instance attribute ``tokens`` which holds a
list of child-tokens.
"""
__slots__ = ('value', 'ttype', 'tokens')
def __init__(self, tokens=None):
if tokens is None:
tokens = []
self.tokens = tokens
Token.__init__(self, None, self._to_string())
def __unicode__(self):
return self._to_string()
def __str__(self):
str_ = self._to_string()
if sys.version_info[0] < 3:  # encode only on Python 2
str_ = str_.encode('utf-8')
return str_
def _to_string(self):
if sys.version_info[0] == 3:
return ''.join(x.value for x in self.flatten())
else:
return ''.join(unicode(x) for x in self.flatten())
def _get_repr_name(self):
return self.__class__.__name__
def _pprint_tree(self, max_depth=None, depth=0):
"""Pretty-print the object tree."""
indent = ' ' * (depth * 2)
for idx, token in enumerate(self.tokens):
if token.is_group():
pre = ' +-'
else:
pre = ' | '
print '%s%s%d %s \'%s\'' % (indent, pre, idx,
token._get_repr_name(),
token._get_repr_value())
if (token.is_group() and (max_depth is None or depth < max_depth)):
token._pprint_tree(max_depth, depth + 1)
def _remove_quotes(self, val):
"""Helper that removes surrounding quotes from strings."""
if not val:
return val
if val[0] in ('"', '\'') and val[-1] == val[0]:
val = val[1:-1]
return val
def get_token_at_offset(self, offset):
"""Returns the token that is on position offset."""
idx = 0
for token in self.flatten():
end = idx + len(token.value)
if idx <= offset <= end:
return token
idx = end
def flatten(self):
"""Generator yielding ungrouped tokens.
This method is recursively called for all child tokens.
"""
for token in self.tokens:
if isinstance(token, TokenList):
for item in token.flatten():
yield item
else:
yield token
# def __iter__(self):
# return self
#
# def next(self):
# for token in self.tokens:
# yield token
def is_group(self):
return True
def get_sublists(self):
# return [x for x in self.tokens if isinstance(x, TokenList)]
for x in self.tokens:
if isinstance(x, TokenList):
yield x
@property
def _groupable_tokens(self):
return self.tokens
def token_first(self, ignore_whitespace=True, ignore_comments=False):
"""Returns the first child token.
If *ignore_whitespace* is ``True`` (the default), whitespace
tokens are ignored.
If *ignore_comments* is ``True`` (default: ``False``), comments are
ignored too.
"""
for token in self.tokens:
if ignore_whitespace and token.is_whitespace():
continue
if ignore_comments and isinstance(token, Comment):
continue
return token
def token_next_by_instance(self, idx, clss, end=None):
"""Returns the next token matching a class.
*idx* is where to start searching in the list of child tokens.
*clss* is a list of classes the token should be an instance of.
If no matching token can be found ``None`` is returned.
"""
if not isinstance(clss, (list, tuple)):
clss = (clss,)
for token in self.tokens[idx:end]:
if isinstance(token, clss):
return token
def token_next_by_type(self, idx, ttypes):
"""Returns next matching token by it's token type."""
if not isinstance(ttypes, (list, tuple)):
ttypes = [ttypes]
for token in self.tokens[idx:]:
if token.ttype in ttypes:
return token
def token_next_match(self, idx, ttype, value, regex=False):
"""Returns next token where it's ``match`` method returns ``True``."""
if not isinstance(idx, int):
idx = self.token_index(idx)
for n in xrange(idx, len(self.tokens)):
token = self.tokens[n]
if token.match(ttype, value, regex):
return token
def token_not_matching(self, idx, funcs):
for token in self.tokens[idx:]:
passed = False
for func in funcs:
if func(token):
passed = True
break
if not passed:
return token
def token_matching(self, idx, funcs):
for token in self.tokens[idx:]:
for func in funcs:
if func(token):
return token
def token_prev(self, idx, skip_ws=True):
"""Returns the previous token relative to *idx*.
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
``None`` is returned if there's no previous token.
"""
if idx is None:
return None
if not isinstance(idx, int):
idx = self.token_index(idx)
while idx:
idx -= 1
if self.tokens[idx].is_whitespace() and skip_ws:
continue
return self.tokens[idx]
def token_next(self, idx, skip_ws=True):
"""Returns the next token relative to *idx*.
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
``None`` is returned if there's no next token.
"""
if idx is None:
return None
if not isinstance(idx, int):
idx = self.token_index(idx)
while idx < len(self.tokens) - 1:
idx += 1
if self.tokens[idx].is_whitespace() and skip_ws:
continue
return self.tokens[idx]
def token_index(self, token, start=0):
"""Return list index of token."""
if start > 0:
# Performing `index` manually is much faster when starting in the middle
# of the list of tokens and expecting to find the token near to the starting
# index.
for i in xrange(start, len(self.tokens)):
if self.tokens[i] == token:
return i
return -1
return self.tokens.index(token)
def tokens_between(self, start, end, exclude_end=False):
"""Return all tokens between (and including) start and end.
If *exclude_end* is ``True`` (default is ``False``) the end token
is not included.
"""
# FIXME(andi): rename exclude_end to include_end
if exclude_end:
offset = 0
else:
offset = 1
end_idx = self.token_index(end) + offset
start_idx = self.token_index(start)
return self.tokens[start_idx:end_idx]
def group_tokens(self, grp_cls, tokens, ignore_ws=False):
"""Replace tokens by an instance of *grp_cls*."""
idx = self.token_index(tokens[0])
if ignore_ws:
while tokens and tokens[-1].is_whitespace():
tokens = tokens[:-1]
for t in tokens:
self.tokens.remove(t)
grp = grp_cls(tokens)
for token in tokens:
token.parent = grp
grp.parent = self
self.tokens.insert(idx, grp)
return grp
def insert_before(self, where, token):
"""Inserts *token* before *where*."""
self.tokens.insert(self.token_index(where), token)
def insert_after(self, where, token, skip_ws=True):
"""Inserts *token* after *where*."""
next_token = self.token_next(where, skip_ws=skip_ws)
if next_token is None:
self.tokens.append(token)
else:
self.tokens.insert(self.token_index(next_token), token)
def has_alias(self):
"""Returns ``True`` if an alias is present."""
return self.get_alias() is not None
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
# "name AS alias"
kw = self.token_next_match(0, T.Keyword, 'AS')
if kw is not None:
return self._get_first_name(kw, keywords=True)
# "name alias" or "complicated column expression alias"
if len(self.tokens) > 2 \
and self.token_next_by_type(0, T.Whitespace) is not None:
return self._get_first_name(reverse=True)
return None
def get_name(self):
"""Returns the name of this identifier.
This is either its alias or its real name. The returned value can
be considered as the name under which the object corresponding to
this identifier is known within the current statement.
"""
alias = self.get_alias()
if alias is not None:
return alias
return self.get_real_name()
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
# a.b
dot = self.token_next_match(0, T.Punctuation, '.')
if dot is not None:
return self._get_first_name(self.token_index(dot))
return self._get_first_name()
def get_parent_name(self):
"""Return name of the parent object if any.
A parent object is identified by the first occurring dot.
"""
dot = self.token_next_match(0, T.Punctuation, '.')
if dot is None:
return None
prev_ = self.token_prev(self.token_index(dot))
if prev_ is None: # something must be very wrong here...
return None
return self._remove_quotes(prev_.value)
def _get_first_name(self, idx=None, reverse=False, keywords=False):
"""Returns the name of the first token with a name"""
if idx and not isinstance(idx, int):
idx = self.token_index(idx) + 1
tokens = self.tokens[idx:] if idx else self.tokens
tokens = reversed(tokens) if reverse else tokens
types = [T.Name, T.Wildcard, T.String.Symbol]
if keywords:
types.append(T.Keyword)
for tok in tokens:
if tok.ttype in types:
return self._remove_quotes(tok.value)
elif isinstance(tok, Identifier) or isinstance(tok, Function):
return tok.get_name()
return None
class Statement(TokenList):
"""Represents a SQL statement."""
__slots__ = ('value', 'ttype', 'tokens')
def get_type(self):
"""Returns the type of a statement.
The returned value is a string holding an upper-cased reprint of
the first DML or DDL keyword. If the first token in this group
isn't a DML or DDL keyword "UNKNOWN" is returned.
Whitespaces and comments at the beginning of the statement
are ignored.
"""
first_token = self.token_first(ignore_comments=True)
if first_token is None:
# An "empty" statement that either has not tokens at all
# or only whitespace tokens.
return 'UNKNOWN'
elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
return first_token.normalized
return 'UNKNOWN'
class Identifier(TokenList):
"""Represents an identifier.
Identifiers may have aliases or typecasts.
"""
__slots__ = ('value', 'ttype', 'tokens')
def is_wildcard(self):
"""Return ``True`` if this identifier contains a wildcard."""
token = self.token_next_by_type(0, T.Wildcard)
return token is not None
def get_typecast(self):
"""Returns the typecast or ``None`` of this object as a string."""
marker = self.token_next_match(0, T.Punctuation, '::')
if marker is None:
return None
next_ = self.token_next(self.token_index(marker), False)
if next_ is None:
return None
return unicode(next_)
def get_ordering(self):
"""Returns the ordering or ``None`` as uppercase string."""
ordering = self.token_next_by_type(0, T.Keyword.Order)
if ordering is None:
return None
return ordering.value.upper()
def get_array_indices(self):
"""Returns an iterator of index token lists"""
for tok in self.tokens:
if isinstance(tok, SquareBrackets):
# Use [1:-1] index to discard the square brackets
yield tok.tokens[1:-1]
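# A minimal usage sketch, assuming the bundled package is importable; the
# name helpers above resolve dotted and aliased identifiers:
if __name__ == '__main__':
    import sqlparse
    dotted = sqlparse.parse('SELECT foo.bar FROM foo')[0] \
        .token_next_by_instance(0, Identifier)
    print dotted.get_parent_name(), dotted.get_real_name()  # -> foo bar
    aliased = sqlparse.parse('SELECT foo AS f FROM t')[0] \
        .token_next_by_instance(0, Identifier)
    print aliased.get_real_name(), aliased.get_alias()      # -> foo f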
class IdentifierList(TokenList):
"""A list of :class:`~sqlparse.sql.Identifier`\'s."""
__slots__ = ('value', 'ttype', 'tokens')
def get_identifiers(self):
"""Returns the identifiers.
Whitespaces and punctuations are not included in this generator.
"""
for x in self.tokens:
if not x.is_whitespace() and not x.match(T.Punctuation, ','):
yield x
class Parenthesis(TokenList):
"""Tokens between parenthesis."""
__slots__ = ('value', 'ttype', 'tokens')
@property
def _groupable_tokens(self):
return self.tokens[1:-1]
class SquareBrackets(TokenList):
"""Tokens between square brackets"""
__slots__ = ('value', 'ttype', 'tokens')
@property
def _groupable_tokens(self):
return self.tokens[1:-1]
class Assignment(TokenList):
"""An assignment like 'var := val;'"""
__slots__ = ('value', 'ttype', 'tokens')
class If(TokenList):
"""An 'if' clause with possible 'else if' or 'else' parts."""
__slots__ = ('value', 'ttype', 'tokens')
class For(TokenList):
"""A 'FOR' loop."""
__slots__ = ('value', 'ttype', 'tokens')
class Comparison(TokenList):
"""A comparison used for example in WHERE clauses."""
__slots__ = ('value', 'ttype', 'tokens')
@property
def left(self):
return self.tokens[0]
@property
def right(self):
return self.tokens[-1]
class Comment(TokenList):
"""A comment."""
__slots__ = ('value', 'ttype', 'tokens')
def is_multiline(self):
return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
class Where(TokenList):
"""A WHERE clause."""
__slots__ = ('value', 'ttype', 'tokens')
class Case(TokenList):
"""A CASE statement with one or more WHEN and possibly an ELSE part."""
__slots__ = ('value', 'ttype', 'tokens')
def get_cases(self):
"""Returns a list of 2-tuples (condition, value).
If an ELSE exists condition is None.
"""
CONDITION = 1
VALUE = 2
ret = []
mode = CONDITION
for token in self.tokens:
# Set mode from the current statement
if token.match(T.Keyword, 'CASE'):
continue
elif token.match(T.Keyword, 'WHEN'):
ret.append(([], []))
mode = CONDITION
elif token.match(T.Keyword, 'THEN'):
mode = VALUE
elif token.match(T.Keyword, 'ELSE'):
ret.append((None, []))
mode = VALUE
elif token.match(T.Keyword, 'END'):
mode = None
# First condition without preceding WHEN
if mode and not ret:
ret.append(([], []))
# Append token depending of the current mode
if mode == CONDITION:
ret[-1][0].append(token)
elif mode == VALUE:
ret[-1][1].append(token)
# Return cases list
return ret
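# A minimal usage sketch, assuming the bundled package is importable:
if __name__ == '__main__':
    import sqlparse
    stmt = sqlparse.parse('CASE WHEN x > 0 THEN 1 ELSE 0 END')[0]
    case = stmt.token_next_by_instance(0, Case)
    for cond, value in case.get_cases():
        print cond is None  # -> False for the WHEN branch, True for ELSE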
class Function(TokenList):
"""A function or procedure call."""
__slots__ = ('value', 'ttype', 'tokens')
def get_parameters(self):
"""Return a list of parameters."""
parenthesis = self.tokens[-1]
for t in parenthesis.tokens:
if isinstance(t, IdentifierList):
return t.get_identifiers()
elif isinstance(t, Identifier) or \
isinstance(t, Function) or \
t.ttype in T.Literal:
return [t,]
return []
class Begin(TokenList):
"""A BEGIN/END block."""
__slots__ = ('value', 'ttype', 'tokens')


@@ -1,137 +0,0 @@
'''
Created on 17/05/2012
@author: piranna
'''
import re
try:
from collections import OrderedDict
except ImportError:
OrderedDict = None
if OrderedDict:
class Cache(OrderedDict):
"""Cache with LRU algorithm using an OrderedDict as basis
"""
def __init__(self, maxsize=100):
OrderedDict.__init__(self)
self._maxsize = maxsize
def __getitem__(self, key, *args, **kwargs):
# Get the key and remove it from the cache, or raise KeyError
value = OrderedDict.__getitem__(self, key)
del self[key]
# Insert the (key, value) pair on the front of the cache
OrderedDict.__setitem__(self, key, value)
# Return the value from the cache
return value
def __setitem__(self, key, value, *args, **kwargs):
# Key was inserted before, remove it so we put it at front later
if key in self:
del self[key]
# Too many items in the cache, remove the least recently used
elif len(self) >= self._maxsize:
self.popitem(False)
# Insert the (key, value) pair on the front of the cache
OrderedDict.__setitem__(self, key, value, *args, **kwargs)
else:
class Cache(dict):
"""Cache that reset when gets full
"""
def __init__(self, maxsize=100):
dict.__init__(self)
self._maxsize = maxsize
def __setitem__(self, key, value, *args, **kwargs):
# Reset the cache if we have too many cached entries and start over
if len(self) >= self._maxsize:
self.clear()
# Insert the (key, value) pair on the front of the cache
dict.__setitem__(self, key, value, *args, **kwargs)
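# A minimal usage sketch of the OrderedDict-backed variant (Python >= 2.7):
if __name__ == '__main__':
    cache = Cache(maxsize=2)
    cache['a'] = 1
    cache['b'] = 2
    cache['a']        # touching 'a' makes it the most recently used entry
    cache['c'] = 3    # evicts 'b', the least recently used entry
    print sorted(cache.keys())  # -> ['a', 'c']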
def memoize_generator(func):
"""Memoize decorator for generators
Store `func` results in a cache keyed by their arguments, as 'memoize'
does, but for generators instead of regular functions.
Obviously, this is only useful if the generator always returns the same
values for a given set of parameters...
"""
cache = Cache()
def wrapped_func(*args, **kwargs):
# params = (args, kwargs)
params = (args, tuple(sorted(kwargs.items())))
# Look if cached
try:
cached = cache[params]
# Not cached, exec and store it
except KeyError:
cached = []
for item in func(*args, **kwargs):
cached.append(item)
yield item
cache[params] = cached
# Cached, yield its items
else:
for item in cached:
yield item
return wrapped_func
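# A minimal usage sketch; results are replayed from the cache once the
# generator has been fully consumed for a given set of arguments:
if __name__ == '__main__':
    @memoize_generator
    def squares(n):
        for i in xrange(n):
            yield i * i
    print list(squares(3))  # computed: [0, 1, 4]
    print list(squares(3))  # replayed from the cache: [0, 1, 4]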
# This regular expression replaces the home-cooked parser that was here before.
# It is much faster, but requires an extra post-processing step to get the
# desired results (that are compatible with what you would expect from the
# str.splitlines() method).
#
# It matches groups of characters: newlines, quoted strings, or unquoted text,
# and splits on that basis. The post-processing step puts those back together
# into the actual lines of SQL.
SPLIT_REGEX = re.compile(r"""
(
(?: # Start of non-capturing group
(?:\r\n|\r|\n) | # Match any single newline, or
[^\r\n'"]+ | # Match any character series without quotes or
# newlines, or
"(?:[^"\\]|\\.)*" | # Match double-quoted strings, or
'(?:[^'\\]|\\.)*' # Match single quoted strings
)
)
""", re.VERBOSE)
LINE_MATCH = re.compile(r'(\r\n|\r|\n)')
def split_unquoted_newlines(text):
"""Split a string on all unquoted newlines.
Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
character is inside of a string."""
lines = SPLIT_REGEX.split(text)
outputlines = ['']
for line in lines:
if not line:
continue
elif LINE_MATCH.match(line):
outputlines.append('')
else:
outputlines[-1] += line
return outputlines
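# A minimal usage sketch: the newline inside the quoted literal survives,
# only the unquoted one splits the text:
if __name__ == '__main__':
    print split_unquoted_newlines(u"SELECT 'a\nb'\nFROM t")
    # -> [u"SELECT 'a\nb'", u'FROM t']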


@@ -1,78 +0,0 @@
'''
Created on 24/03/2012
@author: piranna
'''
import unittest
from sqlparse.filters import StripWhitespace, Tokens2Unicode
from sqlparse.lexer import tokenize
class Test__StripWhitespace(unittest.TestCase):
sql = """INSERT INTO dir_entries(type)VALUES(:type);
INSERT INTO directories(inode)
VALUES(:inode)
LIMIT 1"""
sql2 = """SELECT child_entry,asdf AS inode, creation
FROM links
WHERE parent_dir == :parent_dir AND name == :name
LIMIT 1"""
sql3 = """SELECT
0 AS st_dev,
0 AS st_uid,
0 AS st_gid,
dir_entries.type AS st_mode,
dir_entries.inode AS st_ino,
COUNT(links.child_entry) AS st_nlink,
:creation AS st_ctime,
dir_entries.access AS st_atime,
dir_entries.modification AS st_mtime,
COALESCE(files.size,0) AS st_size,
COALESCE(files.size,0) AS size
FROM dir_entries
LEFT JOIN files
ON dir_entries.inode == files.inode
LEFT JOIN links
ON dir_entries.inode == links.child_entry
WHERE dir_entries.inode == :inode
GROUP BY dir_entries.inode
LIMIT 1"""
def test_StripWhitespace1(self):
self.assertEqual(
Tokens2Unicode(StripWhitespace(tokenize(self.sql))),
'INSERT INTO dir_entries(type)VALUES(:type);INSERT INTO '
'directories(inode)VALUES(:inode)LIMIT 1')
def test_StripWhitespace2(self):
self.assertEqual(
Tokens2Unicode(StripWhitespace(tokenize(self.sql2))),
'SELECT child_entry,asdf AS inode,creation FROM links WHERE '
'parent_dir==:parent_dir AND name==:name LIMIT 1')
def test_StripWhitespace3(self):
self.assertEqual(
Tokens2Unicode(StripWhitespace(tokenize(self.sql3))),
'SELECT 0 AS st_dev,0 AS st_uid,0 AS st_gid,dir_entries.type AS '
'st_mode,dir_entries.inode AS st_ino,COUNT(links.child_entry)AS '
'st_nlink,:creation AS st_ctime,dir_entries.access AS st_atime,'
'dir_entries.modification AS st_mtime,COALESCE(files.size,0)AS '
'st_size,COALESCE(files.size,0)AS size FROM dir_entries LEFT JOIN'
' files ON dir_entries.inode==files.inode LEFT JOIN links ON '
'dir_entries.inode==links.child_entry WHERE dir_entries.inode=='
':inode GROUP BY dir_entries.inode LIMIT 1')
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()


@@ -1,346 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
from tests.utils import TestCaseBase
import sqlparse
from sqlparse.exceptions import SQLParseError
class TestFormat(TestCaseBase):
def test_keywordcase(self):
sql = 'select * from bar; -- select foo\n'
res = sqlparse.format(sql, keyword_case='upper')
self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- select foo\n')
res = sqlparse.format(sql, keyword_case='capitalize')
self.ndiffAssertEqual(res, 'Select * From bar; -- select foo\n')
res = sqlparse.format(sql.upper(), keyword_case='lower')
self.ndiffAssertEqual(res, 'select * from BAR; -- SELECT FOO\n')
self.assertRaises(SQLParseError, sqlparse.format, sql,
keyword_case='foo')
def test_identifiercase(self):
sql = 'select * from bar; -- select foo\n'
res = sqlparse.format(sql, identifier_case='upper')
self.ndiffAssertEqual(res, 'select * from BAR; -- select foo\n')
res = sqlparse.format(sql, identifier_case='capitalize')
self.ndiffAssertEqual(res, 'select * from Bar; -- select foo\n')
res = sqlparse.format(sql.upper(), identifier_case='lower')
self.ndiffAssertEqual(res, 'SELECT * FROM bar; -- SELECT FOO\n')
self.assertRaises(SQLParseError, sqlparse.format, sql,
identifier_case='foo')
sql = 'select * from "foo"."bar"'
res = sqlparse.format(sql, identifier_case="upper")
self.ndiffAssertEqual(res, 'select * from "foo"."bar"')
def test_strip_comments_single(self):
sql = 'select *-- statement starts here\nfrom foo'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select * from foo')
sql = 'select * -- statement starts here\nfrom foo'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select * from foo')
sql = 'select-- foo\nfrom -- bar\nwhere'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select from where')
self.assertRaises(SQLParseError, sqlparse.format, sql,
strip_comments=None)
def test_strip_comments_multi(self):
sql = '/* sql starts here */\nselect'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select')
sql = '/* sql starts here */ select'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select')
sql = '/*\n * sql starts here\n */\nselect'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select')
sql = 'select (/* sql starts here */ select 2)'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select (select 2)')
sql = 'select (/* sql /* starts here */ select 2)'
res = sqlparse.format(sql, strip_comments=True)
self.ndiffAssertEqual(res, 'select (select 2)')
def test_strip_ws(self):
f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
s = 'select\n* from foo\n\twhere ( 1 = 2 )\n'
self.ndiffAssertEqual(f(s), 'select * from foo where (1 = 2)')
s = 'select -- foo\nfrom bar\n'
self.ndiffAssertEqual(f(s), 'select -- foo\nfrom bar')
self.assertRaises(SQLParseError, sqlparse.format, s,
strip_whitespace=None)
def test_preserve_ws(self):
# preserve at least one whitespace after subgroups
f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
s = 'select\n* /* foo */ from bar '
self.ndiffAssertEqual(f(s), 'select * /* foo */ from bar')
def test_notransform_of_quoted_crlf(self):
# Make sure that CR/CR+LF characters inside string literals don't get
# affected by the formatter.
s1 = "SELECT some_column LIKE 'value\r'"
s2 = "SELECT some_column LIKE 'value\r'\r\nWHERE id = 1\n"
s3 = "SELECT some_column LIKE 'value\\'\r' WHERE id = 1\r"
s4 = "SELECT some_column LIKE 'value\\\\\\'\r' WHERE id = 1\r\n"
f = lambda x: sqlparse.format(x)
# Because of the use of unquoted-newline splitting in the formatter,
# the unquoted \r and \r\n below are normalized to \n, while the
# quoted ones are preserved:
self.ndiffAssertEqual(f(s1), "SELECT some_column LIKE 'value\r'")
self.ndiffAssertEqual(f(s2), "SELECT some_column LIKE 'value\r'\nWHERE id = 1\n")
self.ndiffAssertEqual(f(s3), "SELECT some_column LIKE 'value\\'\r' WHERE id = 1\n")
self.ndiffAssertEqual(f(s4), "SELECT some_column LIKE 'value\\\\\\'\r' WHERE id = 1\n")
def test_outputformat(self):
sql = 'select * from foo;'
self.assertRaises(SQLParseError, sqlparse.format, sql,
output_format='foo')
class TestFormatReindent(TestCaseBase):
def test_option(self):
self.assertRaises(SQLParseError, sqlparse.format, 'foo',
reindent=2)
self.assertRaises(SQLParseError, sqlparse.format, 'foo',
indent_tabs=2)
self.assertRaises(SQLParseError, sqlparse.format, 'foo',
reindent=True, indent_width='foo')
self.assertRaises(SQLParseError, sqlparse.format, 'foo',
reindent=True, indent_width=-12)
def test_stmts(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select foo; select bar'
self.ndiffAssertEqual(f(s), 'select foo;\n\nselect bar')
s = 'select foo'
self.ndiffAssertEqual(f(s), 'select foo')
s = 'select foo; -- test\n select bar'
self.ndiffAssertEqual(f(s), 'select foo; -- test\n\nselect bar')
def test_keywords(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select * from foo union select * from bar;'
self.ndiffAssertEqual(f(s), '\n'.join(['select *',
'from foo',
'union',
'select *',
'from bar;']))
def test_keywords_between(self): # issue 14
# don't break AND after BETWEEN
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'and foo between 1 and 2 and bar = 3'
self.ndiffAssertEqual(f(s), '\n'.join(['',
'and foo between 1 and 2',
'and bar = 3']))
def test_parenthesis(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select count(*) from (select * from foo);'
self.ndiffAssertEqual(f(s),
'\n'.join(['select count(*)',
'from',
' (select *',
' from foo);',
])
)
def test_where(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select * from foo where bar = 1 and baz = 2 or bzz = 3;'
self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n'
'where bar = 1\n'
' and baz = 2\n'
' or bzz = 3;'))
s = 'select * from foo where bar = 1 and (baz = 2 or bzz = 3);'
self.ndiffAssertEqual(f(s), ('select *\nfrom foo\n'
'where bar = 1\n'
' and (baz = 2\n'
' or bzz = 3);'))
def test_join(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select * from foo join bar on 1 = 2'
self.ndiffAssertEqual(f(s), '\n'.join(['select *',
'from foo',
'join bar on 1 = 2']))
s = 'select * from foo inner join bar on 1 = 2'
self.ndiffAssertEqual(f(s), '\n'.join(['select *',
'from foo',
'inner join bar on 1 = 2']))
s = 'select * from foo left outer join bar on 1 = 2'
self.ndiffAssertEqual(f(s), '\n'.join(['select *',
'from foo',
'left outer join bar on 1 = 2']
))
s = 'select * from foo straight_join bar on 1 = 2'
self.ndiffAssertEqual(f(s), '\n'.join(['select *',
'from foo',
'straight_join bar on 1 = 2']
))
def test_identifier_list(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select foo, bar, baz from table1, table2 where 1 = 2'
self.ndiffAssertEqual(f(s), '\n'.join(['select foo,',
' bar,',
' baz',
'from table1,',
' table2',
'where 1 = 2']))
s = 'select a.*, b.id from a, b'
self.ndiffAssertEqual(f(s), '\n'.join(['select a.*,',
' b.id',
'from a,',
' b']))
def test_identifier_list_with_functions(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = ("select 'abc' as foo, coalesce(col1, col2)||col3 as bar,"
"col3 from my_table")
self.ndiffAssertEqual(f(s), '\n'.join(
["select 'abc' as foo,",
" coalesce(col1, col2)||col3 as bar,",
" col3",
"from my_table"]))
def test_case(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'case when foo = 1 then 2 when foo = 3 then 4 else 5 end'
self.ndiffAssertEqual(f(s), '\n'.join(['case',
' when foo = 1 then 2',
' when foo = 3 then 4',
' else 5',
'end']))
def test_case2(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'case(foo) when bar = 1 then 2 else 3 end'
self.ndiffAssertEqual(f(s), '\n'.join(['case(foo)',
' when bar = 1 then 2',
' else 3',
'end']))
def test_nested_identifier_list(self): # issue4
f = lambda sql: sqlparse.format(sql, reindent=True)
s = '(foo as bar, bar1, bar2 as bar3, b4 as b5)'
self.ndiffAssertEqual(f(s), '\n'.join(['(foo as bar,',
' bar1,',
' bar2 as bar3,',
' b4 as b5)']))
def test_duplicate_linebreaks(self): # issue3
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select c1 -- column1\nfrom foo'
self.ndiffAssertEqual(f(s), '\n'.join(['select c1 -- column1',
'from foo']))
s = 'select c1 -- column1\nfrom foo'
r = sqlparse.format(s, reindent=True, strip_comments=True)
self.ndiffAssertEqual(r, '\n'.join(['select c1',
'from foo']))
s = 'select c1\nfrom foo\norder by c1'
self.ndiffAssertEqual(f(s), '\n'.join(['select c1',
'from foo',
'order by c1']))
s = 'select c1 from t1 where (c1 = 1) order by c1'
self.ndiffAssertEqual(f(s), '\n'.join(['select c1',
'from t1',
'where (c1 = 1)',
'order by c1']))
def test_keywordfunctions(self): # issue36
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select max(a) b, foo, bar'
self.ndiffAssertEqual(f(s), '\n'.join(['select max(a) b,',
' foo,',
' bar']))
def test_identifier_and_functions(self): # issue45
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select foo.bar, nvl(1) from dual'
self.ndiffAssertEqual(f(s), '\n'.join(['select foo.bar,',
' nvl(1)',
'from dual']))
class TestOutputFormat(TestCaseBase):
def test_python(self):
sql = 'select * from foo;'
f = lambda sql: sqlparse.format(sql, output_format='python')
self.ndiffAssertEqual(f(sql), "sql = 'select * from foo;'")
f = lambda sql: sqlparse.format(sql, output_format='python',
reindent=True)
self.ndiffAssertEqual(f(sql), ("sql = ('select * '\n"
" 'from foo;')"))
def test_php(self):
sql = 'select * from foo;'
f = lambda sql: sqlparse.format(sql, output_format='php')
self.ndiffAssertEqual(f(sql), '$sql = "select * from foo;";')
f = lambda sql: sqlparse.format(sql, output_format='php',
reindent=True)
self.ndiffAssertEqual(f(sql), ('$sql = "select * ";\n'
'$sql .= "from foo;";'))
def test_sql(self): # "sql" is an allowed option but has no effect
sql = 'select * from foo;'
f = lambda sql: sqlparse.format(sql, output_format='sql')
self.ndiffAssertEqual(f(sql), 'select * from foo;')
def test_format_column_ordering(): # issue89
sql = 'select * from foo order by c1 desc, c2, c3;'
formatted = sqlparse.format(sql, reindent=True)
expected = '\n'.join(['select *',
'from foo',
'order by c1 desc,',
' c2,',
' c3;'])
assert formatted == expected
def test_truncate_strings():
sql = 'update foo set value = \'' + 'x' * 1000 + '\';'
formatted = sqlparse.format(sql, truncate_strings=10)
assert formatted == 'update foo set value = \'xxxxxxxxxx[...]\';'
formatted = sqlparse.format(sql, truncate_strings=3, truncate_char='YYY')
assert formatted == 'update foo set value = \'xxxYYY\';'
def test_truncate_strings_invalid_option():
pytest.raises(SQLParseError, sqlparse.format,
'foo', truncate_strings='bar')
pytest.raises(SQLParseError, sqlparse.format,
'foo', truncate_strings=-1)
pytest.raises(SQLParseError, sqlparse.format,
'foo', truncate_strings=0)
@pytest.mark.parametrize('sql', ['select verrrylongcolumn from foo',
'select "verrrylongcolumn" from "foo"'])
def test_truncate_strings_doesnt_truncate_identifiers(sql):
formatted = sqlparse.format(sql, truncate_strings=2)
assert formatted == sql
def test_having_produces_newline():
sql = (
'select * from foo, bar where bar.id = foo.bar_id'
' having sum(bar.value) > 100')
formatted = sqlparse.format(sql, reindent=True)
expected = [
'select *',
'from foo,',
' bar',
'where bar.id = foo.bar_id',
'having sum(bar.value) > 100'
]
assert formatted == '\n'.join(expected)
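All of the options exercised in this deleted file (keyword_case, identifier_case, strip_comments, strip_whitespace, reindent, truncate_strings) remain plain keyword arguments to sqlparse.format() in 0.3.x, and they compose; a small sketch:

import sqlparse

sql = 'select c1 -- column1\nfrom foo where bar = 1 and baz = 2'
print(sqlparse.format(sql, reindent=True, strip_comments=True,
                      keyword_case='upper'))
# Expected output, roughly:
#   SELECT c1
#   FROM foo
#   WHERE bar = 1
#     AND baz = 2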


@@ -1,164 +0,0 @@
'''
Created on 13/02/2012
@author: piranna
'''
from unittest import main, TestCase
from sqlparse.filters import IncludeStatement, Tokens2Unicode
from sqlparse.lexer import tokenize
import sys
sys.path.insert(0, '..')
from sqlparse.filters import compact
from sqlparse.functions import getcolumns, getlimit, IsType
class Test_IncludeStatement(TestCase):
sql = """-- type: script
-- return: integer
INCLUDE "_Make_DirEntry.sql";
INSERT INTO directories(inode)
VALUES(:inode)
LIMIT 1"""
def test_includeStatement(self):
stream = tokenize(self.sql)
includeStatement = IncludeStatement('tests/files',
raiseexceptions=True)
stream = includeStatement.process(None, stream)
stream = compact(stream)
result = Tokens2Unicode(stream)
self.assertEqual(
result, (
'INSERT INTO dir_entries(type)VALUES(:type);INSERT INTO '
'directories(inode)VALUES(:inode)LIMIT 1'))
class Test_SQL(TestCase):
sql = """-- type: script
-- return: integer
INSERT INTO directories(inode)
VALUES(:inode)
LIMIT 1"""
sql2 = """SELECT child_entry,asdf AS inode, creation
FROM links
WHERE parent_dir == :parent_dir AND name == :name
LIMIT 1"""
sql3 = """SELECT
0 AS st_dev,
0 AS st_uid,
0 AS st_gid,
dir_entries.type AS st_mode,
dir_entries.inode AS st_ino,
COUNT(links.child_entry) AS st_nlink,
:creation AS st_ctime,
dir_entries.access AS st_atime,
dir_entries.modification AS st_mtime,
-- :creation AS st_ctime,
-- CAST(STRFTIME('%s',dir_entries.access) AS INTEGER) AS st_atime,
-- CAST(STRFTIME('%s',dir_entries.modification) AS INTEGER) AS st_mtime,
COALESCE(files.size,0) AS st_size, -- Python-FUSE
COALESCE(files.size,0) AS size -- PyFilesystem
FROM dir_entries
LEFT JOIN files
ON dir_entries.inode == files.inode
LEFT JOIN links
ON dir_entries.inode == links.child_entry
WHERE dir_entries.inode == :inode
GROUP BY dir_entries.inode
LIMIT 1"""
class Test_Compact(Test_SQL):
def test_compact1(self):
stream = compact(tokenize(self.sql))
result = Tokens2Unicode(stream)
self.assertEqual(result,
'INSERT INTO directories(inode)VALUES(:inode)LIMIT 1')
def test_compact2(self):
stream = tokenize(self.sql2)
result = compact(stream)
self.assertEqual(
Tokens2Unicode(result),
'SELECT child_entry,asdf AS inode,creation FROM links WHERE '
'parent_dir==:parent_dir AND name==:name LIMIT 1')
def test_compact3(self):
stream = tokenize(self.sql3)
result = compact(stream)
self.assertEqual(
Tokens2Unicode(result),
'SELECT 0 AS st_dev,0 AS st_uid,0 AS st_gid,dir_entries.type AS '
'st_mode,dir_entries.inode AS st_ino,COUNT(links.child_entry)AS '
'st_nlink,:creation AS st_ctime,dir_entries.access AS st_atime,'
'dir_entries.modification AS st_mtime,COALESCE(files.size,0)AS '
'st_size,COALESCE(files.size,0)AS size FROM dir_entries LEFT JOIN'
' files ON dir_entries.inode==files.inode LEFT JOIN links ON '
'dir_entries.inode==links.child_entry WHERE dir_entries.inode=='
':inode GROUP BY dir_entries.inode LIMIT 1')
class Test_GetColumns(Test_SQL):
def test_getcolumns1(self):
columns = getcolumns(tokenize(self.sql))
self.assertEqual(columns, [])
def test_getcolumns2(self):
columns = getcolumns(tokenize(self.sql2))
self.assertEqual(columns, ['child_entry', 'inode', 'creation'])
def test_getcolumns3(self):
columns = getcolumns(tokenize(self.sql3))
self.assertEqual(columns, ['st_dev', 'st_uid', 'st_gid', 'st_mode',
'st_ino', 'st_nlink', 'st_ctime',
'st_atime', 'st_mtime', 'st_size', 'size'])
class Test_GetLimit(Test_SQL):
def test_getlimit1(self):
limit = getlimit(tokenize(self.sql))
self.assertEqual(limit, 1)
def test_getlimit2(self):
limit = getlimit(tokenize(self.sql2))
self.assertEqual(limit, 1)
def test_getlimit3(self):
limit = getlimit(tokenize(self.sql3))
self.assertEqual(limit, 1)
class Test_IsType(Test_SQL):
def test_istype2(self):
stream = tokenize(self.sql2)
self.assertTrue(IsType('SELECT')(stream))
stream = tokenize(self.sql2)
self.assertFalse(IsType('INSERT')(stream))
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
main()
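sqlparse.functions (getcolumns, getlimit, IsType) and the pipeline machinery this deleted file depends on are gone in 0.2.0+, so equivalent behavior has to be rebuilt on the token API. get_limit below is a hypothetical helper sketching one way to do that against 0.3.x; it is not anything the library ships:

import sqlparse
from sqlparse import tokens as T

def get_limit(sql):
    """Return the integer following the first LIMIT keyword, if any."""
    seen_limit = False
    for tok in sqlparse.parse(sql)[0].flatten():
        if seen_limit and tok.ttype is T.Number.Integer:
            return int(tok.value)
        if tok.ttype is T.Keyword and tok.normalized == 'LIMIT':
            seen_limit = True
    return None

print(get_limit('SELECT * FROM links LIMIT 1'))  # 1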


@@ -1,399 +0,0 @@
# -*- coding: utf-8 -*-
import pytest
import sqlparse
from sqlparse import sql
from sqlparse import tokens as T
from tests.utils import TestCaseBase
class TestGrouping(TestCaseBase):
def test_parenthesis(self):
s = 'select (select (x3) x2) and (y2) bar'
parsed = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, str(parsed))
self.assertEqual(len(parsed.tokens), 7)
self.assert_(isinstance(parsed.tokens[2], sql.Parenthesis))
self.assert_(isinstance(parsed.tokens[-1], sql.Identifier))
self.assertEqual(len(parsed.tokens[2].tokens), 5)
self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Identifier))
self.assert_(isinstance(parsed.tokens[2].tokens[3].tokens[0], sql.Parenthesis))
self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3)
def test_comments(self):
s = '/*\n * foo\n */ \n bar'
parsed = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(parsed))
self.assertEqual(len(parsed.tokens), 2)
def test_assignment(self):
s = 'foo := 1;'
parsed = sqlparse.parse(s)[0]
self.assertEqual(len(parsed.tokens), 1)
self.assert_(isinstance(parsed.tokens[0], sql.Assignment))
s = 'foo := 1'
parsed = sqlparse.parse(s)[0]
self.assertEqual(len(parsed.tokens), 1)
self.assert_(isinstance(parsed.tokens[0], sql.Assignment))
def test_identifiers(self):
s = 'select foo.bar from "myscheme"."table" where fail. order'
parsed = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(parsed))
self.assert_(isinstance(parsed.tokens[2], sql.Identifier))
self.assert_(isinstance(parsed.tokens[6], sql.Identifier))
self.assert_(isinstance(parsed.tokens[8], sql.Where))
s = 'select * from foo where foo.id = 1'
parsed = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(parsed))
self.assert_(isinstance(parsed.tokens[-1].tokens[-1].tokens[0],
sql.Identifier))
s = 'select * from (select "foo"."id" from foo)'
parsed = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(parsed))
self.assert_(isinstance(parsed.tokens[-1].tokens[3], sql.Identifier))
s = "INSERT INTO `test` VALUES('foo', 'bar');"
parsed = sqlparse.parse(s)[0]
types = [l.ttype for l in parsed.tokens if not l.is_whitespace()]
self.assertEquals(types, [T.DML, T.Keyword, None,
T.Keyword, None, T.Punctuation])
s = "select 1.0*(a+b) as col, sum(c)/sum(d) from myschema.mytable"
parsed = sqlparse.parse(s)[0]
self.assertEqual(len(parsed.tokens), 7)
self.assert_(isinstance(parsed.tokens[2], sql.IdentifierList))
self.assertEqual(len(parsed.tokens[2].tokens), 4)
identifiers = list(parsed.tokens[2].get_identifiers())
self.assertEqual(len(identifiers), 2)
self.assertEquals(identifiers[0].get_alias(), u"col")
def test_identifier_wildcard(self):
p = sqlparse.parse('a.*, b.id')[0]
self.assert_(isinstance(p.tokens[0], sql.IdentifierList))
self.assert_(isinstance(p.tokens[0].tokens[0], sql.Identifier))
self.assert_(isinstance(p.tokens[0].tokens[-1], sql.Identifier))
def test_identifier_name_wildcard(self):
p = sqlparse.parse('a.*')[0]
t = p.tokens[0]
self.assertEqual(t.get_name(), '*')
self.assertEqual(t.is_wildcard(), True)
def test_identifier_invalid(self):
p = sqlparse.parse('a.')[0]
self.assert_(isinstance(p.tokens[0], sql.Identifier))
self.assertEqual(p.tokens[0].has_alias(), False)
self.assertEqual(p.tokens[0].get_name(), None)
self.assertEqual(p.tokens[0].get_real_name(), None)
self.assertEqual(p.tokens[0].get_parent_name(), 'a')
def test_identifier_as_invalid(self): # issue8
p = sqlparse.parse('foo as select *')[0]
self.assert_(len(p.tokens), 5)
self.assert_(isinstance(p.tokens[0], sql.Identifier))
self.assertEqual(len(p.tokens[0].tokens), 1)
self.assertEqual(p.tokens[2].ttype, T.Keyword)
def test_identifier_function(self):
p = sqlparse.parse('foo() as bar')[0]
self.assert_(isinstance(p.tokens[0], sql.Identifier))
self.assert_(isinstance(p.tokens[0].tokens[0], sql.Function))
p = sqlparse.parse('foo()||col2 bar')[0]
self.assert_(isinstance(p.tokens[0], sql.Identifier))
self.assert_(isinstance(p.tokens[0].tokens[0], sql.Function))
def test_identifier_extended(self): # issue 15
p = sqlparse.parse('foo+100')[0]
self.assert_(isinstance(p.tokens[0], sql.Identifier))
p = sqlparse.parse('foo + 100')[0]
self.assert_(isinstance(p.tokens[0], sql.Identifier))
p = sqlparse.parse('foo*100')[0]
self.assert_(isinstance(p.tokens[0], sql.Identifier))
def test_identifier_list(self):
p = sqlparse.parse('a, b, c')[0]
self.assert_(isinstance(p.tokens[0], sql.IdentifierList))
p = sqlparse.parse('(a, b, c)')[0]
self.assert_(isinstance(p.tokens[0].tokens[1], sql.IdentifierList))
def test_identifier_list_case(self):
p = sqlparse.parse('a, case when 1 then 2 else 3 end as b, c')[0]
self.assert_(isinstance(p.tokens[0], sql.IdentifierList))
p = sqlparse.parse('(a, case when 1 then 2 else 3 end as b, c)')[0]
self.assert_(isinstance(p.tokens[0].tokens[1], sql.IdentifierList))
def test_identifier_list_other(self): # issue2
p = sqlparse.parse("select *, null, 1, 'foo', bar from mytable, x")[0]
self.assert_(isinstance(p.tokens[2], sql.IdentifierList))
l = p.tokens[2]
self.assertEqual(len(l.tokens), 13)
def test_identifier_list_with_inline_comments(self): # issue163
p = sqlparse.parse('foo /* a comment */, bar')[0]
self.assert_(isinstance(p.tokens[0], sql.IdentifierList))
self.assert_(isinstance(p.tokens[0].tokens[0], sql.Identifier))
self.assert_(isinstance(p.tokens[0].tokens[3], sql.Identifier))
def test_where(self):
s = 'select * from foo where bar = 1 order by id desc'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertTrue(len(p.tokens), 16)
s = 'select x from (select y from foo where bar = 1) z'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertTrue(isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where))
def test_typecast(self):
s = 'select foo::integer from bar'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertEqual(p.tokens[2].get_typecast(), 'integer')
self.assertEqual(p.tokens[2].get_name(), 'foo')
s = 'select (current_database())::information_schema.sql_identifier'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertEqual(p.tokens[2].get_typecast(),
'information_schema.sql_identifier')
def test_alias(self):
s = 'select foo as bar from mytable'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertEqual(p.tokens[2].get_real_name(), 'foo')
self.assertEqual(p.tokens[2].get_alias(), 'bar')
s = 'select foo from mytable t1'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertEqual(p.tokens[6].get_real_name(), 'mytable')
self.assertEqual(p.tokens[6].get_alias(), 't1')
s = 'select foo::integer as bar from mytable'
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertEqual(p.tokens[2].get_alias(), 'bar')
s = ('SELECT DISTINCT '
'(current_database())::information_schema.sql_identifier AS view')
p = sqlparse.parse(s)[0]
self.ndiffAssertEqual(s, unicode(p))
self.assertEqual(p.tokens[4].get_alias(), 'view')
def test_alias_case(self): # see issue46
p = sqlparse.parse('CASE WHEN 1 THEN 2 ELSE 3 END foo')[0]
self.assertEqual(len(p.tokens), 1)
self.assertEqual(p.tokens[0].get_alias(), 'foo')
def test_alias_returns_none(self): # see issue185
p = sqlparse.parse('foo.bar')[0]
self.assertEqual(len(p.tokens), 1)
self.assertEqual(p.tokens[0].get_alias(), None)
def test_idlist_function(self): # see issue10 too
p = sqlparse.parse('foo(1) x, bar')[0]
self.assert_(isinstance(p.tokens[0], sql.IdentifierList))
def test_comparison_exclude(self):
# make sure operators are not handled too lazy
p = sqlparse.parse('(=)')[0]
self.assert_(isinstance(p.tokens[0], sql.Parenthesis))
self.assert_(not isinstance(p.tokens[0].tokens[1], sql.Comparison))
p = sqlparse.parse('(a=1)')[0]
self.assert_(isinstance(p.tokens[0].tokens[1], sql.Comparison))
p = sqlparse.parse('(a>=1)')[0]
self.assert_(isinstance(p.tokens[0].tokens[1], sql.Comparison))
def test_function(self):
p = sqlparse.parse('foo()')[0]
self.assert_(isinstance(p.tokens[0], sql.Function))
p = sqlparse.parse('foo(null, bar)')[0]
self.assert_(isinstance(p.tokens[0], sql.Function))
self.assertEqual(len(list(p.tokens[0].get_parameters())), 2)
def test_function_not_in(self): # issue183
p = sqlparse.parse('in(1, 2)')[0]
self.assertEqual(len(p.tokens), 2)
self.assertEqual(p.tokens[0].ttype, T.Keyword)
self.assert_(isinstance(p.tokens[1], sql.Parenthesis))
def test_varchar(self):
p = sqlparse.parse('"text" Varchar(50) NOT NULL')[0]
self.assert_(isinstance(p.tokens[2], sql.Function))
class TestStatement(TestCaseBase):
def test_get_type(self):
f = lambda sql: sqlparse.parse(sql)[0]
self.assertEqual(f('select * from foo').get_type(), 'SELECT')
self.assertEqual(f('update foo').get_type(), 'UPDATE')
self.assertEqual(f(' update foo').get_type(), 'UPDATE')
self.assertEqual(f('\nupdate foo').get_type(), 'UPDATE')
self.assertEqual(f('foo').get_type(), 'UNKNOWN')
# Statements that have whitespace after the closing semicolon
# are parsed as two statements, where the latter consists only of the
# trailing whitespace.
self.assertEqual(f('\n').get_type(), 'UNKNOWN')
def test_identifier_with_operators(): # issue 53
p = sqlparse.parse('foo||bar')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Identifier)
# again with whitespaces
p = sqlparse.parse('foo || bar')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Identifier)
def test_identifier_with_op_trailing_ws():
# make sure trailing whitespace isn't grouped with identifier
p = sqlparse.parse('foo || bar ')[0]
assert len(p.tokens) == 2
assert isinstance(p.tokens[0], sql.Identifier)
assert p.tokens[1].ttype is T.Whitespace
def test_identifier_with_string_literals():
p = sqlparse.parse('foo + \'bar\'')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Identifier)
# This test seems to be wrong. It was introduced when fixing #53, but #111
# showed that this shouldn't be an identifier at all. I'm leaving this
# commented in the source for a while.
# def test_identifier_string_concat():
# p = sqlparse.parse('\'foo\' || bar')[0]
# assert len(p.tokens) == 1
# assert isinstance(p.tokens[0], sql.Identifier)
def test_identifier_consumes_ordering(): # issue89
p = sqlparse.parse('select * from foo order by c1 desc, c2, c3')[0]
assert isinstance(p.tokens[-1], sql.IdentifierList)
ids = list(p.tokens[-1].get_identifiers())
assert len(ids) == 3
assert ids[0].get_name() == 'c1'
assert ids[0].get_ordering() == 'DESC'
assert ids[1].get_name() == 'c2'
assert ids[1].get_ordering() is None
def test_comparison_with_keywords(): # issue90
# in fact these are assignments, but for now we don't distinguish them
p = sqlparse.parse('foo = NULL')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'foo'
assert p.tokens[0].right.value == 'NULL'
# make sure it's case-insensitive
p = sqlparse.parse('foo = null')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
def test_comparison_with_floats(): # issue145
p = sqlparse.parse('foo = 25.5')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'foo'
assert p.tokens[0].right.value == '25.5'
def test_comparison_with_parenthesis(): # issue23
p = sqlparse.parse('(3 + 4) = 7')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
comp = p.tokens[0]
assert isinstance(comp.left, sql.Parenthesis)
assert comp.right.ttype is T.Number.Integer
def test_comparison_with_strings(): # issue148
p = sqlparse.parse('foo = \'bar\'')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert p.tokens[0].right.value == '\'bar\''
assert p.tokens[0].right.ttype == T.String.Single
@pytest.mark.parametrize('start', ['FOR', 'FOREACH'])
def test_forloops(start):
p = sqlparse.parse('%s foo in bar LOOP foobar END LOOP' % start)[0]
assert (len(p.tokens)) == 1
assert isinstance(p.tokens[0], sql.For)
def test_nested_for():
p = sqlparse.parse('FOR foo LOOP FOR bar LOOP END LOOP END LOOP')[0]
assert len(p.tokens) == 1
for1 = p.tokens[0]
assert for1.tokens[0].value == 'FOR'
assert for1.tokens[-1].value == 'END LOOP'
for2 = for1.tokens[6]
assert isinstance(for2, sql.For)
assert for2.tokens[0].value == 'FOR'
assert for2.tokens[-1].value == 'END LOOP'
def test_begin():
p = sqlparse.parse('BEGIN foo END')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Begin)
def test_nested_begin():
p = sqlparse.parse('BEGIN foo BEGIN bar END END')[0]
assert len(p.tokens) == 1
outer = p.tokens[0]
assert outer.tokens[0].value == 'BEGIN'
assert outer.tokens[-1].value == 'END'
inner = outer.tokens[4]
assert inner.tokens[0].value == 'BEGIN'
assert inner.tokens[-1].value == 'END'
assert isinstance(inner, sql.Begin)
def test_aliased_column_without_as():
p = sqlparse.parse('foo bar')[0].tokens
assert len(p) == 1
assert p[0].get_real_name() == 'foo'
assert p[0].get_alias() == 'bar'
p = sqlparse.parse('foo.bar baz')[0].tokens[0]
assert p.get_parent_name() == 'foo'
assert p.get_real_name() == 'bar'
assert p.get_alias() == 'baz'
def test_qualified_function():
p = sqlparse.parse('foo()')[0].tokens[0]
assert p.get_parent_name() is None
assert p.get_real_name() == 'foo'
p = sqlparse.parse('foo.bar()')[0].tokens[0]
assert p.get_parent_name() == 'foo'
assert p.get_real_name() == 'bar'
def test_aliased_function_without_as():
p = sqlparse.parse('foo() bar')[0].tokens[0]
assert p.get_parent_name() is None
assert p.get_real_name() == 'foo'
assert p.get_alias() == 'bar'
p = sqlparse.parse('foo.bar() baz')[0].tokens[0]
assert p.get_parent_name() == 'foo'
assert p.get_real_name() == 'bar'
assert p.get_alias() == 'baz'
def test_aliased_literal_without_as():
p = sqlparse.parse('1 foo')[0].tokens
assert len(p) == 1
assert p[0].get_alias() == 'foo'
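The grouping behavior asserted throughout this deleted file is easy to inspect by walking a statement's top-level tokens; a short sketch:

import sqlparse

parsed = sqlparse.parse('select foo.bar as baz from mytable t1')[0]
for tok in parsed.tokens:
    # Grouped nodes print as their class name (Identifier, ...);
    # ungrouped leaf tokens print as plain Token with a ttype.
    print(type(tok).__name__, tok.ttype, repr(tok.value))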


@@ -1,305 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests sqlparse function."""
import pytest
from tests.utils import TestCaseBase
import sqlparse
import sqlparse.sql
from sqlparse import tokens as T
class SQLParseTest(TestCaseBase):
"""Tests sqlparse.parse()."""
def test_tokenize(self):
sql = 'select * from foo;'
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 1)
self.assertEqual(str(stmts[0]), sql)
def test_multistatement(self):
sql1 = 'select * from foo;'
sql2 = 'select * from bar;'
stmts = sqlparse.parse(sql1 + sql2)
self.assertEqual(len(stmts), 2)
self.assertEqual(str(stmts[0]), sql1)
self.assertEqual(str(stmts[1]), sql2)
def test_newlines(self):
sql = u'select\n*from foo;'
p = sqlparse.parse(sql)[0]
self.assertEqual(unicode(p), sql)
sql = u'select\r\n*from foo'
p = sqlparse.parse(sql)[0]
self.assertEqual(unicode(p), sql)
sql = u'select\r*from foo'
p = sqlparse.parse(sql)[0]
self.assertEqual(unicode(p), sql)
sql = u'select\r\n*from foo\n'
p = sqlparse.parse(sql)[0]
self.assertEqual(unicode(p), sql)
def test_within(self):
sql = 'foo(col1, col2)'
p = sqlparse.parse(sql)[0]
col1 = p.tokens[0].tokens[1].tokens[1].tokens[0]
self.assert_(col1.within(sqlparse.sql.Function))
def test_child_of(self):
sql = '(col1, col2)'
p = sqlparse.parse(sql)[0]
self.assert_(p.tokens[0].tokens[1].is_child_of(p.tokens[0]))
sql = 'select foo'
p = sqlparse.parse(sql)[0]
self.assert_(not p.tokens[2].is_child_of(p.tokens[0]))
self.assert_(p.tokens[2].is_child_of(p))
def test_has_ancestor(self):
sql = 'foo or (bar, baz)'
p = sqlparse.parse(sql)[0]
baz = p.tokens[-1].tokens[1].tokens[-1]
self.assert_(baz.has_ancestor(p.tokens[-1].tokens[1]))
self.assert_(baz.has_ancestor(p.tokens[-1]))
self.assert_(baz.has_ancestor(p))
def test_float(self):
t = sqlparse.parse('.5')[0].tokens
self.assertEqual(len(t), 1)
self.assert_(t[0].ttype is sqlparse.tokens.Number.Float)
t = sqlparse.parse('.51')[0].tokens
self.assertEqual(len(t), 1)
self.assert_(t[0].ttype is sqlparse.tokens.Number.Float)
t = sqlparse.parse('1.5')[0].tokens
self.assertEqual(len(t), 1)
self.assert_(t[0].ttype is sqlparse.tokens.Number.Float)
t = sqlparse.parse('12.5')[0].tokens
self.assertEqual(len(t), 1)
self.assert_(t[0].ttype is sqlparse.tokens.Number.Float)
def test_placeholder(self):
def _get_tokens(sql):
return sqlparse.parse(sql)[0].tokens[-1].tokens
t = _get_tokens('select * from foo where user = ?')
self.assert_(t[-1].ttype is sqlparse.tokens.Name.Placeholder)
self.assertEqual(t[-1].value, '?')
t = _get_tokens('select * from foo where user = :1')
self.assert_(t[-1].ttype is sqlparse.tokens.Name.Placeholder)
self.assertEqual(t[-1].value, ':1')
t = _get_tokens('select * from foo where user = :name')
self.assert_(t[-1].ttype is sqlparse.tokens.Name.Placeholder)
self.assertEqual(t[-1].value, ':name')
t = _get_tokens('select * from foo where user = %s')
self.assert_(t[-1].ttype is sqlparse.tokens.Name.Placeholder)
self.assertEqual(t[-1].value, '%s')
t = _get_tokens('select * from foo where user = $a')
self.assert_(t[-1].ttype is sqlparse.tokens.Name.Placeholder)
self.assertEqual(t[-1].value, '$a')
def test_modulo_not_placeholder(self):
tokens = list(sqlparse.lexer.tokenize('x %3'))
self.assertEqual(tokens[2][0], sqlparse.tokens.Operator)
def test_access_symbol(self): # see issue27
t = sqlparse.parse('select a.[foo bar] as foo')[0].tokens
self.assert_(isinstance(t[-1], sqlparse.sql.Identifier))
self.assertEqual(t[-1].get_name(), 'foo')
self.assertEqual(t[-1].get_real_name(), '[foo bar]')
self.assertEqual(t[-1].get_parent_name(), 'a')
def test_square_brackets_notation_isnt_too_greedy(self): # see issue153
t = sqlparse.parse('[foo], [bar]')[0].tokens
self.assert_(isinstance(t[0], sqlparse.sql.IdentifierList))
self.assertEqual(len(t[0].tokens), 4)
self.assertEqual(t[0].tokens[0].get_real_name(), '[foo]')
self.assertEqual(t[0].tokens[-1].get_real_name(), '[bar]')
def test_keyword_like_identifier(self): # see issue47
t = sqlparse.parse('foo.key')[0].tokens
self.assertEqual(len(t), 1)
self.assert_(isinstance(t[0], sqlparse.sql.Identifier))
def test_function_parameter(self): # see issue94
t = sqlparse.parse('abs(some_col)')[0].tokens[0].get_parameters()
self.assertEqual(len(t), 1)
self.assert_(isinstance(t[0], sqlparse.sql.Identifier))
def test_function_param_single_literal(self):
t = sqlparse.parse('foo(5)')[0].tokens[0].get_parameters()
self.assertEqual(len(t), 1)
self.assert_(t[0].ttype is T.Number.Integer)
def test_nested_function(self):
t = sqlparse.parse('foo(bar(5))')[0].tokens[0].get_parameters()
self.assertEqual(len(t), 1)
self.assert_(type(t[0]) is sqlparse.sql.Function)
def test_quoted_identifier():
t = sqlparse.parse('select x.y as "z" from foo')[0].tokens
assert isinstance(t[2], sqlparse.sql.Identifier)
assert t[2].get_name() == 'z'
assert t[2].get_real_name() == 'y'
@pytest.mark.parametrize('name', [
'foo',
'_foo',
])
def test_valid_identifier_names(name): # issue175
t = sqlparse.parse(name)[0].tokens
assert isinstance(t[0], sqlparse.sql.Identifier)
def test_psql_quotation_marks(): # issue83
# regression: make sure plain $$ work
t = sqlparse.split("""
CREATE OR REPLACE FUNCTION testfunc1(integer) RETURNS integer AS $$
....
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION testfunc2(integer) RETURNS integer AS $$
....
$$ LANGUAGE plpgsql;""")
assert len(t) == 2
# make sure $SOMETHING$ works too
t = sqlparse.split("""
CREATE OR REPLACE FUNCTION testfunc1(integer) RETURNS integer AS $PROC_1$
....
$PROC_1$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION testfunc2(integer) RETURNS integer AS $PROC_2$
....
$PROC_2$ LANGUAGE plpgsql;""")
assert len(t) == 2
def test_double_precision_is_builtin():
sql = 'DOUBLE PRECISION'
t = sqlparse.parse(sql)[0].tokens
assert (len(t) == 1
and t[0].ttype == sqlparse.tokens.Name.Builtin
and t[0].value == 'DOUBLE PRECISION')
@pytest.mark.parametrize('ph', ['?', ':1', ':foo', '%s', '%(foo)s'])
def test_placeholder(ph):
p = sqlparse.parse(ph)[0].tokens
assert len(p) == 1
assert p[0].ttype is T.Name.Placeholder
@pytest.mark.parametrize('num', ['6.67428E-8', '1.988e33', '1e-12'])
def test_scientific_numbers(num):
p = sqlparse.parse(num)[0].tokens
assert len(p) == 1
assert p[0].ttype is T.Number.Float
def test_single_quotes_are_strings():
p = sqlparse.parse("'foo'")[0].tokens
assert len(p) == 1
assert p[0].ttype is T.String.Single
def test_double_quotes_are_identifiers():
p = sqlparse.parse('"foo"')[0].tokens
assert len(p) == 1
assert isinstance(p[0], sqlparse.sql.Identifier)
def test_single_quotes_with_linebreaks(): # issue118
p = sqlparse.parse("'f\nf'")[0].tokens
assert len(p) == 1
assert p[0].ttype is T.String.Single
def test_sqlite_identifiers():
# Make sure we still parse sqlite style escapes
p = sqlparse.parse('[col1],[col2]')[0].tokens
assert (len(p) == 1
and isinstance(p[0], sqlparse.sql.IdentifierList)
and [id.get_name() for id in p[0].get_identifiers()]
== ['[col1]', '[col2]'])
p = sqlparse.parse('[col1]+[col2]')[0]
types = [tok.ttype for tok in p.flatten()]
assert types == [T.Name, T.Operator, T.Name]
def test_simple_1d_array_index():
p = sqlparse.parse('col[1]')[0].tokens
assert len(p) == 1
assert p[0].get_name() == 'col'
indices = list(p[0].get_array_indices())
assert (len(indices) == 1 # 1-dimensional index
and len(indices[0]) == 1 # index is single token
and indices[0][0].value == '1')
def test_2d_array_index():
p = sqlparse.parse('col[x][(y+1)*2]')[0].tokens
assert len(p) == 1
assert p[0].get_name() == 'col'
assert len(list(p[0].get_array_indices())) == 2 # 2-dimensional index
def test_array_index_function_result():
p = sqlparse.parse('somefunc()[1]')[0].tokens
assert len(p) == 1
assert len(list(p[0].get_array_indices())) == 1
def test_schema_qualified_array_index():
p = sqlparse.parse('schem.col[1]')[0].tokens
assert len(p) == 1
assert p[0].get_parent_name() == 'schem'
assert p[0].get_name() == 'col'
assert list(p[0].get_array_indices())[0][0].value == '1'
def test_aliased_array_index():
p = sqlparse.parse('col[1] x')[0].tokens
assert len(p) == 1
assert p[0].get_alias() == 'x'
assert p[0].get_real_name() == 'col'
assert list(p[0].get_array_indices())[0][0].value == '1'
def test_array_literal():
# See issue #176
p = sqlparse.parse('ARRAY[%s, %s]')[0]
assert len(p.tokens) == 2
assert len(list(p.flatten())) == 7
def test_typed_array_definition():
# array indices aren't grouped with builtins, but make sure we can extract
# identifier names
p = sqlparse.parse('x int, y int[], z int')[0]
names = [x.get_name() for x in p.get_sublists()
if isinstance(x, sqlparse.sql.Identifier)]
assert names == ['x', 'y', 'z']
@pytest.mark.parametrize('sql', [
'select 1 -- foo',
'select 1 # foo' # see issue178
])
def test_single_line_comments(sql):
p = sqlparse.parse(sql)[0]
assert len(p.tokens) == 5
assert p.tokens[-1].ttype == T.Comment.Single
@pytest.mark.parametrize('sql', [
'foo',
'@foo',
'#foo', # see issue192
'##foo'
])
def test_names_and_special_names(sql):
p = sqlparse.parse(sql)[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sqlparse.sql.Identifier)
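The two quoting rules asserted just above drive much of the parser's behavior and are worth calling out; a minimal sketch:

import sqlparse
from sqlparse import sql, tokens as T

single = sqlparse.parse("'foo'")[0].tokens[0]
double = sqlparse.parse('"foo"')[0].tokens[0]
print(single.ttype is T.String.Single)     # True: single quotes make strings
print(isinstance(double, sql.Identifier))  # True: double quotes make identifiers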


@@ -1,70 +0,0 @@
import unittest
from sqlparse.filters import ColumnsSelect
from sqlparse.lexer import tokenize
from sqlparse.pipeline import Pipeline
class Test(unittest.TestCase):
def setUp(self):
self.pipe = Pipeline()
self.pipe.append(tokenize)
self.pipe.append(ColumnsSelect())
def test_1(self):
sql = """
-- type: script
-- return: integer
INCLUDE "Direntry.make.sql";
INSERT INTO directories(inode)
VALUES(:inode)
LIMIT 1"""
self.assertEqual([], self.pipe(sql))
def test_2(self):
sql = """
SELECT child_entry,asdf AS inode, creation
FROM links
WHERE parent_dir == :parent_dir AND name == :name
LIMIT 1"""
self.assertEqual([u'child_entry', u'inode', u'creation'],
self.pipe(sql))
def test_3(self):
sql = """
SELECT
0 AS st_dev,
0 AS st_uid,
0 AS st_gid,
dir_entries.type AS st_mode,
dir_entries.inode AS st_ino,
COUNT(links.child_entry) AS st_nlink,
:creation AS st_ctime,
dir_entries.access AS st_atime,
dir_entries.modification AS st_mtime,
-- :creation AS st_ctime,
-- CAST(STRFTIME('%s',dir_entries.access) AS INTEGER) AS st_atime,
-- CAST(STRFTIME('%s',dir_entries.modification) AS INTEGER) AS st_mtime,
COALESCE(files.size,0) AS st_size, -- Python-FUSE
COALESCE(files.size,0) AS size -- PyFilesystem
FROM dir_entries
LEFT JOIN files
ON dir_entries.inode == files.inode
LEFT JOIN links
ON dir_entries.inode == links.child_entry
WHERE dir_entries.inode == :inode
GROUP BY dir_entries.inode
LIMIT 1"""
self.assertEqual([u'st_dev', u'st_uid', u'st_gid', u'st_mode',
u'st_ino', u'st_nlink', u'st_ctime',
u'st_atime', u'st_mtime', u'st_size', u'size'],
self.pipe(sql))


@@ -1,276 +0,0 @@
# -*- coding: utf-8 -*-
import sys
from tests.utils import TestCaseBase, load_file
import sqlparse
from sqlparse import sql
from sqlparse import tokens as T
class RegressionTests(TestCaseBase):
def test_issue9(self):
# make sure where doesn't consume parenthesis
p = sqlparse.parse('(where 1)')[0]
self.assert_(isinstance(p, sql.Statement))
self.assertEqual(len(p.tokens), 1)
self.assert_(isinstance(p.tokens[0], sql.Parenthesis))
prt = p.tokens[0]
self.assertEqual(len(prt.tokens), 3)
self.assertEqual(prt.tokens[0].ttype, T.Punctuation)
self.assertEqual(prt.tokens[-1].ttype, T.Punctuation)
def test_issue13(self):
parsed = sqlparse.parse(("select 'one';\n"
"select 'two\\'';\n"
"select 'three';"))
self.assertEqual(len(parsed), 3)
self.assertEqual(str(parsed[1]).strip(), "select 'two\\'';")
def test_issue26(self):
# parse stand-alone comments
p = sqlparse.parse('--hello')[0]
self.assertEqual(len(p.tokens), 1)
self.assert_(p.tokens[0].ttype is T.Comment.Single)
p = sqlparse.parse('-- hello')[0]
self.assertEqual(len(p.tokens), 1)
self.assert_(p.tokens[0].ttype is T.Comment.Single)
p = sqlparse.parse('--hello\n')[0]
self.assertEqual(len(p.tokens), 1)
self.assert_(p.tokens[0].ttype is T.Comment.Single)
p = sqlparse.parse('--')[0]
self.assertEqual(len(p.tokens), 1)
self.assert_(p.tokens[0].ttype is T.Comment.Single)
p = sqlparse.parse('--\n')[0]
self.assertEqual(len(p.tokens), 1)
self.assert_(p.tokens[0].ttype is T.Comment.Single)
def test_issue34(self):
t = sqlparse.parse("create")[0].token_first()
self.assertEqual(t.match(T.Keyword.DDL, "create"), True)
self.assertEqual(t.match(T.Keyword.DDL, "CREATE"), True)
def test_issue35(self):
# missing space before LIMIT
sql = sqlparse.format("select * from foo where bar = 1 limit 1",
reindent=True)
self.ndiffAssertEqual(sql, "\n".join(["select *",
"from foo",
"where bar = 1 limit 1"]))
def test_issue38(self):
sql = sqlparse.format("SELECT foo; -- comment",
strip_comments=True)
self.ndiffAssertEqual(sql, "SELECT foo;")
sql = sqlparse.format("/* foo */", strip_comments=True)
self.ndiffAssertEqual(sql, "")
def test_issue39(self):
p = sqlparse.parse('select user.id from user')[0]
self.assertEqual(len(p.tokens), 7)
idt = p.tokens[2]
self.assertEqual(idt.__class__, sql.Identifier)
self.assertEqual(len(idt.tokens), 3)
self.assertEqual(idt.tokens[0].match(T.Name, 'user'), True)
self.assertEqual(idt.tokens[1].match(T.Punctuation, '.'), True)
self.assertEqual(idt.tokens[2].match(T.Name, 'id'), True)
def test_issue40(self):
# make sure identifier lists in subselects are grouped
p = sqlparse.parse(('SELECT id, name FROM '
'(SELECT id, name FROM bar) as foo'))[0]
self.assertEqual(len(p.tokens), 7)
self.assertEqual(p.tokens[2].__class__, sql.IdentifierList)
self.assertEqual(p.tokens[-1].__class__, sql.Identifier)
self.assertEqual(p.tokens[-1].get_name(), u'foo')
sp = p.tokens[-1].tokens[0]
self.assertEqual(sp.tokens[3].__class__, sql.IdentifierList)
# make sure that formatting works as expected
self.ndiffAssertEqual(
sqlparse.format(('SELECT id, name FROM '
'(SELECT id, name FROM bar)'),
reindent=True),
('SELECT id,\n'
' name\n'
'FROM\n'
' (SELECT id,\n'
' name\n'
' FROM bar)'))
self.ndiffAssertEqual(
sqlparse.format(('SELECT id, name FROM '
'(SELECT id, name FROM bar) as foo'),
reindent=True),
('SELECT id,\n'
' name\n'
'FROM\n'
' (SELECT id,\n'
' name\n'
' FROM bar) as foo'))
def test_issue78():
# the bug author provided these nice examples, so let's use them!
def _get_identifier(sql):
p = sqlparse.parse(sql)[0]
return p.tokens[2]
results = (('get_name', 'z'),
('get_real_name', 'y'),
('get_parent_name', 'x'),
('get_alias', 'z'),
('get_typecast', 'text'))
variants = (
'select x.y::text as z from foo',
'select x.y::text as "z" from foo',
'select x."y"::text as z from foo',
'select x."y"::text as "z" from foo',
'select "x".y::text as z from foo',
'select "x".y::text as "z" from foo',
'select "x"."y"::text as z from foo',
'select "x"."y"::text as "z" from foo',
)
for variant in variants:
i = _get_identifier(variant)
assert isinstance(i, sql.Identifier)
for func_name, result in results:
func = getattr(i, func_name)
assert func() == result
def test_issue83():
sql = """
CREATE OR REPLACE FUNCTION func_a(text)
RETURNS boolean LANGUAGE plpgsql STRICT IMMUTABLE AS
$_$
BEGIN
...
END;
$_$;
CREATE OR REPLACE FUNCTION func_b(text)
RETURNS boolean LANGUAGE plpgsql STRICT IMMUTABLE AS
$_$
BEGIN
...
END;
$_$;
ALTER TABLE..... ;"""
t = sqlparse.split(sql)
assert len(t) == 3
def test_comment_encoding_when_reindent():
# There was a UnicodeEncodeError in the reindent filter that
# cast every comment followed by a keyword to str.
sql = u'select foo -- Comment containing Ümläuts\nfrom bar'
formatted = sqlparse.format(sql, reindent=True)
assert formatted == sql
def test_parse_sql_with_binary():
# See https://github.com/andialbrecht/sqlparse/pull/88
digest = '\x82|\xcb\x0e\xea\x8aplL4\xa1h\x91\xf8N{'
sql = 'select * from foo where bar = \'%s\'' % digest
formatted = sqlparse.format(sql, reindent=True)
tformatted = 'select *\nfrom foo\nwhere bar = \'%s\'' % digest
if sys.version_info < (3,):
tformatted = tformatted.decode('unicode-escape')
assert formatted == tformatted
def test_dont_alias_keywords():
# The _group_left_right function had a bug where the check for the
# left side wasn't handled correctly. In one case this resulted in
# a keyword turning into an identifier.
p = sqlparse.parse('FROM AS foo')[0]
assert len(p.tokens) == 5
assert p.tokens[0].ttype is T.Keyword
assert p.tokens[2].ttype is T.Keyword
def test_format_accepts_encoding(): # issue20
sql = load_file('test_cp1251.sql', 'cp1251')
formatted = sqlparse.format(sql, reindent=True, encoding='cp1251')
if sys.version_info < (3,):
tformatted = u'insert into foo\nvalues (1); -- Песня про надежду\n'
else:
tformatted = 'insert into foo\nvalues (1); -- Песня про надежду\n'
assert formatted == tformatted
def test_issue90():
sql = ('UPDATE "gallery_photo" SET "owner_id" = 4018, "deleted_at" = NULL,'
' "width" = NULL, "height" = NULL, "rating_votes" = 0,'
' "rating_score" = 0, "thumbnail_width" = NULL,'
' "thumbnail_height" = NULL, "price" = 1, "description" = NULL')
formatted = sqlparse.format(sql, reindent=True)
tformatted = '\n'.join(['UPDATE "gallery_photo"',
'SET "owner_id" = 4018,',
' "deleted_at" = NULL,',
' "width" = NULL,',
' "height" = NULL,',
' "rating_votes" = 0,',
' "rating_score" = 0,',
' "thumbnail_width" = NULL,',
' "thumbnail_height" = NULL,',
' "price" = 1,',
' "description" = NULL'])
assert formatted == tformatted
def test_except_formatting():
sql = 'SELECT 1 FROM foo WHERE 2 = 3 EXCEPT SELECT 2 FROM bar WHERE 1 = 2'
formatted = sqlparse.format(sql, reindent=True)
tformatted = '\n'.join([
'SELECT 1',
'FROM foo',
'WHERE 2 = 3',
'EXCEPT',
'SELECT 2',
'FROM bar',
'WHERE 1 = 2'
])
assert formatted == tformatted
def test_null_with_as():
sql = 'SELECT NULL AS c1, NULL AS c2 FROM t1'
formatted = sqlparse.format(sql, reindent=True)
tformatted = '\n'.join([
'SELECT NULL AS c1,',
' NULL AS c2',
'FROM t1'
])
assert formatted == tformatted
def test_issue193_splitting_function():
sql = """CREATE FUNCTION a(x VARCHAR(20)) RETURNS VARCHAR(20)
BEGIN
DECLARE y VARCHAR(20);
RETURN x;
END;
SELECT * FROM a.b;"""
splitted = sqlparse.split(sql)
assert len(splitted) == 2
def test_issue194_splitting_function():
sql = """CREATE FUNCTION a(x VARCHAR(20)) RETURNS VARCHAR(20)
BEGIN
DECLARE y VARCHAR(20);
IF (1 = 1) THEN
SET x = y;
END IF;
RETURN x;
END;
SELECT * FROM a.b;"""
splitted = sqlparse.split(sql)
assert len(splitted) == 2
def test_issue186_get_type():
sql = "-- comment\ninsert into foo"
p = sqlparse.parse(sql)[0]
assert p.get_type() == 'INSERT'
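test_issue186_get_type covers a property that matters to any client sending statements with leading comments: get_type() classifies the statement by looking past the initial comment. Interactively (a sketch):

import sqlparse

stmt = sqlparse.parse('-- comment\ninsert into foo values (1)')[0]
print(stmt.get_type())  # INSERT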


@@ -1,153 +0,0 @@
# -*- coding: utf-8 -*-
# Tests splitting functions.
import unittest
from tests.utils import load_file, TestCaseBase
import sqlparse
class SQLSplitTest(TestCaseBase):
"""Tests sqlparse.sqlsplit()."""
_sql1 = 'select * from foo;'
_sql2 = 'select * from bar;'
def test_split_semicolon(self):
sql2 = 'select * from foo where bar = \'foo;bar\';'
stmts = sqlparse.parse(''.join([self._sql1, sql2]))
self.assertEqual(len(stmts), 2)
self.ndiffAssertEqual(unicode(stmts[0]), self._sql1)
self.ndiffAssertEqual(unicode(stmts[1]), sql2)
def test_split_backslash(self):
stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';")
self.assertEqual(len(stmts), 3)
def test_create_function(self):
sql = load_file('function.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 1)
self.ndiffAssertEqual(unicode(stmts[0]), sql)
def test_create_function_psql(self):
sql = load_file('function_psql.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 1)
self.ndiffAssertEqual(unicode(stmts[0]), sql)
def test_create_function_psql3(self):
sql = load_file('function_psql3.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 1)
self.ndiffAssertEqual(unicode(stmts[0]), sql)
def test_create_function_psql2(self):
sql = load_file('function_psql2.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 1)
self.ndiffAssertEqual(unicode(stmts[0]), sql)
def test_dashcomments(self):
sql = load_file('dashcomment.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 3)
self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
def test_dashcomments_eol(self):
stmts = sqlparse.parse('select foo; -- comment\n')
self.assertEqual(len(stmts), 1)
stmts = sqlparse.parse('select foo; -- comment\r')
self.assertEqual(len(stmts), 1)
stmts = sqlparse.parse('select foo; -- comment\r\n')
self.assertEqual(len(stmts), 1)
stmts = sqlparse.parse('select foo; -- comment')
self.assertEqual(len(stmts), 1)
def test_begintag(self):
sql = load_file('begintag.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 3)
self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
def test_begintag_2(self):
sql = load_file('begintag_2.sql')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 1)
self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
def test_dropif(self):
sql = 'DROP TABLE IF EXISTS FOO;\n\nSELECT * FROM BAR;'
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 2)
self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
def test_comment_with_umlaut(self):
sql = (u'select * from foo;\n'
u'-- Testing an umlaut: ä\n'
u'select * from bar;')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 2)
self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
def test_comment_end_of_line(self):
sql = ('select * from foo; -- foo\n'
'select * from bar;')
stmts = sqlparse.parse(sql)
self.assertEqual(len(stmts), 2)
self.ndiffAssertEqual(''.join(unicode(q) for q in stmts), sql)
# make sure the comment belongs to first query
self.ndiffAssertEqual(unicode(stmts[0]), 'select * from foo; -- foo\n')
def test_casewhen(self):
sql = ('SELECT case when val = 1 then 2 else null end as foo;\n'
'comment on table actor is \'The actor table.\';')
stmts = sqlparse.split(sql)
self.assertEqual(len(stmts), 2)
def test_cursor_declare(self):
sql = ('DECLARE CURSOR "foo" AS SELECT 1;\n'
'SELECT 2;')
stmts = sqlparse.split(sql)
self.assertEqual(len(stmts), 2)
def test_if_function(self): # see issue 33
# don't let IF as a function confuse the splitter
sql = ('CREATE TEMPORARY TABLE tmp '
'SELECT IF(a=1, a, b) AS o FROM one; '
'SELECT t FROM two')
stmts = sqlparse.split(sql)
self.assertEqual(len(stmts), 2)
def test_split_stream(self):
import types
from cStringIO import StringIO
stream = StringIO("SELECT 1; SELECT 2;")
stmts = sqlparse.parsestream(stream)
self.assertEqual(type(stmts), types.GeneratorType)
self.assertEqual(len(list(stmts)), 2)
def test_encoding_parsestream(self):
from cStringIO import StringIO
stream = StringIO("SELECT 1; SELECT 2;")
stmts = list(sqlparse.parsestream(stream))
self.assertEqual(type(stmts[0].tokens[0].value), unicode)
def test_split_quotes_with_new_line(self):
stmts = sqlparse.split('select "foo\nbar"')
assert len(stmts) == 1
assert stmts[0] == 'select "foo\nbar"'
stmts = sqlparse.split("select 'foo\n\bar'")
assert len(stmts) == 1
assert stmts[0] == "select 'foo\n\bar'"
def test_split_simple():
stmts = sqlparse.split('select * from foo; select * from bar;')
assert len(stmts) == 2
assert stmts[0] == 'select * from foo;'
assert stmts[1] == 'select * from bar;'
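The cStringIO imports in test_split_stream and test_encoding_parsestream are Python 2-only; the Python 3 equivalent of the same checks uses io.StringIO (a sketch):

import io
import sqlparse

stmts = list(sqlparse.parsestream(io.StringIO('SELECT 1; SELECT 2;')))
print(len(stmts))  # 2
print(sqlparse.split('select * from foo; select * from bar;'))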


@@ -1,190 +0,0 @@
# -*- coding: utf-8 -*-
import sys
import types
import unittest
import pytest
import sqlparse
from sqlparse import lexer
from sqlparse import sql
from sqlparse.tokens import *
class TestTokenize(unittest.TestCase):
def test_simple(self):
s = 'select * from foo;'
stream = lexer.tokenize(s)
self.assert_(isinstance(stream, types.GeneratorType))
tokens = list(stream)
self.assertEqual(len(tokens), 8)
self.assertEqual(len(tokens[0]), 2)
self.assertEqual(tokens[0], (Keyword.DML, u'select'))
self.assertEqual(tokens[-1], (Punctuation, u';'))
def test_backticks(self):
s = '`foo`.`bar`'
tokens = list(lexer.tokenize(s))
self.assertEqual(len(tokens), 3)
self.assertEqual(tokens[0], (Name, u'`foo`'))
def test_linebreaks(self): # issue1
s = 'foo\nbar\n'
tokens = lexer.tokenize(s)
self.assertEqual(''.join(str(x[1]) for x in tokens), s)
s = 'foo\rbar\r'
tokens = lexer.tokenize(s)
self.assertEqual(''.join(str(x[1]) for x in tokens), s)
s = 'foo\r\nbar\r\n'
tokens = lexer.tokenize(s)
self.assertEqual(''.join(str(x[1]) for x in tokens), s)
s = 'foo\r\nbar\n'
tokens = lexer.tokenize(s)
self.assertEqual(''.join(str(x[1]) for x in tokens), s)
def test_inline_keywords(self): # issue 7
s = "create created_foo"
tokens = list(lexer.tokenize(s))
self.assertEqual(len(tokens), 3)
self.assertEqual(tokens[0][0], Keyword.DDL)
self.assertEqual(tokens[2][0], Name)
self.assertEqual(tokens[2][1], u'created_foo')
s = "enddate"
tokens = list(lexer.tokenize(s))
self.assertEqual(len(tokens), 1)
self.assertEqual(tokens[0][0], Name)
s = "join_col"
tokens = list(lexer.tokenize(s))
self.assertEqual(len(tokens), 1)
self.assertEqual(tokens[0][0], Name)
s = "left join_col"
tokens = list(lexer.tokenize(s))
self.assertEqual(len(tokens), 3)
self.assertEqual(tokens[2][0], Name)
self.assertEqual(tokens[2][1], 'join_col')
def test_negative_numbers(self):
s = "values(-1)"
tokens = list(lexer.tokenize(s))
self.assertEqual(len(tokens), 4)
self.assertEqual(tokens[2][0], Number.Integer)
self.assertEqual(tokens[2][1], '-1')
# Somehow this test fails on Python 3.2
@pytest.mark.skipif('sys.version_info >= (3,0)')
def test_tab_expansion(self):
s = "\t"
lex = lexer.Lexer()
lex.tabsize = 5
tokens = list(lex.get_tokens(s))
self.assertEqual(tokens[0][1], " " * 5)
class TestToken(unittest.TestCase):
def test_str(self):
token = sql.Token(None, 'FoO')
self.assertEqual(str(token), 'FoO')
def test_repr(self):
token = sql.Token(Keyword, 'foo')
tst = "<Keyword 'foo' at 0x"
self.assertEqual(repr(token)[:len(tst)], tst)
token = sql.Token(Keyword, '1234567890')
tst = "<Keyword '123456...' at 0x"
self.assertEqual(repr(token)[:len(tst)], tst)
def test_flatten(self):
token = sql.Token(Keyword, 'foo')
gen = token.flatten()
self.assertEqual(type(gen), types.GeneratorType)
lgen = list(gen)
self.assertEqual(lgen, [token])
class TestTokenList(unittest.TestCase):
def test_repr(self):
p = sqlparse.parse('foo, bar, baz')[0]
tst = "<IdentifierList 'foo, b...' at 0x"
self.assertEqual(repr(p.tokens[0])[:len(tst)], tst)
def test_token_first(self):
p = sqlparse.parse(' select foo')[0]
first = p.token_first()
self.assertEqual(first.value, 'select')
self.assertEqual(p.token_first(ignore_whitespace=False).value, ' ')
self.assertEqual(sql.TokenList([]).token_first(), None)
def test_token_matching(self):
t1 = sql.Token(Keyword, 'foo')
t2 = sql.Token(Punctuation, ',')
x = sql.TokenList([t1, t2])
self.assertEqual(x.token_matching(0, [lambda t: t.ttype is Keyword]),
t1)
self.assertEqual(x.token_matching(0,
[lambda t: t.ttype is Punctuation]),
t2)
self.assertEqual(x.token_matching(1, [lambda t: t.ttype is Keyword]),
None)
class TestStream(unittest.TestCase):
def test_simple(self):
from cStringIO import StringIO
stream = StringIO("SELECT 1; SELECT 2;")
lex = lexer.Lexer()
tokens = lex.get_tokens(stream)
self.assertEqual(len(list(tokens)), 9)
stream.seek(0)
lex.bufsize = 4
tokens = list(lex.get_tokens(stream))
self.assertEqual(len(tokens), 9)
stream.seek(0)
lex.bufsize = len(stream.getvalue())
tokens = list(lex.get_tokens(stream))
self.assertEqual(len(tokens), 9)
def test_error(self):
from cStringIO import StringIO
stream = StringIO("FOOBAR{")
lex = lexer.Lexer()
lex.bufsize = 4
tokens = list(lex.get_tokens(stream))
self.assertEqual(len(tokens), 2)
self.assertEqual(tokens[1][0], Error)
@pytest.mark.parametrize('expr', ['JOIN', 'LEFT JOIN', 'LEFT OUTER JOIN',
'FULL OUTER JOIN', 'NATURAL JOIN',
'CROSS JOIN', 'STRAIGHT JOIN',
'INNER JOIN', 'LEFT INNER JOIN'])
def test_parse_join(expr):
p = sqlparse.parse('%s foo' % expr)[0]
assert len(p.tokens) == 3
assert p.tokens[0].ttype is Keyword
def test_parse_endifloop():
p = sqlparse.parse('END IF')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is Keyword
p = sqlparse.parse('END IF')[0]
assert len(p.tokens) == 1
p = sqlparse.parse('END\t\nIF')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is Keyword
p = sqlparse.parse('END LOOP')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is Keyword
p = sqlparse.parse('END LOOP')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is Keyword
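lexer.tokenize() keeps the contract these tests assert in 0.3.x as well: it lazily yields plain (ttype, value) pairs. A quick sketch:

from sqlparse import lexer

for ttype, value in lexer.tokenize('select * from foo;'):
    print(ttype, repr(value))
# First pair is (Token.Keyword.DML, 'select'), last is (Token.Punctuation, ';').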


@@ -1,46 +0,0 @@
# -*- coding: utf-8 -*-
"""Helpers for testing."""
import codecs
import difflib
import os
import unittest
from StringIO import StringIO
import sqlparse.utils
NL = '\n'
DIR_PATH = os.path.abspath(os.path.dirname(__file__))
PARENT_DIR = os.path.dirname(DIR_PATH)
FILES_DIR = os.path.join(DIR_PATH, 'files')
def load_file(filename, encoding='utf-8'):
    """Open filename with the given encoding and return its contents."""
    f = codecs.open(os.path.join(FILES_DIR, filename), 'r', encoding)
    data = f.read()
    f.close()
    return data


class TestCaseBase(unittest.TestCase):
    """Base class for test cases with some additional checks."""

    # Adapted from Python's tests.
    def ndiffAssertEqual(self, first, second):
        """Like failUnlessEqual except use ndiff for readable output."""
        if first != second:
            sfirst = unicode(first)
            ssecond = unicode(second)
            # Using the built-in .splitlines() method here will cause incorrect
            # results when splitting statements that have quoted CR/CR+LF
            # characters.
            sfirst = sqlparse.utils.split_unquoted_newlines(sfirst)
            ssecond = sqlparse.utils.split_unquoted_newlines(ssecond)
            diff = difflib.ndiff(sfirst, ssecond)
            fp = StringIO()
            fp.write(NL)
            fp.write(NL.join(diff))
            print fp.getvalue()
            raise self.failureException, fp.getvalue()


@@ -1,37 +0,0 @@
[tox]
envlist=py26,py27,py32,py33,py34,pypy
[testenv]
deps=
pytest
pytest-cov
commands=
sqlformat --version # Sanity check.
py.test --cov=sqlparse/ tests
[testenv:py32]
changedir={envdir}
commands=
sqlformat --version # Sanity check.
rm -rf tests/
cp -r {toxinidir}/tests/ tests/
2to3 -w --no-diffs -n tests/
py.test --cov={envdir}/lib/python3.2/site-packages/sqlparse/ tests
[testenv:py33]
changedir={envdir}
commands=
sqlformat --version # Sanity check.
rm -rf tests/
cp -r {toxinidir}/tests/ tests/
2to3 -w --no-diffs -n tests/
py.test --cov={envdir}/lib/python3.3/site-packages/sqlparse/ tests
[testenv:py34]
changedir={envdir}
commands=
sqlformat --version # Sanity check.
rm -rf tests/
cp -r {toxinidir}/tests/ tests/
2to3 -w --no-diffs -n tests/
py.test --cov={envdir}/lib/python3.4/site-packages/sqlparse/ tests


@@ -0,0 +1,67 @@
python-sqlparse is written and maintained by Andi Albrecht <albrecht.andi@gmail.com>.
This module contains code (namely the lexer and filter mechanism) from
the pygments project that was written by Georg Brandl.
This module contains code (Python 2/3 compatibility) from the six
project: https://bitbucket.org/gutworth/six.
Alphabetical list of contributors:
* Adam Greenhall <agreenhall@lyft.com>
* Alexander Beedie <ayembee@gmail.com>
* Alexey Malyshev <nostrict@gmail.com>
* andrew deryabin <github@djsf.com>
* Andrew Tipton <andrew.tipton@compareglobalgroup.com>
* atronah <atronah.ds@gmail.com>
* casey <casey@cloudera.com>
* Cauê Beloni <cbeloni@gmail.com>
* circld <circld1@gmail.com>
* Corey Zumar <corey.zumar@databricks.com>
* Cristian Orellana <cristiano@groupon.com>
* Dag Wieers <dag@wieers.com>
* Darik Gamble <darik.gamble@gmail.com>
* Demetrio92 <Demetrio.Rodriguez.T@gmail.com>
* Dennis Taylor <dennis.taylor@clio.com>
* Dvořák Václav <Vaclav.Dvorak@ysoft.com>
* Florian Bauer <florian.bauer@zmdi.com>
* Fredy Wijaya <fredy.wijaya@gmail.com>
* Gavin Wahl <gwahl@fusionbox.com>
* Ian Robertson <ian.robertson@capitalone.com>
* JacekPliszka <Jacek.Pliszka@gmail.com>
* Jesús Leganés Combarro "Piranna" <piranna@gmail.com>
* Johannes Hoff <johshoff@gmail.com>
* John Bodley <john.bodley@airbnb.com>
* Jon Dufresne <jon.dufresne@gmail.com>
* Josh Soref <jsoref@users.noreply.github.com>
* Kevin Jing Qiu <kevin.jing.qiu@gmail.com>
* koljonen <koljonen@outlook.com>
* Likai Liu <liulk@likai.org>
* mathilde.oustlant <mathilde.oustlant@ext.cdiscount.com>
* Michael Schuller <chick@mschuller.net>
* Mike Amy <cocoade@googlemail.com>
* mulos <daniel.strackbein@gmail.com>
* Oleg Broytman <phd@phdru.name>
* Patrick Schemitz <patrick.schemitz@digitalbriefkasten.de>
* Pi Delport <pjdelport@gmail.com>
* Prudhvi Vatala <pvatala@gmail.com>
* quest <quest@wonky.windwards.net>
* Robert Nix <com.github@rnix.org>
* Rocky Meza <rmeza@fusionbox.com>
* Romain Rigaux <romain.rigaux@gmail.com>
* Rowan Seymour <rowanseymour@gmail.com>
* Ryan Wooden <rygwdn@gmail.com>
* saaj <id@saaj.me>
* Shen Longxing <shenlongxing2012@gmail.com>
* Sjoerd Job Postmus
* Soloman Weng <soloman1124@gmail.com>
* spigwitmer <itgpmc@gmail.com>
* Tao Wang <twang2218@gmail.com>
* Tenghuan <tenghuanhe@gmail.com>
* Tim Graham <timograham@gmail.com>
* Victor Hahn <info@victor-hahn.de>
* Victor Uriarte <vmuriart@gmail.com>
* Ville Skyttä <ville.skytta@iki.fi>
* vthriller <farreva232@yandex.ru>
* wayne.wuw <wayne.wuw@alibaba-inc.com>
* William Ivanski <william.ivanski@gmail.com>
* Yago Riveiro <yago.riveiro@gmail.com>


@@ -0,0 +1,522 @@
Release 0.3.1 (Feb 29, 2020)
----------------------------
Enhancements
* Add HQL keywords (pr475, by matwalk).
* Add support for time zone casts (issue489).
* Enhance formatting of AS keyword (issue507, by john-bodley).
* Stabilize grouping engine when parsing invalid SQL statements.
Bug Fixes
* Fix splitting of SQL with multiple statements inside
parentheses (issue485, pr486 by win39).
* Correctly identify NULLS FIRST / NULLS LAST as keywords (issue487).
* Fix splitting of SQL statements that contain dollar signs in
identifiers (issue491).
* Remove support for parsing double slash comments introduced in
0.3.0 (issue456) as it had some side-effects with other dialects and
doesn't seem to be widely used (issue476).
* Restrict detection of alias names to objects that actually could
have an alias (issue455, adopted some parts of pr509 by john-bodley).
* Fix parsing of date/time literals (issue438, by vashek).
* Fix initialization of TokenList (issue499, pr505 by john-bodley).
* Fix parsing of LIKE (issue493, pr525 by dbczumar).
* Improve parsing of identifiers (pr527 by liulk).
Release 0.3.0 (Mar 11, 2019)
----------------------------
Notable Changes
* Remove support for Python 3.3.
Enhancements
* New formatting option "--indent_after_first" (pr345, by johshoff).
* New formatting option "--indent_columns" (pr393, by digitalarbeiter).
* Add UPSERT keyword (issue408).
* Strip multiple whitespace within parentheses (issue473, by john-bodley).
* Support double slash (//) comments (issue456, by theianrobertson).
* Support for Calcite temporal keywords (pr468, by john-bodley).
Bug Fixes
* Fix occasional IndexError (pr390, by circld, issue313).
* Fix incorrect splitting of strings containing new lines (pr396, by fredyw).
* Fix reindent issue for parenthesis (issue427, by fredyw).
* Fix from( parsing issue (issue446, by fredyw).
* Fix for get_real_name() to return correct name (issue369, by fredyw).
* Wrap function params when wrap_after is set (pr398, by soloman1124).
* Fix parsing of "WHEN name" clauses (pr418, by andrew deryabin).
* Add missing EXPLAIN keyword (issue421).
* Fix issue with strip_comments causing a syntax error (issue425, by fredyw).
* Fix formatting on INSERT which caused staircase effect on values (issue329,
by fredyw).
* Avoid formatting of psql commands (issue469).
Internal Changes
* Unify handling of GROUP BY/ORDER BY (pr457, by john-bodley).
* Remove unnecessary compat shim for bytes (pr453, by jdufresne).
Release 0.2.4 (Sep 27, 2017)
----------------------------
Enhancements
* Add more keywords for MySQL table options (pr328, pr333, by phdru).
* Add more PL/pgSQL keywords (pr357, by Demetrio92).
* Improve parsing of floats (pr330, by atronah).
Bug Fixes
* Fix parsing of MySQL table names starting with digits (issue337).
* Fix detection of identifiers using comparisons (issue327).
* Fix parsing of UNION ALL after WHERE (issue349).
* Fix handling of semicolon in assignments (issue359, issue358).
Release 0.2.3 (Mar 02, 2017)
----------------------------
Enhancements
* New command line option "--encoding" (by twang2218, pr317).
* Support CONCURRENTLY keyword (issue322, by rowanseymour).
Bug Fixes
* Fix some edge-cases when parsing invalid SQL statements.
* Fix indentation of LIMIT (by romainr, issue320).
* Fix parsing of INTO keyword (issue324).
Internal Changes
* Several improvements regarding encodings.
Release 0.2.2 (Oct 22, 2016)
----------------------------
Enhancements
* Add comma_first option: when splitting lists, "comma first" notation
is used (issue141).
Bug Fixes
* Fix parsing of incomplete AS (issue284, by vmuriart).
* Fix parsing of Oracle names containing dollars (issue291).
* Fix parsing of UNION ALL (issue294).
* Fix grouping of identifiers containing typecasts (issue297).
* Add Changelog to sdist again (issue302).
Internal Changes
* `is_whitespace` and `is_group` changed into properties
Release 0.2.1 (Aug 13, 2016)
----------------------------
Notable Changes
* PostgreSQL: Function bodies are parsed as literal strings. Previously
sqlparse assumed that all function bodies are parsable psql
strings (see issue277).
Bug Fixes
* Fix a regression to parse streams again (issue273, reported and
test case by gmccreight).
* Improve Python 2/3 compatibility when using parsestream (issue190,
by phdru).
* Improve splitting of PostgreSQL functions (issue277).
Release 0.2.0 (Jul 20, 2016)
----------------------------
IMPORTANT: The supported Python versions have changed with this release.
sqlparse 0.2.x supports Python 2.7 and Python >= 3.3.
Thanks to the many contributors for writing bug reports and working
on pull requests who made this version possible!
Internal Changes
* sqlparse.SQLParseError was removed from top-level module and moved to
sqlparse.exceptions.
* sqlparse.sql.Token.to_unicode was removed.
* The signature of a filter's process method has changed from
process(stack, stream) -> to process(stream). Stack was never used at
all.
* Lots of code cleanups and modernization (thanks esp. to vmuriart!).
* Improved grouping performance. (sjoerdjob)
Enhancements
* Support WHILE loops (issue215, by shenlongxing).
* Better support for CTEs (issue217, by Andrew Tipton).
* Recognize USING as a keyword more consistently (issue236, by koljonen).
* Improve alignment of columns (issue207, issue235, by vmuriat).
* Add wrap_after option for better alignment when formatting
lists (issue248, by Dennis Taylor).
* Add reindent-aligned option for alternate formatting (Adam Greenhall)
* Improved grouping of operations (issue211, by vmuriat).
Bug Fixes
* Leading whitespaces are now removed when format() is called with
strip_whitespace=True (issue213, by shenlongxing).
* Fix typo in keywords list (issue229, by cbeloni).
* Fix parsing of functions in comparisons (issue230, by saaj).
* Fix grouping of identifiers (issue233).
* Fix parsing of CREATE TABLE statements (issue242, by Tenghuan).
* Minor bug fixes (issue101).
* Improve formatting of CASE WHEN constructs (issue164, by vmuriat).
Release 0.1.19 (Mar 07, 2016)
-----------------------------
Bug Fixes
* Fix IndexError when statement contains WITH clauses (issue205).
Release 0.1.18 (Oct 25, 2015)
-----------------------------
Bug Fixes
* Remove universal wheel support, added in 0.1.17 by mistake.
Release 0.1.17 (Oct 24, 2015)
-----------------------------
Enhancements
* Speed up parsing of large SQL statements (pull request: issue201, fixes the
following issues: issue199, issue135, issue62, issue41, by Ryan Wooden).
Bug Fixes
* Fix another splitter bug regarding DECLARE (issue194).
Misc
* Packages on PyPI are signed from now on.
Release 0.1.16 (Jul 26, 2015)
-----------------------------
Bug Fixes
* Fix a regression in get_alias() introduced in 0.1.15 (issue185).
* Fix a bug in the splitter regarding DECLARE (issue193).
* sqlformat command line tool doesn't duplicate newlines anymore (issue191).
* Don't mix up MySQL comments starting with hash and MSSQL
temp tables (issue192).
* Statement.get_type() now ignores comments at the beginning of
a statement (issue186).
Release 0.1.15 (Apr 15, 2015)
-----------------------------
Bug Fixes
* Fix a regression for identifiers with square bracket
notation (issue153, by darikg).
* Add missing SQL types (issue154, issue155, issue156, by jukebox).
* Fix parsing of multi-line comments (issue172, by JacekPliszka).
* Fix parsing of escaped backslashes (issue174, by caseyching).
* Fix parsing of identifiers starting with underscore (issue175).
* Fix misinterpretation of IN keyword (issue183).
Enhancements
* Improve formatting of HAVING statements.
* Improve parsing of inline comments (issue163).
* Group comments to parent object (issue128, issue160).
* Add double precision builtin (issue169, by darikg).
* Add support for square bracket array indexing (issue170, issue176,
issue177 by darikg).
* Improve grouping of aliased elements (issue167, by darikg).
* Support comments starting with '#' character (issue178).
Release 0.1.14 (Nov 30, 2014)
-----------------------------
Bug Fixes
* Floats in UPDATE statements are now handled correctly (issue145).
* Properly handle string literals in comparisons (issue148, change proposed
by aadis).
* Fix indentation when using tabs (issue146).
Enhancements
* Improved formatting in list when newlines precede commas (issue140).
Release 0.1.13 (Oct 09, 2014)
-----------------------------
Bug Fixes
* Fix a regression in handling of NULL keywords introduced in 0.1.12.
Release 0.1.12 (Sep 20, 2014)
-----------------------------
Bug Fixes
* Fix handling of NULL keywords in aliased identifiers.
* Fix SerializerUnicode to split unquoted newlines (issue131, by Michael Schuller).
* Fix handling of modulo operators without spaces (by gavinwahl).
Enhancements
* Improve parsing of identifier lists containing placeholders.
* Speed up query parsing of unquoted lines (by Michael Schuller).
Release 0.1.11 (Feb 07, 2014)
-----------------------------
Bug Fixes
* Fix incorrect parsing of string literals containing line breaks (issue118).
* Fix typo in keywords, add MERGE, COLLECT keywords (issue122/124,
by Cristian Orellana).
* Improve parsing of string literals in columns.
* Fix parsing and formatting of statements containing EXCEPT keyword.
* Fix Function.get_parameters() (issue126/127, by spigwitmer).
Enhancements
* Classify DML keywords (issue116, by Victor Hahn).
* Add missing FOREACH keyword.
* Grouping of BEGIN/END blocks.
Other
* Python 2.5 isn't automatically tested anymore, neither Travis nor Tox
still support it out of the box.
Release 0.1.10 (Nov 02, 2013)
-----------------------------
Bug Fixes
* Removed buffered reading again, it obviously causes wrong parsing in some rare
cases (issue114).
* Fix regression in setup.py introduced 10 months ago (issue115).
Enhancements
* Improved support for JOINs, by Alexander Beedie.
Release 0.1.9 (Sep 28, 2013)
----------------------------
Bug Fixes
* Fix a regression introduced in 0.1.5 where sqlparse didn't properly
distinguish between single and double quoted strings when tagging
identifiers (issue111).
Enhancements
* New option to truncate long string literals when formatting.
* Scientific numbers are parsed correctly (issue107).
* Support for arithmetic expressions (issue109, issue106; by prudhvi).
Release 0.1.8 (Jun 29, 2013)
----------------------------
Bug Fixes
* Whitespaces within certain keywords are now allowed (issue97, patch proposed
by xcombelle).
Enhancements
* Improve parsing of assignments in UPDATE statements (issue90).
* Add STRAIGHT_JOIN statement (by Yago Riveiro).
* Function.get_parameters() now returns the parameter if only one parameter is
given (issue94, by wayne.wuw).
* sqlparse.split() now removes leading and trailing whitespaces from split
statements.
* Add USE as keyword token (by mulos).
* Improve parsing of PEP249-style placeholders (issue103).
Release 0.1.7 (Apr 06, 2013)
----------------------------
Bug Fixes
* Fix Python 3 compatibility of sqlformat script (by Pi Delport).
* Fix parsing of SQL statements that contain binary data (by Alexey
Malyshev).
* Fix a bug where keywords were identified as aliased identifiers in
invalid SQL statements.
* Fix parsing of identifier lists where identifiers are keywords too
(issue10).
Enhancements
* Top-level API functions now accept encoding keyword to parse
statements in certain encodings more reliable (issue20).
* Improve parsing speed when SQL contains CLOBs or BLOBs (issue86).
* Improve formatting of ORDER BY clauses (issue89).
* Formatter now tries to detect runaway indentations caused by
parsing errors or invalid SQL statements. When re-indenting such
statements the formatter flips back to column 0 before going crazy.
Other
* Documentation updates.
Release 0.1.6 (Jan 01, 2013)
----------------------------
sqlparse is now compatible with Python 3 without any patches. The
Python 3 version is generated during install by 2to3. You'll need
distribute to install sqlparse for Python 3.
Bug Fixes
* Fix parsing error with dollar-quoted procedure bodies (issue83).
Other
* Documentation updates.
* Test suite now uses tox and pytest.
* py3k fixes (by vthriller).
* py3k fixes in setup.py (by Florian Bauer).
* setup.py now requires distribute (by Florian Bauer).
Release 0.1.5 (Nov 13, 2012)
----------------------------
Bug Fixes
* Improve handling of quoted identifiers (issue78).
* Improve grouping and formatting of identifiers with operators (issue53).
* Improve grouping and formatting of concatenated strings (issue53).
* Improve handling of varchar() (by Mike Amy).
* Clean up handling of various SQL elements.
* Switch to pytest and clean up tests.
* Several minor fixes.
Other
* Deprecate sqlparse.SQLParseError. Please use
sqlparse.exceptions.SQLParseError instead.
* Add caching to speed up processing.
* Add experimental filters for token processing.
* Add sqlformat.parsestream (by quest).
Release 0.1.4 (Apr 20, 2012)
----------------------------
Bug Fixes
* Avoid "stair case" effects when identifiers, functions,
placeholders or keywords are mixed in identifier lists (issue45,
issue49, issue52) and when asterisks are used as operators
(issue58).
* Make keyword detection stricter (issue47).
* Improve handling of CASE statements (issue46).
* Fix statement splitting when parsing recursive statements (issue57,
thanks to piranna).
* Fix for negative numbers (issue56, thanks to kevinjqiu).
* Pretty format comments in identifier lists (issue59).
* Several minor bug fixes and improvements.
Release 0.1.3 (Jul 29, 2011)
----------------------------
Bug Fixes
* Improve parsing of floats (thanks to Kris).
* When formatting a statement a space before LIMIT was removed (issue35).
* Fix strip_comments flag (issue38, reported by ooberm...@gmail.com).
* Avoid parsing names as keywords (issue39, reported by djo...@taket.org).
* Make sure identifier lists in subselects are grouped (issue40,
reported by djo...@taket.org).
* Split statements with IF as functions correctly (issue33 and
issue29, reported by charles....@unige.ch).
* Relax detection of keywords, esp. when used as function names
(issue36, nyuhu...@gmail.com).
* Don't treat single characters as keywords (issue32).
* Improve parsing of stand-alone comments (issue26).
* Detection of placeholders in parameterized queries (issue22,
reported by Glyph Lefkowitz).
* Add parsing of MS Access column names with braces (issue27,
reported by frankz...@gmail.com).
Other
* Replace Django by Flask in App Engine frontend (issue11).
Release 0.1.2 (Nov 23, 2010)
----------------------------
Bug Fixes
* Fixed incorrect detection of keyword fragments embedded in names (issue7,
reported and initial patch by andyboyko).
* Stricter detection of identifier aliases (issue8, reported by estama).
* WHERE grouping consumed closing parenthesis (issue9, reported by estama).
* Fixed an issue with trailing whitespaces (reported by Kris).
* Better detection of escaped single quotes (issue13, reported by
Martin Brochhaus, patch by bluemaro with test case by Dan Carley).
* Ignore identifier in double-quotes when changing cases (issue 21).
* Lots of minor fixes targeting encoding, indentation, statement
parsing and more (issues 12, 14, 15, 16, 18, 19).
* Code cleanup with a pinch of refactoring.
Release 0.1.1 (May 6, 2009)
---------------------------
Bug Fixes
* Lexer preserves original line breaks (issue1).
* Improved identifier parsing: backtick quotes, wildcards, T-SQL variables
prefixed with @.
* Improved parsing of identifier lists (issue2).
* Recursive recognition of AS (issue4) and CASE.
* Improved support for UPDATE statements.
Other
* Code cleanup and better test coverage.
Release 0.1.0 (Apr 8, 2009)
---------------------------
Initial release.


@@ -1,4 +1,4 @@
Copyright (c) 2009, Andi Albrecht <albrecht.andi@gmail.com>
Copyright (c) 2016, Andi Albrecht <albrecht.andi@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,


@@ -2,10 +2,10 @@ recursive-include docs source/*
include docs/sqlformat.1
include docs/Makefile
recursive-include tests *.py *.sql
include COPYING
include LICENSE
include TODO
include AUTHORS
include CHANGES
include CHANGELOG
include Makefile
include pytest.ini
include setup.cfg
include tox.ini


@@ -0,0 +1,77 @@
python-sqlparse - Parse SQL statements
======================================
|buildstatus|_
|coverage|_
.. docincludebegin
sqlparse is a non-validating SQL parser for Python.
It provides support for parsing, splitting and formatting SQL statements.
The module is compatible with Python 2.7 and Python 3 (>= 3.4)
and released under the terms of the `New BSD license
<https://opensource.org/licenses/BSD-3-Clause>`_.
.. note::
Support for Python<3.4 (including 2.x) will be dropped soon.
Visit the project page at https://github.com/andialbrecht/sqlparse for
further information about this project.
Quick Start
-----------
.. code-block:: sh
$ pip install sqlparse
.. code-block:: python
>>> import sqlparse
>>> # Split a string containing two SQL statements:
>>> raw = 'select * from foo; select * from bar;'
>>> statements = sqlparse.split(raw)
>>> statements
['select * from foo;', 'select * from bar;']
>>> # Format the first statement and print it out:
>>> first = statements[0]
>>> print(sqlparse.format(first, reindent=True, keyword_case='upper'))
SELECT *
FROM foo;
>>> # Parsing a SQL statement:
>>> parsed = sqlparse.parse('select * from foo')[0]
>>> parsed.tokens
[<DML 'select' at 0x7f22c5e15368>, <Whitespace ' ' at 0x7f22c5e153b0>, <Wildcard '*' ]
>>>
Links
-----
Project page
https://github.com/andialbrecht/sqlparse
Bug tracker
https://github.com/andialbrecht/sqlparse/issues
Documentation
https://sqlparse.readthedocs.io/
Online Demo
https://sqlformat.org/
sqlparse is licensed under the BSD license.
Parts of the code are based on pygments written by Georg Brandl and others.
pygments-Homepage: http://pygments.org/
.. |buildstatus| image:: https://secure.travis-ci.org/andialbrecht/sqlparse.png?branch=master
.. _buildstatus: https://travis-ci.org/#!/andialbrecht/sqlparse
.. |coverage| image:: https://coveralls.io/repos/andialbrecht/sqlparse/badge.svg?branch=master&service=github
.. _coverage: https://coveralls.io/github/andialbrecht/sqlparse?branch=master


@@ -2,6 +2,4 @@
https://groups.google.com/d/msg/sqlparse/huz9lKXt0Lc/11ybIKPJWbUJ
for some interesting hints and suggestions.
* Provide a function to replace tokens. See this thread: https://groups.google.com/d/msg/sqlparse/5xmBL2UKqX4/ZX9z_peve-AJ
* Fix bugs on issue tracker.
* Document filter stack and processing phases.
* See KnownIssues http://code.google.com/p/python-sqlparse/wiki/KnownIssues


@@ -0,0 +1,20 @@
[bdist_wheel]
universal = 1
[metadata]
license_file = LICENSE
[tool:pytest]
xfail_strict = True
[flake8]
exclude =
sqlparse/compat.py
ignore =
W503,
E731
[coverage:run]
branch = False
omit =
sqlparse/__main__.py


@@ -1,35 +1,27 @@
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This setup script is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
import re
import sys
try:
from setuptools import setup, find_packages
packages = find_packages(exclude=('tests',))
except ImportError:
if sys.version_info[0] == 3:
raise RuntimeError('distribute is required to install this package.')
from distutils.core import setup
packages = ['sqlparse', 'sqlparse.engine']
from setuptools import setup, find_packages
def get_version():
"""parse __init__.py for version number instead of importing the file
see http://stackoverflow.com/questions/458550/standard-way-to-embed-version-into-python-package
"""
VERSIONFILE='sqlparse/__init__.py'
verstrline = open(VERSIONFILE, "rt").read()
"""Parse __init__.py for version number instead of importing the file."""
VERSIONFILE = 'sqlparse/__init__.py'
VSRE = r'^__version__ = [\'"]([^\'"]*)[\'"]'
with open(VERSIONFILE) as f:
verstrline = f.read()
mo = re.search(VSRE, verstrline, re.M)
if mo:
return mo.group(1)
else:
raise RuntimeError('Unable to find version string in %s.'
% (VERSIONFILE,))
raise RuntimeError('Unable to find version in {fn}'.format(fn=VERSIONFILE))
LONG_DESCRIPTION = """
@@ -49,10 +41,10 @@ Splitting SQL statements::
[u'select * from foo; ', u'select * from bar;']
Formatting statemtents::
Formatting statements::
>>> sql = 'select * from foo where id in (select id from bar);'
>>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
>>> print(sqlparse.format(sql, reindent=True, keyword_case='upper'))
SELECT *
FROM foo
WHERE id IN
@@ -67,8 +59,8 @@ Parsing::
>>> res
(<Statement 'select...' at 0x9ad08ec>,)
>>> stmt = res[0]
>>> unicode(stmt) # converting it back to unicode
u'select * from someschema.mytable where id = 1'
>>> str(stmt) # converting it back to unicode
'select * from someschema.mytable where id = 1'
>>> # This is how the internal representation looks like:
>>> stmt.tokens
(<DML 'select' at 0x9b63c34>,
@@ -83,41 +75,37 @@ Parsing::
"""
VERSION = get_version()
kwargs = {}
if sys.version_info[0] == 3:
kwargs['use_2to3'] = True
setup(
name='sqlparse',
version=VERSION,
packages=packages,
description='Non-validating SQL parser',
version=get_version(),
author='Andi Albrecht',
author_email='albrecht.andi@gmail.com',
url='https://github.com/andialbrecht/sqlparse',
description='Non-validating SQL parser',
long_description=LONG_DESCRIPTION,
license='BSD',
url='https://github.com/andialbrecht/sqlparse',
python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
classifiers=[
'Development Status :: 4 - Beta',
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.4',
'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Topic :: Database',
'Topic :: Software Development'
'Topic :: Software Development',
],
scripts=['bin/sqlformat'],
**kwargs
packages=find_packages(exclude=('tests',)),
entry_points={
'console_scripts': [
'sqlformat = sqlparse.__main__:main',
]
},
)


@@ -1,27 +1,31 @@
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""Parse SQL statements."""
__version__ = '0.1.19'
# Setup namespace
from sqlparse import sql
from sqlparse import cli
from sqlparse import engine
from sqlparse import tokens
from sqlparse import filters
from sqlparse import formatter
# Deprecated in 0.1.5. Will be removed in 0.2.0
from sqlparse.exceptions import SQLParseError
from sqlparse.compat import text_type
__version__ = '0.3.1'
__all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
def parse(sql, encoding=None):
"""Parse sql and return a list of statements.
:param sql: A string containting one or more SQL statements.
:param sql: A string containing one or more SQL statements.
:param encoding: The encoding of the statement (optional).
:returns: A tuple of :class:`~sqlparse.sql.Statement` instances.
"""
@@ -36,11 +40,11 @@ def parsestream(stream, encoding=None):
:returns: A generator of :class:`~sqlparse.sql.Statement` instances.
"""
stack = engine.FilterStack()
stack.full_analyze()
stack.enable_grouping()
return stack.run(stream, encoding)
def format(sql, **options):
def format(sql, encoding=None, **options):
"""Format *sql* according to *options*.
Available options are documented in :ref:`formatting`.
@@ -50,29 +54,19 @@ def format(sql, **options):
:returns: The formatted SQL statement as string.
"""
encoding = options.pop('encoding', None)
stack = engine.FilterStack()
options = formatter.validate_options(options)
stack = formatter.build_filter_stack(stack, options)
stack.postprocess.append(filters.SerializerUnicode())
return ''.join(stack.run(sql, encoding))
return u''.join(stack.run(sql, encoding))
def split(sql, encoding=None):
"""Split *sql* into single statements.
:param sql: A string containting one or more SQL statements.
:param sql: A string containing one or more SQL statements.
:param encoding: The encoding of the statement (optional).
:returns: A list of strings.
"""
stack = engine.FilterStack()
stack.split_statements = True
return [unicode(stmt).strip() for stmt in stack.run(sql, encoding)]
from sqlparse.engine.filter import StatementFilter
def split2(stream):
splitter = StatementFilter()
return list(splitter.process(None, stream))
return [text_type(stmt).strip() for stmt in stack.run(sql, encoding)]
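Taken together, the module-level helpers above compose as in this quick sketch (illustrative only; the expected output follows the README example included later in this change):

import sqlparse

raw = 'select * from foo; select * from bar;'

# split() returns a list of statement strings, stripped of
# surrounding whitespace.
print(sqlparse.split(raw))
# ['select * from foo;', 'select * from bar;']

# format() returns a single re-formatted string.
print(sqlparse.format('select * from foo;', reindent=True, keyword_case='upper'))
# SELECT *
# FROM foo;

# parse() returns a tuple of sqlparse.sql.Statement instances,
# with grouping enabled.
stmt = sqlparse.parse('select * from foo')[0]
print(repr(stmt.tokens[0]))  # <DML 'select' at 0x...>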


@@ -0,0 +1,23 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""Entrypoint module for `python -m sqlparse`.
Why does this file exist, and why __main__? For more info, read:
- https://www.python.org/dev/peps/pep-0338/
- https://docs.python.org/2/using/cmdline.html#cmdoption-m
- https://docs.python.org/3/using/cmdline.html#cmdoption-m
"""
import sys
from sqlparse.cli import main
if __name__ == '__main__':
sys.exit(main())


@@ -0,0 +1,202 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""Module that contains the command line app.
Why does this file exist, and why not put this in __main__?
You might be tempted to import things from __main__ later, but that will
cause problems: the code will get executed twice:
- When you run `python -m sqlparse` python will execute
``__main__.py`` as a script. That means there won't be any
``sqlparse.__main__`` in ``sys.modules``.
- When you import __main__ it will get executed again (as a module) because
there's no ``sqlparse.__main__`` in ``sys.modules``.
Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration
"""
import argparse
import sys
from io import TextIOWrapper
from codecs import open, getreader
import sqlparse
from sqlparse.compat import PY2
from sqlparse.exceptions import SQLParseError
# TODO: Add CLI Tests
# TODO: Simplify formatter by using argparse `type` arguments
def create_parser():
_CASE_CHOICES = ['upper', 'lower', 'capitalize']
parser = argparse.ArgumentParser(
prog='sqlformat',
description='Format FILE according to OPTIONS. Use "-" as FILE '
'to read from stdin.',
usage='%(prog)s [OPTIONS] FILE, ...',
)
parser.add_argument('filename')
parser.add_argument(
'-o', '--outfile',
dest='outfile',
metavar='FILE',
help='write output to FILE (defaults to stdout)')
parser.add_argument(
'--version',
action='version',
version=sqlparse.__version__)
group = parser.add_argument_group('Formatting Options')
group.add_argument(
'-k', '--keywords',
metavar='CHOICE',
dest='keyword_case',
choices=_CASE_CHOICES,
help='change case of keywords, CHOICE is one of {0}'.format(
', '.join('"{0}"'.format(x) for x in _CASE_CHOICES)))
group.add_argument(
'-i', '--identifiers',
metavar='CHOICE',
dest='identifier_case',
choices=_CASE_CHOICES,
help='change case of identifiers, CHOICE is one of {0}'.format(
', '.join('"{0}"'.format(x) for x in _CASE_CHOICES)))
group.add_argument(
'-l', '--language',
metavar='LANG',
dest='output_format',
choices=['python', 'php'],
help='output a snippet in programming language LANG, '
'choices are "python", "php"')
group.add_argument(
'--strip-comments',
dest='strip_comments',
action='store_true',
default=False,
help='remove comments')
group.add_argument(
'-r', '--reindent',
dest='reindent',
action='store_true',
default=False,
help='reindent statements')
group.add_argument(
'--indent_width',
dest='indent_width',
default=2,
type=int,
help='indentation width (defaults to 2 spaces)')
group.add_argument(
'--indent_after_first',
dest='indent_after_first',
action='store_true',
default=False,
help='indent after first line of statement (e.g. SELECT)')
group.add_argument(
'--indent_columns',
dest='indent_columns',
action='store_true',
default=False,
help='indent all columns by indent_width instead of keyword length')
group.add_argument(
'-a', '--reindent_aligned',
action='store_true',
default=False,
help='reindent statements to aligned format')
group.add_argument(
'-s', '--use_space_around_operators',
action='store_true',
default=False,
help='place spaces around mathematical operators')
group.add_argument(
'--wrap_after',
dest='wrap_after',
default=0,
type=int,
help='Column after which lists should be wrapped')
group.add_argument(
'--comma_first',
dest='comma_first',
default=False,
type=bool,
help='Insert linebreak before comma (default False)')
group.add_argument(
'--encoding',
dest='encoding',
default='utf-8',
help='Specify the input encoding (default utf-8)')
return parser
def _error(msg):
"""Print msg and optionally exit with return code exit_."""
sys.stderr.write(u'[ERROR] {0}\n'.format(msg))
return 1
def main(args=None):
parser = create_parser()
args = parser.parse_args(args)
if args.filename == '-': # read from stdin
if PY2:
data = getreader(args.encoding)(sys.stdin).read()
else:
wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding)
try:
data = wrapper.read()
finally:
wrapper.detach()
else:
try:
with open(args.filename, 'r', args.encoding) as f:
data = ''.join(f.readlines())
except IOError as e:
return _error(
u'Failed to read {0}: {1}'.format(args.filename, e))
close_stream = False
if args.outfile:
try:
stream = open(args.outfile, 'w', args.encoding)
close_stream = True
except IOError as e:
return _error(u'Failed to open {0}: {1}'.format(args.outfile, e))
else:
stream = sys.stdout
formatter_opts = vars(args)
try:
formatter_opts = sqlparse.formatter.validate_options(formatter_opts)
except SQLParseError as e:
return _error(u'Invalid options: {0}'.format(e))
s = sqlparse.format(data, **formatter_opts)
stream.write(s)
stream.flush()
if close_stream:
stream.close()
return 0
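Since main() takes an optional argument list, the command line behavior above can also be exercised in-process; a minimal sketch (query.sql is a hypothetical placeholder file):

from sqlparse.cli import main

# Equivalent to running: sqlformat --keywords upper --reindent query.sql
# main() returns a process exit code (0 on success, 1 on error).
exit_code = main(['--keywords', 'upper', '--reindent', 'query.sql'])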


@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""Python 2/3 compatibility.
This module only exists to avoid a dependency on six
for very trivial stuff. We only need to take care of
string types, buffers and metaclasses.
Parts of the code is copied directly from six:
https://bitbucket.org/gutworth/six
"""
import sys
from io import TextIOBase
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
if PY3:
def unicode_compatible(cls):
return cls
text_type = str
string_types = (str,)
from io import StringIO
file_types = (StringIO, TextIOBase)
elif PY2:
def unicode_compatible(cls):
cls.__unicode__ = cls.__str__
cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
return cls
text_type = unicode
string_types = (str, unicode,)
from StringIO import StringIO
file_types = (file, StringIO, TextIOBase)
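A small sketch of how these shims are used, consistent with their use in sqlparse/__init__.py earlier in this change:

from sqlparse.compat import text_type, string_types

# text_type is unicode on Python 2 and str on Python 3, so
# text_type(obj) yields a native text string on either version.
s = text_type('select 1')
assert isinstance(s, string_types)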


@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse.engine import grouping
from sqlparse.engine.filter_stack import FilterStack
from sqlparse.engine.statement_splitter import StatementSplitter
__all__ = [
'grouping',
'FilterStack',
'StatementSplitter',
]


@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""filter"""
from sqlparse import lexer
from sqlparse.engine import grouping
from sqlparse.engine.statement_splitter import StatementSplitter
class FilterStack(object):
def __init__(self):
self.preprocess = []
self.stmtprocess = []
self.postprocess = []
self._grouping = False
def enable_grouping(self):
self._grouping = True
def run(self, sql, encoding=None):
stream = lexer.tokenize(sql, encoding)
# Process token stream
for filter_ in self.preprocess:
stream = filter_.process(stream)
stream = StatementSplitter().process(stream)
# Output: Stream processed Statements
for stmt in stream:
if self._grouping:
stmt = grouping.group(stmt)
for filter_ in self.stmtprocess:
filter_.process(stmt)
for filter_ in self.postprocess:
stmt = filter_.process(stmt)
yield stmt
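Driving the stack directly mirrors how split() uses it in sqlparse/__init__.py; a minimal sketch with no filters registered and grouping left off:

from sqlparse.engine import FilterStack
from sqlparse.compat import text_type

stack = FilterStack()
# run() tokenizes the input, splits it at statement boundaries and
# lazily yields sqlparse.sql.Statement objects.
statements = stack.run('select 1; select 2;')
print([text_type(s).strip() for s in statements])
# ['select 1;', 'select 2;']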


@@ -0,0 +1,453 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql
from sqlparse import tokens as T
from sqlparse.utils import recurse, imt
T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
T_STRING = (T.String, T.String.Single, T.String.Symbol)
T_NAME = (T.Name, T.Name.Placeholder)
def _group_matching(tlist, cls):
"""Groups Tokens that have beginning and end."""
opens = []
tidx_offset = 0
for idx, token in enumerate(list(tlist)):
tidx = idx - tidx_offset
if token.is_whitespace:
# ~50% of tokens will be whitespace. Checking for them early
# avoids 3 comparisons per whitespace token, but adds 1 more
# comparison for the other ~50% of tokens...
continue
if token.is_group and not isinstance(token, cls):
# Check inside previously grouped (i.e. parenthesis) if group
# of different type is inside (i.e., case). Though ideally it
# should check for all open/close tokens at once to avoid recursion
_group_matching(token, cls)
continue
if token.match(*cls.M_OPEN):
opens.append(tidx)
elif token.match(*cls.M_CLOSE):
try:
open_idx = opens.pop()
except IndexError:
# this indicates invalid sql and unbalanced tokens.
# instead of break, continue in case other "valid" groups exist
continue
close_idx = tidx
tlist.group_tokens(cls, open_idx, close_idx)
tidx_offset += close_idx - open_idx
def group_brackets(tlist):
_group_matching(tlist, sql.SquareBrackets)
def group_parenthesis(tlist):
_group_matching(tlist, sql.Parenthesis)
def group_case(tlist):
_group_matching(tlist, sql.Case)
def group_if(tlist):
_group_matching(tlist, sql.If)
def group_for(tlist):
_group_matching(tlist, sql.For)
def group_begin(tlist):
_group_matching(tlist, sql.Begin)
def group_typecasts(tlist):
def match(token):
return token.match(T.Punctuation, '::')
def valid(token):
return token is not None
def post(tlist, pidx, tidx, nidx):
return pidx, nidx
valid_prev = valid_next = valid
_group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
def group_tzcasts(tlist):
def match(token):
return token.ttype == T.Keyword.TZCast
def valid(token):
return token is not None
def post(tlist, pidx, tidx, nidx):
return pidx, nidx
_group(tlist, sql.Identifier, match, valid, valid, post)
def group_typed_literal(tlist):
# definitely not complete, see e.g.:
# https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literal-syntax
# https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals
# https://www.postgresql.org/docs/9.1/datatype-datetime.html
# https://www.postgresql.org/docs/9.1/functions-datetime.html
def match(token):
return imt(token, m=sql.TypedLiteral.M_OPEN)
def match_to_extend(token):
return isinstance(token, sql.TypedLiteral)
def valid_prev(token):
return token is not None
def valid_next(token):
return token is not None and token.match(*sql.TypedLiteral.M_CLOSE)
def valid_final(token):
return token is not None and token.match(*sql.TypedLiteral.M_EXTEND)
def post(tlist, pidx, tidx, nidx):
return tidx, nidx
_group(tlist, sql.TypedLiteral, match, valid_prev, valid_next,
post, extend=False)
_group(tlist, sql.TypedLiteral, match_to_extend, valid_prev, valid_final,
post, extend=True)
def group_period(tlist):
def match(token):
return token.match(T.Punctuation, '.')
def valid_prev(token):
sqlcls = sql.SquareBrackets, sql.Identifier
ttypes = T.Name, T.String.Symbol
return imt(token, i=sqlcls, t=ttypes)
def valid_next(token):
# issue261, allow invalid next token
return True
def post(tlist, pidx, tidx, nidx):
# next_ validation is being performed here. issue261
sqlcls = sql.SquareBrackets, sql.Function
ttypes = T.Name, T.String.Symbol, T.Wildcard
next_ = tlist[nidx] if nidx is not None else None
valid_next = imt(next_, i=sqlcls, t=ttypes)
return (pidx, nidx) if valid_next else (pidx, tidx)
_group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
def group_as(tlist):
def match(token):
return token.is_keyword and token.normalized == 'AS'
def valid_prev(token):
return token.normalized == 'NULL' or not token.is_keyword
def valid_next(token):
ttypes = T.DML, T.DDL, T.CTE
return not imt(token, t=ttypes) and token is not None
def post(tlist, pidx, tidx, nidx):
return pidx, nidx
_group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
def group_assignment(tlist):
def match(token):
return token.match(T.Assignment, ':=')
def valid(token):
return token is not None and token.ttype not in (T.Keyword)
def post(tlist, pidx, tidx, nidx):
m_semicolon = T.Punctuation, ';'
snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
nidx = snidx or nidx
return pidx, nidx
valid_prev = valid_next = valid
_group(tlist, sql.Assignment, match, valid_prev, valid_next, post)
def group_comparison(tlist):
sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
sql.Operation)
ttypes = T_NUMERICAL + T_STRING + T_NAME
def match(token):
return token.ttype == T.Operator.Comparison
def valid(token):
if imt(token, t=ttypes, i=sqlcls):
return True
elif token and token.is_keyword and token.normalized == 'NULL':
return True
else:
return False
def post(tlist, pidx, tidx, nidx):
return pidx, nidx
valid_prev = valid_next = valid
_group(tlist, sql.Comparison, match,
valid_prev, valid_next, post, extend=False)
@recurse(sql.Identifier)
def group_identifier(tlist):
ttypes = (T.String.Symbol, T.Name)
tidx, token = tlist.token_next_by(t=ttypes)
while token:
tlist.group_tokens(sql.Identifier, tidx, tidx)
tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
def group_arrays(tlist):
sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
ttypes = T.Name, T.String.Symbol
def match(token):
return isinstance(token, sql.SquareBrackets)
def valid_prev(token):
return imt(token, i=sqlcls, t=ttypes)
def valid_next(token):
return True
def post(tlist, pidx, tidx, nidx):
return pidx, tidx
_group(tlist, sql.Identifier, match,
valid_prev, valid_next, post, extend=True, recurse=False)
def group_operator(tlist):
ttypes = T_NUMERICAL + T_STRING + T_NAME
sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
sql.Identifier, sql.Operation, sql.TypedLiteral)
def match(token):
return imt(token, t=(T.Operator, T.Wildcard))
def valid(token):
return imt(token, i=sqlcls, t=ttypes) \
or (token and token.match(
T.Keyword,
('CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP')))
def post(tlist, pidx, tidx, nidx):
tlist[tidx].ttype = T.Operator
return pidx, nidx
valid_prev = valid_next = valid
_group(tlist, sql.Operation, match,
valid_prev, valid_next, post, extend=False)
def group_identifier_list(tlist):
m_role = T.Keyword, ('null', 'role')
sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
sql.IdentifierList, sql.Operation)
ttypes = (T_NUMERICAL + T_STRING + T_NAME
+ (T.Keyword, T.Comment, T.Wildcard))
def match(token):
return token.match(T.Punctuation, ',')
def valid(token):
return imt(token, i=sqlcls, m=m_role, t=ttypes)
def post(tlist, pidx, tidx, nidx):
return pidx, nidx
valid_prev = valid_next = valid
_group(tlist, sql.IdentifierList, match,
valid_prev, valid_next, post, extend=True)
@recurse(sql.Comment)
def group_comments(tlist):
tidx, token = tlist.token_next_by(t=T.Comment)
while token:
eidx, end = tlist.token_not_matching(
lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace, idx=tidx)
if end is not None:
eidx, end = tlist.token_prev(eidx, skip_ws=False)
tlist.group_tokens(sql.Comment, tidx, eidx)
tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx)
@recurse(sql.Where)
def group_where(tlist):
tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN)
while token:
eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx)
if end is None:
end = tlist._groupable_tokens[-1]
else:
end = tlist.tokens[eidx - 1]
# TODO: convert this to eidx instead of end token.
# i think above values are len(tlist) and eidx-1
eidx = tlist.token_index(end)
tlist.group_tokens(sql.Where, tidx, eidx)
tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx)
@recurse()
def group_aliased(tlist):
I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
sql.Operation, sql.Comparison)
tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
while token:
nidx, next_ = tlist.token_next(tidx)
if isinstance(next_, sql.Identifier):
tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True)
tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx)
@recurse(sql.Function)
def group_functions(tlist):
has_create = False
has_table = False
for tmp_token in tlist.tokens:
if tmp_token.value == 'CREATE':
has_create = True
if tmp_token.value == 'TABLE':
has_table = True
if has_create and has_table:
return
tidx, token = tlist.token_next_by(t=T.Name)
while token:
nidx, next_ = tlist.token_next(tidx)
if isinstance(next_, sql.Parenthesis):
tlist.group_tokens(sql.Function, tidx, nidx)
tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
def group_order(tlist):
"""Group together Identifier and Asc/Desc token"""
tidx, token = tlist.token_next_by(t=T.Keyword.Order)
while token:
pidx, prev_ = tlist.token_prev(tidx)
if imt(prev_, i=sql.Identifier, t=T.Number):
tlist.group_tokens(sql.Identifier, pidx, tidx)
tidx = pidx
tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx)
@recurse()
def align_comments(tlist):
tidx, token = tlist.token_next_by(i=sql.Comment)
while token:
pidx, prev_ = tlist.token_prev(tidx)
if isinstance(prev_, sql.TokenList):
tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True)
tidx = pidx
tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx)
def group_values(tlist):
tidx, token = tlist.token_next_by(m=(T.Keyword, 'VALUES'))
start_idx = tidx
end_idx = -1
while token:
if isinstance(token, sql.Parenthesis):
end_idx = tidx
tidx, token = tlist.token_next(tidx)
if end_idx != -1:
tlist.group_tokens(sql.Values, start_idx, end_idx, extend=True)
def group(stmt):
for func in [
group_comments,
# _group_matching
group_brackets,
group_parenthesis,
group_case,
group_if,
group_for,
group_begin,
group_functions,
group_where,
group_period,
group_arrays,
group_identifier,
group_order,
group_typecasts,
group_tzcasts,
group_typed_literal,
group_operator,
group_comparison,
group_as,
group_aliased,
group_assignment,
align_comments,
group_identifier_list,
group_values,
]:
func(stmt)
return stmt
def _group(tlist, cls, match,
valid_prev=lambda t: True,
valid_next=lambda t: True,
post=None,
extend=True,
recurse=True
):
"""Groups together tokens that are joined by a middle token. i.e. x < y"""
tidx_offset = 0
pidx, prev_ = None, None
for idx, token in enumerate(list(tlist)):
tidx = idx - tidx_offset
if token.is_whitespace:
continue
if recurse and token.is_group and not isinstance(token, cls):
_group(token, cls, match, valid_prev, valid_next, post, extend)
if match(token):
nidx, next_ = tlist.token_next(tidx)
if prev_ and valid_prev(prev_) and valid_next(next_):
from_idx, to_idx = post(tlist, pidx, tidx, nidx)
grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend)
tidx_offset += to_idx - from_idx
pidx, prev_ = from_idx, grp
continue
pidx, prev_ = tidx, token
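To make the effect of these grouping passes concrete, here is a short sketch (sqlparse.parse() enables grouping, as shown in sqlparse/__init__.py above; output is indicative):

import sqlparse

parsed = sqlparse.parse('select a, b from foo')[0]
# After group(), 'a, b' appears as a single IdentifierList node
# rather than separate Name and Punctuation tokens.
for token in parsed.tokens:
    print(type(token).__name__, repr(token.value))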


@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql, tokens as T
class StatementSplitter(object):
"""Filter that split stream at individual statements"""
def __init__(self):
self._reset()
def _reset(self):
"""Set the filter attributes to its default values"""
self._in_declare = False
self._is_create = False
self._begin_depth = 0
self.consume_ws = False
self.tokens = []
self.level = 0
def _change_splitlevel(self, ttype, value):
"""Get the new split level (increase, decrease or remain equal)"""
# parenthesis increase/decrease a level
if ttype is T.Punctuation and value == '(':
return 1
elif ttype is T.Punctuation and value == ')':
return -1
elif ttype not in T.Keyword: # if normal token return
return 0
# Everything after here is ttype = T.Keyword
# Also note: once an IF statement has been entered, you are done
# and basically just returning
unified = value.upper()
# three keywords begin with CREATE, but only one of them is DDL
# DDL Create though can contain more words such as "or replace"
if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
self._is_create = True
return 0
# can have nested declare inside of begin...
if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
self._in_declare = True
return 1
if unified == 'BEGIN':
self._begin_depth += 1
if self._is_create:
# FIXME(andi): This makes no sense.
return 1
return 0
# Should this respect a preceding BEGIN?
# In CASE ... WHEN ... END this results in a split level -1.
# Would having multiple CASE WHEN END and an Assignment Operator
# cause the statement to cut off prematurely?
if unified == 'END':
self._begin_depth = max(0, self._begin_depth - 1)
return -1
if (unified in ('IF', 'FOR', 'WHILE')
and self._is_create and self._begin_depth > 0):
return 1
if unified in ('END IF', 'END FOR', 'END WHILE'):
return -1
# Default
return 0
def process(self, stream):
"""Process the stream"""
EOS_TTYPE = T.Whitespace, T.Comment.Single
# Run over all stream tokens
for ttype, value in stream:
# Yield the statement if we finished one and this token is not
# trailing whitespace. Note that newline tokens count as
# non-whitespace here; "whitespace" in this context excludes
# newlines.
# Why don't multi-line comments also count?
if self.consume_ws and ttype not in EOS_TTYPE:
yield sql.Statement(self.tokens)
# Reset filter and prepare to process next statement
self._reset()
# Change current split level (increase, decrease or remain equal)
self.level += self._change_splitlevel(ttype, value)
# Append the token to the current statement
self.tokens.append(sql.Token(ttype, value))
# Check if we get the end of a statement
if self.level <= 0 and ttype is T.Punctuation and value == ';':
self.consume_ws = True
# Yield pending statement (if any)
if self.tokens:
yield sql.Statement(self.tokens)
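The split-level bookkeeping above is what keeps procedural bodies intact; a sketch of the observable behavior through the public API (output is indicative):

import sqlparse

# The ';' inside BEGIN ... END does not terminate the statement:
# BEGIN raises the split level and END lowers it again.
sql_text = 'CREATE PROCEDURE p() BEGIN SELECT 1; END; SELECT 2;'
print(sqlparse.split(sql_text))
# ['CREATE PROCEDURE p() BEGIN SELECT 1; END;', 'SELECT 2;']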


@@ -1,7 +1,10 @@
# Copyright (C) 2012 Andi Albrecht, albrecht.andi@gmail.com
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""Exceptions used in this package."""


@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse.filters.others import SerializerUnicode
from sqlparse.filters.others import StripCommentsFilter
from sqlparse.filters.others import StripWhitespaceFilter
from sqlparse.filters.others import SpacesAroundOperatorsFilter
from sqlparse.filters.output import OutputPHPFilter
from sqlparse.filters.output import OutputPythonFilter
from sqlparse.filters.tokens import KeywordCaseFilter
from sqlparse.filters.tokens import IdentifierCaseFilter
from sqlparse.filters.tokens import TruncateStringFilter
from sqlparse.filters.reindent import ReindentFilter
from sqlparse.filters.right_margin import RightMarginFilter
from sqlparse.filters.aligned_indent import AlignedIndentFilter
__all__ = [
'SerializerUnicode',
'StripCommentsFilter',
'StripWhitespaceFilter',
'SpacesAroundOperatorsFilter',
'OutputPHPFilter',
'OutputPythonFilter',
'KeywordCaseFilter',
'IdentifierCaseFilter',
'TruncateStringFilter',
'ReindentFilter',
'RightMarginFilter',
'AlignedIndentFilter',
]


@@ -0,0 +1,138 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql, tokens as T
from sqlparse.compat import text_type
from sqlparse.utils import offset, indent
class AlignedIndentFilter(object):
join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?'
r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|'
r'(CROSS\s+|NATURAL\s+)?)?JOIN\b')
by_words = r'(GROUP|ORDER)\s+BY\b'
split_words = ('FROM',
join_words, 'ON', by_words,
'WHERE', 'AND', 'OR',
'HAVING', 'LIMIT',
'UNION', 'VALUES',
'SET', 'BETWEEN', 'EXCEPT')
def __init__(self, char=' ', n='\n'):
self.n = n
self.offset = 0
self.indent = 0
self.char = char
self._max_kwd_len = len('select')
def nl(self, offset=1):
# offset = 1 represents a single space after SELECT
offset = -len(offset) if not isinstance(offset, int) else offset
# add two for the space and parenthesis
indent = self.indent * (2 + self._max_kwd_len)
return sql.Token(T.Whitespace, self.n + self.char * (
self._max_kwd_len + offset + indent + self.offset))
def _process_statement(self, tlist):
if len(tlist.tokens) > 0 and tlist.tokens[0].is_whitespace \
and self.indent == 0:
tlist.tokens.pop(0)
# process the main query body
self._process(sql.TokenList(tlist.tokens))
def _process_parenthesis(self, tlist):
# if this isn't a subquery, don't re-indent
_, token = tlist.token_next_by(m=(T.DML, 'SELECT'))
if token is not None:
with indent(self):
tlist.insert_after(tlist[0], self.nl('SELECT'))
# process the inside of the parenthesis
self._process_default(tlist)
# de-indent last parenthesis
tlist.insert_before(tlist[-1], self.nl())
def _process_identifierlist(self, tlist):
# columns being selected
identifiers = list(tlist.get_identifiers())
identifiers.pop(0)
[tlist.insert_before(token, self.nl()) for token in identifiers]
self._process_default(tlist)
def _process_case(self, tlist):
offset_ = len('case ') + len('when ')
cases = tlist.get_cases(skip_ws=True)
# align the end as well
end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1]
cases.append((None, [end_token]))
condition_width = [len(' '.join(map(text_type, cond))) if cond else 0
for cond, _ in cases]
max_cond_width = max(condition_width)
for i, (cond, value) in enumerate(cases):
# cond is None when 'else or end'
stmt = cond[0] if cond else value[0]
if i > 0:
tlist.insert_before(stmt, self.nl(
offset_ - len(text_type(stmt))))
if cond:
ws = sql.Token(T.Whitespace, self.char * (
max_cond_width - condition_width[i]))
tlist.insert_after(cond[-1], ws)
def _next_token(self, tlist, idx=-1):
split_words = T.Keyword, self.split_words, True
tidx, token = tlist.token_next_by(m=split_words, idx=idx)
# treat "BETWEEN x and y" as a single statement
if token and token.normalized == 'BETWEEN':
tidx, token = self._next_token(tlist, tidx)
if token and token.normalized == 'AND':
tidx, token = self._next_token(tlist, tidx)
return tidx, token
def _split_kwds(self, tlist):
tidx, token = self._next_token(tlist)
while token:
# joins, group/order by are special case. only consider the first
# word as aligner
if (
token.match(T.Keyword, self.join_words, regex=True)
or token.match(T.Keyword, self.by_words, regex=True)
):
token_indent = token.value.split()[0]
else:
token_indent = text_type(token)
tlist.insert_before(token, self.nl(token_indent))
tidx += 1
tidx, token = self._next_token(tlist, tidx)
def _process_default(self, tlist):
self._split_kwds(tlist)
# process any sub-sub statements
for sgroup in tlist.get_sublists():
idx = tlist.token_index(sgroup)
pidx, prev_ = tlist.token_prev(idx)
# HACK: make "group/order by" work. Longer than max_len.
offset_ = 3 if (
prev_ and prev_.match(T.Keyword, self.by_words, regex=True)
) else 0
with offset(self, offset_):
self._process(sgroup)
def _process(self, tlist):
func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
func = getattr(self, func_name.lower(), self._process_default)
func(tlist)
def process(self, stmt):
self._process(stmt)
return stmt
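# Illustrative usage sketch (not part of the vendored module): this filter
# backs the public `reindent_aligned` option, lining the split keywords up
# on a common right edge.
import sqlparse
print(sqlparse.format('select a, b from t where a = 1 and b = 2',
                      reindent_aligned=True))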


@@ -0,0 +1,125 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql, tokens as T
from sqlparse.utils import split_unquoted_newlines
class StripCommentsFilter(object):
@staticmethod
def _process(tlist):
def get_next_comment():
# TODO(andi) Comment types should be unified, see related issue38
return tlist.token_next_by(i=sql.Comment, t=T.Comment)
tidx, token = get_next_comment()
while token:
pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
nidx, next_ = tlist.token_next(tidx, skip_ws=False)
# Replace by whitespace if prev and next exist and if they're not
# whitespaces. This doesn't apply if prev or next is a parenthesis.
if (prev_ is None or next_ is None
or prev_.is_whitespace or prev_.match(T.Punctuation, '(')
or next_.is_whitespace or next_.match(T.Punctuation, ')')):
# Insert a whitespace to ensure the following SQL produces
# a valid SQL (see #425). For example:
#
# Before: select a--comment\nfrom foo
# After: select a from foo
if prev_ is not None and next_ is None:
tlist.tokens.insert(tidx, sql.Token(T.Whitespace, ' '))
tlist.tokens.remove(token)
else:
tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')
tidx, token = get_next_comment()
def process(self, stmt):
[self.process(sgroup) for sgroup in stmt.get_sublists()]
StripCommentsFilter._process(stmt)
return stmt
class StripWhitespaceFilter(object):
def _stripws(self, tlist):
func_name = '_stripws_{cls}'.format(cls=type(tlist).__name__)
func = getattr(self, func_name.lower(), self._stripws_default)
func(tlist)
@staticmethod
def _stripws_default(tlist):
last_was_ws = False
is_first_char = True
for token in tlist.tokens:
if token.is_whitespace:
token.value = '' if last_was_ws or is_first_char else ' '
last_was_ws = token.is_whitespace
is_first_char = False
def _stripws_identifierlist(self, tlist):
# Removes newlines before commas, see issue140
last_nl = None
for token in list(tlist.tokens):
if last_nl and token.ttype is T.Punctuation and token.value == ',':
tlist.tokens.remove(last_nl)
last_nl = token if token.is_whitespace else None
# next_ = tlist.token_next(token, skip_ws=False)
# if (next_ and not next_.is_whitespace and
# token.ttype is T.Punctuation and token.value == ','):
# tlist.insert_after(token, sql.Token(T.Whitespace, ' '))
return self._stripws_default(tlist)
def _stripws_parenthesis(self, tlist):
while tlist.tokens[1].is_whitespace:
tlist.tokens.pop(1)
while tlist.tokens[-2].is_whitespace:
tlist.tokens.pop(-2)
self._stripws_default(tlist)
def process(self, stmt, depth=0):
[self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()]
self._stripws(stmt)
if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace:
stmt.tokens.pop(-1)
return stmt
class SpacesAroundOperatorsFilter(object):
@staticmethod
def _process(tlist):
ttypes = (T.Operator, T.Comparison)
tidx, token = tlist.token_next_by(t=ttypes)
while token:
nidx, next_ = tlist.token_next(tidx, skip_ws=False)
if next_ and next_.ttype != T.Whitespace:
tlist.insert_after(tidx, sql.Token(T.Whitespace, ' '))
pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
if prev_ and prev_.ttype != T.Whitespace:
tlist.insert_before(tidx, sql.Token(T.Whitespace, ' '))
tidx += 1 # has to shift since token inserted before it
# assert tlist.token_index(token) == tidx
tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
def process(self, stmt):
[self.process(sgroup) for sgroup in stmt.get_sublists()]
SpacesAroundOperatorsFilter._process(stmt)
return stmt
# ---------------------------
# postprocess
class SerializerUnicode(object):
@staticmethod
def process(stmt):
lines = split_unquoted_newlines(stmt)
return '\n'.join(line.rstrip() for line in lines)
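# Illustrative usage sketch (not part of the vendored module): these filters
# back the strip_comments, strip_whitespace and use_space_around_operators
# formatting options.
import sqlparse
print(sqlparse.format('select a--comment\nfrom foo', strip_comments=True))
# -> select a from foo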


@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql, tokens as T
from sqlparse.compat import text_type
class OutputFilter(object):
varname_prefix = ''
def __init__(self, varname='sql'):
self.varname = self.varname_prefix + varname
self.count = 0
def _process(self, stream, varname, has_nl):
raise NotImplementedError
def process(self, stmt):
self.count += 1
if self.count > 1:
varname = u'{f.varname}{f.count}'.format(f=self)
else:
varname = self.varname
has_nl = len(text_type(stmt).strip().splitlines()) > 1
stmt.tokens = self._process(stmt.tokens, varname, has_nl)
return stmt
class OutputPythonFilter(OutputFilter):
def _process(self, stream, varname, has_nl):
# SQL query assignment to varname
if self.count > 1:
yield sql.Token(T.Whitespace, '\n')
yield sql.Token(T.Name, varname)
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Operator, '=')
yield sql.Token(T.Whitespace, ' ')
if has_nl:
yield sql.Token(T.Operator, '(')
yield sql.Token(T.Text, "'")
# Print the tokens on the quote
for token in stream:
# Token is a new line separator
if token.is_whitespace and '\n' in token.value:
# Close quote and add a new line
yield sql.Token(T.Text, " '")
yield sql.Token(T.Whitespace, '\n')
# Quote header on secondary lines
yield sql.Token(T.Whitespace, ' ' * (len(varname) + 4))
yield sql.Token(T.Text, "'")
# Indentation
after_lb = token.value.split('\n', 1)[1]
if after_lb:
yield sql.Token(T.Whitespace, after_lb)
continue
# Token has escape chars
elif "'" in token.value:
token.value = token.value.replace("'", "\\'")
# Put the token
yield sql.Token(T.Text, token.value)
# Close quote
yield sql.Token(T.Text, "'")
if has_nl:
yield sql.Token(T.Operator, ')')
class OutputPHPFilter(OutputFilter):
varname_prefix = '$'
def _process(self, stream, varname, has_nl):
# SQL query assignment to varname (quote header)
if self.count > 1:
yield sql.Token(T.Whitespace, '\n')
yield sql.Token(T.Name, varname)
yield sql.Token(T.Whitespace, ' ')
if has_nl:
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Operator, '=')
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Text, '"')
# Print the tokens on the quote
for token in stream:
# Token is a new line separator
if token.is_whitespace and '\n' in token.value:
# Close quote and add a new line
yield sql.Token(T.Text, ' ";')
yield sql.Token(T.Whitespace, '\n')
# Quote header on secondary lines
yield sql.Token(T.Name, varname)
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Operator, '.=')
yield sql.Token(T.Whitespace, ' ')
yield sql.Token(T.Text, '"')
# Indentation
after_lb = token.value.split('\n', 1)[1]
if after_lb:
yield sql.Token(T.Whitespace, after_lb)
continue
# Token has escape chars
elif '"' in token.value:
token.value = token.value.replace('"', '\\"')
# Put the token
yield sql.Token(T.Text, token.value)
# Close quote
yield sql.Token(T.Text, '"')
yield sql.Token(T.Punctuation, ';')
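# Illustrative usage sketch (not part of the vendored module): the output
# filters back the `output_format` option and wrap the statement in a
# variable assignment for the target language.
import sqlparse
print(sqlparse.format('select * from foo;', output_format='python'))
# -> sql = 'select * from foo;'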


@@ -0,0 +1,241 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import sql, tokens as T
from sqlparse.compat import text_type
from sqlparse.utils import offset, indent
class ReindentFilter(object):
def __init__(self, width=2, char=' ', wrap_after=0, n='\n',
comma_first=False, indent_after_first=False,
indent_columns=False):
self.n = n
self.width = width
self.char = char
self.indent = 1 if indent_after_first else 0
self.offset = 0
self.wrap_after = wrap_after
self.comma_first = comma_first
self.indent_columns = indent_columns
self._curr_stmt = None
self._last_stmt = None
self._last_func = None
def _flatten_up_to_token(self, token):
"""Yields all tokens up to token but excluding current."""
if token.is_group:
token = next(token.flatten())
for t in self._curr_stmt.flatten():
if t == token:
break
yield t
@property
def leading_ws(self):
return self.offset + self.indent * self.width
def _get_offset(self, token):
raw = u''.join(map(text_type, self._flatten_up_to_token(token)))
line = (raw or '\n').splitlines()[-1]
# Now take current offset into account and return relative offset.
return len(line) - len(self.char * self.leading_ws)
def nl(self, offset=0):
return sql.Token(
T.Whitespace,
self.n + self.char * max(0, self.leading_ws + offset))
def _next_token(self, tlist, idx=-1):
split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
'GROUP BY', 'ORDER BY', 'UNION', 'VALUES',
'SET', 'BETWEEN', 'EXCEPT', 'HAVING', 'LIMIT')
m_split = T.Keyword, split_words, True
tidx, token = tlist.token_next_by(m=m_split, idx=idx)
if token and token.normalized == 'BETWEEN':
tidx, token = self._next_token(tlist, tidx)
if token and token.normalized == 'AND':
tidx, token = self._next_token(tlist, tidx)
return tidx, token
def _split_kwds(self, tlist):
tidx, token = self._next_token(tlist)
while token:
pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
uprev = text_type(prev_)
if prev_ and prev_.is_whitespace:
del tlist.tokens[pidx]
tidx -= 1
if not (uprev.endswith('\n') or uprev.endswith('\r')):
tlist.insert_before(tidx, self.nl())
tidx += 1
tidx, token = self._next_token(tlist, tidx)
def _split_statements(self, tlist):
ttypes = T.Keyword.DML, T.Keyword.DDL
tidx, token = tlist.token_next_by(t=ttypes)
while token:
pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
if prev_ and prev_.is_whitespace:
del tlist.tokens[pidx]
tidx -= 1
# only break if it's not the first token
if prev_:
tlist.insert_before(tidx, self.nl())
tidx += 1
tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
def _process(self, tlist):
func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
func = getattr(self, func_name.lower(), self._process_default)
func(tlist)
def _process_where(self, tlist):
tidx, token = tlist.token_next_by(m=(T.Keyword, 'WHERE'))
# issue121, errors in statement fixed??
tlist.insert_before(tidx, self.nl())
with indent(self):
self._process_default(tlist)
def _process_parenthesis(self, tlist):
ttypes = T.Keyword.DML, T.Keyword.DDL
_, is_dml_dll = tlist.token_next_by(t=ttypes)
fidx, first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN)
with indent(self, 1 if is_dml_dll else 0):
tlist.tokens.insert(0, self.nl()) if is_dml_dll else None
with offset(self, self._get_offset(first) + 1):
self._process_default(tlist, not is_dml_dll)
def _process_function(self, tlist):
self._last_func = tlist[0]
self._process_default(tlist)
def _process_identifierlist(self, tlist):
identifiers = list(tlist.get_identifiers())
if self.indent_columns:
first = next(identifiers[0].flatten())
num_offset = 1 if self.char == '\t' else self.width
else:
first = next(identifiers.pop(0).flatten())
num_offset = 1 if self.char == '\t' else self._get_offset(first)
if not tlist.within(sql.Function) and not tlist.within(sql.Values):
with offset(self, num_offset):
position = 0
for token in identifiers:
# Add 1 for the "," separator
position += len(token.value) + 1
if position > (self.wrap_after - self.offset):
adjust = 0
if self.comma_first:
adjust = -2
_, comma = tlist.token_prev(
tlist.token_index(token))
if comma is None:
continue
token = comma
tlist.insert_before(token, self.nl(offset=adjust))
if self.comma_first:
_, ws = tlist.token_next(
tlist.token_index(token), skip_ws=False)
if (ws is not None
and ws.ttype is not T.Text.Whitespace):
tlist.insert_after(
token, sql.Token(T.Whitespace, ' '))
position = 0
else:
# ensure whitespace
for token in tlist:
_, next_ws = tlist.token_next(
tlist.token_index(token), skip_ws=False)
if token.value == ',' and not next_ws.is_whitespace:
tlist.insert_after(
token, sql.Token(T.Whitespace, ' '))
end_at = self.offset + sum(len(i.value) + 1 for i in identifiers)
adjusted_offset = 0
if (self.wrap_after > 0
and end_at > (self.wrap_after - self.offset)
and self._last_func):
adjusted_offset = -len(self._last_func.value) - 1
with offset(self, adjusted_offset), indent(self):
if adjusted_offset < 0:
tlist.insert_before(identifiers[0], self.nl())
position = 0
for token in identifiers:
# Add 1 for the "," separator
position += len(token.value) + 1
if (self.wrap_after > 0
and position > (self.wrap_after - self.offset)):
adjust = 0
tlist.insert_before(token, self.nl(offset=adjust))
position = 0
self._process_default(tlist)
def _process_case(self, tlist):
iterable = iter(tlist.get_cases())
cond, _ = next(iterable)
first = next(cond[0].flatten())
with offset(self, self._get_offset(tlist[0])):
with offset(self, self._get_offset(first)):
for cond, value in iterable:
token = value[0] if cond is None else cond[0]
tlist.insert_before(token, self.nl())
# Line breaks on group level are done. Let's add an offset of
# len "when ", "then ", "else "
with offset(self, len("WHEN ")):
self._process_default(tlist)
end_idx, end = tlist.token_next_by(m=sql.Case.M_CLOSE)
if end_idx is not None:
tlist.insert_before(end_idx, self.nl())
def _process_values(self, tlist):
tlist.insert_before(0, self.nl())
tidx, token = tlist.token_next_by(i=sql.Parenthesis)
first_token = token
while token:
ptidx, ptoken = tlist.token_next_by(m=(T.Punctuation, ','),
idx=tidx)
if ptoken:
if self.comma_first:
adjust = -2
offset = self._get_offset(first_token) + adjust
tlist.insert_before(ptoken, self.nl(offset))
else:
tlist.insert_after(ptoken,
self.nl(self._get_offset(token)))
tidx, token = tlist.token_next_by(i=sql.Parenthesis, idx=tidx)
def _process_default(self, tlist, stmts=True):
self._split_statements(tlist) if stmts else None
self._split_kwds(tlist)
for sgroup in tlist.get_sublists():
self._process(sgroup)
def process(self, stmt):
self._curr_stmt = stmt
self._process(stmt)
if self._last_stmt is not None:
nl = '\n' if text_type(self._last_stmt).endswith('\n') else '\n\n'
stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))
self._last_stmt = stmt
return stmt
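# Illustrative usage sketch (not part of the vendored module): ReindentFilter
# backs the classic `reindent` option.
import sqlparse
print(sqlparse.format('select a, b from t where a = 1 and b = 2',
                      reindent=True))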


@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
import re
from sqlparse import sql, tokens as T
from sqlparse.compat import text_type
# FIXME: Doesn't work
class RightMarginFilter(object):
keep_together = (
# sql.TypeCast, sql.Identifier, sql.Alias,
)
def __init__(self, width=79):
self.width = width
self.line = ''
def _process(self, group, stream):
for token in stream:
if token.is_whitespace and '\n' in token.value:
if token.value.endswith('\n'):
self.line = ''
else:
self.line = token.value.splitlines()[-1]
elif token.is_group and type(token) not in self.keep_together:
token.tokens = self._process(token, token.tokens)
else:
val = text_type(token)
if len(self.line) + len(val) > self.width:
match = re.search(r'^ +', self.line)
if match is not None:
indent = match.group()
else:
indent = ''
yield sql.Token(T.Whitespace, '\n{0}'.format(indent))
self.line = indent
self.line += val
yield token
def process(self, group):
# return
# group.tokens = self._process(group, group.tokens)
raise NotImplementedError


@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
from sqlparse import tokens as T
from sqlparse.compat import text_type
class _CaseFilter(object):
ttype = None
def __init__(self, case=None):
case = case or 'upper'
self.convert = getattr(text_type, case)
def process(self, stream):
for ttype, value in stream:
if ttype in self.ttype:
value = self.convert(value)
yield ttype, value
class KeywordCaseFilter(_CaseFilter):
ttype = T.Keyword
class IdentifierCaseFilter(_CaseFilter):
ttype = T.Name, T.String.Symbol
def process(self, stream):
for ttype, value in stream:
if ttype in self.ttype and value.strip()[0] != '"':
value = self.convert(value)
yield ttype, value
class TruncateStringFilter(object):
def __init__(self, width, char):
self.width = width
self.char = char
def process(self, stream):
for ttype, value in stream:
if ttype != T.Literal.String.Single:
yield ttype, value
continue
if value[:2] == "''":
inner = value[2:-2]
quote = "''"
else:
inner = value[1:-1]
quote = "'"
if len(inner) > self.width:
value = ''.join((quote, inner[:self.width], self.char, quote))
yield ttype, value
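# Illustrative usage sketch (not part of the vendored module): these token
# stream filters back the keyword_case, identifier_case and truncate_strings
# options.
import sqlparse
print(sqlparse.format('select col from tbl', keyword_case='upper'))
# -> SELECT col FROM tbl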


@@ -0,0 +1,199 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""SQL formatter"""
from sqlparse import filters
from sqlparse.exceptions import SQLParseError
def validate_options(options):
"""Validates options."""
kwcase = options.get('keyword_case')
if kwcase not in [None, 'upper', 'lower', 'capitalize']:
raise SQLParseError('Invalid value for keyword_case: '
'{0!r}'.format(kwcase))
idcase = options.get('identifier_case')
if idcase not in [None, 'upper', 'lower', 'capitalize']:
raise SQLParseError('Invalid value for identifier_case: '
'{0!r}'.format(idcase))
ofrmt = options.get('output_format')
if ofrmt not in [None, 'sql', 'python', 'php']:
raise SQLParseError('Unknown output format: '
'{0!r}'.format(ofrmt))
strip_comments = options.get('strip_comments', False)
if strip_comments not in [True, False]:
raise SQLParseError('Invalid value for strip_comments: '
'{0!r}'.format(strip_comments))
space_around_operators = options.get('use_space_around_operators', False)
if space_around_operators not in [True, False]:
raise SQLParseError('Invalid value for use_space_around_operators: '
'{0!r}'.format(space_around_operators))
strip_ws = options.get('strip_whitespace', False)
if strip_ws not in [True, False]:
raise SQLParseError('Invalid value for strip_whitespace: '
'{0!r}'.format(strip_ws))
truncate_strings = options.get('truncate_strings')
if truncate_strings is not None:
try:
truncate_strings = int(truncate_strings)
except (ValueError, TypeError):
raise SQLParseError('Invalid value for truncate_strings: '
'{0!r}'.format(truncate_strings))
if truncate_strings <= 1:
raise SQLParseError('Invalid value for truncate_strings: '
'{0!r}'.format(truncate_strings))
options['truncate_strings'] = truncate_strings
options['truncate_char'] = options.get('truncate_char', '[...]')
indent_columns = options.get('indent_columns', False)
if indent_columns not in [True, False]:
raise SQLParseError('Invalid value for indent_columns: '
'{0!r}'.format(indent_columns))
elif indent_columns:
options['reindent'] = True # enforce reindent
options['indent_columns'] = indent_columns
reindent = options.get('reindent', False)
if reindent not in [True, False]:
raise SQLParseError('Invalid value for reindent: '
'{0!r}'.format(reindent))
elif reindent:
options['strip_whitespace'] = True
reindent_aligned = options.get('reindent_aligned', False)
if reindent_aligned not in [True, False]:
raise SQLParseError('Invalid value for reindent_aligned: '
'{0!r}'.format(reindent))
elif reindent_aligned:
options['strip_whitespace'] = True
indent_after_first = options.get('indent_after_first', False)
if indent_after_first not in [True, False]:
raise SQLParseError('Invalid value for indent_after_first: '
'{0!r}'.format(indent_after_first))
options['indent_after_first'] = indent_after_first
indent_tabs = options.get('indent_tabs', False)
if indent_tabs not in [True, False]:
raise SQLParseError('Invalid value for indent_tabs: '
'{0!r}'.format(indent_tabs))
elif indent_tabs:
options['indent_char'] = '\t'
else:
options['indent_char'] = ' '
indent_width = options.get('indent_width', 2)
try:
indent_width = int(indent_width)
except (TypeError, ValueError):
raise SQLParseError('indent_width requires an integer')
if indent_width < 1:
raise SQLParseError('indent_width requires a positive integer')
options['indent_width'] = indent_width
wrap_after = options.get('wrap_after', 0)
try:
wrap_after = int(wrap_after)
except (TypeError, ValueError):
raise SQLParseError('wrap_after requires an integer')
if wrap_after < 0:
raise SQLParseError('wrap_after requires a positive integer')
options['wrap_after'] = wrap_after
comma_first = options.get('comma_first', False)
if comma_first not in [True, False]:
raise SQLParseError('comma_first requires a boolean value')
options['comma_first'] = comma_first
right_margin = options.get('right_margin')
if right_margin is not None:
try:
right_margin = int(right_margin)
except (TypeError, ValueError):
raise SQLParseError('right_margin requires an integer')
if right_margin < 10:
raise SQLParseError('right_margin requires an integer > 10')
options['right_margin'] = right_margin
return options
def build_filter_stack(stack, options):
"""Setup and return a filter stack.
Args:
stack: :class:`~sqlparse.filters.FilterStack` instance
options: Dictionary with options validated by validate_options.
"""
# Token filter
if options.get('keyword_case'):
stack.preprocess.append(
filters.KeywordCaseFilter(options['keyword_case']))
if options.get('identifier_case'):
stack.preprocess.append(
filters.IdentifierCaseFilter(options['identifier_case']))
if options.get('truncate_strings'):
stack.preprocess.append(filters.TruncateStringFilter(
width=options['truncate_strings'], char=options['truncate_char']))
if options.get('use_space_around_operators', False):
stack.enable_grouping()
stack.stmtprocess.append(filters.SpacesAroundOperatorsFilter())
# After grouping
if options.get('strip_comments'):
stack.enable_grouping()
stack.stmtprocess.append(filters.StripCommentsFilter())
if options.get('strip_whitespace') or options.get('reindent'):
stack.enable_grouping()
stack.stmtprocess.append(filters.StripWhitespaceFilter())
if options.get('reindent'):
stack.enable_grouping()
stack.stmtprocess.append(
filters.ReindentFilter(
char=options['indent_char'],
width=options['indent_width'],
indent_after_first=options['indent_after_first'],
indent_columns=options['indent_columns'],
wrap_after=options['wrap_after'],
comma_first=options['comma_first']))
if options.get('reindent_aligned', False):
stack.enable_grouping()
stack.stmtprocess.append(
filters.AlignedIndentFilter(char=options['indent_char']))
if options.get('right_margin'):
stack.enable_grouping()
stack.stmtprocess.append(
filters.RightMarginFilter(width=options['right_margin']))
# Serializer
if options.get('output_format'):
frmt = options['output_format']
if frmt.lower() == 'php':
fltr = filters.OutputPHPFilter()
elif frmt.lower() == 'python':
fltr = filters.OutputPythonFilter()
else:
fltr = None
if fltr is not None:
stack.postprocess.append(fltr)
return stack
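# Illustrative usage sketch (not part of the vendored module): both functions
# run behind sqlparse.format(); an invalid option value surfaces as
# SQLParseError before any filter is built.
import sqlparse
from sqlparse.exceptions import SQLParseError
try:
    sqlparse.format('select 1', keyword_case='fancy')
except SQLParseError as e:
    print(e)  # Invalid value for keyword_case: 'fancy'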


@@ -1,5 +1,104 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
import re
from sqlparse import tokens
def is_keyword(value):
val = value.upper()
return (KEYWORDS_COMMON.get(val)
or KEYWORDS_ORACLE.get(val)
or KEYWORDS_PLPGSQL.get(val)
or KEYWORDS_HQL.get(val)
or KEYWORDS.get(val, tokens.Name)), value
SQL_REGEX = {
'root': [
(r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint),
(r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint),
(r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single),
(r'/\*[\s\S]*?\*/', tokens.Comment.Multiline),
(r'(\r\n|\r|\n)', tokens.Newline),
(r'\s+?', tokens.Whitespace),
(r':=', tokens.Assignment),
(r'::', tokens.Punctuation),
(r'\*', tokens.Wildcard),
(r"`(``|[^`])*`", tokens.Name),
(r"´(´´|[^´])*´", tokens.Name),
(r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal),
(r'\?', tokens.Name.Placeholder),
(r'%(\(\w+\))?s', tokens.Name.Placeholder),
(r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder),
(r'\\\w+', tokens.Command),
# FIXME(andi): VALUES shouldn't be listed here
# see https://github.com/andialbrecht/sqlparse/pull/64
# AS and IN are special, it may be followed by a parenthesis, but
# are never functions, see issue183 and issue507
(r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword),
(r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name),
# see issue #39
# Spaces around period `schema . name` are valid identifier
# TODO: Spaces before period not implemented
(r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name' .
# FIXME(atronah): never match,
# because `re.match` doesn't work with look-behind regexp feature
(r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name'
(r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func
(r'-?0x[\dA-F]+', tokens.Number.Hexadecimal),
(r'-?\d*(\.\d+)?E-?\d+', tokens.Number.Float),
(r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
tokens.Number.Float),
(r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
(r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
# not a real string literal in ANSI SQL:
(r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
(r'(""|".*?[^\\]")', tokens.String.Symbol),
# sqlite names can be escaped with [square brackets]. left bracket
# cannot be preceded by word character or a right bracket --
# otherwise it's probably an array index
(r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
(r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
(r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
(r'NOT\s+NULL\b', tokens.Keyword),
(r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword),
(r'UNION\s+ALL\b', tokens.Keyword),
(r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
(r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
(r'GROUP\s+BY\b', tokens.Keyword),
(r'ORDER\s+BY\b', tokens.Keyword),
(r'(LATERAL\s+VIEW\s+)'
r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b',
tokens.Keyword),
(r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast),
(r'(NOT\s+)?(LIKE|ILIKE)\b', tokens.Operator.Comparison),
(r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword),
(r'[;:()\[\],\.]', tokens.Punctuation),
(r'[<>=~!]+', tokens.Operator.Comparison),
(r'[+/@#%^&|`?^-]+', tokens.Operator),
]}
FLAGS = re.IGNORECASE | re.UNICODE
SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']]
KEYWORDS = {
'ABORT': tokens.Keyword,
'ABS': tokens.Keyword,
@@ -16,6 +115,7 @@ KEYWORDS = {
'ANALYSE': tokens.Keyword,
'ANALYZE': tokens.Keyword,
'ANY': tokens.Keyword,
'ARRAYLEN': tokens.Keyword,
'ARE': tokens.Keyword,
'ASC': tokens.Keyword.Order,
'ASENSITIVE': tokens.Keyword,
@@ -24,7 +124,9 @@ KEYWORDS = {
'ASYMMETRIC': tokens.Keyword,
'AT': tokens.Keyword,
'ATOMIC': tokens.Keyword,
'AUDIT': tokens.Keyword,
'AUTHORIZATION': tokens.Keyword,
'AUTO_INCREMENT': tokens.Keyword,
'AVG': tokens.Keyword,
'BACKWARD': tokens.Keyword,
@@ -53,6 +155,7 @@ KEYWORDS = {
'CHARACTER_SET_NAME': tokens.Keyword,
'CHARACTER_SET_SCHEMA': tokens.Keyword,
'CHAR_LENGTH': tokens.Keyword,
'CHARSET': tokens.Keyword,
'CHECK': tokens.Keyword,
'CHECKED': tokens.Keyword,
'CHECKPOINT': tokens.Keyword,
@@ -71,12 +174,14 @@ KEYWORDS = {
'COLLECT': tokens.Keyword,
'COLUMN': tokens.Keyword,
'COLUMN_NAME': tokens.Keyword,
'COMPRESS': tokens.Keyword,
'COMMAND_FUNCTION': tokens.Keyword,
'COMMAND_FUNCTION_CODE': tokens.Keyword,
'COMMENT': tokens.Keyword,
'COMMIT': tokens.Keyword.DML,
'COMMITTED': tokens.Keyword,
'COMPLETION': tokens.Keyword,
'CONCURRENTLY': tokens.Keyword,
'CONDITION_NUMBER': tokens.Keyword,
'CONNECT': tokens.Keyword,
'CONNECTION': tokens.Keyword,
@@ -92,7 +197,7 @@ KEYWORDS = {
'CONVERSION': tokens.Keyword,
'CONVERT': tokens.Keyword,
'COPY': tokens.Keyword,
'CORRESPONDING': tokens.Keyword,
'COUNT': tokens.Keyword,
'CREATEDB': tokens.Keyword,
'CREATEUSER': tokens.Keyword,
@@ -133,6 +238,7 @@ KEYWORDS = {
'DETERMINISTIC': tokens.Keyword,
'DIAGNOSTICS': tokens.Keyword,
'DICTIONARY': tokens.Keyword,
'DISABLE': tokens.Keyword,
'DISCONNECT': tokens.Keyword,
'DISPATCH': tokens.Keyword,
'DO': tokens.Keyword,
@@ -142,25 +248,29 @@ KEYWORDS = {
'DYNAMIC_FUNCTION_CODE': tokens.Keyword,
'EACH': tokens.Keyword,
'ENABLE': tokens.Keyword,
'ENCODING': tokens.Keyword,
'ENCRYPTED': tokens.Keyword,
'END-EXEC': tokens.Keyword,
'ENGINE': tokens.Keyword,
'EQUALS': tokens.Keyword,
'ESCAPE': tokens.Keyword,
'EVERY': tokens.Keyword,
'EXCEPT': tokens.Keyword,
'EXCEPTION': tokens.Keyword,
'EXCLUDING': tokens.Keyword,
'EXCLUSIVE': tokens.Keyword,
'EXEC': tokens.Keyword,
'EXECUTE': tokens.Keyword,
'EXISTING': tokens.Keyword,
'EXISTS': tokens.Keyword,
'EXPLAIN': tokens.Keyword,
'EXTERNAL': tokens.Keyword,
'EXTRACT': tokens.Keyword,
'FALSE': tokens.Keyword,
'FETCH': tokens.Keyword,
'FILE': tokens.Keyword,
'FINAL': tokens.Keyword,
'FIRST': tokens.Keyword,
'FORCE': tokens.Keyword,
@@ -189,8 +299,10 @@ KEYWORDS = {
'HAVING': tokens.Keyword,
'HIERARCHY': tokens.Keyword,
'HOLD': tokens.Keyword,
'HOUR': tokens.Keyword,
'HOST': tokens.Keyword,
'IDENTIFIED': tokens.Keyword,
'IDENTITY': tokens.Keyword,
'IGNORE': tokens.Keyword,
'ILIKE': tokens.Keyword,
@@ -206,6 +318,7 @@ KEYWORDS = {
'INDITCATOR': tokens.Keyword,
'INFIX': tokens.Keyword,
'INHERITS': tokens.Keyword,
'INITIAL': tokens.Keyword,
'INITIALIZE': tokens.Keyword,
'INITIALLY': tokens.Keyword,
'INOUT': tokens.Keyword,
@@ -249,12 +362,14 @@ KEYWORDS = {
# 'M': tokens.Keyword,
'MAP': tokens.Keyword,
'MATCH': tokens.Keyword,
'MAXEXTENTS': tokens.Keyword,
'MAXVALUE': tokens.Keyword,
'MESSAGE_LENGTH': tokens.Keyword,
'MESSAGE_OCTET_LENGTH': tokens.Keyword,
'MESSAGE_TEXT': tokens.Keyword,
'METHOD': tokens.Keyword,
'MINUTE': tokens.Keyword,
'MINUS': tokens.Keyword,
'MINVALUE': tokens.Keyword,
'MOD': tokens.Keyword,
'MODE': tokens.Keyword,
@@ -273,13 +388,17 @@ KEYWORDS = {
'NEW': tokens.Keyword,
'NEXT': tokens.Keyword,
'NO': tokens.Keyword,
'NOAUDIT': tokens.Keyword,
'NOCOMPRESS': tokens.Keyword,
'NOCREATEDB': tokens.Keyword,
'NOCREATEUSER': tokens.Keyword,
'NONE': tokens.Keyword,
'NOT': tokens.Keyword,
'NOTFOUND': tokens.Keyword,
'NOTHING': tokens.Keyword,
'NOTIFY': tokens.Keyword,
'NOTNULL': tokens.Keyword,
'NOWAIT': tokens.Keyword,
'NULL': tokens.Keyword,
'NULLABLE': tokens.Keyword,
'NULLIF': tokens.Keyword,
@@ -288,9 +407,11 @@ KEYWORDS = {
'OCTET_LENGTH': tokens.Keyword,
'OF': tokens.Keyword,
'OFF': tokens.Keyword,
'OFFLINE': tokens.Keyword,
'OFFSET': tokens.Keyword,
'OIDS': tokens.Keyword,
'OLD': tokens.Keyword,
'ONLINE': tokens.Keyword,
'ONLY': tokens.Keyword,
'OPEN': tokens.Keyword,
'OPERATION': tokens.Keyword,
@@ -305,17 +426,20 @@ KEYWORDS = {
'OVERRIDING': tokens.Keyword,
'OWNER': tokens.Keyword,
'QUARTER': tokens.Keyword,
'PAD': tokens.Keyword,
'PARAMETER': tokens.Keyword,
'PARAMETERS': tokens.Keyword,
'PARAMETER_MODE': tokens.Keyword,
'PARAMETER_NAME': tokens.Keyword,
'PARAMETER_ORDINAL_POSITION': tokens.Keyword,
'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword,
'PARAMETER_SPECIFIC_NAME': tokens.Keyword,
'PARAMETER_SPECIFIC_SCHEMA': tokens.Keyword,
'PARTIAL': tokens.Keyword,
'PASCAL': tokens.Keyword,
'PCTFREE': tokens.Keyword,
'PENDANT': tokens.Keyword,
'PLACING': tokens.Keyword,
'PLI': tokens.Keyword,
@@ -334,6 +458,7 @@ KEYWORDS = {
'PUBLIC': tokens.Keyword,
'RAISE': tokens.Keyword,
'RAW': tokens.Keyword,
'READ': tokens.Keyword,
'READS': tokens.Keyword,
'RECHECK': tokens.Keyword,
@@ -346,6 +471,7 @@ KEYWORDS = {
'RENAME': tokens.Keyword,
'REPEATABLE': tokens.Keyword,
'RESET': tokens.Keyword,
'RESOURCE': tokens.Keyword,
'RESTART': tokens.Keyword,
'RESTRICT': tokens.Keyword,
'RESULT': tokens.Keyword,
@@ -353,6 +479,7 @@ KEYWORDS = {
'RETURNED_LENGTH': tokens.Keyword,
'RETURNED_OCTET_LENGTH': tokens.Keyword,
'RETURNED_SQLSTATE': tokens.Keyword,
'RETURNING': tokens.Keyword,
'RETURNS': tokens.Keyword,
'REVOKE': tokens.Keyword,
'RIGHT': tokens.Keyword,
@@ -379,6 +506,7 @@ KEYWORDS = {
'SECURITY': tokens.Keyword,
'SELF': tokens.Keyword,
'SENSITIVE': tokens.Keyword,
'SEQUENCE': tokens.Keyword,
'SERIALIZABLE': tokens.Keyword,
'SERVER_NAME': tokens.Keyword,
'SESSION': tokens.Keyword,
@@ -397,6 +525,7 @@ KEYWORDS = {
'SPECIFICTYPE': tokens.Keyword,
'SPECIFIC_NAME': tokens.Keyword,
'SQL': tokens.Keyword,
'SQLBUF': tokens.Keyword,
'SQLCODE': tokens.Keyword,
'SQLERROR': tokens.Keyword,
'SQLEXCEPTION': tokens.Keyword,
@@ -404,7 +533,7 @@ KEYWORDS = {
'SQLWARNING': tokens.Keyword,
'STABLE': tokens.Keyword,
'START': tokens.Keyword.DML,
# 'STATE': tokens.Keyword,
'STATEMENT': tokens.Keyword,
'STATIC': tokens.Keyword,
'STATISTICS': tokens.Keyword,
@@ -417,8 +546,10 @@ KEYWORDS = {
'SUBCLASS_ORIGIN': tokens.Keyword,
'SUBLIST': tokens.Keyword,
'SUBSTRING': tokens.Keyword,
'SUCCESSFUL': tokens.Keyword,
'SUM': tokens.Keyword,
'SYMMETRIC': tokens.Keyword,
'SYNONYM': tokens.Keyword,
'SYSID': tokens.Keyword,
'SYSTEM': tokens.Keyword,
'SYSTEM_USER': tokens.Keyword,
@@ -455,6 +586,7 @@ KEYWORDS = {
'TRUSTED': tokens.Keyword,
'TYPE': tokens.Keyword,
'UID': tokens.Keyword,
'UNCOMMITTED': tokens.Keyword,
'UNDER': tokens.Keyword,
'UNENCRYPTED': tokens.Keyword,
@@ -476,6 +608,7 @@ KEYWORDS = {
'VACUUM': tokens.Keyword,
'VALID': tokens.Keyword,
'VALIDATE': tokens.Keyword,
'VALIDATOR': tokens.Keyword,
'VALUES': tokens.Keyword,
'VARIABLE': tokens.Keyword,
@@ -484,8 +617,9 @@ KEYWORDS = {
'VIEW': tokens.Keyword,
'VOLATILE': tokens.Keyword,
'WEEK': tokens.Keyword,
'WHENEVER': tokens.Keyword,
'WITH': tokens.Keyword.CTE,
'WITHOUT': tokens.Keyword,
'WORK': tokens.Keyword,
'WRITE': tokens.Keyword,
@@ -506,33 +640,50 @@ KEYWORDS = {
'DATE': tokens.Name.Builtin,
'DEC': tokens.Name.Builtin,
'DECIMAL': tokens.Name.Builtin,
'FILE_TYPE': tokens.Name.Builtin,
'FLOAT': tokens.Name.Builtin,
'INT': tokens.Name.Builtin,
'INT8': tokens.Name.Builtin,
'INTEGER': tokens.Name.Builtin,
'INTERVAL': tokens.Name.Builtin,
'LONG': tokens.Name.Builtin,
'NATURALN': tokens.Name.Builtin,
'NVARCHAR': tokens.Name.Builtin,
'NUMBER': tokens.Name.Builtin,
'NUMERIC': tokens.Name.Builtin,
'PLS_INTEGER': tokens.Name.Builtin,
'POSITIVE': tokens.Name.Builtin,
'POSITIVEN': tokens.Name.Builtin,
'REAL': tokens.Name.Builtin,
'ROWID': tokens.Name.Builtin,
'ROWLABEL': tokens.Name.Builtin,
'ROWNUM': tokens.Name.Builtin,
'SERIAL': tokens.Name.Builtin,
'SERIAL8': tokens.Name.Builtin,
'SIGNED': tokens.Name.Builtin,
'SIGNTYPE': tokens.Name.Builtin,
'SIMPLE_DOUBLE': tokens.Name.Builtin,
'SIMPLE_FLOAT': tokens.Name.Builtin,
'SIMPLE_INTEGER': tokens.Name.Builtin,
'SMALLINT': tokens.Name.Builtin,
'SYS_REFCURSOR': tokens.Name.Builtin,
'SYSDATE': tokens.Name,
'TEXT': tokens.Name.Builtin,
'TINYINT': tokens.Name.Builtin,
'UNSIGNED': tokens.Name.Builtin,
'UROWID': tokens.Name.Builtin,
'UTL_FILE': tokens.Name.Builtin,
'VARCHAR': tokens.Name.Builtin,
'VARCHAR2': tokens.Name.Builtin,
'VARYING': tokens.Name.Builtin,
}
KEYWORDS_COMMON = {
'SELECT': tokens.Keyword.DML,
'INSERT': tokens.Keyword.DML,
'DELETE': tokens.Keyword.DML,
'UPDATE': tokens.Keyword.DML,
'UPSERT': tokens.Keyword.DML,
'REPLACE': tokens.Keyword.DML,
'MERGE': tokens.Keyword.DML,
'DROP': tokens.Keyword.DDL,
@@ -565,6 +716,7 @@ KEYWORDS_COMMON = {
'AS': tokens.Keyword,
'ELSE': tokens.Keyword,
'FOR': tokens.Keyword,
'WHILE': tokens.Keyword,
'CASE': tokens.Keyword,
'WHEN': tokens.Keyword,
@@ -572,3 +724,232 @@ KEYWORDS_COMMON = {
'MAX': tokens.Keyword,
'DISTINCT': tokens.Keyword,
}
KEYWORDS_ORACLE = {
'ARCHIVE': tokens.Keyword,
'ARCHIVELOG': tokens.Keyword,
'BACKUP': tokens.Keyword,
'BECOME': tokens.Keyword,
'BLOCK': tokens.Keyword,
'BODY': tokens.Keyword,
'CANCEL': tokens.Keyword,
'CHANGE': tokens.Keyword,
'COMPILE': tokens.Keyword,
'CONTENTS': tokens.Keyword,
'CONTROLFILE': tokens.Keyword,
'DATAFILE': tokens.Keyword,
'DBA': tokens.Keyword,
'DISMOUNT': tokens.Keyword,
'DOUBLE': tokens.Keyword,
'DUMP': tokens.Keyword,
'EVENTS': tokens.Keyword,
'EXCEPTIONS': tokens.Keyword,
'EXPLAIN': tokens.Keyword,
'EXTENT': tokens.Keyword,
'EXTERNALLY': tokens.Keyword,
'FLUSH': tokens.Keyword,
'FREELIST': tokens.Keyword,
'FREELISTS': tokens.Keyword,
# groups seems too common as table name
# 'GROUPS': tokens.Keyword,
'INDICATOR': tokens.Keyword,
'INITRANS': tokens.Keyword,
'INSTANCE': tokens.Keyword,
'LAYER': tokens.Keyword,
'LINK': tokens.Keyword,
'LISTS': tokens.Keyword,
'LOGFILE': tokens.Keyword,
'MANAGE': tokens.Keyword,
'MANUAL': tokens.Keyword,
'MAXDATAFILES': tokens.Keyword,
'MAXINSTANCES': tokens.Keyword,
'MAXLOGFILES': tokens.Keyword,
'MAXLOGHISTORY': tokens.Keyword,
'MAXLOGMEMBERS': tokens.Keyword,
'MAXTRANS': tokens.Keyword,
'MINEXTENTS': tokens.Keyword,
'MODULE': tokens.Keyword,
'MOUNT': tokens.Keyword,
'NOARCHIVELOG': tokens.Keyword,
'NOCACHE': tokens.Keyword,
'NOCYCLE': tokens.Keyword,
'NOMAXVALUE': tokens.Keyword,
'NOMINVALUE': tokens.Keyword,
'NOORDER': tokens.Keyword,
'NORESETLOGS': tokens.Keyword,
'NORMAL': tokens.Keyword,
'NOSORT': tokens.Keyword,
'OPTIMAL': tokens.Keyword,
'OWN': tokens.Keyword,
'PACKAGE': tokens.Keyword,
'PARALLEL': tokens.Keyword,
'PCTINCREASE': tokens.Keyword,
'PCTUSED': tokens.Keyword,
'PLAN': tokens.Keyword,
'PRIVATE': tokens.Keyword,
'PROFILE': tokens.Keyword,
'QUOTA': tokens.Keyword,
'RECOVER': tokens.Keyword,
'RESETLOGS': tokens.Keyword,
'RESTRICTED': tokens.Keyword,
'REUSE': tokens.Keyword,
'ROLES': tokens.Keyword,
'SAVEPOINT': tokens.Keyword,
'SCN': tokens.Keyword,
'SECTION': tokens.Keyword,
'SEGMENT': tokens.Keyword,
'SHARED': tokens.Keyword,
'SNAPSHOT': tokens.Keyword,
'SORT': tokens.Keyword,
'STATEMENT_ID': tokens.Keyword,
'STOP': tokens.Keyword,
'SWITCH': tokens.Keyword,
'TABLES': tokens.Keyword,
'TABLESPACE': tokens.Keyword,
'THREAD': tokens.Keyword,
'TIME': tokens.Keyword,
'TRACING': tokens.Keyword,
'TRANSACTION': tokens.Keyword,
'TRIGGERS': tokens.Keyword,
'UNLIMITED': tokens.Keyword,
'UNLOCK': tokens.Keyword,
}
# PostgreSQL Syntax
KEYWORDS_PLPGSQL = {
'PARTITION': tokens.Keyword,
'OVER': tokens.Keyword,
'PERFORM': tokens.Keyword,
'NOTICE': tokens.Keyword,
'PLPGSQL': tokens.Keyword,
'INHERIT': tokens.Keyword,
'INDEXES': tokens.Keyword,
'BYTEA': tokens.Keyword,
'BIGSERIAL': tokens.Keyword,
'BIT VARYING': tokens.Keyword,
'BOX': tokens.Keyword,
'CHARACTER': tokens.Keyword,
'CHARACTER VARYING': tokens.Keyword,
'CIDR': tokens.Keyword,
'CIRCLE': tokens.Keyword,
'DOUBLE PRECISION': tokens.Keyword,
'INET': tokens.Keyword,
'JSON': tokens.Keyword,
'JSONB': tokens.Keyword,
'LINE': tokens.Keyword,
'LSEG': tokens.Keyword,
'MACADDR': tokens.Keyword,
'MONEY': tokens.Keyword,
'PATH': tokens.Keyword,
'PG_LSN': tokens.Keyword,
'POINT': tokens.Keyword,
'POLYGON': tokens.Keyword,
'SMALLSERIAL': tokens.Keyword,
'TSQUERY': tokens.Keyword,
'TSVECTOR': tokens.Keyword,
'TXID_SNAPSHOT': tokens.Keyword,
'UUID': tokens.Keyword,
'XML': tokens.Keyword,
'FOR': tokens.Keyword,
'IN': tokens.Keyword,
'LOOP': tokens.Keyword,
}
# Hive Syntax
KEYWORDS_HQL = {
'EXPLODE': tokens.Keyword,
'DIRECTORY': tokens.Keyword,
'DISTRIBUTE': tokens.Keyword,
'INCLUDE': tokens.Keyword,
'LOCATE': tokens.Keyword,
'OVERWRITE': tokens.Keyword,
'POSEXPLODE': tokens.Keyword,
'ARRAY_CONTAINS': tokens.Keyword,
'CMP': tokens.Keyword,
'COLLECT_LIST': tokens.Keyword,
'CONCAT': tokens.Keyword,
'CONDITION': tokens.Keyword,
'DATE_ADD': tokens.Keyword,
'DATE_SUB': tokens.Keyword,
'DECODE': tokens.Keyword,
'DBMS_OUTPUT': tokens.Keyword,
'ELEMENTS': tokens.Keyword,
'EXCHANGE': tokens.Keyword,
'EXTENDED': tokens.Keyword,
'FLOOR': tokens.Keyword,
'FOLLOWING': tokens.Keyword,
'FROM_UNIXTIME': tokens.Keyword,
'FTP': tokens.Keyword,
'HOUR': tokens.Keyword,
'INLINE': tokens.Keyword,
'INSTR': tokens.Keyword,
'LEN': tokens.Keyword,
'MAXELEMENT': tokens.Keyword,
'MAXINDEX': tokens.Keyword,
'MAX_PART_DATE': tokens.Keyword,
'MAX_PART_INT': tokens.Keyword,
'MAX_PART_STRING': tokens.Keyword,
'MINELEMENT': tokens.Keyword,
'MININDEX': tokens.Keyword,
'MIN_PART_DATE': tokens.Keyword,
'MIN_PART_INT': tokens.Keyword,
'MIN_PART_STRING': tokens.Keyword,
'NOW': tokens.Keyword,
'NVL': tokens.Keyword,
'NVL2': tokens.Keyword,
'PARSE_URL_TUPLE': tokens.Keyword,
'PART_LOC': tokens.Keyword,
'PART_COUNT': tokens.Keyword,
'PART_COUNT_BY': tokens.Keyword,
'PRINT': tokens.Keyword,
'PUT_LINE': tokens.Keyword,
'RANGE': tokens.Keyword,
'REDUCE': tokens.Keyword,
'REGEXP_REPLACE': tokens.Keyword,
'RESIGNAL': tokens.Keyword,
'RTRIM': tokens.Keyword,
'SIGN': tokens.Keyword,
'SIGNAL': tokens.Keyword,
'SIN': tokens.Keyword,
'SPLIT': tokens.Keyword,
'SQRT': tokens.Keyword,
'STACK': tokens.Keyword,
'STR': tokens.Keyword,
'SUBSTR': tokens.Keyword,
'SUMMARY': tokens.Keyword,
'TBLPROPERTIES': tokens.Keyword,
'TIMESTAMP_ISO': tokens.Keyword,
'TO_CHAR': tokens.Keyword,
'TO_DATE': tokens.Keyword,
'TO_TIMESTAMP': tokens.Keyword,
'TRUNC': tokens.Keyword,
'UNBOUNDED': tokens.Keyword,
'UNIQUEJOIN': tokens.Keyword,
'UNIX_TIMESTAMP': tokens.Keyword,
'UTC_TIMESTAMP': tokens.Keyword,
'VIEWS': tokens.Keyword,
'EXIT': tokens.Keyword,
'BREAK': tokens.Keyword,
'LEAVE': tokens.Keyword,
}
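# Illustrative usage sketch (not part of the vendored module): is_keyword is
# installed as a lexer callback and maps a word to a (tokentype, value) pair,
# falling back to tokens.Name for non-keywords.
from sqlparse import keywords
print(keywords.is_keyword('select'))    # (Token.Keyword.DML, 'select')
print(keywords.is_keyword('my_table'))  # (Token.Name, 'my_table')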


@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""SQL Lexer"""
# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.
from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
from sqlparse.compat import text_type, file_types
from sqlparse.utils import consume
class Lexer(object):
"""Lexer
Empty class, kept for backwards compatibility.
"""
@staticmethod
def get_tokens(text, encoding=None):
"""
Return an iterable of (tokentype, value) pairs generated from
`text`. If `unfiltered` is set to `True`, the filtering mechanism
is bypassed even if filters are defined.
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
Split ``text`` into (tokentype, text) pairs.
``stack`` is the initial stack (default: ``['root']``)
"""
if isinstance(text, file_types):
text = text.read()
if isinstance(text, text_type):
pass
elif isinstance(text, bytes):
if encoding:
text = text.decode(encoding)
else:
try:
text = text.decode('utf-8')
except UnicodeDecodeError:
text = text.decode('unicode-escape')
else:
raise TypeError(u"Expected text or file-like object, got {!r}".
format(type(text)))
iterable = enumerate(text)
for pos, char in iterable:
for rexmatch, action in SQL_REGEX:
m = rexmatch(text, pos)
if not m:
continue
elif isinstance(action, tokens._TokenType):
yield action, m.group()
elif callable(action):
yield action(m.group())
consume(iterable, m.end() - pos - 1)
break
else:
yield tokens.Error, char
def tokenize(sql, encoding=None):
"""Tokenize sql.
Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
of ``(token type, value)`` items.
"""
return Lexer().get_tokens(sql, encoding)
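# Illustrative usage sketch (not part of the vendored module): tokenize()
# yields a flat (tokentype, value) stream before any grouping is applied.
from sqlparse import lexer
for ttype, value in lexer.tokenize('select 1'):
    print(ttype, repr(value))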


@@ -0,0 +1,650 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
"""This module contains classes representing syntactical elements of SQL."""
from __future__ import print_function
import re
from sqlparse import tokens as T
from sqlparse.compat import string_types, text_type, unicode_compatible
from sqlparse.utils import imt, remove_quotes
class NameAliasMixin:
"""Implements get_real_name and get_alias."""
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
# a.b
dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.'))
return self._get_first_name(dot_idx, real_name=True)
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
# "name AS alias"
kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS'))
if kw is not None:
return self._get_first_name(kw_idx + 1, keywords=True)
# "name alias" or "complicated column expression alias"
_, ws = self.token_next_by(t=T.Whitespace)
if len(self.tokens) > 2 and ws is not None:
return self._get_first_name(reverse=True)
@unicode_compatible
class Token(object):
"""Base class for all other classes in this module.
It represents a single token and has two instance attributes:
``value`` is the unchanged value of the token and ``ttype`` is
the type of the token.
"""
__slots__ = ('value', 'ttype', 'parent', 'normalized', 'is_keyword',
'is_group', 'is_whitespace')
def __init__(self, ttype, value):
value = text_type(value)
self.value = value
self.ttype = ttype
self.parent = None
self.is_group = False
self.is_keyword = ttype in T.Keyword
self.is_whitespace = self.ttype in T.Whitespace
self.normalized = value.upper() if self.is_keyword else value
def __str__(self):
return self.value
# Pending tokenlist __len__ bug fix
# def __len__(self):
# return len(self.value)
def __repr__(self):
cls = self._get_repr_name()
value = self._get_repr_value()
q = u'"' if value.startswith("'") and value.endswith("'") else u"'"
return u"<{cls} {q}{value}{q} at 0x{id:2X}>".format(
id=id(self), **locals())
def _get_repr_name(self):
return str(self.ttype).split('.')[-1]
def _get_repr_value(self):
raw = text_type(self)
if len(raw) > 7:
raw = raw[:6] + '...'
return re.sub(r'\s+', ' ', raw)
def flatten(self):
"""Resolve subgroups."""
yield self
def match(self, ttype, values, regex=False):
"""Checks whether the token matches the given arguments.
*ttype* is a token type. If this token doesn't match the given token
type.
*values* is a list of possible values for this token. The values
are OR'ed together so if only one of the values matches ``True``
is returned. Except for keyword tokens the comparison is
case-sensitive. For convenience it's OK to pass in a single string.
If *regex* is ``True`` (default is ``False``) the given values are
treated as regular expressions.
"""
type_matched = self.ttype is ttype
if not type_matched or values is None:
return type_matched
if isinstance(values, string_types):
values = (values,)
if regex:
# TODO: Add test for regex with is_keyword = false
flag = re.IGNORECASE if self.is_keyword else 0
values = (re.compile(v, flag) for v in values)
for pattern in values:
if pattern.search(self.normalized):
return True
return False
if self.is_keyword:
values = (v.upper() for v in values)
return self.normalized in values
def within(self, group_cls):
"""Returns ``True`` if this token is within *group_cls*.
Use this method for example to check if an identifier is within
a function: ``t.within(sql.Function)``.
"""
parent = self.parent
while parent:
if isinstance(parent, group_cls):
return True
parent = parent.parent
return False
def is_child_of(self, other):
"""Returns ``True`` if this token is a direct child of *other*."""
return self.parent == other
def has_ancestor(self, other):
"""Returns ``True`` if *other* is in this tokens ancestry."""
parent = self.parent
while parent:
if parent == other:
return True
parent = parent.parent
return False
@unicode_compatible
class TokenList(Token):
"""A group of tokens.
It has an additional instance attribute ``tokens`` which holds a
list of child-tokens.
"""
__slots__ = 'tokens'
def __init__(self, tokens=None):
self.tokens = tokens or []
[setattr(token, 'parent', self) for token in self.tokens]
super(TokenList, self).__init__(None, text_type(self))
self.is_group = True
def __str__(self):
return u''.join(token.value for token in self.flatten())
# weird bug
# def __len__(self):
# return len(self.tokens)
def __iter__(self):
return iter(self.tokens)
def __getitem__(self, item):
return self.tokens[item]
def _get_repr_name(self):
return type(self).__name__
def _pprint_tree(self, max_depth=None, depth=0, f=None, _pre=''):
"""Pretty-print the object tree."""
token_count = len(self.tokens)
for idx, token in enumerate(self.tokens):
cls = token._get_repr_name()
value = token._get_repr_value()
last = idx == (token_count - 1)
pre = u'`- ' if last else u'|- '
q = u'"' if value.startswith("'") and value.endswith("'") else u"'"
print(u"{_pre}{pre}{idx} {cls} {q}{value}{q}"
.format(**locals()), file=f)
if token.is_group and (max_depth is None or depth < max_depth):
parent_pre = u' ' if last else u'| '
token._pprint_tree(max_depth, depth + 1, f, _pre + parent_pre)
def get_token_at_offset(self, offset):
"""Returns the token that is on position offset."""
idx = 0
for token in self.flatten():
end = idx + len(token.value)
if idx <= offset < end:
return token
idx = end
def flatten(self):
"""Generator yielding ungrouped tokens.
This method is recursively called for all child tokens.
"""
for token in self.tokens:
if token.is_group:
for item in token.flatten():
yield item
else:
yield token
def get_sublists(self):
for token in self.tokens:
if token.is_group:
yield token
@property
def _groupable_tokens(self):
return self.tokens
def _token_matching(self, funcs, start=0, end=None, reverse=False):
"""next token that match functions"""
if start is None:
return None
if not isinstance(funcs, (list, tuple)):
funcs = (funcs,)
if reverse:
assert end is None
for idx in range(start - 2, -1, -1):
token = self.tokens[idx]
for func in funcs:
if func(token):
return idx, token
else:
for idx, token in enumerate(self.tokens[start:end], start=start):
for func in funcs:
if func(token):
return idx, token
return None, None
def token_first(self, skip_ws=True, skip_cm=False):
"""Returns the first child token.
If *skip_ws* is ``True`` (the default), whitespace
tokens are ignored.
If *skip_cm* is ``True`` (default: ``False``), comments are
ignored too.
"""
# this one is inconsistent: it uses Comment instead of T.Comment...
def matcher(tk):
return not ((skip_ws and tk.is_whitespace)
or (skip_cm and imt(tk, t=T.Comment, i=Comment)))
return self._token_matching(matcher)[1]
def token_next_by(self, i=None, m=None, t=None, idx=-1, end=None):
idx += 1
return self._token_matching(lambda tk: imt(tk, i, m, t), idx, end)
def token_not_matching(self, funcs, idx):
funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs
funcs = [lambda tk: not func(tk) for func in funcs]
return self._token_matching(funcs, idx)
def token_matching(self, funcs, idx):
return self._token_matching(funcs, idx)[1]
def token_prev(self, idx, skip_ws=True, skip_cm=False):
"""Returns the previous token relative to *idx*.
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
If *skip_cm* is ``True`` comments are ignored.
``None`` is returned if there's no previous token.
"""
return self.token_next(idx, skip_ws, skip_cm, _reverse=True)
# TODO: May need to re-add default value to idx
def token_next(self, idx, skip_ws=True, skip_cm=False, _reverse=False):
"""Returns the next token relative to *idx*.
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
If *skip_cm* is ``True`` comments are ignored.
``None`` is returned if there's no next token.
"""
if idx is None:
return None, None
idx += 1  # a lot of existing call sites pre-compensate for this
def matcher(tk):
return not ((skip_ws and tk.is_whitespace)
or (skip_cm and imt(tk, t=T.Comment, i=Comment)))
return self._token_matching(matcher, idx, reverse=_reverse)
def token_index(self, token, start=0):
"""Return list index of token."""
start = start if isinstance(start, int) else self.token_index(start)
return start + self.tokens[start:].index(token)
def group_tokens(self, grp_cls, start, end, include_end=True,
extend=False):
"""Replace tokens by an instance of *grp_cls*."""
start_idx = start
start = self.tokens[start_idx]
end_idx = end + include_end
# will be needed later for new group_clauses
# while skip_ws and tokens and tokens[-1].is_whitespace:
# tokens = tokens[:-1]
if extend and isinstance(start, grp_cls):
subtokens = self.tokens[start_idx + 1:end_idx]
grp = start
grp.tokens.extend(subtokens)
del self.tokens[start_idx + 1:end_idx]
grp.value = text_type(start)
else:
subtokens = self.tokens[start_idx:end_idx]
grp = grp_cls(subtokens)
self.tokens[start_idx:end_idx] = [grp]
grp.parent = self
for token in subtokens:
token.parent = grp
return grp
def insert_before(self, where, token):
"""Inserts *token* before *where*."""
if not isinstance(where, int):
where = self.token_index(where)
token.parent = self
self.tokens.insert(where, token)
def insert_after(self, where, token, skip_ws=True):
"""Inserts *token* after *where*."""
if not isinstance(where, int):
where = self.token_index(where)
nidx, next_ = self.token_next(where, skip_ws=skip_ws)
token.parent = self
if next_ is None:
self.tokens.append(token)
else:
self.tokens.insert(nidx, token)
def has_alias(self):
"""Returns ``True`` if an alias is present."""
return self.get_alias() is not None
def get_alias(self):
"""Returns the alias for this identifier or ``None``."""
return None
def get_name(self):
"""Returns the name of this identifier.
This is either its alias or its real name. The returned value can
be considered as the name under which the object corresponding to
this identifier is known within the current statement.
"""
return self.get_alias() or self.get_real_name()
def get_real_name(self):
"""Returns the real name (object name) of this identifier."""
return None
def get_parent_name(self):
"""Return name of the parent object if any.
A parent object is identified by the first occurring dot.
"""
dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.'))
_, prev_ = self.token_prev(dot_idx)
return remove_quotes(prev_.value) if prev_ is not None else None
def _get_first_name(self, idx=None, reverse=False, keywords=False,
real_name=False):
"""Returns the name of the first token with a name"""
tokens = self.tokens[idx:] if idx else self.tokens
tokens = reversed(tokens) if reverse else tokens
types = [T.Name, T.Wildcard, T.String.Symbol]
if keywords:
types.append(T.Keyword)
for token in tokens:
if token.ttype in types:
return remove_quotes(token.value)
elif isinstance(token, (Identifier, Function)):
return token.get_real_name() if real_name else token.get_name()
class Statement(TokenList):
"""Represents a SQL statement."""
def get_type(self):
"""Returns the type of a statement.
The returned value is a string holding an upper-cased version of
the first DML or DDL keyword. If the first token in this group
isn't a DML or DDL keyword "UNKNOWN" is returned.
Whitespaces and comments at the beginning of the statement
are ignored.
"""
first_token = self.token_first(skip_cm=True)
if first_token is None:
# An "empty" statement that either has not tokens at all
# or only whitespace tokens.
return 'UNKNOWN'
elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL):
return first_token.normalized
elif first_token.ttype == T.Keyword.CTE:
# The WITH keyword should be followed by either an Identifier or
# an IdentifierList containing the CTE definitions; the actual
# DML keyword (e.g. SELECT, INSERT) will follow next.
fidx = self.token_index(first_token)
tidx, token = self.token_next(fidx, skip_ws=True)
if isinstance(token, (Identifier, IdentifierList)):
_, dml_keyword = self.token_next(tidx, skip_ws=True)
if dml_keyword is not None \
and dml_keyword.ttype == T.Keyword.DML:
return dml_keyword.normalized
# Hmm, probably invalid syntax, so return unknown.
return 'UNKNOWN'
class Identifier(NameAliasMixin, TokenList):
"""Represents an identifier.
Identifiers may have aliases or typecasts.
"""
def is_wildcard(self):
"""Return ``True`` if this identifier contains a wildcard."""
_, token = self.token_next_by(t=T.Wildcard)
return token is not None
def get_typecast(self):
"""Returns the typecast or ``None`` of this object as a string."""
midx, marker = self.token_next_by(m=(T.Punctuation, '::'))
nidx, next_ = self.token_next(midx, skip_ws=False)
return next_.value if next_ else None
def get_ordering(self):
"""Returns the ordering or ``None`` as uppercase string."""
_, ordering = self.token_next_by(t=T.Keyword.Order)
return ordering.normalized if ordering else None
def get_array_indices(self):
"""Returns an iterator of index token lists"""
for token in self.tokens:
if isinstance(token, SquareBrackets):
# Use [1:-1] index to discard the square brackets
yield token.tokens[1:-1]
class IdentifierList(TokenList):
"""A list of :class:`~sqlparse.sql.Identifier`\'s."""
def get_identifiers(self):
"""Returns the identifiers.
Whitespaces and punctuations are not included in this generator.
"""
for token in self.tokens:
if not (token.is_whitespace or token.match(T.Punctuation, ',')):
yield token
class TypedLiteral(TokenList):
"""A typed literal, such as "date '2001-09-28'" or "interval '2 hours'"."""
M_OPEN = [(T.Name.Builtin, None), (T.Keyword, "TIMESTAMP")]
M_CLOSE = T.String.Single, None
M_EXTEND = T.Keyword, ("DAY", "HOUR", "MINUTE", "MONTH", "SECOND", "YEAR")
class Parenthesis(TokenList):
"""Tokens between parenthesis."""
M_OPEN = T.Punctuation, '('
M_CLOSE = T.Punctuation, ')'
@property
def _groupable_tokens(self):
return self.tokens[1:-1]
class SquareBrackets(TokenList):
"""Tokens between square brackets"""
M_OPEN = T.Punctuation, '['
M_CLOSE = T.Punctuation, ']'
@property
def _groupable_tokens(self):
return self.tokens[1:-1]
class Assignment(TokenList):
"""An assignment like 'var := val;'"""
class If(TokenList):
"""An 'if' clause with possible 'else if' or 'else' parts."""
M_OPEN = T.Keyword, 'IF'
M_CLOSE = T.Keyword, 'END IF'
class For(TokenList):
"""A 'FOR' loop."""
M_OPEN = T.Keyword, ('FOR', 'FOREACH')
M_CLOSE = T.Keyword, 'END LOOP'
class Comparison(TokenList):
"""A comparison used for example in WHERE clauses."""
@property
def left(self):
return self.tokens[0]
@property
def right(self):
return self.tokens[-1]
class Comment(TokenList):
"""A comment."""
def is_multiline(self):
return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
class Where(TokenList):
"""A WHERE clause."""
M_OPEN = T.Keyword, 'WHERE'
M_CLOSE = T.Keyword, (
'ORDER BY', 'GROUP BY', 'LIMIT', 'UNION', 'UNION ALL', 'EXCEPT',
'HAVING', 'RETURNING', 'INTO')
class Having(TokenList):
"""A HAVING clause."""
M_OPEN = T.Keyword, 'HAVING'
M_CLOSE = T.Keyword, ('ORDER BY', 'LIMIT')
class Case(TokenList):
"""A CASE statement with one or more WHEN and possibly an ELSE part."""
M_OPEN = T.Keyword, 'CASE'
M_CLOSE = T.Keyword, 'END'
def get_cases(self, skip_ws=False):
"""Returns a list of 2-tuples (condition, value).
If an ELSE part exists, its condition is None.
"""
CONDITION = 1
VALUE = 2
ret = []
mode = CONDITION
for token in self.tokens:
# Set mode from the current statement
if token.match(T.Keyword, 'CASE'):
continue
elif skip_ws and token.ttype in T.Whitespace:
continue
elif token.match(T.Keyword, 'WHEN'):
ret.append(([], []))
mode = CONDITION
elif token.match(T.Keyword, 'THEN'):
mode = VALUE
elif token.match(T.Keyword, 'ELSE'):
ret.append((None, []))
mode = VALUE
elif token.match(T.Keyword, 'END'):
mode = None
# First condition without preceding WHEN
if mode and not ret:
ret.append(([], []))
# Append token depending on the current mode
if mode == CONDITION:
ret[-1][0].append(token)
elif mode == VALUE:
ret[-1][1].append(token)
# Return cases list
return ret
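A short sketch of get_cases(); note the ELSE branch reports None as its condition:

import sqlparse

case = sqlparse.parse('case when foo = 1 then 2 else 3 end')[0].tokens[0]
for condition, value in case.get_cases(skip_ws=True):
    print(condition, value)  # condition is None for the ELSE part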
class Function(NameAliasMixin, TokenList):
"""A function or procedure call."""
def get_parameters(self):
"""Return a list of parameters."""
parenthesis = self.tokens[-1]
for token in parenthesis.tokens:
if isinstance(token, IdentifierList):
return token.get_identifiers()
elif imt(token, i=(Function, Identifier), t=T.Literal):
return [token, ]
return []
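For example (matching the function-grouping tests below):

import sqlparse

func = sqlparse.parse('foo(null, bar)')[0].tokens[0]
len(list(func.get_parameters()))  # 2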
class Begin(TokenList):
"""A BEGIN/END block."""
M_OPEN = T.Keyword, 'BEGIN'
M_CLOSE = T.Keyword, 'END'
class Operation(TokenList):
"""Grouping of operations"""
class Values(TokenList):
"""Grouping of values"""
class Command(TokenList):
"""Grouping of CLI commands."""


@@ -1,8 +1,11 @@
# Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php.
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
#
# The Token implementation is based on pygment's token system written
# by Georg Brandl.
# http://pygments.org/
@@ -13,31 +16,18 @@
class _TokenType(tuple):
parent = None
def split(self):
buf = []
node = self
while node is not None:
buf.append(node)
node = node.parent
buf.reverse()
return buf
def __contains__(self, item):
return item is not None and (self is item or item[:len(self)] == self)
def __contains__(self, val):
return val is not None and (self is val or val[:len(self)] == self)
def __getattr__(self, val):
if not val or not val[0].isupper():
return tuple.__getattribute__(self, val)
new = _TokenType(self + (val,))
setattr(self, val, new)
def __getattr__(self, name):
new = _TokenType(self + (name,))
setattr(self, name, new)
new.parent = self
return new
def __hash__(self):
return hash(tuple(self))
def __repr__(self):
return 'Token' + (self and '.' or '') + '.'.join(self)
# self can be False only if it's the `root`, i.e. Token itself
return 'Token' + ('.' if self else '') + '.'.join(self)
Token = _TokenType()
@@ -61,12 +51,13 @@ Operator = Token.Operator
Comparison = Operator.Comparison
Wildcard = Token.Wildcard
Comment = Token.Comment
Assignment = Token.Assignement
Assignment = Token.Assignment
# Generic types for non-source code
Generic = Token.Generic
Command = Generic.Command
# String and some others are not direct childs of Token.
# String and some others are not direct children of Token.
# alias them:
Token.Token = Token
Token.String = String
@@ -75,9 +66,4 @@ Token.Number = Number
# SQL specific tokens
DML = Keyword.DML
DDL = Keyword.DDL
Command = Keyword.Command
Group = Token.Group
Group.Parenthesis = Token.Group.Parenthesis
Group.Comment = Token.Group.Comment
Group.Where = Token.Group.Where
CTE = Keyword.CTE


@@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2018 the sqlparse authors and contributors
# <see AUTHORS file>
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause
import itertools
import re
from collections import deque
from contextlib import contextmanager
from sqlparse.compat import text_type
# This regular expression replaces the home-cooked parser that was here before.
# It is much faster, but requires an extra post-processing step to get the
# desired results (that are compatible with what you would expect from the
# str.splitlines() method).
#
# It matches groups of characters: newlines, quoted strings, or unquoted text,
# and splits on that basis. The post-processing step puts those back together
# into the actual lines of SQL.
SPLIT_REGEX = re.compile(r"""
(
(?: # Start of non-capturing group
(?:\r\n|\r|\n) | # Match any single newline, or
[^\r\n'"]+ | # Match any character series without quotes or
# newlines, or
"(?:[^"\\]|\\.)*" | # Match double-quoted strings, or
'(?:[^'\\]|\\.)*' # Match single quoted strings
)
)
""", re.VERBOSE)
LINE_MATCH = re.compile(r'(\r\n|\r|\n)')
def split_unquoted_newlines(stmt):
"""Split a string on all unquoted newlines.
Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
character is inside a string."""
text = text_type(stmt)
lines = SPLIT_REGEX.split(text)
outputlines = ['']
for line in lines:
if not line:
continue
elif LINE_MATCH.match(line):
outputlines.append('')
else:
outputlines[-1] += line
return outputlines
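A sketch of the behavior, assuming this module lands at sqlparse.utils:

from sqlparse.utils import split_unquoted_newlines

split_unquoted_newlines("select 'a\nb' as c\nfrom t")
# ["select 'a\nb' as c", 'from t'] -- the quoted newline is left alone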
def remove_quotes(val):
"""Helper that removes surrounding quotes from strings."""
if val is None:
return
if val[0] in ('"', "'") and val[0] == val[-1]:
val = val[1:-1]
return val
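For example:

from sqlparse.utils import remove_quotes

remove_quotes('"foo"')  # 'foo'
remove_quotes("'bar'")  # 'bar'
remove_quotes('baz')    # 'baz' (returned unchanged)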
def recurse(*cls):
"""Function decorator to help with recursion
:param cls: Classes to not recurse over
:return: function
"""
def wrap(f):
def wrapped_f(tlist):
for sgroup in tlist.get_sublists():
if not isinstance(sgroup, cls):
wrapped_f(sgroup)
f(tlist)
return wrapped_f
return wrap
def imt(token, i=None, m=None, t=None):
"""Helper function to simplify comparisons Instance, Match and TokenType
:param token:
:param i: Class or Tuple/List of Classes
:param m: Tuple of TokenType & Value. Can be list of Tuple for multiple
:param t: TokenType or Tuple/List of TokenTypes
:return: bool
"""
clss = i
types = [t, ] if t and not isinstance(t, list) else t
mpatterns = [m, ] if m and not isinstance(m, list) else m
if token is None:
return False
elif clss and isinstance(token, clss):
return True
elif mpatterns and any(token.match(*pattern) for pattern in mpatterns):
return True
elif types and any(token.ttype in ttype for ttype in types):
return True
else:
return False
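A brief sketch of imt() with the names defined in this module:

import sqlparse
from sqlparse import tokens as T
from sqlparse.utils import imt

tok = sqlparse.parse('select 1')[0].token_first()
imt(tok, t=T.Keyword.DML)              # True: ttype check
imt(tok, m=(T.Keyword.DML, 'SELECT'))  # True: (ttype, value) match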
def consume(iterator, n):
"""Advance the iterator n-steps ahead. If n is none, consume entirely."""
deque(itertools.islice(iterator, n), maxlen=0)
@contextmanager
def offset(filter_, n=0):
filter_.offset += n
yield
filter_.offset -= n
@contextmanager
def indent(filter_, n=1):
filter_.indent += n
yield
filter_.indent -= n
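These context managers simply bump and restore counters on a filter object; a minimal sketch with a hypothetical stand-in class:

from sqlparse.utils import indent

class _Filter:  # hypothetical object with the attribute the helper expects
    indent = 0

f = _Filter()
with indent(f, 2):
    assert f.indent == 2
assert f.indent == 0  # restored on exit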


@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
"""Helpers for testing."""
import io
import os
import pytest
DIR_PATH = os.path.dirname(__file__)
FILES_DIR = os.path.join(DIR_PATH, 'files')
@pytest.fixture()
def filepath():
"""Returns full file path for test files."""
def make_filepath(filename):
# https://stackoverflow.com/questions/18011902/py-test-pass-a-parameter-to-a-fixture-function
# Alternate solution is to use parametrization `indirect=True`
# https://stackoverflow.com/questions/18011902/py-test-pass-a-parameter-to-a-fixture-function/33879151#33879151
# Syntax is noisy and requires specific variable names
return os.path.join(FILES_DIR, filename)
return make_filepath
@pytest.fixture()
def load_file(filepath):
"""Opens filename with encoding and return its contents."""
def make_load_file(filename, encoding='utf-8'):
# https://stackoverflow.com/questions/18011902/py-test-pass-a-parameter-to-a-fixture-function
# Alternate solution is to use parametrization `indirect=True`
# https://stackoverflow.com/questions/18011902/py-test-pass-a-parameter-to-a-fixture-function/33879151#33879151
# Syntax is noisy and requires specific variable names
# And seems to be limited to only 1 argument.
with io.open(filepath(filename), encoding=encoding) as f:
return f.read().strip()
return make_load_file
@pytest.fixture()
def get_stream(filepath):
def make_stream(filename, encoding='utf-8'):
return io.open(filepath(filename), encoding=encoding)
return make_stream
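A hypothetical test sketch showing how these fixtures compose (pytest injects filepath into load_file automatically; 'function.sql' is one of the files bundled below):

def test_loads_fixture_file(load_file):
    contents = load_file('function.sql')
    assert contents.startswith('CREATE FUNCTION')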


@@ -0,0 +1,3 @@
select *
from foo
where bar = '������ϲ�����Լ���'


@@ -0,0 +1,3 @@
select *
from foo
where bar = '齐天大圣.カラフルな雲.사랑해요'


@@ -0,0 +1,12 @@
CREATE FUNCTION doubledollarinbody(var1 text) RETURNS text
/* see issue277 */
LANGUAGE plpgsql
AS $_$
DECLARE
str text;
BEGIN
str = $$'foo'$$||var1;
execute 'select '||str into str;
return str;
END
$_$;


@@ -0,0 +1,2 @@
-- this file is streamed in
insert into foo


@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
import subprocess
import sys
import pytest
import sqlparse
def test_cli_main_empty():
with pytest.raises(SystemExit):
sqlparse.cli.main([])
def test_parser_empty():
with pytest.raises(SystemExit):
parser = sqlparse.cli.create_parser()
parser.parse_args([])
def test_main_help():
# Call with the --help option as a basic sanity check.
with pytest.raises(SystemExit) as exinfo:
sqlparse.cli.main(["--help", ])
assert exinfo.value.code == 0
def test_valid_args(filepath):
# test doesn't abort
path = filepath('function.sql')
assert sqlparse.cli.main([path, '-r']) is not None
def test_invalid_choice(filepath):
path = filepath('function.sql')
with pytest.raises(SystemExit):
sqlparse.cli.main([path, '-l', 'Spanish'])
def test_invalid_args(filepath, capsys):
path = filepath('function.sql')
sqlparse.cli.main([path, '-r', '--indent_width', '0'])
_, err = capsys.readouterr()
assert err == ("[ERROR] Invalid options: indent_width requires "
"a positive integer\n")
def test_invalid_infile(filepath, capsys):
path = filepath('missing.sql')
sqlparse.cli.main([path, '-r'])
_, err = capsys.readouterr()
assert err[:22] == "[ERROR] Failed to read"
def test_invalid_outfile(filepath, capsys):
path = filepath('function.sql')
outpath = filepath('/missing/function.sql')
sqlparse.cli.main([path, '-r', '-o', outpath])
_, err = capsys.readouterr()
assert err[:22] == "[ERROR] Failed to open"
def test_stdout(filepath, load_file, capsys):
path = filepath('begintag.sql')
expected = load_file('begintag.sql')
sqlparse.cli.main([path])
out, _ = capsys.readouterr()
assert out == expected
def test_script():
# Call with the --help option as a basic sanity check.
cmd = "{0:s} -m sqlparse.cli --help".format(sys.executable)
assert subprocess.call(cmd.split()) == 0
def test_encoding_utf8_stdout(filepath, load_file, capfd):
path = filepath('encoding_utf8.sql')
expected = load_file('encoding_utf8.sql', 'utf-8')
sys.stdout.encoding = 'utf-8'
sqlparse.cli.main([path])
out, _ = capfd.readouterr()
assert out == expected
def test_encoding_utf8_output_file(filepath, load_file, tmpdir):
in_path = filepath('encoding_utf8.sql')
expected = load_file('encoding_utf8.sql', 'utf-8')
out_path = tmpdir.dirname + '/encoding_utf8.out.sql'
sqlparse.cli.main([in_path, '-o', out_path])
out = load_file(out_path, 'utf-8')
assert out == expected
def test_encoding_gbk_stdout(filepath, load_file, capfd):
path = filepath('encoding_gbk.sql')
expected = load_file('encoding_gbk.sql', 'gbk')
sys.stdout.encoding = 'gbk'
sqlparse.cli.main([path, '--encoding', 'gbk'])
out, _ = capfd.readouterr()
assert out == expected
def test_encoding_gbk_output_file(filepath, load_file, tmpdir):
in_path = filepath('encoding_gbk.sql')
expected = load_file('encoding_gbk.sql', 'gbk')
out_path = tmpdir.dirname + '/encoding_gbk.out.sql'
sqlparse.cli.main([in_path, '--encoding', 'gbk', '-o', out_path])
out = load_file(out_path, 'gbk')
assert out == expected
def test_encoding_stdin_utf8(filepath, load_file, capfd):
path = filepath('encoding_utf8.sql')
expected = load_file('encoding_utf8.sql', 'utf-8')
old_stdin = sys.stdin
with open(path, 'r') as f:
sys.stdin = f
sys.stdout.encoding = 'utf-8'
sqlparse.cli.main(['-'])
sys.stdin = old_stdin
out, _ = capfd.readouterr()
assert out == expected
def test_encoding_stdin_gbk(filepath, load_file, capfd):
path = filepath('encoding_gbk.sql')
expected = load_file('encoding_gbk.sql', 'gbk')
old_stdin = sys.stdin
with open(path, 'r') as stream:
sys.stdin = stream
sys.stdout.encoding = 'gbk'
sqlparse.cli.main(['-', '--encoding', 'gbk'])
sys.stdin = old_stdin
out, _ = capfd.readouterr()
assert out == expected
def test_encoding(filepath, capsys):
path = filepath('test_cp1251.sql')
expected = u'insert into foo values (1); -- Песня про надежду\n'
sqlparse.cli.main([path, '--encoding=cp1251'])
out, _ = capsys.readouterr()
assert out == expected
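For reference, the same entry point can be driven programmatically; a sketch with a placeholder input path:

import sqlparse.cli

sqlparse.cli.main(['query.sql', '-r'])  # 'query.sql' is a hypothetical path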


@@ -0,0 +1,709 @@
# -*- coding: utf-8 -*-
import pytest
import sqlparse
from sqlparse.exceptions import SQLParseError
class TestFormat(object):
def test_keywordcase(self):
sql = 'select * from bar; -- select foo\n'
res = sqlparse.format(sql, keyword_case='upper')
assert res == 'SELECT * FROM bar; -- select foo\n'
res = sqlparse.format(sql, keyword_case='capitalize')
assert res == 'Select * From bar; -- select foo\n'
res = sqlparse.format(sql.upper(), keyword_case='lower')
assert res == 'select * from BAR; -- SELECT FOO\n'
def test_keywordcase_invalid_option(self):
sql = 'select * from bar; -- select foo\n'
with pytest.raises(SQLParseError):
sqlparse.format(sql, keyword_case='foo')
def test_identifiercase(self):
sql = 'select * from bar; -- select foo\n'
res = sqlparse.format(sql, identifier_case='upper')
assert res == 'select * from BAR; -- select foo\n'
res = sqlparse.format(sql, identifier_case='capitalize')
assert res == 'select * from Bar; -- select foo\n'
res = sqlparse.format(sql.upper(), identifier_case='lower')
assert res == 'SELECT * FROM bar; -- SELECT FOO\n'
def test_identifiercase_invalid_option(self):
sql = 'select * from bar; -- select foo\n'
with pytest.raises(SQLParseError):
sqlparse.format(sql, identifier_case='foo')
def test_identifiercase_quotes(self):
sql = 'select * from "foo"."bar"'
res = sqlparse.format(sql, identifier_case="upper")
assert res == 'select * from "foo"."bar"'
def test_strip_comments_single(self):
sql = 'select *-- statement starts here\nfrom foo'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select * from foo'
sql = 'select * -- statement starts here\nfrom foo'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select * from foo'
sql = 'select-- foo\nfrom -- bar\nwhere'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select from where'
sql = 'select *-- statement starts here\n\nfrom foo'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select * from foo'
sql = 'select * from foo-- statement starts here\nwhere'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select * from foo where'
sql = 'select a-- statement starts here\nfrom foo'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select a from foo'
sql = '--comment\nselect a-- statement starts here\n' \
'from foo--comment\nf'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select a from foo f'
def test_strip_comments_invalid_option(self):
sql = 'select-- foo\nfrom -- bar\nwhere'
with pytest.raises(SQLParseError):
sqlparse.format(sql, strip_comments=None)
def test_strip_comments_multi(self):
sql = '/* sql starts here */\nselect'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select'
sql = '/* sql starts here */ select'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select'
sql = '/*\n * sql starts here\n */\nselect'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select'
sql = 'select (/* sql starts here */ select 2)'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select (select 2)'
sql = 'select (/* sql /* starts here */ select 2)'
res = sqlparse.format(sql, strip_comments=True)
assert res == 'select (select 2)'
def test_strip_ws(self):
f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
s = 'select\n* from foo\n\twhere ( 1 = 2 )\n'
assert f(s) == 'select * from foo where (1 = 2)'
s = 'select -- foo\nfrom bar\n'
assert f(s) == 'select -- foo\nfrom bar'
def test_strip_ws_invalid_option(self):
s = 'select -- foo\nfrom bar\n'
with pytest.raises(SQLParseError):
sqlparse.format(s, strip_whitespace=None)
def test_preserve_ws(self):
# preserve at least one whitespace after subgroups
f = lambda sql: sqlparse.format(sql, strip_whitespace=True)
s = 'select\n* /* foo */ from bar '
assert f(s) == 'select * /* foo */ from bar'
def test_notransform_of_quoted_crlf(self):
# Make sure that CR/CR+LF characters inside string literals don't get
# affected by the formatter.
s1 = "SELECT some_column LIKE 'value\r'"
s2 = "SELECT some_column LIKE 'value\r'\r\nWHERE id = 1\n"
s3 = "SELECT some_column LIKE 'value\\'\r' WHERE id = 1\r"
s4 = "SELECT some_column LIKE 'value\\\\\\'\r' WHERE id = 1\r\n"
f = lambda x: sqlparse.format(x)
# Quoted CR/CR+LF sequences are preserved; unquoted ones are
# normalized to '\n' by the formatter, per the expected values below.
assert f(s1) == "SELECT some_column LIKE 'value\r'"
assert f(s2) == "SELECT some_column LIKE 'value\r'\nWHERE id = 1\n"
assert f(s3) == "SELECT some_column LIKE 'value\\'\r' WHERE id = 1\n"
assert (f(s4)
== "SELECT some_column LIKE 'value\\\\\\'\r' WHERE id = 1\n")
class TestFormatReindentAligned(object):
@staticmethod
def formatter(sql):
return sqlparse.format(sql, reindent_aligned=True)
def test_basic(self):
sql = """
select a, b as bb,c from table
join (select a * 2 as a from new_table) other
on table.a = other.a
where c is true
and b between 3 and 4
or d is 'blue'
limit 10
"""
assert self.formatter(sql) == '\n'.join([
'select a,',
' b as bb,',
' c',
' from table',
' join (',
' select a * 2 as a',
' from new_table',
' ) other',
' on table.a = other.a',
' where c is true',
' and b between 3 and 4',
" or d is 'blue'",
' limit 10'])
def test_joins(self):
sql = """
select * from a
join b on a.one = b.one
left join c on c.two = a.two and c.three = a.three
full outer join d on d.three = a.three
cross join e on e.four = a.four
join f using (one, two, three)
"""
assert self.formatter(sql) == '\n'.join([
'select *',
' from a',
' join b',
' on a.one = b.one',
' left join c',
' on c.two = a.two',
' and c.three = a.three',
' full outer join d',
' on d.three = a.three',
' cross join e',
' on e.four = a.four',
' join f using (one, two, three)'])
def test_case_statement(self):
sql = """
select a,
case when a = 0
then 1
when bb = 1 then 1
when c = 2 then 2
else 0 end as d,
extra_col
from table
where c is true
and b between 3 and 4
"""
assert self.formatter(sql) == '\n'.join([
'select a,',
' case when a = 0 then 1',
' when bb = 1 then 1',
' when c = 2 then 2',
' else 0',
' end as d,',
' extra_col',
' from table',
' where c is true',
' and b between 3 and 4'])
def test_case_statement_with_between(self):
sql = """
select a,
case when a = 0
then 1
when bb = 1 then 1
when c = 2 then 2
when d between 3 and 5 then 3
else 0 end as d,
extra_col
from table
where c is true
and b between 3 and 4
"""
assert self.formatter(sql) == '\n'.join([
'select a,',
' case when a = 0 then 1',
' when bb = 1 then 1',
' when c = 2 then 2',
' when d between 3 and 5 then 3',
' else 0',
' end as d,',
' extra_col',
' from table',
' where c is true',
' and b between 3 and 4'])
def test_group_by(self):
sql = """
select a, b, c, sum(x) as sum_x, count(y) as cnt_y
from table
group by a,b,c
having sum(x) > 1
and count(y) > 5
order by 3,2,1
"""
assert self.formatter(sql) == '\n'.join([
'select a,',
' b,',
' c,',
' sum(x) as sum_x,',
' count(y) as cnt_y',
' from table',
' group by a,',
' b,',
' c',
'having sum(x) > 1',
' and count(y) > 5',
' order by 3,',
' 2,',
' 1'])
def test_group_by_subquery(self):
# TODO: add subquery alias when test_identifier_list_subquery fixed
sql = """
select *, sum_b + 2 as mod_sum
from (
select a, sum(b) as sum_b
from table
group by a,z)
order by 1,2
"""
assert self.formatter(sql) == '\n'.join([
'select *,',
' sum_b + 2 as mod_sum',
' from (',
' select a,',
' sum(b) as sum_b',
' from table',
' group by a,',
' z',
' )',
' order by 1,',
' 2'])
def test_window_functions(self):
sql = """
select a,
SUM(a) OVER (PARTITION BY b ORDER BY c ROWS
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as sum_a,
ROW_NUMBER() OVER
(PARTITION BY b, c ORDER BY d DESC) as row_num
from table"""
assert self.formatter(sql) == '\n'.join([
'select a,',
' SUM(a) OVER (PARTITION BY b ORDER BY c ROWS '
'BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as sum_a,',
' ROW_NUMBER() OVER '
'(PARTITION BY b, c ORDER BY d DESC) as row_num',
' from table'])
class TestSpacesAroundOperators(object):
@staticmethod
def formatter(sql):
return sqlparse.format(sql, use_space_around_operators=True)
def test_basic(self):
sql = ('select a+b as d from table '
'where (c-d)%2= 1 and e> 3.0/4 and z^2 <100')
assert self.formatter(sql) == (
'select a + b as d from table '
'where (c - d) % 2 = 1 and e > 3.0 / 4 and z ^ 2 < 100')
def test_bools(self):
sql = 'select * from table where a &&b or c||d'
assert self.formatter(
sql) == 'select * from table where a && b or c || d'
def test_nested(self):
sql = 'select *, case when a-b then c end from table'
assert self.formatter(
sql) == 'select *, case when a - b then c end from table'
def test_wildcard_vs_mult(self):
sql = 'select a*b-c from table'
assert self.formatter(sql) == 'select a * b - c from table'
class TestFormatReindent(object):
def test_option(self):
with pytest.raises(SQLParseError):
sqlparse.format('foo', reindent=2)
with pytest.raises(SQLParseError):
sqlparse.format('foo', indent_tabs=2)
with pytest.raises(SQLParseError):
sqlparse.format('foo', reindent=True, indent_width='foo')
with pytest.raises(SQLParseError):
sqlparse.format('foo', reindent=True, indent_width=-12)
with pytest.raises(SQLParseError):
sqlparse.format('foo', reindent=True, wrap_after='foo')
with pytest.raises(SQLParseError):
sqlparse.format('foo', reindent=True, wrap_after=-12)
with pytest.raises(SQLParseError):
sqlparse.format('foo', reindent=True, comma_first='foo')
def test_stmts(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select foo; select bar'
assert f(s) == 'select foo;\n\nselect bar'
s = 'select foo'
assert f(s) == 'select foo'
s = 'select foo; -- test\n select bar'
assert f(s) == 'select foo; -- test\n\nselect bar'
def test_keywords(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select * from foo union select * from bar;'
assert f(s) == '\n'.join([
'select *',
'from foo',
'union',
'select *',
'from bar;'])
def test_keywords_between(self):
# issue 14
# don't break AND after BETWEEN
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'and foo between 1 and 2 and bar = 3'
assert f(s) == '\n'.join([
'',
'and foo between 1 and 2',
'and bar = 3'])
def test_parenthesis(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select count(*) from (select * from foo);'
assert f(s) == '\n'.join([
'select count(*)',
'from',
' (select *',
' from foo);'])
assert f("select f(1)") == 'select f(1)'
assert f("select f( 1 )") == 'select f(1)'
assert f("select f(\n\n\n1\n\n\n)") == 'select f(1)'
assert f("select f(\n\n\n 1 \n\n\n)") == 'select f(1)'
assert f("select f(\n\n\n 1 \n\n\n)") == 'select f(1)'
def test_where(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select * from foo where bar = 1 and baz = 2 or bzz = 3;'
assert f(s) == '\n'.join([
'select *',
'from foo',
'where bar = 1',
' and baz = 2',
' or bzz = 3;'])
s = 'select * from foo where bar = 1 and (baz = 2 or bzz = 3);'
assert f(s) == '\n'.join([
'select *',
'from foo',
'where bar = 1',
' and (baz = 2',
' or bzz = 3);'])
def test_join(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select * from foo join bar on 1 = 2'
assert f(s) == '\n'.join([
'select *',
'from foo',
'join bar on 1 = 2'])
s = 'select * from foo inner join bar on 1 = 2'
assert f(s) == '\n'.join([
'select *',
'from foo',
'inner join bar on 1 = 2'])
s = 'select * from foo left outer join bar on 1 = 2'
assert f(s) == '\n'.join([
'select *',
'from foo',
'left outer join bar on 1 = 2'])
s = 'select * from foo straight_join bar on 1 = 2'
assert f(s) == '\n'.join([
'select *',
'from foo',
'straight_join bar on 1 = 2'])
def test_identifier_list(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select foo, bar, baz from table1, table2 where 1 = 2'
assert f(s) == '\n'.join([
'select foo,',
' bar,',
' baz',
'from table1,',
' table2',
'where 1 = 2'])
s = 'select a.*, b.id from a, b'
assert f(s) == '\n'.join([
'select a.*,',
' b.id',
'from a,',
' b'])
def test_identifier_list_with_wrap_after(self):
f = lambda sql: sqlparse.format(sql, reindent=True, wrap_after=14)
s = 'select foo, bar, baz from table1, table2 where 1 = 2'
assert f(s) == '\n'.join([
'select foo, bar,',
' baz',
'from table1, table2',
'where 1 = 2'])
def test_identifier_list_comment_first(self):
f = lambda sql: sqlparse.format(sql, reindent=True, comma_first=True)
# note the 3: It cleans up whitespace too!
s = 'select foo, bar, baz from table where foo in (1, 2,3)'
assert f(s) == '\n'.join([
'select foo',
' , bar',
' , baz',
'from table',
'where foo in (1',
' , 2',
' , 3)'])
def test_identifier_list_with_functions(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = ("select 'abc' as foo, coalesce(col1, col2)||col3 as bar,"
"col3 from my_table")
assert f(s) == '\n'.join([
"select 'abc' as foo,",
" coalesce(col1, col2)||col3 as bar,",
" col3",
"from my_table"])
def test_long_identifier_list_with_functions(self):
f = lambda sql: sqlparse.format(sql, reindent=True, wrap_after=30)
s = ("select 'abc' as foo, json_build_object('a', a,"
"'b', b, 'c', c, 'd', d, 'e', e) as col2"
"col3 from my_table")
assert f(s) == '\n'.join([
"select 'abc' as foo,",
" json_build_object('a',",
" a, 'b', b, 'c', c, 'd', d,",
" 'e', e) as col2col3",
"from my_table"])
def test_case(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'case when foo = 1 then 2 when foo = 3 then 4 else 5 end'
assert f(s) == '\n'.join([
'case',
' when foo = 1 then 2',
' when foo = 3 then 4',
' else 5',
'end'])
def test_case2(self):
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'case(foo) when bar = 1 then 2 else 3 end'
assert f(s) == '\n'.join([
'case(foo)',
' when bar = 1 then 2',
' else 3',
'end'])
def test_nested_identifier_list(self):
# issue4
f = lambda sql: sqlparse.format(sql, reindent=True)
s = '(foo as bar, bar1, bar2 as bar3, b4 as b5)'
assert f(s) == '\n'.join([
'(foo as bar,',
' bar1,',
' bar2 as bar3,',
' b4 as b5)'])
def test_duplicate_linebreaks(self):
# issue3
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select c1 -- column1\nfrom foo'
assert f(s) == '\n'.join([
'select c1 -- column1',
'from foo'])
s = 'select c1 -- column1\nfrom foo'
r = sqlparse.format(s, reindent=True, strip_comments=True)
assert r == '\n'.join([
'select c1',
'from foo'])
s = 'select c1\nfrom foo\norder by c1'
assert f(s) == '\n'.join([
'select c1',
'from foo',
'order by c1'])
s = 'select c1 from t1 where (c1 = 1) order by c1'
assert f(s) == '\n'.join([
'select c1',
'from t1',
'where (c1 = 1)',
'order by c1'])
def test_keywordfunctions(self):
# issue36
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select max(a) b, foo, bar'
assert f(s) == '\n'.join([
'select max(a) b,',
' foo,',
' bar'])
def test_identifier_and_functions(self):
# issue45
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'select foo.bar, nvl(1) from dual'
assert f(s) == '\n'.join([
'select foo.bar,',
' nvl(1)',
'from dual'])
def test_insert_values(self):
# issue 329
f = lambda sql: sqlparse.format(sql, reindent=True)
s = 'insert into foo values (1, 2)'
assert f(s) == '\n'.join([
'insert into foo',
'values (1, 2)'])
s = 'insert into foo values (1, 2), (3, 4), (5, 6)'
assert f(s) == '\n'.join([
'insert into foo',
'values (1, 2),',
' (3, 4),',
' (5, 6)'])
s = 'insert into foo(a, b) values (1, 2), (3, 4), (5, 6)'
assert f(s) == '\n'.join([
'insert into foo(a, b)',
'values (1, 2),',
' (3, 4),',
' (5, 6)'])
f = lambda sql: sqlparse.format(sql, reindent=True,
comma_first=True)
s = 'insert into foo values (1, 2)'
assert f(s) == '\n'.join([
'insert into foo',
'values (1, 2)'])
s = 'insert into foo values (1, 2), (3, 4), (5, 6)'
assert f(s) == '\n'.join([
'insert into foo',
'values (1, 2)',
' , (3, 4)',
' , (5, 6)'])
s = 'insert into foo(a, b) values (1, 2), (3, 4), (5, 6)'
assert f(s) == '\n'.join([
'insert into foo(a, b)',
'values (1, 2)',
' , (3, 4)',
' , (5, 6)'])
class TestOutputFormat(object):
def test_python(self):
sql = 'select * from foo;'
f = lambda sql: sqlparse.format(sql, output_format='python')
assert f(sql) == "sql = 'select * from foo;'"
f = lambda sql: sqlparse.format(sql, output_format='python',
reindent=True)
assert f(sql) == '\n'.join([
"sql = ('select * '",
" 'from foo;')"])
def test_python_multiple_statements(self):
sql = 'select * from foo; select 1 from dual'
f = lambda sql: sqlparse.format(sql, output_format='python')
assert f(sql) == '\n'.join([
"sql = 'select * from foo; '",
"sql2 = 'select 1 from dual'"])
@pytest.mark.xfail(reason="Needs fixing")
def test_python_multiple_statements_with_formatting(self):
sql = 'select * from foo; select 1 from dual'
f = lambda sql: sqlparse.format(sql, output_format='python',
reindent=True)
assert f(sql) == '\n'.join([
"sql = ('select * '",
" 'from foo;')",
"sql2 = ('select 1 '",
" 'from dual')"])
def test_php(self):
sql = 'select * from foo;'
f = lambda sql: sqlparse.format(sql, output_format='php')
assert f(sql) == '$sql = "select * from foo;";'
f = lambda sql: sqlparse.format(sql, output_format='php',
reindent=True)
assert f(sql) == '\n'.join([
'$sql = "select * ";',
'$sql .= "from foo;";'])
def test_sql(self):
# "sql" is an allowed option but has no effect
sql = 'select * from foo;'
f = lambda sql: sqlparse.format(sql, output_format='sql')
assert f(sql) == 'select * from foo;'
def test_invalid_option(self):
sql = 'select * from foo;'
with pytest.raises(SQLParseError):
sqlparse.format(sql, output_format='foo')
def test_format_column_ordering():
# issue89
sql = 'select * from foo order by c1 desc, c2, c3;'
formatted = sqlparse.format(sql, reindent=True)
expected = '\n'.join([
'select *',
'from foo',
'order by c1 desc,',
' c2,',
' c3;'])
assert formatted == expected
def test_truncate_strings():
sql = "update foo set value = '{0}';".format('x' * 1000)
formatted = sqlparse.format(sql, truncate_strings=10)
assert formatted == "update foo set value = 'xxxxxxxxxx[...]';"
formatted = sqlparse.format(sql, truncate_strings=3, truncate_char='YYY')
assert formatted == "update foo set value = 'xxxYYY';"
@pytest.mark.parametrize('option', ['bar', -1, 0])
def test_truncate_strings_invalid_option2(option):
with pytest.raises(SQLParseError):
sqlparse.format('foo', truncate_strings=option)
@pytest.mark.parametrize('sql', [
'select verrrylongcolumn from foo',
'select "verrrylongcolumn" from "foo"'])
def test_truncate_strings_doesnt_truncate_identifiers(sql):
formatted = sqlparse.format(sql, truncate_strings=2)
assert formatted == sql
def test_having_produces_newline():
sql = ('select * from foo, bar where bar.id = foo.bar_id '
'having sum(bar.value) > 100')
formatted = sqlparse.format(sql, reindent=True)
expected = [
'select *',
'from foo,',
' bar',
'where bar.id = foo.bar_id',
'having sum(bar.value) > 100']
assert formatted == '\n'.join(expected)
@pytest.mark.parametrize('right_margin', ['ten', 2])
def test_format_right_margin_invalid_option(right_margin):
with pytest.raises(SQLParseError):
sqlparse.format('foo', right_margin=right_margin)
@pytest.mark.xfail(reason="Needs fixing")
def test_format_right_margin():
# TODO: Needs better test, only raises exception right now
sqlparse.format('foo', right_margin="79")


@@ -0,0 +1,642 @@
# -*- coding: utf-8 -*-
import pytest
import sqlparse
from sqlparse import sql, tokens as T
def test_grouping_parenthesis():
s = 'select (select (x3) x2) and (y2) bar'
parsed = sqlparse.parse(s)[0]
assert str(parsed) == s
assert len(parsed.tokens) == 7
assert isinstance(parsed.tokens[2], sql.Parenthesis)
assert isinstance(parsed.tokens[-1], sql.Identifier)
assert len(parsed.tokens[2].tokens) == 5
assert isinstance(parsed.tokens[2].tokens[3], sql.Identifier)
assert isinstance(parsed.tokens[2].tokens[3].tokens[0], sql.Parenthesis)
assert len(parsed.tokens[2].tokens[3].tokens) == 3
def test_grouping_comments():
s = '/*\n * foo\n */ \n bar'
parsed = sqlparse.parse(s)[0]
assert str(parsed) == s
assert len(parsed.tokens) == 2
@pytest.mark.parametrize('s', ['foo := 1;', 'foo := 1'])
def test_grouping_assignment(s):
parsed = sqlparse.parse(s)[0]
assert len(parsed.tokens) == 1
assert isinstance(parsed.tokens[0], sql.Assignment)
@pytest.mark.parametrize('s', ["x > DATE '2020-01-01'", "x > TIMESTAMP '2020-01-01 00:00:00'"])
def test_grouping_typed_literal(s):
parsed = sqlparse.parse(s)[0]
assert isinstance(parsed[4], sql.TypedLiteral)
@pytest.mark.parametrize('s, a, b', [
('select a from b where c < d + e', sql.Identifier, sql.Identifier),
('select a from b where c < d + interval \'1 day\'', sql.Identifier, sql.TypedLiteral),
('select a from b where c < d + interval \'6\' month', sql.Identifier, sql.TypedLiteral),
('select a from b where c < current_timestamp - interval \'1 day\'', sql.Token, sql.TypedLiteral),
])
def test_compare_expr(s, a, b):
parsed = sqlparse.parse(s)[0]
assert str(parsed) == s
assert isinstance(parsed.tokens[2], sql.Identifier)
assert isinstance(parsed.tokens[6], sql.Identifier)
assert isinstance(parsed.tokens[8], sql.Where)
assert len(parsed.tokens) == 9
where = parsed.tokens[8]
assert isinstance(where.tokens[2], sql.Comparison)
assert len(where.tokens) == 3
comparison = where.tokens[2]
assert isinstance(comparison.tokens[0], sql.Identifier)
assert comparison.tokens[2].ttype is T.Operator.Comparison
assert isinstance(comparison.tokens[4], sql.Operation)
assert len(comparison.tokens) == 5
operation = comparison.tokens[4]
assert isinstance(operation.tokens[0], a)
assert operation.tokens[2].ttype is T.Operator
assert isinstance(operation.tokens[4], b)
assert len(operation.tokens) == 5
def test_grouping_identifiers():
s = 'select foo.bar from "myscheme"."table" where fail. order'
parsed = sqlparse.parse(s)[0]
assert str(parsed) == s
assert isinstance(parsed.tokens[2], sql.Identifier)
assert isinstance(parsed.tokens[6], sql.Identifier)
assert isinstance(parsed.tokens[8], sql.Where)
s = 'select * from foo where foo.id = 1'
parsed = sqlparse.parse(s)[0]
assert str(parsed) == s
assert isinstance(parsed.tokens[-1].tokens[-1].tokens[0], sql.Identifier)
s = 'select * from (select "foo"."id" from foo)'
parsed = sqlparse.parse(s)[0]
assert str(parsed) == s
assert isinstance(parsed.tokens[-1].tokens[3], sql.Identifier)
for s in ["INSERT INTO `test` VALUES('foo', 'bar');",
"INSERT INTO `test` VALUES(1, 2), (3, 4), (5, 6);",
"INSERT INTO `test(a, b)` VALUES(1, 2), (3, 4), (5, 6);"]:
parsed = sqlparse.parse(s)[0]
types = [l.ttype for l in parsed.tokens if not l.is_whitespace]
assert types == [T.DML, T.Keyword, None, None, T.Punctuation]
assert isinstance(parsed.tokens[6], sql.Values)
s = "select 1.0*(a+b) as col, sum(c)/sum(d) from myschema.mytable"
parsed = sqlparse.parse(s)[0]
assert len(parsed.tokens) == 7
assert isinstance(parsed.tokens[2], sql.IdentifierList)
assert len(parsed.tokens[2].tokens) == 4
identifiers = list(parsed.tokens[2].get_identifiers())
assert len(identifiers) == 2
assert identifiers[0].get_alias() == "col"
@pytest.mark.parametrize('s', [
'1 as f',
'foo as f',
'foo f',
'1/2 as f',
'1/2 f',
'1<2 as f', # issue327
'1<2 f',
])
def test_simple_identifiers(s):
parsed = sqlparse.parse(s)[0]
assert isinstance(parsed.tokens[0], sql.Identifier)
@pytest.mark.parametrize('s', [
'foo, bar',
'sum(a), sum(b)',
'sum(a) as x, b as y',
'sum(a)::integer, b',
'sum(a)/count(b) as x, y',
'sum(a)::integer as x, y',
'sum(a)::integer/count(b) as x, y', # issue297
])
def test_group_identifier_list(s):
parsed = sqlparse.parse(s)[0]
assert isinstance(parsed.tokens[0], sql.IdentifierList)
def test_grouping_identifier_wildcard():
p = sqlparse.parse('a.*, b.id')[0]
assert isinstance(p.tokens[0], sql.IdentifierList)
assert isinstance(p.tokens[0].tokens[0], sql.Identifier)
assert isinstance(p.tokens[0].tokens[-1], sql.Identifier)
def test_grouping_identifier_name_wildcard():
p = sqlparse.parse('a.*')[0]
t = p.tokens[0]
assert t.get_name() == '*'
assert t.is_wildcard() is True
def test_grouping_identifier_invalid():
p = sqlparse.parse('a.')[0]
assert isinstance(p.tokens[0], sql.Identifier)
assert p.tokens[0].has_alias() is False
assert p.tokens[0].get_name() is None
assert p.tokens[0].get_real_name() is None
assert p.tokens[0].get_parent_name() == 'a'
def test_grouping_identifier_invalid_in_middle():
# issue261
s = 'SELECT foo. FROM foo'
p = sqlparse.parse(s)[0]
assert isinstance(p[2], sql.Identifier)
assert p[2][1].ttype == T.Punctuation
assert p[3].ttype == T.Whitespace
assert str(p[2]) == 'foo.'
@pytest.mark.parametrize('s', ['foo as (select *)', 'foo as(select *)'])
def test_grouping_identifer_as(s):
# issue507
p = sqlparse.parse(s)[0]
assert isinstance(p.tokens[0], sql.Identifier)
token = p.tokens[0].tokens[2]
assert token.ttype == T.Keyword
assert token.normalized == 'AS'
def test_grouping_identifier_as_invalid():
# issue8
p = sqlparse.parse('foo as select *')[0]
assert len(p.tokens), 5
assert isinstance(p.tokens[0], sql.Identifier)
assert len(p.tokens[0].tokens) == 1
assert p.tokens[2].ttype == T.Keyword
def test_grouping_identifier_function():
p = sqlparse.parse('foo() as bar')[0]
assert isinstance(p.tokens[0], sql.Identifier)
assert isinstance(p.tokens[0].tokens[0], sql.Function)
p = sqlparse.parse('foo()||col2 bar')[0]
assert isinstance(p.tokens[0], sql.Identifier)
assert isinstance(p.tokens[0].tokens[0], sql.Operation)
assert isinstance(p.tokens[0].tokens[0].tokens[0], sql.Function)
@pytest.mark.parametrize('s', ['foo+100', 'foo + 100', 'foo*100'])
def test_grouping_operation(s):
p = sqlparse.parse(s)[0]
assert isinstance(p.tokens[0], sql.Operation)
def test_grouping_identifier_list():
p = sqlparse.parse('a, b, c')[0]
assert isinstance(p.tokens[0], sql.IdentifierList)
p = sqlparse.parse('(a, b, c)')[0]
assert isinstance(p.tokens[0].tokens[1], sql.IdentifierList)
def test_grouping_identifier_list_subquery():
"""identifier lists should still work in subqueries with aliases"""
p = sqlparse.parse("select * from ("
"select a, b + c as d from table) sub")[0]
subquery = p.tokens[-1].tokens[0]
idx, iden_list = subquery.token_next_by(i=sql.IdentifierList)
assert iden_list is not None
# all the identifiers should be within the IdentifierList
_, ilist = subquery.token_next_by(i=sql.Identifier, idx=idx)
assert ilist is None
def test_grouping_identifier_list_case():
p = sqlparse.parse('a, case when 1 then 2 else 3 end as b, c')[0]
assert isinstance(p.tokens[0], sql.IdentifierList)
p = sqlparse.parse('(a, case when 1 then 2 else 3 end as b, c)')[0]
assert isinstance(p.tokens[0].tokens[1], sql.IdentifierList)
def test_grouping_identifier_list_other():
# issue2
p = sqlparse.parse("select *, null, 1, 'foo', bar from mytable, x")[0]
assert isinstance(p.tokens[2], sql.IdentifierList)
assert len(p.tokens[2].tokens) == 13
def test_grouping_identifier_list_with_inline_comments():
# issue163
p = sqlparse.parse('foo /* a comment */, bar')[0]
assert isinstance(p.tokens[0], sql.IdentifierList)
assert isinstance(p.tokens[0].tokens[0], sql.Identifier)
assert isinstance(p.tokens[0].tokens[3], sql.Identifier)
def test_grouping_identifiers_with_operators():
p = sqlparse.parse('a+b as c from table where (d-e)%2= 1')[0]
assert len([x for x in p.flatten() if x.ttype == T.Name]) == 5
def test_grouping_identifier_list_with_order():
# issue101
p = sqlparse.parse('1, 2 desc, 3')[0]
assert isinstance(p.tokens[0], sql.IdentifierList)
assert isinstance(p.tokens[0].tokens[3], sql.Identifier)
assert str(p.tokens[0].tokens[3]) == '2 desc'
def test_grouping_where():
s = 'select * from foo where bar = 1 order by id desc'
p = sqlparse.parse(s)[0]
assert str(p) == s
assert len(p.tokens) == 12
s = 'select x from (select y from foo where bar = 1) z'
p = sqlparse.parse(s)[0]
assert str(p) == s
assert isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where)
@pytest.mark.parametrize('s', (
'select 1 where 1 = 2 union select 2',
'select 1 where 1 = 2 union all select 2',
))
def test_grouping_where_union(s):
p = sqlparse.parse(s)[0]
assert p.tokens[5].value.startswith('union')
def test_returning_kw_ends_where_clause():
s = 'delete from foo where x > y returning z'
p = sqlparse.parse(s)[0]
assert isinstance(p.tokens[6], sql.Where)
assert p.tokens[7].ttype == T.Keyword
assert p.tokens[7].value == 'returning'
def test_into_kw_ends_where_clause(): # issue324
s = 'select * from foo where a = 1 into baz'
p = sqlparse.parse(s)[0]
assert isinstance(p.tokens[8], sql.Where)
assert p.tokens[9].ttype == T.Keyword
assert p.tokens[9].value == 'into'
@pytest.mark.parametrize('sql, expected', [
# note: typecast needs to be 2nd token for this test
('select foo::integer from bar', 'integer'),
('select (current_database())::information_schema.sql_identifier',
'information_schema.sql_identifier'),
])
def test_grouping_typecast(sql, expected):
p = sqlparse.parse(sql)[0]
assert p.tokens[2].get_typecast() == expected
def test_grouping_alias():
s = 'select foo as bar from mytable'
p = sqlparse.parse(s)[0]
assert str(p) == s
assert p.tokens[2].get_real_name() == 'foo'
assert p.tokens[2].get_alias() == 'bar'
s = 'select foo from mytable t1'
p = sqlparse.parse(s)[0]
assert str(p) == s
assert p.tokens[6].get_real_name() == 'mytable'
assert p.tokens[6].get_alias() == 't1'
s = 'select foo::integer as bar from mytable'
p = sqlparse.parse(s)[0]
assert str(p) == s
assert p.tokens[2].get_alias() == 'bar'
s = ('SELECT DISTINCT '
'(current_database())::information_schema.sql_identifier AS view')
p = sqlparse.parse(s)[0]
assert str(p) == s
assert p.tokens[4].get_alias() == 'view'
def test_grouping_alias_case():
# see issue46
p = sqlparse.parse('CASE WHEN 1 THEN 2 ELSE 3 END foo')[0]
assert len(p.tokens) == 1
assert p.tokens[0].get_alias() == 'foo'
def test_grouping_subquery_no_parens():
# Not totally sure if this is the right approach...
# When a THEN clause contains a subquery w/o parenthesis around it *and*
# a WHERE condition, the WHERE grouper consumes END too.
# This test makes sure that it doesn't fail.
p = sqlparse.parse('CASE WHEN 1 THEN select 2 where foo = 1 end')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Case)
@pytest.mark.parametrize('s', ['foo.bar', 'x, y', 'x > y', 'x / y'])
def test_grouping_alias_returns_none(s):
# see issue185 and issue445
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert p.tokens[0].get_alias() is None
def test_grouping_idlist_function():
# see issue10 too
p = sqlparse.parse('foo(1) x, bar')[0]
assert isinstance(p.tokens[0], sql.IdentifierList)
def test_grouping_comparison_exclude():
# make sure operators are not handled too lazily
p = sqlparse.parse('(=)')[0]
assert isinstance(p.tokens[0], sql.Parenthesis)
assert not isinstance(p.tokens[0].tokens[1], sql.Comparison)
p = sqlparse.parse('(a=1)')[0]
assert isinstance(p.tokens[0].tokens[1], sql.Comparison)
p = sqlparse.parse('(a>=1)')[0]
assert isinstance(p.tokens[0].tokens[1], sql.Comparison)
def test_grouping_function():
p = sqlparse.parse('foo()')[0]
assert isinstance(p.tokens[0], sql.Function)
p = sqlparse.parse('foo(null, bar)')[0]
assert isinstance(p.tokens[0], sql.Function)
assert len(list(p.tokens[0].get_parameters())) == 2
def test_grouping_function_not_in():
# issue183
p = sqlparse.parse('in(1, 2)')[0]
assert len(p.tokens) == 2
assert p.tokens[0].ttype == T.Keyword
assert isinstance(p.tokens[1], sql.Parenthesis)
def test_grouping_varchar():
p = sqlparse.parse('"text" Varchar(50) NOT NULL')[0]
assert isinstance(p.tokens[2], sql.Function)
def test_statement_get_type():
def f(sql):
return sqlparse.parse(sql)[0]
assert f('select * from foo').get_type() == 'SELECT'
assert f('update foo').get_type() == 'UPDATE'
assert f(' update foo').get_type() == 'UPDATE'
assert f('\nupdate foo').get_type() == 'UPDATE'
assert f('foo').get_type() == 'UNKNOWN'
# Statements that have a whitespace after the closing semicolon
# are parsed as two statements where the latter only consists of the
# trailing whitespace.
assert f('\n').get_type() == 'UNKNOWN'
def test_identifier_with_operators():
# issue 53
p = sqlparse.parse('foo||bar')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Operation)
# again with whitespaces
p = sqlparse.parse('foo || bar')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Operation)
def test_identifier_with_op_trailing_ws():
# make sure trailing whitespace isn't grouped with identifier
p = sqlparse.parse('foo || bar ')[0]
assert len(p.tokens) == 2
assert isinstance(p.tokens[0], sql.Operation)
assert p.tokens[1].ttype is T.Whitespace
def test_identifier_with_string_literals():
p = sqlparse.parse("foo + 'bar'")[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Operation)
# This test seems to be wrong. It was introduced when fixing #53, but #111
# showed that this shouldn't be an identifier at all. I'm leaving this
# commented in the source for a while.
# def test_identifier_string_concat():
# p = sqlparse.parse("'foo' || bar")[0]
# assert len(p.tokens) == 1
# assert isinstance(p.tokens[0], sql.Identifier)
def test_identifier_consumes_ordering():
# issue89
p = sqlparse.parse('select * from foo order by c1 desc, c2, c3')[0]
assert isinstance(p.tokens[-1], sql.IdentifierList)
ids = list(p.tokens[-1].get_identifiers())
assert len(ids) == 3
assert ids[0].get_name() == 'c1'
assert ids[0].get_ordering() == 'DESC'
assert ids[1].get_name() == 'c2'
assert ids[1].get_ordering() is None
def test_comparison_with_keywords():
# issue90
# in fact these are assignments, but for now we don't distinguish them
p = sqlparse.parse('foo = NULL')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'foo'
assert p.tokens[0].right.value == 'NULL'
# make sure it's case-insensitive
p = sqlparse.parse('foo = null')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
def test_comparison_with_floats():
# issue145
p = sqlparse.parse('foo = 25.5')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'foo'
assert p.tokens[0].right.value == '25.5'
def test_comparison_with_parenthesis():
# issue23
p = sqlparse.parse('(3 + 4) = 7')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
comp = p.tokens[0]
assert isinstance(comp.left, sql.Parenthesis)
assert comp.right.ttype is T.Number.Integer
@pytest.mark.parametrize('operator', (
'=', '!=', '>', '<', '<=', '>=', '~', '~~', '!~~',
'LIKE', 'NOT LIKE', 'ILIKE', 'NOT ILIKE',
))
def test_comparison_with_strings(operator):
# issue148
p = sqlparse.parse("foo {0} 'bar'".format(operator))[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert p.tokens[0].right.value == "'bar'"
assert p.tokens[0].right.ttype == T.String.Single
def test_like_and_ilike_comparison():
def validate_where_clause(where_clause, expected_tokens):
assert len(where_clause.tokens) == len(expected_tokens)
for where_token, expected_token in zip(where_clause, expected_tokens):
expected_ttype, expected_value = expected_token
if where_token.ttype is not None:
assert where_token.match(expected_ttype, expected_value, regex=True)
else:
# Certain tokens, such as comparison tokens, do not define a ttype that can be
# matched against. For these tokens, we ensure that the token instance is of
# the expected type and has a value matching the specified regular expression
import re
assert (isinstance(where_token, expected_ttype)
and re.match(expected_value, where_token.value))
[p1] = sqlparse.parse("select * from mytable where mytable.mycolumn LIKE 'expr%' limit 5;")
[p1_where] = [token for token in p1 if isinstance(token, sql.Where)]
validate_where_clause(p1_where, [
(T.Keyword, "where"),
(T.Whitespace, None),
(sql.Comparison, r"mytable.mycolumn LIKE.*"),
(T.Whitespace, None),
])
[p2] = sqlparse.parse(
"select * from mytable where mycolumn NOT ILIKE '-expr' group by othercolumn;")
[p2_where] = [token for token in p2 if isinstance(token, sql.Where)]
validate_where_clause(p2_where, [
(T.Keyword, "where"),
(T.Whitespace, None),
(sql.Comparison, r"mycolumn NOT ILIKE.*"),
(T.Whitespace, None),
])
def test_comparison_with_functions():
# issue230
p = sqlparse.parse('foo = DATE(bar.baz)')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'foo'
assert p.tokens[0].right.value == 'DATE(bar.baz)'
p = sqlparse.parse('DATE(foo.bar) = DATE(bar.baz)')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'DATE(foo.bar)'
assert p.tokens[0].right.value == 'DATE(bar.baz)'
p = sqlparse.parse('DATE(foo.bar) = bar.baz')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Comparison)
assert len(p.tokens[0].tokens) == 5
assert p.tokens[0].left.value == 'DATE(foo.bar)'
assert p.tokens[0].right.value == 'bar.baz'
@pytest.mark.parametrize('start', ['FOR', 'FOREACH'])
def test_forloops(start):
p = sqlparse.parse('{0} foo in bar LOOP foobar END LOOP'.format(start))[0]
assert (len(p.tokens)) == 1
assert isinstance(p.tokens[0], sql.For)
def test_nested_for():
p = sqlparse.parse('FOR foo LOOP FOR bar LOOP END LOOP END LOOP')[0]
assert len(p.tokens) == 1
for1 = p.tokens[0]
assert for1.tokens[0].value == 'FOR'
assert for1.tokens[-1].value == 'END LOOP'
for2 = for1.tokens[6]
assert isinstance(for2, sql.For)
assert for2.tokens[0].value == 'FOR'
assert for2.tokens[-1].value == 'END LOOP'
def test_begin():
p = sqlparse.parse('BEGIN foo END')[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Begin)
def test_keyword_followed_by_parenthesis():
p = sqlparse.parse('USING(somecol')[0]
assert len(p.tokens) == 3
assert p.tokens[0].ttype == T.Keyword
assert p.tokens[1].ttype == T.Punctuation
def test_nested_begin():
p = sqlparse.parse('BEGIN foo BEGIN bar END END')[0]
assert len(p.tokens) == 1
outer = p.tokens[0]
assert outer.tokens[0].value == 'BEGIN'
assert outer.tokens[-1].value == 'END'
inner = outer.tokens[4]
assert inner.tokens[0].value == 'BEGIN'
assert inner.tokens[-1].value == 'END'
assert isinstance(inner, sql.Begin)
def test_aliased_column_without_as():
p = sqlparse.parse('foo bar')[0].tokens
assert len(p) == 1
assert p[0].get_real_name() == 'foo'
assert p[0].get_alias() == 'bar'
p = sqlparse.parse('foo.bar baz')[0].tokens[0]
assert p.get_parent_name() == 'foo'
assert p.get_real_name() == 'bar'
assert p.get_alias() == 'baz'
def test_qualified_function():
p = sqlparse.parse('foo()')[0].tokens[0]
assert p.get_parent_name() is None
assert p.get_real_name() == 'foo'
p = sqlparse.parse('foo.bar()')[0].tokens[0]
assert p.get_parent_name() == 'foo'
assert p.get_real_name() == 'bar'
def test_aliased_function_without_as():
p = sqlparse.parse('foo() bar')[0].tokens[0]
assert p.get_parent_name() is None
assert p.get_real_name() == 'foo'
assert p.get_alias() == 'bar'
p = sqlparse.parse('foo.bar() baz')[0].tokens[0]
assert p.get_parent_name() == 'foo'
assert p.get_real_name() == 'bar'
assert p.get_alias() == 'baz'
def test_aliased_literal_without_as():
p = sqlparse.parse('1 foo')[0].tokens
assert len(p) == 1
assert p[0].get_alias() == 'foo'
def test_grouping_as_cte():
p = sqlparse.parse('foo AS WITH apple AS 1, banana AS 2')[0].tokens
assert len(p) > 4
assert p[0].get_alias() is None
assert p[2].value == 'AS'
assert p[4].value == 'WITH'


@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
import pytest
from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
class TestSQLREGEX:
@pytest.mark.parametrize('number', ['1.0', '-1.0',
'1.', '-1.',
'.1', '-.1'])
def test_float_numbers(self, number):
ttype = next(tt for action, tt in SQL_REGEX if action(number))
assert tokens.Number.Float == ttype
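SQL_REGEX is the rule table the lexer walks; a sketch of it in action via the tokenizer (per the test above, '-1.5' is expected to come out as a single Number.Float token):

import sqlparse.lexer

pairs = list(sqlparse.lexer.tokenize('select -1.5'))
# yields (ttype, value) pairs produced by the SQL_REGEX rules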


@@ -0,0 +1,474 @@
# -*- coding: utf-8 -*-
"""Tests sqlparse.parse()."""
import pytest
import sqlparse
from sqlparse import sql, tokens as T
from sqlparse.compat import StringIO, text_type
def test_parse_tokenize():
s = 'select * from foo;'
stmts = sqlparse.parse(s)
assert len(stmts) == 1
assert str(stmts[0]) == s
def test_parse_multistatement():
sql1 = 'select * from foo;'
sql2 = 'select * from bar;'
stmts = sqlparse.parse(sql1 + sql2)
assert len(stmts) == 2
assert str(stmts[0]) == sql1
assert str(stmts[1]) == sql2
@pytest.mark.parametrize('s', ['select\n*from foo;',
'select\r\n*from foo',
'select\r*from foo',
'select\r\n*from foo\n'])
def test_parse_newlines(s):
p = sqlparse.parse(s)[0]
assert str(p) == s
def test_parse_within():
s = 'foo(col1, col2)'
p = sqlparse.parse(s)[0]
col1 = p.tokens[0].tokens[1].tokens[1].tokens[0]
assert col1.within(sql.Function)
def test_parse_child_of():
s = '(col1, col2)'
p = sqlparse.parse(s)[0]
assert p.tokens[0].tokens[1].is_child_of(p.tokens[0])
s = 'select foo'
p = sqlparse.parse(s)[0]
assert not p.tokens[2].is_child_of(p.tokens[0])
assert p.tokens[2].is_child_of(p)
def test_parse_has_ancestor():
s = 'foo or (bar, baz)'
p = sqlparse.parse(s)[0]
baz = p.tokens[-1].tokens[1].tokens[-1]
assert baz.has_ancestor(p.tokens[-1].tokens[1])
assert baz.has_ancestor(p.tokens[-1])
assert baz.has_ancestor(p)
@pytest.mark.parametrize('s', ['.5', '.51', '1.5', '12.5'])
def test_parse_float(s):
t = sqlparse.parse(s)[0].tokens
assert len(t) == 1
assert t[0].ttype is sqlparse.tokens.Number.Float
@pytest.mark.parametrize('s, holder', [
('select * from foo where user = ?', '?'),
('select * from foo where user = :1', ':1'),
('select * from foo where user = :name', ':name'),
('select * from foo where user = %s', '%s'),
('select * from foo where user = $a', '$a')])
def test_parse_placeholder(s, holder):
t = sqlparse.parse(s)[0].tokens[-1].tokens
assert t[-1].ttype is sqlparse.tokens.Name.Placeholder
assert t[-1].value == holder
def test_parse_modulo_not_placeholder():
tokens = list(sqlparse.lexer.tokenize('x %3'))
assert tokens[2][0] == sqlparse.tokens.Operator
def test_parse_access_symbol():
# see issue27
t = sqlparse.parse('select a.[foo bar] as foo')[0].tokens
assert isinstance(t[-1], sql.Identifier)
assert t[-1].get_name() == 'foo'
assert t[-1].get_real_name() == '[foo bar]'
assert t[-1].get_parent_name() == 'a'
def test_parse_square_brackets_notation_isnt_too_greedy():
# see issue153
t = sqlparse.parse('[foo], [bar]')[0].tokens
assert isinstance(t[0], sql.IdentifierList)
assert len(t[0].tokens) == 4
assert t[0].tokens[0].get_real_name() == '[foo]'
assert t[0].tokens[-1].get_real_name() == '[bar]'
def test_parse_keyword_like_identifier():
# see issue47
t = sqlparse.parse('foo.key')[0].tokens
assert len(t) == 1
assert isinstance(t[0], sql.Identifier)
def test_parse_function_parameter():
# see issue94
t = sqlparse.parse('abs(some_col)')[0].tokens[0].get_parameters()
assert len(t) == 1
assert isinstance(t[0], sql.Identifier)
def test_parse_function_param_single_literal():
t = sqlparse.parse('foo(5)')[0].tokens[0].get_parameters()
assert len(t) == 1
assert t[0].ttype is T.Number.Integer
def test_parse_nested_function():
t = sqlparse.parse('foo(bar(5))')[0].tokens[0].get_parameters()
assert len(t) == 1
assert type(t[0]) is sql.Function
def test_quoted_identifier():
t = sqlparse.parse('select x.y as "z" from foo')[0].tokens
assert isinstance(t[2], sql.Identifier)
assert t[2].get_name() == 'z'
assert t[2].get_real_name() == 'y'
@pytest.mark.parametrize('name', [
'foo', '_foo', # issue175
'1_data', # valid MySQL table name, see issue337
])
def test_valid_identifier_names(name):
t = sqlparse.parse(name)[0].tokens
assert isinstance(t[0], sql.Identifier)
assert t[0].get_name() == name
def test_psql_quotation_marks():
# issue83
# regression: make sure plain $$ works
t = sqlparse.split("""
CREATE OR REPLACE FUNCTION testfunc1(integer) RETURNS integer AS $$
....
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION testfunc2(integer) RETURNS integer AS $$
....
$$ LANGUAGE plpgsql;""")
assert len(t) == 2
# make sure $SOMETHING$ works too
t = sqlparse.split("""
CREATE OR REPLACE FUNCTION testfunc1(integer) RETURNS integer AS $PROC_1$
....
$PROC_1$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION testfunc2(integer) RETURNS integer AS $PROC_2$
....
$PROC_2$ LANGUAGE plpgsql;""")
assert len(t) == 2
def test_double_precision_is_builtin():
s = 'DOUBLE PRECISION'
t = sqlparse.parse(s)[0].tokens
assert len(t) == 1
assert t[0].ttype == sqlparse.tokens.Name.Builtin
assert t[0].value == 'DOUBLE PRECISION'
@pytest.mark.parametrize('ph', ['?', ':1', ':foo', '%s', '%(foo)s'])
def test_placeholder(ph):
p = sqlparse.parse(ph)[0].tokens
assert len(p) == 1
assert p[0].ttype is T.Name.Placeholder
@pytest.mark.parametrize('num', ['6.67428E-8', '1.988e33', '1e-12'])
def test_scientific_numbers(num):
p = sqlparse.parse(num)[0].tokens
assert len(p) == 1
assert p[0].ttype is T.Number.Float
def test_single_quotes_are_strings():
p = sqlparse.parse("'foo'")[0].tokens
assert len(p) == 1
assert p[0].ttype is T.String.Single
def test_double_quotes_are_identifiers():
p = sqlparse.parse('"foo"')[0].tokens
assert len(p) == 1
assert isinstance(p[0], sql.Identifier)
def test_single_quotes_with_linebreaks():
# issue118
p = sqlparse.parse("'f\nf'")[0].tokens
assert len(p) == 1
assert p[0].ttype is T.String.Single
def test_sqlite_identifiers():
# Make sure we still parse sqlite style escapes
p = sqlparse.parse('[col1],[col2]')[0].tokens
id_names = [id_.get_name() for id_ in p[0].get_identifiers()]
assert len(p) == 1
assert isinstance(p[0], sql.IdentifierList)
assert id_names == ['[col1]', '[col2]']
p = sqlparse.parse('[col1]+[col2]')[0]
types = [tok.ttype for tok in p.flatten()]
assert types == [T.Name, T.Operator, T.Name]
def test_simple_1d_array_index():
p = sqlparse.parse('col[1]')[0].tokens
assert len(p) == 1
assert p[0].get_name() == 'col'
indices = list(p[0].get_array_indices())
assert len(indices) == 1 # 1-dimensional index
assert len(indices[0]) == 1 # index is single token
assert indices[0][0].value == '1'
def test_2d_array_index():
p = sqlparse.parse('col[x][(y+1)*2]')[0].tokens
assert len(p) == 1
assert p[0].get_name() == 'col'
assert len(list(p[0].get_array_indices())) == 2 # 2-dimensional index
def test_array_index_function_result():
p = sqlparse.parse('somefunc()[1]')[0].tokens
assert len(p) == 1
assert len(list(p[0].get_array_indices())) == 1
def test_schema_qualified_array_index():
p = sqlparse.parse('schem.col[1]')[0].tokens
assert len(p) == 1
assert p[0].get_parent_name() == 'schem'
assert p[0].get_name() == 'col'
assert list(p[0].get_array_indices())[0][0].value == '1'
def test_aliased_array_index():
p = sqlparse.parse('col[1] x')[0].tokens
assert len(p) == 1
assert p[0].get_alias() == 'x'
assert p[0].get_real_name() == 'col'
assert list(p[0].get_array_indices())[0][0].value == '1'
def test_array_literal():
# See issue #176
p = sqlparse.parse('ARRAY[%s, %s]')[0]
assert len(p.tokens) == 2
assert len(list(p.flatten())) == 7
def test_typed_array_definition():
# array indices aren't grouped with built-ins, but make sure we can extract
# identifier names
p = sqlparse.parse('x int, y int[], z int')[0]
names = [x.get_name() for x in p.get_sublists()
if isinstance(x, sql.Identifier)]
assert names == ['x', 'y', 'z']
@pytest.mark.parametrize('s', ['select 1 -- foo', 'select 1 # foo'])
def test_single_line_comments(s):
# see issue178
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 5
assert p.tokens[-1].ttype == T.Comment.Single
@pytest.mark.parametrize('s', ['foo', '@foo', '#foo', '##foo'])
def test_names_and_special_names(s):
# see issue192
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Identifier)
def test_get_token_at_offset():
p = sqlparse.parse('select * from dual')[0]
# 0123456789
assert p.get_token_at_offset(0) == p.tokens[0]
assert p.get_token_at_offset(1) == p.tokens[0]
assert p.get_token_at_offset(6) == p.tokens[1]
assert p.get_token_at_offset(7) == p.tokens[2]
assert p.get_token_at_offset(8) == p.tokens[3]
assert p.get_token_at_offset(9) == p.tokens[4]
assert p.get_token_at_offset(10) == p.tokens[4]
def test_pprint():
p = sqlparse.parse('select a0, b0, c0, d0, e0 from '
'(select * from dual) q0 where 1=1 and 2=2')[0]
output = StringIO()
p._pprint_tree(f=output)
pprint = '\n'.join([
"|- 0 DML 'select'",
"|- 1 Whitespace ' '",
"|- 2 IdentifierList 'a0, b0...'",
"| |- 0 Identifier 'a0'",
"| | `- 0 Name 'a0'",
"| |- 1 Punctuation ','",
"| |- 2 Whitespace ' '",
"| |- 3 Identifier 'b0'",
"| | `- 0 Name 'b0'",
"| |- 4 Punctuation ','",
"| |- 5 Whitespace ' '",
"| |- 6 Identifier 'c0'",
"| | `- 0 Name 'c0'",
"| |- 7 Punctuation ','",
"| |- 8 Whitespace ' '",
"| |- 9 Identifier 'd0'",
"| | `- 0 Name 'd0'",
"| |- 10 Punctuation ','",
"| |- 11 Whitespace ' '",
"| `- 12 Float 'e0'",
"|- 3 Whitespace ' '",
"|- 4 Keyword 'from'",
"|- 5 Whitespace ' '",
"|- 6 Identifier '(selec...'",
"| |- 0 Parenthesis '(selec...'",
"| | |- 0 Punctuation '('",
"| | |- 1 DML 'select'",
"| | |- 2 Whitespace ' '",
"| | |- 3 Wildcard '*'",
"| | |- 4 Whitespace ' '",
"| | |- 5 Keyword 'from'",
"| | |- 6 Whitespace ' '",
"| | |- 7 Identifier 'dual'",
"| | | `- 0 Name 'dual'",
"| | `- 8 Punctuation ')'",
"| |- 1 Whitespace ' '",
"| `- 2 Identifier 'q0'",
"| `- 0 Name 'q0'",
"|- 7 Whitespace ' '",
"`- 8 Where 'where ...'",
" |- 0 Keyword 'where'",
" |- 1 Whitespace ' '",
" |- 2 Comparison '1=1'",
" | |- 0 Integer '1'",
" | |- 1 Comparison '='",
" | `- 2 Integer '1'",
" |- 3 Whitespace ' '",
" |- 4 Keyword 'and'",
" |- 5 Whitespace ' '",
" `- 6 Comparison '2=2'",
" |- 0 Integer '2'",
" |- 1 Comparison '='",
" `- 2 Integer '2'",
""])
assert output.getvalue() == pprint
def test_wildcard_multiplication():
p = sqlparse.parse('select * from dual')[0]
assert p.tokens[2].ttype == T.Wildcard
p = sqlparse.parse('select a0.* from dual a0')[0]
assert p.tokens[2][2].ttype == T.Wildcard
p = sqlparse.parse('select 1 * 2 from dual')[0]
assert p.tokens[2][2].ttype == T.Operator
def test_stmt_tokens_parents():
# see issue 226
s = "CREATE TABLE test();"
stmt = sqlparse.parse(s)[0]
for token in stmt.tokens:
assert token.has_ancestor(stmt)
@pytest.mark.parametrize('sql, is_literal', [
('$$foo$$', True),
('$_$foo$_$', True),
('$token$ foo $token$', True),
# don't parse inner tokens
('$_$ foo $token$bar$token$ baz$_$', True),
('$A$ foo $B$', False) # tokens don't match
])
def test_dbldollar_as_literal(sql, is_literal):
# see issue 277
p = sqlparse.parse(sql)[0]
if is_literal:
assert len(p.tokens) == 1
assert p.tokens[0].ttype == T.Literal
else:
for token in p.tokens:
assert token.ttype != T.Literal
def test_non_ascii():
_test_non_ascii = u"insert into test (id, name) values (1, 'тест');"
s = _test_non_ascii
stmts = sqlparse.parse(s)
assert len(stmts) == 1
statement = stmts[0]
assert text_type(statement) == s
assert statement._pprint_tree() is None
s = _test_non_ascii.encode('utf-8')
stmts = sqlparse.parse(s, 'utf-8')
assert len(stmts) == 1
statement = stmts[0]
assert text_type(statement) == _test_non_ascii
assert statement._pprint_tree() is None
def test_get_real_name():
# issue 369
s = u"update a t set t.b=1"
stmts = sqlparse.parse(s)
assert len(stmts) == 1
assert 'a' == stmts[0].tokens[2].get_real_name()
assert 't' == stmts[0].tokens[2].get_alias()
def test_from_subquery():
# issue 446
s = u'from(select 1)'
stmts = sqlparse.parse(s)
assert len(stmts) == 1
assert len(stmts[0].tokens) == 2
assert stmts[0].tokens[0].value == 'from'
assert stmts[0].tokens[0].ttype == T.Keyword
s = u'from (select 1)'
stmts = sqlparse.parse(s)
assert len(stmts) == 1
assert len(stmts[0].tokens) == 3
assert stmts[0].tokens[0].value == 'from'
assert stmts[0].tokens[0].ttype == T.Keyword
assert stmts[0].tokens[1].ttype == T.Whitespace
def test_parenthesis():
tokens = sqlparse.parse("(\n\n1\n\n)")[0].tokens[0].tokens
assert list(map(lambda t: t.ttype, tokens)) == [T.Punctuation,
T.Newline,
T.Newline,
T.Number.Integer,
T.Newline,
T.Newline,
T.Punctuation]
tokens = sqlparse.parse("(\n\n 1 \n\n)")[0].tokens[0].tokens
assert list(map(lambda t: t.ttype, tokens)) == [T.Punctuation,
T.Newline,
T.Newline,
T.Whitespace,
T.Number.Integer,
T.Whitespace,
T.Newline,
T.Newline,
T.Punctuation]
@@ -0,0 +1,408 @@
# -*- coding: utf-8 -*-
import pytest
import sqlparse
from sqlparse import sql, tokens as T
from sqlparse.compat import PY2
def test_issue9():
# make sure where doesn't consume parenthesis
p = sqlparse.parse('(where 1)')[0]
assert isinstance(p, sql.Statement)
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Parenthesis)
prt = p.tokens[0]
assert len(prt.tokens) == 3
assert prt.tokens[0].ttype == T.Punctuation
assert prt.tokens[-1].ttype == T.Punctuation
def test_issue13():
parsed = sqlparse.parse(("select 'one';\n"
"select 'two\\'';\n"
"select 'three';"))
assert len(parsed) == 3
assert str(parsed[1]).strip() == "select 'two\\'';"
@pytest.mark.parametrize('s', ['--hello', '-- hello', '--hello\n',
'--', '--\n'])
def test_issue26(s):
# parse stand-alone comments
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is T.Comment.Single
@pytest.mark.parametrize('value', ['create', 'CREATE'])
def test_issue34(value):
t = sqlparse.parse("create")[0].token_first()
assert t.match(T.Keyword.DDL, value) is True
def test_issue35():
# missing space before LIMIT. Updated for #321
sql = sqlparse.format("select * from foo where bar = 1 limit 1",
reindent=True)
assert sql == "\n".join([
"select *",
"from foo",
"where bar = 1",
"limit 1"])
def test_issue38():
sql = sqlparse.format("SELECT foo; -- comment", strip_comments=True)
assert sql == "SELECT foo;"
sql = sqlparse.format("/* foo */", strip_comments=True)
assert sql == ""
def test_issue39():
p = sqlparse.parse('select user.id from user')[0]
assert len(p.tokens) == 7
idt = p.tokens[2]
assert idt.__class__ == sql.Identifier
assert len(idt.tokens) == 3
assert idt.tokens[0].match(T.Name, 'user') is True
assert idt.tokens[1].match(T.Punctuation, '.') is True
assert idt.tokens[2].match(T.Name, 'id') is True
def test_issue40():
# make sure identifier lists in subselects are grouped
p = sqlparse.parse(('SELECT id, name FROM '
'(SELECT id, name FROM bar) as foo'))[0]
assert len(p.tokens) == 7
assert p.tokens[2].__class__ == sql.IdentifierList
assert p.tokens[-1].__class__ == sql.Identifier
assert p.tokens[-1].get_name() == 'foo'
sp = p.tokens[-1].tokens[0]
assert sp.tokens[3].__class__ == sql.IdentifierList
# make sure that formatting works as expected
s = sqlparse.format('SELECT id == name FROM '
'(SELECT id, name FROM bar)', reindent=True)
assert s == '\n'.join([
'SELECT id == name',
'FROM',
'  (SELECT id,',
'   name',
'   FROM bar)'])
s = sqlparse.format('SELECT id == name FROM '
'(SELECT id, name FROM bar) as foo', reindent=True)
assert s == '\n'.join([
'SELECT id == name',
'FROM',
'  (SELECT id,',
'   name',
'   FROM bar) as foo'])
@pytest.mark.parametrize('s', ['select x.y::text as z from foo',
'select x.y::text as "z" from foo',
'select x."y"::text as z from foo',
'select x."y"::text as "z" from foo',
'select "x".y::text as z from foo',
'select "x".y::text as "z" from foo',
'select "x"."y"::text as z from foo',
'select "x"."y"::text as "z" from foo'])
@pytest.mark.parametrize('func_name, result', [('get_name', 'z'),
('get_real_name', 'y'),
('get_parent_name', 'x'),
('get_alias', 'z'),
('get_typecast', 'text')])
def test_issue78(s, func_name, result):
# the bug author provided these nice examples, let's use them!
p = sqlparse.parse(s)[0]
i = p.tokens[2]
assert isinstance(i, sql.Identifier)
func = getattr(i, func_name)
assert func() == result
def test_issue83():
sql = """ CREATE OR REPLACE FUNCTION func_a(text)
RETURNS boolean LANGUAGE plpgsql STRICT IMMUTABLE AS
$_$
BEGIN
...
END;
$_$;
CREATE OR REPLACE FUNCTION func_b(text)
RETURNS boolean LANGUAGE plpgsql STRICT IMMUTABLE AS
$_$
BEGIN
...
END;
$_$;
ALTER TABLE..... ;"""
t = sqlparse.split(sql)
assert len(t) == 3
def test_comment_encoding_when_reindent():
# There was a UnicodeEncodeError in the reindent filter that
# cast every comment followed by a keyword to str.
sql = u'select foo -- Comment containing Ümläuts\nfrom bar'
formatted = sqlparse.format(sql, reindent=True)
assert formatted == sql
def test_parse_sql_with_binary():
# See https://github.com/andialbrecht/sqlparse/pull/88
# digest = '‚êŠplL4¡h‘øN{'
digest = '\x82|\xcb\x0e\xea\x8aplL4\xa1h\x91\xf8N{'
sql = "select * from foo where bar = '{0}'".format(digest)
formatted = sqlparse.format(sql, reindent=True)
tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest)
if PY2:
tformatted = tformatted.decode('unicode-escape')
assert formatted == tformatted
def test_dont_alias_keywords():
# The _group_left_right function had a bug where the check for the
# left side wasn't handled correctly. In one case this resulted in
# a keyword turning into an identifier.
p = sqlparse.parse('FROM AS foo')[0]
assert len(p.tokens) == 5
assert p.tokens[0].ttype is T.Keyword
assert p.tokens[2].ttype is T.Keyword
def test_format_accepts_encoding(load_file):
# issue20
sql = load_file('test_cp1251.sql', 'cp1251')
formatted = sqlparse.format(sql, reindent=True, encoding='cp1251')
tformatted = u'insert into foo\nvalues (1); -- Песня про надежду'
assert formatted == tformatted
def test_stream(get_stream):
with get_stream("stream.sql") as stream:
p = sqlparse.parse(stream)[0]
assert p.get_type() == 'INSERT'
def test_issue90():
sql = ('UPDATE "gallery_photo" SET "owner_id" = 4018, "deleted_at" = NULL,'
' "width" = NULL, "height" = NULL, "rating_votes" = 0,'
' "rating_score" = 0, "thumbnail_width" = NULL,'
' "thumbnail_height" = NULL, "price" = 1, "description" = NULL')
formatted = sqlparse.format(sql, reindent=True)
tformatted = '\n'.join([
'UPDATE "gallery_photo"',
'SET "owner_id" = 4018,',
' "deleted_at" = NULL,',
' "width" = NULL,',
' "height" = NULL,',
' "rating_votes" = 0,',
' "rating_score" = 0,',
' "thumbnail_width" = NULL,',
' "thumbnail_height" = NULL,',
' "price" = 1,',
' "description" = NULL'])
assert formatted == tformatted
def test_except_formatting():
sql = 'SELECT 1 FROM foo WHERE 2 = 3 EXCEPT SELECT 2 FROM bar WHERE 1 = 2'
formatted = sqlparse.format(sql, reindent=True)
tformatted = '\n'.join([
'SELECT 1',
'FROM foo',
'WHERE 2 = 3',
'EXCEPT',
'SELECT 2',
'FROM bar',
'WHERE 1 = 2'])
assert formatted == tformatted
def test_null_with_as():
sql = 'SELECT NULL AS c1, NULL AS c2 FROM t1'
formatted = sqlparse.format(sql, reindent=True)
tformatted = '\n'.join([
'SELECT NULL AS c1,',
'       NULL AS c2',
'FROM t1'])
assert formatted == tformatted
def test_issue190_open_file(filepath):
path = filepath('stream.sql')
with open(path) as stream:
p = sqlparse.parse(stream)[0]
assert p.get_type() == 'INSERT'
def test_issue193_splitting_function():
sql = """ CREATE FUNCTION a(x VARCHAR(20)) RETURNS VARCHAR(20)
BEGIN
DECLARE y VARCHAR(20);
RETURN x;
END;
SELECT * FROM a.b;"""
statements = sqlparse.split(sql)
assert len(statements) == 2
def test_issue194_splitting_function():
sql = """ CREATE FUNCTION a(x VARCHAR(20)) RETURNS VARCHAR(20)
BEGIN
DECLARE y VARCHAR(20);
IF (1 = 1) THEN
SET x = y;
END IF;
RETURN x;
END;
SELECT * FROM a.b;"""
statements = sqlparse.split(sql)
assert len(statements) == 2
def test_issue186_get_type():
sql = "-- comment\ninsert into foo"
p = sqlparse.parse(sql)[0]
assert p.get_type() == 'INSERT'
def test_issue212_py2unicode():
t1 = sql.Token(T.String, u'schöner ')
t2 = sql.Token(T.String, 'bug')
token_list = sql.TokenList([t1, t2])
assert str(token_list) == 'schöner bug'
def test_issue213_leadingws():
sql = " select * from foo"
assert sqlparse.format(sql, strip_whitespace=True) == "select * from foo"
def test_issue227_gettype_cte():
select_stmt = sqlparse.parse('SELECT 1, 2, 3 FROM foo;')
assert select_stmt[0].get_type() == 'SELECT'
with_stmt = sqlparse.parse('WITH foo AS (SELECT 1, 2, 3)'
'SELECT * FROM foo;')
assert with_stmt[0].get_type() == 'SELECT'
with2_stmt = sqlparse.parse("""
WITH foo AS (SELECT 1 AS abc, 2 AS def),
bar AS (SELECT * FROM something WHERE x > 1)
INSERT INTO elsewhere SELECT * FROM foo JOIN bar;""")
assert with2_stmt[0].get_type() == 'INSERT'
def test_issue207_runaway_format():
sql = 'select 1 from (select 1 as one, 2 as two, 3 from dual) t0'
p = sqlparse.format(sql, reindent=True)
assert p == '\n'.join([
"select 1",
"from",
" (select 1 as one,",
" 2 as two,",
" 3",
" from dual) t0"])
def test_token_next_doesnt_ignore_skip_cm():
sql = '--comment\nselect 1'
tok = sqlparse.parse(sql)[0].token_next(-1, skip_cm=True)[1]
assert tok.value == 'select'
@pytest.mark.parametrize('s', [
'SELECT x AS',
'AS'
])
def test_issue284_as_grouping(s):
p = sqlparse.parse(s)[0]
assert s == str(p)
def test_issue315_utf8_by_default():
# Make sure the lexer handles utf-8 strings correctly by default
# digest = '齐天大圣.カラフルな雲.사랑해요'
# The digest contains Chinese, Japanese and Korean characters
# All in 'utf-8' encoding.
digest = (
'\xe9\xbd\x90\xe5\xa4\xa9\xe5\xa4\xa7\xe5\x9c\xa3.'
'\xe3\x82\xab\xe3\x83\xa9\xe3\x83\x95\xe3\x83\xab\xe3\x81\xaa\xe9'
'\x9b\xb2.'
'\xec\x82\xac\xeb\x9e\x91\xed\x95\xb4\xec\x9a\x94'
)
sql = "select * from foo where bar = '{0}'".format(digest)
formatted = sqlparse.format(sql, reindent=True)
tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest)
if PY2:
tformatted = tformatted.decode('utf-8')
assert formatted == tformatted
def test_issue322_concurrently_is_keyword():
s = 'CREATE INDEX CONCURRENTLY myindex ON mytable(col1);'
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 12
assert p.tokens[0].ttype is T.Keyword.DDL # CREATE
assert p.tokens[2].ttype is T.Keyword # INDEX
assert p.tokens[4].ttype is T.Keyword # CONCURRENTLY
assert p.tokens[4].value == 'CONCURRENTLY'
assert isinstance(p.tokens[6], sql.Identifier)
assert p.tokens[6].value == 'myindex'
@pytest.mark.parametrize('s', [
'SELECT @min_price:=MIN(price), @max_price:=MAX(price) FROM shop;',
'SELECT @min_price:=MIN(price), @max_price:=MAX(price) FROM shop',
])
def test_issue359_index_error_assignments(s):
sqlparse.parse(s)
sqlparse.format(s, strip_comments=True)
def test_issue469_copy_as_psql_command():
formatted = sqlparse.format(
'\\copy select * from foo',
keyword_case='upper', identifier_case='capitalize')
assert formatted == '\\copy SELECT * FROM Foo'
@pytest.mark.xfail(reason='Needs to be fixed')
def test_issue484_comments_and_newlines():
formatted = sqlparse.format('\n'.join([
'Create table myTable',
'(',
' myId TINYINT NOT NULL, --my special comment',
' myName VARCHAR2(100) NOT NULL',
')']),
strip_comments=True)
assert formatted == ('\n'.join([
'Create table myTable',
'(',
' myId TINYINT NOT NULL,',
' myName VARCHAR2(100) NOT NULL',
')']))
def test_issue485_split_multi():
p_sql = '''CREATE OR REPLACE RULE ruled_tab_2rules AS ON INSERT
TO public.ruled_tab
DO instead (
select 1;
select 2;
);'''
assert len(sqlparse.split(p_sql)) == 1
def test_issue489_tzcasts():
p = sqlparse.parse('select bar at time zone \'UTC\' as foo')[0]
assert p.tokens[-1].has_alias() is True
assert p.tokens[-1].get_alias() == 'foo'
@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*-
# Tests splitting functions.
import types
import pytest
import sqlparse
from sqlparse.compat import StringIO, text_type
def test_split_semicolon():
sql1 = 'select * from foo;'
sql2 = "select * from foo where bar = 'foo;bar';"
stmts = sqlparse.parse(''.join([sql1, sql2]))
assert len(stmts) == 2
assert str(stmts[0]) == sql1
assert str(stmts[1]) == sql2
def test_split_backslash():
stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';")
assert len(stmts) == 3
@pytest.mark.parametrize('fn', ['function.sql',
'function_psql.sql',
'function_psql2.sql',
'function_psql3.sql',
'function_psql4.sql'])
def test_split_create_function(load_file, fn):
sql = load_file(fn)
stmts = sqlparse.parse(sql)
assert len(stmts) == 1
assert text_type(stmts[0]) == sql
def test_split_dashcomments(load_file):
sql = load_file('dashcomment.sql')
stmts = sqlparse.parse(sql)
assert len(stmts) == 3
assert ''.join(str(q) for q in stmts) == sql
@pytest.mark.parametrize('s', ['select foo; -- comment\n',
'select foo; -- comment\r',
'select foo; -- comment\r\n',
'select foo; -- comment'])
def test_split_dashcomments_eol(s):
stmts = sqlparse.parse(s)
assert len(stmts) == 1
def test_split_begintag(load_file):
sql = load_file('begintag.sql')
stmts = sqlparse.parse(sql)
assert len(stmts) == 3
assert ''.join(str(q) for q in stmts) == sql
def test_split_begintag_2(load_file):
sql = load_file('begintag_2.sql')
stmts = sqlparse.parse(sql)
assert len(stmts) == 1
assert ''.join(str(q) for q in stmts) == sql
def test_split_dropif():
sql = 'DROP TABLE IF EXISTS FOO;\n\nSELECT * FROM BAR;'
stmts = sqlparse.parse(sql)
assert len(stmts) == 2
assert ''.join(str(q) for q in stmts) == sql
def test_split_comment_with_umlaut():
sql = (u'select * from foo;\n'
u'-- Testing an umlaut: ä\n'
u'select * from bar;')
stmts = sqlparse.parse(sql)
assert len(stmts) == 2
assert ''.join(text_type(q) for q in stmts) == sql
def test_split_comment_end_of_line():
sql = ('select * from foo; -- foo\n'
'select * from bar;')
stmts = sqlparse.parse(sql)
assert len(stmts) == 2
assert ''.join(str(q) for q in stmts) == sql
# make sure the comment belongs to the first query
assert str(stmts[0]) == 'select * from foo; -- foo\n'
def test_split_casewhen():
sql = ("SELECT case when val = 1 then 2 else null end as foo;\n"
"comment on table actor is 'The actor table.';")
stmts = sqlparse.split(sql)
assert len(stmts) == 2
def test_split_cursor_declare():
sql = ('DECLARE CURSOR "foo" AS SELECT 1;\n'
'SELECT 2;')
stmts = sqlparse.split(sql)
assert len(stmts) == 2
def test_split_if_function(): # see issue 33
# don't let IF as a function confuse the splitter
sql = ('CREATE TEMPORARY TABLE tmp '
'SELECT IF(a=1, a, b) AS o FROM one; '
'SELECT t FROM two')
stmts = sqlparse.split(sql)
assert len(stmts) == 2
def test_split_stream():
stream = StringIO("SELECT 1; SELECT 2;")
stmts = sqlparse.parsestream(stream)
assert isinstance(stmts, types.GeneratorType)
assert len(list(stmts)) == 2
def test_split_encoding_parsestream():
stream = StringIO("SELECT 1; SELECT 2;")
stmts = list(sqlparse.parsestream(stream))
assert isinstance(stmts[0].tokens[0].value, text_type)
def test_split_unicode_parsestream():
stream = StringIO(u'SELECT ö')
stmts = list(sqlparse.parsestream(stream))
assert str(stmts[0]) == 'SELECT ö'
def test_split_simple():
stmts = sqlparse.split('select * from foo; select * from bar;')
assert len(stmts) == 2
assert stmts[0] == 'select * from foo;'
assert stmts[1] == 'select * from bar;'
def test_split_quotes_with_new_line():
stmts = sqlparse.split('select "foo\nbar"')
assert len(stmts) == 1
assert stmts[0] == 'select "foo\nbar"'
stmts = sqlparse.split("select 'foo\n\bar'")
assert len(stmts) == 1
assert stmts[0] == "select 'foo\n\bar'"
@@ -0,0 +1,238 @@
# -*- coding: utf-8 -*-
import types
import pytest
import sqlparse
from sqlparse import lexer
from sqlparse import sql, tokens as T
from sqlparse.compat import StringIO
def test_tokenize_simple():
s = 'select * from foo;'
stream = lexer.tokenize(s)
assert isinstance(stream, types.GeneratorType)
tokens = list(stream)
assert len(tokens) == 8
assert len(tokens[0]) == 2
assert tokens[0] == (T.Keyword.DML, 'select')
assert tokens[-1] == (T.Punctuation, ';')
def test_tokenize_backticks():
s = '`foo`.`bar`'
tokens = list(lexer.tokenize(s))
assert len(tokens) == 3
assert tokens[0] == (T.Name, '`foo`')
@pytest.mark.parametrize('s', ['foo\nbar\n', 'foo\rbar\r',
'foo\r\nbar\r\n', 'foo\r\nbar\n'])
def test_tokenize_linebreaks(s):
# issue1
tokens = lexer.tokenize(s)
assert ''.join(str(x[1]) for x in tokens) == s
def test_tokenize_inline_keywords():
# issue 7
s = "create created_foo"
tokens = list(lexer.tokenize(s))
assert len(tokens) == 3
assert tokens[0][0] == T.Keyword.DDL
assert tokens[2][0] == T.Name
assert tokens[2][1] == 'created_foo'
s = "enddate"
tokens = list(lexer.tokenize(s))
assert len(tokens) == 1
assert tokens[0][0] == T.Name
s = "join_col"
tokens = list(lexer.tokenize(s))
assert len(tokens) == 1
assert tokens[0][0] == T.Name
s = "left join_col"
tokens = list(lexer.tokenize(s))
assert len(tokens) == 3
assert tokens[2][0] == T.Name
assert tokens[2][1] == 'join_col'
def test_tokenize_negative_numbers():
s = "values(-1)"
tokens = list(lexer.tokenize(s))
assert len(tokens) == 4
assert tokens[2][0] == T.Number.Integer
assert tokens[2][1] == '-1'
def test_token_str():
token = sql.Token(None, 'FoO')
assert str(token) == 'FoO'
def test_token_repr():
token = sql.Token(T.Keyword, 'foo')
tst = "<Keyword 'foo' at 0x"
assert repr(token)[:len(tst)] == tst
token = sql.Token(T.Keyword, '1234567890')
tst = "<Keyword '123456...' at 0x"
assert repr(token)[:len(tst)] == tst
def test_token_flatten():
token = sql.Token(T.Keyword, 'foo')
gen = token.flatten()
assert isinstance(gen, types.GeneratorType)
lgen = list(gen)
assert lgen == [token]
def test_tokenlist_repr():
p = sqlparse.parse('foo, bar, baz')[0]
tst = "<IdentifierList 'foo, b...' at 0x"
assert repr(p.tokens[0])[:len(tst)] == tst
def test_single_quotes():
p = sqlparse.parse("'test'")[0]
tst = "<Single \"'test'\" at 0x"
assert repr(p.tokens[0])[:len(tst)] == tst
def test_tokenlist_first():
p = sqlparse.parse(' select foo')[0]
first = p.token_first()
assert first.value == 'select'
assert p.token_first(skip_ws=False).value == ' '
assert sql.TokenList([]).token_first() is None
def test_tokenlist_token_matching():
t1 = sql.Token(T.Keyword, 'foo')
t2 = sql.Token(T.Punctuation, ',')
x = sql.TokenList([t1, t2])
assert x.token_matching([lambda t: t.ttype is T.Keyword], 0) == t1
assert x.token_matching([lambda t: t.ttype is T.Punctuation], 0) == t2
assert x.token_matching([lambda t: t.ttype is T.Keyword], 1) is None
def test_stream_simple():
stream = StringIO("SELECT 1; SELECT 2;")
tokens = lexer.tokenize(stream)
assert len(list(tokens)) == 9
stream.seek(0)
tokens = list(lexer.tokenize(stream))
assert len(tokens) == 9
stream.seek(0)
tokens = list(lexer.tokenize(stream))
assert len(tokens) == 9
def test_stream_error():
stream = StringIO("FOOBAR{")
tokens = list(lexer.tokenize(stream))
assert len(tokens) == 2
assert tokens[1][0] == T.Error
@pytest.mark.parametrize('expr', [
'JOIN',
'LEFT JOIN',
'LEFT OUTER JOIN',
'FULL OUTER JOIN',
'NATURAL JOIN',
'CROSS JOIN',
'STRAIGHT JOIN',
'INNER JOIN',
'LEFT INNER JOIN'])
def test_parse_join(expr):
p = sqlparse.parse('{0} foo'.format(expr))[0]
assert len(p.tokens) == 3
assert p.tokens[0].ttype is T.Keyword
def test_parse_union(): # issue294
p = sqlparse.parse('UNION ALL')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is T.Keyword
@pytest.mark.parametrize('s', ['END IF', 'END  IF', 'END\t\nIF',
                               'END LOOP', 'END  LOOP', 'END\t\nLOOP'])
def test_parse_endifloop(s):
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is T.Keyword
@pytest.mark.parametrize('s', ['NULLS FIRST', 'NULLS LAST'])
def test_parse_nulls(s): # issue487
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is T.Keyword
@pytest.mark.parametrize('s', [
'foo',
'Foo',
'FOO',
'v$name', # issue291
])
def test_parse_identifiers(s):
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
token = p.tokens[0]
assert str(token) == s
assert isinstance(token, sql.Identifier)
def test_parse_group_by():
p = sqlparse.parse('GROUP BY')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is T.Keyword
def test_parse_order_by():
p = sqlparse.parse('ORDER BY')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype is T.Keyword
@pytest.mark.parametrize('s', (
"LIKE", "ILIKE", "NOT LIKE", "NOT ILIKE",
"NOT LIKE", "NOT ILIKE",
))
def test_like_and_ilike_parsed_as_comparisons(s):
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype == T.Operator.Comparison
@pytest.mark.parametrize('s', (
"LIKEaaa", "bILIKE", "aaILIKEbb", "NOTLIKE", "NOTILIKE",
))
def test_near_like_and_ilike_parsed_appropriately(s):
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert isinstance(p.tokens[0], sql.Identifier)
@pytest.mark.parametrize('s', (
'AT TIME ZONE \'UTC\'',
))
def test_parse_tzcast(s):
p = sqlparse.parse(s)[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype == T.Keyword.TZCast
def test_cli_commands():
p = sqlparse.parse('\\copy')[0]
assert len(p.tokens) == 1
assert p.tokens[0].ttype == T.Command
@@ -0,0 +1,28 @@
[tox]
skip_missing_interpreters = True
envlist =
    py27
    py34
    py35
    py36
    py37
    py38
    pypy_54
    flake8

[testenv]
deps =
    pytest
    pytest-cov
    pytest-travis-fold
passenv =
    TRAVIS
commands =
    sqlformat --version
    pytest --cov=sqlparse {posargs}

[testenv:flake8]
deps =
    flake8
commands =
    flake8 sqlparse tests setup.py
@@ -1434,35 +1434,57 @@ class ImpalaShell(object, cmd.Cmd):
command: select
leading comment: /*first comment*/
"""
if ImpalaShell._has_leading_comment(line):
leading_comment, line = ImpalaShell.strip_leading_comment(line.strip())
line = line.encode('utf-8')
if leading_comment:
leading_comment = leading_comment.encode('utf-8')
else:
leading_comment, line = None, line.strip()
if line and line[0] == '@':
line = 'rerun ' + line[1:]
return super(ImpalaShell, self).parseline(line) + (leading_comment,)
@staticmethod
def _has_leading_comment(raw_line):
"""
Helper function that returns True if a query starts with a comment.
This saves us from relying on sqlparse filtering, which can be slow.
"""
line = raw_line.lstrip()
if line and (line.startswith('--') or line.startswith('/*')):
return True
else:
return False
@staticmethod
def strip_leading_comment(sql):
"""
Filter a leading comment in the SQL statement. This function returns a tuple
containing (leading comment, line without the leading comment).
"""
class StripLeadingCommentFilter:
class StripLeadingCommentFilter(object):
def __init__(self):
self.comment = None
def _process(self, tlist):
"""
Iterate through the list of tokens, appending each leading comment
to self.comment, and then popping that element off the list. When we
hit the first non-comment and non-whitespace token, then we're done --
the remainder after that point is the SQL statement.
"""
token = tlist.token_first()
if self._is_comment(token):
self.comment = ''
while token:
if self._is_comment(token) or self._is_whitespace(token):
tidx = tlist.token_index(token)
if self.comment is None:
self.comment = token.value
else:
self.comment += token.value
tlist.tokens.pop(tidx)
tidx -= 1
token = tlist.token_next(tidx, False)
tlist.tokens.pop(0)
# skip_ws=False treats white space characters as tokens also
token = tlist.token_first(skip_ws=False)
else:
break
@@ -1475,8 +1497,8 @@ class ImpalaShell(object, cmd.Cmd):
return token.ttype == sqlparse.tokens.Whitespace or \
token.ttype == sqlparse.tokens.Newline
def process(self, stack, stmt):
[self.process(stack, sgroup) for sgroup in stmt.get_sublists()]
def process(self, stmt):
[self.process(sgroup) for sgroup in stmt.get_sublists()]
self._process(stmt)
stack = sqlparse.engine.FilterStack()
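To make the rewritten filter's contract concrete, here is an editor's sketch (not part of the commit) of how a statement filter with the 0.2.0+ process(stmt) signature is driven through sqlparse 0.3.1. FilterStack, stmtprocess, postprocess, SerializerUnicode and run() are real 0.3.1 APIs; the helper name strip_comment_example, the sample query, and the expected result below are illustrative assumptions.

import sqlparse

def strip_comment_example(sql):
    # StripLeadingCommentFilter refers to the class defined in the hunk
    # above; any object exposing process(stmt) can sit in stmtprocess.
    strip_filter = StripLeadingCommentFilter()
    stack = sqlparse.engine.FilterStack()
    # stmtprocess filters mutate each parsed Statement in place...
    stack.stmtprocess.append(strip_filter)
    # ...and postprocess filters serialize the Statement back to text.
    stack.postprocess.append(sqlparse.filters.SerializerUnicode())
    stripped = ''.join(stack.run(sql))
    return strip_filter.comment, stripped

# Assumed behavior, matching the strip_leading_comment docstring above:
# strip_comment_example('/*first comment*/ select 1')
# -> ('/*first comment*/ ', 'select 1')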
@@ -3,6 +3,6 @@ prettytable==0.7.1
sasl==0.2.1
setuptools>=36.8.0
six==1.14.0
sqlparse==0.1.19
sqlparse==0.3.1
thrift==0.9.3
thrift_sasl==0.4.2
@@ -134,8 +134,6 @@ class TestImpalaShell(ImpalaTestSuite):
def test_multiple_queries_with_escaped_backslash(self, vector):
"""Regression test for string containing an escaped backslash.
This relies on the patch at thirdparty/patches/sqlparse/0001-....patch.
"""
run_impala_shell_cmd(vector, ['-q', r'''select '\\'; select '\'';''', '-B'])
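As a quick illustration of the behavior this regression test guards (an editor's sketch, not part of the commit; the expected list is an assumption mirroring test_split_backslash in the bundled sqlparse test suite above):

import sqlparse

# Escaped quotes and backslashes must survive statement splitting intact.
stmts = sqlparse.split(r"select '\\'; select '\'';")
assert stmts == [r"select '\\';", r"select '\'';"]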