IMPALA-12934: Added Calcite parsing files to Impala

Add the framework for building an Impala-specific parsing syntax on top of
the base Calcite Parser.jj file.

The Parser.jj file here was copied from Calcite 1.36, so with this commit
we use the same parsing behavior as Calcite 1.36. Any future changes made
to the Parser.jj file or the config.fmpp file are Impala-specific changes,
so a diff against this commit shows all of the Impala parsing changes.

The config.fmpp file was copied from Calcite 1.36's default_config.fmpp. The
Calcite intention of the config.fmpp file is to supply the values for the
variables that are marked up in the Parser.jj file. So it is always preferable
to make a change in our config.fmpp file (derived from default_config.fmpp)
rather than in Parser.jj when possible. Our version is taken from
https://github.com/apache/calcite/blob/main/core/src/main/codegen/config.fmpp
and slightly modified to set the parser package and class name so that it
compiles for Impala.

No unit test is needed since there is no functional change. Future work on the
Calcite planner will make changes in the ".jj" file to support the differences
between the Impala parser and the Calcite parser.
Change-Id: If756b5ea8beb85661a30fb5d029e74ebb6719767
Reviewed-on: http://gerrit.cloudera.org:8080/21194
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
Steve Carlin
2024-03-07 14:38:35 -08:00
committed by Joe McDonnell
parent fdb87a755a
commit 2a3ce2071b
4 changed files with 9616 additions and 1 deletions

View File

@@ -103,6 +103,79 @@ under the License.
</execution>
</executions>
</plugin>
<plugin>
<!-- Stages the parser codegen inputs: during generate-sources, everything
     under src/main/codegen is copied (without filtering) into
     ${project.build.directory}/codegen. The fmpp-maven-plugin below reads
     its config file and templates from that staged directory. -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution> <!-- copy all templates/data in the same location to compile them at once -->
<id>copy-resources</id>
<phase>generate-sources</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/codegen</outputDirectory>
<resources>
<resource>
<directory>src/main/codegen</directory>
<filtering>false</filtering>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- Runs FMPP over the staged codegen directory during generate-sources,
     using the staged config.fmpp and the templates directory next to it.
     This must be declared AFTER the maven-resources-plugin execution that
     populates ${project.build.directory}/codegen, and BEFORE the
     javacc-maven-plugin that consumes the generated output. -->
<groupId>com.googlecode.fmpp-maven-plugin</groupId>
<artifactId>fmpp-maven-plugin</artifactId>
<version>1.0</version>
<dependencies>
<dependency>
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
<version>2.3.32</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>generate-fmpp-sources</id>
<phase>generate-sources</phase>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<cfgFile>${project.build.directory}/codegen/config.fmpp</cfgFile>
<!-- Use ${project.build.directory} rather than a hard-coded "target" so the
     output lands exactly where the javacc-maven-plugin's sourceDirectory
     looks for it, even if the build directory is overridden. -->
<outputDirectory>${project.build.directory}/generated-sources</outputDirectory>
<templateDirectory>${project.build.directory}/codegen/templates</templateDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- This must be run AFTER the fmpp-maven-plugin -->
<!-- Runs JavaCC during generate-sources on any Parser.jj found under
     ${project.build.directory}/generated-sources/ and writes its output
     back into that same directory. -->
<groupId>org.codehaus.mojo</groupId>
<artifactId>javacc-maven-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<phase>generate-sources</phase>
<id>javacc</id>
<goals>
<goal>javacc</goal>
</goals>
<configuration>
<sourceDirectory>${project.build.directory}/generated-sources/</sourceDirectory>
<includes>
<!-- Only the grammar file named Parser.jj is compiled. -->
<include>**/Parser.jj</include>
</includes>
<!-- This must be kept synced with Apache Calcite. -->
<lookAhead>1</lookAhead>
<isStatic>false</isStatic>
<outputDirectory>${project.build.directory}/generated-sources/</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,467 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Data declarations for the Impala Calcite parser.
# This file started as a copy of Calcite's default data declarations for
# parsers; "package" and "class" below are set to the Impala parser.
data: {
parser: {
package: "org.apache.impala.calcite.parser",
class: "ImpalaSqlParserImpl",
# List of additional classes and packages to import.
# Example: "org.apache.calcite.sql.*", "java.util.List".
imports: [
]
# List of new keywords. Example: "DATABASES", "TABLES". If the keyword is
# not a reserved keyword, add it to the 'nonReservedKeywords' section.
keywords: [
]
# List of keywords from "keywords" section that are not reserved.
nonReservedKeywords: [
"A"
"ABSENT"
"ABSOLUTE"
"ACTION"
"ADA"
"ADD"
"ADMIN"
"AFTER"
"ALWAYS"
"APPLY"
"ARRAY_AGG"
"ARRAY_CONCAT_AGG"
"ASC"
"ASSERTION"
"ASSIGNMENT"
"ATTRIBUTE"
"ATTRIBUTES"
"BEFORE"
"BERNOULLI"
"BREADTH"
"C"
"CASCADE"
"CATALOG"
"CATALOG_NAME"
"CENTURY"
"CHAIN"
"CHARACTERISTICS"
"CHARACTERS"
"CHARACTER_SET_CATALOG"
"CHARACTER_SET_NAME"
"CHARACTER_SET_SCHEMA"
"CLASS_ORIGIN"
"COBOL"
"COLLATION"
"COLLATION_CATALOG"
"COLLATION_NAME"
"COLLATION_SCHEMA"
"COLUMN_NAME"
"COMMAND_FUNCTION"
"COMMAND_FUNCTION_CODE"
"COMMITTED"
"CONDITIONAL"
"CONDITION_NUMBER"
"CONNECTION"
"CONNECTION_NAME"
"CONSTRAINT_CATALOG"
"CONSTRAINT_NAME"
"CONSTRAINTS"
"CONSTRAINT_SCHEMA"
"CONSTRUCTOR"
"CONTAINS_SUBSTR"
"CONTINUE"
"CURSOR_NAME"
"DATA"
"DATABASE"
"DATE_DIFF"
"DATE_TRUNC"
"DATETIME_DIFF"
"DATETIME_INTERVAL_CODE"
"DATETIME_INTERVAL_PRECISION"
"DATETIME_TRUNC"
"DAYOFWEEK"
"DAYOFYEAR"
"DAY"
"DAYS"
"DECADE"
"DEFAULTS"
"DEFERRABLE"
"DEFERRED"
"DEFINED"
"DEFINER"
"DEGREE"
"DEPTH"
"DERIVED"
"DESC"
"DESCRIPTION"
"DESCRIPTOR"
"DIAGNOSTICS"
"DISPATCH"
"DOMAIN"
"DOW"
"DOY"
"DOT_FORMAT"
"DYNAMIC_FUNCTION"
"DYNAMIC_FUNCTION_CODE"
"ENCODING"
"EPOCH"
"ERROR"
"EXCEPTION"
"EXCLUDE"
"EXCLUDING"
"FINAL"
"FIRST"
"FOLLOWING"
"FORMAT"
"FORTRAN"
"FOUND"
"FRAC_SECOND"
"G"
"GENERAL"
"GENERATED"
"GEOMETRY"
"GO"
"GOTO"
"GRANTED"
"GROUP_CONCAT"
"HIERARCHY"
"HOP"
"HOURS"
"IGNORE"
"ILIKE"
"IMMEDIATE"
"IMMEDIATELY"
"IMPLEMENTATION"
"INCLUDE"
"INCLUDING"
"INCREMENT"
"INITIALLY"
"INPUT"
"INSTANCE"
"INSTANTIABLE"
"INVOKER"
"ISODOW"
"ISOLATION"
"ISOYEAR"
"JAVA"
"JSON"
"K"
"KEY"
"KEY_MEMBER"
"KEY_TYPE"
"LABEL"
"LAST"
"LENGTH"
"LEVEL"
"LIBRARY"
"LOCATOR"
"M"
"MAP"
"MATCHED"
"MAXVALUE"
"MESSAGE_LENGTH"
"MESSAGE_OCTET_LENGTH"
"MESSAGE_TEXT"
"MICROSECOND"
"MILLENNIUM"
"MILLISECOND"
"MINUTES"
"MINVALUE"
"MONTH"
"MONTHS"
"MORE_"
"MUMPS"
"NAME"
"NAMES"
"NANOSECOND"
"NESTING"
"NORMALIZED"
"NULLABLE"
"NULLS"
"NUMBER"
"OBJECT"
"OCTETS"
"OPTION"
"OPTIONS"
"ORDERING"
"ORDINALITY"
"OTHERS"
"OUTPUT"
"OVERRIDING"
"PAD"
"PARAMETER_MODE"
"PARAMETER_NAME"
"PARAMETER_ORDINAL_POSITION"
"PARAMETER_SPECIFIC_CATALOG"
"PARAMETER_SPECIFIC_NAME"
"PARAMETER_SPECIFIC_SCHEMA"
"PARTIAL"
"PASCAL"
"PASSING"
"PASSTHROUGH"
"PAST"
"PATH"
"PIVOT"
"PLACING"
"PLAN"
"PLI"
"PRECEDING"
"PRESERVE"
"PRIOR"
"PRIVILEGES"
"PUBLIC"
"QUARTER"
"QUARTERS"
"READ"
"RELATIVE"
"REPEATABLE"
"REPLACE"
"RESPECT"
"RESTART"
"RESTRICT"
"RETURNED_CARDINALITY"
"RETURNED_LENGTH"
"RETURNED_OCTET_LENGTH"
"RETURNED_SQLSTATE"
"RETURNING"
"RLIKE"
"ROLE"
"ROUTINE"
"ROUTINE_CATALOG"
"ROUTINE_NAME"
"ROUTINE_SCHEMA"
"ROW_COUNT"
"SCALAR"
"SCALE"
"SCHEMA"
"SCHEMA_NAME"
"SCOPE_CATALOGS"
"SCOPE_NAME"
"SCOPE_SCHEMA"
"SECONDS"
"SECTION"
"SECURITY"
"SELF"
"SEPARATOR"
"SEQUENCE"
"SERIALIZABLE"
"SERVER"
"SERVER_NAME"
"SESSION"
"SETS"
"SIMPLE"
"SIZE"
"SOURCE"
"SPACE"
"SPECIFIC_NAME"
"SQL_BIGINT"
"SQL_BINARY"
"SQL_BIT"
"SQL_BLOB"
"SQL_BOOLEAN"
"SQL_CHAR"
"SQL_CLOB"
"SQL_DATE"
"SQL_DECIMAL"
"SQL_DOUBLE"
"SQL_FLOAT"
"SQL_INTEGER"
"SQL_INTERVAL_DAY"
"SQL_INTERVAL_DAY_TO_HOUR"
"SQL_INTERVAL_DAY_TO_MINUTE"
"SQL_INTERVAL_DAY_TO_SECOND"
"SQL_INTERVAL_HOUR"
"SQL_INTERVAL_HOUR_TO_MINUTE"
"SQL_INTERVAL_HOUR_TO_SECOND"
"SQL_INTERVAL_MINUTE"
"SQL_INTERVAL_MINUTE_TO_SECOND"
"SQL_INTERVAL_MONTH"
"SQL_INTERVAL_SECOND"
"SQL_INTERVAL_YEAR"
"SQL_INTERVAL_YEAR_TO_MONTH"
"SQL_LONGVARBINARY"
"SQL_LONGVARCHAR"
"SQL_LONGVARNCHAR"
"SQL_NCHAR"
"SQL_NCLOB"
"SQL_NUMERIC"
"SQL_NVARCHAR"
"SQL_REAL"
"SQL_SMALLINT"
"SQL_TIME"
"SQL_TIMESTAMP"
"SQL_TINYINT"
"SQL_TSI_DAY"
"SQL_TSI_FRAC_SECOND"
"SQL_TSI_HOUR"
"SQL_TSI_MICROSECOND"
"SQL_TSI_MINUTE"
"SQL_TSI_MONTH"
"SQL_TSI_QUARTER"
"SQL_TSI_SECOND"
"SQL_TSI_WEEK"
"SQL_TSI_YEAR"
"SQL_VARBINARY"
"SQL_VARCHAR"
"STATE"
"STATEMENT"
"STRING_AGG"
"STRUCTURE"
"STYLE"
"SUBCLASS_ORIGIN"
"SUBSTITUTE"
"TABLE_NAME"
"TEMPORARY"
"TIES"
"TIME_DIFF"
"TIME_TRUNC"
"TIMESTAMPADD"
"TIMESTAMPDIFF"
"TIMESTAMP_DIFF"
"TIMESTAMP_TRUNC"
"TOP_LEVEL_COUNT"
"TRANSACTION"
"TRANSACTIONS_ACTIVE"
"TRANSACTIONS_COMMITTED"
"TRANSACTIONS_ROLLED_BACK"
"TRANSFORM"
"TRANSFORMS"
"TRIGGER_CATALOG"
"TRIGGER_NAME"
"TRIGGER_SCHEMA"
"TUMBLE"
"TYPE"
"UNBOUNDED"
"UNCOMMITTED"
"UNCONDITIONAL"
"UNDER"
"UNPIVOT"
"UNNAMED"
"USAGE"
"USER_DEFINED_TYPE_CATALOG"
"USER_DEFINED_TYPE_CODE"
"USER_DEFINED_TYPE_NAME"
"USER_DEFINED_TYPE_SCHEMA"
"UTF16"
"UTF32"
"UTF8"
"VERSION"
"VIEW"
"WEEK"
"WEEKS"
"WORK"
"WRAPPER"
"WRITE"
"XML"
"YEAR"
"YEARS"
"ZONE"
]
# List of non-reserved keywords to add;
# items in this list become non-reserved.
nonReservedKeywordsToAdd: [
]
# List of non-reserved keywords to remove;
# items in this list become reserved.
nonReservedKeywordsToRemove: [
]
# List of additional join types. Each is a method with no arguments.
# Example: "LeftSemiJoin".
joinTypes: [
]
# List of methods for parsing custom SQL statements.
# Return type of method implementation should be 'SqlNode'.
# Example: "SqlShowDatabases()", "SqlShowTables()".
statementParserMethods: [
]
# List of methods for parsing custom literals.
# Return type of method implementation should be "SqlNode".
# Example: ParseJsonLiteral().
literalParserMethods: [
]
# List of methods for parsing custom data types.
# Return type of method implementation should be "SqlTypeNameSpec".
# Example: SqlParseTimeStampZ().
dataTypeParserMethods: [
]
# List of methods for parsing builtin function calls.
# Return type of method implementation should be "SqlNode".
# Example: "DateTimeConstructorCall()".
builtinFunctionCallMethods: [
]
# List of methods for parsing extensions to "ALTER <scope>" calls.
# Each must accept arguments "(SqlParserPos pos, String scope)".
# Example: "SqlAlterTable".
alterStatementParserMethods: [
]
# List of methods for parsing extensions to "CREATE [OR REPLACE]" calls.
# Each must accept arguments "(SqlParserPos pos, boolean replace)".
# Example: "SqlCreateForeignSchema".
createStatementParserMethods: [
]
# List of methods for parsing extensions to "DROP" calls.
# Each must accept arguments "(SqlParserPos pos)".
# Example: "SqlDropSchema".
dropStatementParserMethods: [
]
# List of methods for parsing extensions to "TRUNCATE" calls.
# Each must accept arguments "(SqlParserPos pos)".
# Example: "SqlTruncate".
truncateStatementParserMethods: [
]
# Binary operators tokens.
# Example: "< INFIX_CAST: \"::\" >".
binaryOperatorsTokens: [
]
# Binary operators initialization.
# Example: "InfixCast".
extraBinaryExpressions: [
]
# List of files in @includes directory that have parser method
# implementations for parsing custom SQL statements, literals or types
# given as part of "statementParserMethods", "literalParserMethods" or
# "dataTypeParserMethods".
# Example: "parserImpls.ftl".
implementationFiles: [
]
# Custom identifier token.
# Example: "< IDENTIFIER: (<LETTER>|<DIGIT>)+ >".
customIdentifierToken: ""
includePosixOperators: false
includeCompoundIdentifier: true
includeBraces: true
includeAdditionalDeclarations: false
includeParsingStringLiteralAsArrayLiteral: false
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,8 @@ package org.apache.impala.calcite.service;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.impala.calcite.parser.ImpalaSqlParserImpl;
import org.apache.impala.calcite.validate.ImpalaConformance;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,7 +42,9 @@ public class CalciteQueryParser implements CompilerStep {
public SqlNode parse() throws SqlParseException {
// Create an SQL parser
SqlParser parser = SqlParser.create(queryCtx_.getStmt());
SqlParser parser = SqlParser.create(queryCtx_.getStmt(),
SqlParser.config().withParserFactory(ImpalaSqlParserImpl.FACTORY)
.withConformance(ImpalaConformance.INSTANCE));
// Parse the query into an AST
SqlNode sqlNode = parser.parseQuery();