From 76fa3b2ded92cf5542c19e01809ce6933c40ddae Mon Sep 17 00:00:00 2001 From: Lenni Kuff Date: Thu, 5 Dec 2013 08:09:33 -0800 Subject: [PATCH] Update DDL to support 'STORED AS PARQUET' and 'STORED AS AVRO' syntax This change updates our DDL syntax support to allow for using 'STORED AS PARQUET' as well as 'STORED AS PARQUETFILE'. Moving forward we should prefer the new syntax, but continue to support the old. For Avro, 'STORED AS AVRO' replaces 'STORED AS AVROFILE' outright: since we have not yet documented the 'AVROFILE' syntax, support for the old keyword is removed rather than kept as an alias. Change-Id: I10c73a71a94ee488c9ae205485777b58ab8957c9 Reviewed-on: http://gerrit.ent.cloudera.com:8080/1053 Reviewed-by: Marcel Kornacker Tested-by: jenkins --- fe/src/main/cup/sql-parser.y | 12 ++++++----- .../analysis/CreateTableAsSelectStmt.java | 2 +- .../impala/catalog/HdfsFileFormat.java | 4 ++-- fe/src/main/jflex/sql-scanner.flex | 3 ++- .../impala/analysis/AnalyzeDDLTest.java | 20 ++++++++++--------- .../impala/analysis/AuthorizationTest.java | 6 +++--- .../cloudera/impala/analysis/ParserTest.java | 14 ++++++------- testdata/bin/generate-schema-statements.py | 4 ++-- .../queries/QueryTest/create.test | 6 +++--- .../queries/QueryTest/insert_permutation.test | 2 +- .../queries/QueryTest/show-create-table.test | 6 +++--- 11 files changed, 42 insertions(+), 37 deletions(-) diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index 409083193..1c362cf9b 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -200,7 +200,7 @@ parser code {: // List of keywords. Please keep them sorted alphabetically. 
terminal KW_ADD, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_AS, KW_ASC, KW_AVG, - KW_AVROFILE, KW_BETWEEN, KW_BIGINT, KW_BOOLEAN, KW_BY, KW_CASE, KW_CAST, + KW_AVRO, KW_BETWEEN, KW_BIGINT, KW_BOOLEAN, KW_BY, KW_CASE, KW_CAST, KW_CHANGE, KW_CHAR, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMPUTE, KW_COUNT, KW_CREATE, KW_DATA, KW_DATABASE, KW_DATABASES, KW_DATE, KW_DATETIME, KW_DELIMITED, KW_DESC, KW_DESCRIBE, KW_DISTINCT, KW_DISTINCTPC, KW_DISTINCTPCSA, KW_DIV, @@ -212,8 +212,8 @@ terminal KW_INVALIDATE, KW_IS, KW_JOIN, KW_LAST, KW_LEFT, KW_LIKE, KW_LIMIT, KW_LINES, KW_LOAD, KW_LOCATION, KW_MAX, KW_MERGE_FN, KW_METADATA, KW_MIN, KW_NDV, KW_NOT, KW_NULL, KW_NULLS, KW_OFFSET, KW_ON, KW_OR, KW_ORDER, KW_OUTER, KW_OVERWRITE, - KW_PARQUETFILE, KW_PARTITION, KW_PARTITIONED, KW_RCFILE, KW_REFRESH, KW_REGEXP, - KW_RENAME, KW_REPLACE, KW_RETURNS, KW_RIGHT, KW_RLIKE, KW_ROW, KW_SCHEMA, + KW_PARQUET, KW_PARQUETFILE, KW_PARTITION, KW_PARTITIONED, KW_RCFILE, KW_REFRESH, + KW_REGEXP, KW_RENAME, KW_REPLACE, KW_RETURNS, KW_RIGHT, KW_RLIKE, KW_ROW, KW_SCHEMA, KW_SCHEMAS, KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES, KW_SERIALIZE_FN, KW_SET, KW_SHOW, KW_SMALLINT, KW_STORED, KW_STRING, KW_SUM, KW_SYMBOL, KW_TABLE, KW_TABLES, KW_TBLPROPERTIES, KW_TERMINATED, KW_TEXTFILE, @@ -719,7 +719,9 @@ file_format_create_table_val ::= ; file_format_val ::= - KW_PARQUETFILE + KW_PARQUET + {: RESULT = THdfsFileFormat.PARQUET; :} + | KW_PARQUETFILE {: RESULT = THdfsFileFormat.PARQUET; :} | KW_TEXTFILE {: RESULT = THdfsFileFormat.TEXT; :} @@ -727,7 +729,7 @@ file_format_val ::= {: RESULT = THdfsFileFormat.SEQUENCE_FILE; :} | KW_RCFILE {: RESULT = THdfsFileFormat.RC_FILE; :} - | KW_AVROFILE + | KW_AVRO {: RESULT = THdfsFileFormat.AVRO; :} ; diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableAsSelectStmt.java index 2ad79435d..7c257e5df 100644 --- 
a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableAsSelectStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableAsSelectStmt.java @@ -82,7 +82,7 @@ public class CreateTableAsSelectStmt extends StatementBase { throw new AnalysisException(String.format("CREATE TABLE AS SELECT " + "does not support (%s) file format. Supported formats are: (%s)", createStmt_.getFileFormat().toString().replace("_", ""), - "PARQUETFILE, TEXTFILE")); + "PARQUET, TEXTFILE")); } // The full privilege check for the database will be done as part of the INSERT diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsFileFormat.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsFileFormat.java index 5fd5d62d5..64f5562ee 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsFileFormat.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/HdfsFileFormat.java @@ -136,8 +136,8 @@ public enum HdfsFileFormat { case RC_FILE: return "RCFILE"; case TEXT: return "TEXTFILE"; case SEQUENCE_FILE: return "SEQUENCEFILE"; - case AVRO: return "AVROFILE"; - case PARQUET: return "PARQUETFILE"; + case AVRO: return "AVRO"; + case PARQUET: return "PARQUET"; case LZO_TEXT: // It is not currently possible to create a table with LZO compressed text files // in Impala, but this is valid in Hive. 
diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index af7aaec74..d5fcd3c36 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -57,7 +57,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("as", new Integer(SqlParserSymbols.KW_AS)); keywordMap.put("asc", new Integer(SqlParserSymbols.KW_ASC)); keywordMap.put("avg", new Integer(SqlParserSymbols.KW_AVG)); - keywordMap.put("avrofile", new Integer(SqlParserSymbols.KW_AVROFILE)); + keywordMap.put("avro", new Integer(SqlParserSymbols.KW_AVRO)); keywordMap.put("between", new Integer(SqlParserSymbols.KW_BETWEEN)); keywordMap.put("bigint", new Integer(SqlParserSymbols.KW_BIGINT)); keywordMap.put("boolean", new Integer(SqlParserSymbols.KW_BOOLEAN)); @@ -143,6 +143,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols; keywordMap.put("order", new Integer(SqlParserSymbols.KW_ORDER)); keywordMap.put("outer", new Integer(SqlParserSymbols.KW_OUTER)); keywordMap.put("overwrite", new Integer(SqlParserSymbols.KW_OVERWRITE)); + keywordMap.put("parquet", new Integer(SqlParserSymbols.KW_PARQUET)); keywordMap.put("parquetfile", new Integer(SqlParserSymbols.KW_PARQUETFILE)); keywordMap.put("partition", new Integer(SqlParserSymbols.KW_PARTITION)); keywordMap.put("partitioned", new Integer(SqlParserSymbols.KW_PARTITIONED)); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java index b2b32764b..dc370840a 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java @@ -259,7 +259,7 @@ public class AnalyzeDDLTest extends AnalyzerTest { AnalyzesOk("alter table functional.alltypes PARTITION (month=11, year=2010) " + "set fileformat parquetfile"); AnalyzesOk("alter table functional.stringpartitionkey PARTITION " + - "(string_col='partition1') set fileformat 
parquetfile"); + "(string_col='partition1') set fileformat parquet"); AnalyzesOk("alter table functional.stringpartitionkey PARTITION " + "(string_col='PaRtiTion1') set location '/a/b/c'"); // Arbitrary exprs as partition key values. Constant exprs are ok. @@ -542,6 +542,8 @@ public class AnalyzeDDLTest extends AnalyzerTest { "as select * from functional.jointbl"); AnalyzesOk("create table newtbl stored as parquetfile " + "as select * from functional.alltypes"); + AnalyzesOk("create table newtbl stored as parquet " + + "as select * from functional.alltypes"); AnalyzesOk("create table newtbl as select int_col from functional.alltypes"); AnalyzesOk("create table functional.newtbl " + @@ -567,10 +569,10 @@ public class AnalyzeDDLTest extends AnalyzerTest { // Unsupported file formats AnalysisError("create table foo stored as sequencefile as select 1", "CREATE TABLE AS SELECT does not support (SEQUENCEFILE) file format. " + - "Supported formats are: (PARQUETFILE, TEXTFILE)"); + "Supported formats are: (PARQUET, TEXTFILE)"); AnalysisError("create table foo stored as RCFILE as select 1", "CREATE TABLE AS SELECT does not support (RCFILE) file format. 
" + - "Supported formats are: (PARQUETFILE, TEXTFILE)"); + "Supported formats are: (PARQUET, TEXTFILE)"); } @Test @@ -621,16 +623,16 @@ public class AnalyzeDDLTest extends AnalyzerTest { // Analysis of Avro schemas AnalyzesOk("create table foo (i int) with serdeproperties ('avro.schema.url'=" + - "'hdfs://schema.avsc') stored as avrofile"); - AnalyzesOk("create table foo (i int) stored as avrofile tblproperties " + + "'hdfs://schema.avsc') stored as avro"); + AnalyzesOk("create table foo (i int) stored as avro tblproperties " + "('avro.schema.url'='hdfs://schema.avsc')"); - AnalyzesOk("create table foo (i int) stored as avrofile tblproperties " + + AnalyzesOk("create table foo (i int) stored as avro tblproperties " + "('avro.schema.literal'='{\"name\": \"my_record\"}')"); - AnalysisError("create table foo (i int) stored as avrofile", + AnalysisError("create table foo (i int) stored as avro", "No Avro schema provided for table: default.foo"); - AnalysisError("create table foo (i int) stored as avrofile tblproperties ('a'='b')", + AnalysisError("create table foo (i int) stored as avro tblproperties ('a'='b')", "No Avro schema provided for table: default.foo"); - AnalysisError("create table foo (i int) stored as avrofile tblproperties " + + AnalysisError("create table foo (i int) stored as avro tblproperties " + "('avro.schema.url'='schema.avsc')", "avro.schema.url must be of form " + "\"http://path/to/schema/file\" or \"hdfs://namenode:port/path/to/schema/file" + "\", got schema.avsc"); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AuthorizationTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AuthorizationTest.java index dfe25119f..a0bb396c3 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AuthorizationTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AuthorizationTest.java @@ -541,7 +541,7 @@ public class AuthorizationTest { AuthzOk("ALTER TABLE functional_seq_snap.alltypes CHANGE int_col c1 int"); AuthzOk("ALTER TABLE 
functional_seq_snap.alltypes DROP int_col"); AuthzOk("ALTER TABLE functional_seq_snap.alltypes RENAME TO functional_seq_snap.t1"); - AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET FILEFORMAT PARQUETFILE"); + AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET FILEFORMAT PARQUET"); AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET LOCATION " + "'/test-warehouse/new_table'"); AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET TBLPROPERTIES " + @@ -571,7 +571,7 @@ public class AuthorizationTest { "User '%s' does not have privileges to access: " + "hdfs://localhost:20510/test-warehouse/new_table"); - AuthzError("ALTER TABLE functional.alltypes SET FILEFORMAT PARQUETFILE", + AuthzError("ALTER TABLE functional.alltypes SET FILEFORMAT PARQUET", "User '%s' does not have privileges to execute 'ALTER' on: functional.alltypes"); AuthzError("ALTER TABLE functional.alltypes ADD COLUMNS (c1 int)", "User '%s' does not have privileges to execute 'ALTER' on: functional.alltypes"); @@ -587,7 +587,7 @@ public class AuthorizationTest { "User '%s' does not have privileges to execute 'ALTER' on: functional.alltypes"); // Trying to ALTER TABLE a view does not reveal any privileged information. 
- AuthzError("ALTER TABLE functional.view_view SET FILEFORMAT PARQUETFILE", + AuthzError("ALTER TABLE functional.view_view SET FILEFORMAT PARQUET", "User '%s' does not have privileges to execute 'ALTER' on: functional.view_view"); AuthzError("ALTER TABLE functional.view_view ADD COLUMNS (c1 int)", "User '%s' does not have privileges to execute 'ALTER' on: functional.view_view"); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java index 08f07dc07..59709c6b4 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java @@ -1507,7 +1507,7 @@ public class ParserTest { public void TestAlterTableSet() { // Supported file formats String [] supportedFileFormats = - {"TEXTFILE", "SEQUENCEFILE", "PARQUETFILE", "RCFILE", "AVROFILE"}; + {"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE", "AVRO"}; for (String format: supportedFileFormats) { ParsesOk("ALTER TABLE Foo SET FILEFORMAT " + format); ParsesOk("ALTER TABLE TestDb.Foo SET FILEFORMAT " + format); @@ -1530,7 +1530,7 @@ public class ParserTest { ParserError("ALTER TABLE Foo PARTITION ('str') SET FILEFORMAT TEXTFILE"); ParserError("ALTER TABLE Foo PARTITION (a=1, 5) SET FILEFORMAT TEXTFILE"); ParserError("ALTER TABLE Foo PARTITION () SET FILEFORMAT PARQUETFILE"); - ParserError("ALTER TABLE Foo PARTITION (,) SET FILEFORMAT PARQUETFILE"); + ParserError("ALTER TABLE Foo PARTITION (,) SET FILEFORMAT PARQUET"); ParserError("ALTER TABLE Foo PARTITION (a=1) SET FILEFORMAT"); ParserError("ALTER TABLE Foo PARTITION (a=1) SET LOCATION"); ParserError("ALTER TABLE TestDb.Foo SET LOCATION abc"); @@ -1614,7 +1614,7 @@ public class ParserTest { // Supported file formats String [] supportedFileFormats = - {"TEXTFILE", "SEQUENCEFILE", "PARQUETFILE", "RCFILE", "AVROFILE"}; + {"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE", "AVRO"}; for (String 
format: supportedFileFormats) { ParsesOk("CREATE TABLE Foo (i int, s string) STORED AS " + format); ParsesOk("CREATE EXTERNAL TABLE Foo (i int, s string) STORED AS " + format); @@ -1798,7 +1798,7 @@ public class ParserTest { ParsesOk("CREATE TABLE Foo.Bar AS SELECT int_col, bool_col from tbl limit 10"); ParsesOk("CREATE TABLE Foo.Bar LOCATION '/a/b' AS SELECT * from foo"); ParsesOk("CREATE TABLE IF NOT EXISTS Foo.Bar LOCATION '/a/b' AS SELECT * from foo"); - ParsesOk("CREATE TABLE Foo STORED AS PARQUETFILE AS SELECT 1"); + ParsesOk("CREATE TABLE Foo STORED AS PARQUET AS SELECT 1"); ParsesOk("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS SELECT 1"); ParsesOk("CREATE TABLE Foo TBLPROPERTIES ('a'='b', 'c'='d') AS SELECT * from bar"); @@ -1806,9 +1806,9 @@ public class ParserTest { ParsesOk("CREATE TABLE Foo AS with t1 as (select 1) select * from t1"); // Incomplete AS SELECT statement - ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS SELECT"); - ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS WITH"); - ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS"); + ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUET AS SELECT"); + ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUET AS WITH"); + ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUET AS"); // INSERT statements are not allowed ParserError("CREATE TABLE Foo AS INSERT INTO Foo SELECT 1"); diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py index 209bf1c4c..7c45c4724 100755 --- a/testdata/bin/generate-schema-statements.py +++ b/testdata/bin/generate-schema-statements.py @@ -110,11 +110,11 @@ FILE_FORMAT_MAP = { 'text': 'TEXTFILE', 'seq': 'SEQUENCEFILE', 'rc': 'RCFILE', - 'parquet': 'PARQUETFILE', + 'parquet': 'PARQUET', 'text_lzo': "\nINPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'" + "\nOUTPUTFORMAT 
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'", - 'avro': 'AVROFILE', + 'avro': 'AVRO', 'hbase': "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" } diff --git a/testdata/workloads/functional-query/queries/QueryTest/create.test b/testdata/workloads/functional-query/queries/QueryTest/create.test index 02394bbbf..5d95dec34 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create.test @@ -152,7 +152,7 @@ STRING ---- QUERY # CREATE TABLE LIKE on partitioned table create table alltypes_test like functional_seq_snap.alltypes -stored as parquetfile +stored as parquet ---- RESULTS ==== ---- QUERY @@ -254,7 +254,7 @@ INT, STRING ==== ---- QUERY # Ensure that a table can be created using CTAS -create table ctas_join stored as parquetfile as +create table ctas_join stored as parquet as select j.*, a.int_col, 1*2 from functional.jointbl j join functional_seq_snap.alltypes a on (j.alltypes_id=a.id) @@ -295,7 +295,7 @@ BIGINT, STRING, INT, INT, INT, BIGINT ==== ---- QUERY # Since the table already exists, the second CTAS should be a no-op -create table if not exists ctas_join stored as parquetfile as +create table if not exists ctas_join stored as parquet as select j.*, a.int_col, 1*2 from functional.jointbl j join functional_seq_snap.alltypes a on (j.alltypes_id=a.id) limit 1 diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test index 617d86183..1e8c93111 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test +++ b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test @@ -11,7 +11,7 @@ use insert_permutation_test create table perm_nopart(int_col1 int, string_col string, int_col2 int); create table perm_part(int_col1 int, string_col string) partitioned by (p1 int, p2 string); create table 
parquet_part(int_col1 int, string_col string) -partitioned by (p1 int, p2 string) stored as parquetfile; +partitioned by (p1 int, p2 string) stored as parquet; ---- RESULTS ==== ---- QUERY diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test index 8e87a2e18..578029eab 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test @@ -137,20 +137,20 @@ STORED AS TEXTFILE LOCATION '$$location_uri$$' ==== ---- CREATE_TABLE -# testing with parquetfile specified +# testing with parquet specified CREATE TABLE test6 ( year INT, month INT, id INT COMMENT 'Add a comment' ) -STORED AS PARQUETFILE +STORED AS PARQUET ---- RESULTS CREATE TABLE show_create_table_test_db.test6 ( year INT, month INT, id INT COMMENT 'Add a comment' ) -STORED AS PARQUETFILE +STORED AS PARQUET LOCATION '$$location_uri$$' ==== ---- CREATE_TABLE