Update DDL to support 'STORED AS PARQUET' and 'STORED AS AVRO' syntax

This change updates our DDL syntax to accept 'STORED AS PARQUET' in addition to
'STORED AS PARQUETFILE'. Going forward we should prefer the new syntax, but we
continue to support the old one. I made the equivalent change for Avro ('STORED AS
AVRO'), but since we have not yet documented the 'AVROFILE' syntax, I did not keep
support for the old keyword.
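
For example, both statements below now parse, with the first form preferred
(table and column names here are only illustrative):

  CREATE TABLE parquet_tbl (id INT) STORED AS PARQUET;
  CREATE TABLE legacy_parquet_tbl (id INT) STORED AS PARQUETFILE;

Avro tables take the new keyword only and still require a schema, e.g.:

  CREATE TABLE avro_tbl (id INT) STORED AS AVRO
  TBLPROPERTIES ('avro.schema.url'='hdfs://namenode:port/path/to/schema.avsc');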

Change-Id: I10c73a71a94ee488c9ae205485777b58ab8957c9
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1053
Reviewed-by: Marcel Kornacker <marcel@cloudera.com>
Tested-by: jenkins
Author:    Lenni Kuff
Date:      2013-12-05 08:09:33 -08:00
Committer: Henry Robinson
Parent:    9d71dd3d0c
Commit:    76fa3b2ded

11 changed files with 42 additions and 37 deletions

@@ -200,7 +200,7 @@ parser code {:
 // List of keywords. Please keep them sorted alphabetically.
 terminal
 KW_ADD, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_AS, KW_ASC, KW_AVG,
-KW_AVROFILE, KW_BETWEEN, KW_BIGINT, KW_BOOLEAN, KW_BY, KW_CASE, KW_CAST,
+KW_AVRO, KW_BETWEEN, KW_BIGINT, KW_BOOLEAN, KW_BY, KW_CASE, KW_CAST,
 KW_CHANGE, KW_CHAR, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMPUTE, KW_COUNT, KW_CREATE,
 KW_DATA, KW_DATABASE, KW_DATABASES, KW_DATE, KW_DATETIME, KW_DELIMITED,
 KW_DESC, KW_DESCRIBE, KW_DISTINCT, KW_DISTINCTPC, KW_DISTINCTPCSA, KW_DIV,
@@ -212,8 +212,8 @@ terminal
 KW_INVALIDATE, KW_IS, KW_JOIN, KW_LAST, KW_LEFT, KW_LIKE, KW_LIMIT,
 KW_LINES, KW_LOAD, KW_LOCATION, KW_MAX, KW_MERGE_FN, KW_METADATA, KW_MIN, KW_NDV,
 KW_NOT, KW_NULL, KW_NULLS, KW_OFFSET, KW_ON, KW_OR, KW_ORDER, KW_OUTER, KW_OVERWRITE,
-KW_PARQUETFILE, KW_PARTITION, KW_PARTITIONED, KW_RCFILE, KW_REFRESH, KW_REGEXP,
-KW_RENAME, KW_REPLACE, KW_RETURNS, KW_RIGHT, KW_RLIKE, KW_ROW, KW_SCHEMA,
+KW_PARQUET, KW_PARQUETFILE, KW_PARTITION, KW_PARTITIONED, KW_RCFILE, KW_REFRESH,
+KW_REGEXP, KW_RENAME, KW_REPLACE, KW_RETURNS, KW_RIGHT, KW_RLIKE, KW_ROW, KW_SCHEMA,
 KW_SCHEMAS, KW_SELECT, KW_SEMI, KW_SEQUENCEFILE, KW_SERDEPROPERTIES,
 KW_SERIALIZE_FN, KW_SET, KW_SHOW, KW_SMALLINT, KW_STORED, KW_STRING, KW_SUM,
 KW_SYMBOL, KW_TABLE, KW_TABLES, KW_TBLPROPERTIES, KW_TERMINATED, KW_TEXTFILE,
@@ -719,7 +719,9 @@ file_format_create_table_val ::=
 ;
 file_format_val ::=
-KW_PARQUETFILE
+KW_PARQUET
 {: RESULT = THdfsFileFormat.PARQUET; :}
+| KW_PARQUETFILE
+{: RESULT = THdfsFileFormat.PARQUET; :}
 | KW_TEXTFILE
 {: RESULT = THdfsFileFormat.TEXT; :}
@@ -727,7 +729,7 @@ file_format_val ::=
 {: RESULT = THdfsFileFormat.SEQUENCE_FILE; :}
 | KW_RCFILE
 {: RESULT = THdfsFileFormat.RC_FILE; :}
-| KW_AVROFILE
+| KW_AVRO
 {: RESULT = THdfsFileFormat.AVRO; :}
 ;

@@ -82,7 +82,7 @@ public class CreateTableAsSelectStmt extends StatementBase {
 throw new AnalysisException(String.format("CREATE TABLE AS SELECT " +
 "does not support (%s) file format. Supported formats are: (%s)",
 createStmt_.getFileFormat().toString().replace("_", ""),
-"PARQUETFILE, TEXTFILE"));
+"PARQUET, TEXTFILE"));
 }
 // The full privilege check for the database will be done as part of the INSERT

@@ -136,8 +136,8 @@ public enum HdfsFileFormat {
 case RC_FILE: return "RCFILE";
 case TEXT: return "TEXTFILE";
 case SEQUENCE_FILE: return "SEQUENCEFILE";
-case AVRO: return "AVROFILE";
-case PARQUET: return "PARQUETFILE";
+case AVRO: return "AVRO";
+case PARQUET: return "PARQUET";
 case LZO_TEXT:
 // It is not currently possible to create a table with LZO compressed text files
 // in Impala, but this is valid in Hive.

@@ -57,7 +57,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols;
 keywordMap.put("as", new Integer(SqlParserSymbols.KW_AS));
 keywordMap.put("asc", new Integer(SqlParserSymbols.KW_ASC));
 keywordMap.put("avg", new Integer(SqlParserSymbols.KW_AVG));
-keywordMap.put("avrofile", new Integer(SqlParserSymbols.KW_AVROFILE));
+keywordMap.put("avro", new Integer(SqlParserSymbols.KW_AVRO));
 keywordMap.put("between", new Integer(SqlParserSymbols.KW_BETWEEN));
 keywordMap.put("bigint", new Integer(SqlParserSymbols.KW_BIGINT));
 keywordMap.put("boolean", new Integer(SqlParserSymbols.KW_BOOLEAN));
@@ -143,6 +143,7 @@ import com.cloudera.impala.analysis.SqlParserSymbols;
 keywordMap.put("order", new Integer(SqlParserSymbols.KW_ORDER));
 keywordMap.put("outer", new Integer(SqlParserSymbols.KW_OUTER));
 keywordMap.put("overwrite", new Integer(SqlParserSymbols.KW_OVERWRITE));
+keywordMap.put("parquet", new Integer(SqlParserSymbols.KW_PARQUET));
 keywordMap.put("parquetfile", new Integer(SqlParserSymbols.KW_PARQUETFILE));
 keywordMap.put("partition", new Integer(SqlParserSymbols.KW_PARTITION));
 keywordMap.put("partitioned", new Integer(SqlParserSymbols.KW_PARTITIONED));

@@ -259,7 +259,7 @@ public class AnalyzeDDLTest extends AnalyzerTest {
 AnalyzesOk("alter table functional.alltypes PARTITION (month=11, year=2010) " +
 "set fileformat parquetfile");
 AnalyzesOk("alter table functional.stringpartitionkey PARTITION " +
-"(string_col='partition1') set fileformat parquetfile");
+"(string_col='partition1') set fileformat parquet");
 AnalyzesOk("alter table functional.stringpartitionkey PARTITION " +
 "(string_col='PaRtiTion1') set location '/a/b/c'");
 // Arbitrary exprs as partition key values. Constant exprs are ok.
@@ -542,6 +542,8 @@ public class AnalyzeDDLTest extends AnalyzerTest {
 "as select * from functional.jointbl");
 AnalyzesOk("create table newtbl stored as parquetfile " +
 "as select * from functional.alltypes");
+AnalyzesOk("create table newtbl stored as parquet " +
+"as select * from functional.alltypes");
 AnalyzesOk("create table newtbl as select int_col from functional.alltypes");
 AnalyzesOk("create table functional.newtbl " +
@@ -567,10 +569,10 @@ public class AnalyzeDDLTest extends AnalyzerTest {
 // Unsupported file formats
 AnalysisError("create table foo stored as sequencefile as select 1",
 "CREATE TABLE AS SELECT does not support (SEQUENCEFILE) file format. " +
-"Supported formats are: (PARQUETFILE, TEXTFILE)");
+"Supported formats are: (PARQUET, TEXTFILE)");
 AnalysisError("create table foo stored as RCFILE as select 1",
 "CREATE TABLE AS SELECT does not support (RCFILE) file format. " +
-"Supported formats are: (PARQUETFILE, TEXTFILE)");
+"Supported formats are: (PARQUET, TEXTFILE)");
 }
 @Test
@@ -621,16 +623,16 @@ public class AnalyzeDDLTest extends AnalyzerTest {
 // Analysis of Avro schemas
 AnalyzesOk("create table foo (i int) with serdeproperties ('avro.schema.url'=" +
-"'hdfs://schema.avsc') stored as avrofile");
-AnalyzesOk("create table foo (i int) stored as avrofile tblproperties " +
+"'hdfs://schema.avsc') stored as avro");
+AnalyzesOk("create table foo (i int) stored as avro tblproperties " +
 "('avro.schema.url'='hdfs://schema.avsc')");
-AnalyzesOk("create table foo (i int) stored as avrofile tblproperties " +
+AnalyzesOk("create table foo (i int) stored as avro tblproperties " +
 "('avro.schema.literal'='{\"name\": \"my_record\"}')");
-AnalysisError("create table foo (i int) stored as avrofile",
+AnalysisError("create table foo (i int) stored as avro",
 "No Avro schema provided for table: default.foo");
-AnalysisError("create table foo (i int) stored as avrofile tblproperties ('a'='b')",
+AnalysisError("create table foo (i int) stored as avro tblproperties ('a'='b')",
 "No Avro schema provided for table: default.foo");
-AnalysisError("create table foo (i int) stored as avrofile tblproperties " +
+AnalysisError("create table foo (i int) stored as avro tblproperties " +
 "('avro.schema.url'='schema.avsc')", "avro.schema.url must be of form " +
 "\"http://path/to/schema/file\" or \"hdfs://namenode:port/path/to/schema/file" +
 "\", got schema.avsc");

@@ -541,7 +541,7 @@ public class AuthorizationTest {
 AuthzOk("ALTER TABLE functional_seq_snap.alltypes CHANGE int_col c1 int");
 AuthzOk("ALTER TABLE functional_seq_snap.alltypes DROP int_col");
 AuthzOk("ALTER TABLE functional_seq_snap.alltypes RENAME TO functional_seq_snap.t1");
-AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET FILEFORMAT PARQUETFILE");
+AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET FILEFORMAT PARQUET");
 AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET LOCATION " +
 "'/test-warehouse/new_table'");
 AuthzOk("ALTER TABLE functional_seq_snap.alltypes SET TBLPROPERTIES " +
@@ -571,7 +571,7 @@ public class AuthorizationTest {
 "User '%s' does not have privileges to access: " +
 "hdfs://localhost:20510/test-warehouse/new_table");
-AuthzError("ALTER TABLE functional.alltypes SET FILEFORMAT PARQUETFILE",
+AuthzError("ALTER TABLE functional.alltypes SET FILEFORMAT PARQUET",
 "User '%s' does not have privileges to execute 'ALTER' on: functional.alltypes");
 AuthzError("ALTER TABLE functional.alltypes ADD COLUMNS (c1 int)",
 "User '%s' does not have privileges to execute 'ALTER' on: functional.alltypes");
@@ -587,7 +587,7 @@ public class AuthorizationTest {
 "User '%s' does not have privileges to execute 'ALTER' on: functional.alltypes");
 // Trying to ALTER TABLE a view does not reveal any privileged information.
-AuthzError("ALTER TABLE functional.view_view SET FILEFORMAT PARQUETFILE",
+AuthzError("ALTER TABLE functional.view_view SET FILEFORMAT PARQUET",
 "User '%s' does not have privileges to execute 'ALTER' on: functional.view_view");
 AuthzError("ALTER TABLE functional.view_view ADD COLUMNS (c1 int)",
 "User '%s' does not have privileges to execute 'ALTER' on: functional.view_view");

@@ -1507,7 +1507,7 @@ public class ParserTest {
 public void TestAlterTableSet() {
 // Supported file formats
 String [] supportedFileFormats =
-{"TEXTFILE", "SEQUENCEFILE", "PARQUETFILE", "RCFILE", "AVROFILE"};
+{"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE", "AVRO"};
 for (String format: supportedFileFormats) {
 ParsesOk("ALTER TABLE Foo SET FILEFORMAT " + format);
 ParsesOk("ALTER TABLE TestDb.Foo SET FILEFORMAT " + format);
@@ -1530,7 +1530,7 @@ public class ParserTest {
 ParserError("ALTER TABLE Foo PARTITION ('str') SET FILEFORMAT TEXTFILE");
 ParserError("ALTER TABLE Foo PARTITION (a=1, 5) SET FILEFORMAT TEXTFILE");
 ParserError("ALTER TABLE Foo PARTITION () SET FILEFORMAT PARQUETFILE");
-ParserError("ALTER TABLE Foo PARTITION (,) SET FILEFORMAT PARQUETFILE");
+ParserError("ALTER TABLE Foo PARTITION (,) SET FILEFORMAT PARQUET");
 ParserError("ALTER TABLE Foo PARTITION (a=1) SET FILEFORMAT");
 ParserError("ALTER TABLE Foo PARTITION (a=1) SET LOCATION");
 ParserError("ALTER TABLE TestDb.Foo SET LOCATION abc");
@@ -1614,7 +1614,7 @@ public class ParserTest {
 // Supported file formats
 String [] supportedFileFormats =
-{"TEXTFILE", "SEQUENCEFILE", "PARQUETFILE", "RCFILE", "AVROFILE"};
+{"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE", "AVRO"};
 for (String format: supportedFileFormats) {
 ParsesOk("CREATE TABLE Foo (i int, s string) STORED AS " + format);
 ParsesOk("CREATE EXTERNAL TABLE Foo (i int, s string) STORED AS " + format);
@@ -1798,7 +1798,7 @@ public class ParserTest {
 ParsesOk("CREATE TABLE Foo.Bar AS SELECT int_col, bool_col from tbl limit 10");
 ParsesOk("CREATE TABLE Foo.Bar LOCATION '/a/b' AS SELECT * from foo");
 ParsesOk("CREATE TABLE IF NOT EXISTS Foo.Bar LOCATION '/a/b' AS SELECT * from foo");
-ParsesOk("CREATE TABLE Foo STORED AS PARQUETFILE AS SELECT 1");
+ParsesOk("CREATE TABLE Foo STORED AS PARQUET AS SELECT 1");
 ParsesOk("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS SELECT 1");
 ParsesOk("CREATE TABLE Foo TBLPROPERTIES ('a'='b', 'c'='d') AS SELECT * from bar");
@@ -1806,9 +1806,9 @@ public class ParserTest {
 ParsesOk("CREATE TABLE Foo AS with t1 as (select 1) select * from t1");
 // Incomplete AS SELECT statement
-ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS SELECT");
-ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS WITH");
-ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUETFILE AS");
+ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUET AS SELECT");
+ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUET AS WITH");
+ParserError("CREATE TABLE Foo ROW FORMAT DELIMITED STORED AS PARQUET AS");
 // INSERT statements are not allowed
 ParserError("CREATE TABLE Foo AS INSERT INTO Foo SELECT 1");

@@ -110,11 +110,11 @@ FILE_FORMAT_MAP = {
 'text': 'TEXTFILE',
 'seq': 'SEQUENCEFILE',
 'rc': 'RCFILE',
-'parquet': 'PARQUETFILE',
+'parquet': 'PARQUET',
 'text_lzo':
 "\nINPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'" +
 "\nOUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'",
-'avro': 'AVROFILE',
+'avro': 'AVRO',
 'hbase': "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'"
 }

@@ -152,7 +152,7 @@ STRING
 ---- QUERY
 # CREATE TABLE LIKE on partitioned table
 create table alltypes_test like functional_seq_snap.alltypes
-stored as parquetfile
+stored as parquet
 ---- RESULTS
 ====
 ---- QUERY
@@ -254,7 +254,7 @@ INT, STRING
 ====
 ---- QUERY
 # Ensure that a table can be created using CTAS
-create table ctas_join stored as parquetfile as
+create table ctas_join stored as parquet as
 select j.*, a.int_col, 1*2
 from functional.jointbl j join functional_seq_snap.alltypes a
 on (j.alltypes_id=a.id)
@@ -295,7 +295,7 @@ BIGINT, STRING, INT, INT, INT, BIGINT
 ====
 ---- QUERY
 # Since the table already exists, the second CTAS should be a no-op
-create table if not exists ctas_join stored as parquetfile as
+create table if not exists ctas_join stored as parquet as
 select j.*, a.int_col, 1*2
 from functional.jointbl j join functional_seq_snap.alltypes a
 on (j.alltypes_id=a.id) limit 1

@@ -11,7 +11,7 @@ use insert_permutation_test
 create table perm_nopart(int_col1 int, string_col string, int_col2 int);
 create table perm_part(int_col1 int, string_col string) partitioned by (p1 int, p2 string);
 create table parquet_part(int_col1 int, string_col string)
-partitioned by (p1 int, p2 string) stored as parquetfile;
+partitioned by (p1 int, p2 string) stored as parquet;
 ---- RESULTS
 ====
 ---- QUERY

@@ -137,20 +137,20 @@ STORED AS TEXTFILE
 LOCATION '$$location_uri$$'
 ====
 ---- CREATE_TABLE
-# testing with parquetfile specified
+# testing with parquet specified
 CREATE TABLE test6 (
 year INT,
 month INT,
 id INT COMMENT 'Add a comment'
 )
-STORED AS PARQUETFILE
+STORED AS PARQUET
 ---- RESULTS
 CREATE TABLE show_create_table_test_db.test6 (
 year INT,
 month INT,
 id INT COMMENT 'Add a comment'
 )
-STORED AS PARQUETFILE
+STORED AS PARQUET
 LOCATION '$$location_uri$$'
 ====
 ---- CREATE_TABLE