From 6f0b255c5ac2b8d0d2980ce1db18cc272f0ee4aa Mon Sep 17 00:00:00 2001 From: Alex Behm Date: Mon, 20 Jul 2015 13:19:03 -0700 Subject: [PATCH] Address several shortcomings with respect to the usability of Avro tables. Addressed JIRAs: IMPALA-1947 and IMPALA-1813 New Feature: Adds support for creating an Avro table without an explicit Avro schema with the following syntax. CREATE TABLE <table_name> (<column_defs>) STORED AS AVRO Fixes and Improvements: This patch fixes and unifies the logic for reconciling differences between an Avro table's Avro Schema and its column definitions. This reconciliation logic is executed during Impala's CREATE TABLE and when loading a table's metadata. Impala generally performs the schema reconciliation during table creation, but Hive does not. In many cases, Hive's CREATE TABLE stores the original column definitions in the HMS (in the StorageDescriptor) instead of the reconciled column definitions. The reconciliation logic considers the field/column names and follows this conflict resolution policy which is similar to Hive's: Mismatched number of columns -> Prefer Avro columns. Mismatched name/type -> Prefer Avro column, except: A CHAR/VARCHAR column definition maps to an Avro STRING, and is preserved as a CHAR/VARCHAR in the reconciled schema. Behavior for TIMESTAMP: A TIMESTAMP column definition maps to an Avro STRING and is presented as a STRING in the reconciled schema, because Avro has no binary TIMESTAMP representation. As a result, no Avro table may have a TIMESTAMP column (existing behavior). 
Change-Id: I8457354568b6049b2dd2794b65fadc06e619d648 Reviewed-on: http://gerrit.cloudera.org:8080/550 Reviewed-by: Alex Behm Tested-by: Internal Jenkins --- .../cloudera/impala/analysis/ColumnDef.java | 45 ++++- .../impala/analysis/CreateTableStmt.java | 132 ++++--------- .../cloudera/impala/catalog/HdfsTable.java | 120 ++---------- .../com/cloudera/impala/catalog/MapType.java | 8 + .../impala/util/AvroSchemaConverter.java | 26 ++- .../impala/util/AvroSchemaParser.java | 49 +++-- .../cloudera/impala/util/AvroSchemaUtils.java | 179 ++++++++++++++++++ .../impala/analysis/AnalyzeDDLTest.java | 75 ++++---- .../functional/functional_schema_template.sql | 7 +- .../QueryTest/avro-schema-resolution.test | 4 +- .../queries/QueryTest/create.test | 79 ++++++++ 11 files changed, 473 insertions(+), 251 deletions(-) create mode 100644 fe/src/main/java/com/cloudera/impala/util/AvroSchemaUtils.java diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ColumnDef.java b/fe/src/main/java/com/cloudera/impala/analysis/ColumnDef.java index d1ffe2be1..7d4145011 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ColumnDef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ColumnDef.java @@ -14,12 +14,17 @@ package com.cloudera.impala.analysis; +import java.util.List; + import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import com.cloudera.impala.catalog.Type; import com.cloudera.impala.common.AnalysisException; import com.cloudera.impala.thrift.TColumn; +import com.google.common.base.Function; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; /** * Represents a column definition in a CREATE/ALTER TABLE/VIEW statement. @@ -31,7 +36,7 @@ import com.google.common.base.Preconditions; */ public class ColumnDef { private final String colName_; - private final String comment_; + private String comment_; // Required in CREATE/ALTER TABLE stmts. 
Set to NULL in CREATE/ALTER VIEW stmts, // for which we setType() after analyzing the defining view definition stmt. @@ -39,15 +44,33 @@ public class ColumnDef { private Type type_; public ColumnDef(String colName, TypeDef typeDef, String comment) { - colName_ = colName; + colName_ = colName.toLowerCase(); typeDef_ = typeDef; comment_ = comment; } + /** + * Creates an analyzed ColumnDef from a Hive FieldSchema. Throws if the FieldSchema's + * type is not supported. + */ + private ColumnDef(FieldSchema fs) throws AnalysisException { + Type type = Type.parseColumnType(fs.getType()); + if (type == null) { + throw new AnalysisException(String.format( + "Unsupported type '%s' in Hive field schema '%s'", + fs.getType(), fs.getName())); + } + colName_ = fs.getName(); + typeDef_ = new TypeDef(type); + comment_ = fs.getComment(); + analyze(); + } + public void setType(Type type) { type_ = type; } public Type getType() { return type_; } public TypeDef getTypeDef() { return typeDef_; } public String getColName() { return colName_; } + public void setComment(String comment) { comment_ = comment; } public String getComment() { return comment_; } public void analyze() throws AnalysisException { @@ -80,4 +103,22 @@ public class ColumnDef { col.setComment(getComment()); return col; } + + public static List createFromFieldSchemas(List fieldSchemas) + throws AnalysisException { + List result = Lists.newArrayListWithCapacity(fieldSchemas.size()); + for (FieldSchema fs: fieldSchemas) result.add(new ColumnDef(fs)); + return result; + } + + public static List toFieldSchemas(List colDefs) { + return Lists.transform(colDefs, new Function() { + public FieldSchema apply(ColumnDef colDef) { + Preconditions.checkNotNull(colDef.getType()); + return new FieldSchema(colDef.getColName(), colDef.getType().toSql(), + colDef.getComment()); + } + }); + } + } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java 
b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java index 899d4455c..91c14c8ff 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CreateTableStmt.java @@ -19,12 +19,12 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParseException; import org.apache.hadoop.fs.permission.FsAction; import com.cloudera.impala.authorization.Privilege; -import com.cloudera.impala.catalog.Column; import com.cloudera.impala.catalog.HdfsStorageDescriptor; -import com.cloudera.impala.catalog.HdfsTable; import com.cloudera.impala.catalog.RowFormat; import com.cloudera.impala.catalog.TableLoadingException; import com.cloudera.impala.common.AnalysisException; @@ -33,7 +33,9 @@ import com.cloudera.impala.thrift.TCatalogObjectType; import com.cloudera.impala.thrift.TCreateTableParams; import com.cloudera.impala.thrift.THdfsFileFormat; import com.cloudera.impala.thrift.TTableName; +import com.cloudera.impala.util.AvroSchemaConverter; import com.cloudera.impala.util.AvroSchemaParser; +import com.cloudera.impala.util.AvroSchemaUtils; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -43,21 +45,20 @@ import com.google.common.collect.Sets; * Represents a CREATE TABLE statement. 
*/ public class CreateTableStmt extends StatementBase { - private final ArrayList columnDefs_; + private List columnDefs_; private final String comment_; private final boolean isExternal_; private final boolean ifNotExists_; private final THdfsFileFormat fileFormat_; private final ArrayList partitionColDefs_; private final RowFormat rowFormat_; - private final TableName tableName_; + private TableName tableName_; private final Map tblProperties_; private final Map serdeProperties_; private final HdfsCachingOp cachingOp_; private HdfsUri location_; // Set during analysis - private String dbName_; private String owner_; /** @@ -153,8 +154,8 @@ public class CreateTableStmt extends StatementBase { * be created within. */ public String getDb() { - Preconditions.checkNotNull(dbName_); - return dbName_; + Preconditions.checkState(isAnalyzed()); + return tableName_.getDb(); } @Override @@ -184,18 +185,18 @@ public class CreateTableStmt extends StatementBase { @Override public void analyze(Analyzer analyzer) throws AnalysisException { + super.analyze(analyzer); Preconditions.checkState(tableName_ != null && !tableName_.isEmpty()); + tableName_ = analyzer.getFqTableName(tableName_); tableName_.analyze(); - dbName_ = analyzer.getTargetDbName(tableName_); owner_ = analyzer.getUser().getName(); - if (analyzer.dbContainsTable(dbName_, tableName_.getTbl(), Privilege.CREATE) && - !ifNotExists_) { - throw new AnalysisException(Analyzer.TBL_ALREADY_EXISTS_ERROR_MSG + - String.format("%s.%s", dbName_, getTbl())); + if (analyzer.dbContainsTable(tableName_.getDb(), tableName_.getTbl(), Privilege.CREATE) + && !ifNotExists_) { + throw new AnalysisException(Analyzer.TBL_ALREADY_EXISTS_ERROR_MSG + tableName_); } - analyzer.addAccessEvent(new TAccessEvent(dbName_ + "." 
+ tableName_.getTbl(), + analyzer.addAccessEvent(new TAccessEvent(tableName_.toString(), TCatalogObjectType.TABLE, Privilege.CREATE.toString())); // Only Avro tables can have empty column defs because they can infer them from @@ -216,13 +217,13 @@ public class CreateTableStmt extends StatementBase { analyzeColumnDefs(analyzer); if (fileFormat_ == THdfsFileFormat.AVRO) { - List newColumnDefs = analyzeAvroSchema(analyzer); - if (newColumnDefs != columnDefs_) { - // Replace the old column defs with the new ones and analyze them. - columnDefs_.clear(); - columnDefs_.addAll(newColumnDefs); - analyzeColumnDefs(analyzer); + columnDefs_ = analyzeAvroSchema(analyzer); + if (columnDefs_.isEmpty()) { + throw new AnalysisException( + "An Avro table requires column definitions or an Avro schema."); } + AvroSchemaUtils.setFromSerdeComment(columnDefs_); + analyzeColumnDefs(analyzer); } if (cachingOp_ != null) cachingOp_.analyze(analyzer); @@ -261,93 +262,42 @@ public class CreateTableStmt extends StatementBase { private List analyzeAvroSchema(Analyzer analyzer) throws AnalysisException { Preconditions.checkState(fileFormat_ == THdfsFileFormat.AVRO); - // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter + // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with latter // taking precedence. List> schemaSearchLocations = Lists.newArrayList(); - String fullTblName = dbName_ + "." + tableName_.getTbl(); schemaSearchLocations.add(serdeProperties_); schemaSearchLocations.add(tblProperties_); String avroSchema = null; + List avroCols = null; // parsed from avroSchema try { - avroSchema = HdfsTable.getAvroSchema(schemaSearchLocations); - // TODO: Allow creating Avro tables without an Avro schema, inferring the schema - // from the column definitions. 
+ avroSchema = AvroSchemaUtils.getAvroSchema(schemaSearchLocations); if (avroSchema == null) { - throw new AnalysisException(String.format("No Avro schema provided in " + - "SERDEPROPERTIES or TBLPROPERTIES for table: %s ", - dbName_ + "." + tableName_.getTbl())); + // No Avro schema was explicitly set in the serde or table properties, so infer + // the Avro schema from the column definitions. + Schema inferredSchema = AvroSchemaConverter.convertColumnDefs( + columnDefs_, tableName_.toString()); + avroSchema = inferredSchema.toString(); } - } catch (TableLoadingException e) { - throw new AnalysisException(e.getMessage(), e); - } - - if (Strings.isNullOrEmpty(avroSchema)) { - throw new AnalysisException("Avro schema is null or empty: " + fullTblName); - } - - // List of columns parsed from the Avro schema. - List avroColumns = null; - try { - avroColumns = AvroSchemaParser.parse(avroSchema); - } catch (Exception e) { + if (Strings.isNullOrEmpty(avroSchema)) { + throw new AnalysisException("Avro schema is null or empty: " + + tableName_.toString()); + } + avroCols = AvroSchemaParser.parse(avroSchema); + } catch (SchemaParseException e) { throw new AnalysisException(String.format( - "Error parsing Avro schema for table '%s': %s", fullTblName, + "Error parsing Avro schema for table '%s': %s", tableName_.toString(), e.getMessage())); } - Preconditions.checkNotNull(avroColumns); + Preconditions.checkNotNull(avroCols); // Analyze the Avro schema to detect inconsistencies with the columnDefs_. // In case of inconsistencies, the column defs are ignored in favor of the Avro // schema for simplicity and, in particular, to enable COMPUTE STATS (IMPALA-1104). 
- String warnStr = null; // set if inconsistency detected - if (avroColumns.size() != columnDefs_.size() && !columnDefs_.isEmpty()) { - warnStr = String.format( - "Ignoring column definitions in favor of Avro schema.\n" + - "The Avro schema has %s column(s) but %s column definition(s) were given.", - avroColumns.size(), columnDefs_.size()); - } else { - // Determine whether the column names and the types match. - for (int i = 0; i < columnDefs_.size(); ++i) { - ColumnDef colDesc = columnDefs_.get(i); - Column avroCol = avroColumns.get(i); - String warnDetail = null; - if (!colDesc.getColName().equalsIgnoreCase(avroCol.getName())) { - warnDetail = "name"; - } - if (colDesc.getType().isStringType() && - avroCol.getType().isStringType()) { - // This is OK -- avro types for CHAR, VARCHAR, and STRING are "string" - } else if (!colDesc.getType().equals(avroCol.getType())) { - warnDetail = "type"; - } - if (warnDetail != null) { - warnStr = String.format( - "Ignoring column definitions in favor of Avro schema due to a " + - "mismatched column %s at position %s.\n" + - "Column definition: %s\n" + - "Avro schema column: %s", warnDetail, i + 1, - colDesc.getColName() + " " + colDesc.getType().toSql(), - avroCol.getName() + " " + avroCol.getType().toSql()); - break; - } - } - } - - if (warnStr != null || columnDefs_.isEmpty()) { - analyzer.addWarning(warnStr); - // Create new columnDefs_ based on the Avro schema and return them. - List avroSchemaColDefs = - Lists.newArrayListWithCapacity(avroColumns.size()); - for (Column avroCol: avroColumns) { - ColumnDef colDef = - new ColumnDef(avroCol.getName(), null, avroCol.getComment()); - colDef.setType(avroCol.getType()); - avroSchemaColDefs.add(colDef); - } - return avroSchemaColDefs; - } - // The existing col defs are consistent with the Avro schema. 
- return columnDefs_; + StringBuilder warning = new StringBuilder(); + List reconciledColDefs = + AvroSchemaUtils.reconcileSchemas(columnDefs_, avroCols, warning); + if (warning.length() > 0) analyzer.addWarning(warning.toString()); + return reconciledColDefs; } private void analyzeRowFormatValue(String value) throws AnalysisException { diff --git a/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java b/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java index 6b94e7e89..5f39bba5a 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/HdfsTable.java @@ -15,9 +15,7 @@ package com.cloudera.impala.catalog; import java.io.IOException; -import java.io.InputStream; import java.net.URI; -import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -29,7 +27,6 @@ import java.util.Set; import java.util.TreeMap; import org.apache.avro.Schema; -import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStorageLocation; @@ -44,11 +41,11 @@ import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.cloudera.impala.analysis.ColumnDef; import com.cloudera.impala.analysis.Expr; import com.cloudera.impala.analysis.LiteralExpr; import com.cloudera.impala.analysis.NullLiteral; @@ -78,6 +75,7 @@ import com.cloudera.impala.thrift.TTableDescriptor; import com.cloudera.impala.thrift.TTableType; import com.cloudera.impala.util.AvroSchemaConverter; import com.cloudera.impala.util.AvroSchemaParser; +import 
com.cloudera.impala.util.AvroSchemaUtils; import com.cloudera.impala.util.FsPermissionChecker; import com.cloudera.impala.util.HdfsCachingUtil; import com.cloudera.impala.util.ListMap; @@ -1028,13 +1026,13 @@ public class HdfsTable extends Table { getMetaStoreTable().getSd().getSerdeInfo().getParameters()); schemaSearchLocations.add(getMetaStoreTable().getParameters()); - avroSchema_ = HdfsTable.getAvroSchema(schemaSearchLocations); + avroSchema_ = AvroSchemaUtils.getAvroSchema(schemaSearchLocations); if (avroSchema_ == null) { // No Avro schema was explicitly set in the table metadata, so infer the Avro // schema from the column definitions. - Schema avroSchema = AvroSchemaConverter.convertFieldSchemas( - msColDefs, getFullName()); - avroSchema_ = avroSchema.toString(); + Schema inferredSchema = AvroSchemaConverter.convertFieldSchemas( + msTbl.getSd().getCols(), getFullName()); + avroSchema_ = inferredSchema.toString(); } String serdeLib = msTbl.getSd().getSerdeInfo().getSerializationLib(); if (serdeLib == null || @@ -1050,39 +1048,18 @@ public class HdfsTable extends Table { // Impala-created tables this step is not necessary because the same // resolution is done during table creation. But Hive-created tables // store the original column definitions, and not the reconciled ones. - // The schemas are reconciled according to following policy: - // - // Mismatched number of columns -> Prefer Avro column. - // Mismatched name/type -> Prefer Avro column, except: - // A CHAR/VARCHAR column definition maps to an Avro STRING, and is preserved - // as a CHAR/VARCHAR in the reconciled schema. - // Behavior for TIMESTAMP: - // A TIMESTAMP column definition maps to an Avro STRING and is presented as a - // STRING in the reconciled schema, because Avro has no binary TIMESTAMP - // representation. As a result, no Avro table may have a TIMESTAMP column. - // - // TODO: Factor out common schema reconciliation logic from here and - // CreateTableStmt. 
- List avroCols = AvroSchemaParser.parse(avroSchema_); - for (int i = 0; i < avroCols.size(); ++i) { - boolean useAvroType = true; - if (msColDefs.size() == avroCols.size()) { - Type colDefType = parseColumnType(msColDefs.get(i)); - if (avroCols.get(i).getType().isStringType() && colDefType.isStringType()) { - // Preserve CHAR/VARCHAR from column definition. Avro only has STRING. - useAvroType = false; - } - } - FieldSchema newFs = new FieldSchema(); - newFs.setName(avroCols.get(i).getName()); - newFs.setComment("from deserializer"); - if (useAvroType) { - newFs.setType(avroCols.get(i).getType().toSql()); - } else { - newFs.setType(msColDefs.get(i).getType()); - } - nonPartFieldSchemas_.add(newFs); + List colDefs = + ColumnDef.createFromFieldSchemas(msTbl.getSd().getCols()); + List avroCols = AvroSchemaParser.parse(avroSchema_); + StringBuilder warning = new StringBuilder(); + List reconciledColDefs = + AvroSchemaUtils.reconcileSchemas(colDefs, avroCols, warning); + if (warning.length() != 0) { + LOG.warn(String.format("Warning while loading table %s:\n%s", + getFullName(), warning.toString())); } + AvroSchemaUtils.setFromSerdeComment(reconciledColDefs); + nonPartFieldSchemas_.addAll(ColumnDef.toFieldSchemas(reconciledColDefs)); } } else { nonPartFieldSchemas_.addAll(msColDefs); @@ -1187,69 +1164,6 @@ public class HdfsTable extends Table { } } - /** - * Gets an Avro table's JSON schema from the list of given table property search - * locations. The schema may be specified as a string literal or provided as a - * Hadoop FileSystem or http URL that points to the schema. Apart from ensuring - * that the JSON schema is not SCHEMA_NONE, this function does not perform any - * additional validation on the returned string (e.g., it may not be a valid - * schema). Returns the Avro schema or null if none was specified in the search - * locations. Throws a TableLoadingException if a schema was specified, but could not - * be retrieved, e.g., because of an invalid URL. 
- */ - public static String getAvroSchema(List> schemaSearchLocations) - throws TableLoadingException { - String url = null; - // Search all locations and break out on the first valid schema found. - for (Map schemaLocation: schemaSearchLocations) { - if (schemaLocation == null) continue; - - String literal = schemaLocation.get(AvroSerdeUtils.SCHEMA_LITERAL); - if (literal != null && !literal.equals(AvroSerdeUtils.SCHEMA_NONE)) return literal; - - url = schemaLocation.get(AvroSerdeUtils.SCHEMA_URL); - if (url != null && !url.equals(AvroSerdeUtils.SCHEMA_NONE)) { - url = url.trim(); - break; - } - } - if (url == null) return null; - - String schema = null; - if (url.toLowerCase().startsWith("http://")) { - InputStream urlStream = null; - try { - urlStream = new URL(url).openStream(); - schema = IOUtils.toString(urlStream); - } catch (IOException e) { - throw new TableLoadingException("Problem reading Avro schema from: " + url, e); - } finally { - IOUtils.closeQuietly(urlStream); - } - } else { - Path path = new Path(url); - FileSystem fs = null; - try { - fs = path.getFileSystem(FileSystemUtil.getConfiguration()); - } catch (Exception e) { - throw new TableLoadingException(String.format( - "Invalid avro.schema.url: %s. %s", path, e.getMessage())); - } - StringBuilder errorMsg = new StringBuilder(); - if (!FileSystemUtil.isPathReachable(path, fs, errorMsg)) { - throw new TableLoadingException(String.format( - "Invalid avro.schema.url: %s. 
%s", path, errorMsg)); - } - try { - schema = FileSystemUtil.readFile(path); - } catch (IOException e) { - throw new TableLoadingException( - "Problem reading Avro schema at: " + url, e); - } - } - return schema; - } - @Override protected List getColumnNamesWithHmsStats() { List ret = Lists.newArrayList(); diff --git a/fe/src/main/java/com/cloudera/impala/catalog/MapType.java b/fe/src/main/java/com/cloudera/impala/catalog/MapType.java index f7de5b90b..41e9b97ae 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/MapType.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/MapType.java @@ -24,6 +24,14 @@ public class MapType extends Type { public Type getKeyType() { return keyType_; } public Type getValueType() { return valueType_; } + @Override + public boolean equals(Object other) { + if (!(other instanceof MapType)) return false; + MapType otherMapType = (MapType) other; + return otherMapType.keyType_.equals(keyType_) && + otherMapType.valueType_.equals(valueType_); + } + @Override public String toSql() { return String.format("MAP<%s,%s>", keyType_.toSql(), valueType_.toSql()); diff --git a/fe/src/main/java/com/cloudera/impala/util/AvroSchemaConverter.java b/fe/src/main/java/com/cloudera/impala/util/AvroSchemaConverter.java index b808b6ce6..504d263ea 100644 --- a/fe/src/main/java/com/cloudera/impala/util/AvroSchemaConverter.java +++ b/fe/src/main/java/com/cloudera/impala/util/AvroSchemaConverter.java @@ -1,4 +1,4 @@ -// Copyright 2014 Cloudera Inc. +// Copyright 2015 Cloudera Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,7 +18,9 @@ import java.util.List; import org.apache.avro.Schema; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.codehaus.jackson.node.IntNode; +import com.cloudera.impala.analysis.ColumnDef; import com.cloudera.impala.catalog.ArrayType; import com.cloudera.impala.catalog.Column; import com.cloudera.impala.catalog.MapType; @@ -58,6 +60,12 @@ public class AvroSchemaConverter { return converter.convertColumnsImpl(columns, schemaName); } + public static Schema convertColumnDefs( + List colDefs, String schemaName) { + AvroSchemaConverter converter = new AvroSchemaConverter(); + return converter.convertColumnDefsImpl(colDefs, schemaName); + } + public static Schema convertFieldSchemas( List fieldSchemas, String schemaName) { AvroSchemaConverter converter = new AvroSchemaConverter(); @@ -78,6 +86,16 @@ public class AvroSchemaConverter { return createAvroRecord(avroFields, schemaName); } + private Schema convertColumnDefsImpl(List colDefs, String schemaName) { + List avroFields = Lists.newArrayList(); + for (ColumnDef colDef: colDefs) { + final Schema.Field avroField = new Schema.Field(colDef.getColName(), + createAvroSchema(colDef.getType()), colDef.getComment(), null); + avroFields.add(avroField); + } + return createAvroRecord(avroFields, schemaName); + } + private Schema convertFieldSchemasImpl( List fieldSchemas, String schemaName) { List avroFields = Lists.newArrayList(); @@ -144,11 +162,11 @@ public class AvroSchemaConverter { private Schema createDecimalSchema(ScalarType impalaDecimalType) { Schema decimalSchema = Schema.create(Schema.Type.BYTES); decimalSchema.addProp(AVRO_LOGICAL_TYPE, AVRO_DECIMAL_TYPE); - // addProp expects a string as the value. 
+ // precision and scale must be integer values decimalSchema.addProp(PRECISION_PROP_NAME, - String.valueOf(impalaDecimalType.decimalPrecision())); + new IntNode(impalaDecimalType.decimalPrecision())); decimalSchema.addProp(SCALE_PROP_NAME, - String.valueOf(impalaDecimalType.decimalScale())); + new IntNode(impalaDecimalType.decimalScale())); return decimalSchema; } diff --git a/fe/src/main/java/com/cloudera/impala/util/AvroSchemaParser.java b/fe/src/main/java/com/cloudera/impala/util/AvroSchemaParser.java index d0d0cfc9b..9ddaabaac 100644 --- a/fe/src/main/java/com/cloudera/impala/util/AvroSchemaParser.java +++ b/fe/src/main/java/com/cloudera/impala/util/AvroSchemaParser.java @@ -30,9 +30,15 @@ import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; import org.codehaus.jackson.JsonNode; -import com.cloudera.impala.catalog.Column; +import com.cloudera.impala.analysis.ColumnDef; +import com.cloudera.impala.analysis.TypeDef; +import com.cloudera.impala.catalog.ArrayType; +import com.cloudera.impala.catalog.MapType; import com.cloudera.impala.catalog.ScalarType; +import com.cloudera.impala.catalog.StructField; +import com.cloudera.impala.catalog.StructType; import com.cloudera.impala.catalog.Type; +import com.cloudera.impala.common.AnalysisException; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -57,31 +63,35 @@ public class AvroSchemaParser { /** * Parses the Avro schema string literal, mapping the Avro types to Impala types. - * Returns a list of Column objects with their name and type info set. - * Throws an UnsupportedOperationException if the Avro type maps to a type Impala + * Returns a list of ColumnDef objects with their name and type info set. + * Throws an AnalysisException if the Avro type maps to a type that Impala * does not yet support. * Throws a SchemaParseException if the Avro schema was invalid. 
*/ - public static List parse(String schemaStr) throws SchemaParseException { + public static List parse(String schemaStr) + throws SchemaParseException, AnalysisException { Schema.Parser avroSchemaParser = new Schema.Parser(); Schema schema = avroSchemaParser.parse(schemaStr); if (!schema.getType().equals(Schema.Type.RECORD)) { throw new UnsupportedOperationException("Schema for table must be of type " + "RECORD. Received type: " + schema.getType()); } - List cols = Lists.newArrayList(); - for (int i = 0; i < schema.getFields().size(); ++i) { - Schema.Field field = schema.getFields().get(i); - cols.add(new Column(field.name(), getTypeInfo(field.schema(), field.name()), i)); + List colDefs = Lists.newArrayListWithCapacity(schema.getFields().size()); + for (Schema.Field field: schema.getFields()) { + ColumnDef colDef = new ColumnDef(field.name(), + new TypeDef(getTypeInfo(field.schema(), field.name())), field.doc()); + colDef.analyze(); + colDefs.add(colDef); } - return cols; + return colDefs; } /** * Parses the given Avro schema and returns the matching Impala type * for this field. Handles primitive and complex types. */ - private static Type getTypeInfo(Schema schema, String colName) { + private static Type getTypeInfo(Schema schema, String colName) + throws AnalysisException { // Avro requires NULLable types to be defined as unions of some type T // and NULL. This is annoying and we're going to hide it from the user. 
if (isNullableType(schema)) { @@ -94,19 +104,30 @@ public class AvroSchemaParser { } switch(type) { + case ARRAY: + Type itemType = getTypeInfo(schema.getElementType(), colName); + return new ArrayType(itemType); + case MAP: + Type valueType = getTypeInfo(schema.getValueType(), colName); + return new MapType(Type.STRING, valueType); + case RECORD: + StructType structType = new StructType(); + for (Schema.Field field: schema.getFields()) { + Type fieldType = getTypeInfo(field.schema(), colName); + structType.addField(new StructField(field.name(), fieldType, field.doc())); + } + return structType; case BYTES: // Decimal is stored in Avro as a BYTE. Type decimalType = getDecimalType(schema); if (decimalType != null) return decimalType; - case RECORD: - case MAP: - case ARRAY: + // TODO: Add support for stored Avro UNIONs by exposing them as STRUCTs in Impala. case UNION: case ENUM: case FIXED: case NULL: default: { - throw new UnsupportedOperationException(String.format( + throw new AnalysisException(String.format( "Unsupported type '%s' of column '%s'", type.getName(), colName)); } } diff --git a/fe/src/main/java/com/cloudera/impala/util/AvroSchemaUtils.java b/fe/src/main/java/com/cloudera/impala/util/AvroSchemaUtils.java new file mode 100644 index 000000000..e9bebbae8 --- /dev/null +++ b/fe/src/main/java/com/cloudera/impala/util/AvroSchemaUtils.java @@ -0,0 +1,179 @@ +// Copyright 2015 Cloudera Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.cloudera.impala.util; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.List; +import java.util.Map; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; + +import com.cloudera.impala.analysis.ColumnDef; +import com.cloudera.impala.catalog.PrimitiveType; +import com.cloudera.impala.common.AnalysisException; +import com.cloudera.impala.common.FileSystemUtil; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; + +/** + * Contains utility functions for dealing with Avro schemas. + */ +public class AvroSchemaUtils { + + /** + * Gets an Avro table's JSON schema from the list of given table property search + * locations. The schema may be specified as a string literal or provided as a + * Hadoop FileSystem or http URL that points to the schema. Apart from ensuring + * that the JSON schema is not SCHEMA_NONE, this function does not perform any + * additional validation on the returned string (e.g., it may not be a valid + * schema). Returns the Avro schema or null if none was specified in the search + * locations. Throws an AnalysisException if a schema was specified, but could not + * be retrieved, e.g., because of an invalid URL. + */ + public static String getAvroSchema(List> schemaSearchLocations) + throws AnalysisException { + String url = null; + // Search all locations and break out on the first valid schema found. 
+ for (Map schemaLocation: schemaSearchLocations) { + if (schemaLocation == null) continue; + + String literal = schemaLocation.get(AvroSerdeUtils.SCHEMA_LITERAL); + if (literal != null && !literal.equals(AvroSerdeUtils.SCHEMA_NONE)) return literal; + + url = schemaLocation.get(AvroSerdeUtils.SCHEMA_URL); + if (url != null && !url.equals(AvroSerdeUtils.SCHEMA_NONE)) { + url = url.trim(); + break; + } + } + if (url == null) return null; + + String schema = null; + InputStream urlStream = null; + try { + // TODO: Add support for https:// here. + if (url.toLowerCase().startsWith("http://")) { + urlStream = new URL(url).openStream(); + schema = IOUtils.toString(urlStream); + } else { + Path path = new Path(url); + FileSystem fs = null; + fs = path.getFileSystem(FileSystemUtil.getConfiguration()); + StringBuilder errorMsg = new StringBuilder(); + if (!FileSystemUtil.isPathReachable(path, fs, errorMsg)) { + throw new AnalysisException(String.format( + "Invalid avro.schema.url: %s. %s", url, errorMsg)); + } + schema = FileSystemUtil.readFile(path); + } + } catch (AnalysisException e) { + throw e; + } catch (IOException e) { + throw new AnalysisException(String.format( + "Failed to read Avro schema at: %s. %s ", url, e.getMessage())); + } catch (Exception e) { + throw new AnalysisException(String.format( + "Invalid avro.schema.url: %s. %s", url, e.getMessage())); + } finally { + if (urlStream != null) IOUtils.closeQuietly(urlStream); + } + return schema; + } + + /** + * Reconciles differences in names/types between the given list of column definitions + * and the column definitions corresponding to an Avro Schema. Populates 'warning' + * if there are inconsistencies between the column definitions and the Avro schema, + * Returns the reconciled column definitions according to the following conflict + * resolution policy: + * + * Mismatched number of columns -> Prefer Avro columns. 
+ * Mismatched name/type -> Prefer Avro column, except: + * A CHAR/VARCHAR column definition maps to an Avro STRING, and is preserved + * as a CHAR/VARCHAR in the reconciled schema. + * + * Behavior for TIMESTAMP: + * A TIMESTAMP column definition maps to an Avro STRING and is presented as a STRING + * in the reconciled schema, because Avro has no binary TIMESTAMP representation. + * As a result, no Avro table may have a TIMESTAMP column. + */ + public static List<ColumnDef> reconcileSchemas( + List<ColumnDef> colDefs, List<ColumnDef> avroCols, StringBuilder warning) { + if (colDefs.size() != avroCols.size()) { + warning.append(String.format( + "Ignoring column definitions in favor of Avro schema.\n" + + "The Avro schema has %s column(s) but %s column definition(s) were given.", + avroCols.size(), colDefs.size())); + return avroCols; + } + + List<ColumnDef> result = Lists.newArrayListWithCapacity(colDefs.size()); + for (int i = 0; i < avroCols.size(); ++i) { + ColumnDef colDef = colDefs.get(i); + ColumnDef avroCol = avroCols.get(i); + Preconditions.checkNotNull(colDef.getType()); + Preconditions.checkNotNull(avroCol.getType()); + + // A CHAR/VARCHAR column definition maps to an Avro STRING, and is preserved + // as a CHAR/VARCHAR in the reconciled schema. + if ((colDef.getType().isStringType() && avroCol.getType().isStringType())) { + Preconditions.checkState( + avroCol.getType().getPrimitiveType() == PrimitiveType.STRING); + result.add(colDef); + } else { + result.add(avroCol); + } + + // Populate warning string if there are name and/or type inconsistencies. + if (!colDef.getColName().equals(avroCol.getColName()) || + !colDef.getType().equals(avroCol.getType())) { + if (warning.length() == 0) { + // Add warning preamble for the first mismatch.
+ warning.append("Resolved the following name and/or type inconsistencies " + + "between the column definitions and the Avro schema.\n"); + } + warning.append(String.format("Column definition at position %s: %s %s\n", + i, colDefs.get(i).getColName(), colDefs.get(i).getType().toSql())); + warning.append(String.format("Avro schema column at position %s: %s %s\n", + i, avroCols.get(i).getColName(), avroCols.get(i).getType().toSql())); + warning.append(String.format("Resolution at position %s: %s %s\n", + i, result.get(i).getColName(), result.get(i).getType().toSql())); + } + } + Preconditions.checkState(result.size() == avroCols.size()); + Preconditions.checkState(result.size() == colDefs.size()); + return result; + } + + /** + * Sets the comment of each column definition to 'from deserializer' if not already + * set. The purpose of this function is to provide behavioral consistency with + * Hive ('deserializer' is not applicable to Impala) with respect to column comments + * set for Avro tables. + */ + public static void setFromSerdeComment(List<ColumnDef> colDefs) { + for (ColumnDef colDef: colDefs) { + if (Strings.isNullOrEmpty(colDef.getComment())) { + colDef.setComment("from deserializer"); + } + } + } +} diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java index 134b7e6d9..50d3fa747 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java @@ -293,7 +293,7 @@ public class AnalyzeDDLTest extends AnalyzerTest { AnalysisError( "alter table functional.alltypes change column int_col Tinyint_col int", - "Column already exists: Tinyint_col"); + "Column already exists: tinyint_col"); // Invalid column name.
AnalysisError("alter table functional.alltypes change column int_col `???` int", @@ -1120,7 +1120,7 @@ public class AnalyzeDDLTest extends AnalyzerTest { AnalysisError("create table db_does_not_exist.new_table (i int)", "Database does not exist: db_does_not_exist"); AnalysisError("create table new_table (i int, I string)", - "Duplicate column name: I"); + "Duplicate column name: i"); AnalysisError("create table new_table (c1 double, col2 int, c1 double, c4 string)", "Duplicate column name: c1"); AnalysisError("create table new_table (i int, s string) PARTITIONED BY (i int)", @@ -1269,10 +1269,14 @@ public class AnalyzeDDLTest extends AnalyzerTest { "double_col double, date_string_col string, string_col string, " + "timestamp_col timestamp) with serdeproperties ('avro.schema.url'='%s')" + "stored as avro", alltypesSchemaLoc), - "Ignoring column definitions in favor of Avro schema due to a mismatched " + - "column name at position 5.\n" + - "Column definition: bad_int_col INT\n" + - "Avro schema column: int_col INT"); + "Resolved the following name and/or type inconsistencies between the column " + + "definitions and the Avro schema.\n" + + "Column definition at position 4: bad_int_col INT\n" + + "Avro schema column at position 4: int_col INT\n" + + "Resolution at position 4: int_col INT\n" + + "Column definition at position 10: timestamp_col TIMESTAMP\n" + + "Avro schema column at position 10: timestamp_col STRING\n" + + "Resolution at position 10: timestamp_col STRING"); // Mismatched type. 
AnalyzesOk(String.format( "create table foo_avro (id int, bool_col boolean, tinyint_col int, " + @@ -1280,33 +1284,43 @@ public class AnalyzeDDLTest extends AnalyzerTest { "double_col bigint, date_string_col string, string_col string, " + "timestamp_col timestamp) stored as avro tblproperties ('avro.schema.url'='%s')", alltypesSchemaLoc), - "Ignoring column definitions in favor of Avro schema due to a mismatched " + - "column type at position 8.\n" + - "Column definition: double_col BIGINT\n" + - "Avro schema column: double_col DOUBLE"); + "Resolved the following name and/or type inconsistencies between the column " + + "definitions and the Avro schema.\n" + + "Column definition at position 7: double_col BIGINT\n" + + "Avro schema column at position 7: double_col DOUBLE\n" + + "Resolution at position 7: double_col DOUBLE\n" + + "Column definition at position 10: timestamp_col TIMESTAMP\n" + + "Avro schema column at position 10: timestamp_col STRING\n" + + "Resolution at position 10: timestamp_col STRING"); - // No Avro schema specified for Avro format table. - AnalysisError("create table foo_avro (i int) stored as avro", - "No Avro schema provided in SERDEPROPERTIES or TBLPROPERTIES for table: " + - "default.foo_avro"); - AnalysisError("create table foo_avro (i int) stored as avro tblproperties ('a'='b')", - "No Avro schema provided in SERDEPROPERTIES or TBLPROPERTIES for table: " + - "default.foo_avro"); + // Avro schema is inferred from column definitions. 
+ AnalyzesOk("create table foo_avro (c1 tinyint, c2 smallint, c3 int, c4 bigint, " + + "c5 float, c6 double, c7 timestamp, c8 string, c9 char(10), c10 varchar(20)," + + "c11 decimal(10, 5), c12 struct<f1:int,f2:string>, c13 array<int>," + + "c14 map<string,int>) stored as avro"); + AnalyzesOk("create table foo_avro (c1 tinyint, c2 smallint, c3 int, c4 bigint, " + + "c5 float, c6 double, c7 timestamp, c8 string, c9 char(10), c10 varchar(20)," + + "c11 decimal(10, 5), c12 struct<f1:int,f2:string>, c13 array<int>," + + "c14 map<string,int>) partitioned by (year int, month int) stored as avro"); + + // Neither Avro schema nor column definitions. AnalysisError("create table foo_avro stored as avro tblproperties ('a'='b')", - "No Avro schema provided in SERDEPROPERTIES or TBLPROPERTIES for table: "+ - "default.foo_avro"); + "An Avro table requires column definitions or an Avro schema."); // Invalid schema URL + AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + + "('avro.schema.url'='')", + "Invalid avro.schema.url: . Can not create a Path from an empty string"); AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + "('avro.schema.url'='schema.avsc')", "Invalid avro.schema.url: schema.avsc. Path does not exist."); AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + "('avro.schema.url'='hdfs://invalid*host/schema.avsc')", - "Invalid avro.schema.url: hdfs://invalid*host/schema.avsc. " + + "Failed to read Avro schema at: hdfs://invalid*host/schema.avsc. " + "Incomplete HDFS URI, no host: hdfs://invalid*host/schema.avsc"); AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + "('avro.schema.url'='foo://bar/schema.avsc')", - "Invalid avro.schema.url: foo://bar/schema.avsc. No FileSystem for scheme: foo"); + "Failed to read Avro schema at: foo://bar/schema.avsc. 
" + + "No FileSystem for scheme: foo"); // Decimal parsing AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " + @@ -1347,28 +1361,21 @@ public class AnalyzeDDLTest extends AnalyzerTest { "org.codehaus.jackson.JsonParseException: Unexpected close marker ']': "+ "expected '}'"); - // Unsupported types - // Array - AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + + // Map/Array types in Avro schema. + AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " + "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " + "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," + - "{\"name\": \"list1\", \"type\": {\"type\":\"array\", \"items\": \"int\"}}]}')", - "Error parsing Avro schema for table 'default.foo_avro': " + - "Unsupported type 'array' of column 'list1'"); - // Map - AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + + "{\"name\": \"list1\", \"type\": {\"type\":\"array\", \"items\": \"int\"}}]}')"); + AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " + "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " + "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," + - "{\"name\": \"map1\", \"type\": {\"type\":\"map\", \"values\": \"int\"}}]}')", - "Error parsing Avro schema for table 'default.foo_avro': " + - "Unsupported type 'map' of column 'map1'"); + "{\"name\": \"map1\", \"type\": {\"type\":\"map\", \"values\": \"int\"}}]}')"); - // Union + // Union is not supported AnalysisError("create table foo_avro (i int) stored as avro tblproperties " + "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " + "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," + "{\"name\": \"union1\", \"type\": [\"float\", \"boolean\"]}]}')", - "Error parsing Avro schema for table 'default.foo_avro': " + "Unsupported type 'union' of column 'union1'"); // TODO: Add COLLECTION ITEMS 
TERMINATED BY and MAP KEYS TERMINATED BY clauses. diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 9d196154d..a93f07780 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -1504,8 +1504,11 @@ PARTITIONED BY (year int, month int) STORED AS AVRO LOCATION '/test-warehouse/alltypes_avro_snap'; ---- ALTER -ALTER TABLE {table_name} ADD PARTITION (year=2009,month=9); -ALTER TABLE {table_name} ADD PARTITION (year=2010,month=10); +-- The second partition is added twice because there seems to be a Hive/beeline +-- bug where the last alter is not executed properly. +ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (year=2009,month=9); +ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (year=2010,month=10); +ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION (year=2010,month=10); ==== ---- DATASET functional diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-resolution.test b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-resolution.test index 7f11f7156..244a5c232 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-resolution.test +++ b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-resolution.test @@ -11,7 +11,9 @@ false,2,2,2,2,'serialized string','','NULL' ---- QUERY # IMPALA-1136: Tests that Impala can read Hive-created Avro tables that have # no specified Avro schema, i.e., the Avro schema is inferred from the column -# definitions +# definitions. +# IMPALA-1947: A TIMESTAMP from the column definitions results in a STRING column +# backed by a stored Avro STRING during table loading. 
# See testdata/avro_schema_resolution select * from no_avro_schema where year = 2009 order by id limit 1 union all diff --git a/testdata/workloads/functional-query/queries/QueryTest/create.test b/testdata/workloads/functional-query/queries/QueryTest/create.test index ef837e474..c60f9f049 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/create.test +++ b/testdata/workloads/functional-query/queries/QueryTest/create.test @@ -674,6 +674,85 @@ describe avro_alltypes_part STRING, STRING, STRING ==== ---- QUERY +# Test creating a partitioned Avro table without an Avro schema. +# The Avro schema is inferred from the column definitions. +create table no_avro_schema ( + c1 tinyint, + c2 smallint comment 'becomes int', + c3 int, + c4 bigint, + c5 float, + c6 double, + c7 timestamp comment 'becomes string', + c8 string, + c9 char(10) comment 'preserved', + c10 varchar(20), + c11 decimal(10, 5), + c12 struct<f1:int,f2:string>, + c13 array<int>, + c14 map<string,int>) +partitioned by (year int, month int) +stored as avro +---- RESULTS +==== +---- QUERY +describe no_avro_schema +---- RESULTS +'c1','int','from deserializer' +'c2','int','becomes int' +'c3','int','from deserializer' +'c4','bigint','from deserializer' +'c5','float','from deserializer' +'c6','double','from deserializer' +'c7','string','becomes string' +'c8','string','from deserializer' +'c9','char(10)','preserved' +'c10','varchar(20)','from deserializer' +'c11','decimal(10,5)','from deserializer' +'c12','struct<\n f1:int,\n f2:string\n>','from deserializer' +'c13','array<int>','from deserializer' +'c14','map<string,int>','from deserializer' +'year','int','' +'month','int','' +---- TYPES +STRING, STRING, STRING +==== +---- QUERY +# Test creating an Avro table without an Avro schema via CREATE TABLE LIKE (IMPALA-1813) +create table like_no_avro_schema like no_avro_schema stored as avro +---- RESULTS +==== +---- QUERY +describe like_no_avro_schema +---- RESULTS +'c1','int','from deserializer' +'c2','int','becomes int' +'c3','int','from deserializer'
+'c4','bigint','from deserializer' +'c5','float','from deserializer' +'c6','double','from deserializer' +'c7','string','becomes string' +'c8','string','from deserializer' +'c9','char(10)','preserved' +'c10','varchar(20)','from deserializer' +'c11','decimal(10,5)','from deserializer' +'c12','struct<\n f1:int,\n f2:string\n>','from deserializer' +'c13','array<int>','from deserializer' +'c14','map<string,int>','from deserializer' +'year','int','' +'month','int','' +---- TYPES +STRING, STRING, STRING +==== +---- QUERY +drop table like_no_avro_schema +---- RESULTS +==== +---- QUERY +drop table no_avro_schema +---- RESULTS +==== +---- QUERY drop table avro_alltypes_part ---- RESULTS ====