From dbaf718221492e72ff79203b98b2c511372cc091 Mon Sep 17 00:00:00 2001 From: Victor Bittorf Date: Thu, 25 Sep 2014 12:55:03 -0700 Subject: [PATCH] IMPALA-1185: Make Avro and Seq writers unsupported Avro and Sequence writers are only available if query option ALLOW_UNSUPPORTED_FORMATS is set to true; otherwise the insert fails with an error. Writing compressed text tables is gated by the same option. Change-Id: I597039f7c68f708fda10f848531eb557d6910f92 Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4539 Reviewed-by: Nong Li Tested-by: jenkins --- be/src/exec/hdfs-table-sink.cc | 31 +++++++++++++++++-- .../queries/QueryTest/avro-writer.test | 8 +++++ .../queries/QueryTest/seq-writer.test | 12 +++++++ .../queries/QueryTest/text-writer.test | 11 +++++++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/be/src/exec/hdfs-table-sink.cc b/be/src/exec/hdfs-table-sink.cc index 9ac2e392e..ad345f967 100644 --- a/be/src/exec/hdfs-table-sink.cc +++ b/be/src/exec/hdfs-table-sink.cc @@ -351,6 +351,31 @@ Status HdfsTableSink::InitOutputPartition(RuntimeState* state, BuildHdfsFileNames(partition_descriptor, output_partition); output_partition->hdfs_connection = hdfs_connection_; + + bool allow_unsupported_formats = + state->query_options().__isset.allow_unsupported_formats && + state->query_options().allow_unsupported_formats; + if (!allow_unsupported_formats) { + if (partition_descriptor.file_format() == THdfsFileFormat::SEQUENCE_FILE || + partition_descriptor.file_format() == THdfsFileFormat::AVRO) { + stringstream error_msg; + map<int, const char*>::const_iterator i = + _THdfsFileFormat_VALUES_TO_NAMES.find(partition_descriptor.file_format()); + error_msg << "Writing to table format " << i->second + << " is not supported. 
Use query option ALLOW_UNSUPPORTED_FORMATS" + " to override."; + return Status(error_msg.str()); + } + if (partition_descriptor.file_format() == THdfsFileFormat::TEXT && + state->query_options().__isset.compression_codec && + state->query_options().compression_codec != THdfsCompression::NONE) { + stringstream error_msg; + error_msg << "Writing to compressed text table is not supported. " + "Use query option ALLOW_UNSUPPORTED_FORMATS to override."; + return Status(error_msg.str()); + } + } + switch (partition_descriptor.file_format()) { case THdfsFileFormat::TEXT: output_partition->writer.reset( @@ -382,10 +407,10 @@ Status HdfsTableSink::InitOutputPartition(RuntimeState* state, _THdfsFileFormat_VALUES_TO_NAMES.find(partition_descriptor.file_format()); if (i != _THdfsFileFormat_VALUES_TO_NAMES.end()) { error_msg << "Cannot write to table with format " << i->second << ". " - << "Impala only supports writing to TEXT, PARQUET, SEQ, and AVRO tables."; + << "Impala only supports writing to TEXT and PARQUET."; } else { - error_msg << "Cannot write to table. Impala only supports writing to TEXT," - << " PARQUET, SEQ, and AVRO tables. (Unknown file format: " + error_msg << "Cannot write to table. Impala only supports writing to TEXT" + << " and PARQUET tables. 
(Unknown file format: " << partition_descriptor.file_format() << ")"; } return Status(error_msg.str()); diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-writer.test b/testdata/workloads/functional-query/queries/QueryTest/avro-writer.test index d6b33f6c5..6dc0899d3 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/avro-writer.test +++ b/testdata/workloads/functional-query/queries/QueryTest/avro-writer.test @@ -16,10 +16,12 @@ TBLPROPERTIES ('avro.schema.literal'='{ ==== ---- QUERY SET COMPRESSION_CODEC=NONE; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __avro_write select 0, "a", 1.1; ==== ---- QUERY SET COMPRESSION_CODEC=SNAPPY; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __avro_write select 1, "b", 2.2; ==== ---- QUERY @@ -31,5 +33,11 @@ select * from __avro_write; INT,STRING,DOUBLE ==== ---- QUERY +SET ALLOW_UNSUPPORTED_FORMATS=0; +insert into __avro_write select 1, "b", 2.2; +---- CATCH +Writing to table format AVRO is not supported. Use query option ALLOW_UNSUPPORTED_FORMATS +==== +---- QUERY drop table __avro_write; ==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/seq-writer.test b/testdata/workloads/functional-query/queries/QueryTest/seq-writer.test index 1cca17e4f..bd8de5baf 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/seq-writer.test +++ b/testdata/workloads/functional-query/queries/QueryTest/seq-writer.test @@ -4,30 +4,42 @@ drop table if exists __seq_write; ==== ---- QUERY SET COMPRESSION_CODEC=NONE; +SET ALLOW_UNSUPPORTED_FORMATS=1; create table __seq_write (i int, s string, d double) stored as SEQUENCEFILE; ==== ---- QUERY SET COMPRESSION_CODEC=NONE; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __seq_write select 0, "a", 1.1; ==== ---- QUERY SET COMPRESSION_CODEC=DEFAULT; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __seq_write values (1, "b", 2.2); ==== ---- QUERY SET COMPRESSION_CODEC=SNAPPY; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __seq_write values (2, 
"c", 3.3); ==== ---- QUERY SET COMPRESSION_CODEC=SNAPPY_BLOCKED; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __seq_write values (3, "d", 4.4); ==== ---- QUERY SET COMPRESSION_CODEC=GZIP; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __seq_write values (4, "e", 5.5); ==== ---- QUERY +SET ALLOW_UNSUPPORTED_FORMATS=0; +insert into __seq_write values (4, "e", 5.5); +---- CATCH +Writing to table format SEQUENCEFILE is not supported. Use query option +==== +---- QUERY select * from __seq_write; ---- RESULTS 0,'a',1.1 diff --git a/testdata/workloads/functional-query/queries/QueryTest/text-writer.test b/testdata/workloads/functional-query/queries/QueryTest/text-writer.test index 39789ba66..89cd730d4 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/text-writer.test +++ b/testdata/workloads/functional-query/queries/QueryTest/text-writer.test @@ -7,21 +7,32 @@ create table __text_write (i int, s string, d double); ==== ---- QUERY SET COMPRESSION_CODEC=NONE; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __text_write select 0, "a", 1.1; ==== ---- QUERY SET COMPRESSION_CODEC=DEFAULT; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __text_write values (1, "b", 2.2); ==== ---- QUERY SET COMPRESSION_CODEC=SNAPPY; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __text_write values (2, "c", 3.3); ==== ---- QUERY SET COMPRESSION_CODEC=GZIP; +SET ALLOW_UNSUPPORTED_FORMATS=1; insert into __text_write values (3, "d", 4.4); ==== ---- QUERY +SET COMPRESSION_CODEC=GZIP; +SET ALLOW_UNSUPPORTED_FORMATS=0; +insert into __text_write values (3, "d", 4.4); +---- CATCH +Writing to compressed text table is not supported. +==== +---- QUERY select * from __text_write; ---- RESULTS 0,'a',1.1