IMPALA-1149: read bytes fields as strings in HdfsAvroScanner::MaterializeTuple()

Hive converts "bytes"-type fields to an array<tinyint> column, a type
whose metadata we can't even load. However, if a bytes field appears
in a file schema but not in the table schema, this change allows us to
read (but not materialize) the field. Without this change, we can't
read such a file at all.

This change also adds a "bytes"-type field to one of the files in
functional_avro_snap.schema_resolution_test.

Change-Id: I25953ee049e174fc4dbff5d68520a6f87e545339
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3823
Reviewed-by: Skye Wanderman-Milne <skye@cloudera.com>
Tested-by: jenkins
(cherry picked from commit 0e2e7c1ac0f63623b7ec3724920e9927cd782508)
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3895
This commit is contained in:
Skye Wanderman-Milne
2014-08-18 14:32:19 -07:00
committed by jenkins
parent 91d262a1d7
commit 8e44347831
4 changed files with 12 additions and 6 deletions

View File

@@ -411,8 +411,7 @@ HdfsAvroScanner::SchemaElement HdfsAvroScanner::ConvertSchema(
// not a complex type nor a [<primitive type>, "null"] union itself), we treat
// this node as the same type as child except with null_union_position set
// appropriately.
if (is_avro_primitive(child.schema.get()) &&
child.null_union_position == -1) {
if (is_avro_primitive(child.schema.get()) && child.null_union_position == -1) {
element = child;
element.null_union_position = null_position;
}
@@ -620,6 +619,7 @@ void HdfsAvroScanner::MaterializeTuple(MemPool* pool, uint8_t** data, Tuple* tup
ReadAvroDouble(slot_type, data, write_slot, slot, pool);
break;
case AVRO_STRING:
case AVRO_BYTES:
ReadAvroString(slot_type, data, write_slot, slot, pool);
break;
case AVRO_DECIMAL: {
@@ -808,6 +808,8 @@ Function* HdfsAvroScanner::CodegenMaterializeTuple(HdfsScanNode* node,
break;
default:
// Unsupported type, can't codegen
VLOG(1) << "Failed to codegen MaterializeTuple() due to unsupported type: "
<< element.schema->type;
fn->eraseFromParent();
return NULL;
}
@@ -858,5 +860,7 @@ Function* HdfsAvroScanner::CodegenDecodeAvroData(RuntimeState* state,
DCHECK_EQ(replaced, 1);
decode_avro_data_fn->setName("DecodeAvroData");
return codegen->OptimizeFunctionWithExprs(decode_avro_data_fn);
decode_avro_data_fn = codegen->OptimizeFunctionWithExprs(decode_avro_data_fn);
DCHECK(decode_avro_data_fn != NULL);
return decode_avro_data_fn;
}

View File

@@ -1,7 +1,9 @@
{"name": "a",
"type": "record",
"comment": "Contains a field not in the table and none of the table fields",
"comment": "Contains fields not in the table and none of the table fields",
"fields": [
{"name":"boolean2", "type": {"type": "boolean", "note": "alternate primitive syntax"}}
{"name":"boolean2", "type": {"type": "boolean", "note": "alternate primitive syntax"}},
{"name":"bytes", "type": "bytes",
"note": "bytes is an invalid column type but can be read as a non-materialized field"}
]
}

Binary file not shown.

View File

@@ -1 +1 @@
{"boolean2": false}
{"boolean2": false, "bytes": "\u00FF"}