mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-1149: read bytes fields as strings in HdfsAvroScanner::MaterializeTuple()
Hive converts "bytes"-type fields to an array<tinyint> column, for which we cannot even load the metadata. However, if a bytes field appears in a file schema but not in the table schema, this change allows us to read (but not materialize) the field. Without this change, we cannot read such a file at all.

This change also adds a "bytes"-type field to one of the files in functional_avro_snap.schema_resolution_test.

Change-Id: I25953ee049e174fc4dbff5d68520a6f87e545339
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3823
Reviewed-by: Skye Wanderman-Milne <skye@cloudera.com>
Tested-by: jenkins
(cherry picked from commit 0e2e7c1ac0f63623b7ec3724920e9927cd782508)
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3895
This commit is contained in:
committed by
jenkins
parent
91d262a1d7
commit
8e44347831
@@ -411,8 +411,7 @@ HdfsAvroScanner::SchemaElement HdfsAvroScanner::ConvertSchema(
|
||||
// not a complex type nor a [<primitive type>, "null"] union itself), we treat
|
||||
// this node as the same type as child except with null_union_position set
|
||||
// appropriately.
|
||||
if (is_avro_primitive(child.schema.get()) &&
|
||||
child.null_union_position == -1) {
|
||||
if (is_avro_primitive(child.schema.get()) && child.null_union_position == -1) {
|
||||
element = child;
|
||||
element.null_union_position = null_position;
|
||||
}
|
||||
@@ -620,6 +619,7 @@ void HdfsAvroScanner::MaterializeTuple(MemPool* pool, uint8_t** data, Tuple* tup
|
||||
ReadAvroDouble(slot_type, data, write_slot, slot, pool);
|
||||
break;
|
||||
case AVRO_STRING:
|
||||
case AVRO_BYTES:
|
||||
ReadAvroString(slot_type, data, write_slot, slot, pool);
|
||||
break;
|
||||
case AVRO_DECIMAL: {
|
||||
@@ -808,6 +808,8 @@ Function* HdfsAvroScanner::CodegenMaterializeTuple(HdfsScanNode* node,
|
||||
break;
|
||||
default:
|
||||
// Unsupported type, can't codegen
|
||||
VLOG(1) << "Failed to codegen MaterializeTuple() due to unsupported type: "
|
||||
<< element.schema->type;
|
||||
fn->eraseFromParent();
|
||||
return NULL;
|
||||
}
|
||||
@@ -858,5 +860,7 @@ Function* HdfsAvroScanner::CodegenDecodeAvroData(RuntimeState* state,
|
||||
DCHECK_EQ(replaced, 1);
|
||||
decode_avro_data_fn->setName("DecodeAvroData");
|
||||
|
||||
return codegen->OptimizeFunctionWithExprs(decode_avro_data_fn);
|
||||
decode_avro_data_fn = codegen->OptimizeFunctionWithExprs(decode_avro_data_fn);
|
||||
DCHECK(decode_avro_data_fn != NULL);
|
||||
return decode_avro_data_fn;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
{"name": "a",
|
||||
"type": "record",
|
||||
"comment": "Contains a field not in the table and none of the table fields",
|
||||
"comment": "Contains fields not in the table and none of the table fields",
|
||||
"fields": [
|
||||
{"name":"boolean2", "type": {"type": "boolean", "note": "alternate primitive syntax"}}
|
||||
{"name":"boolean2", "type": {"type": "boolean", "note": "alternate primitive syntax"}},
|
||||
{"name":"bytes", "type": "bytes",
|
||||
"note": "bytes is an invalid column type but can be read as a non-materialized field"}
|
||||
]
|
||||
}
|
||||
|
||||
BIN
testdata/avro_schema_resolution/records1.avro
vendored
BIN
testdata/avro_schema_resolution/records1.avro
vendored
Binary file not shown.
@@ -1 +1 @@
|
||||
{"boolean2": false}
|
||||
{"boolean2": false, "bytes": "\u00FF"}
|
||||
|
||||
Reference in New Issue
Block a user