mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
The query option PARQUET_FALLBACK_SCHEMA_RESOLUTION allows matching of Parquet fields by name instead of by index (the default). Parquet column names are case sensitive, but Impala treats db/table/column/field names as case-insensitive. Today, there is no way to select Parquet columns with mixed casing via SQL using the name-based field resolution policy. This patch changes the matching of Parquet fields to be case-insensitive. Testing: - Modified the data files backing complextypestbl to contain fields with mixed casing. - Several existing tests run against this table, including the test for name-based resolution. - I confirmed that without this fix, the existing name-based resolution tests fail on the modified data files. - I locally ran test_scanners.py and test_nested_types.py on exhaustive with this fix. Change-Id: I87395f84ba29b4c3d8e41be1ea4e89e500b8a9f4 Reviewed-on: http://gerrit.cloudera.org:8080/5891 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Impala Public Jenkins
33 lines
1.5 KiB
JSON
33 lines
1.5 KiB
JSON
{"type": "record",
|
|
"namespace": "org.apache.impala",
|
|
"name": "ComplexTypesTbl",
|
|
/* Field names have mixed casing to test the case-insensitive matching of
|
|
fields in Parquet files. */
|
|
"fields": [
|
|
{"name": "ID", "type": "long"},
|
|
{"name": "Int_Array", "type": {"type": "array", "items": "int"}},
|
|
{"name": "int_array_array", "type": {"type": "array", "items":
|
|
{"type": "array", "items": "int"}}},
|
|
{"name": "Int_Map", "type": {"type": "map", "values": "int"}},
|
|
{"name": "int_map_array", "type": {"type": "array", "items":
|
|
{"type": "map", "values": "int"}}},
|
|
{"name": "nested_Struct", "type":
|
|
{"type": "record", "name": "r1", "fields": [
|
|
{"name": "a", "type": "int"},
|
|
{"name": "B", "type": {"type": "array", "items": "int"}},
|
|
{"name": "c", "type": {"type": "record", "name": "r2", "fields": [
|
|
{"name": "D", "type": {"type": "array", "items":
|
|
{"type": "array", "items":
|
|
{"type": "record", "name": "r3", "fields": [
|
|
{"name": "e", "type": "int"},
|
|
{"name": "f", "type": "string"}]}}}}
|
|
]}},
|
|
{"name": "G", "type": {"type": "map", "values": {
|
|
"type": "record", "name": "r4", "fields": [
|
|
{"name": "h", "type": {"type": "record", "name": "r5", "fields": [
|
|
{"name": "i", "type": {"type": "array", "items": "double"}}]}}
|
|
]}}}
|
|
]}}
|
|
]
|
|
}
|