mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
The query option PARQUET_FALLBACK_SCHEMA_RESOLUTION allows matching of Parquet fields by name instead of by index (the default). Parquet column names are case sensitive, but Impala treats db/table/column/field names as case-insensitive. Today, there is no way to select Parquet columns with mixed casing via SQL using the name-based field resolution policy. This patch changes the matching of Parquet fields to be case-insensitive. Testing: - Modified the data files backing complextypestbl to contain fields with mixed casing. - Several existing tests run against this table, including the test for name-based resolution. - I confirmed that without this fix, the existing name-based resolution tests fail on the modified data files. - I locally ran test_scanners.py and test_nested_types.py on exhaustive with this fix. Change-Id: I87395f84ba29b4c3d8e41be1ea4e89e500b8a9f4 Reviewed-on: http://gerrit.cloudera.org:8080/5891 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Impala Public Jenkins
34 lines
1.8 KiB
JSON
34 lines
1.8 KiB
JSON
{"type": "record",
|
|
"namespace": "org.apache.impala",
|
|
"name": "ComplexTypesTbl",
|
|
/* Field names deliberately use mixed casing to test the case-insensitive matching of
|
|
fields in Parquet files. The mixed-case names appear both at the top level
(int_array_Array, int_Map_Array) and inside the nested records (A and C in r1,
E and F in r3, H in r4); the remaining names are all lower case. */
|
|
"fields": [
|
|
{"name": "id", "type": ["null", "long"]},
|
|
{"name": "int_array", "type": ["null", {"type": "array", "items": ["null", "int"]}]},
|
|
{"name": "int_array_Array", "type": ["null", {"type": "array", "items":
|
|
["null", {"type": "array", "items": ["null", "int"]}]}]},
|
|
{"name": "int_map", "type": ["null", {"type": "map", "values": ["null", "int"]}]},
|
|
{"name": "int_Map_Array", "type": ["null", {"type": "array", "items":
|
|
["null", {"type": "map", "values": ["null", "int"]}]}]},
|
|
{"name": "nested_struct", "type":
|
|
["null", {"type": "record", "name": "r1", "fields": [
|
|
{"name": "A", "type": ["null", "int"]},
|
|
{"name": "b", "type": ["null", {"type": "array", "items": ["null", "int"]}]},
|
|
{"name": "C", "type": ["null", {"type": "record", "name": "r2", "fields": [
|
|
{"name": "d", "type": ["null", {"type": "array", "items":
|
|
["null", {"type": "array", "items":
|
|
["null", {"type": "record", "name": "r3", "fields": [
|
|
{"name": "E", "type": ["null", "int"]},
|
|
{"name": "F", "type": ["null", "string"]}]}]}]}]}
|
|
]}]},
|
|
{"name": "g", "type": ["null", {"type": "map", "values":
|
|
["null", {"type": "record", "name": "r4", "fields": [
|
|
{"name": "H", "type":
|
|
["null", {"type": "record", "name": "r5", "fields": [
|
|
{"name": "i", "type": ["null", {"type": "array", "items":
|
|
["null", "double"]}]}]}]}]}]}]}
|
|
]}]}
|
|
]
|
|
}
|