mirror of
https://github.com/apache/impala.git
synced 2026-01-22 18:02:34 -05:00
Impala supports creating a table using the schema of a file. However, only Parquet is currently supported. This commit adds support for creating tables from ORC files. The change relies on the ORC Java API version 1.5 or greater, because of a bug in earlier versions. Therefore, ORC is listed as an external dependency instead of relying on Hive's ORC version (as of Hive 3, Hive also lists it as a dependency). The commit also performs a little clean-up on the ParquetHelper class, renaming it to ParquetSchemaExtractor and removing outdated comments. To create a table from an ORC file, run: CREATE TABLE tablename LIKE ORC '/path/to/file'. Tests: * Added analysis tests for primitive and complex types. * Added e2e tests for creating tables from ORC files. Change-Id: I77cd84cda2ed86516937a67eb320fd41e3f1cf2d Reviewed-on: http://gerrit.cloudera.org:8080/14811 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
90 lines
2.9 KiB
Plaintext
90 lines
2.9 KiB
Plaintext
====
---- QUERY
# Create a table whose schema is inferred from an ORC data file with decimal columns.
create table $DATABASE.temp_decimal_table_orc like ORC
'$FILESYSTEM_PREFIX/test-warehouse/decimal_tiny_orc_def/000000_0'
---- RESULTS
'Table has been created.'
====
---- QUERY
# The inferred columns must keep each decimal's precision and scale and carry the
# 'Inferred from ORC file.' column comment.
describe $DATABASE.temp_decimal_table_orc
---- RESULTS
'c1','decimal(10,4)','Inferred from ORC file.'
'c2','decimal(15,5)','Inferred from ORC file.'
'c3','decimal(1,1)','Inferred from ORC file.'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Create a table whose schema is inferred from an ORC data file with char/varchar columns.
create table $DATABASE.temp_chars_table like ORC
'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/000000_0'
---- RESULTS
'Table has been created.'
====
---- QUERY
# The declared lengths of the char and varchar columns must be preserved.
describe $DATABASE.temp_chars_table
---- RESULTS
'cs','char(5)','Inferred from ORC file.'
'cl','char(140)','Inferred from ORC file.'
'vc','varchar(32)','Inferred from ORC file.'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Create a table whose schema is inferred from the zipcode_incomes ORC data file.
create table $DATABASE.like_zipcodes_file_orc like ORC
'$FILESYSTEM_PREFIX/test-warehouse/zipcode_incomes_orc_def/000000_0'
---- RESULTS
'Table has been created.'
====
---- QUERY
# Expect four string columns followed by one int column, in file order.
describe $DATABASE.like_zipcodes_file_orc
---- RESULTS
'id','string','Inferred from ORC file.'
'zip','string','Inferred from ORC file.'
'description1','string','Inferred from ORC file.'
'description2','string','Inferred from ORC file.'
'income','int','Inferred from ORC file.'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Create a table from a single data file that lives inside a partition directory
# (year=2009/month=1) of alltypestiny_orc_def.
create table $DATABASE.like_alltypestiny_file_orc like ORC
'$FILESYSTEM_PREFIX/test-warehouse/alltypestiny_orc_def/year=2009/month=1/000000_0'
---- RESULTS
'Table has been created.'
====
---- QUERY
# Covers the primitive types. The expected output lists only the columns stored in
# the file itself; no 'year' or 'month' column is expected.
describe $DATABASE.like_alltypestiny_file_orc
---- RESULTS
'id','int','Inferred from ORC file.'
'bool_col','boolean','Inferred from ORC file.'
'tinyint_col','tinyint','Inferred from ORC file.'
'smallint_col','smallint','Inferred from ORC file.'
'int_col','int','Inferred from ORC file.'
'bigint_col','bigint','Inferred from ORC file.'
'float_col','float','Inferred from ORC file.'
'double_col','double','Inferred from ORC file.'
'date_string_col','string','Inferred from ORC file.'
'string_col','string','Inferred from ORC file.'
'timestamp_col','timestamp','Inferred from ORC file.'
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Create a table from an ORC file with complex (array/map/struct) columns.
# The table name is qualified with $DATABASE, like every other case in this file, so
# the test does not depend on the session's current database.
create table $DATABASE.allcomplextypes_clone_orc like ORC
'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/nullable.orc'
---- RESULTS
'Table has been created.'
====
---- QUERY
# Nested types must be reproduced exactly, including the multi-line struct rendering
# (the '\n' sequences in the expected row are part of the expected text).
describe $DATABASE.allcomplextypes_clone_orc
---- RESULTS
'id','bigint','Inferred from ORC file.'
'int_array','array<int>','Inferred from ORC file.'
'int_array_array','array<array<int>>','Inferred from ORC file.'
'int_map','map<string,int>','Inferred from ORC file.'
'int_map_array','array<map<string,int>>','Inferred from ORC file.'
'nested_struct','struct<\n a:int,\n b:array<int>,\n c:struct<\n d:array<array<struct<\n e:int,\n f:string\n >>>\n >,\n g:map<string,struct<\n h:struct<\n i:array<double>\n >\n >>\n>','Inferred from ORC file.'
---- TYPES
STRING, STRING, STRING
====