mirror of
https://github.com/apache/impala.git
synced 2026-01-26 21:02:23 -05:00
Full ACID row format looks like this:
{
"operation": 0,
"originalTransaction": 1,
"bucket": 536870912,
"rowId": 0,
"currentTransaction": 1,
"row": {"i": 1}
}
User columns are nested under "row". In the frontend we need to create
slot descriptors that correspond to the file schema. In the catalog we
could mimic the file schema, but that would introduce several
complexities and corner cases in column resolution. Also, in query
results the heading of the above user column would be "row.i", star
expansion would have to be modified, etc.
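Because of that, in the catalog I create the inverse of the above
schema: the ACID metadata fields are nested under a hidden "row__id"
struct, and the user columns stay at the top level: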
{
"row__id":
{
"operation": 0,
"originalTransaction": 1,
"bucket": 536870912,
"rowId": 0,
"currentTransaction": 1
},
"i": 1
}
This way very little modification is needed in the frontend, and the
hidden columns can still be easily retrieved via 'SELECT row__id.*'
when we need them for debugging/testing.
We only need to change Path.getAbsolutePath() to return a schema path
that corresponds to the file schema. Also in the backend we need some
extra juggling in OrcSchemaResolver::ResolveColumn() to retrieve the
table schema path from the file schema path.
Testing:
I changed data loading to load ORC files in full ACID format by default.
With this change we should be able to scan full ACID tables that are
not minor-compacted, don't have deleted rows, and don't have original
files.
Newly added tests:
* specific queries about hidden columns (full-acid-rowid.test)
* SHOW CREATE TABLE (show-create-table-full-acid.test)
* DESCRIBE [FORMATTED] TABLE (describe-path.test)
* INSERT should be forbidden (acid-negative.test)
* column masking (ranger_column_masking_complex_types.test)
Change-Id: Ic2e2afec00c9a5cf87f1d61b5fe52b0085844bcb
Reviewed-on: http://gerrit.cloudera.org:8080/15395
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
190 lines
5.0 KiB
Plaintext
190 lines
5.0 KiB
Plaintext
====
|
|
---- QUERY
|
|
describe functional.alltypes
|
|
---- RESULTS
|
|
'id','int','Add a comment'
|
|
'bool_col','boolean',''
|
|
'tinyint_col','tinyint',''
|
|
'smallint_col','smallint',''
|
|
'int_col','int',''
|
|
'bigint_col','bigint',''
|
|
'float_col','float',''
|
|
'double_col','double',''
|
|
'date_string_col','string',''
|
|
'string_col','string',''
|
|
'timestamp_col','timestamp',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
# Test printing of complex types.
|
|
describe functional.allcomplextypes
|
|
---- RESULTS
|
|
'id','int',''
|
|
'int_array_col','array<int>',''
|
|
'array_array_col','array<array<int>>',''
|
|
'map_array_col','array<map<string,int>>',''
|
|
'struct_array_col','array<struct<\n f1:bigint,\n f2:string\n>>',''
|
|
'int_map_col','map<string,int>',''
|
|
'array_map_col','map<string,array<int>>',''
|
|
'map_map_col','map<string,map<string,int>>',''
|
|
'struct_map_col','map<string,struct<\n f1:bigint,\n f2:string\n>>',''
|
|
'int_struct_col','struct<\n f1:int,\n f2:int\n>',''
|
|
'complex_struct_col','struct<\n f1:int,\n f2:array<int>,\n f3:map<string,int>\n>',''
|
|
'nested_struct_col','struct<\n f1:int,\n f2:struct<\n f11:bigint,\n f12:struct<\n f21:bigint\n >\n >\n>',''
|
|
'complex_nested_struct_col','struct<\n f1:int,\n f2:array<struct<\n f11:bigint,\n f12:map<string,struct<\n f21:bigint\n >>\n >>\n>',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
describe functional_parquet.allcomplextypes.int_array_col
|
|
---- RESULTS
|
|
'item','int',''
|
|
'pos','bigint',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe functional_parquet.allcomplextypes.map_array_col.item
|
|
---- RESULTS
|
|
'key','string',''
|
|
'value','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe functional_parquet.allcomplextypes.complex_struct_col
|
|
---- RESULTS
|
|
'f1','int',''
|
|
'f2','array<int>',''
|
|
'f3','map<string,int>',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe functional_parquet.allcomplextypes.complex_struct_col.f2
|
|
---- RESULTS
|
|
'item','int',''
|
|
'pos','bigint',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe functional_parquet.allcomplextypes.nested_struct_col
|
|
---- RESULTS
|
|
'f1','int',''
|
|
'f2','struct<\n f11:bigint,\n f12:struct<\n f21:bigint\n >\n>',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Test describing structs within collections.
|
|
create table if not exists nested_structs (
|
|
map_array_struct_col map<string, array<struct<f1:int, f2:string>>>,
|
|
struct_array_struct_col
|
|
struct<f1:int, f2:array<struct<f11:bigint, f12:string>>>,
|
|
map_array_map_struct_col
|
|
map<string, array<map<string, struct<f1:string, f2:int>>>>)
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
describe nested_structs
|
|
---- RESULTS
|
|
'map_array_struct_col','map<string,array<struct<\n f1:int,\n f2:string\n>>>',''
|
|
'struct_array_struct_col','struct<\n f1:int,\n f2:array<struct<\n f11:bigint,\n f12:string\n >>\n>',''
|
|
'map_array_map_struct_col','map<string,array<map<string,struct<\n f1:string,\n f2:int\n>>>>',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
use functional
|
|
====
|
|
---- QUERY
|
|
# Check that paths relative to current database work ok.
|
|
describe alltypes
|
|
---- RESULTS
|
|
'id','int','Add a comment'
|
|
'bool_col','boolean',''
|
|
'tinyint_col','tinyint',''
|
|
'smallint_col','smallint',''
|
|
'int_col','int',''
|
|
'bigint_col','bigint',''
|
|
'float_col','float',''
|
|
'double_col','double',''
|
|
'date_string_col','string',''
|
|
'string_col','string',''
|
|
'timestamp_col','timestamp',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
use functional_parquet
|
|
====
|
|
---- QUERY
|
|
# Check that paths relative to current database work ok.
|
|
describe allcomplextypes.int_array_col
|
|
---- RESULTS
|
|
'item','int',''
|
|
'pos','bigint',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
# Describe the full ACID partitioned table 'alltypes'.
|
|
describe functional_orc_def.alltypes
|
|
---- RESULTS
|
|
'id','int','Add a comment'
|
|
'bool_col','boolean',''
|
|
'tinyint_col','tinyint',''
|
|
'smallint_col','smallint',''
|
|
'int_col','int',''
|
|
'bigint_col','bigint',''
|
|
'float_col','float',''
|
|
'double_col','double',''
|
|
'date_string_col','string',''
|
|
'string_col','string',''
|
|
'timestamp_col','timestamp',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
# Describe the synthetic transactional field 'row__id'.
|
|
describe functional_orc_def.alltypes.row__id
|
|
---- RESULTS
|
|
'operation','int',''
|
|
'originaltransaction','bigint',''
|
|
'bucket','int',''
|
|
'rowid','bigint',''
|
|
'currenttransaction','bigint',''
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
# Describe a full ACID table with nested types.
|
|
describe functional_orc_def.complextypestbl
|
|
---- RESULTS
|
|
'id','bigint',''
|
|
'int_array','array<int>',''
|
|
'int_array_array','array<array<int>>',''
|
|
'int_map','map<string,int>',''
|
|
'int_map_array','array<map<string,int>>',''
|
|
'nested_struct','struct<\n a:int,\n b:array<int>,\n c:struct<\n d:array<array<struct<\n e:int,\n f:string\n >>>\n >,\n g:map<string,struct<\n h:struct<\n i:array<double>\n >\n >>\n>',''
|
|
---- TYPES
|
|
string, string, string
|
|
====
|