mirror of
https://github.com/apache/impala.git
synced 2026-01-08 21:03:01 -05:00
The Impala CatalogService manages the caching and dissemination of cluster-wide metadata. The CatalogService combines the metadata from the Hive Metastore, the NameNode, and potentially additional sources in the future. The CatalogService uses the StateStore to broadcast metadata updates across the cluster. The CatalogService also directly handles executing metadata update requests from impalad servers (DDL requests). It exposes a Thrift interface to allow impalads to directly connect and execute their DDL operations. The CatalogService has two main components - a C++ server that implements StateStore integration, Thrift service implementation, and exporting of the debug webpage/metrics. The other main component is the Java Catalog that manages caching and updating of all the metadata. For each StateStore heartbeat, a delta of all metadata updates is broadcast to the rest of the cluster. Some Notes On the Changes --- * The metadata is all sent as thrift structs. To do this all catalog objects (Tables/Views, Databases, UDFs) have a thrift struct to represent them. These are sent with each statestore delta update. * The existing Catalog class has been separated into two separate sub-classes. An ImpaladCatalog and a CatalogServiceCatalog. See the comments on those classes for more details. What is working: * New CatalogService created * Working with statestore delta updates and latest UDF changes * DDL performed on Node 1 is now visible on all other nodes without a "refresh". * Each DDL operation against the Catalog Service will return the catalog version that contains the change. An impalad will wait for the statestore heartbeat that contains this version before returning from the DDL command. 
* All table types (Hbase, Hdfs, Views) getting their metadata propagated properly * Block location information included in CS updates and used by Impalads * Column and table stats included in CS updates and used by Impalads * Query tests are all passing Still TODO: * Directly return catalog object metadata from DDL requests * Poll the Hive Metastore to detect new/dropped/modified tables * Reorganize the FE code for the Catalog Service. I don't think we want everything in the same JAR. Change-Id: I8c61296dac28fb98bcfdc17361f4f141d3977eda Reviewed-on: http://gerrit.ent.cloudera.com:8080/601 Reviewed-by: Lenni Kuff <lskuff@cloudera.com> Tested-by: Lenni Kuff <lskuff@cloudera.com>
253 lines
5.3 KiB
Plaintext
253 lines
5.3 KiB
Plaintext
====
|
|
---- QUERY
|
|
use default
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Make sure the database doesn't exist
|
|
show databases like 'ddl_test_db'
|
|
---- SETUP
|
|
RELOAD
|
|
---- RESULTS
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
create database ddl_test_db
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create a simple view without renaming the columns.
|
|
create view ddl_test_db.simple_view as
|
|
select * from functional.alltypes
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test that 'if not exists' swallows the error (view already exists)
|
|
create view if not exists ddl_test_db.simple_view as
|
|
select * from functional.alltypesagg
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create another simple view with 'if not exists' on a subset of
|
|
# alltypes' columns using custom column names and comments
|
|
create view if not exists
|
|
ddl_test_db.simple_view_sub (x, y comment 'hello', z) as
|
|
select int_col, string_col, timestamp_col from functional.alltypes
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create a view on a parquet table (Hive cannot create/read/write parquet)
|
|
create view ddl_test_db.parquet_view as
|
|
select * from functional_parquet.alltypes where id < 20
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create a complex view with predicates, joins, aggregates and order by
|
|
create view ddl_test_db.complex_view (abc comment 'agg', xyz comment 'gby') as
|
|
select count(a.bigint_col), b.string_col from
|
|
functional.alltypesagg a inner join functional.alltypestiny b
|
|
on a.id = b.id where a.bigint_col < 50
|
|
group by b.string_col having count(a.bigint_col) > 1
|
|
order by b.string_col limit 100
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create a view on a view
|
|
create view ddl_test_db.view_view (aaa, bbb) as
|
|
select * from ddl_test_db.complex_view
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test that the views are displayed by 'show tables'
|
|
show tables in ddl_test_db
|
|
---- RESULTS
|
|
'complex_view'
|
|
'parquet_view'
|
|
'simple_view'
|
|
'simple_view_sub'
|
|
'view_view'
|
|
====
|
|
---- QUERY
|
|
# Test that the views can be described
|
|
describe ddl_test_db.simple_view
|
|
---- RESULTS
|
|
'id','int',''
|
|
'bool_col','boolean',''
|
|
'tinyint_col','tinyint',''
|
|
'smallint_col','smallint',''
|
|
'int_col','int',''
|
|
'bigint_col','bigint',''
|
|
'float_col','float',''
|
|
'double_col','double',''
|
|
'date_string_col','string',''
|
|
'string_col','string',''
|
|
'timestamp_col','timestamp',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe ddl_test_db.simple_view_sub
|
|
---- RESULTS
|
|
'x','int',''
|
|
'y','string','hello'
|
|
'z','timestamp',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe ddl_test_db.complex_view
|
|
---- RESULTS
|
|
'abc','bigint','agg'
|
|
'xyz','string','gby'
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe ddl_test_db.parquet_view
|
|
---- RESULTS
|
|
'id','int',''
|
|
'bool_col','boolean',''
|
|
'tinyint_col','tinyint',''
|
|
'smallint_col','smallint',''
|
|
'int_col','int',''
|
|
'bigint_col','bigint',''
|
|
'float_col','float',''
|
|
'double_col','double',''
|
|
'date_string_col','string',''
|
|
'string_col','string',''
|
|
'timestamp_col','timestamp',''
|
|
'year','int',''
|
|
'month','int',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
describe ddl_test_db.view_view
|
|
---- RESULTS
|
|
'aaa','bigint',''
|
|
'bbb','string',''
|
|
---- TYPES
|
|
string,string,string
|
|
====
|
|
---- QUERY
|
|
# Test that the views can be queried.
|
|
select count(*) from ddl_test_db.simple_view
|
|
---- RESULTS
|
|
7300
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from ddl_test_db.simple_view_sub
|
|
---- RESULTS
|
|
7300
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from ddl_test_db.complex_view
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from ddl_test_db.parquet_view
|
|
---- RESULTS
|
|
20
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
select count(*) from ddl_test_db.view_view
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
bigint
|
|
====
|
|
---- QUERY
|
|
# Test dropping a view
|
|
drop view ddl_test_db.simple_view_sub
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test that the view is gone
|
|
show tables in ddl_test_db
|
|
---- RESULTS
|
|
'complex_view'
|
|
'parquet_view'
|
|
'simple_view'
|
|
'view_view'
|
|
====
|
|
---- QUERY
|
|
# Test 'if exists' for dropping a view (view does not exist)
|
|
drop view if exists ddl_test_db.bad_view
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test renaming a view
|
|
alter view ddl_test_db.view_view rename to ddl_test_db.view_on_view
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test renaming a parquet view
|
|
alter view ddl_test_db.parquet_view rename to ddl_test_db.new_parquet_view
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test that the view was renamed
|
|
show tables in ddl_test_db
|
|
---- RESULTS
|
|
'complex_view'
|
|
'new_parquet_view'
|
|
'simple_view'
|
|
'view_on_view'
|
|
====
|
|
---- QUERY
|
|
# Test altering a view with a new definition
|
|
alter view ddl_test_db.new_parquet_view as
|
|
select bigint_col, string_col from functional_parquet.alltypesagg
|
|
where bigint_col is null limit 10
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Test querying the altered view
|
|
select count(bigint_col), count(string_col) from ddl_test_db.new_parquet_view
|
|
---- RESULTS
|
|
0,10
|
|
---- TYPES
|
|
bigint,bigint
|
|
====
|
|
---- QUERY
|
|
# Create a view on a constant select and try to query it.
|
|
create view ddl_test_db.const_view
|
|
as select 1, 'a', 10.0
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select * from ddl_test_db.const_view
|
|
---- RESULTS
|
|
1,'a',10.0
|
|
---- TYPES
|
|
tinyint,string,float
|
|
====
|
|
---- QUERY
|
|
# Test that parentheses are preserved in view creation.
|
|
# If the parentheses were ignored the query would return a count > 0.
|
|
create view ddl_test_db.paren_view as
|
|
select count(*) from functional.alltypessmall
|
|
where true and (true or false) and false
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Query the view to verify that the parentheses were preserved (expects a count of 0).
|
|
select * from ddl_test_db.paren_view
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
bigint
|
|
====
|