mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
This patch uses the "external data source" mechanism in Impala to
implement data source for querying JDBC.
It has some limitations due to the restrictions of "external data
source":
- It is not distributed, i.e., the fragment is unpartitioned. Queries
are executed on the coordinator.
- Queries which read the following data types from external JDBC tables
are not supported:
BINARY, CHAR, DATETIME, and COMPLEX.
- Only binary predicates with the operators =, !=, <=, >=,
<, > can be pushed down to the RDBMS.
- The following data types are not supported for predicates:
DECIMAL, TIMESTAMP, DATE, and BINARY.
- External tables with complex types of columns are not supported.
- Support is limited to the following databases:
MySQL, Postgres, Oracle, MSSQL, H2, DB2, and JETHRO_DATA.
- Catalog V2 is not supported (IMPALA-7131).
- DataSource objects are not persistent (IMPALA-12375).
Additional fixes are planned on top of this patch.
Source files under jdbc/conf, jdbc/dao and jdbc/exception are
replicated from Hive JDBC Storage Handler.
In order to query the RDBMS tables, the following steps should be
followed (note that any existing data source tables will be rebuilt):
1. Make sure the Impala cluster has been started.
2. Copy the jar files of JDBC drivers and the data source library into
HDFS.
${IMPALA_HOME}/testdata/bin/copy-ext-data-sources.sh
3. Create an `alltypes` table in the Postgres database.
${IMPALA_HOME}/testdata/bin/load-ext-data-sources.sh
4. Create data source tables (alltypes_jdbc_datasource and
alltypes_jdbc_datasource_2).
${IMPALA_HOME}/bin/impala-shell.sh -f\
${IMPALA_HOME}/testdata/bin/create-ext-data-source-table.sql
5. The data source tables created in the last step are now ready to be
queried. There is no need to restart the Impala cluster.
Testing:
- Added unit-test for Postgres and ran unit-test with JDBC driver
postgresql-42.5.1.jar.
- Manually ran the unit tests against MySQL with JDBC driver
mysql-connector-j-8.1.0.jar.
- Ran core tests successfully.
Change-Id: I8244e978c7717c6f1452f66f1630b6441392e7d2
Reviewed-on: http://gerrit.cloudera.org:8080/17842
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
Reviewed-by: Kurt Deschler <kdeschle@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
99 lines
3.3 KiB
SQL
99 lines
3.3 KiB
SQL
--
|
|
-- Licensed to the Apache Software Foundation (ASF) under one
|
|
-- or more contributor license agreements. See the NOTICE file
|
|
-- distributed with this work for additional information
|
|
-- regarding copyright ownership. The ASF licenses this file
|
|
-- to you under the Apache License, Version 2.0 (the
|
|
-- "License"); you may not use this file except in compliance
|
|
-- with the License. You may obtain a copy of the License at
|
|
--
|
|
-- http://www.apache.org/licenses/LICENSE-2.0
|
|
--
|
|
-- Unless required by applicable law or agreed to in writing,
|
|
-- software distributed under the License is distributed on an
|
|
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
-- KIND, either express or implied. See the License for the
|
|
-- specific language governing permissions and limitations
|
|
-- under the License.
|
|
|
|
-- Create test data sources and tables
|
|
|
|
-- Make `functional` the current database so the unqualified table names below resolve to it.
USE functional;
|
|
|
|
-- (Re)register the AllTypesDataSource test data source.
-- The DROP ... IF EXISTS makes this script safe to re-run.
DROP DATA SOURCE IF EXISTS AllTypesDataSource;

CREATE DATA SOURCE AllTypesDataSource
  LOCATION '/test-warehouse/data-sources/test-data-source.jar'
  CLASS 'org.apache.impala.extdatasource.AllTypesDataSource'
  API_VERSION 'V1';
|
|
|
|
-- (Re)create the table produced by AllTypesDataSource.
-- "TestInitString" is the init string supplied to the data source.
DROP TABLE IF EXISTS alltypes_datasource;

CREATE TABLE alltypes_datasource (
  id            INT,
  bool_col      BOOLEAN,
  tinyint_col   TINYINT,
  smallint_col  SMALLINT,
  int_col       INT,
  bigint_col    BIGINT,
  float_col     FLOAT,
  double_col    DOUBLE,
  timestamp_col TIMESTAMP,
  string_col    STRING,
  dec_col1      DECIMAL(9,0),
  dec_col2      DECIMAL(10,0),
  dec_col3      DECIMAL(20,10),
  dec_col4      DECIMAL(38,37),
  dec_col5      DECIMAL(10,5),
  date_col      DATE
)
PRODUCED BY DATA SOURCE AllTypesDataSource("TestInitString");
|
|
|
|
-- (Re)register the JDBC data source used by the alltypes_jdbc_datasource* tables.
-- The DROP ... IF EXISTS makes this script safe to re-run.
DROP DATA SOURCE IF EXISTS JdbcDataSource;

CREATE DATA SOURCE JdbcDataSource
  LOCATION '/test-warehouse/data-sources/jdbc-data-source.jar'
  CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
  API_VERSION 'V1';
|
|
|
|
-- (Re)create the JDBC-backed table that reads the Postgres table "alltypes".
-- The init string is a JSON object of connection properties. Its lines are
-- kept flush-left so that the contents of the string literal are unchanged.
DROP TABLE IF EXISTS alltypes_jdbc_datasource;

CREATE TABLE alltypes_jdbc_datasource (
  id              INT,
  bool_col        BOOLEAN,
  tinyint_col     TINYINT,
  smallint_col    SMALLINT,
  int_col         INT,
  bigint_col      BIGINT,
  float_col       FLOAT,
  double_col      DOUBLE,
  date_string_col STRING,
  string_col      STRING,
  timestamp_col   TIMESTAMP
)
PRODUCED BY DATA SOURCE JdbcDataSource(
'{"database.type":"POSTGRES",
"jdbc.url":"jdbc:postgresql://localhost:5432/functional",
"jdbc.driver":"org.postgresql.Driver",
"driver.url":"/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
"dbcp.username":"hiveuser",
"dbcp.password":"password",
"table":"alltypes"}');
|
|
|
|
-- (Re)create a second JDBC-backed table, reading the Postgres table
-- "AllTypesWithQuote" through an explicit column mapping.
--
-- Differences from alltypes_jdbc_datasource:
--   * "driver.url" is a fully qualified hdfs:// URI rather than a bare path.
--   * "column.mapping" maps each Impala column to a mixed-case remote column.
--
-- The last mapping entry previously read "timestamp=Timestamp", which names a
-- column this table does not declare. It now maps the declared timestamp_col.
-- NOTE(review): the remote name "Timestamp_col" follows the pattern of the
-- other entries; confirm against the AllTypesWithQuote DDL.
--
-- The init string's lines are kept flush-left so that no extra whitespace is
-- introduced into the string literal.
DROP TABLE IF EXISTS alltypes_jdbc_datasource_2;

CREATE TABLE alltypes_jdbc_datasource_2 (
  id              INT,
  bool_col        BOOLEAN,
  tinyint_col     TINYINT,
  smallint_col    SMALLINT,
  int_col         INT,
  bigint_col      BIGINT,
  float_col       FLOAT,
  double_col      DOUBLE,
  date_string_col STRING,
  string_col      STRING,
  timestamp_col   TIMESTAMP
)
PRODUCED BY DATA SOURCE JdbcDataSource(
'{"database.type":"POSTGRES",
"jdbc.url":"jdbc:postgresql://localhost:5432/functional",
"jdbc.driver":"org.postgresql.Driver",
"driver.url":"hdfs://localhost:20500/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
"dbcp.username":"hiveuser",
"dbcp.password":"password",
"table":"AllTypesWithQuote",
"column.mapping":"id=id, bool_col=Bool_col, tinyint_col=Tinyint_col, smallint_col=Smallint_col, int_col=Int_col, bigint_col=Bigint_col, float_col=Float_col, double_col=Double_col, date_string_col=Date_string_col, string_col=String_col, timestamp_col=Timestamp_col"}');
|