IMPALA-12378: Auto Ship JDBC Data Source

This patch moves the source files of jdbc package to fe.
Data source location is optional. Data source could be created without
specifying HDFS location. Assume data source class is in the classpath
and instance of data source class could be created with current class
loader. Impala still try to load the jar file of the data source in
runtime if it's set in data source location.

Testing:
 - Passed core test
 - Passed dockerised-tests

Change-Id: I0daff8db6231f161ec27b45b51d78e21733d9b1f
Reviewed-on: http://gerrit.cloudera.org:8080/20971
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
This commit is contained in:
wzhou-code
2024-02-03 22:19:03 -08:00
committed by Wenzhe Zhou
parent 68e61c1aab
commit fc74ca672a
38 changed files with 79 additions and 58 deletions

View File

@@ -139,8 +139,10 @@ Status ExternalDataSourceExecutor::Init(const string& jar_path,
string local_jar_path;
// TODO(IMPALA-6727): pass the mtime from the coordinator. for now, skip the mtime
// check (-1).
RETURN_IF_ERROR(LibCache::instance()->GetLocalPath(
jar_path, LibCache::TYPE_JAR, -1, &handle, &local_jar_path));
if (!jar_path.empty()) {
RETURN_IF_ERROR(LibCache::instance()->GetLocalPath(
jar_path, LibCache::TYPE_JAR, -1, &handle, &local_jar_path));
}
JNIEnv* jni_env = JniUtil::GetJNIEnv();
// Add a scoped cleanup jni reference object. This cleans up local refs made below.

View File

@@ -290,6 +290,7 @@ export IMPALA_SLF4J_VERSION=2.0.3
export IMPALA_SPRINGFRAMEWORK_VERSION=5.3.27
export IMPALA_XMLSEC_VERSION=2.2.3
export IMPALA_OBS_VERSION=3.1.1-hw-42
export IMPALA_DBCP2_VERSION=2.9.0
# When Impala is building docker images on Redhat-based distributions,
# it is useful to be able to customize the base image. Some users will

View File

@@ -131,6 +131,7 @@ testdata/*.test
be/src/kudu/util/testdata/*.txt
be/src/testutil/*.pem
*.json
fe/src/main/java/org/apache/impala/extdatasource/jdbc/README.md
fe/src/test/resources/*.xml
fe/src/test/resources/adschema.ldif
fe/src/test/resources/adusers.ldif
@@ -138,7 +139,6 @@ fe/src/test/resources/hbase-jaas-client.conf.template
fe/src/test/resources/hbase-jaas-server.conf.template
fe/src/test/resources/users.ldif
java/.mvn/maven.config
java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/README.md
java/toolchains.xml.tmpl
testdata/AllTypesError/*.txt
testdata/AllTypesErrorNoNulls/*.txt

View File

@@ -344,6 +344,12 @@ under the License.
<version>${derby.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-dbcp2</artifactId>
<version>${commons-dbcp2.version}</version>
</dependency>
<dependency>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_annotations</artifactId>

View File

@@ -1041,18 +1041,18 @@ public class MetastoreShim extends Hive3MetastoreShimBase {
* Convert DataConnector object to DataSource object.
*/
private static DataSource dataConnectorToDataSource(DataConnector connector) {
if (!connector.isSetName() || !connector.isSetType() || !connector.isSetUrl()
if (!connector.isSetName() || !connector.isSetType()
|| !connector.isSetDescription() || connector.getParametersSize() == 0
|| !connector.getType().equalsIgnoreCase(HMS_DATA_CONNECTOR_TYPE)) {
return null;
}
String name = connector.getName();
String location = connector.getUrl();
String location = connector.isSetUrl() ? connector.getUrl() : "";
String className =
connector.getParameters().get(HMS_DATA_CONNECTOR_PARAM_KEY_CLASS_NAME);
String apiVersion =
connector.getParameters().get(HMS_DATA_CONNECTOR_PARAM_KEY_API_VERSION);
if (!Strings.isNullOrEmpty(name) && !Strings.isNullOrEmpty(location) &&
if (!Strings.isNullOrEmpty(name) && location != null &&
!Strings.isNullOrEmpty(className) && !Strings.isNullOrEmpty(apiVersion)) {
return new DataSource(name, location, className, apiVersion);
}

View File

@@ -2326,11 +2326,11 @@ create_view_stmt ::=
create_data_src_stmt ::=
KW_CREATE KW_DATA source_ident:is_source_id
if_not_exists_val:if_not_exists ident_or_default:data_src_name
KW_LOCATION STRING_LITERAL:location
location_val:location
KW_CLASS STRING_LITERAL:class_name
KW_API_VERSION STRING_LITERAL:api_version
{:
RESULT = new CreateDataSrcStmt(data_src_name, new HdfsUri(location), class_name,
RESULT = new CreateDataSrcStmt(data_src_name, location, class_name,
api_version, if_not_exists);
:}
;

View File

@@ -44,7 +44,6 @@ public class CreateDataSrcStmt extends StatementBase {
Preconditions.checkNotNull(dataSrcName);
Preconditions.checkNotNull(className);
Preconditions.checkNotNull(apiVersionString);
Preconditions.checkNotNull(location);
dataSrcName_ = dataSrcName.toLowerCase();
location_ = location;
className_ = className;
@@ -68,7 +67,9 @@ public class CreateDataSrcStmt extends StatementBase {
"'. Valid API versions: " + Joiner.on(", ").join(ApiVersion.values()));
}
location_.analyze(analyzer, Privilege.ALL, FsAction.READ);
if (location_ != null) {
location_.analyze(analyzer, Privilege.ALL, FsAction.READ);
}
// TODO: Check class exists and implements API version
// TODO: authorization check
}
@@ -80,7 +81,7 @@ public class CreateDataSrcStmt extends StatementBase {
if (ifNotExists_) sb.append("IF NOT EXISTS ");
sb.append(dataSrcName_);
sb.append(" LOCATION '");
sb.append(location_.getLocation());
sb.append(location_ != null ? location_.getLocation() : "");
sb.append("' CLASS '");
sb.append(className_);
sb.append("' API_VERSION '");
@@ -91,7 +92,7 @@ public class CreateDataSrcStmt extends StatementBase {
public TCreateDataSourceParams toThrift() {
return new TCreateDataSourceParams(
new TDataSource(dataSrcName_, location_.toString(), className_,
apiVersion_.name())).setIf_not_exists(ifNotExists_);
new TDataSource(dataSrcName_, location_ != null ? location_.toString() : "",
className_, apiVersion_.name())).setIf_not_exists(ifNotExists_);
}
}

View File

@@ -62,14 +62,16 @@ public class CreateTableDataSrcStmt extends CreateTableStmt {
"not support the column type: " + col.getType());
}
}
// Add table properties from the DataSource catalog object now that we have access
// to the catalog. These are stored in the table metadata so that the table could
// be scanned without the DataSource catalog object.
String location = dataSource.getLocation();
getTblProperties().put(TBL_PROP_LOCATION, location);
getTblProperties().put(TBL_PROP_LOCATION, location != null ? location : "");
getTblProperties().put(TBL_PROP_CLASS, dataSource.getClassName());
getTblProperties().put(TBL_PROP_API_VER, dataSource.getApiVersion());
new HdfsUri(location).analyze(analyzer, Privilege.ALL, FsAction.READ);
// TODO: check class exists and implements API version
if (!Strings.isNullOrEmpty(location)) {
new HdfsUri(location).analyze(analyzer, Privilege.ALL, FsAction.READ);
}
}
}

View File

@@ -47,6 +47,7 @@ import org.apache.impala.extdatasource.v1.ExternalDataSource;
import org.apache.impala.thrift.TErrorCode;
import org.apache.impala.thrift.TStatus;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -107,15 +108,14 @@ public class ExternalDataSourceExecutor {
/**
* @param jarPath The local path to the jar containing the ExternalDataSource.
* It is null or empty if the jar file of data source is already in classpath.
* @param className The name of the class implementing the ExternalDataSource.
* @param apiVersionStr The API version the ExternalDataSource implements.
* Must be a valid value of {@link ApiVersion}.
* Must be a valid value of {@link ApiVersion}.
* @param initString The init string registered with this data source.
*/
public ExternalDataSourceExecutor(String jarPath, String className,
String apiVersionStr, String initString) throws ImpalaException {
Preconditions.checkNotNull(jarPath);
apiVersion_ = ApiVersion.valueOf(apiVersionStr);
if (apiVersion_ == null) {
throw new ImpalaRuntimeException("Invalid API version: " + apiVersionStr);
@@ -130,8 +130,8 @@ public class ExternalDataSourceExecutor {
dataSource_ = (ExternalDataSource) ctor.newInstance();
} catch (Exception ex) {
throw new ImpalaRuntimeException(String.format("Unable to load external data " +
"source library from path=%s className=%s apiVersion=%s", jarPath,
className, apiVersionStr), ex);
"source library from path=%s className=%s apiVersion=%s",
jarPath != null ? jarPath : "Impala classpath", className, apiVersionStr), ex);
}
}
@@ -141,6 +141,11 @@ public class ExternalDataSourceExecutor {
*/
private Class<?> getDataSourceClass() throws Exception {
Class<?> c = null;
if (Strings.isNullOrEmpty(jarPath_)) {
c = Class.forName(className_);
LOG.trace("Get instance of DataSourceClass in current ClassLoader");
return c;
}
// Cache map key needs to contain both the class name and init string in case
// the same class is used for multiple tables where some are cached and others
// are not.
@@ -248,8 +253,8 @@ public class ExternalDataSourceExecutor {
}
String errorMessage = String.format(
"Error in data source (path=%s, class=%s, version=%s) %s: %s",
jarPath_, className_, apiVersion_.name(), opName,
exceptionMessage);
jarPath_ != null ? jarPath_ : "Impala classpath", className_, apiVersion_.name(),
opName, exceptionMessage);
LOG.error(errorMessage, e); // Logs the stack
return new TStatus(TErrorCode.RUNTIME_ERROR, Lists.newArrayList(errorMessage));
}

View File

@@ -76,7 +76,7 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor {
protected DataSource dbcpDataSource = null;
// Cache datasource for sharing
public static Cache<String, DataSource> dataSourceCache = CacheBuilder
protected static final Cache<String, DataSource> dataSourceCache = CacheBuilder
.newBuilder()
.removalListener((RemovalListener<String, DataSource>) notification -> {
DataSource ds = notification.getValue();
@@ -164,9 +164,9 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor {
@Override
public void close(boolean cleanCache) {
dbcpDataSource = null;
if (cleanCache && dataSourceCache != null) {
if (cleanCache) {
Preconditions.checkNotNull(dataSourceCache);
dataSourceCache.invalidateAll();
dataSourceCache = null;
}
if (jdbcDriverLocalPath != null) {
// Delete the jar file of jdbc driver.
@@ -287,6 +287,7 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor {
String jdbcUrl = props.getProperty("url");
String username = props.getProperty("username", "-");
String cacheMapKey = String.format("%s.%s", jdbcUrl, username);
Preconditions.checkNotNull(dataSourceCache);
dbcpDataSource = dataSourceCache.get(cacheMapKey,
() -> {
LOG.info("Datasource for '{}' was not cached. "

View File

@@ -65,6 +65,7 @@ import org.apache.impala.thrift.TStatus;
import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@@ -169,16 +170,28 @@ public class DataSourceScanNode extends ScanNode {
}
}
String dsLocation = table_.getDataSource().getHdfs_location();
String localPath;
try {
localPath = FileSystemUtil.copyFileFromUriToLocal(dsLocation);
} catch (IOException e) {
throw new InternalException(String.format(
"Unable to fetch data source jar from location '%s'.", dsLocation));
}
String className = table_.getDataSource().getClass_name();
String apiVersion = table_.getDataSource().getApi_version();
String dsLocation = table_.getDataSource().getHdfs_location();
// Check if the data source jar file exists in the HDFS location. If yes, then copy
// the jar file to the local file system, and get the instance of the data source
// class in URLClassLoader which is generated from copied local jar file. Otherwise
// assume the jar file is in the classpath and try to get the instance of the data
// source class in current ClassLoader.
String localPath = null;
if (!Strings.isNullOrEmpty(dsLocation)) {
LOG.trace("Get the instance from URLClassLoader");
try {
localPath = FileSystemUtil.copyFileFromUriToLocal(dsLocation);
} catch (IOException e) {
throw new InternalException(String.format(
"Unable to fetch data source jar from location '%s'.", dsLocation));
}
Preconditions.checkNotNull(localPath);
} else {
LOG.trace("Get the instance of the data source class in current ClassLoader");
}
TPrepareResult prepareResult;
TStatus prepareStatus;
try {
@@ -196,9 +209,11 @@ public class DataSourceScanNode extends ScanNode {
"Error calling prepare() on data source %s",
DataSource.debugString(table_.getDataSource())), e);
} finally {
// Delete the jar file once its loaded
Path localJarPath = new Path("file://" + localPath);
FileSystemUtil.deleteIfExists(localJarPath);
if (!Strings.isNullOrEmpty(localPath)) {
// Delete the jar file once its loaded
Path localJarPath = new Path("file://" + localPath);
FileSystemUtil.deleteIfExists(localJarPath);
}
}
if (prepareStatus.getStatus_code() != TErrorCode.OK) {
throw new InternalException(String.format(

View File

@@ -2086,6 +2086,7 @@ public class AnalyzeDDLTest extends FrontendTestBase {
"CLASS 'com.bar.Foo' API_VERSION 'V1'");
AnalyzesOk("CREATE DATA SOURCE foo LOCATION 's3a://bucket/a/b/foo.jar' " +
"CLASS 'com.bar.Foo' API_VERSION 'V1'");
AnalyzesOk("CREATE DATA SOURCE foo CLASS 'com.bar.Foo' API_VERSION 'V1'");
AnalysisError("CREATE DATA SOURCE foo LOCATION 'blah://localhost:20500/foo.jar' " +
"CLASS 'com.bar.Foo' API_VERSION 'V1'",

View File

@@ -3115,6 +3115,7 @@ public class ParserTest extends FrontendTestBase {
"API_VERSION \"V1\"");
ParsesOk("CREATE DATA SOURCE foo LOCATION '/x/foo@hi_^!#.jar' CLASS 'com.bar.Foo' " +
"API_VERSION 'V1'");
ParsesOk("CREATE DATA SOURCE foo CLASS 'com.bar.Foo' API_VERSION 'V1'");
ParserError("CREATE DATA foo LOCATION '/foo.jar' CLASS 'com.bar.Foo' " +
"API_VERSION 'V1'");
@@ -3130,8 +3131,6 @@ public class ParserTest extends FrontendTestBase {
"API_VERSION V1");
ParserError("CREATE DATA SOURCE LOCATION '/x/foo.jar' CLASS 'com.bar.Foo' " +
"API_VERSION 'V1'");
ParserError("CREATE DATA SOURCE foo CLASS 'com.bar.Foo' " +
"API_VERSION 'V1'");
ParserError("CREATE DATA SOURCE foo LOCATION CLASS 'com.bar.Foo' " +
"API_VERSION 'V1'");
ParserError("CREATE DATA SOURCE foo LOCATION '/foo.jar' API_VERSION 'V1'");

View File

@@ -741,9 +741,8 @@ public class LdapHS2Test {
String dropDSQuery = "DROP DATA SOURCE IF EXISTS impala_jdbc_test_ds";
String createDSQuery = String.format("CREATE DATA SOURCE impala_jdbc_test_ds " +
"LOCATION '%s/test-warehouse/data-sources/jdbc-data-source.jar' " +
"CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource' " +
"API_VERSION 'V1'", fileSystemPrefix);
"API_VERSION 'V1'");
String dropTableQuery = "DROP TABLE IF EXISTS %s";
// Set JDBC authentication mechanisms as LDAP (3) with username/password as
// TEST_USER_1/TEST_PASSWORD_1.

View File

@@ -79,6 +79,7 @@ under the License.
<bouncy-castle.version>${env.IMPALA_BOUNCY_CASTLE_VERSION}</bouncy-castle.version>
<springframework.version>${env.IMPALA_SPRINGFRAMEWORK_VERSION}</springframework.version>
<json-smart.version>${env.IMPALA_JSON_SMART_VERSION}</json-smart.version>
<commons-dbcp2.version>${env.IMPALA_DBCP2_VERSION}</commons-dbcp2.version>
</properties>
<repositories>

View File

@@ -40,13 +40,6 @@ hadoop fs -put -f \
echo "Copied" ${EXT_DATA_SOURCE_SRC_PATH}/test/target/impala-data-source-test-*.jar \
"into HDFS" ${EXT_DATA_SOURCES_HDFS_PATH}
hadoop fs -put -f \
${EXT_DATA_SOURCE_SRC_PATH}/jdbc/target/impala-data-source-jdbc-*.jar \
${EXT_DATA_SOURCES_HDFS_PATH}/jdbc-data-source.jar
echo "Copied" ${EXT_DATA_SOURCE_SRC_PATH}/jdbc/target/impala-data-source-jdbc-*.jar \
"into HDFS" ${EXT_DATA_SOURCES_HDFS_PATH}
# Copy Postgres JDBC driver to HDFS
hadoop fs -put -f \
${IMPALA_HOME}/fe/target/dependency/postgresql-*.jar \

View File

@@ -3,7 +3,6 @@
# Create DataSource
DROP DATA SOURCE IF EXISTS TestJdbcDataSource;
CREATE DATA SOURCE TestJdbcDataSource
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar'
CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
API_VERSION 'V1';
---- RESULTS
@@ -15,7 +14,7 @@ SHOW DATA SOURCES LIKE 'testjdbcdatasource';
---- LABELS
NAME,LOCATION,CLASS NAME,API VERSION
---- RESULTS
'testjdbcdatasource',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
'testjdbcdatasource','','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
---- TYPES
STRING,STRING,STRING,STRING
====

View File

@@ -3,7 +3,6 @@
# Create DataSource
DROP DATA SOURCE IF EXISTS TestJdbcDataSource;
CREATE DATA SOURCE TestJdbcDataSource
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar'
CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
API_VERSION 'V1';
---- RESULTS
@@ -15,7 +14,7 @@ SHOW DATA SOURCES LIKE 'testjdbcdatasource';
---- LABELS
NAME,LOCATION,CLASS NAME,API VERSION
---- RESULTS
'testjdbcdatasource',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
'testjdbcdatasource','','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
---- TYPES
STRING,STRING,STRING,STRING
====

View File

@@ -3,7 +3,6 @@
# Create DataSource
DROP DATA SOURCE IF EXISTS TestJdbcDataSourceWithKeystore;
CREATE DATA SOURCE TestJdbcDataSourceWithKeystore
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar'
CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
API_VERSION 'V1';
---- RESULTS
@@ -15,7 +14,7 @@ SHOW DATA SOURCES LIKE 'testjdbcdatasourcewithkeystore';
---- LABELS
NAME,LOCATION,CLASS NAME,API VERSION
---- RESULTS
'testjdbcdatasourcewithkeystore',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
'testjdbcdatasourcewithkeystore','','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
---- TYPES
STRING,STRING,STRING,STRING
====

View File

@@ -3,7 +3,6 @@
# Create DataSource
DROP DATA SOURCE IF EXISTS TestJdbcDataSource;
CREATE DATA SOURCE TestJdbcDataSource
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar'
CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
API_VERSION 'V1';
---- RESULTS
@@ -15,7 +14,7 @@ SHOW DATA SOURCES LIKE 'testjdbcdatasource';
---- LABELS
NAME,LOCATION,CLASS NAME,API VERSION
---- RESULTS
'testjdbcdatasource',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
'testjdbcdatasource','','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
---- TYPES
STRING,STRING,STRING,STRING
====

View File

@@ -3,7 +3,6 @@
# Create DataSource
DROP DATA SOURCE IF EXISTS TestJdbcDataSource;
CREATE DATA SOURCE TestJdbcDataSource
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar'
CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
API_VERSION 'V1';
---- RESULTS
@@ -15,7 +14,7 @@ SHOW DATA SOURCES LIKE 'testjdbcdatasource';
---- LABELS
NAME,LOCATION,CLASS NAME,API VERSION
---- RESULTS
'testjdbcdatasource',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
'testjdbcdatasource','','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
---- TYPES
STRING,STRING,STRING,STRING
====