mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-13586: Initial support for Iceberg REST Catalogs
This patch adds initial support for Iceberg REST Catalogs. This means now it's possible to run an Impala cluster without the Hive Metastore, and without the Impala CatalogD. Impala Coordinators can directly connect to an Iceberg REST server and fetch metadata for databases and tables from there. The support is read-only, i.e. DDL and DML statements are not supported yet. This was initially developed in the context of a company Hackathon program, i.e. it was a team effort that I squashed into a single commit and polished the code a bit. The Hackathon team members were: * Daniel Becker * Gabor Kaszab * Kurt Deschler * Peter Rozsa * Zoltan Borok-Nagy The Iceberg REST Catalog support can be configured via a Java properties file, the location of it can be specified via: --catalog_config_dir: Directory of configuration files Currently only one configuration file can be in the directory as we only support a single Catalog at a time. The following properties are mandatory in the config file: * connector.name=iceberg * iceberg.catalog.type=rest * iceberg.rest-catalog.uri The first two properties can only be 'iceberg' and 'rest' for now, they are needed for extensibility in the future. Moreover, Impala Daemons need to specify the following flags to connect to an Iceberg REST Catalog: --use_local_catalog=true --catalogd_deployed=false Testing * e2e tests added to test basic functionality against a custom-built Iceberg REST server that delegates to HadoopCatalog under the hood * Further testing, e.g. Ranger tests are expected in subsequent commits TODO: * manual testing against Polaris / Lakekeeper, we could add automated tests in a later patch Change-Id: I1722b898b568d2f5689002f2b9bef59320cb088c Reviewed-on: http://gerrit.cloudera.org:8080/22353 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
99fc96adea
commit
bd3486c051
107
java/iceberg-rest-catalog-test/pom.xml
Normal file
107
java/iceberg-rest-catalog-test/pom.xml
Normal file
@@ -0,0 +1,107 @@
|
||||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<parent>
|
||||
<groupId>org.apache.impala</groupId>
|
||||
<artifactId>impala-parent</artifactId>
|
||||
<version>5.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>impala-iceberg-rest-catalog-test</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>Iceberg REST Catalog Test</name>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<version>${hadoop.version}</version>
|
||||
<exclusions>
|
||||
<!-- IMPALA-9468: Avoid pulling in netty for security reasons -->
|
||||
<exclusion>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-server</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-servlet</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs-client</artifactId>
|
||||
<version>${hadoop.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.iceberg</groupId>
|
||||
<artifactId>iceberg-api</artifactId>
|
||||
<version>${iceberg.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.iceberg</groupId>
|
||||
<artifactId>iceberg-core</artifactId>
|
||||
<version>${iceberg.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.iceberg</groupId>
|
||||
<artifactId>iceberg-core</artifactId>
|
||||
<version>${iceberg.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
<configuration>
|
||||
<redirectTestOutputToFile>true</redirectTestOutputToFile>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
// We use the org.apache.iceberg.rest package because some classes
|
||||
// are package-private. This means this code is more likely to
|
||||
// break on Iceberg version updates. In the long term we might
|
||||
// switch to an open-source Iceberg REST Catalog.
|
||||
package org.apache.iceberg.rest;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
import org.apache.iceberg.hadoop.HadoopCatalog;
|
||||
import org.apache.iceberg.catalog.Catalog;
|
||||
import org.apache.iceberg.rest.responses.ErrorResponse;
|
||||
import org.eclipse.jetty.server.Server;
|
||||
import org.eclipse.jetty.server.handler.gzip.GzipHandler;
|
||||
import org.eclipse.jetty.servlet.ServletContextHandler;
|
||||
import org.eclipse.jetty.servlet.ServletHolder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class IcebergRestCatalogTest {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(IcebergRestCatalogTest.class);
|
||||
private static final ObjectMapper MAPPER = RESTObjectMapper.mapper();
|
||||
|
||||
static final int REST_PORT = 9084;
|
||||
|
||||
private Server httpServer;
|
||||
|
||||
public IcebergRestCatalogTest() {}
|
||||
|
||||
private static String getWarehouseLocation() {
|
||||
String FILESYSTEM_PREFIX = System.getenv("FILESYSTEM_PREFIX");
|
||||
String HADOOP_CATALOG_LOCATION = "/test-warehouse/iceberg_test/hadoop_catalog";
|
||||
if (FILESYSTEM_PREFIX != null && !FILESYSTEM_PREFIX.isEmpty()) {
|
||||
return FILESYSTEM_PREFIX + HADOOP_CATALOG_LOCATION;
|
||||
}
|
||||
String DEFAULT_FS = System.getenv("DEFAULT_FS");
|
||||
return DEFAULT_FS + HADOOP_CATALOG_LOCATION;
|
||||
}
|
||||
|
||||
private Catalog initializeBackendCatalog() throws IOException {
|
||||
HdfsConfiguration conf = new HdfsConfiguration();
|
||||
return new HadoopCatalog(conf, getWarehouseLocation());
|
||||
}
|
||||
|
||||
public void start(boolean join) throws Exception {
|
||||
Catalog catalog = initializeBackendCatalog();
|
||||
RESTCatalogAdapter adapter = new RESTCatalogAdapter(catalog) {
|
||||
@Override
|
||||
public <T extends RESTResponse> T execute(
|
||||
RESTCatalogAdapter.HTTPMethod method,
|
||||
String path,
|
||||
Map<String, String> queryParams,
|
||||
Object body,
|
||||
Class<T> responseType,
|
||||
Map<String, String> headers,
|
||||
Consumer<ErrorResponse> errorHandler) {
|
||||
Object request = roundTripSerialize(body, "request");
|
||||
T response =
|
||||
super.execute(
|
||||
method, path, queryParams, request, responseType, headers, errorHandler);
|
||||
T responseAfterSerialization = roundTripSerialize(response, "response");
|
||||
return responseAfterSerialization;
|
||||
}
|
||||
};
|
||||
|
||||
RESTCatalogServlet servlet = new RESTCatalogServlet(adapter);
|
||||
ServletContextHandler context = new ServletContextHandler(
|
||||
ServletContextHandler.NO_SESSIONS);
|
||||
ServletHolder servletHolder = new ServletHolder(servlet);
|
||||
context.addServlet(servletHolder, "/*");
|
||||
context.insertHandler(new GzipHandler());
|
||||
|
||||
this.httpServer = new Server(REST_PORT);
|
||||
httpServer.setHandler(context);
|
||||
httpServer.start();
|
||||
|
||||
if (join) {
|
||||
httpServer.join();
|
||||
}
|
||||
}
|
||||
|
||||
public void stop() throws Exception {
|
||||
if (httpServer != null) {
|
||||
httpServer.stop();
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new IcebergRestCatalogTest().start(true);
|
||||
}
|
||||
|
||||
public static <T> T roundTripSerialize(T payload, String description) {
|
||||
if (payload != null) {
|
||||
LOG.trace(payload.toString());
|
||||
try {
|
||||
if (payload instanceof RESTMessage) {
|
||||
return (T) MAPPER.readValue(
|
||||
MAPPER.writeValueAsString(payload), payload.getClass());
|
||||
} else {
|
||||
// use Map so that Jackson doesn't try to instantiate ImmutableMap
|
||||
// from payload.getClass()
|
||||
return (T) MAPPER.readValue(
|
||||
MAPPER.writeValueAsString(payload), Map.class);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.warn(e.toString());
|
||||
throw new RuntimeException(
|
||||
String.format("Failed to serialize and deserialize %s: %s",
|
||||
description, payload), e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -408,6 +408,7 @@ under the License.
|
||||
<modules>
|
||||
<module>datagenerator</module>
|
||||
<module>puffin-data-generator</module>
|
||||
<module>iceberg-rest-catalog-test</module>
|
||||
<module>executor-deps</module>
|
||||
<module>ext-data-source</module>
|
||||
<module>../fe</module>
|
||||
|
||||
Reference in New Issue
Block a user