
gradle,.github: add bulk cdk publish workflow (#43361)

Marius Posta authored 2024-08-07 10:22:30 -07:00, committed by GitHub
parent 6a63ffa189
commit 359833011c
140 changed files with 13091 additions and 13 deletions


@@ -39,6 +39,7 @@ jobs:
- '**/*.gradle'
- '**/*.kt'
- 'airbyte-cdk/java/**/*'
- 'airbyte-cdk/bulk/**/*'
run-check:
needs:

.github/workflows/publish-bulk-cdk.yml

@@ -0,0 +1,160 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
name: Publish Bulk CDK
on:
push:
branches:
- master
paths:
- "airbyte-cdk/bulk"
workflow_dispatch:
inputs:
repo:
description: "Repo to check out code from. Defaults to the main airbyte repo."
type: choice
required: true
default: airbytehq/airbyte
options:
- airbytehq/airbyte
gitref:
description: "The git ref to check out. Defaults to the branch triggering the workflow."
required: false
build-number:
description: "Build Number"
required: false
type: number
concurrency:
group: publish-bulk-cdk
cancel-in-progress: false
env:
# Use the provided GITREF or default to the branch triggering the workflow.
GITREF: ${{ github.event.inputs.gitref || github.ref }}
S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
S3_BUILD_CACHE_SECRET_KEY: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
jobs:
publish-bulk-cdk:
name: Publish Bulk CDK
runs-on: connector-test-large
timeout-minutes: 30
steps:
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
ref: ${{ env.GITREF }}
- name: Setup Java
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "21"
- name: Docker login
# Some tests use testcontainers which pull images from DockerHub.
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
- name: Set Build Number [manual]
if: github.event.inputs.build-number
env:
BUILD_NUMBER: ${{ github.event.inputs.build-number }}
run: |
mkdir -p airbyte-cdk/bulk/build/generated
echo $BUILD_NUMBER > airbyte-cdk/bulk/build/generated/build.number
- name: Set Build Number [auto]
if: ${{ !github.event.inputs.build-number }}
uses: burrunan/gradle-cache-action@v1
env:
CI: true
with:
job-id: bulk-cdk-publish
concurrent: true
gradle-distribution-sha-256-sum-warning: false
arguments: --scan :airbyte-cdk:bulk:generateBuildNumberFile
- name: Read Build Number
run: echo "BUILD_NUMBER=$(cat airbyte-cdk/bulk/build/generated/build.number)" >>$GITHUB_ENV
- name: Build Bulk CDK
uses: burrunan/gradle-cache-action@v1
env:
CI: true
with:
job-id: bulk-cdk-publish
concurrent: true
gradle-distribution-sha-256-sum-warning: false
arguments: --scan :airbyte-cdk:bulk:bulkCdkBuild
- name: Publish Poms and Jars to CloudRepo
uses: burrunan/gradle-cache-action@v1
env:
CI: true
CLOUDREPO_USER: ${{ secrets.CLOUDREPO_USER }}
CLOUDREPO_PASSWORD: ${{ secrets.CLOUDREPO_PASSWORD }}
with:
job-id: bulk-cdk-publish
read-only: true
concurrent: true
execution-only-caches: true
gradle-distribution-sha-256-sum-warning: false
arguments: --scan :airbyte-cdk:bulk:bulkCdkPublish
- name: Post failure to Slack channel
if: ${{ env.DRY_RUN == 'false' && failure() }}
uses: slackapi/slack-github-action@v1.23.0
continue-on-error: true
with:
channel-id: C04J1M66D8B
payload: |
{
"text": "Error while publishing Bulk CDK!",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "Error while publishing Bulk CDK!"
}
},
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "See details on <https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}|GitHub>\n"
}
}
]
}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN_AIRBYTE_TEAM }}
- name: Post success to Slack channel
if: ${{ env.DRY_RUN == 'false' && !failure() }}
uses: slackapi/slack-github-action@v1.23.0
continue-on-error: true
with:
channel-id: C04J1M66D8B
payload: |
{
"text": "Bulk CDK version 0.${{ env.BUILD_NUMBER }} published successfully!",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "Bulk CDK version 0.${{ env.BUILD_NUMBER }} published successfully!"
}
},
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "See details on <https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}|GitHub>\n"
}
}
]
}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN_AIRBYTE_TEAM }}


@@ -0,0 +1,14 @@
# Bulk CDK
The Bulk CDK is the "new Java CDK" that's currently incubating.
It's written in Kotlin and consists of a _core_ and a bunch of _toolkits_:
- The _core_ consists of the Micronaut entry point and other objects which are expected in
connectors built using this CDK.
- The _toolkits_ consist of optional modules containing objects which are common across
multiple (but by no means all) connectors.
While the CDK is incubating, its published version numbers are 0.X, where X increases
monotonically; the next value of X is derived from the maximum version found in the Maven
repository that the jars are published to.
Jar publication happens via a GitHub workflow triggered by pushes to the master branch, i.e. after
merging a pull request.


@@ -0,0 +1,104 @@
final File buildNumberFile = file("${getLayout().buildDirectory.get()}/generated/build.number")
allprojects {
apply plugin: 'java-library'
apply plugin: 'maven-publish'
group 'io.airbyte.bulk-cdk'
// Disable spotbugs on test code, which gets annoying really quickly for @MicronautTest classes.
spotbugsTest {
enabled = false
}
dependencies {
implementation platform('org.jetbrains.kotlin:kotlin-bom:2.0.0')
implementation platform('org.jetbrains.kotlinx:kotlinx-coroutines-bom:1.8.1')
implementation platform('com.fasterxml.jackson:jackson-bom:2.16.1')
implementation platform('io.micronaut:micronaut-core-bom:4.3.13')
implementation platform('org.junit:junit-bom:5.10.2')
implementation platform('org.slf4j:slf4j-bom:2.0.13')
implementation platform('org.apache.logging.log4j:log4j-bom:2.21.1')
implementation platform('org.testcontainers:testcontainers-bom:1.19.8')
implementation 'org.jetbrains.kotlin:kotlin-stdlib'
implementation 'com.google.dagger:dagger-compiler:2.51.1'
ksp 'com.google.dagger:dagger-compiler:2.51.1'
annotationProcessor platform('io.micronaut:micronaut-core-bom:4.3.13')
annotationProcessor 'info.picocli:picocli-codegen:4.7.5'
annotationProcessor 'io.micronaut:micronaut-inject-kotlin'
ksp platform('io.micronaut:micronaut-core-bom:4.3.13')
ksp 'io.micronaut:micronaut-inject-kotlin'
kspTest platform('io.micronaut:micronaut-core-bom:4.3.13')
kspTest 'io.micronaut:micronaut-inject-kotlin'
}
if (buildNumberFile.exists()) {
version = "0.${buildNumberFile.text.trim()}"
publishing {
publications {
cdk(MavenPublication) {
from components.java
}
}
// This repository is only defined and used in the context of publishing artifacts
// It's different from the 'airbyte-public-jars' defined in settings.gradle
// only in its omission of the 'public' directory.
// Any artifacts published here will be available in the 'airbyte-public-jars' repo.
repositories {
maven {
name 'airbyte-repo'
url 'https://airbyte.mycloudrepo.io/repositories/airbyte-public-jars/'
credentials {
username System.getenv('CLOUDREPO_USER')
password System.getenv('CLOUDREPO_PASSWORD')
}
}
}
}
}
}
if (buildNumberFile.exists()) {
tasks.register('bulkCdkBuild').configure {
dependsOn allprojects.collect {it.tasks.named('build')}
}
tasks.register('bulkCdkPublish').configure {
dependsOn allprojects.collect {it.tasks.named('publish')}
}
}
tasks.register('generateBuildNumberFile') {
description = 'Generates a build.number file in the build directory'
group = 'Custom'
outputs.file buildNumberFile
doLast {
var repoUrl = "https://airbyte.mycloudrepo.io/public/repositories/airbyte-public-jars"
var groupIdUrl = "${repoUrl}/io/airbyte/bulk-cdk"
var artifactUrl = "${groupIdUrl}/bulk"
var metadataXmlUrl = "${artifactUrl}/maven-metadata.xml"
var connection = metadataXmlUrl.toURL().openConnection() as HttpURLConnection
try {
connection.setRequestMethod("GET")
connection.setDoInput(true)
var responseCode = connection.getResponseCode()
if (responseCode != 200) {
throw new GradleException("Unexpected HTTP response code ${responseCode} from ${metadataXmlUrl} : expected 200.")
}
String responseContent = connection.inputStream.text
def xml = new XmlParser().parseText(responseContent)
String latestVersion = xml."versioning"."latest".text()
String buildNumber = latestVersion.replaceFirst('^0\\.', '')
Integer nextBuildNumber = 1 + buildNumber.toInteger()
buildNumberFile.parentFile.mkdirs()
buildNumberFile.text = "$nextBuildNumber"
logger.lifecycle("Wrote Bulk CDK build number ${nextBuildNumber} to ${buildNumberFile.path}.")
} finally {
connection.disconnect()
}
}
}
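
For illustration, the version bump performed by this task reduces to the following arithmetic (a minimal Kotlin sketch of the same logic; the HTTP fetch and XML parsing of maven-metadata.xml are elided):

fun nextVersion(latestPublished: String): String {
    // e.g. latestPublished == "0.42" yields "0.43"
    val buildNumber: Int = latestPublished.removePrefix("0.").toInt()
    return "0.${buildNumber + 1}"
}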


@@ -0,0 +1,54 @@
dependencies {
api 'com.fasterxml.jackson.core:jackson-annotations'
api 'com.fasterxml.jackson.core:jackson-databind'
api 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310'
api 'com.kjetland:mbknor-jackson-jsonschema_2.13:1.0.39'
api('io.airbyte.airbyte-protocol:protocol-models:0.9.0') {
exclude group: 'com.google.guava', module: 'guava'
exclude group: 'com.google.api-client'
exclude group: 'org.apache.logging.log4j'
exclude group: 'javax.validation'
exclude group: 'org.apache.commons'
exclude group: 'commons-io'
}
api 'io.github.oshai:kotlin-logging-jvm:5.1.0'
api 'io.micronaut:micronaut-runtime'
api 'org.apache.sshd:sshd-mina:2.12.1'
api 'org.jetbrains.kotlinx:kotlinx-coroutines-core'
implementation 'com.datadoghq:dd-trace-api:1.28.0'
implementation 'com.datadoghq:dd-trace-ot:1.28.0'
implementation 'com.fasterxml.jackson.module:jackson-module-kotlin'
implementation 'com.fasterxml.jackson.module:jackson-module-afterburner'
implementation 'com.networknt:json-schema-validator:1.4.0'
implementation 'info.picocli:picocli:4.7.6'
implementation 'io.micronaut.picocli:micronaut-picocli:5.2.0'
implementation 'jakarta.validation:jakarta.validation-api:3.0.2'
implementation 'org.apache.commons:commons-lang3:3.14.0'
implementation 'org.apache.logging.log4j:log4j-api'
implementation 'org.apache.logging.log4j:log4j-core'
implementation 'org.apache.logging.log4j:log4j-slf4j-impl'
implementation 'org.apache.logging.log4j:log4j-slf4j2-impl'
implementation 'org.apache.logging.log4j:log4j-layout-template-json:2.17.2'
implementation 'org.bouncycastle:bcpkix-jdk18on:1.77'
implementation 'org.bouncycastle:bcprov-jdk18on:1.77'
implementation 'org.bouncycastle:bctls-jdk18on:1.77'
runtimeOnly 'com.google.guava:guava:33.2.0-jre'
runtimeOnly 'org.apache.commons:commons-compress:1.26.1'
testFixturesApi 'org.jetbrains.kotlin:kotlin-test-junit'
testFixturesApi 'org.jetbrains.kotlin:kotlin-reflect'
testFixturesApi 'org.junit.jupiter:junit-jupiter-api'
testFixturesApi 'org.junit.jupiter:junit-jupiter-params'
testFixturesApi 'org.junit.jupiter:junit-jupiter-engine'
testFixturesApi('org.testcontainers:testcontainers') {
exclude group: 'org.apache.commons', module: 'commons-compress'
}
testFixturesApi 'io.micronaut.test:micronaut-test-core:4.2.1'
testFixturesApi 'io.micronaut.test:micronaut-test-junit5:4.2.1'
testFixturesApi 'com.h2database:h2:2.2.224'
testFixturesApi 'io.github.deblockt:json-diff:1.0.1'
}


@@ -0,0 +1,38 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
import io.airbyte.cdk.consumers.OutputConsumer
import io.airbyte.cdk.operation.Operation
import io.airbyte.cdk.util.ApmTraceUtils
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Value
import jakarta.inject.Inject
private val log = KotlinLogging.logger {}
/** [AirbyteConnectorRunner] tells Micronaut to use this [Runnable] as the entry point. */
@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
class AirbyteConnectorRunnable : Runnable {
@Value("\${airbyte.connector.metadata.docker-repository}") lateinit var connectorName: String
@Inject lateinit var operation: Operation
@Inject lateinit var outputConsumer: OutputConsumer
override fun run() {
log.info { "Executing ${operation::class} operation." }
try {
operation.execute()
} catch (e: Throwable) {
log.error(e) { "Failed ${operation::class} operation execution." }
ApmTraceUtils.addExceptionToTrace(e)
outputConsumer.acceptTraceOnConfigError(e)
throw e
} finally {
log.info { "Flushing output consumer prior to shutdown." }
outputConsumer.close()
log.info { "Completed integration: $connectorName." }
}
}
}


@@ -0,0 +1,177 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk
import io.airbyte.cdk.command.ConnectorCommandLinePropertySource
import io.airbyte.cdk.command.MetadataYamlPropertySource
import io.micronaut.configuration.picocli.MicronautFactory
import io.micronaut.context.ApplicationContext
import io.micronaut.context.env.CommandLinePropertySource
import io.micronaut.context.env.Environment
import io.micronaut.core.cli.CommandLine as MicronautCommandLine
import java.nio.file.Path
import kotlin.system.exitProcess
import picocli.CommandLine
import picocli.CommandLine.Model.ArgGroupSpec
import picocli.CommandLine.Model.OptionSpec
import picocli.CommandLine.Model.UsageMessageSpec
/** Source connector entry point. */
class AirbyteSourceRunner(
args: Array<out String>,
) : AirbyteConnectorRunner("source", args) {
companion object {
@JvmStatic
fun run(vararg args: String) {
AirbyteSourceRunner(args).run<AirbyteConnectorRunnable>()
}
}
}
/** Destination connector entry point. */
class AirbyteDestinationRunner(
args: Array<out String>,
) : AirbyteConnectorRunner("destination", args) {
companion object {
@JvmStatic
fun run(vararg args: String) {
AirbyteDestinationRunner(args).run<AirbyteConnectorRunnable>()
}
}
}
/**
* Replacement for the Micronaut CLI application runner that configures the CLI components and adds
* the custom property source used to turn the arguments into configuration properties.
*/
sealed class AirbyteConnectorRunner(
val connectorType: String,
val args: Array<out String>,
) {
val envs: Array<String> = arrayOf(Environment.CLI, connectorType)
inline fun <reified R : Runnable> run() {
val picocliCommandLineFactory = PicocliCommandLineFactory(this)
val micronautCommandLine: MicronautCommandLine = MicronautCommandLine.parse(*args)
val airbytePropertySource =
ConnectorCommandLinePropertySource(
micronautCommandLine,
picocliCommandLineFactory.commands.options().map { it.longestName() },
)
val commandLinePropertySource = CommandLinePropertySource(micronautCommandLine)
val ctx: ApplicationContext =
ApplicationContext.builder(R::class.java, *envs)
.propertySources(
airbytePropertySource,
commandLinePropertySource,
MetadataYamlPropertySource(),
)
.start()
val isTest: Boolean = ctx.environment.activeNames.contains(Environment.TEST)
val picocliFactory: CommandLine.IFactory = MicronautFactory(ctx)
val picocliCommandLine: CommandLine =
picocliCommandLineFactory.build<AirbyteConnectorRunnable>(picocliFactory, isTest)
val exitCode: Int = picocliCommandLine.execute(*args)
if (!isTest) {
// Required by the platform, otherwise syncs may hang.
exitProcess(exitCode)
}
}
}
/** Encapsulates all picocli logic. Defines the grammar for the CLI. */
class PicocliCommandLineFactory(
val runner: AirbyteConnectorRunner,
) {
inline fun <reified R : Runnable> build(
factory: CommandLine.IFactory,
isTest: Boolean,
): CommandLine {
val commandSpec: CommandLine.Model.CommandSpec =
CommandLine.Model.CommandSpec.wrapWithoutInspection(R::class.java, factory)
.name("airbyte-${runner.connectorType}-connector")
.usageMessage(usageMessageSpec)
.mixinStandardHelpOptions(true)
.addArgGroup(commands)
.addOption(config)
.addOption(catalog)
.addOption(state)
if (isTest) {
commandSpec.addOption(output)
}
return CommandLine(commandSpec, factory)
}
val usageMessageSpec: UsageMessageSpec =
UsageMessageSpec()
.header(
"@|magenta ___ _ __ __ |@",
"@|magenta / | (_)____/ /_ __ __/ /____ |@",
"@|magenta / /| | / / ___/ __ \\/ / / / __/ _ |@",
"@|magenta / ___ |/ / / / /_/ / /_/ / /_/ __/ |@",
"@|magenta /_/ |_/_/_/ /_.___/\\__, /\\__/\\___/|@",
"@|magenta /____/ |@",
)
.description("Executes an Airbyte ${runner.connectorType} connector.")
fun command(
name: String,
description: String,
): OptionSpec = OptionSpec.builder("--$name").description(description).arity("0").build()
val spec: OptionSpec = command("spec", "outputs the json configuration specification")
val check: OptionSpec = command("check", "checks the config can be used to connect")
val discover: OptionSpec =
command("discover", "outputs a catalog describing the source's catalog")
val read: OptionSpec = command("read", "reads the source and outputs messages to STDOUT")
val write: OptionSpec = command("write", "writes messages from STDIN to the integration")
val commands: ArgGroupSpec =
ArgGroupSpec.builder()
.multiplicity("1")
.exclusive(true)
.addArg(spec)
.addArg(check)
.apply {
when (runner) {
is AirbyteSourceRunner -> addArg(discover).addArg(read)
is AirbyteDestinationRunner -> addArg(write)
}
}
.build()
fun fileOption(
name: String,
vararg description: String,
): OptionSpec =
OptionSpec.builder("--$name")
.description(*description)
.type(Path::class.java)
.arity("1")
.build()
val config: OptionSpec =
fileOption(
"config",
"path to the json configuration file",
"Required by the following commands: check, discover, read, write",
)
val catalog: OptionSpec =
fileOption(
"catalog",
"input path for the catalog",
"Required by the following commands: read, write",
)
val state: OptionSpec =
fileOption(
"state",
"path to the json-encoded state file",
"Required by the following commands: read",
)
val output: OptionSpec =
fileOption(
"output",
"path to the output file",
"When present, the connector writes to this file instead of stdout",
)
}
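
With these runners, a connector's entry point reduces to a one-liner; a minimal sketch (the MySource object name is hypothetical):

import io.airbyte.cdk.AirbyteSourceRunner

object MySource {
    @JvmStatic
    fun main(args: Array<String>) {
        AirbyteSourceRunner.run(*args)
    }
}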


@@ -0,0 +1,22 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.ssh.SshConnectionOptions
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
import java.time.Duration
/**
* Interface that defines a typed connector configuration.
*
* Prefer this or its implementations over the corresponding configuration POJOs; i.e.
* [ConfigurationJsonObjectBase] subclasses.
*/
interface Configuration {
val realHost: String
val realPort: Int
val sshTunnel: SshTunnelMethodConfiguration
val sshConnectionOptions: SshConnectionOptions
val maxConcurrency: Int
val resourceAcquisitionHeartbeat: Duration
}
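
A connector-side implementation might look like this minimal sketch (the class name and default values are hypothetical):

import java.time.Duration

data class MySourceConfiguration(
    override val realHost: String,
    override val realPort: Int,
    override val sshTunnel: SshTunnelMethodConfiguration,
    override val sshConnectionOptions: SshConnectionOptions,
    override val maxConcurrency: Int = 1,
    override val resourceAcquisitionHeartbeat: Duration = Duration.ofSeconds(1),
) : Configuration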


@@ -0,0 +1,58 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.util.Jsons
import io.micronaut.context.annotation.Value
import jakarta.inject.Singleton
import java.util.function.Supplier
/**
* Supplies a valid [T] configuration POJO instance, based on the `airbyte.connector.config`
* Micronaut property values:
* - either `airbyte.connector.config.json` if it is set (typically by the CLI)
* - or the other, nested `airbyte.connector.config.*` properties (typically in unit tests)
*
* One may wonder why we need to inject this [Supplier] instead of injecting the POJO directly. The
* reason is that injecting the POJO only works if the configuration values are set via the nested
* Micronaut properties (i.e. in unit tests). We could make direct injection work the same way as
* the [ConfiguredCatalogFactory] or the [InputStateFactory] (via a @Factory) but then we'd lose the
* ability to set values via the nested properties. This current design caters to both use cases.
* Furthermore, by deferring the parsing and validation of the configuration, we don't need to worry
* about exception handling edge cases when implementing the CHECK operation.
*
* The object is also validated against its [jsonSchema] JSON schema, derived from [javaClass].
*/
@Singleton
class ConfigurationJsonObjectSupplier<T : ConfigurationJsonObjectBase>(
private val micronautPropertiesFallback: T,
@Value("\${${CONNECTOR_CONFIG_PREFIX}.json}") private val jsonPropertyValue: String? = null,
) : Supplier<T> {
@Suppress("UNCHECKED_CAST")
val javaClass: Class<T> = micronautPropertiesFallback::class.java as Class<T>
val jsonSchema: JsonNode by lazy { ValidatedJsonUtils.generateAirbyteJsonSchema(javaClass) }
override fun get(): T {
val jsonMicronautFallback: String by lazy {
try {
Jsons.writeValueAsString(micronautPropertiesFallback)
} catch (e: Exception) {
throw ConfigErrorException("failed to serialize fallback instance for $javaClass")
}
}
val json: String = jsonPropertyValue ?: jsonMicronautFallback
return ValidatedJsonUtils.parseOne(javaClass, json)
}
}
/**
* Connector configuration POJO supertype.
*
* This dummy base class is required by Micronaut. Without it, thanks to Java's type erasure, it
* thinks that the [ConfigurationJsonObjectSupplier] requires a constructor argument of type [Any].
*
* Strictly speaking, its subclasses are not really POJOs anymore, but who cares.
*/
abstract class ConfigurationJsonObjectBase
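
A hedged usage sketch with a hypothetical config POJO, showing why the [Supplier] is injected rather than the POJO itself: parsing and validation only happen when get() is called.

import jakarta.inject.Singleton

// Real config POJOs also carry Jackson and Micronaut annotations; elided here.
class MySourceConfigJsonObject : ConfigurationJsonObjectBase() {
    var host: String = ""
    var port: Int = 0
}

@Singleton
class MySourceConnectionProbe(
    private val configSupplier: ConfigurationJsonObjectSupplier<MySourceConfigJsonObject>,
) {
    fun probe() {
        // May throw ConfigErrorException; deferring this to here keeps CHECK handling simple.
        val pojo: MySourceConfigJsonObject = configSupplier.get()
        // ... connect to pojo.host:pojo.port ...
    }
}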


@@ -0,0 +1,50 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.util.ResourceUtils
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Requires
import io.micronaut.context.annotation.Value
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
/**
* Micronaut factory for the [ConfiguredAirbyteCatalog] singleton.
*
* The value may be defined via two Micronaut properties:
* - `airbyte.connector.catalog.json` for use by [ConnectorCommandLinePropertySource],
* - `airbyte.connector.catalog.resource` for use in unit tests.
*/
@Factory
class ConfiguredCatalogFactory {
@Singleton
@Requires(missingProperty = "${CONNECTOR_CATALOG_PREFIX}.resource")
fun make(
@Value("\${${CONNECTOR_CATALOG_PREFIX}.json}") json: String?,
): ConfiguredAirbyteCatalog =
ValidatedJsonUtils.parseOne(ConfiguredAirbyteCatalog::class.java, json ?: "{}").also {
for (configuredStream in it.streams) {
validateConfiguredStream(configuredStream)
}
}
private fun validateConfiguredStream(configuredStream: ConfiguredAirbyteStream) {
val stream: AirbyteStream = configuredStream.stream
if (stream.name == null) {
throw ConfigErrorException("Configured catalog is missing stream name.")
}
// TODO: add more validation?
}
@Singleton
@Requires(env = [Environment.TEST])
@Requires(notEnv = [Environment.CLI])
@Requires(property = "${CONNECTOR_CATALOG_PREFIX}.resource")
fun makeFromTestResource(
@Value("\${${CONNECTOR_CATALOG_PREFIX}.resource}") resource: String,
): ConfiguredAirbyteCatalog = make(ResourceUtils.readResource(resource))
}
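
In a unit test the catalog can therefore be supplied as a classpath resource; a hedged sketch assuming micronaut-test-junit5 (from the test fixtures above), with an illustrative resource path and class name:

import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject

@MicronautTest
@Property(name = "airbyte.connector.catalog.resource", value = "catalogs/simple-catalog.json")
class ConfiguredCatalogResourceTest {
    @Inject lateinit var catalog: ConfiguredAirbyteCatalog
}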


@@ -0,0 +1,61 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.operation.Operation
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.env.MapPropertySource
import io.micronaut.core.cli.CommandLine
import java.io.File
import java.nio.file.Path
private val log = KotlinLogging.logger {}
/**
* Custom Micronaut [MapPropertySource] that reads the command line arguments provided via the
* connector CLI and turns them into configuration properties. This allows the arguments to be
* injected into code that depends on them via Micronaut.
*/
class ConnectorCommandLinePropertySource(
commandLine: CommandLine,
allLongOptions: List<String>,
) : MapPropertySource("connector", resolveValues(commandLine, allLongOptions))
const val CONNECTOR_CONFIG_PREFIX: String = "airbyte.connector.config"
const val CONNECTOR_CATALOG_PREFIX: String = "airbyte.connector.catalog"
const val CONNECTOR_STATE_PREFIX: String = "airbyte.connector.state"
const val CONNECTOR_OUTPUT_FILE = "airbyte.connector.output.file"
private fun resolveValues(
commandLine: CommandLine,
allLongOptions: List<String>,
): Map<String, Any> {
val ops: List<String> =
allLongOptions.map { it.removePrefix("--") }.filter { commandLine.optionValue(it) != null }
if (ops.isEmpty()) {
throw IllegalArgumentException("Command line is missing an operation.")
}
if (ops.size > 1) {
throw IllegalArgumentException("Command line has multiple operations: $ops")
}
val values: MutableMap<String, Any> = mutableMapOf()
values[Operation.PROPERTY] = ops.first()
commandLine.optionValue("output")?.let { values[CONNECTOR_OUTPUT_FILE] = it }
for ((cliOptionKey, prefix) in
mapOf(
"config" to CONNECTOR_CONFIG_PREFIX,
"catalog" to CONNECTOR_CATALOG_PREFIX,
"state" to CONNECTOR_STATE_PREFIX,
)) {
val cliOptionValue = commandLine.optionValue(cliOptionKey) as String?
if (cliOptionValue.isNullOrBlank()) {
continue
}
val jsonFile: File = Path.of(cliOptionValue).toFile()
if (!jsonFile.exists()) {
log.warn { "File '$jsonFile' not found for '$cliOptionKey'." }
continue
}
values["$prefix.json"] = jsonFile.readText()
}
return values
}
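
Concretely, assuming both files exist, an invocation like `--read --config /tmp/config.json --state /tmp/state.json` resolves to a property map along these lines (an illustrative sketch):

import java.io.File

val resolved: Map<String, Any> = mapOf(
    "airbyte.connector.operation" to "read",
    "airbyte.connector.config.json" to File("/tmp/config.json").readText(),
    "airbyte.connector.state.json" to File("/tmp/state.json").readText(),
)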


@@ -0,0 +1,24 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
/** Union type of the state passed as input to a READ for a source connector. */
sealed interface InputState
data object EmptyInputState : InputState
data class GlobalInputState(
val global: OpaqueStateValue,
val globalStreams: Map<AirbyteStreamNameNamespacePair, OpaqueStateValue>,
/** Conceivably, some streams may undergo a full refresh independently of the rest. */
val nonGlobalStreams: Map<AirbyteStreamNameNamespacePair, OpaqueStateValue>,
) : InputState
data class StreamInputState(
val streams: Map<AirbyteStreamNameNamespacePair, OpaqueStateValue>,
) : InputState
/** State values are opaque for the CDK, the schema is owned by the connector. */
typealias OpaqueStateValue = JsonNode


@@ -0,0 +1,108 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.util.Jsons
import io.airbyte.cdk.util.ResourceUtils
import io.airbyte.protocol.models.v0.AirbyteGlobalState
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
import io.airbyte.protocol.models.v0.AirbyteStreamState
import io.airbyte.protocol.models.v0.StreamDescriptor
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Requires
import io.micronaut.context.annotation.Value
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
/**
* Micronaut factory for the [InputState] singleton.
*
* The value may be defined via two Micronaut properties:
* - `airbyte.connector.state.json` for use by [ConnectorCommandLinePropertySource],
* - `airbyte.connector.state.resource` for use in unit tests.
*/
@Factory
class InputStateFactory {
private val log = KotlinLogging.logger {}
@Singleton
@Requires(missingProperty = "${CONNECTOR_STATE_PREFIX}.resource")
fun make(
@Value("\${${CONNECTOR_STATE_PREFIX}.json}") json: String?,
): InputState {
val list: List<AirbyteStateMessage> =
ValidatedJsonUtils.parseList(AirbyteStateMessage::class.java, json ?: "[]")
if (list.isEmpty()) {
return EmptyInputState
}
for (message in list) {
validateStateMessage(message)
}
val deduped: List<AirbyteStateMessage> =
list
.groupBy { msg: AirbyteStateMessage ->
if (msg.stream == null) {
msg.type.toString()
} else {
val desc: StreamDescriptor = msg.stream.streamDescriptor
AirbyteStreamNameNamespacePair(desc.name, desc.namespace).toString()
}
}
.mapNotNull { (groupKey, groupValues) ->
if (groupValues.size > 1) {
log.warn {
"Discarded duplicated ${groupValues.size - 1} state message(s) " +
"for '$groupKey'."
}
}
groupValues.last()
}
val nonGlobalStreams: Map<AirbyteStreamNameNamespacePair, OpaqueStateValue> =
streamStates(deduped.mapNotNull { it.stream })
val globalState: AirbyteGlobalState? =
deduped.find { it.type == AirbyteStateMessage.AirbyteStateType.GLOBAL }?.global
if (globalState == null) {
return StreamInputState(nonGlobalStreams)
}
val globalStateValue: OpaqueStateValue =
ValidatedJsonUtils.parseUnvalidated(
globalState.sharedState,
OpaqueStateValue::class.java,
)
val globalStreams: Map<AirbyteStreamNameNamespacePair, OpaqueStateValue> =
streamStates(globalState.streamStates)
return GlobalInputState(globalStateValue, globalStreams, nonGlobalStreams)
}
private fun streamStates(
streamStates: List<AirbyteStreamState>?,
): Map<AirbyteStreamNameNamespacePair, OpaqueStateValue> =
(streamStates ?: listOf()).associate { msg: AirbyteStreamState ->
val sd: StreamDescriptor = msg.streamDescriptor
val key = AirbyteStreamNameNamespacePair(sd.name, sd.namespace)
val jsonValue: JsonNode = msg.streamState ?: Jsons.objectNode()
key to ValidatedJsonUtils.parseUnvalidated(jsonValue, OpaqueStateValue::class.java)
}
private fun validateStateMessage(message: AirbyteStateMessage) {
when (message.type) {
AirbyteStateMessage.AirbyteStateType.GLOBAL,
AirbyteStateMessage.AirbyteStateType.STREAM, -> Unit
AirbyteStateMessage.AirbyteStateType.LEGACY ->
throw ConfigErrorException("Unsupported LEGACY state type in $message.")
null -> throw ConfigErrorException("State type not set in $message.")
}
// TODO: add more validation?
}
@Singleton
@Requires(env = [Environment.TEST])
@Requires(notEnv = [Environment.CLI])
@Requires(property = "${CONNECTOR_STATE_PREFIX}.resource")
fun makeFromTestResource(
@Value("\${${CONNECTOR_STATE_PREFIX}.resource}") resource: String,
): InputState = make(ResourceUtils.readResource(resource))
}


@@ -0,0 +1,33 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import com.fasterxml.jackson.databind.PropertyNamingStrategies
import io.airbyte.cdk.util.ResourceUtils
import io.micronaut.context.env.MapPropertySource
import io.micronaut.context.env.yaml.YamlPropertySourceLoader
import java.net.URL
/** Loads the contents of the connector's metadata.yaml file as Micronaut properties. */
class MetadataYamlPropertySource : MapPropertySource(METADATA_YAML, loadFromResource()) {
companion object {
const val METADATA_YAML = "metadata.yaml"
const val PROPERTY_PREFIX = "airbyte.connector.metadata"
fun loadFromResource(): Map<String, Any?> {
val resourceURL: URL = ResourceUtils.getResource(METADATA_YAML)
val rawProperties: Map<String, Any?> =
YamlPropertySourceLoader()
.read(
METADATA_YAML,
resourceURL.openStream(),
)
return rawProperties.mapKeys { (key: String, _) ->
val stripped: String = key.removePrefix("data.")
val kebabCase: String =
PropertyNamingStrategies.KebabCaseStrategy.INSTANCE.translate(stripped)
"${PROPERTY_PREFIX}.$kebabCase"
}
}
}
}
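
For example, the metadata.yaml key data.dockerRepository surfaces as the property injected by AirbyteConnectorRunnable above; a small runnable sketch of the key translation:

import com.fasterxml.jackson.databind.PropertyNamingStrategies

fun main() {
    val stripped = "data.dockerRepository".removePrefix("data.")
    val kebab = PropertyNamingStrategies.KebabCaseStrategy.INSTANCE.translate(stripped)
    println("airbyte.connector.metadata.$kebab") // airbyte.connector.metadata.docker-repository
}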


@@ -0,0 +1,166 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ArrayNode
import com.fasterxml.jackson.databind.node.ObjectNode
import com.fasterxml.jackson.module.kotlin.contains
import com.kjetland.jackson.jsonSchema.JsonSchemaConfig
import com.kjetland.jackson.jsonSchema.JsonSchemaDraft
import com.kjetland.jackson.jsonSchema.JsonSchemaGenerator
import com.networknt.schema.JsonSchema
import com.networknt.schema.JsonSchemaFactory
import com.networknt.schema.SchemaValidatorsConfig
import com.networknt.schema.SpecVersion
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.util.Jsons
object ValidatedJsonUtils {
fun <T> parseOne(
klazz: Class<T>,
json: String,
): T {
val tree: JsonNode =
try {
Jsons.readTree(json)
} catch (e: Exception) {
throw ConfigErrorException("malformed json value while parsing for $klazz", e)
}
return parseList(klazz, tree).firstOrNull()
?: throw ConfigErrorException("missing json value while parsing for $klazz")
}
fun <T> parseList(
elementClass: Class<T>,
json: String?,
): List<T> {
val tree: JsonNode =
try {
Jsons.readTree(json ?: "[]")
} catch (e: Exception) {
throw ConfigErrorException(
"malformed json value while parsing for $elementClass",
e,
)
}
return parseList(elementClass, tree)
}
fun <T> parseList(
elementClass: Class<T>,
tree: JsonNode,
): List<T> {
val jsonList: List<JsonNode> = if (tree.isArray) tree.toList() else listOf(tree)
val schemaNode: JsonNode = generator.generateJsonSchema(elementClass)
val jsonSchema: JsonSchema = jsonSchemaFactory.getSchema(schemaNode, jsonSchemaConfig)
for (element in jsonList) {
val validationFailures = jsonSchema.validate(element)
if (validationFailures.isNotEmpty()) {
throw ConfigErrorException(
"$elementClass json schema violation: ${validationFailures.first()}",
)
}
}
return jsonList.map { parseUnvalidated(it, elementClass) }
}
fun <T> parseUnvalidated(
jsonNode: JsonNode,
klazz: Class<T>,
): T =
try {
Jsons.treeToValue(jsonNode, klazz)
} catch (e: Exception) {
throw ConfigErrorException("failed to map valid json to $klazz ", e)
}
val generatorConfig: JsonSchemaConfig =
JsonSchemaConfig.vanillaJsonSchemaDraft4()
.withJsonSchemaDraft(JsonSchemaDraft.DRAFT_07)
.withFailOnUnknownProperties(false)
private val generator = JsonSchemaGenerator(Jsons, generatorConfig)
val jsonSchemaConfig = SchemaValidatorsConfig()
val jsonSchemaFactory: JsonSchemaFactory =
JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7)
/**
* Generates a JSON schema suitable for use by the Airbyte Platform.
*
* This entails inlining any "$ref" fields and ensuring that an object with a "oneOf" field also
* contains `"type": "object"`.
*/
fun <T> generateAirbyteJsonSchema(klazz: Class<T>): JsonNode {
// Generate the real JSON schema for the class object.
val root: ObjectNode = generator.generateJsonSchema(klazz) as ObjectNode
// Now perform any post-processing required by Airbyte.
if (!root.contains("definitions")) {
// Nothing needs to be done where there are no "$ref" fields anywhere.
// This implies that there will be no "oneOf"s either.
return root
}
val definitions: ObjectNode = root["definitions"] as ObjectNode
fun walk(
node: JsonNode,
vararg visitedRefs: String,
) {
when (node) {
is ArrayNode ->
// Recurse over all array elements.
for (e in node.elements()) walk(e, *visitedRefs)
is ObjectNode -> {
if (node.contains("oneOf")) {
// Insert superfluous `"type": "object"` entry into object containing
// "oneOf".
// This doesn't change the schema but it makes the platform happy.
node.set<JsonNode>("type", Jsons.textNode("object"))
}
if (!node.contains("\$ref")) {
// Nothing to inline here, just recurse over all object field values.
for (pair in node.fields()) {
walk(pair.value, *visitedRefs)
}
return
}
// Inline the type referenced by the "$ref" field.
val ref: String = node["\$ref"].textValue().removePrefix("#/definitions/")
if (ref in visitedRefs) {
throw ConfigErrorException("circular \$ref '$ref' found in JSON schema")
}
val definition: ObjectNode =
definitions[ref] as? ObjectNode
?: throw ConfigErrorException(
"Undefined \$ref '$ref' found in JSON schema",
)
for (pair in definition.fields()) {
// Inline the definition in the current object.
// When a key is already present, keep the existing value.
if (!node.contains(pair.key)) {
node.set<JsonNode>(pair.key, pair.value)
}
}
node.remove("\$ref")
// Recurse over the object field values, including those which have just been
// inlined.
for (pair in node.fields()) {
walk(pair.value, ref, *visitedRefs)
}
}
else ->
// Nothing to do for non-array-non-object JSON nodes.
return
}
}
// Flatten the definitions first, to check for circular references.
walk(definitions)
// Remove the definitions, as they will be inlined.
root.remove("definitions")
// Inline the definitions.
walk(root)
// Return the transformed object.
return root
}
}
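
A hedged before-and-after sketch of this post-processing, for a hypothetical config class whose generated schema contains one definition:

// val schema = ValidatedJsonUtils.generateAirbyteJsonSchema(MyConfig::class.java)
//
// Before: {"definitions": {"Tunnel": {"oneOf": [ ... ]}},
//          "properties": {"tunnel": {"$ref": "#/definitions/Tunnel"}}}
// After:  {"properties": {"tunnel": {"oneOf": [ ... ], "type": "object"}}}
//         ("$ref" inlined, "definitions" removed, "type": "object" added next to "oneOf")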


@@ -0,0 +1,153 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.consumers
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteAnalyticsTraceMessage
import io.airbyte.protocol.models.v0.AirbyteCatalog
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus
import io.airbyte.protocol.models.v0.AirbyteErrorTraceMessage
import io.airbyte.protocol.models.v0.AirbyteEstimateTraceMessage
import io.airbyte.protocol.models.v0.AirbyteLogMessage
import io.airbyte.protocol.models.v0.AirbyteMessage
import io.airbyte.protocol.models.v0.AirbyteRecordMessage
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.AirbyteStreamStatusTraceMessage
import io.airbyte.protocol.models.v0.AirbyteTraceMessage
import io.airbyte.protocol.models.v0.ConnectorSpecification
import io.micronaut.context.annotation.DefaultImplementation
import io.micronaut.context.annotation.Secondary
import jakarta.inject.Singleton
import java.io.ByteArrayOutputStream
import java.time.Instant
import java.util.function.Consumer
import org.apache.commons.lang3.exception.ExceptionUtils
/** Emits the [AirbyteMessage] instances produced by the connector. */
@DefaultImplementation(StdoutOutputConsumer::class)
interface OutputConsumer : Consumer<AirbyteMessage>, AutoCloseable {
val emittedAt: Instant
fun accept(record: AirbyteRecordMessage) {
record.emittedAt = emittedAt.toEpochMilli()
accept(AirbyteMessage().withType(AirbyteMessage.Type.RECORD).withRecord(record))
}
fun accept(state: AirbyteStateMessage) {
accept(AirbyteMessage().withType(AirbyteMessage.Type.STATE).withState(state))
}
fun accept(log: AirbyteLogMessage) {
accept(AirbyteMessage().withType(AirbyteMessage.Type.LOG).withLog(log))
}
fun accept(spec: ConnectorSpecification) {
accept(AirbyteMessage().withType(AirbyteMessage.Type.SPEC).withSpec(spec))
}
fun accept(status: AirbyteConnectionStatus) {
accept(
AirbyteMessage()
.withType(AirbyteMessage.Type.CONNECTION_STATUS)
.withConnectionStatus(status),
)
}
fun accept(catalog: AirbyteCatalog) {
accept(AirbyteMessage().withType(AirbyteMessage.Type.CATALOG).withCatalog(catalog))
}
fun accept(trace: AirbyteTraceMessage) {
trace.emittedAt = emittedAt.toEpochMilli().toDouble()
accept(AirbyteMessage().withType(AirbyteMessage.Type.TRACE).withTrace(trace))
}
fun accept(error: AirbyteErrorTraceMessage) {
accept(AirbyteTraceMessage().withType(AirbyteTraceMessage.Type.ERROR).withError(error))
}
fun accept(estimate: AirbyteEstimateTraceMessage) {
accept(
AirbyteTraceMessage()
.withType(AirbyteTraceMessage.Type.ESTIMATE)
.withEstimate(estimate),
)
}
fun accept(streamStatus: AirbyteStreamStatusTraceMessage) {
accept(
AirbyteTraceMessage()
.withType(AirbyteTraceMessage.Type.STREAM_STATUS)
.withStreamStatus(streamStatus),
)
}
fun accept(analytics: AirbyteAnalyticsTraceMessage) {
accept(
AirbyteTraceMessage()
.withType(AirbyteTraceMessage.Type.ANALYTICS)
.withAnalytics(analytics),
)
}
fun acceptTraceOnConfigError(e: Throwable) {
val configErrorException: ConfigErrorException = ConfigErrorException.unwind(e) ?: return
accept(
AirbyteErrorTraceMessage()
.withFailureType(AirbyteErrorTraceMessage.FailureType.CONFIG_ERROR)
.withMessage(configErrorException.message)
.withInternalMessage(e.toString())
.withStackTrace(ExceptionUtils.getStackTrace(e)),
)
}
}
// Used for integration tests.
const val CONNECTOR_OUTPUT_FILE = "airbyte.connector.output.file"
/** Default implementation of [OutputConsumer]. */
@Singleton
@Secondary
private class StdoutOutputConsumer : OutputConsumer {
override val emittedAt: Instant = Instant.now()
private val buffer = ByteArrayOutputStream()
override fun accept(airbyteMessage: AirbyteMessage) {
// This method effectively println's its JSON-serialized argument.
// Using println is not particularly efficient, however.
// To improve performance, this method accumulates RECORD messages into a buffer
// before writing them to standard output in a batch.
// Other Airbyte message types are not buffered, instead they trigger an immediate flush.
// Such messages should not linger indefinitely in a buffer.
val isRecord: Boolean = airbyteMessage.type == AirbyteMessage.Type.RECORD
val json: ByteArray = Jsons.writeValueAsBytes(airbyteMessage)
synchronized(this) {
if (buffer.size() > 0) {
buffer.write('\n'.code)
}
buffer.writeBytes(json)
if (!isRecord || buffer.size() >= BUFFER_MAX_SIZE) {
withLockFlush()
}
}
}
override fun close() {
synchronized(this) {
// Flush any remaining buffer contents to stdout before closing.
withLockFlush()
}
}
private fun withLockFlush() {
if (buffer.size() > 0) {
println(buffer.toString(Charsets.UTF_8))
buffer.reset()
}
}
companion object {
const val BUFFER_MAX_SIZE = 1024 * 1024
}
}
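
A hedged sketch of emitting a record through the consumer (the stream name and payload are illustrative):

import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteRecordMessage

fun emitOne(outputConsumer: OutputConsumer) {
    val record = AirbyteRecordMessage()
        .withStream("users")
        .withData(Jsons.readTree("""{"id": 1}"""))
    // Buffered; flushed when a non-RECORD message arrives, the buffer reaches 1 MiB,
    // or the consumer is closed.
    outputConsumer.accept(record)
}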


@@ -0,0 +1,50 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.data
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.JsonSchemaPrimitiveUtil
import io.airbyte.protocol.models.JsonSchemaType
/**
* Union type which models the Airbyte field type schema.
*
* This maps to the subset of [JsonSchemaType] which is used in practice. Its main reason for
* existing is to provide type-safety and convenient comparisons and string representations.
*/
sealed interface AirbyteType {
/** Unwraps the underlying Airbyte protocol type object. */
fun asJsonSchemaType(): JsonSchemaType
/** Convenience method to generate the JSON Schema object. */
fun asJsonSchema(): JsonNode = Jsons.valueToTree(asJsonSchemaType().jsonSchemaTypeMap)
}
data class ArrayAirbyteType(
val item: AirbyteType,
) : AirbyteType {
override fun asJsonSchemaType(): JsonSchemaType =
JsonSchemaType.builder(JsonSchemaPrimitiveUtil.JsonSchemaPrimitive.ARRAY)
.withItems(item.asJsonSchemaType())
.build()
}
enum class LeafAirbyteType(
private val jsonSchemaType: JsonSchemaType,
) : AirbyteType {
BOOLEAN(JsonSchemaType.BOOLEAN),
STRING(JsonSchemaType.STRING),
BINARY(JsonSchemaType.STRING_BASE_64),
DATE(JsonSchemaType.STRING_DATE),
TIME_WITH_TIMEZONE(JsonSchemaType.STRING_TIME_WITH_TIMEZONE),
TIME_WITHOUT_TIMEZONE(JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE),
TIMESTAMP_WITH_TIMEZONE(JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE),
TIMESTAMP_WITHOUT_TIMEZONE(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE),
INTEGER(JsonSchemaType.INTEGER),
NUMBER(JsonSchemaType.NUMBER),
NULL(JsonSchemaType.NULL),
JSONB(JsonSchemaType.JSONB),
;
override fun asJsonSchemaType(): JsonSchemaType = jsonSchemaType
}
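
For instance, an array-of-strings field type can be modeled and rendered as follows (a minimal sketch):

val arrayOfStrings: AirbyteType = ArrayAirbyteType(LeafAirbyteType.STRING)
val jsonSchema = arrayOfStrings.asJsonSchema() // an "array" schema whose "items" is the "string" schema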


@@ -0,0 +1,353 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.data
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.util.Jsons
import java.math.BigDecimal
import java.net.URI
import java.net.URL
import java.nio.ByteBuffer
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.format.DateTimeFormatter
import java.time.format.DateTimeParseException
/** Encodes a field value of type [T] as a [JsonNode] for an Airbyte record or state message. */
fun interface JsonEncoder<T> {
fun encode(decoded: T): JsonNode
}
/**
* Decodes a field value of type [T] from a [JsonNode] in an Airbyte state message.
*
* Throws an [IllegalArgumentException] when the decoding fails. Implementations of [JsonDecoder]
* should be strict, failure is unexpected.
*/
fun interface JsonDecoder<T> {
fun decode(encoded: JsonNode): T
}
/** Combines a [JsonEncoder] and a [JsonDecoder]. */
interface JsonCodec<T> : JsonEncoder<T>, JsonDecoder<T>
data object BooleanCodec : JsonCodec<Boolean> {
override fun encode(decoded: Boolean): JsonNode = Jsons.booleanNode(decoded)
override fun decode(encoded: JsonNode): Boolean {
if (!encoded.isBoolean) {
throw IllegalArgumentException("invalid boolean value $encoded")
}
return encoded.booleanValue()
}
}
data object TextCodec : JsonCodec<String> {
override fun encode(decoded: String): JsonNode = Jsons.textNode(decoded)
override fun decode(encoded: JsonNode): String {
if (!encoded.isTextual) {
throw IllegalArgumentException("invalid textual value $encoded")
}
return encoded.textValue()
}
}
data object BinaryCodec : JsonCodec<ByteBuffer> {
override fun encode(decoded: ByteBuffer): JsonNode = Jsons.binaryNode(decoded)
override fun decode(encoded: JsonNode): ByteBuffer {
if (!encoded.isBinary) {
throw IllegalArgumentException("invalid binary value $encoded")
}
return ByteBuffer.wrap(encoded.binaryValue())
}
}
data object BigDecimalCodec : JsonCodec<BigDecimal> {
override fun encode(decoded: BigDecimal): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): BigDecimal {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
return encoded.decimalValue()
}
}
data object BigDecimalIntegerCodec : JsonCodec<BigDecimal> {
override fun encode(decoded: BigDecimal): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): BigDecimal {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
if (!encoded.canConvertToExactIntegral()) {
throw IllegalArgumentException("invalid integral value $encoded")
}
return encoded.decimalValue()
}
}
data object LongCodec : JsonCodec<Long> {
override fun encode(decoded: Long): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): Long {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
if (!encoded.canConvertToExactIntegral()) {
throw IllegalArgumentException("invalid integral value $encoded")
}
if (!encoded.canConvertToLong()) {
throw IllegalArgumentException("invalid 64-bit integer value $encoded")
}
return encoded.longValue()
}
}
data object IntCodec : JsonCodec<Int> {
override fun encode(decoded: Int): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): Int {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
if (!encoded.canConvertToExactIntegral()) {
throw IllegalArgumentException("invalid integral value $encoded")
}
if (!encoded.canConvertToInt()) {
throw IllegalArgumentException("invalid 32-bit integer value $encoded")
}
return encoded.intValue()
}
}
data object ShortCodec : JsonCodec<Short> {
override fun encode(decoded: Short): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): Short {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
if (!encoded.canConvertToExactIntegral()) {
throw IllegalArgumentException("invalid integral value $encoded")
}
val intValue: Int = encoded.intValue()
val shortValue: Short = encoded.shortValue()
if (!encoded.canConvertToInt() || shortValue.toInt() != intValue) {
throw IllegalArgumentException("invalid 16-bit integer value $encoded")
}
return shortValue
}
}
data object ByteCodec : JsonCodec<Byte> {
override fun encode(decoded: Byte): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): Byte {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
if (!encoded.canConvertToExactIntegral()) {
throw IllegalArgumentException("invalid integral value $encoded")
}
val intValue: Int = encoded.intValue()
val byteValue: Byte = intValue.toByte()
if (!encoded.canConvertToInt() || byteValue.toInt() != intValue) {
throw IllegalArgumentException("invalid 8-bit integer value $encoded")
}
return byteValue
}
}
data object DoubleCodec : JsonCodec<Double> {
override fun encode(decoded: Double): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): Double {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
val decoded: Double = encoded.doubleValue()
if (encode(decoded).decimalValue().compareTo(encoded.decimalValue()) != 0) {
throw IllegalArgumentException("invalid IEEE-754 64-bit floating point value $encoded")
}
return decoded
}
}
data object FloatCodec : JsonCodec<Float> {
override fun encode(decoded: Float): JsonNode = Jsons.numberNode(decoded)
override fun decode(encoded: JsonNode): Float {
if (!encoded.isNumber) {
throw IllegalArgumentException("invalid number value $encoded")
}
val decoded: Float = encoded.floatValue()
if (encode(decoded).doubleValue().compareTo(encoded.doubleValue()) != 0) {
throw IllegalArgumentException("invalid IEEE-754 32-bit floating point value $encoded")
}
return decoded
}
}
data object JsonBytesCodec : JsonCodec<ByteBuffer> {
override fun encode(decoded: ByteBuffer): JsonNode =
try {
Jsons.readTree(decoded.array())
} catch (_: Exception) {
Jsons.textNode(String(decoded.array()))
}
override fun decode(encoded: JsonNode): ByteBuffer {
if (!encoded.isObject && !encoded.isArray) {
throw IllegalArgumentException("invalid object or array value $encoded")
}
return ByteBuffer.wrap(Jsons.writeValueAsBytes(encoded))
}
}
data object JsonStringCodec : JsonCodec<String> {
override fun encode(decoded: String): JsonNode =
try {
Jsons.readTree(decoded)
} catch (_: Exception) {
Jsons.textNode(decoded)
}
override fun decode(encoded: JsonNode): String {
if (!encoded.isObject && !encoded.isArray) {
throw IllegalArgumentException("invalid object or array value $encoded")
}
return Jsons.writeValueAsString(encoded)
}
}
data object UrlCodec : JsonCodec<URL> {
override fun encode(decoded: URL): JsonNode = Jsons.textNode(decoded.toExternalForm())
override fun decode(encoded: JsonNode): URL {
val str: String = TextCodec.decode(encoded)
try {
return URI.create(str).toURL()
} catch (e: Exception) {
throw IllegalArgumentException("invalid URL value $str", e)
}
}
}
data object LocalDateCodec : JsonCodec<LocalDate> {
override fun encode(decoded: LocalDate): JsonNode = Jsons.textNode(decoded.format(formatter))
override fun decode(encoded: JsonNode): LocalDate {
val str: String = TextCodec.decode(encoded)
try {
return LocalDate.parse(str, formatter)
} catch (e: DateTimeParseException) {
throw IllegalArgumentException("invalid value $str for pattern '$PATTERN'", e)
}
}
const val PATTERN = "yyyy-MM-dd"
val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN)
}
data object LocalTimeCodec : JsonCodec<LocalTime> {
override fun encode(decoded: LocalTime): JsonNode = Jsons.textNode(decoded.format(formatter))
override fun decode(encoded: JsonNode): LocalTime {
val str: String = TextCodec.decode(encoded)
try {
return LocalTime.parse(str, formatter)
} catch (e: DateTimeParseException) {
throw IllegalArgumentException("invalid value $str for pattern '$PATTERN'", e)
}
}
const val PATTERN = "HH:mm:ss.SSSSSS"
val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN)
}
data object LocalDateTimeCodec : JsonCodec<LocalDateTime> {
override fun encode(decoded: LocalDateTime): JsonNode =
Jsons.textNode(decoded.format(formatter))
override fun decode(encoded: JsonNode): LocalDateTime {
val str: String = TextCodec.decode(encoded)
try {
return LocalDateTime.parse(str, formatter)
} catch (e: DateTimeParseException) {
throw IllegalArgumentException("invalid value $str for pattern '$PATTERN'", e)
}
}
const val PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN)
}
data object OffsetTimeCodec : JsonCodec<OffsetTime> {
override fun encode(decoded: OffsetTime): JsonNode = Jsons.textNode(decoded.format(formatter))
override fun decode(encoded: JsonNode): OffsetTime {
val str: String = TextCodec.decode(encoded)
try {
return OffsetTime.parse(str, formatter)
} catch (e: DateTimeParseException) {
throw IllegalArgumentException("invalid value $str for pattern '$PATTERN'", e)
}
}
const val PATTERN = "HH:mm:ss.SSSSSSXXX"
val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN)
}
data object OffsetDateTimeCodec : JsonCodec<OffsetDateTime> {
override fun encode(decoded: OffsetDateTime): JsonNode =
Jsons.textNode(decoded.format(formatter))
override fun decode(encoded: JsonNode): OffsetDateTime {
val str: String = TextCodec.decode(encoded)
try {
return OffsetDateTime.parse(str, formatter)
} catch (e: DateTimeParseException) {
throw IllegalArgumentException("invalid value $str for pattern '$PATTERN'", e)
}
}
const val PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX"
val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern(PATTERN)
}
data object NullCodec : JsonCodec<Any?> {
override fun encode(decoded: Any?): JsonNode = Jsons.nullNode()
override fun decode(encoded: JsonNode): Any? = null
}
data object AnyEncoder : JsonEncoder<Any> {
override fun encode(decoded: Any): JsonNode = Jsons.textNode(decoded.toString())
}
data class ArrayEncoder<T>(
val elementEncoder: JsonEncoder<T>,
) : JsonEncoder<List<T>> {
override fun encode(decoded: List<T>): JsonNode =
Jsons.arrayNode().apply {
for (e in decoded) {
add(elementEncoder.encode(e))
}
}
}
data class ArrayDecoder<T>(
val elementDecoder: JsonDecoder<T>,
) : JsonDecoder<List<T>> {
override fun decode(encoded: JsonNode): List<T> {
if (!encoded.isArray) {
throw IllegalArgumentException("invalid array value $encoded")
}
return encoded.elements().asSequence().map { elementDecoder.decode(it) }.toList()
}
}
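
A round-trip sketch using one of the codecs; note that decoders are deliberately strict:

import java.time.LocalDate

val encoded = LocalDateCodec.encode(LocalDate.of(2024, 8, 7)) // textNode("2024-08-07")
val roundTripped: LocalDate = LocalDateCodec.decode(encoded)
// LocalDateCodec.decode(Jsons.textNode("08/07/2024")) throws IllegalArgumentException.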


@@ -0,0 +1,21 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.exceptions
/**
* An exception that indicates that there is something wrong with the user's connector setup. This
* exception is caught and emits an AirbyteTraceMessage.
*/
class ConfigErrorException(
displayMessage: String,
exception: Throwable? = null,
) : RuntimeException(displayMessage, exception) {
companion object {
@JvmStatic
fun unwind(e: Throwable?): ConfigErrorException? =
when (e) {
null -> null
is ConfigErrorException -> e
else -> unwind(e.cause)
}
}
}
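
A minimal sketch of what unwind guarantees: wrapping a ConfigErrorException in another exception still surfaces it.

fun main() {
    val root = ConfigErrorException("bad credentials")
    val wrapped = RuntimeException("outer", root)
    check(ConfigErrorException.unwind(wrapped) === root)
    check(ConfigErrorException.unwind(RuntimeException("unrelated")) == null)
}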


@@ -0,0 +1,8 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.exceptions
/** Custom exception that represents a failure to execute an operation. */
class OperationExecutionException(
message: String? = null,
cause: Throwable? = null,
) : Exception(message, cause)


@@ -0,0 +1,11 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.operation
/** Interface that defines a CLI operation. */
fun interface Operation {
fun execute()
companion object {
const val PROPERTY: String = "airbyte.connector.operation"
}
}


@@ -0,0 +1,26 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.operation
import io.airbyte.cdk.command.ConfigurationJsonObjectSupplier
import io.airbyte.cdk.consumers.OutputConsumer
import io.airbyte.protocol.models.v0.ConnectorSpecification
import io.micronaut.context.annotation.Requires
import io.micronaut.context.annotation.Value
import jakarta.inject.Singleton
import java.net.URI
@Singleton
@Requires(property = Operation.PROPERTY, value = "spec")
class SpecOperation(
@Value("\${airbyte.connector.metadata.documentation-url}") val documentationUrl: String,
val configJsonObjectSupplier: ConfigurationJsonObjectSupplier<*>,
val outputConsumer: OutputConsumer,
) : Operation {
override fun execute() {
outputConsumer.accept(
ConnectorSpecification()
.withDocumentationUrl(URI.create(documentationUrl))
.withConnectionSpecification(configJsonObjectSupplier.jsonSchema),
)
}
}


@@ -0,0 +1,31 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.ssh
import kotlin.time.Duration
import kotlin.time.Duration.Companion.ZERO
import kotlin.time.Duration.Companion.milliseconds
/** These can be passed in the connector configuration as additional parameters. */
data class SshConnectionOptions(
val sessionHeartbeatInterval: Duration,
val globalHeartbeatInterval: Duration,
val idleTimeout: Duration,
) {
companion object {
fun fromAdditionalProperties(map: Map<String, Any>) =
SshConnectionOptions(
when (val millis = map["session_heartbeat_interval"]) {
is Long -> millis.milliseconds
else -> 1_000.milliseconds
},
when (val millis = map["global_heartbeat_interval"]) {
is Long -> millis.milliseconds
else -> 2_000.milliseconds
},
when (val millis = map["idle_timeout"]) {
is Long -> millis.milliseconds
else -> ZERO
},
)
}
}
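
A usage sketch: only recognized keys with Long values override the defaults.

import kotlin.time.Duration
import kotlin.time.Duration.Companion.milliseconds

fun main() {
    val opts = SshConnectionOptions.fromAdditionalProperties(
        mapOf("session_heartbeat_interval" to 5_000L),
    )
    check(opts.sessionHeartbeatInterval == 5_000.milliseconds)
    check(opts.globalHeartbeatInterval == 2_000.milliseconds) // falls back to the default
    check(opts.idleTimeout == Duration.ZERO)                  // falls back to the default
}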


@@ -0,0 +1,124 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.ssh
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.annotation.JsonPropertyDescription
import com.fasterxml.jackson.annotation.JsonSubTypes
import com.fasterxml.jackson.annotation.JsonTypeInfo
import com.fasterxml.jackson.annotation.JsonValue
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDefault
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import io.airbyte.cdk.command.CONNECTOR_CONFIG_PREFIX
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.micronaut.context.annotation.ConfigurationProperties
/** Union type for SSH tunnel method configuration in connector configurations. */
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "tunnel_method")
@JsonSubTypes(
JsonSubTypes.Type(value = SshNoTunnelMethod::class, name = "NO_TUNNEL"),
JsonSubTypes.Type(value = SshKeyAuthTunnelMethod::class, name = "SSH_KEY_AUTH"),
JsonSubTypes.Type(value = SshPasswordAuthTunnelMethod::class, name = "SSH_PASSWORD_AUTH"),
)
@JsonSchemaTitle("SSH Tunnel Method")
@JsonSchemaDescription(
"Whether to initiate an SSH tunnel before connecting to the database, " +
"and if so, which kind of authentication to use.",
)
sealed interface SshTunnelMethodConfiguration
@JsonSchemaTitle("No Tunnel")
@JsonSchemaDescription("No ssh tunnel needed to connect to database")
data object SshNoTunnelMethod : SshTunnelMethodConfiguration
@JsonSchemaTitle("SSH Key Authentication")
@JsonSchemaDescription("Connect through a jump server tunnel host using username and ssh key")
data class SshKeyAuthTunnelMethod(
@get:JsonProperty("tunnel_host", required = true)
@param:JsonProperty("tunnel_host", required = true)
@JsonSchemaTitle("SSH Tunnel Jump Server Host")
@JsonPropertyDescription("Hostname of the jump server host that allows inbound ssh tunnel.")
@JsonSchemaInject(json = """{"order":1}""")
val host: String,
@get:JsonProperty("tunnel_port", required = true)
@param:JsonProperty("tunnel_port", required = true)
@JsonSchemaTitle("SSH Connection Port")
@JsonPropertyDescription("Port on the proxy/jump server that accepts inbound ssh connections.")
@JsonSchemaInject(json = """{"order":2,"minimum": 0,"maximum": 65536}""")
@JsonSchemaDefault("22")
val port: Int,
@get:JsonProperty("tunnel_user", required = true)
@param:JsonProperty("tunnel_user", required = true)
@JsonSchemaTitle("SSH Login Username")
@JsonPropertyDescription("OS-level username for logging into the jump server host")
@JsonSchemaInject(json = """{"order":3}""")
val user: String,
@get:JsonProperty("ssh_key", required = true)
@param:JsonProperty("ssh_key", required = true)
@JsonSchemaTitle("SSH Private Key")
@JsonPropertyDescription(
"OS-level user account ssh key credentials in RSA PEM format " +
"( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )",
)
@JsonSchemaInject(json = """{"order":4,"multiline":true,"airbyte_secret": true}""")
val key: String,
) : SshTunnelMethodConfiguration
@JsonSchemaTitle("Password Authentication")
@JsonSchemaDescription(
"Connect through a jump server tunnel host using username and password authentication",
)
data class SshPasswordAuthTunnelMethod(
@get:JsonProperty("tunnel_host", required = true)
@param:JsonProperty("tunnel_host", required = true)
@JsonSchemaTitle("SSH Tunnel Jump Server Host")
@JsonPropertyDescription("Hostname of the jump server host that allows inbound ssh tunnel.")
@JsonSchemaInject(json = """{"order":1}""")
val host: String,
@get:JsonProperty("tunnel_port", required = true)
@param:JsonProperty("tunnel_port", required = true)
@JsonSchemaTitle("SSH Connection Port")
@JsonPropertyDescription("Port on the proxy/jump server that accepts inbound ssh connections.")
@JsonSchemaInject(json = """{"order":2,"minimum": 0,"maximum": 65536}""")
@JsonSchemaDefault("22")
val port: Int,
@get:JsonProperty("tunnel_user", required = true)
@param:JsonProperty("tunnel_user", required = true)
@JsonSchemaTitle("SSH Login Username")
@JsonPropertyDescription("OS-level username for logging into the jump server host")
@JsonSchemaInject(json = """{"order":3}""")
val user: String,
@get:JsonProperty("tunnel_user_password", required = true)
@param:JsonProperty("tunnel_user_password", required = true)
@JsonSchemaTitle("Password")
@JsonPropertyDescription("OS-level password for logging into the jump server host")
@JsonSchemaInject(json = """{"order":4,"airbyte_secret": true}""")
val password: String,
) : SshTunnelMethodConfiguration
@ConfigurationProperties("$CONNECTOR_CONFIG_PREFIX.tunnel_method")
class MicronautPropertiesFriendlySshTunnelMethodConfigurationJsonObject {
var tunnelMethod: String = "NO_TUNNEL"
var tunnelHost: String? = null
var tunnelPort: Int = 22
var tunnelUser: String? = null
var sshKey: String? = null
var tunnelUserPassword: String? = null
@JsonValue
fun asSshTunnelMethod(): SshTunnelMethodConfiguration =
when (tunnelMethod) {
"NO_TUNNEL" -> SshNoTunnelMethod
"SSH_KEY_AUTH" ->
SshKeyAuthTunnelMethod(tunnelHost!!, tunnelPort, tunnelUser!!, sshKey!!)
"SSH_PASSWORD_AUTH" ->
SshPasswordAuthTunnelMethod(
tunnelHost!!,
tunnelPort,
tunnelUser!!,
tunnelUserPassword!!,
)
else -> throw ConfigErrorException("invalid value $tunnelMethod")
}
}
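
A hedged round-trip sketch, assuming the Jsons ObjectMapper defined later in this commit is in scope; the tunnel_method discriminator selects the concrete subtype during deserialization, and the host value here is a placeholder.

fun demoTunnelMethodJson() {
    val json =
        """{"tunnel_method":"SSH_PASSWORD_AUTH","tunnel_host":"bastion.example.com",""" +
            """"tunnel_port":22,"tunnel_user":"sshuser","tunnel_user_password":"secret"}"""
    val parsed: SshTunnelMethodConfiguration =
        Jsons.readValue(json, SshTunnelMethodConfiguration::class.java)
    check(parsed is SshPasswordAuthTunnelMethod && parsed.host == "bastion.example.com")
}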

View File

@@ -0,0 +1,144 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.ssh
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.github.oshai.kotlinlogging.KotlinLogging
import java.io.StringReader
import java.net.InetSocketAddress
import java.security.Security
import java.time.Duration
import kotlin.time.toJavaDuration
import org.apache.sshd.client.SshClient
import org.apache.sshd.client.future.ConnectFuture
import org.apache.sshd.client.keyverifier.AcceptAllServerKeyVerifier
import org.apache.sshd.client.session.ClientSession
import org.apache.sshd.common.SshException
import org.apache.sshd.common.session.SessionHeartbeatController
import org.apache.sshd.common.util.net.SshdSocketAddress
import org.apache.sshd.common.util.security.SecurityUtils
import org.apache.sshd.core.CoreModuleProperties
import org.apache.sshd.server.forward.AcceptAllForwardingFilter
import org.bouncycastle.jce.provider.BouncyCastleProvider
private val log = KotlinLogging.logger {}
/**
* Encapsulates a possible SSH tunnel.
*
* Create using [createTunnelSession].
*/
class TunnelSession
internal constructor(
val address: InetSocketAddress,
private val client: SshClient?,
private val clientSession: ClientSession?,
) : AutoCloseable {
override fun close() {
clientSession?.let {
log.info { "Closing SSH client session." }
it.close()
}
client?.let {
log.info { "Closing SSH client." }
it.stop()
}
}
}
/** Creates an open [TunnelSession]. */
fun createTunnelSession(
remote: SshdSocketAddress,
sshTunnel: SshTunnelMethodConfiguration,
connectionOptions: SshConnectionOptions,
): TunnelSession {
if (sshTunnel is SshNoTunnelMethod) {
return TunnelSession(remote.toInetSocketAddress(), null, null)
}
log.info { "Creating SSH client." }
val client: SshClient = createClient(connectionOptions)
try {
client.start()
// Create session.
log.info { "Creating SSH client session." }
val connectFuture: ConnectFuture =
when (sshTunnel) {
                SshNoTunnelMethod -> error("unreachable: the no-tunnel case returns early above")
is SshKeyAuthTunnelMethod ->
client.connect(sshTunnel.user.trim(), sshTunnel.host.trim(), sshTunnel.port)
is SshPasswordAuthTunnelMethod ->
client.connect(sshTunnel.user.trim(), sshTunnel.host.trim(), sshTunnel.port)
}
val session: ClientSession = connectFuture.verify(tunnelSessionTimeout).session
when (sshTunnel) {
SshNoTunnelMethod -> Unit
is SshKeyAuthTunnelMethod -> {
val key: String = sshTunnel.key.replace("\\n", "\n")
val keyPair =
SecurityUtils.getKeyPairResourceParser()
.loadKeyPairs(null, null, null, StringReader(key))
?.firstOrNull()
?: throw ConfigErrorException(
"Unable to load private key pairs, " +
"verify key pairs are properly inputted",
)
session.addPublicKeyIdentity(keyPair)
}
is SshPasswordAuthTunnelMethod -> {
                // Remove the default key identity provider, which would
                // otherwise pick up the machine's own SSH keys.
session.keyIdentityProvider = null
session.addPasswordIdentity(sshTunnel.password)
}
}
session.auth().verify(tunnelSessionTimeout)
log.info { "Established tunneling session to $remote." }
// Start port forwarding.
val localhost: String = SshdSocketAddress.LOCALHOST_ADDRESS.hostName
val address: SshdSocketAddress =
session.startLocalPortForwarding(
SshdSocketAddress(InetSocketAddress.createUnresolved(localhost, 0)),
remote,
)
log.info { "Port forwarding started on $address." }
return TunnelSession(address.toInetSocketAddress(), client, session)
} catch (e: SshException) {
if (
(e.message ?: "")
.lowercase()
.contains("failed to get operation result within specified timeout")
) {
throw ConfigErrorException(SSH_TIMEOUT_DISPLAY_MESSAGE, e)
} else {
throw RuntimeException(e)
}
}
}
const val SSH_TIMEOUT_DISPLAY_MESSAGE: String =
"Timed out while opening a SSH Tunnel. " +
"Please double check the given SSH configurations and try again."
private val tunnelSessionTimeout: Duration = Duration.ofMillis(15_000)
private fun createClient(connectionOptions: SshConnectionOptions): SshClient {
Security.addProvider(BouncyCastleProvider())
val client: SshClient = SshClient.setUpDefaultClient()
client.forwardingFilter = AcceptAllForwardingFilter.INSTANCE
client.serverKeyVerifier = AcceptAllServerKeyVerifier.INSTANCE
// Session level heartbeat using SSH_MSG_IGNORE every second.
client.setSessionHeartbeat(
SessionHeartbeatController.HeartbeatType.IGNORE,
connectionOptions.sessionHeartbeatInterval.toJavaDuration(),
)
// idle-timeout zero indicates NoTimeout.
CoreModuleProperties.IDLE_TIMEOUT[client] = connectionOptions.idleTimeout.toJavaDuration()
// Use tcp keep-alive mechanism.
CoreModuleProperties.SOCKET_KEEPALIVE[client] = true
// Additional delay used for ChannelOutputStream to wait for space in the remote socket
// send buffer.
CoreModuleProperties.WAIT_FOR_SPACE_TIMEOUT[client] = Duration.ofMinutes(2)
// Global keepalive message sent every 2 seconds.
// This precedes the session level heartbeat.
CoreModuleProperties.HEARTBEAT_INTERVAL[client] =
connectionOptions.globalHeartbeatInterval.toJavaDuration()
return client
}
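
A hedged sketch of the intended call pattern (db.internal:5432 is a placeholder endpoint): consumers connect to tunnel.address, a local ephemeral port, rather than to the remote host directly.

fun demoTunnel(sshTunnel: SshTunnelMethodConfiguration) {
    val remote = SshdSocketAddress("db.internal", 5432) // placeholder remote endpoint
    val options = SshConnectionOptions.fromAdditionalProperties(emptyMap())
    createTunnelSession(remote, sshTunnel, options).use { tunnel ->
        // With SshNoTunnelMethod, tunnel.address is simply the remote address itself.
        println("connect to ${tunnel.address}")
    }
}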

View File

@@ -0,0 +1,136 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.util
import datadog.trace.api.DDTags
import datadog.trace.api.interceptor.MutableSpan
import io.opentracing.Span
import io.opentracing.log.Fields
import io.opentracing.tag.Tags
import io.opentracing.util.GlobalTracer
import java.io.PrintWriter
import java.io.StringWriter
import java.util.function.Consumer
/** Collection of utility methods to help with performance tracing. */
object ApmTraceUtils {
/** String format for the name of tags added to spans. */
const val TAG_FORMAT: String = "airbyte.%s.%s"
/** Standard prefix for tags added to spans. */
const val TAG_PREFIX: String = "metadata"
/**
* Adds all provided tags to the currently active span, if one exists, under the provided tag
* name namespace.
*
* @param tags A map of tags to be added to the currently active span.
* @param tagPrefix The prefix to be added to each custom tag name.
*/
@JvmOverloads
fun addTagsToTrace(
tags: Map<String?, Any>,
tagPrefix: String? = TAG_PREFIX,
) {
addTagsToTrace(GlobalTracer.get().activeSpan(), tags, tagPrefix)
}
/**
* Adds all the provided tags to the provided span, if one exists.
*
* @param span The [Span] that will be associated with the tags.
* @param tags A map of tags to be added to the currently active span.
* @param tagPrefix The prefix to be added to each custom tag name.
*/
fun addTagsToTrace(
span: Span?,
tags: Map<String?, Any>,
tagPrefix: String?,
) {
if (span != null) {
tags.entries.forEach(
Consumer { entry: Map.Entry<String?, Any> ->
span.setTag(formatTag(entry.key, tagPrefix), entry.value.toString())
},
)
}
}
/**
* Adds an exception to the currently active span, if one exists.
*
* @param t The [Throwable] to be added to the currently active span.
*/
@JvmStatic
fun addExceptionToTrace(t: Throwable?) {
addExceptionToTrace(GlobalTracer.get().activeSpan(), t)
}
/**
* Adds an exception to the provided span, if one exists.
*
* @param span The [Span] that will be associated with the exception.
* @param t The [Throwable] to be added to the provided span.
*/
fun addExceptionToTrace(
span: Span?,
t: Throwable?,
) {
if (span != null) {
span.setTag(Tags.ERROR, true)
span.log(java.util.Map.of(Fields.ERROR_OBJECT, t))
}
}
/**
* Adds all the provided tags to the root span.
*
* @param tags A map of tags to be added to the root span.
*/
fun addTagsToRootSpan(tags: Map<String?, Any>) {
val activeSpan = GlobalTracer.get().activeSpan()
if (activeSpan is MutableSpan) {
val localRootSpan = (activeSpan as MutableSpan).localRootSpan
tags.entries.forEach(
Consumer { entry: Map.Entry<String?, Any> ->
localRootSpan.setTag(formatTag(entry.key, TAG_PREFIX), entry.value.toString())
},
)
}
}
/**
* Adds an exception to the root span, if an active one exists.
*
* @param t The [Throwable] to be added to the provided span.
*/
fun recordErrorOnRootSpan(t: Throwable) {
val activeSpan = GlobalTracer.get().activeSpan()
if (activeSpan != null) {
activeSpan.setTag(Tags.ERROR, true)
activeSpan.log(java.util.Map.of(Fields.ERROR_OBJECT, t))
}
if (activeSpan is MutableSpan) {
val localRootSpan = (activeSpan as MutableSpan).localRootSpan
localRootSpan.setError(true)
localRootSpan.setTag(DDTags.ERROR_MSG, t.message)
localRootSpan.setTag(DDTags.ERROR_TYPE, t.javaClass.name)
val errorString = StringWriter()
t.printStackTrace(PrintWriter(errorString))
localRootSpan.setTag(DDTags.ERROR_STACK, errorString.toString())
}
}
/**
* Formats the tag key using [.TAG_FORMAT] provided by this utility with the provided tag
* prefix.
*
* @param tagKey The tag key to format.
* @param tagPrefix The prefix to be added to each custom tag name.
* @return The formatted tag key.
*/
@JvmOverloads
fun formatTag(
tagKey: String?,
tagPrefix: String? = TAG_PREFIX,
): String = String.format(TAG_FORMAT, tagPrefix, tagKey)
}
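
A hedged usage sketch: tag keys are namespaced via TAG_FORMAT, so a key like "stream" becomes "airbyte.metadata.stream" under the default prefix.

fun demoTracing() {
    // The map's key type is String? to match the addTagsToTrace signature.
    ApmTraceUtils.addTagsToTrace(mapOf<String?, Any>("stream" to "users", "rows" to 42))
    check(ApmTraceUtils.formatTag("stream") == "airbyte.metadata.stream")
}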

View File

@@ -0,0 +1,58 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.util
import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.databind.DeserializationFeature
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.node.ArrayNode
import com.fasterxml.jackson.databind.node.BinaryNode
import com.fasterxml.jackson.databind.node.BooleanNode
import com.fasterxml.jackson.databind.node.NumericNode
import com.fasterxml.jackson.databind.node.ObjectNode
import com.fasterxml.jackson.databind.node.TextNode
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.module.afterburner.AfterburnerModule
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import java.math.BigDecimal
import java.math.BigInteger
import java.nio.ByteBuffer
object Jsons : ObjectMapper() {
init {
registerKotlinModule()
registerModule(JavaTimeModule())
registerModule(AfterburnerModule())
setSerializationInclusion(JsonInclude.Include.NON_NULL)
configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true)
}
fun objectNode(): ObjectNode = createObjectNode()
fun arrayNode(): ArrayNode = createArrayNode()
fun numberNode(n: Number): NumericNode =
when (n) {
is BigDecimal -> nodeFactory.numberNode(n) as NumericNode
is BigInteger -> nodeFactory.numberNode(n) as NumericNode
is Double -> nodeFactory.numberNode(n)
is Float -> nodeFactory.numberNode(n)
is Long -> nodeFactory.numberNode(n)
is Int -> nodeFactory.numberNode(n)
is Short -> nodeFactory.numberNode(n)
is Byte -> nodeFactory.numberNode(n)
            else -> throw IllegalArgumentException("unsupported number class ${n::class} for $n")
}
fun textNode(str: CharSequence): TextNode = nodeFactory.textNode(str.toString())
fun binaryNode(array: ByteArray): BinaryNode = nodeFactory.binaryNode(array)
fun binaryNode(byteBuffer: ByteBuffer): BinaryNode {
val array = ByteArray(byteBuffer.remaining()).also { byteBuffer.asReadOnlyBuffer().get(it) }
return nodeFactory.binaryNode(array)
}
fun booleanNode(boolean: Boolean): BooleanNode = nodeFactory.booleanNode(boolean)
}
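
A hedged sketch of the Jsons singleton: it is itself a fully configured ObjectMapper, so readValue and writeValueAsString work alongside the node-factory helpers above.

import com.fasterxml.jackson.databind.JsonNode

fun demoJsons() {
    val node = Jsons.objectNode()
    node.put("id", 1)
    node.set<JsonNode>("tags", Jsons.arrayNode().add("a"))
    check(Jsons.writeValueAsString(node) == """{"id":1,"tags":["a"]}""")
}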

View File

@@ -0,0 +1,22 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.util
import java.net.URL
object ResourceUtils {
@JvmStatic
fun readResource(resourceName: String): String =
getResource(resourceName).openStream().reader().readText()
@JvmStatic
fun getResource(resourceName: String): URL {
val loader: ClassLoader =
listOfNotNull(
Thread.currentThread().contextClassLoader,
ResourceUtils::class.java.classLoader,
)
.firstOrNull()
?: throw RuntimeException("no ClassLoader found")
        return loader.getResource(resourceName)
            ?: throw RuntimeException("resource not found: $resourceName")
}
}
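
A one-line hedged example; command/catalog.json is one of the test resources added later in this commit.

val catalogJson: String = ResourceUtils.readResource("command/catalog.json")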

View File

@@ -0,0 +1,21 @@
{
"type": "LOG",
"log": {
"level": {
"$resolver": "level",
"field": "name"
},
"message": {
"$resolver": "pattern",
"pattern": "%level %thread %C{1.}(%M):%L %m",
"stringified": true
},
"stack_trace": {
"$resolver": "exception",
"field": "stackTrace",
"stackTrace": {
"stringified": true
}
}
}
}

View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="INFO">
<Appenders>
<Console name="ConsoleJSONAppender" target="SYSTEM_OUT">
            <!--#30781 - We set log4j's ConsoleAppender to use a 32KB buffer to ensure that each log write is atomic.-->
            <!-- We cap both the log message and the printed stack trace at 16KB each; combined with the other-->
            <!-- JSON characters, this keeps each event below 32768 characters.-->
<JsonTemplateLayout eventTemplateUri="classpath:AirbyteLogMessageTemplate.json" maxStringLength="16000"/>
</Console>
</Appenders>
<Loggers>
<Root level="INFO" additivity="false">
<AppenderRef ref="ConsoleJSONAppender"/>
</Root>
</Loggers>
</Configuration>

View File

@@ -0,0 +1,75 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.util.Jsons
import io.airbyte.cdk.util.ResourceUtils
import io.airbyte.protocol.models.Field
import io.airbyte.protocol.models.JsonSchemaType
import io.airbyte.protocol.models.v0.CatalogHelpers
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.airbyte.protocol.models.v0.SyncMode
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(rebuildContext = true)
class ConfiguredAirbyteCatalogTest {
@Inject lateinit var actual: ConfiguredAirbyteCatalog
@Test
fun testEmpty() {
Assertions.assertEquals(ConfiguredAirbyteCatalog(), actual)
}
@Test
@Property(name = "airbyte.connector.catalog.resource", value = CATALOG_RESOURCE)
fun testInjectedCatalog() {
val expected =
ConfiguredAirbyteCatalog()
.withStreams(
listOf(
ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withCursorField(listOf("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(
CatalogHelpers.createAirbyteStream(
"bar",
"foo",
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING),
)
.withSupportedSyncModes(
listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL),
),
),
ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withCursorField(listOf("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(
CatalogHelpers.createAirbyteStream(
"baz",
"foo",
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING),
)
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH)),
),
),
)
Assertions.assertEquals(
Jsons.readTree(ResourceUtils.readResource(CATALOG_RESOURCE)),
Jsons.valueToTree(expected),
)
Assertions.assertEquals(expected, actual)
}
companion object {
const val CATALOG_RESOURCE = "command/catalog.json"
}
}

View File

@@ -0,0 +1,112 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(rebuildContext = true)
class InputStateTest {
@Inject lateinit var actual: InputState
@Test
fun testEmpty() {
Assertions.assertEquals(EmptyInputState, actual)
}
@Test
@Property(
name = "airbyte.connector.state.resource",
value = "command/vanilla-stream-states.json",
)
fun testVanillaStreamStates() {
val expected =
StreamInputState(
mapOf(
AirbyteStreamNameNamespacePair("bar", "foo") to
Jsons.readTree("{\"primary_key\":{\"k1\":10,\"k2\":20}}"),
AirbyteStreamNameNamespacePair("baz", "foo") to
Jsons.readTree("{\"cursors\":{\"c\":30}}"),
),
)
Assertions.assertEquals(
Jsons.writeValueAsString(expected),
Jsons.writeValueAsString(actual),
)
}
@Test
@Property(
name = "airbyte.connector.state.resource",
value = "command/vanilla-global-states.json",
)
fun testVanillaGlobalStates() {
val expected =
GlobalInputState(
global = Jsons.readTree("{\"cdc\":{}}"),
globalStreams =
mapOf(
AirbyteStreamNameNamespacePair("bar", "foo") to
Jsons.readTree("{\"primary_key\":{\"k1\":10,\"k2\":20}}"),
),
nonGlobalStreams = mapOf(),
)
Assertions.assertEquals(
Jsons.writeValueAsString(expected),
Jsons.writeValueAsString(actual),
)
}
@Test
@Property(
name = "airbyte.connector.state.resource",
value = "command/vanilla-mixed-states.json",
)
fun testVanillaMixedStates() {
val expected =
GlobalInputState(
global = Jsons.readTree("{\"cdc\":{}}"),
globalStreams =
mapOf(
AirbyteStreamNameNamespacePair("bar", "foo") to
Jsons.readTree("{\"primary_key\":{\"k1\":10,\"k2\":20}}"),
),
nonGlobalStreams =
mapOf(
AirbyteStreamNameNamespacePair("baz", "foo") to
Jsons.readTree("{\"primary_key\":{\"k\":1}}"),
),
)
Assertions.assertEquals(
Jsons.writeValueAsString(expected),
Jsons.writeValueAsString(actual),
)
}
@Test
@Property(name = "airbyte.connector.state.resource", value = "command/duplicate-states.json")
fun testDuplicates() {
val expected =
GlobalInputState(
global = Jsons.readTree("{\"cdc\":{}}"),
globalStreams =
mapOf(
AirbyteStreamNameNamespacePair("bar", "foo") to
Jsons.readTree("{\"primary_key\":{\"k1\":10,\"k2\":20}}"),
),
nonGlobalStreams =
mapOf(
AirbyteStreamNameNamespacePair("baz", "foo") to
Jsons.readTree("{\"primary_key\":{\"k\":10}}"),
),
)
Assertions.assertEquals(
Jsons.writeValueAsString(expected),
Jsons.writeValueAsString(actual),
)
}
}

View File

@@ -0,0 +1,352 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.data
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.util.Jsons
import java.math.BigDecimal
import java.net.URI
import java.nio.ByteBuffer
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.time.temporal.ChronoUnit
import java.util.UUID
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class JsonCodecTest {
private fun <T> JsonCodec<T>.testValueRoundTrip(x: T) {
Assertions.assertEquals(x, decode(encode(x)))
}
private fun <T> JsonCodec<T>.testJsonRoundTrip(x: JsonNode) {
Assertions.assertEquals(x.toString(), encode(decode(x)).toString())
}
private fun <T> JsonCodec<T>.testBadEncoding(x: JsonNode) {
Assertions.assertThrows(IllegalArgumentException::class.java) { decode(x) }
}
@Test
fun testBoolean() {
BooleanCodec.run {
testValueRoundTrip(false)
testValueRoundTrip(true)
testJsonRoundTrip(Jsons.booleanNode(false))
testJsonRoundTrip(Jsons.booleanNode(true))
testBadEncoding(Jsons.textNode("true"))
testBadEncoding(Jsons.numberNode(1))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testText() {
TextCodec.run {
testValueRoundTrip("")
testValueRoundTrip("foo")
testJsonRoundTrip(Jsons.textNode(""))
testJsonRoundTrip(Jsons.textNode("foo"))
testBadEncoding(Jsons.binaryNode("foo".toByteArray()))
testBadEncoding(Jsons.numberNode(1))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testBinary() {
BinaryCodec.run {
testValueRoundTrip(ByteBuffer.wrap(ByteArray(0)))
testValueRoundTrip(ByteBuffer.wrap("foo".toByteArray()))
testJsonRoundTrip(Jsons.binaryNode("".toByteArray()))
testJsonRoundTrip(Jsons.binaryNode("foo".toByteArray()))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.numberNode(1))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testBigDecimal() {
BigDecimalCodec.run {
testValueRoundTrip(BigDecimal.ZERO)
testValueRoundTrip(BigDecimal.ONE)
testValueRoundTrip(BigDecimal(Long.MAX_VALUE).multiply(BigDecimal(1.1)))
testJsonRoundTrip(Jsons.numberNode(0))
testJsonRoundTrip(Jsons.numberNode(1))
testJsonRoundTrip(Jsons.numberNode(-123.456))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testBigDecimalInteger() {
BigDecimalIntegerCodec.run {
testValueRoundTrip(BigDecimal.ZERO)
testValueRoundTrip(BigDecimal.ONE)
testValueRoundTrip(BigDecimal(Long.MAX_VALUE).multiply(BigDecimal(2)))
testJsonRoundTrip(Jsons.numberNode(0))
testJsonRoundTrip(Jsons.numberNode(1))
testBadEncoding(Jsons.numberNode(BigDecimal(Long.MAX_VALUE).multiply(BigDecimal(1.1))))
testBadEncoding(Jsons.numberNode(123.456))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testLong() {
LongCodec.run {
testValueRoundTrip(0L)
testValueRoundTrip(1L)
testValueRoundTrip(Long.MAX_VALUE)
testJsonRoundTrip(Jsons.numberNode(0))
testJsonRoundTrip(Jsons.numberNode(1))
testBadEncoding(Jsons.numberNode(BigDecimal(Long.MAX_VALUE).multiply(BigDecimal(2))))
testBadEncoding(Jsons.numberNode(123.456))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testInt() {
IntCodec.run {
testValueRoundTrip(0)
testValueRoundTrip(1)
testJsonRoundTrip(Jsons.numberNode(0))
testJsonRoundTrip(Jsons.numberNode(1))
testBadEncoding(Jsons.numberNode(Long.MAX_VALUE))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testShort() {
ShortCodec.run {
testValueRoundTrip(0)
testValueRoundTrip(1)
testJsonRoundTrip(Jsons.numberNode(0))
testJsonRoundTrip(Jsons.numberNode(1))
testBadEncoding(Jsons.numberNode(Int.MAX_VALUE))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testByte() {
ByteCodec.run {
testValueRoundTrip(0)
testValueRoundTrip(1)
testJsonRoundTrip(Jsons.numberNode(0))
testJsonRoundTrip(Jsons.numberNode(1))
testBadEncoding(Jsons.numberNode(Short.MAX_VALUE))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testDouble() {
DoubleCodec.run {
testValueRoundTrip(0.0)
testValueRoundTrip(0.1)
testJsonRoundTrip(Jsons.numberNode(0.0))
testJsonRoundTrip(Jsons.numberNode(1.0))
testJsonRoundTrip(Jsons.numberNode(-123.456))
testJsonRoundTrip(Jsons.numberNode(0.000000000000000000000000000000000001))
testBadEncoding(Jsons.numberNode(BigDecimal(Long.MAX_VALUE / 3.0).pow(2)))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testFloat() {
FloatCodec.run {
testValueRoundTrip(0.0f)
testValueRoundTrip(0.1f)
testJsonRoundTrip(Jsons.numberNode(0.0f))
testJsonRoundTrip(Jsons.numberNode(1.0f))
testJsonRoundTrip(Jsons.numberNode(-123.456f))
testBadEncoding(Jsons.numberNode(0.000000000000000000000000000000000001))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testJsonBytes() {
JsonBytesCodec.run {
testValueRoundTrip(ByteBuffer.wrap("{}".toByteArray()))
testValueRoundTrip(ByteBuffer.wrap("[123]".toByteArray()))
testJsonRoundTrip(Jsons.objectNode())
testJsonRoundTrip(Jsons.arrayNode())
testBadEncoding(Jsons.textNode("{}"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testJsonString() {
JsonStringCodec.run {
testValueRoundTrip("{}")
testValueRoundTrip("[123]")
testJsonRoundTrip(Jsons.objectNode())
testJsonRoundTrip(Jsons.arrayNode())
testBadEncoding(Jsons.textNode("{}"))
testBadEncoding(Jsons.textNode("123"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testUrl() {
UrlCodec.run {
testValueRoundTrip(URI.create("http://localhost/").toURL())
testJsonRoundTrip(Jsons.textNode("http://localhost/"))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testLocalDate() {
LocalDateCodec.run {
testValueRoundTrip(LocalDate.now())
testJsonRoundTrip(Jsons.textNode("2024-03-01"))
testBadEncoding(Jsons.textNode("01-AUG-2024"))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testLocalTime() {
LocalTimeCodec.run {
testValueRoundTrip(LocalTime.now().truncatedTo(ChronoUnit.MICROS))
testJsonRoundTrip(Jsons.textNode("01:02:03.456789"))
testBadEncoding(Jsons.textNode("01:02:03.4"))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testLocalDateTime() {
LocalDateTimeCodec.run {
testValueRoundTrip(LocalDateTime.now().truncatedTo(ChronoUnit.MICROS))
testJsonRoundTrip(Jsons.textNode("2024-03-01T01:02:03.456789"))
testBadEncoding(Jsons.textNode("2024-03-01 01:02:03.4"))
testBadEncoding(Jsons.numberNode(LocalDateTime.now().toEpochSecond(ZoneOffset.UTC)))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testOffsetTime() {
OffsetTimeCodec.run {
testValueRoundTrip(OffsetTime.now().truncatedTo(ChronoUnit.MICROS))
testJsonRoundTrip(Jsons.textNode("01:02:03.456789-04:30"))
testBadEncoding(Jsons.textNode("01:02:03.456789"))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testOffsetDateTime() {
OffsetDateTimeCodec.run {
testValueRoundTrip(OffsetDateTime.now().truncatedTo(ChronoUnit.MICROS))
testJsonRoundTrip(Jsons.textNode("2024-03-01T01:02:03.456789-04:30"))
testBadEncoding(Jsons.textNode("2024-03-01T01:02:03.456789"))
testBadEncoding(Jsons.numberNode(OffsetDateTime.now().toEpochSecond()))
testBadEncoding(Jsons.textNode("foo"))
testBadEncoding(Jsons.nullNode())
}
}
@Test
fun testNull() {
NullCodec.run {
testValueRoundTrip(null)
testJsonRoundTrip(Jsons.nullNode())
Assertions.assertEquals(null, decode(encode("foo")))
}
}
@Test
fun testAnyEncoder() {
val uuid: UUID = UUID.randomUUID()
AnyEncoder.run {
Assertions.assertEquals(Jsons.textNode("foo"), encode("foo"))
Assertions.assertEquals(Jsons.textNode("123"), encode(BigDecimal(123)))
Assertions.assertEquals(Jsons.textNode(uuid.toString()), encode(uuid))
}
}
@Test
fun testArrayEncoder() {
ArrayEncoder(IntCodec).run {
Assertions.assertEquals(
Jsons.arrayNode().add(1).add(2).add(3),
encode(listOf(1, 2, 3)),
)
}
ArrayEncoder(ArrayEncoder(IntCodec)).run {
Assertions.assertEquals(
Jsons.arrayNode()
.add(Jsons.arrayNode().add(1).add(2))
.add(Jsons.arrayNode().add(3)),
encode(listOf(listOf(1, 2), listOf(3))),
)
}
}
@Test
fun testArrayDecoder() {
ArrayDecoder(IntCodec).run {
Assertions.assertEquals(
listOf(1, 2, 3),
decode(
Jsons.arrayNode().add(1).add(2).add(3),
),
)
}
ArrayDecoder(ArrayDecoder(IntCodec)).run {
Assertions.assertEquals(
listOf(listOf(1, 2), listOf(3)),
decode(
Jsons.arrayNode()
.add(Jsons.arrayNode().add(1).add(2))
.add(Jsons.arrayNode().add(3)),
),
)
Assertions.assertThrows(IllegalArgumentException::class.java) {
decode(Jsons.objectNode())
}
Assertions.assertThrows(IllegalArgumentException::class.java) {
decode(Jsons.textNode("[]"))
}
Assertions.assertThrows(IllegalArgumentException::class.java) {
decode(Jsons.nullNode())
}
}
}
}

View File

@@ -0,0 +1,25 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.jdbc
import java.sql.DriverManager
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class H2TestFixtureTest {
val h2 = H2TestFixture()
@Test
fun testMem() {
h2.execute("CREATE SCHEMA hello")
val expected: List<List<Any?>> =
listOf(listOf("HELLO"), listOf("INFORMATION_SCHEMA"), listOf("PUBLIC"))
Assertions.assertEquals(expected, h2.query("SHOW SCHEMAS"))
}
@Test
fun testTcp() {
val actual: String =
DriverManager.getConnection(h2.jdbcUrl).use { it.metaData.databaseProductName }
Assertions.assertEquals("H2", actual)
}
}

View File

@@ -0,0 +1,52 @@
{
"streams": [
{
"stream": {
"name": "bar",
"json_schema": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"id": {
"type": "number"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [],
"namespace": "foo"
},
"sync_mode": "incremental",
"cursor_field": ["id"],
"destination_sync_mode": "append",
"primary_key": []
},
{
"stream": {
"name": "baz",
"json_schema": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"id": {
"type": "number"
}
}
},
"supported_sync_modes": ["full_refresh"],
"default_cursor_field": [],
"source_defined_primary_key": [],
"namespace": "foo"
},
"sync_mode": "incremental",
"cursor_field": ["id"],
"destination_sync_mode": "append",
"primary_key": []
}
]
}

View File

@@ -0,0 +1,74 @@
[
{
"type": "GLOBAL",
"global": {
"shared_state": {
"cdc": {}
},
"stream_states": [
{
"stream_descriptor": {
"name": "bar",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k1": 1,
"k2": 2
}
}
}
]
}
},
{
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "baz",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k": 1
}
}
}
},
{
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "baz",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k": 10
}
}
}
},
{
"type": "GLOBAL",
"global": {
"shared_state": {
"cdc": {}
},
"stream_states": [
{
"stream_descriptor": {
"name": "bar",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k1": 10,
"k2": 20
}
}
}
]
}
}
]

View File

@@ -0,0 +1,24 @@
[
{
"type": "GLOBAL",
"global": {
"shared_state": {
"cdc": {}
},
"stream_states": [
{
"stream_descriptor": {
"name": "bar",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k1": 10,
"k2": 20
}
}
}
]
}
}
]

View File

@@ -0,0 +1,38 @@
[
{
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "baz",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k": 1
}
}
}
},
{
"type": "GLOBAL",
"global": {
"shared_state": {
"cdc": {}
},
"stream_states": [
{
"stream_descriptor": {
"name": "bar",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k1": 10,
"k2": 20
}
}
}
]
}
}
]

View File

@@ -0,0 +1,31 @@
[
{
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "bar",
"namespace": "foo"
},
"stream_state": {
"primary_key": {
"k1": 10,
"k2": 20
}
}
}
},
{
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "baz",
"namespace": "foo"
},
"stream_state": {
"cursors": {
"c": 30
}
}
}
}
]

View File

@@ -0,0 +1,31 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Requires
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
import java.time.Clock
import java.time.Duration
import java.time.Instant
import java.time.ZoneOffset
const val OFFSET_CLOCK = "offset-clock"
/** Injects more-or-less fake clocks for testing purposes. */
@Factory
@Requires(env = [Environment.TEST])
class TestClockFactory {
@Singleton
@Requires(notEnv = [OFFSET_CLOCK])
fun fixed(): Clock = Clock.fixed(fakeNow, ZoneOffset.UTC)
@Singleton
@Requires(env = [OFFSET_CLOCK])
fun offset(): Clock = Clock.offset(Clock.systemUTC(), Duration.between(fakeNow, Instant.now()))
companion object {
/** Some convenient timestamp with an easy-to-read ISO8601 representation. */
val fakeNow: Instant = Instant.ofEpochSecond(3133641600)
}
}

View File

@@ -0,0 +1,93 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.AirbyteConnectorRunnable
import io.airbyte.cdk.AirbyteConnectorRunner
import io.airbyte.cdk.AirbyteDestinationRunner
import io.airbyte.cdk.AirbyteSourceRunner
import io.airbyte.cdk.TestClockFactory
import io.airbyte.cdk.consumers.BufferingOutputConsumer
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteMessage
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.deleteIfExists
data object CliRunner {
/**
* Runs a source connector with the given arguments and returns the results.
*
* This is useful for writing connector integration tests:
* - the [config], [catalog] and [state] get written to temporary files;
* - the file names get passed with the `--config`, `--catalog` and `--state` CLI arguments;
* - an extra temporary file is created to store the output;
* - that file name gets passed with the test-only `--output` CLI argument;
     * - [AirbyteSourceRunner] takes the CLI arguments and runs the connector in a new Micronaut context;
* - after it's done, the output file contents are read and parsed into [AirbyteMessage]s.
* - those are stored in a [BufferingOutputConsumer] which is returned.
*/
fun runSource(
op: String,
config: ConfigurationJsonObjectBase? = null,
catalog: ConfiguredAirbyteCatalog? = null,
state: List<AirbyteStateMessage>? = null,
): BufferingOutputConsumer =
runConnector(op, config, catalog, state) { args: Array<String> ->
AirbyteSourceRunner(args)
}
/** Same as [runSource] but for destinations. */
fun runDestination(
op: String,
config: ConfigurationJsonObjectBase? = null,
catalog: ConfiguredAirbyteCatalog? = null,
state: List<AirbyteStateMessage>? = null,
): BufferingOutputConsumer =
runConnector(op, config, catalog, state) { args: Array<String> ->
AirbyteDestinationRunner(args)
}
private fun runConnector(
op: String,
config: ConfigurationJsonObjectBase?,
catalog: ConfiguredAirbyteCatalog?,
state: List<AirbyteStateMessage>?,
connectorRunnerConstructor: (Array<String>) -> AirbyteConnectorRunner,
): BufferingOutputConsumer {
val result = BufferingOutputConsumer(TestClockFactory().fixed())
val configFile: Path? = inputFile(config)
val catalogFile: Path? = inputFile(catalog)
val stateFile: Path? = inputFile(state)
val outputFile: Path = Files.createTempFile(null, null)
val args: List<String> =
listOfNotNull(
"--$op",
configFile?.let { "--config=$it" },
catalogFile?.let { "--catalog=$it" },
stateFile?.let { "--state=$it" },
"--output=$outputFile",
)
try {
connectorRunnerConstructor(args.toTypedArray()).run<AirbyteConnectorRunnable>()
Files.readAllLines(outputFile)
.filter { it.isNotBlank() }
.map { Jsons.readValue(it, AirbyteMessage::class.java) }
.forEach { result.accept(it) }
return result
} finally {
configFile?.deleteIfExists()
catalogFile?.deleteIfExists()
stateFile?.deleteIfExists()
outputFile.deleteIfExists()
}
}
private fun inputFile(contents: Any?): Path? =
contents?.let {
Files.createTempFile(null, null).also { file ->
Files.writeString(file, Jsons.writeValueAsString(contents))
}
}
}
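
A hedged test sketch (the test class name is hypothetical): the spec operation needs no config, catalog, or state, and the buffered output can be asserted on directly.

import org.junit.jupiter.api.Test

class SpecSmokeTest {
    @Test
    fun testSpec() {
        val output = CliRunner.runSource("spec")
        check(output.specs().isNotEmpty()) // the connector emitted a SPEC message
    }
}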

View File

@@ -0,0 +1,73 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.consumers
import io.airbyte.protocol.models.v0.AirbyteCatalog
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus
import io.airbyte.protocol.models.v0.AirbyteLogMessage
import io.airbyte.protocol.models.v0.AirbyteMessage
import io.airbyte.protocol.models.v0.AirbyteRecordMessage
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.AirbyteTraceMessage
import io.airbyte.protocol.models.v0.ConnectorSpecification
import io.micronaut.context.annotation.Replaces
import io.micronaut.context.annotation.Requires
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
import java.time.Clock
import java.time.Instant
/** [OutputConsumer] implementation for unit tests. Collects everything into thread-safe buffers. */
@Singleton
@Requires(notEnv = [Environment.CLI])
@Requires(missingProperty = CONNECTOR_OUTPUT_FILE)
@Replaces(OutputConsumer::class)
class BufferingOutputConsumer(
clock: Clock,
) : OutputConsumer {
override val emittedAt: Instant = Instant.now(clock)
private val records = mutableListOf<AirbyteRecordMessage>()
private val states = mutableListOf<AirbyteStateMessage>()
private val logs = mutableListOf<AirbyteLogMessage>()
private val specs = mutableListOf<ConnectorSpecification>()
private val statuses = mutableListOf<AirbyteConnectionStatus>()
private val catalogs = mutableListOf<AirbyteCatalog>()
private val traces = mutableListOf<AirbyteTraceMessage>()
private val messages = mutableListOf<AirbyteMessage>()
override fun accept(m: AirbyteMessage) {
synchronized(this) {
messages.add(m)
when (m.type) {
AirbyteMessage.Type.RECORD -> records.add(m.record)
AirbyteMessage.Type.STATE -> states.add(m.state)
AirbyteMessage.Type.LOG -> logs.add(m.log)
AirbyteMessage.Type.SPEC -> specs.add(m.spec)
AirbyteMessage.Type.CONNECTION_STATUS -> statuses.add(m.connectionStatus)
AirbyteMessage.Type.CATALOG -> catalogs.add(m.catalog)
AirbyteMessage.Type.TRACE -> traces.add(m.trace)
else -> TODO("${m.type} not supported")
}
}
}
override fun close() {}
fun records(): List<AirbyteRecordMessage> =
synchronized(this) { listOf(*records.toTypedArray()) }
fun states(): List<AirbyteStateMessage> = synchronized(this) { listOf(*states.toTypedArray()) }
fun logs(): List<AirbyteLogMessage> = synchronized(this) { listOf(*logs.toTypedArray()) }
fun specs(): List<ConnectorSpecification> = synchronized(this) { listOf(*specs.toTypedArray()) }
fun statuses(): List<AirbyteConnectionStatus> =
synchronized(this) { listOf(*statuses.toTypedArray()) }
fun catalogs(): List<AirbyteCatalog> = synchronized(this) { listOf(*catalogs.toTypedArray()) }
fun traces(): List<AirbyteTraceMessage> = synchronized(this) { listOf(*traces.toTypedArray()) }
fun messages(): List<AirbyteMessage> = synchronized(this) { listOf(*messages.toTypedArray()) }
}

View File

@@ -0,0 +1,42 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.consumers
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteMessage
import io.micronaut.context.annotation.Replaces
import io.micronaut.context.annotation.Requires
import io.micronaut.context.annotation.Value
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
import java.io.FileOutputStream
import java.nio.file.Path
import java.time.Clock
import java.time.Instant
/**
* [OutputConsumer] implementation for CLI integration tests. Writes [AirbyteMessage]s to a file
* instead of stdout.
*/
@Singleton
@Requires(env = [Environment.TEST, Environment.CLI])
@Requires(property = CONNECTOR_OUTPUT_FILE)
@Replaces(OutputConsumer::class)
class FileOutputConsumer(
@Value("\${$CONNECTOR_OUTPUT_FILE}") filePath: Path,
clock: Clock,
) : OutputConsumer {
private val writer = FileOutputStream(filePath.toFile()).bufferedWriter()
override val emittedAt: Instant = Instant.now(clock)
override fun accept(msg: AirbyteMessage) {
synchronized(this) {
writer.appendLine(Jsons.writeValueAsString(msg))
writer.flush()
}
}
override fun close() {
synchronized(this) { writer.close() }
}
}

View File

@@ -0,0 +1,368 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.extensions
import java.lang.reflect.Constructor
import java.lang.reflect.InvocationHandler
import java.lang.reflect.Method
import java.lang.reflect.Modifier
import java.lang.reflect.Proxy
import java.time.Duration
import java.time.Instant
import java.time.format.DateTimeParseException
import java.util.Collections
import java.util.LinkedList
import java.util.Timer
import java.util.TimerTask
import java.util.concurrent.TimeUnit
import java.util.concurrent.TimeoutException
import java.util.regex.Pattern
import kotlin.collections.HashMap
import kotlin.concurrent.Volatile
import org.apache.commons.lang3.StringUtils
import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.commons.lang3.time.DurationFormatUtils
import org.junit.jupiter.api.Timeout
import org.junit.jupiter.api.extension.DynamicTestInvocationContext
import org.junit.jupiter.api.extension.ExtensionContext
import org.junit.jupiter.api.extension.InvocationInterceptor
import org.junit.jupiter.api.extension.ReflectiveInvocationContext
import org.slf4j.Logger
import org.slf4j.LoggerFactory
/**
 * By default, JUnit only outputs logs to the console, and nothing makes it into the log4j logs. This
 * class fixes that by using the interceptor facility to print progress and timing information, which
 * gives us JUnit log lines in our test logs. This class is instantiated via
* [Java's
* ServiceLoader](https://docs.oracle.com/javase%2F9%2Fdocs%2Fapi%2F%2F/java/util/ServiceLoader.html)
* The declaration can be found in
* resources/META-INF/services/org.junit.jupiter.api.extension.Extension
*/
class LoggingInvocationInterceptor : InvocationInterceptor {
private class LoggingInvocationInterceptorHandler : InvocationHandler {
@Throws(Throwable::class)
override fun invoke(
proxy: Any,
method: Method,
args: Array<Any>,
): Any? {
val methodName = method.name
val invocationContextClass: Class<*> =
when (methodName) {
"interceptDynamicTest" -> DynamicTestInvocationContext::class.java
else -> ReflectiveInvocationContext::class.java
}
try {
LoggingInvocationInterceptor::class
.java
.getDeclaredMethod(
method.name,
InvocationInterceptor.Invocation::class.java,
invocationContextClass,
ExtensionContext::class.java,
)
} catch (_: NoSuchMethodException) {
LOGGER.error(
"Junit LoggingInvocationInterceptor executing unknown interception point {}",
method.name,
)
return method.invoke(proxy, *(args))
}
val invocation = args[0] as InvocationInterceptor.Invocation<*>?
val reflectiveInvocationContext = args[1] as? ReflectiveInvocationContext<*>
val extensionContext = args[2] as ExtensionContext?
val logLineSuffix: String
val methodMatcher = methodPattern.matcher(methodName)
if (methodName == "interceptDynamicTest") {
logLineSuffix = "execution of DynamicTest ${extensionContext!!.displayName}"
} else if (methodName == "interceptTestClassConstructor") {
logLineSuffix = "instance creation for ${reflectiveInvocationContext!!.targetClass}"
} else if (methodMatcher.matches()) {
val interceptedEvent = methodMatcher.group(1)
val methodRealClassName =
reflectiveInvocationContext!!.executable!!.declaringClass.simpleName
val reflectiveMethodName = reflectiveInvocationContext.executable!!.name
val targetClassName = reflectiveInvocationContext.targetClass.simpleName
val methodDisplayName =
if (targetClassName == methodRealClassName) {
reflectiveMethodName
} else {
"$reflectiveMethodName($methodRealClassName)"
}
logLineSuffix =
"execution of @$interceptedEvent method $targetClassName.$methodDisplayName"
TestContext.CURRENT_TEST_NAME.set("$targetClassName.$reflectiveMethodName")
} else {
logLineSuffix = "execution of unknown intercepted call $methodName"
}
val currentThread = Thread.currentThread()
            val timeoutTask = TimeoutInterruptor(currentThread)
val start = Instant.now()
try {
val timeout = reflectiveInvocationContext?.let(::getTimeout)
if (timeout != null) {
LOGGER.info(
"Junit starting {} with timeout of {}",
logLineSuffix,
DurationFormatUtils.formatDurationWords(timeout.toMillis(), true, true),
)
Timer("TimeoutTimer-" + currentThread.name, true)
.schedule(timeoutTask, timeout.toMillis())
} else {
LOGGER.warn("Junit starting {} with no timeout", logLineSuffix)
}
val retVal = invocation!!.proceed()
val elapsedMs = Duration.between(start, Instant.now()).toMillis()
LOGGER.info(
"Junit completed {} in {}",
logLineSuffix,
DurationFormatUtils.formatDurationWords(elapsedMs, true, true),
)
return retVal
} catch (throwable: Throwable) {
timeoutTask.cancel()
val elapsedMs = Duration.between(start, Instant.now()).toMillis()
var t1: Throwable
if (timeoutTask.wasTriggered) {
val formattedDuration =
DurationFormatUtils.formatDurationWords(
elapsedMs,
true,
true,
)
val msg =
"Execution was cancelled after $formattedDuration. " +
"If a test should be given more time to complete, use the @Timeout " +
"annotation. If all the tests time out, override the " +
"'JunitMethodExecutionTimeout' system property."
t1 = TimeoutException(msg)
t1.initCause(throwable)
} else {
t1 = throwable
}
var belowCurrentCall = false
val stackToDisplay: MutableList<String?> = LinkedList()
for (stackString in ExceptionUtils.getStackFrames(throwable)) {
if (stackString!!.startsWith("\tat ")) {
if (
!belowCurrentCall &&
stackString.contains(
LoggingInvocationInterceptor::class.java.canonicalName,
)
) {
belowCurrentCall = true
}
} else {
belowCurrentCall = false
}
if (!belowCurrentCall) {
stackToDisplay.add(stackString)
}
}
val stackTrace = StringUtils.join(stackToDisplay, "\n ")
LOGGER.error(
"Junit exception throw during {} after {}:\n{}",
logLineSuffix,
DurationFormatUtils.formatDurationWords(elapsedMs, true, true),
stackTrace,
)
throw t1
} finally {
timeoutTask.cancel()
TestContext.CURRENT_TEST_NAME.set(null)
}
}
        private class TimeoutInterruptor(
private val parentThread: Thread,
) : TimerTask() {
@Volatile var wasTriggered: Boolean = false
override fun run() {
LOGGER.info(
"interrupting running task on ${parentThread.name}. " +
"Current Stacktrace is ${parentThread.stackTrace.asList()}",
)
wasTriggered = true
parentThread.interrupt()
}
override fun cancel(): Boolean {
LOGGER.info("cancelling timer task on ${parentThread.name}")
return super.cancel()
}
}
companion object {
private val methodPattern: Pattern = Pattern.compile("intercept(.*)Method")
private val PATTERN: Pattern =
Pattern.compile(
"([1-9]\\d*) *((?:[nμm]?s)|m|h|d)?",
Pattern.CASE_INSENSITIVE or Pattern.UNICODE_CASE,
)
private val UNITS_BY_ABBREVIATION: MutableMap<String, TimeUnit>
init {
val unitsByAbbreviation: MutableMap<String, TimeUnit> = HashMap()
unitsByAbbreviation["ns"] = TimeUnit.NANOSECONDS
unitsByAbbreviation["μs"] = TimeUnit.MICROSECONDS
unitsByAbbreviation["ms"] = TimeUnit.MILLISECONDS
unitsByAbbreviation["s"] = TimeUnit.SECONDS
unitsByAbbreviation["m"] = TimeUnit.MINUTES
unitsByAbbreviation["h"] = TimeUnit.HOURS
unitsByAbbreviation["d"] = TimeUnit.DAYS
UNITS_BY_ABBREVIATION = Collections.unmodifiableMap(unitsByAbbreviation)
}
@Throws(DateTimeParseException::class)
fun parseDuration(text: String): Duration {
val matcher = PATTERN.matcher(text.trim { it <= ' ' })
if (matcher.matches()) {
val value = matcher.group(1).toLong()
val unitAbbreviation = matcher.group(2)
val unit =
if (unitAbbreviation == null) {
TimeUnit.SECONDS
} else {
UNITS_BY_ABBREVIATION.getValue(unitAbbreviation.lowercase())
}
return Duration.ofSeconds(unit.toSeconds(value))
}
throw DateTimeParseException(
"Timeout duration is not in the expected format (<number> [ns|μs|ms|s|m|h|d])",
text,
0,
)
}
private fun getTimeout(invocationContext: ReflectiveInvocationContext<*>): Duration {
var timeout: Duration? = null
var m = invocationContext.executable
if (m is Method) {
var timeoutAnnotation: Timeout? = m.getAnnotation(Timeout::class.java)
if (timeoutAnnotation == null) {
timeoutAnnotation =
invocationContext.targetClass.getAnnotation(Timeout::class.java)
}
if (timeoutAnnotation != null) {
timeout =
Duration.ofMillis(
timeoutAnnotation.unit.toMillis(timeoutAnnotation.value),
)
}
}
if (timeout == null) {
timeout =
parseDuration(
System.getProperty(JUNIT_METHOD_EXECUTION_TIMEOUT_PROPERTY_NAME),
)
}
return timeout
}
}
}
private val proxy: InvocationInterceptor? =
Proxy.newProxyInstance(
javaClass.classLoader,
arrayOf<Class<*>?>(InvocationInterceptor::class.java),
LoggingInvocationInterceptorHandler(),
) as InvocationInterceptor
@Throws(Throwable::class)
override fun interceptAfterAllMethod(
invocation: InvocationInterceptor.Invocation<Void?>?,
invocationContext: ReflectiveInvocationContext<Method?>?,
extensionContext: ExtensionContext?,
) {
proxy!!.interceptAfterAllMethod(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun interceptAfterEachMethod(
invocation: InvocationInterceptor.Invocation<Void?>?,
invocationContext: ReflectiveInvocationContext<Method?>?,
extensionContext: ExtensionContext?,
) {
proxy!!.interceptAfterEachMethod(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun interceptBeforeAllMethod(
invocation: InvocationInterceptor.Invocation<Void?>?,
invocationContext: ReflectiveInvocationContext<Method?>?,
extensionContext: ExtensionContext?,
) {
proxy!!.interceptBeforeAllMethod(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun interceptBeforeEachMethod(
invocation: InvocationInterceptor.Invocation<Void?>?,
invocationContext: ReflectiveInvocationContext<Method?>?,
extensionContext: ExtensionContext?,
) {
proxy!!.interceptBeforeEachMethod(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun interceptDynamicTest(
invocation: InvocationInterceptor.Invocation<Void?>?,
invocationContext: DynamicTestInvocationContext?,
extensionContext: ExtensionContext?,
) {
proxy!!.interceptDynamicTest(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun interceptTestMethod(
invocation: InvocationInterceptor.Invocation<Void>,
invocationContext: ReflectiveInvocationContext<Method>,
extensionContext: ExtensionContext,
) {
if (!Modifier.isPublic(invocationContext.executable!!.modifiers)) {
LOGGER.warn(
"Junit method {}.{} is not declared as public",
invocationContext.executable!!.declaringClass.canonicalName,
invocationContext.executable!!.name,
)
}
proxy!!.interceptTestMethod(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun interceptTestTemplateMethod(
invocation: InvocationInterceptor.Invocation<Void?>?,
invocationContext: ReflectiveInvocationContext<Method?>?,
extensionContext: ExtensionContext?,
) {
proxy!!.interceptTestTemplateMethod(invocation, invocationContext, extensionContext)
}
@Throws(Throwable::class)
override fun <T> interceptTestFactoryMethod(
invocation: InvocationInterceptor.Invocation<T?>?,
invocationContext: ReflectiveInvocationContext<Method?>?,
extensionContext: ExtensionContext?,
): T? = proxy!!.interceptTestFactoryMethod(invocation, invocationContext, extensionContext)
@Throws(Throwable::class)
override fun <T> interceptTestClassConstructor(
invocation: InvocationInterceptor.Invocation<T?>?,
invocationContext: ReflectiveInvocationContext<Constructor<T?>?>?,
extensionContext: ExtensionContext?,
): T? =
proxy!!.interceptTestClassConstructor(
invocation,
invocationContext,
extensionContext,
)
companion object {
private val LOGGER: Logger =
LoggerFactory.getLogger(LoggingInvocationInterceptor::class.java)
private val JUNIT_METHOD_EXECUTION_TIMEOUT_PROPERTY_NAME: String =
"JunitMethodExecutionTimeout"
}
}

View File

@@ -0,0 +1,6 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.extensions
object TestContext {
val CURRENT_TEST_NAME: ThreadLocal<String?> = ThreadLocal()
}

View File

@@ -0,0 +1,73 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.jdbc
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Value
import jakarta.inject.Singleton
import java.sql.Connection
import java.sql.DriverManager
import java.sql.ResultSet
import org.apache.commons.lang3.RandomStringUtils
import org.h2.tools.Server
private val log = KotlinLogging.logger {}
/** Wraps an H2 in-memory database and exposes a TCP server for it. */
@Singleton
class H2TestFixture(
@Value("\${h2.database.name}") database: String? = null,
) : AutoCloseable {
private val server: Server
private val internalConnection: Connection
val jdbcUrl: String
val port: Int
val database: String
init {
this.database = database ?: RandomStringUtils.randomAlphabetic(10).uppercase()
internalConnection = DriverManager.getConnection("jdbc:h2:mem:${this.database}")
server = Server.createTcpServer()
server.start()
jdbcUrl = "jdbc:h2:${server.url}/mem:${this.database}"
port = server.port
log.info { "H2 server ready to accept connections for $jdbcUrl" }
}
fun execute(
sqlFmt: String,
vararg args: Any?,
) {
internalConnection.createStatement().use {
it.execute(String.format(sqlFmt.replace('\n', ' '), *args))
}
}
fun query(
sqlFmt: String,
vararg args: Any?,
): List<List<Any?>> =
internalConnection.createStatement().use {
val result = mutableListOf<List<Any?>>()
it.executeQuery(String.format(sqlFmt.replace('\n', ' '), *args)).use { rs: ResultSet ->
val n: Int = rs.metaData.columnCount
while (rs.next()) {
val row = mutableListOf<Any?>()
for (i in 1..n) {
row.add(rs.getObject(i)?.takeUnless { rs.wasNull() })
}
result.add(row)
}
}
result
}
fun createConnection(): Connection = DriverManager.getConnection(jdbcUrl)
override fun close() {
log.info { "H2 server shutting down..." }
server.stop()
internalConnection.close()
log.info { "H2 server shut down." }
}
}

View File

@@ -0,0 +1,90 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.ssh
import io.airbyte.cdk.testcontainers.TestContainerFactory
import io.airbyte.cdk.testcontainers.innerAddress
import io.airbyte.cdk.testcontainers.outerAddress
import jakarta.inject.Singleton
import org.testcontainers.Testcontainers
import org.testcontainers.containers.GenericContainer
import org.testcontainers.containers.Network
import org.testcontainers.images.builder.ImageFromDockerfile
import org.testcontainers.utility.DockerImageName
/**
* Wraps a container which runs an SSH bastion, a.k.a. jump server. This is useful to test SSH
* tunneling features.
*/
@Singleton
@JvmInline
value class SshBastionContainer
private constructor(
val container: GenericContainer<*>,
) : AutoCloseable {
constructor(
network: Network? = null,
tunnelingToHostPort: Int? = null,
) : this(exclusive(network, tunnelingToHostPort))
val key: String
get() = container.execInContainer("cat", "var/bastion/id_rsa").stdout
val innerKeyAuthTunnelMethod: SshKeyAuthTunnelMethod
get() =
container.innerAddress().let {
SshKeyAuthTunnelMethod(it.hostName, it.port, SSH_USER, key)
}
val outerKeyAuthTunnelMethod: SshKeyAuthTunnelMethod
get() =
container.outerAddress().let {
SshKeyAuthTunnelMethod(it.hostName, it.port, SSH_USER, key)
}
val innerPasswordAuthTunnelMethod: SshPasswordAuthTunnelMethod
get() =
container.innerAddress().let {
SshPasswordAuthTunnelMethod(it.hostName, it.port, SSH_USER, SSH_PASSWORD)
}
val outerPasswordAuthTunnelMethod: SshPasswordAuthTunnelMethod
get() =
container.outerAddress().let {
SshPasswordAuthTunnelMethod(it.hostName, it.port, SSH_USER, SSH_PASSWORD)
}
override fun close() {
container.close()
}
companion object {
init {
TestContainerFactory.register("bastion-test") { _: DockerImageName ->
val image: ImageFromDockerfile =
ImageFromDockerfile("bastion-test")
.withFileFromClasspath("Dockerfile", "bastion/Dockerfile")
GenericContainer<_>(image).withExposedPorts(22)
}
}
fun exclusive(
network: Network?,
tunnelingToHostPort: Int?,
): GenericContainer<*> {
val imageName: DockerImageName = DockerImageName.parse("bastion-test")
if (tunnelingToHostPort != null) {
Testcontainers.exposeHostPorts(tunnelingToHostPort)
}
if (network == null) {
return TestContainerFactory.exclusive(imageName)
}
return TestContainerFactory.exclusive(
imageName,
TestContainerFactory.newModifier("withNetwork") { it.withNetwork(network) },
)
}
const val SSH_USER: String = "sshuser"
const val SSH_PASSWORD: String = "secret"
}
}
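
A hypothetical sketch of how a test consumes this fixture, assuming the bastion shares a docker network with the database under test:

fun bastionSketch(network: Network) {
    SshBastionContainer(network = network).use { bastion ->
        // Clients in the host JVM tunnel through the mapped port ...
        val fromHost = bastion.outerPasswordAuthTunnelMethod
        // ... while peer containers on the network use the inner address.
        val fromPeer = bastion.innerKeyAuthTunnelMethod
        println("host tunnel: $fromHost, peer tunnel: $fromPeer")
    }
}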

View File

@@ -0,0 +1,42 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.testcontainers
object LoggingHelper {
const val LOG_SOURCE_MDC_KEY: String = "log_source"
const val PREPARE_COLOR_CHAR: String = "\u001b[m"
const val RESET: String = "\u001B[0m"
fun applyColor(
color: Color,
msg: String,
): String = PREPARE_COLOR_CHAR + color.code + msg + PREPARE_COLOR_CHAR + RESET
fun logPrefixMdc(
logPrefix: String,
color: Color? = null,
): Pair<String, String> =
LOG_SOURCE_MDC_KEY to
when (color) {
null -> logPrefix
else -> applyColor(color, logPrefix)
}
enum class Color(
val code: String,
) {
BLACK("\u001b[30m"),
RED("\u001b[31m"),
GREEN("\u001b[32m"),
YELLOW("\u001b[33m"),
BLUE("\u001b[34m"),
MAGENTA("\u001b[35m"),
CYAN("\u001b[36m"),
WHITE("\u001b[37m"),
BLUE_BACKGROUND("\u001b[44m"), // source
YELLOW_BACKGROUND("\u001b[43m"), // destination
GREEN_BACKGROUND("\u001b[42m"), // normalization
CYAN_BACKGROUND("\u001b[46m"), // container runner
RED_BACKGROUND("\u001b[41m"), // testcontainers
PURPLE_BACKGROUND("\u001b[45m"), // dbt
}
}
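
A sketch of the intended call site: the returned pair feeds straight into SLF4J's MDC under the log_source key, which the container-log-pattern in the log4j2 configuration further below picks up:

fun mdcSketch() {
    val (key, value) = LoggingHelper.logPrefixMdc("source", LoggingHelper.Color.BLUE_BACKGROUND)
    org.slf4j.MDC.put(key, value)
    try {
        // Log statements on this thread now carry the colored "source" prefix.
    } finally {
        org.slf4j.MDC.remove(key)
    }
}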

View File

@@ -0,0 +1,153 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.testcontainers
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.ConcurrentMap
import java.util.concurrent.atomic.AtomicInteger
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import org.testcontainers.containers.GenericContainer
import org.testcontainers.containers.output.OutputFrame
import org.testcontainers.containers.output.Slf4jLogConsumer
import org.testcontainers.utility.DockerImageName
/** [TestContainerFactory] provides suitably provisioned testcontainers. */
object TestContainerFactory {
private val logger: Logger = LoggerFactory.getLogger(TestContainerFactory::class.java)
private val specificFactories: ConcurrentMap<DockerImageName, SpecificTestContainerFactory<*>> =
ConcurrentHashMap()
private val sharedContainers: ConcurrentMap<ContainerKey, () -> Result<GenericContainer<*>>> =
ConcurrentHashMap()
private val counter = AtomicInteger()
class SpecificTestContainerFactory<C : GenericContainer<*>>(
val defaultImageName: DockerImageName,
val constructor: (DockerImageName) -> C,
) {
@Suppress("UNCHECKED_CAST")
fun createAndStart(
imageName: DockerImageName,
vararg modifiers: ContainerModifier<C>,
): C {
val modifierNames: String = modifiers.map { it.name }.joinToString(" ")
logger.info("Creating new container based on {} with {}.", imageName, modifierNames)
val container: GenericContainer<*> = constructor(imageName)
container.withLogConsumer(
object : Slf4jLogConsumer(logger) {
override fun accept(frame: OutputFrame) {
if (!frame.utf8StringWithoutLineEnding.isNullOrBlank()) {
super.accept(frame)
}
}
},
)
val id: Int = counter.incrementAndGet()
val logPrefix = "testcontainer #$id $imageName with $modifierNames"
LoggingHelper.logPrefixMdc(logPrefix, LoggingHelper.Color.RED_BACKGROUND)
for (modifier in modifiers) {
logger.info(
"Calling {} in {} on new container based on {}.",
modifier.name,
javaClass.name,
imageName,
)
modifier.modify(container as C)
}
container.start()
return container as C
}
}
interface ContainerModifier<C : GenericContainer<*>> {
val name: String
get() = toString()
fun modify(container: C)
}
private fun findFactory(dockerImageName: DockerImageName): SpecificTestContainerFactory<*> {
specificFactories.forEach { (_, factory: SpecificTestContainerFactory<*>) ->
if (
dockerImageName == factory.defaultImageName ||
dockerImageName.isCompatibleWith(factory.defaultImageName)
) {
return factory
}
}
throw NoSuchElementException("no factory registered for $dockerImageName")
}
private data class ContainerKey(
val factoryKey: DockerImageName,
val imageName: DockerImageName,
val modifierNames: List<String>,
)
/** Registers the constructor for testcontainers of type [C]. */
fun <C : GenericContainer<*>> register(
testContainerImageName: String,
constructor: (DockerImageName) -> C,
) {
register(DockerImageName.parse(testContainerImageName), constructor)
}
/** Registers the constructor for testcontainers of type [C]. */
fun <C : GenericContainer<*>> register(
testContainerImageName: DockerImageName,
constructor: (DockerImageName) -> C,
) {
val specificFactory = SpecificTestContainerFactory(testContainerImageName, constructor)
specificFactories[testContainerImageName] = specificFactory
}
fun <C : GenericContainer<*>> newModifier(
name: String,
fn: (C) -> Unit,
): ContainerModifier<C> =
object : ContainerModifier<C> {
override val name: String = name
override fun modify(container: C) {
fn(container)
}
override fun toString(): String = name
}
/** Returns an exclusive instance of the testcontainer. */
@Suppress("UNCHECKED_CAST")
fun <C : GenericContainer<*>> exclusive(
dockerImageName: DockerImageName,
vararg containerModifiers: ContainerModifier<C>,
): C {
val factory: SpecificTestContainerFactory<C> =
findFactory(dockerImageName) as SpecificTestContainerFactory<C>
return factory.createAndStart(dockerImageName, *containerModifiers)
}
/** Returns a shared instance of the testcontainer. */
@Suppress("UNCHECKED_CAST")
fun <C : GenericContainer<*>> shared(
dockerImageName: DockerImageName,
vararg containerModifiers: ContainerModifier<C>,
): C {
val factory: SpecificTestContainerFactory<C> =
findFactory(dockerImageName) as SpecificTestContainerFactory<C>
val containerKey =
ContainerKey(
factory.defaultImageName,
dockerImageName,
containerModifiers.map { it.name },
)
val newResult: Result<C> by lazy {
factory.runCatching { createAndStart(dockerImageName, *containerModifiers) }
}
// We deliberately avoid creating the container itself eagerly during the evaluation of the
// map value. Container creation can be exceedingly slow.
// Furthermore, we need to handle exceptions raised during container creation.
val supplier: () -> Result<C> =
sharedContainers.computeIfAbsent(containerKey) { { newResult } } as () -> Result<C>
// Instead, the container creation (if applicable) is deferred to here.
return supplier().getOrThrow()
}
}
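
A hypothetical sketch of the register/lookup flow, mirroring the bastion registration above; the redis image is illustrative:

fun factorySketch(): GenericContainer<*> {
    TestContainerFactory.register("redis:7") { imageName: DockerImageName ->
        GenericContainer<_>(imageName).withExposedPorts(6379)
    }
    // shared() caches instances by (factory, image, modifier names);
    // exclusive() would create and start a fresh container on every call.
    return TestContainerFactory.shared(DockerImageName.parse("redis:7"))
}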

View File

@@ -0,0 +1,24 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.testcontainers
import java.net.InetSocketAddress
import org.testcontainers.containers.Container
const val DOCKER_HOST_FROM_WITHIN_CONTAINER = "host.testcontainers.internal"
/**
* Returns the inner docker network address of a container. This can be used to reach a container
* from another container running on the same network.
*/
fun Container<*>.innerAddress(): InetSocketAddress =
InetSocketAddress.createUnresolved(
containerInfo.networkSettings.networks.entries.first().value.ipAddress!!,
exposedPorts.first(),
)
/**
* Returns the outer docker network address of a container. This can be used to reach a container
* from the host machine
*/
fun Container<*>.outerAddress(): InetSocketAddress =
InetSocketAddress.createUnresolved(host, firstMappedPort)
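
A short sketch contrasting the two; the literal addresses in the comments are illustrative:

fun addressSketch(container: Container<*>) {
    val fromPeer = container.innerAddress() // e.g. 172.17.0.3:5432, dialed by peer containers
    val fromHost = container.outerAddress() // e.g. localhost:49154, dialed by the host JVM
    println("peer: $fromPeer, host: $fromHost")
}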

View File

@@ -0,0 +1 @@
io.airbyte.cdk.extensions.LoggingInvocationInterceptor

View File

@@ -0,0 +1,14 @@
FROM linuxkit/sshd:v1.0.0
# enable logging in as `sshuser` with password `secret`
RUN sed -i 's/#PermitRootLogin yes/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/PasswordAuthentication no/PasswordAuthentication yes/g' /etc/ssh/sshd_config
RUN adduser -D -g "sshuser" sshuser -G wheel
RUN echo -e "secret\nsecret" | passwd sshuser
# enable logging in as `sshuser` with a key exposed in `/var/bastion/id_rsa`
RUN mkdir /var/bastion
RUN ssh-keygen -m PEM -t rsa -b 4096 -C "test-container-bastion" -P "" -f /var/bastion/id_rsa -q
RUN install -D /var/bastion/id_rsa.pub /home/sshuser/.ssh/authorized_keys
RUN chown -R sshuser:wheel /home/sshuser/.ssh
RUN chmod 600 /home/sshuser/.ssh/authorized_keys

View File

@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="INFO">
<Properties>
<!-- Set the LOG_SCRUB_PATTERN env var to a regex pattern to scrub the log messages of secrets or any other unwanted content. -->
<Property name="jvm-log-pattern">%d{yyyy-MM-dd'T'HH:mm:ss,SSS}{GMT+0}`%t`%T`%highlight{%p}`%C{1.}(%M):%L - %replace{%m}{$${env:LOG_SCRUB_PATTERN:-\*\*\*\*\*}}{*****}%n</Property>
        <!-- Logs the timestamp and, if present, the log_source/application name at the beginning of the line followed by a > separator, then always the rest of the line. -->
<Property name="container-log-pattern">%d{yyyy-MM-dd'T'HH:mm:ss,SSS}{GMT+0}`%replace{%X{log_source}}{^ -}{} > %replace{%m}{$${env:LOG_SCRUB_PATTERN:-\*\*\*\*\*}}{*****}%n</Property>
<!-- Always log INFO by default. -->
<Property name="log-level">${sys:LOG_LEVEL:-${env:LOG_LEVEL:-INFO}}</Property>
<Property name="logDir">target/test-logs/${date:yyyy-MM-dd'T'HH:mm:ss}</Property>
</Properties>
<Appenders>
<Console name="JvmLogsStdOut" target="SYSTEM_OUT">
<PatternLayout pattern="${jvm-log-pattern}"/>
</Console>
<Console name="ContainerLogsStdOut" target="SYSTEM_OUT">
<PatternLayout pattern="${container-log-pattern}"/>
</Console>
<File name="JvmLogsFile" fileName="${logDir}/airbyte_jvm.log">
<PatternLayout disableAnsi="true" pattern="${jvm-log-pattern}"/>
</File>
<File name="ContainerLogFiles" fileName="${logDir}/airbyte_containers.log">
<PatternLayout disableAnsi="true" pattern="${container-log-pattern}"/>
</File>
<File name="UnifiedFile-JvmLogs" fileName="${logDir}/airbyte_unified.log">
<PatternLayout pattern="${jvm-log-pattern}"/>
</File>
<File name="UnifiedFile-ContainerLogs" fileName="${logDir}/airbyte_unified.log">
<PatternLayout pattern="${container-log-pattern}"/>
</File>
<ASync name="JvmLogs" includeLocation="true">
<AppenderRef ref="JvmLogsStdOut"/>
<AppenderRef ref="JvmLogsFile"/>
<AppenderRef ref="UnifiedFile-JvmLogs"/>
</ASync>
<ASync name="ContainerLogs">
<AppenderRef ref="ContainerLogsStdOut"/>
<AppenderRef ref="ContainerLogFiles"/>
<AppenderRef ref="UnifiedFile-ContainerLogs"/>
</ASync>
<Routing name="AllLogs">
<Routes pattern="$${ctx:simple}">
<Route key="true" ref="ContainerLogs">
</Route>
<Route ref="JvmLogs">
</Route>
</Routes>
</Routing>
</Appenders>
<Loggers>
<Root level="${log-level}">
<AppenderRef ref="AllLogs"/>
</Root>
</Loggers>
</Configuration>

View File

@@ -0,0 +1,9 @@
dependencies {
implementation project(':airbyte-cdk:bulk:core:bulk-cdk-core-base')
implementation 'org.apache.commons:commons-lang3:3.14.0'
implementation 'hu.webarticum:tree-printer:3.2.1'
testFixturesApi testFixtures(project(':airbyte-cdk:bulk:core:bulk-cdk-core-base'))
testImplementation project(':airbyte-cdk:bulk:toolkits:bulk-cdk-toolkit-extract-jdbc')
}

View File

@@ -0,0 +1,28 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.micronaut.context.annotation.Factory
import jakarta.inject.Singleton
import java.time.Duration
/** Subtype of [Configuration] for sources. */
interface SourceConfiguration : Configuration {
/** Does READ generate states of type GLOBAL? */
val global: Boolean
/** During the READ operation, how often a feed should checkpoint, ideally. */
val checkpointTargetInterval: Duration
/**
* Micronaut factory which glues [ConfigurationJsonObjectSupplier] and
* [SourceConfigurationFactory] together to produce a [SourceConfiguration] singleton.
*/
@Factory
private class MicronautFactory {
@Singleton
fun <I : ConfigurationJsonObjectBase> sourceConfig(
pojoSupplier: ConfigurationJsonObjectSupplier<I>,
factory: SourceConfigurationFactory<I, out SourceConfiguration>,
): SourceConfiguration = factory.make(pojoSupplier.get())
}
}

View File

@@ -0,0 +1,22 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.exceptions.ConfigErrorException
/**
* Each connector contains an implementation of this interface in a stateless class which maps the
* configuration JSON object to a typed [Configuration] implementation which is more directly useful
* to the rest of the connector.
*/
interface SourceConfigurationFactory<I : ConfigurationJsonObjectBase, O : SourceConfiguration> {
fun makeWithoutExceptionHandling(pojo: I): O
/** Wraps [makeWithoutExceptionHandling] exceptions in [ConfigErrorException]. */
fun make(pojo: I): O =
try {
makeWithoutExceptionHandling(pojo)
} catch (e: Exception) {
// Wrap NPEs (mostly) in ConfigErrorException.
throw ConfigErrorException("Failed to build ConnectorConfiguration.", e)
}
}
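
A hypothetical connector-side sketch (none of the Fake* types exist in this commit). It assumes that ConfigurationJsonObjectBase is an open class with a no-arg constructor and that SourceConfiguration declares no abstract members beyond those shown above; neither is visible in this diff:

class FakePojo : ConfigurationJsonObjectBase() {
    var cdc: Boolean = false
    var checkpointIntervalMinutes: Long? = null
}

data class FakeConfig(
    override val global: Boolean,
    override val checkpointTargetInterval: java.time.Duration,
) : SourceConfiguration

class FakeConfigFactory : SourceConfigurationFactory<FakePojo, FakeConfig> {
    override fun makeWithoutExceptionHandling(pojo: FakePojo): FakeConfig =
        FakeConfig(
            global = pojo.cdc,
            // The !! is deliberate: a missing value NPEs here and reaches the
            // caller of make() wrapped in a ConfigErrorException.
            checkpointTargetInterval =
                java.time.Duration.ofMinutes(pojo.checkpointIntervalMinutes!!),
        )
}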

View File

@@ -0,0 +1,110 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.consumers
import io.airbyte.cdk.data.AirbyteType
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.DefaultImplementation
import jakarta.inject.Singleton
import java.util.function.Consumer
/**
* At the start of the READ operation, the connector configuration, the configured catalog and the
* input states are validated against each other. For instance, a catalog may have grown stale after
* a schema change in the source database. All validation failures are passed to this interface. The
* production implementation will log a message while the test implementation collects them in a
* buffer for later inspection.
*/
@DefaultImplementation(LoggingCatalogValidationFailureHandler::class)
interface CatalogValidationFailureHandler : Consumer<CatalogValidationFailure>
/** Union type for all validation failures. */
sealed interface CatalogValidationFailure {
val streamName: String
val streamNamespace: String?
}
data class StreamNotFound(
override val streamName: String,
override val streamNamespace: String?,
) : CatalogValidationFailure
data class MultipleStreamsFound(
override val streamName: String,
override val streamNamespace: String?,
) : CatalogValidationFailure
data class StreamHasNoFields(
override val streamName: String,
override val streamNamespace: String?,
) : CatalogValidationFailure
data class FieldNotFound(
override val streamName: String,
override val streamNamespace: String?,
val fieldName: String,
) : CatalogValidationFailure
data class FieldTypeMismatch(
override val streamName: String,
override val streamNamespace: String?,
val fieldName: String,
val expected: AirbyteType,
val actual: AirbyteType,
) : CatalogValidationFailure
data class InvalidPrimaryKey(
override val streamName: String,
override val streamNamespace: String?,
val primaryKey: List<String>,
) : CatalogValidationFailure
data class InvalidCursor(
override val streamName: String,
override val streamNamespace: String?,
val cursor: String,
) : CatalogValidationFailure
data class InvalidIncrementalSyncMode(
override val streamName: String,
override val streamNamespace: String?,
) : CatalogValidationFailure
data class ResetStream(
override val streamName: String,
override val streamNamespace: String?,
) : CatalogValidationFailure
private val log = KotlinLogging.logger {}
@Singleton
private class LoggingCatalogValidationFailureHandler : CatalogValidationFailureHandler {
override fun accept(f: CatalogValidationFailure) {
when (f) {
is FieldNotFound ->
log.warn { "In stream ${f.prettyName()}: field '${f.fieldName}' not found." }
is FieldTypeMismatch ->
log.warn {
"In stream ${f.prettyName()}: " +
"field '${f.fieldName}' is ${f.actual} but catalog expects ${f.expected}."
}
            is StreamHasNoFields -> log.warn { "In stream ${f.prettyName()}: no data fields found." }
is InvalidCursor ->
log.warn { "In stream ${f.prettyName()}: invalid cursor '${f.cursor}'." }
is InvalidPrimaryKey ->
log.warn { "In stream ${f.prettyName()}: invalid primary key '${f.primaryKey}'." }
is InvalidIncrementalSyncMode ->
log.warn { "In stream ${f.prettyName()}: incremental sync not possible." }
is MultipleStreamsFound ->
log.warn { "Multiple matching streams found for ${f.prettyName()}." }
is ResetStream -> log.warn { "Resetting stream ${f.prettyName()}." }
            is StreamNotFound -> log.warn { "No matching stream found for ${f.prettyName()}." }
}
}
private fun CatalogValidationFailure.prettyName(): String =
if (streamNamespace == null) {
"'$streamName' in unspecified namespace"
} else {
"'$streamName' in namespace '$streamNamespace'"
}
}
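
The buffering test implementation mentioned in the interface KDoc is not part of this file; a minimal sketch of it might look like this, with assertions running against the failures list after validation:

class BufferingCatalogValidationFailureHandler : CatalogValidationFailureHandler {
    val failures = mutableListOf<CatalogValidationFailure>()
    override fun accept(f: CatalogValidationFailure) {
        failures.add(f)
    }
}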

View File

@@ -0,0 +1,116 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.operation
import io.airbyte.cdk.command.ConfigurationJsonObjectBase
import io.airbyte.cdk.command.ConfigurationJsonObjectSupplier
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.command.SourceConfigurationFactory
import io.airbyte.cdk.consumers.OutputConsumer
import io.airbyte.cdk.source.MetadataQuerier
import io.airbyte.cdk.util.ApmTraceUtils
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus
import io.airbyte.protocol.models.v0.AirbyteErrorTraceMessage
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Requires
import jakarta.inject.Singleton
import java.sql.SQLException
import org.apache.commons.lang3.exception.ExceptionUtils
private val log = KotlinLogging.logger {}
@Singleton
@Requires(property = Operation.PROPERTY, value = "check")
@Requires(env = ["source"])
class CheckOperation<T : ConfigurationJsonObjectBase>(
val configJsonObjectSupplier: ConfigurationJsonObjectSupplier<T>,
val configFactory: SourceConfigurationFactory<T, out SourceConfiguration>,
val metadataQuerierFactory: MetadataQuerier.Factory<SourceConfiguration>,
val outputConsumer: OutputConsumer,
) : Operation {
/** Wraps all checks in exception handling because CHECK must always exit cleanly. */
override fun execute() {
try {
log.info { "Parsing connector configuration JSON object." }
val pojo: T = configJsonObjectSupplier.get()
log.info { "Building internal connector configuration object." }
val config: SourceConfiguration = configFactory.make(pojo)
log.info { "Connecting for config check." }
metadataQuerierFactory.session(config).use { connectionCheck(it) }
} catch (e: SQLException) {
log.debug(e) { "SQLException while checking config." }
val message: String =
listOfNotNull(
e.sqlState?.let { "State code: $it" },
e.errorCode.takeIf { it != 0 }?.let { "Error code: $it" },
e.message?.let { "Message: $it" },
)
.joinToString(separator = "; ")
ApmTraceUtils.addExceptionToTrace(e)
outputConsumer.accept(
AirbyteErrorTraceMessage()
.withFailureType(AirbyteErrorTraceMessage.FailureType.CONFIG_ERROR)
.withMessage(message)
.withInternalMessage(e.toString())
.withStackTrace(ExceptionUtils.getStackTrace(e)),
)
outputConsumer.accept(
AirbyteConnectionStatus()
.withMessage(message)
.withStatus(AirbyteConnectionStatus.Status.FAILED),
)
log.info { "Config check failed." }
return
} catch (e: Exception) {
log.debug(e) { "Exception while checking config." }
ApmTraceUtils.addExceptionToTrace(e)
outputConsumer.acceptTraceOnConfigError(e)
outputConsumer.accept(
AirbyteConnectionStatus()
.withMessage(String.format(COMMON_EXCEPTION_MESSAGE_TEMPLATE, e.message))
.withStatus(AirbyteConnectionStatus.Status.FAILED),
)
log.info { "Config check failed." }
return
}
log.info { "Config check completed successfully." }
outputConsumer.accept(
AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED),
)
}
/**
* Checks the validity of the provided config:
* - by discovering the available tables,
* - by querying at least one table successfully.
*/
private fun connectionCheck(metadataQuerier: MetadataQuerier) {
log.info { "Querying all stream names and namespaces." }
var n = 0
val namespaces: List<String?> = listOf<String?>(null) + metadataQuerier.streamNamespaces()
for (namespace in namespaces) {
for (name in metadataQuerier.streamNames(namespace)) {
try {
metadataQuerier.fields(name, namespace)
} catch (e: Exception) {
log.info(e) {
"Query failed on stream '$name' in '${namespace ?: ""}': ${e.message}"
}
n++
continue
}
log.info { "Query successful on stream '$name' in '${namespace ?: ""}'." }
return
}
}
if (n == 0) {
throw RuntimeException("Discovered zero tables.")
} else {
throw RuntimeException("Unable to query any of the $n discovered table(s).")
}
}
companion object {
const val COMMON_EXCEPTION_MESSAGE_TEMPLATE: String =
"Could not connect with provided configuration. Error: %s"
}
}
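
A worked example of the SQLException message assembly above, runnable in isolation; note how the zero vendor error code is dropped:

fun sqlMessageSketch(): String {
    val e = SQLException("Connection refused", "08001", 0)
    return listOfNotNull(
        e.sqlState?.let { "State code: $it" },
        e.errorCode.takeIf { it != 0 }?.let { "Error code: $it" },
        e.message?.let { "Message: $it" },
    ).joinToString(separator = "; ")
    // returns "State code: 08001; Message: Connection refused"
}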

View File

@@ -0,0 +1,87 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.operation
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.consumers.OutputConsumer
import io.airbyte.cdk.source.AirbyteStreamDecorator
import io.airbyte.cdk.source.Field
import io.airbyte.cdk.source.MetadataQuerier
import io.airbyte.protocol.models.Field as AirbyteField
import io.airbyte.protocol.models.v0.AirbyteCatalog
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.CatalogHelpers
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Requires
import jakarta.inject.Singleton
private val log = KotlinLogging.logger {}
@Singleton
@Requires(property = Operation.PROPERTY, value = "discover")
@Requires(env = ["source"])
class DiscoverOperation(
val config: SourceConfiguration,
val metadataQuerierFactory: MetadataQuerier.Factory<SourceConfiguration>,
val airbyteStreamDecorator: AirbyteStreamDecorator,
val outputConsumer: OutputConsumer,
) : Operation {
override fun execute() {
val airbyteStreams = mutableListOf<AirbyteStream>()
metadataQuerierFactory.session(config).use { metadataQuerier: MetadataQuerier ->
val namespaces: List<String?> =
listOf<String?>(null) + metadataQuerier.streamNamespaces()
for (namespace in namespaces) {
for (name in metadataQuerier.streamNames(namespace)) {
val fields: List<Field> = metadataQuerier.fields(name, namespace)
if (fields.isEmpty()) {
log.info {
"Ignoring stream '$name' in '${namespace ?: ""}' because no fields were discovered."
}
continue
}
val primaryKeys: List<List<String>> =
metadataQuerier.primaryKeys(name, namespace)
val discoveredStream = DiscoveredStream(name, namespace, fields, primaryKeys)
airbyteStreams.add(toAirbyteStream(discoveredStream))
}
}
}
outputConsumer.accept(AirbyteCatalog().withStreams(airbyteStreams))
}
fun toAirbyteStream(discoveredStream: DiscoveredStream): AirbyteStream {
val allColumnsByID: Map<String, Field> = discoveredStream.columns.associateBy { it.id }
val airbyteStream: AirbyteStream =
CatalogHelpers.createAirbyteStream(
discoveredStream.name,
discoveredStream.namespace,
discoveredStream.columns.map {
AirbyteField.of(it.id, it.type.airbyteType.asJsonSchemaType())
},
)
val pkColumnIDs: List<List<String>> =
discoveredStream.primaryKeyColumnIDs.filter { pk: List<String> ->
// Only keep PKs whose values can be round-tripped.
pk.all { airbyteStreamDecorator.isPossiblePrimaryKeyElement(allColumnsByID[it]!!) }
}
airbyteStream.withSourceDefinedPrimaryKey(pkColumnIDs)
if (config.global) {
// There is a global feed of incremental records, like CDC.
airbyteStreamDecorator.decorateGlobal(airbyteStream)
} else if (discoveredStream.columns.any { airbyteStreamDecorator.isPossibleCursor(it) }) {
// There is one field whose values can be round-tripped and aggregated by MAX.
airbyteStreamDecorator.decorateNonGlobal(airbyteStream)
} else {
// There is no such field.
airbyteStreamDecorator.decorateNonGlobalNoCursor(airbyteStream)
}
return airbyteStream
}
data class DiscoveredStream(
val name: String,
val namespace: String?,
val columns: List<Field>,
val primaryKeyColumnIDs: List<List<String>>,
)
}
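
A hypothetical no-op AirbyteStreamDecorator for illustration (the interface is defined later in this commit); a real connector would set supported sync modes, default cursor fields, and so on:

class NoOpStreamDecorator : AirbyteStreamDecorator {
    override fun decorateGlobal(airbyteStream: AirbyteStream) {}
    override fun decorateNonGlobal(airbyteStream: AirbyteStream) {}
    override fun decorateNonGlobalNoCursor(airbyteStream: AirbyteStream) {}
    // Accept everything; signatures inferred from the call sites above.
    override fun isPossiblePrimaryKeyElement(field: Field) = true
    override fun isPossibleCursor(field: Field) = true
}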

View File

@@ -0,0 +1,104 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.operation
import hu.webarticum.treeprinter.SimpleTreeNode
import hu.webarticum.treeprinter.TreeNode
import hu.webarticum.treeprinter.printer.TreePrinter
import hu.webarticum.treeprinter.printer.listing.ListingTreePrinter
import io.airbyte.cdk.command.InputState
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.consumers.OutputConsumer
import io.airbyte.cdk.read.Feed
import io.airbyte.cdk.read.RootReader
import io.airbyte.cdk.read.StateManager
import io.airbyte.cdk.read.StateManagerFactory
import io.airbyte.cdk.source.PartitionsCreatorFactory
import io.airbyte.cdk.util.ThreadRenamingCoroutineName
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Requires
import jakarta.inject.Singleton
import kotlin.time.toKotlinDuration
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.delay
import kotlinx.coroutines.job
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
@Singleton
@Requires(property = Operation.PROPERTY, value = "read")
@Requires(env = ["source"])
class ReadOperation(
val config: SourceConfiguration,
val configuredCatalog: ConfiguredAirbyteCatalog,
val inputState: InputState,
val stateManagerFactory: StateManagerFactory,
val outputConsumer: OutputConsumer,
val partitionsCreatorFactory: PartitionsCreatorFactory,
) : Operation {
private val log = KotlinLogging.logger {}
override fun execute() {
val stateManager: StateManager =
stateManagerFactory.create(config, configuredCatalog, inputState)
val rootReader =
RootReader(
stateManager,
config.resourceAcquisitionHeartbeat,
config.checkpointTargetInterval,
outputConsumer,
partitionsCreatorFactory,
)
runBlocking(ThreadRenamingCoroutineName("read") + Dispatchers.Default) {
rootReader.read { feedJobs: Map<Feed, Job> ->
val rootJob = coroutineContext.job
launch(Job()) {
var previousJobTree = ""
while (feedJobs.values.any { it.isActive }) {
val currentJobTree: String = renderTree(rootJob)
if (currentJobTree != previousJobTree) {
log.info { "coroutine state:\n$currentJobTree" }
previousJobTree = currentJobTree
}
delay(config.resourceAcquisitionHeartbeat.toKotlinDuration())
}
}
}
}
}
companion object {
private val treePrinter: TreePrinter = ListingTreePrinter.builder().unicode().build()
private fun renderTree(feedsRootJob: Job): String {
val rootNode: TreeNode = recursiveBuildTree(feedsRootJob)
return treePrinter.stringify(rootNode)
}
private fun recursiveBuildTree(job: Job): TreeNode {
val name: String = label(job) ?: "???"
val node = SimpleTreeNode(name)
var children: List<Job> = job.children.toList()
// Collapse chains of jobs with identical names.
while (children.any { label(it) == name }) {
children =
children.flatMap {
if (label(it) == name) {
it.children.toList()
} else {
listOf(it)
}
}
}
for (child in children) {
node.addChild(recursiveBuildTree(child))
}
return node
}
private fun label(job: Job): String? =
(job as? CoroutineScope)?.coroutineContext?.get(ThreadRenamingCoroutineName)?.name
}
}

View File

@@ -0,0 +1,50 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import io.airbyte.cdk.source.Field
import io.airbyte.cdk.source.FieldOrMetaField
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
import io.airbyte.protocol.models.v0.StreamDescriptor
import io.airbyte.protocol.models.v0.SyncMode
/**
* [Feed] identifies part of the data consumed during a READ operation.
*
* It's either one of the configured streams in the catalog, or some kind of global data feed
* comprising records for multiple streams, as is the case in database CDC.
*/
sealed interface Feed {
val label: String
}
/** Acts as a key for Airbyte STATE messages of type GLOBAL. */
data class Global(
val streams: List<Stream>,
) : Feed {
override val label: String
get() = "global"
}
/**
* Acts as a key for Airbyte STATE messages of type STREAM.
*
* Roughly equivalent to a [io.airbyte.protocol.models.v0.ConfiguredAirbyteStream].
*/
data class Stream(
val name: String,
val namespace: String?,
val fields: List<Field>,
val primaryKeyCandidates: List<List<Field>>,
val configuredSyncMode: SyncMode,
val configuredPrimaryKey: List<Field>?,
val configuredCursor: FieldOrMetaField?,
) : Feed {
val namePair: AirbyteStreamNameNamespacePair
get() = AirbyteStreamNameNamespacePair(name, namespace)
val streamDescriptor: StreamDescriptor
get() = StreamDescriptor().withName(name).withNamespace(namespace)
override val label: String
get() = namePair.toString()
}
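
Because Feed is sealed, downstream code dispatches exhaustively without an else branch, as the state managers further below do; a trivial sketch:

fun describe(feed: Feed): String =
    when (feed) {
        is Global -> "global feed over ${feed.streams.size} stream(s)"
        is Stream -> "stream feed for ${feed.namePair}"
    }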

View File

@@ -0,0 +1,306 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.source.PartitionReadCheckpoint
import io.airbyte.cdk.source.PartitionReader
import io.airbyte.cdk.source.PartitionsCreator
import io.airbyte.cdk.util.ThreadRenamingCoroutineName
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.AirbyteStreamStatusTraceMessage
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.coroutineContext
import kotlin.time.toKotlinDuration
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Deferred
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.TimeoutCancellationException
import kotlinx.coroutines.async
import kotlinx.coroutines.launch
import kotlinx.coroutines.selects.select
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import kotlinx.coroutines.withTimeout
/**
* A [FeedReader] manages the publishing of RECORD, STATE and TRACE messages for a single [feed].
*/
class FeedReader(
val root: RootReader,
val feed: Feed,
) {
private val log = KotlinLogging.logger {}
/** Reads records from this [feed]. */
suspend fun read() {
var partitionsCreatorID = 1L
while (true) {
// Create PartitionReader instances.
val partitionReaders: List<PartitionReader> = createPartitions(partitionsCreatorID)
if (partitionReaders.isEmpty()) {
log.info {
"no more partitions to read for '${feed.label}' in round $partitionsCreatorID"
}
emitStreamStatus(AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)
break
}
// Launch coroutines which read from each partition.
val scheduledPartitionReaders =
mutableMapOf<Long, Deferred<Result<PartitionReadCheckpoint>>>()
var partitionReaderID = 1L
var previousAcquirerJob: Job? = null
for (partitionReader in partitionReaders) {
val (acquirerJob: Job, readerJob: Deferred<Result<PartitionReadCheckpoint>>) =
asyncReadPartition(
partitionsCreatorID,
partitionReaderID,
partitionReader,
previousAcquirerJob,
)
previousAcquirerJob = acquirerJob
scheduledPartitionReaders[partitionReaderID++] = readerJob
}
// Wait for all PartitionReader coroutines to complete.
awaitAllPartitionReaders(scheduledPartitionReaders)
partitionsCreatorID++
}
}
private suspend fun createPartitions(partitionsCreatorID: Long): List<PartitionReader> {
val partitionsCreator: PartitionsCreator =
root.partitionsCreatorFactory.make(root.stateManager, feed)
withContext(ctx("round-$partitionsCreatorID-acquire-resources")) {
acquirePartitionsCreatorResources(partitionsCreatorID, partitionsCreator)
}
if (1L == partitionsCreatorID) {
emitStreamStatus(AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)
}
return withContext(ctx("round-$partitionsCreatorID-create-partitions")) {
createPartitionsWithResources(partitionsCreatorID, partitionsCreator)
}
}
private suspend fun acquirePartitionsCreatorResources(
partitionsCreatorID: Long,
partitionsCreator: PartitionsCreator,
) {
while (true) {
val status: PartitionsCreator.TryAcquireResourcesStatus =
// Resource acquisition always executes serially.
root.resourceAcquisitionMutex.withLock { partitionsCreator.tryAcquireResources() }
if (status == PartitionsCreator.TryAcquireResourcesStatus.READY_TO_RUN) break
root.waitForResourceAvailability()
}
log.info {
"acquired resources to create partitions " +
"for '${feed.label}' in round $partitionsCreatorID"
}
}
private suspend fun createPartitionsWithResources(
partitionsCreatorID: Long,
partitionsCreator: PartitionsCreator,
): List<PartitionReader> {
log.info { "creating partitions for '${feed.label}' in round $partitionsCreatorID" }
return try {
partitionsCreator.run()
} finally {
log.info {
"releasing resources acquired to create partitions " +
"for '${feed.label}' in round $partitionsCreatorID"
}
partitionsCreator.releaseResources()
root.notifyResourceAvailability()
}
}
private suspend fun asyncReadPartition(
partitionsCreatorID: Long,
partitionReaderID: Long,
partitionReader: PartitionReader,
previousAcquirerJob: Job?,
): Pair<Job, Deferred<Result<PartitionReadCheckpoint>>> {
// Create resource acquisition job.
        // Resource acquisition needs to be asynchronous because it may block for an
        // indeterminate amount of time, which would prevent PartitionReader instances
        // which have already started and completed from either emitting a checkpoint
        // or triggering cancellations.
val acquirerJobNameSuffix =
"round-$partitionsCreatorID-partition-$partitionReaderID-acquire-resources"
val acquirerJob: Job =
CoroutineScope(ctx(acquirerJobNameSuffix)).launch {
// Wait for the previous PartitionReader to acquire resources first ...
previousAcquirerJob?.join()
// ... before acquiring resources for this PartitionReader.
acquirePartitionReaderResources(
partitionsCreatorID,
partitionReaderID,
partitionReader,
)
}
// Create reader job which waits on the acquirer job.
val readerJobNameSuffix = "round-$partitionsCreatorID-partition-$partitionReaderID-read"
val readerJob: Deferred<Result<PartitionReadCheckpoint>> =
CoroutineScope(ctx(readerJobNameSuffix)).async {
                // Catch all exceptions; these are handled further on.
runCatching {
// Acquire resources first.
acquirerJob.join()
// Read partition.
readPartitionWithResources(
partitionsCreatorID,
partitionReaderID,
partitionReader,
)
}
}
return acquirerJob to readerJob
}
private suspend fun acquirePartitionReaderResources(
partitionsCreatorID: Long,
partitionReaderID: Long,
partitionReader: PartitionReader,
) {
while (true) {
val status: PartitionReader.TryAcquireResourcesStatus =
// Resource acquisition always executes serially.
root.resourceAcquisitionMutex.withLock { partitionReader.tryAcquireResources() }
if (status == PartitionReader.TryAcquireResourcesStatus.READY_TO_RUN) break
root.waitForResourceAvailability()
}
log.info {
"acquired resources to read partition $partitionReaderID for '${feed.label}' in round $partitionsCreatorID"
}
}
private suspend fun readPartitionWithResources(
partitionsCreatorID: Long,
partitionReaderID: Long,
partitionReader: PartitionReader,
): PartitionReadCheckpoint {
log.info {
"reading partition $partitionReaderID " +
"for '${feed.label}' in round $partitionsCreatorID"
}
var checkpoint: PartitionReadCheckpoint
try {
withTimeout(root.timeout.toKotlinDuration()) { partitionReader.run() }
log.info {
"completed reading partition $partitionReaderID " +
"for '${feed.label}' in round $partitionsCreatorID"
}
checkpoint = partitionReader.checkpoint()
} catch (e: TimeoutCancellationException) {
log.info {
"timed out reading partition $partitionReaderID " +
"for '${feed.label}' in round $partitionsCreatorID"
}
checkpoint = partitionReader.checkpoint()
} finally {
log.info {
"releasing resources acquired to read partition $partitionReaderID " +
"for '${feed.label}' in round $partitionsCreatorID"
}
partitionReader.releaseResources()
root.notifyResourceAvailability()
}
log.info {
"read ${checkpoint.numRecords} record(s) from partition $partitionReaderID " +
"for '${feed.label}' in round $partitionsCreatorID"
}
return checkpoint
}
private suspend fun awaitAllPartitionReaders(
scheduled: Map<Long, Deferred<Result<PartitionReadCheckpoint>>>,
) {
fun label(partitionReaderID: Long): String =
"partition $partitionReaderID / ${scheduled.size} for '${feed.label}'"
// This map stores known results for all PartitionReader instances.
val results = mutableMapOf<Long, Result<PartitionReadCheckpoint>>()
        // Although the PartitionReader instances run concurrently, the FeedReader
        // mimics serial execution; this simplifies publishing checkpoints of
        // forward progress.
// The following var tracks which PartitionReader is due next.
var pendingPartitionReaderID = 1L
// Loop until all PartitionReader instances have completed one way or another.
while (results.size < scheduled.size) {
            // Wait for any PartitionReader which hasn't yet completed to do so.
val completedIDs: Set<Long> = results.keys.toSet()
val (completedPartitionReaderID: Long, result: Result<PartitionReadCheckpoint>) =
select {
for ((partitionReaderID, deferred) in scheduled) {
if (partitionReaderID !in completedIDs) {
deferred.onAwait { partitionReaderID to it }
}
}
}
// If the completed coroutine failed, cancel the coroutines for all PartitionReaders
// which are "later" as far as the mimicked order of execution is concerned.
// Everything they've done and are going to do is going to be wasted anyway
// so make them finish ASAP.
result.onFailure { exception: Throwable ->
log.warn(exception) { "exception thrown in ${label(completedPartitionReaderID)}" }
val message = "canceled due to failure of ${label(completedPartitionReaderID)}"
for ((partitionReaderID, deferred) in scheduled) {
if (partitionReaderID > completedPartitionReaderID) {
log.warn { "canceling ${label(partitionReaderID)}" }
val cancellationException = CancellationException(message, exception)
deferred.cancel(cancellationException)
// Don't select from this one in the next iteration.
// We don't want select to fail by throwing a CancellationException.
results[partitionReaderID] = Result.failure(cancellationException)
}
}
}
// Store the result and try to make forward progress in the mimicked serial execution.
results[completedPartitionReaderID] = result
try {
while (true) {
// Exit the loop if the pending result doesn't exist yet.
val pendingResult: Result<PartitionReadCheckpoint> =
results[pendingPartitionReaderID] ?: break
// Re-throw any exception that the PartitionReader may have thrown.
// Otherwise, update the StateManager with the forward progress.
log.info {
"processing result (success = ${pendingResult.isSuccess}) from reading " +
label(pendingPartitionReaderID)
}
val (opaqueStateValue: OpaqueStateValue, numRecords: Long) =
pendingResult.getOrThrow()
root.stateManager.scoped(feed).set(opaqueStateValue, numRecords)
log.info {
"updated state of '${feed.label}', moved it $numRecords record(s) forward"
}
// Move on to the next PartitionReader instance.
pendingPartitionReaderID++
}
} finally {
// Publish a checkpoint if applicable.
val stateMessages: List<AirbyteStateMessage> = root.stateManager.checkpoint()
if (stateMessages.isNotEmpty()) {
log.info { "checkpoint of ${stateMessages.size} state message(s)" }
stateMessages.forEach(root.outputConsumer::accept)
}
}
}
}
private suspend fun ctx(nameSuffix: String): CoroutineContext =
coroutineContext + ThreadRenamingCoroutineName("${feed.label}-$nameSuffix") + Dispatchers.IO
private fun emitStreamStatus(status: AirbyteStreamStatusTraceMessage.AirbyteStreamStatus) {
if (feed is Stream) {
root.outputConsumer.accept(
AirbyteStreamStatusTraceMessage()
.withStreamDescriptor(feed.streamDescriptor)
.withStatus(status),
)
}
}
}

View File

@@ -0,0 +1,130 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import io.airbyte.cdk.consumers.OutputConsumer
import io.airbyte.cdk.source.PartitionsCreatorFactory
import io.airbyte.cdk.util.ThreadRenamingCoroutineName
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.Duration
import java.util.concurrent.ConcurrentHashMap
import kotlin.coroutines.CoroutineContext
import kotlin.time.toKotlinDuration
import kotlinx.coroutines.CoroutineExceptionHandler
import kotlinx.coroutines.Job
import kotlinx.coroutines.cancel
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.collectLatest
import kotlinx.coroutines.flow.update
import kotlinx.coroutines.launch
import kotlinx.coroutines.supervisorScope
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.withTimeoutOrNull
/**
* [RootReader] is at the heart of the READ operation. A [RootReader] manages multiple [FeedReader]
* instances (one per [Feed]) and synchronizes them using coroutines.
*
* This object exists mainly to facilitate unit testing by keeping dependencies to a minimum.
*/
class RootReader(
val stateManager: StateManager,
val resourceAcquisitionHeartbeat: Duration,
val timeout: Duration,
val outputConsumer: OutputConsumer,
val partitionsCreatorFactory: PartitionsCreatorFactory,
) {
private val log = KotlinLogging.logger {}
/** [Mutex] ensuring that resource acquisition always happens serially. */
val resourceAcquisitionMutex: Mutex = Mutex()
private val resourceReleaseFlow: MutableStateFlow<Long> = MutableStateFlow(0L)
/** Notify a potential change in resource availability. */
fun notifyResourceAvailability() {
resourceReleaseFlow.update { it + 1 }
}
/** Wait until an availability notification arrives or a timeout is reached. */
suspend fun waitForResourceAvailability() {
withTimeoutOrNull(resourceAcquisitionHeartbeat.toKotlinDuration()) {
resourceReleaseFlow.collectLatest {}
}
}
/** Reads records from all [Feed]s. */
suspend fun read(listener: suspend (Map<Feed, Job>) -> Unit = {}) {
supervisorScope {
val feeds: List<Feed> = stateManager.feeds
val exceptions = ConcurrentHashMap<Feed, Throwable>()
// Launch one coroutine per feed.
val feedJobs: Map<Feed, Job> =
feeds.associateWith { feed: Feed ->
val coroutineName = ThreadRenamingCoroutineName(feed.label)
val handler = FeedExceptionHandler(feed, exceptions)
launch(coroutineName + handler) { FeedReader(this@RootReader, feed).read() }
}
// Call listener hook.
listener(feedJobs)
// Join on all stream feeds and collect caught exceptions.
val streamExceptions: Map<Stream, Throwable?> =
feeds.filterIsInstance<Stream>().associateWith {
feedJobs[it]?.join()
exceptions[it]
}
// Cancel any incomplete global feed job whose stream feed jobs have not all succeeded.
for ((global, globalJob) in feedJobs) {
if (global !is Global) continue
if (globalJob.isCompleted) continue
val globalStreamExceptions: List<Throwable> =
global.streams.mapNotNull { streamExceptions[it] }
if (globalStreamExceptions.isNotEmpty()) {
val cause: Throwable =
globalStreamExceptions.reduce { acc: Throwable, exception: Throwable ->
acc.addSuppressed(exception)
acc
}
globalJob.cancel("at least one stream did non complete", cause)
}
}
// Join on all global feeds and collect caught exceptions.
val globalExceptions: Map<Global, Throwable?> =
feeds.filterIsInstance<Global>().associateWith {
feedJobs[it]?.join()
exceptions[it]
}
// Reduce and throw any caught exceptions.
val caughtExceptions: List<Throwable> =
streamExceptions.values.mapNotNull { it } +
globalExceptions.values.mapNotNull { it }
if (caughtExceptions.isNotEmpty()) {
val cause: Throwable =
caughtExceptions.reduce { acc: Throwable, exception: Throwable ->
acc.addSuppressed(exception)
acc
}
throw cause
}
}
}
class FeedExceptionHandler(
val feed: Feed,
private val exceptions: ConcurrentHashMap<Feed, Throwable>,
) : CoroutineExceptionHandler {
private val log = KotlinLogging.logger {}
override val key: CoroutineContext.Key<CoroutineExceptionHandler>
get() = CoroutineExceptionHandler.Key
override fun handleException(
context: CoroutineContext,
exception: Throwable,
) {
log.warn(exception) { "canceled feed '${feed.label}' due to thrown exception" }
exceptions[feed] = exception
}
override fun toString(): String = "FeedExceptionHandler(${feed.label})"
}
}

View File

@@ -0,0 +1,195 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.protocol.models.v0.AirbyteGlobalState
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.AirbyteStateStats
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair
import io.airbyte.protocol.models.v0.AirbyteStreamState
/** A [StateQuerier] is like a read-only [StateManager]. */
interface StateQuerier {
    /** [feeds] lists all the [Feed]s in the configured catalog passed via the CLI. */
val feeds: List<Feed>
/** Returns the current state value for the given [feed]. */
fun current(feed: Feed): OpaqueStateValue?
}
/** Singleton object which tracks the state of an ongoing READ operation. */
class StateManager(
global: Global? = null,
initialGlobalState: OpaqueStateValue? = null,
initialStreamStates: Map<Stream, OpaqueStateValue?> = mapOf(),
) : StateQuerier {
private val global: GlobalStateManager?
private val nonGlobal: Map<AirbyteStreamNameNamespacePair, NonGlobalStreamStateManager>
init {
if (global == null) {
this.global = null
nonGlobal =
initialStreamStates
.mapValues { NonGlobalStreamStateManager(it.key, it.value) }
.mapKeys { it.key.namePair }
} else {
val globalStreams: Map<Stream, OpaqueStateValue?> =
global.streams.associateWith { initialStreamStates[it] }
this.global =
GlobalStateManager(
global = global,
initialGlobalState = initialGlobalState,
initialStreamStates = globalStreams,
)
nonGlobal =
initialStreamStates
.filterKeys { !globalStreams.containsKey(it) }
.mapValues { NonGlobalStreamStateManager(it.key, it.value) }
.mapKeys { it.key.namePair }
}
}
override val feeds: List<Feed> =
listOfNotNull(this.global?.feed) +
(this.global?.streamStateManagers?.values?.map { it.feed } ?: listOf()) +
nonGlobal.values.map { it.feed }
override fun current(feed: Feed): OpaqueStateValue? = scoped(feed).current()
/** Returns a [StateManagerScopedToFeed] instance scoped to this [feed]. */
fun scoped(feed: Feed): StateManagerScopedToFeed =
when (feed) {
is Global -> global ?: throw IllegalArgumentException("unknown global key")
is Stream -> global?.streamStateManagers?.get(feed.namePair)
?: nonGlobal[feed.namePair]
?: throw IllegalArgumentException("unknown stream key")
}
interface StateManagerScopedToFeed {
/**
* The [Feed] to which the [StateManager] is scoped in this instance of
* [StateManagerScopedToFeed].
*/
val feed: Feed
/** Returns the current state value in the [StateManager] for this [feed]. */
fun current(): OpaqueStateValue?
/** Updates the current state value in the [StateManager] for this [feed]. */
fun set(
state: OpaqueStateValue,
numRecords: Long,
)
}
/**
* Returns the Airbyte STATE messages which checkpoint the progress of the READ in the platform.
* Updates the internal state of the [StateManager] to ensure idempotency (no redundant messages
* are emitted).
*/
fun checkpoint(): List<AirbyteStateMessage> =
listOfNotNull(global?.checkpoint()) + nonGlobal.mapNotNull { it.value.checkpoint() }
private sealed class BaseStateManager<K : Feed>(
override val feed: K,
initialState: OpaqueStateValue?,
private val isCheckpointUnique: Boolean = true,
) : StateManagerScopedToFeed {
private var current: OpaqueStateValue? = initialState
private var pending: OpaqueStateValue? = initialState
private var pendingNumRecords: Long = 0L
override fun current(): OpaqueStateValue? = synchronized(this) { current }
override fun set(
state: OpaqueStateValue,
numRecords: Long,
) {
synchronized(this) {
pending = state
pendingNumRecords += numRecords
}
}
fun swap(): Pair<OpaqueStateValue?, Long>? {
synchronized(this) {
if (isCheckpointUnique && pendingNumRecords == 0L && pending == current) {
return null
}
val returnValue: Pair<OpaqueStateValue?, Long> = pending to pendingNumRecords
current = pending
pendingNumRecords = 0L
return returnValue
}
}
}
private class GlobalStateManager(
global: Global,
initialGlobalState: OpaqueStateValue?,
initialStreamStates: Map<Stream, OpaqueStateValue?>,
) : BaseStateManager<Global>(global, initialGlobalState) {
val streamStateManagers: Map<AirbyteStreamNameNamespacePair, GlobalStreamStateManager> =
initialStreamStates
.mapValues { GlobalStreamStateManager(it.key, it.value) }
.mapKeys { it.key.namePair }
fun checkpoint(): AirbyteStateMessage? {
var numSwapped = 0
var totalNumRecords: Long = 0L
var globalStateValue: OpaqueStateValue? = current()
val globalSwapped: Pair<OpaqueStateValue?, Long>? = swap()
if (globalSwapped != null) {
numSwapped++
globalStateValue = globalSwapped.first
totalNumRecords += globalSwapped.second
}
val streamStates = mutableListOf<AirbyteStreamState>()
for ((_, streamStateManager) in streamStateManagers) {
var streamStateValue: OpaqueStateValue? = streamStateManager.current()
val globalStreamSwapped: Pair<OpaqueStateValue?, Long>? = streamStateManager.swap()
if (globalStreamSwapped != null) {
numSwapped++
streamStateValue = globalStreamSwapped.first
totalNumRecords += globalStreamSwapped.second
}
streamStates.add(
AirbyteStreamState()
.withStreamDescriptor(streamStateManager.feed.streamDescriptor)
.withStreamState(streamStateValue),
)
}
val airbyteGlobalState =
AirbyteGlobalState()
.withSharedState(globalStateValue)
.withStreamStates(streamStates)
return AirbyteStateMessage()
.withType(AirbyteStateMessage.AirbyteStateType.GLOBAL)
.withGlobal(airbyteGlobalState)
.withSourceStats(AirbyteStateStats().withRecordCount(totalNumRecords.toDouble()))
}
}
private class GlobalStreamStateManager(
stream: Stream,
initialState: OpaqueStateValue?,
) : BaseStateManager<Stream>(stream, initialState, isCheckpointUnique = false)
private class NonGlobalStreamStateManager(
stream: Stream,
initialState: OpaqueStateValue?,
) : BaseStateManager<Stream>(stream, initialState) {
fun checkpoint(): AirbyteStateMessage? {
val (opaqueStateValue: OpaqueStateValue?, numRecords: Long) = swap() ?: return null
val airbyteStreamState =
AirbyteStreamState()
.withStreamDescriptor(feed.streamDescriptor)
.withStreamState(opaqueStateValue)
return AirbyteStateMessage()
.withType(AirbyteStateMessage.AirbyteStateType.STREAM)
.withStream(airbyteStreamState)
.withSourceStats(AirbyteStateStats().withRecordCount(numRecords.toDouble()))
}
}
}
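
A sketch of the checkpoint lifecycle for a STREAM-state source, with caller-supplied stream and state value; the second checkpoint() is empty thanks to the idempotency guarantee described above:

fun stateManagerSketch(stream: Stream, state: OpaqueStateValue) {
    val manager = StateManager(initialStreamStates = mapOf(stream to null))
    manager.scoped(stream).set(state, numRecords = 42L)
    check(manager.checkpoint().size == 1) // one STREAM state message, recordCount 42.0
    check(manager.checkpoint().isEmpty()) // no forward progress since the last call
}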

View File

@@ -0,0 +1,243 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.EmptyInputState
import io.airbyte.cdk.command.GlobalInputState
import io.airbyte.cdk.command.InputState
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.command.StreamInputState
import io.airbyte.cdk.consumers.CatalogValidationFailureHandler
import io.airbyte.cdk.consumers.FieldNotFound
import io.airbyte.cdk.consumers.FieldTypeMismatch
import io.airbyte.cdk.consumers.InvalidIncrementalSyncMode
import io.airbyte.cdk.consumers.InvalidPrimaryKey
import io.airbyte.cdk.consumers.MultipleStreamsFound
import io.airbyte.cdk.consumers.StreamHasNoFields
import io.airbyte.cdk.consumers.StreamNotFound
import io.airbyte.cdk.data.AirbyteType
import io.airbyte.cdk.data.ArrayAirbyteType
import io.airbyte.cdk.data.LeafAirbyteType
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.source.CommonMetaField
import io.airbyte.cdk.source.Field
import io.airbyte.cdk.source.FieldOrMetaField
import io.airbyte.cdk.source.MetaField
import io.airbyte.cdk.source.MetadataQuerier
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.SyncMode
import jakarta.inject.Singleton
/**
* A factory for instantiating [StateManager] based on the inputs of a READ. These inputs are
* deliberately not injected here to make testing easier.
*/
@Singleton
class StateManagerFactory(
val metadataQuerierFactory: MetadataQuerier.Factory<SourceConfiguration>,
val handler: CatalogValidationFailureHandler,
) {
/** Generates a [StateManager] instance based on the provided inputs. */
fun create(
config: SourceConfiguration,
configuredCatalog: ConfiguredAirbyteCatalog,
inputState: InputState,
): StateManager {
val allStreams: List<Stream> =
metadataQuerierFactory.session(config).use { mq ->
configuredCatalog.streams.mapNotNull { toStream(mq, it) }
}
return if (config.global) {
when (inputState) {
is StreamInputState ->
throw ConfigErrorException("input state unexpectedly of type STREAM")
is GlobalInputState -> forGlobal(allStreams, inputState)
is EmptyInputState -> forGlobal(allStreams)
}
} else {
when (inputState) {
is GlobalInputState ->
throw ConfigErrorException("input state unexpectedly of type GLOBAL")
is StreamInputState -> forStream(allStreams, inputState)
is EmptyInputState -> forStream(allStreams)
}
}
}
private fun forGlobal(
streams: List<Stream>,
inputState: GlobalInputState? = null,
) =
StateManager(
global = Global(streams.filter { it.configuredSyncMode == SyncMode.INCREMENTAL }),
initialGlobalState = inputState?.global,
initialStreamStates =
streams.associateWith { stream: Stream ->
when (stream.configuredSyncMode) {
SyncMode.INCREMENTAL -> inputState?.globalStreams?.get(stream.namePair)
SyncMode.FULL_REFRESH -> inputState?.nonGlobalStreams?.get(stream.namePair)
}
},
)
private fun forStream(
streams: List<Stream>,
inputState: StreamInputState? = null,
) =
StateManager(
initialStreamStates =
streams.associateWith { stream: Stream ->
inputState?.streams?.get(stream.namePair)
},
)
private fun toStream(
metadataQuerier: MetadataQuerier,
configuredStream: ConfiguredAirbyteStream,
): Stream? {
val stream: AirbyteStream = configuredStream.stream
val jsonSchemaProperties: JsonNode = stream.jsonSchema["properties"]
val name: String = stream.name!!
val namespace: String? = stream.namespace
when (metadataQuerier.streamNames(namespace).filter { it == name }.size) {
0 -> {
handler.accept(StreamNotFound(name, namespace))
return null
}
1 -> Unit
else -> {
handler.accept(MultipleStreamsFound(name, namespace))
return null
}
}
val expectedSchema: Map<String, AirbyteType> =
jsonSchemaProperties.properties().associate { (id: String, schema: JsonNode) ->
id to airbyteTypeFromJsonSchema(schema)
}
val actualDataColumns: Map<String, Field> =
metadataQuerier.fields(name, namespace).associateBy { it.id }
fun dataColumnOrNull(id: String): Field? {
if (MetaField.isMetaFieldID(id)) {
// Ignore airbyte metadata columns.
// These aren't actually present in the table.
return null
}
val actualColumn: Field? = actualDataColumns[id]
if (actualColumn == null) {
handler.accept(FieldNotFound(name, namespace, id))
return null
}
val expectedAirbyteType: AirbyteType = expectedSchema[id] ?: return null
val actualAirbyteType: AirbyteType = actualColumn.type.airbyteType
if (expectedAirbyteType != actualAirbyteType) {
handler.accept(
FieldTypeMismatch(
name,
namespace,
id,
expectedAirbyteType,
actualAirbyteType,
),
)
return null
}
return actualColumn
}
val streamFields: List<Field> =
expectedSchema.keys.toList().filterNot(MetaField::isMetaFieldID).map {
dataColumnOrNull(it) ?: return@toStream null
}
if (streamFields.isEmpty()) {
handler.accept(StreamHasNoFields(name, namespace))
return null
}
fun pkOrNull(pkColumnIDs: List<String>): List<Field>? {
val pk: List<Field> = pkColumnIDs.mapNotNull(::dataColumnOrNull)
if (pk.isEmpty() || pk.size < pkColumnIDs.size) {
handler.accept(InvalidPrimaryKey(name, namespace, pkColumnIDs))
return null
}
return pk
}
fun cursorOrNull(cursorColumnID: String): FieldOrMetaField? {
if (cursorColumnID == CommonMetaField.CDC_LSN.id) {
return CommonMetaField.CDC_LSN
}
return dataColumnOrNull(cursorColumnID)
}
val primaryKeyCandidates: List<List<Field>> =
stream.sourceDefinedPrimaryKey.mapNotNull(::pkOrNull)
val configuredPrimaryKey: List<Field>? =
configuredStream.primaryKey?.asSequence()?.mapNotNull(::pkOrNull)?.firstOrNull()
val configuredCursor: FieldOrMetaField? =
configuredStream.cursorField?.asSequence()?.mapNotNull(::cursorOrNull)?.firstOrNull()
val configuredSyncMode: SyncMode =
when (configuredStream.syncMode) {
SyncMode.INCREMENTAL ->
if (configuredCursor == null) {
handler.accept(InvalidIncrementalSyncMode(name, namespace))
SyncMode.FULL_REFRESH
} else {
SyncMode.INCREMENTAL
}
else -> SyncMode.FULL_REFRESH
}
return Stream(
name,
namespace,
streamFields,
primaryKeyCandidates,
configuredSyncMode,
configuredPrimaryKey,
configuredCursor,
)
}
/**
* Recursively re-generates the original [AirbyteType] from a catalog stream field's JSON
* schema.
*/
private fun airbyteTypeFromJsonSchema(jsonSchema: JsonNode): AirbyteType {
fun value(key: String): String = jsonSchema[key]?.asText() ?: ""
return when (value("type")) {
"array" -> ArrayAirbyteType(airbyteTypeFromJsonSchema(jsonSchema["items"]))
"null" -> LeafAirbyteType.NULL
"boolean" -> LeafAirbyteType.BOOLEAN
"number" ->
when (value("airbyte_type")) {
"integer",
"big_integer", -> LeafAirbyteType.INTEGER
else -> LeafAirbyteType.NUMBER
}
"string" ->
when (value("format")) {
"date" -> LeafAirbyteType.DATE
"date-time" ->
if (value("airbyte_type") == "timestamp_with_timezone") {
LeafAirbyteType.TIMESTAMP_WITH_TIMEZONE
} else {
LeafAirbyteType.TIMESTAMP_WITHOUT_TIMEZONE
}
"time" ->
if (value("airbyte_type") == "time_with_timezone") {
LeafAirbyteType.TIME_WITH_TIMEZONE
} else {
LeafAirbyteType.TIME_WITHOUT_TIMEZONE
}
else ->
if (value("contentEncoding") == "base64") {
LeafAirbyteType.BINARY
} else {
LeafAirbyteType.STRING
}
}
else -> LeafAirbyteType.JSONB
}
}
}
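// For illustration (not part of this change): how airbyteTypeFromJsonSchema resolves a few
// representative JSON schema fragments, per the mapping above.
//   {"type":"number","airbyte_type":"integer"}      -> LeafAirbyteType.INTEGER
//   {"type":"string","format":"date"}               -> LeafAirbyteType.DATE
//   {"type":"string","contentEncoding":"base64"}    -> LeafAirbyteType.BINARY
//   {"type":"array","items":{"type":"boolean"}}     -> ArrayAirbyteType(LeafAirbyteType.BOOLEAN)
//   {"type":"object"}                               -> LeafAirbyteType.JSONB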

View File

@@ -0,0 +1,52 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.source
import io.airbyte.protocol.models.v0.AirbyteStream
/**
* Stateless object for building an [AirbyteStream] during DISCOVER.
*
* [DefaultAirbyteStreamDecorator] is the sane default implementation, to be replaced with
* connector-specific implementations when required.
*/
interface AirbyteStreamDecorator {
/** Connector-specific [AirbyteStream] decoration logic for GLOBAL-state streams. */
fun decorateGlobal(airbyteStream: AirbyteStream)
/**
* Connector-specific [AirbyteStream] decoration logic for STREAM-state streams for which at
* least one discovered field can be used as a user-defined cursor in incremental syncs.
*/
fun decorateNonGlobal(airbyteStream: AirbyteStream)
/**
* Connector-specific [AirbyteStream] decoration logic for STREAM-state streams for which no
* discovered field can be used as a user-defined cursor in incremental syncs.
*/
fun decorateNonGlobalNoCursor(airbyteStream: AirbyteStream)
/**
* Can the field be used as part of a primary key?
*
* For this to be possible,
* 1. the field needs to be part of a key as defined by the source,
* 2. and its values must be deserializable from the checkpoint persisted in an Airbyte state
* message.
*
* This method does not determine (1), of course, because the source keys are defined in the
* source database itself and are retrieved via [MetadataQuerier.primaryKeys]. Instead, this
* method determines (2) based on the type information of the field, typically the [FieldType]
     * objects. For instance, if the [Field.type] does not map to a [LosslessFieldType], then the
     * field can't reliably round-trip checkpoint values during a resumable initial sync.
*/
fun isPossiblePrimaryKeyElement(field: Field): Boolean
/**
* Can the field be used as a cursor in a cursor-based incremental sync?
*
* This predicate is like [isPossiblePrimaryKeyElement] but tighter: in addition to being able
* to round-trip the column values, we need to be able to query the max value from the source at
* the start of the sync.
*/
fun isPossibleCursor(field: Field): Boolean
}
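// For illustration (not part of this change): a minimal implementation sketch. The class
// name is hypothetical, and io.airbyte.protocol.models.v0.SyncMode is assumed to be
// imported; the actual DefaultAirbyteStreamDecorator lives elsewhere in the CDK.
class MinimalStreamDecorator : AirbyteStreamDecorator {
    override fun decorateGlobal(airbyteStream: AirbyteStream) {
        // GLOBAL-state (CDC-style) streams support incremental syncs with a source-defined cursor.
        airbyteStream.supportedSyncModes = listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)
        airbyteStream.sourceDefinedCursor = true
    }

    override fun decorateNonGlobal(airbyteStream: AirbyteStream) {
        // At least one discovered field can serve as a user-defined cursor.
        airbyteStream.supportedSyncModes = listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)
    }

    override fun decorateNonGlobalNoCursor(airbyteStream: AirbyteStream) {
        // No usable cursor: full refresh only.
        airbyteStream.supportedSyncModes = listOf(SyncMode.FULL_REFRESH)
    }

    override fun isPossiblePrimaryKeyElement(field: Field): Boolean =
        // Checkpointed values must round-trip, per the KDoc above.
        field.type is LosslessFieldType

    override fun isPossibleCursor(field: Field): Boolean =
        // Tighter than the primary-key predicate; a real connector would also exclude types
        // whose max value can't be queried cheaply at the start of the sync.
        isPossiblePrimaryKeyElement(field)
}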

View File

@@ -0,0 +1,84 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.source
import io.airbyte.cdk.data.AirbyteType
import io.airbyte.cdk.data.JsonDecoder
import io.airbyte.cdk.data.JsonEncoder
import io.airbyte.cdk.data.JsonStringCodec
import io.airbyte.cdk.data.LeafAirbyteType
import io.airbyte.cdk.data.OffsetDateTimeCodec
import java.time.OffsetDateTime
/** Internal equivalent of a [io.airbyte.protocol.models.Field]. */
sealed interface FieldOrMetaField {
val id: String
val type: FieldType
}
/**
* Root of our own type hierarchy for Airbyte record fields.
*
* Connectors may define their own concrete implementations.
*/
interface FieldType {
/** maps to [io.airbyte.protocol.models.Field.type] */
val airbyteType: AirbyteType
val jsonEncoder: JsonEncoder<*>
}
/**
* Subtype of [FieldType] for all [FieldType]s whose Airbyte record values can be turned back into
* their original source values. This allows these values to be persisted in an Airbyte state
* message.
*
* Connectors may define their own concrete implementations.
*/
interface LosslessFieldType : FieldType {
val jsonDecoder: JsonDecoder<*>
}
/**
* Internal equivalent of [io.airbyte.protocol.models.Field] for values which come from the source
* itself, instead of being generated by the connector during its operation.
*/
data class Field(
override val id: String,
override val type: FieldType,
) : FieldOrMetaField
/**
* Internal equivalent of [io.airbyte.protocol.models.Field] for values which are generated by the
* connector itself during its operation, instead of coming from the source.
*/
interface MetaField : FieldOrMetaField {
companion object {
const val META_PREFIX = "_ab_"
        fun isMetaFieldID(id: String): Boolean = id.startsWith(META_PREFIX)
}
}
/** Convenience enum listing the [MetaField]s which are generated by all connectors. */
enum class CommonMetaField(
override val type: FieldType,
) : MetaField {
CDC_LSN(CdcStringMetaFieldType),
CDC_UPDATED_AT(CdcOffsetDateTimeMetaFieldType),
CDC_DELETED_AT(CdcOffsetDateTimeMetaFieldType),
;
override val id: String
get() = MetaField.META_PREFIX + name.lowercase()
}
data object CdcStringMetaFieldType : LosslessFieldType {
override val airbyteType: AirbyteType = LeafAirbyteType.STRING
override val jsonEncoder: JsonEncoder<String> = JsonStringCodec
override val jsonDecoder: JsonDecoder<String> = JsonStringCodec
}
data object CdcOffsetDateTimeMetaFieldType : LosslessFieldType {
override val airbyteType: AirbyteType = LeafAirbyteType.TIMESTAMP_WITH_TIMEZONE
override val jsonEncoder: JsonEncoder<OffsetDateTime> = OffsetDateTimeCodec
override val jsonDecoder: JsonDecoder<OffsetDateTime> = OffsetDateTimeCodec
}

View File

@@ -0,0 +1,33 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.source
import io.airbyte.cdk.command.SourceConfiguration
/** A very thin abstraction around JDBC metadata queries. */
interface MetadataQuerier : AutoCloseable {
    /**
     * Queries the information_schema for all schema names which are specified by the connector
     * configuration.
     */
    fun streamNamespaces(): List<String>

    /** Queries the information_schema for all table names in the given schema. */
    fun streamNames(streamNamespace: String?): List<String>
/** Executes a SELECT * on the table, discards the results, and extracts all column metadata. */
fun fields(
streamName: String,
streamNamespace: String?,
): List<Field>
/** Queries the information_schema for all primary keys for the given table. */
fun primaryKeys(
streamName: String,
streamNamespace: String?,
): List<List<String>>
/** Factory for [MetadataQuerier] instances. */
fun interface Factory<T : SourceConfiguration> {
/** An implementation might open a connection to build a [MetadataQuerier] instance. */
fun session(config: T): MetadataQuerier
}
}
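// For illustration (not part of this change): typical use of the factory and session,
// mirroring StateManagerFactory. The function name is hypothetical; AutoCloseable.use
// guarantees the session is closed even if a query throws.
fun <T : SourceConfiguration> discoverAll(
    factory: MetadataQuerier.Factory<T>,
    config: T,
): Map<String, List<Field>> =
    factory.session(config).use { mq: MetadataQuerier ->
        mq.streamNamespaces()
            .flatMap { namespace -> mq.streamNames(namespace).map { namespace to it } }
            .associate { (namespace, name) -> "$namespace.$name" to mq.fields(name, namespace) }
    }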

View File

@@ -0,0 +1,158 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.source
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.read.Feed
import io.airbyte.cdk.read.StateQuerier
import io.airbyte.cdk.source.PartitionsCreator.TryAcquireResourcesStatus
/**
* [PartitionsCreatorFactory] must be implemented by each source connector and serves as the
* entrypoint to how READ operations are executed for that connector, via the [PartitionsCreator]
* and [PartitionReader] instances which are ultimately created by it.
*/
fun interface PartitionsCreatorFactory {
/**
* Returns a [PartitionsCreator] which will cause the READ to advance for this particular [feed]
* when possible. A [StateQuerier] is provided to obtain the current [OpaqueStateValue] for this
* [feed] but may also be used to peek at the state of other [Feed]s. This may be useful for
* synchronizing the READ for this [feed] by waiting for other [Feed]s to reach a desired state
* before proceeding; the waiting may be triggered by [PartitionsCreator.tryAcquireResources] or
* [PartitionReader.tryAcquireResources].
*/
fun make(
stateQuerier: StateQuerier,
feed: Feed,
): PartitionsCreator
}
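// For illustration (not part of this change): being a functional interface, a trivial
// factory can be registered as a lambda; the value name is hypothetical and
// CreateNoPartitions is defined further below.
val noOpPartitionsCreatorFactory = PartitionsCreatorFactory { _, _ -> CreateNoPartitions }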
/**
* A [PartitionsCreator] breaks down a [Feed] (a stream, or some global data feed) into zero, one or
* more partitions. Each partition is defined and read by a [PartitionReader] instance. These
* execute concurrently, but are joined serially because the state checkpoints need to appear in
* sequence.
*
 * - zero partitions means that there are no more records to be read for this [Feed];
 * - one partition effectively means that the records will be read serially;
 * - many partitions therefore involve some concurrency within the [Feed].
*/
interface PartitionsCreator {
/**
* Called before [run] to try to acquire all the resources required for its execution. These may
* be, but are not limited to:
* - disk space,
* - heap space,
* - JDBC connections from a connection pool,
* - API tokens,
* - etc.
*
* The CDK is not aware of resources; resource management is the responsibility of the connector
* implementation. Implementations of this method should not block the thread; in fact they
* should never be slow because the invocation of [tryAcquireResources] is guarded by a lock to
* ensure serial execution.
*
* This [tryAcquireResources] method may also be used to coordinate work. For example, the
* connector may require the global [Feed] to wait until all stream [Feed]s are done.
*
     * This method may get called multiple times, until it returns
     * [TryAcquireResourcesStatus.READY_TO_RUN].
*/
fun tryAcquireResources(): TryAcquireResourcesStatus
enum class TryAcquireResourcesStatus {
READY_TO_RUN,
RETRY_LATER,
}
/**
* Creates [PartitionReader] instances.
*
* This method gets called at most once.
*/
suspend fun run(): List<PartitionReader>
/**
* Called after [run] to release any resources acquired by [tryAcquireResources].
*
* This method gets called exactly once after a successful call to [tryAcquireResources].
*/
fun releaseResources()
}
data object CreateNoPartitions : PartitionsCreator {
override fun tryAcquireResources() = TryAcquireResourcesStatus.READY_TO_RUN
override suspend fun run(): List<PartitionReader> = listOf()
override fun releaseResources() {}
}
/**
* A [PartitionReader], when executed via [run], emits records within the corresponding _partition_,
* and completes by returning the value of the state checkpoint.
*
* A _partition_ is a chunk of consecutive records within a [Feed], which is either a stream or some
* global data feed.
*/
interface PartitionReader {
/**
* Called before [run] to try to acquire all the resources required for its execution. These may
* be, but are not limited to:
* - disk space,
* - heap space,
* - JDBC connections from a connection pool,
* - API tokens,
* - etc.
*
* The CDK is not aware of resources; resource management is the responsibility of the connector
* implementation. Implementations of this method should not block the thread; in fact they
* should never be slow because the invocation of [tryAcquireResources] is guarded by a lock to
* ensure serial execution.
*
* This [tryAcquireResources] method may also be used to coordinate work. For example, the
* connector may require the global [Feed] to wait until all stream [Feed]s are done.
*
     * This method may get called multiple times, until it returns
     * [TryAcquireResourcesStatus.READY_TO_RUN].
*/
fun tryAcquireResources(): TryAcquireResourcesStatus
enum class TryAcquireResourcesStatus {
READY_TO_RUN,
RETRY_LATER,
// XXX: there's room here for some kind of CANCEL value which cancels all pending
// PartitionReaders.
}
/**
* Reads the corresponding partition.
*
* This method gets called at most once.
*
* This method is suspendable and may be cancelled due to a timeout. Implementations must be
* careful to always make at least some forward progress (according to the value returned by
* [checkpoint]) regardless of timeouts.
*/
suspend fun run()
/**
* Returns the forward progress made by the execution of [run].
*
* The [checkpoint] method gets called exactly once after the call to [run] either completes
* successfully or times out, and not necessarily in the same thread as [run]. The [checkpoint]
* method does not get called if [run] is otherwise interrupted.
*/
fun checkpoint(): PartitionReadCheckpoint
/**
* Called after [run] and [checkpoint] to release any resources acquired by
* [tryAcquireResources].
*
* This method gets called exactly once after a successful call to [tryAcquireResources], but
* not necessarily in the same thread as [tryAcquireResources], [run] or [checkpoint].
*/
fun releaseResources()
}
data class PartitionReadCheckpoint(
val opaqueStateValue: OpaqueStateValue,
val numRecords: Long,
)
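// For illustration (not part of this change): the simplest conforming implementations,
// with hypothetical names. Jsons (io.airbyte.cdk.util.Jsons) is assumed to be imported
// and stands in for a real checkpoint value, as in the CDK's own tests.
class SingleSlicePartitionsCreator(
    private val reader: PartitionReader,
) : PartitionsCreator {
    override fun tryAcquireResources() = PartitionsCreator.TryAcquireResourcesStatus.READY_TO_RUN

    override suspend fun run(): List<PartitionReader> = listOf(reader)

    override fun releaseResources() {}
}

class NoOpPartitionReader : PartitionReader {
    override fun tryAcquireResources() = PartitionReader.TryAcquireResourcesStatus.READY_TO_RUN

    // A real implementation would emit records here and track its own forward progress.
    override suspend fun run() {}

    override fun checkpoint() = PartitionReadCheckpoint(Jsons.arrayNode(), numRecords = 0L)

    override fun releaseResources() {}
}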

View File

@@ -0,0 +1,30 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.util
import kotlin.coroutines.CoroutineContext
import kotlinx.coroutines.ThreadContextElement
/** Alternative to [kotlinx.coroutines.CoroutineName] that works in prod. */
data class ThreadRenamingCoroutineName(
val name: String,
) : ThreadContextElement<String> {
companion object Key : CoroutineContext.Key<ThreadRenamingCoroutineName>
override val key: CoroutineContext.Key<ThreadRenamingCoroutineName>
get() = Key
override fun updateThreadContext(context: CoroutineContext): String {
val previousName: String = Thread.currentThread().name
Thread.currentThread().name = "$previousName#$name"
return previousName
}
override fun restoreThreadContext(
context: CoroutineContext,
oldState: String,
) {
Thread.currentThread().name = oldState
}
override fun toString(): String = "CoroutineName($name)"
}
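// For illustration (not part of this change): a hypothetical demo, assuming
// kotlinx.coroutines.Dispatchers and kotlinx.coroutines.runBlocking are imported. While
// the coroutine runs, its carrier thread is named "<original>#heartbeat"; the original
// name is restored afterwards.
fun demoThreadRenaming() {
    runBlocking(Dispatchers.Default + ThreadRenamingCoroutineName("heartbeat")) {
        println(Thread.currentThread().name) // e.g. "DefaultDispatcher-worker-1#heartbeat"
    }
}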

View File

@@ -0,0 +1,83 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.command
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.ssh.SshNoTunnelMethod
import io.airbyte.cdk.ssh.SshPasswordAuthTunnelMethod
import io.airbyte.cdk.test.source.FakeSourceConfigurationJsonObject
import io.airbyte.cdk.util.Jsons
import io.airbyte.cdk.util.ResourceUtils
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(rebuildContext = true)
class ConfigurationJsonObjectSupplierTest {
@Inject
lateinit var supplier: ConfigurationJsonObjectSupplier<FakeSourceConfigurationJsonObject>
@Test
fun testSchema() {
        Assertions.assertEquals(FakeSourceConfigurationJsonObject::class.java, supplier.valueClass)
val expected: String = ResourceUtils.readResource("command/expected-schema.json")
Assertions.assertEquals(Jsons.readTree(expected), supplier.jsonSchema)
}
@Test
@Property(name = "airbyte.connector.config.host", value = "hello")
@Property(name = "airbyte.connector.config.database", value = "testdb")
fun testPropertyInjection() {
val pojo: FakeSourceConfigurationJsonObject = supplier.get()
Assertions.assertEquals("hello", pojo.host)
Assertions.assertEquals("testdb", pojo.database)
Assertions.assertEquals(SshNoTunnelMethod, pojo.getTunnelMethodValue())
}
@Test
fun testSchemaViolation() {
Assertions.assertThrows(ConfigErrorException::class.java, supplier::get)
}
@Test
@Property(
name = "airbyte.connector.config.json",
value = """{"host":"hello","port":123,"database":"testdb"}""",
)
fun testGoodJson() {
val pojo: FakeSourceConfigurationJsonObject = supplier.get()
Assertions.assertEquals("hello", pojo.host)
Assertions.assertEquals(123, pojo.port)
Assertions.assertEquals("testdb", pojo.database)
Assertions.assertEquals(SshNoTunnelMethod, pojo.getTunnelMethodValue())
}
@Test
@Property(name = "airbyte.connector.config.json", value = """{"foo""")
fun testMalformedJson() {
Assertions.assertThrows(ConfigErrorException::class.java, supplier::get)
}
@Test
@Property(name = "airbyte.connector.config.host", value = "hello")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(
name = "airbyte.connector.config.tunnel_method.tunnel_method",
value = "SSH_PASSWORD_AUTH",
)
@Property(name = "airbyte.connector.config.tunnel_method.tunnel_host", value = "localhost")
@Property(name = "airbyte.connector.config.tunnel_method.tunnel_port", value = "22")
@Property(name = "airbyte.connector.config.tunnel_method.tunnel_user", value = "sshuser")
@Property(
name = "airbyte.connector.config.tunnel_method.tunnel_user_password",
value = "secret",
)
fun testPropertySubTypeInjection() {
val pojo: FakeSourceConfigurationJsonObject = supplier.get()
Assertions.assertEquals("hello", pojo.host)
Assertions.assertEquals("testdb", pojo.database)
val expected = SshPasswordAuthTunnelMethod("localhost", 22, "sshuser", "secret")
Assertions.assertEquals(expected, pojo.getTunnelMethodValue())
}
}

View File

@@ -0,0 +1,375 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.jdbc
import java.math.BigDecimal
import java.nio.ByteBuffer
import java.sql.Connection
import java.sql.Date
import java.sql.JDBCType
import java.sql.PreparedStatement
import java.sql.ResultSet
import java.sql.Statement
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
class JdbcAccessorTest {
val h2 = H2TestFixture()
val columns =
mapOf(
"col_boolean" to "BOOLEAN",
"col_number" to "DECFLOAT",
"col_binary" to "VARBINARY",
"col_clob" to "CLOB",
"col_date" to "DATE",
"col_time" to "TIME",
"col_time_tz" to "TIME(6) WITH TIME ZONE",
"col_timestamp" to "TIMESTAMP",
"col_timestamp_tz" to "TIMESTAMP(6) WITH TIME ZONE",
"col_array" to "INTEGER ARRAY[10]",
)
init {
h2.execute(
columns
.map { "${it.key} ${it.value}" }
.joinToString(", ", "CREATE TABLE datatypes (", ")"),
)
}
@BeforeEach
fun resetH2() {
h2.execute("TRUNCATE TABLE datatypes")
h2.execute(
"""
INSERT INTO datatypes VALUES (
TRUE,
123,
x'6D6E',
'abcdef',
'2024-03-01',
'01:02:03',
'01:02:03.456-04',
'2024-03-01 01:02:03',
'2024-03-01 01:02:03.456-04',
ARRAY[1,2,3]
)""",
)
}
lateinit var columnName: String
private fun <T> JdbcGetter<T>.select(): T? =
h2.createConnection().use { conn: Connection ->
conn.createStatement().use { stmt: Statement ->
stmt.executeQuery("SELECT * FROM datatypes").use { rs: ResultSet ->
Assertions.assertTrue(rs.next())
val colIdx: Int = columns.keys.toList().indexOf(columnName) + 1
get(rs, colIdx)
}
}
}
private fun <T> JdbcSetter<T>.update(value: T) {
val sql = "UPDATE datatypes SET $columnName = ?"
h2.createConnection().use { conn: Connection ->
conn.prepareStatement(sql).use { stmt: PreparedStatement ->
set(stmt, 1, value)
stmt.execute()
}
}
}
private fun updateToNull() {
h2.createConnection().use { conn: Connection ->
conn.createStatement().use { stmt: Statement ->
stmt.execute("UPDATE datatypes SET $columnName = NULL")
}
}
}
@Test
fun testBooleanAccessor() {
columnName = "col_boolean"
BooleanAccessor.run {
Assertions.assertEquals(true, select())
update(false)
Assertions.assertEquals(false, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testByteAccessor() {
columnName = "col_number"
ByteAccessor.run {
Assertions.assertEquals(123, select())
update(52)
Assertions.assertEquals(52, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testShortAccessor() {
columnName = "col_number"
ShortAccessor.run {
Assertions.assertEquals(123, select())
update(1234)
Assertions.assertEquals(1234, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testIntAccessor() {
columnName = "col_number"
IntAccessor.run {
Assertions.assertEquals(123, select())
update(123456)
Assertions.assertEquals(123456, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testLongAccessor() {
columnName = "col_number"
LongAccessor.run {
Assertions.assertEquals(123L, select())
update(1234567890123456L)
Assertions.assertEquals(1234567890123456L, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testFloatAccessor() {
columnName = "col_number"
FloatAccessor.run {
Assertions.assertEquals(123f, select())
update(123.456f)
Assertions.assertEquals(123.456f, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testDoubleAccessor() {
columnName = "col_number"
DoubleAccessor.run {
Assertions.assertEquals(123.0, select())
update(2.5)
Assertions.assertEquals(2.5, select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testBigDecimalAccessor() {
columnName = "col_number"
BigDecimalAccessor.run {
Assertions.assertEquals(0, BigDecimal("123").compareTo(select()))
update(BigDecimal("0.0000000001"))
Assertions.assertEquals(0, BigDecimal("0.0000000001").compareTo(select()))
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testBytesAccessor() {
columnName = "col_binary"
BytesAccessor.run {
Assertions.assertEquals("mn", select()?.let { String(it.array()) })
update(ByteBuffer.wrap("ab".toByteArray()))
Assertions.assertEquals("ab", select()?.let { String(it.array()) })
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testBinaryStreamAccessor() {
columnName = "col_binary"
BinaryStreamAccessor.run {
Assertions.assertEquals("mn", select()?.let { String(it.array()) })
update(ByteBuffer.wrap("ab".toByteArray()))
Assertions.assertEquals("ab", select()?.let { String(it.array()) })
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testClobAccessor() {
columnName = "col_clob"
ClobAccessor.run {
Assertions.assertEquals("abcdef", select())
update("ABCDEF")
Assertions.assertEquals("ABCDEF", select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testNClobAccessor() {
columnName = "col_clob"
NClobAccessor.run {
Assertions.assertEquals("abcdef", select())
update("ABCDEF")
Assertions.assertEquals("ABCDEF", select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testCharacterStreamAccessor() {
columnName = "col_clob"
CharacterStreamAccessor.run {
Assertions.assertEquals("abcdef", select())
update("ABCDEF")
Assertions.assertEquals("ABCDEF", select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testNCharacterStreamAccessor() {
columnName = "col_clob"
NCharacterStreamAccessor.run {
Assertions.assertEquals("abcdef", select())
update("ABCDEF")
Assertions.assertEquals("ABCDEF", select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testStringAccessor() {
columnName = "col_clob"
StringAccessor.run {
Assertions.assertEquals("abcdef", select())
update("ABCDEF")
Assertions.assertEquals("ABCDEF", select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testNStringAccessor() {
columnName = "col_clob"
NStringAccessor.run {
Assertions.assertEquals("abcdef", select())
update("ABCDEF")
Assertions.assertEquals("ABCDEF", select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testDateAccessor() {
columnName = "col_date"
DateAccessor.run {
Assertions.assertEquals(LocalDate.of(2024, 3, 1), select())
update(LocalDate.of(1999, 11, 12))
Assertions.assertEquals(LocalDate.of(1999, 11, 12), select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testTimeAccessor() {
columnName = "col_time"
TimeAccessor.run {
Assertions.assertEquals(LocalTime.of(1, 2, 3), select())
update(LocalTime.of(11, 12, 13))
Assertions.assertEquals(LocalTime.of(11, 12, 13), select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testTimestampAccessor() {
columnName = "col_timestamp"
TimestampAccessor.run {
Assertions.assertEquals(LocalDateTime.of(2024, 3, 1, 1, 2, 3), select())
update(LocalDateTime.of(1999, 11, 12, 11, 12, 13))
Assertions.assertEquals(LocalDateTime.of(1999, 11, 12, 11, 12, 13), select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testObjectGetterAnySetter() {
columnName = "col_time_tz"
ObjectGetter(OffsetTime::class.java).run {
Assertions.assertEquals(
OffsetTime.of(1, 2, 3, 456000000, ZoneOffset.ofHours(-4)),
select(),
)
}
AnySetter(JDBCType.TIME_WITH_TIMEZONE.vendorTypeNumber).run {
update(OffsetTime.of(11, 12, 13, 456000000, ZoneOffset.ofHours(3)))
}
AnyAccessor.run {
Assertions.assertEquals(
OffsetTime.of(11, 12, 13, 456000000, ZoneOffset.ofHours(3)),
select(),
)
}
columnName = "col_timestamp_tz"
ObjectGetter(OffsetDateTime::class.java).run {
Assertions.assertEquals(
OffsetDateTime.of(2024, 3, 1, 1, 2, 3, 456000000, ZoneOffset.ofHours(-4)),
select(),
)
}
}
@Test
@Suppress("DEPRECATION")
fun testAnyAccessor() {
columnName = "col_date"
AnyAccessor.run {
Assertions.assertEquals(Date(124, 2, 1), select())
update(Date(99, 10, 12))
Assertions.assertEquals(Date(99, 10, 12), select())
updateToNull()
Assertions.assertEquals(null, select())
}
}
@Test
fun testArrayGetterAndSetter() {
columnName = "col_array"
ArrayGetter(IntAccessor).run { Assertions.assertEquals(listOf(1, 2, 3), select()) }
ArraySetter("INTEGER").run { update(listOf(4, 5)) }
ArrayGetter(IntAccessor).run { Assertions.assertEquals(listOf(4, 5), select()) }
updateToNull()
ArrayGetter(IntAccessor).run { Assertions.assertEquals(null, select()) }
}
}

View File

@@ -0,0 +1,57 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.jdbc
import io.airbyte.cdk.ssh.SshBastionContainer
import io.airbyte.cdk.test.source.FakeSourceConfigurationFactory
import io.airbyte.cdk.test.source.FakeSourceConfigurationJsonObject
import io.airbyte.cdk.testcontainers.DOCKER_HOST_FROM_WITHIN_CONTAINER
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import org.testcontainers.Testcontainers
class JdbcConnectionFactoryTest {
val h2 = H2TestFixture()
init {
Testcontainers.exposeHostPorts(h2.port)
}
val sshBastion = SshBastionContainer(tunnelingToHostPort = h2.port)
@Test
fun testVanilla() {
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
}
val factory = JdbcConnectionFactory(FakeSourceConfigurationFactory().make(configPojo))
Assertions.assertEquals("H2", factory.get().metaData.databaseProductName)
}
@Test
fun testSshKeyAuth() {
val configPojo =
FakeSourceConfigurationJsonObject().apply {
                host = DOCKER_HOST_FROM_WITHIN_CONTAINER // required only because the SSH bastion runs in a container
port = h2.port
database = h2.database
setTunnelMethodValue(sshBastion.outerKeyAuthTunnelMethod)
}
val factory = JdbcConnectionFactory(FakeSourceConfigurationFactory().make(configPojo))
Assertions.assertEquals("H2", factory.get().metaData.databaseProductName)
}
@Test
fun testSshPasswordAuth() {
val configPojo =
FakeSourceConfigurationJsonObject().apply {
                host = DOCKER_HOST_FROM_WITHIN_CONTAINER // required only because the SSH bastion runs in a container
port = h2.port
database = h2.database
setTunnelMethodValue(sshBastion.outerPasswordAuthTunnelMethod)
}
val factory = JdbcConnectionFactory(FakeSourceConfigurationFactory().make(configPojo))
Assertions.assertEquals("H2", factory.get().metaData.databaseProductName)
}
}

View File

@@ -0,0 +1,78 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.jdbc
import io.airbyte.cdk.source.MetadataQuerier
import io.airbyte.cdk.test.source.FakeSourceConfiguration
import io.airbyte.cdk.test.source.FakeSourceConfigurationFactory
import io.airbyte.cdk.test.source.FakeSourceConfigurationJsonObject
import io.airbyte.cdk.test.source.FakeSourceOperations
import java.sql.JDBCType
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class JdbcMetadataQuerierTest {
val h2 = H2TestFixture()
init {
h2.execute("CREATE TABLE kv (k INT PRIMARY KEY, v VARCHAR(60))")
}
val factory = JdbcMetadataQuerier.Factory(FakeSourceOperations(), FakeSourceOperations())
@Test
fun test() {
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
}
val config: FakeSourceConfiguration = FakeSourceConfigurationFactory().make(configPojo)
factory.session(config).use { mdq: MetadataQuerier ->
Assertions.assertEquals(listOf("PUBLIC"), mdq.streamNamespaces())
Assertions.assertEquals(listOf("KV"), mdq.streamNames("PUBLIC"))
val expectedColumnMetadata: List<JdbcMetadataQuerier.ColumnMetadata> =
listOf(
JdbcMetadataQuerier.ColumnMetadata(
name = "_ROWID_",
label = "_ROWID_",
type =
SystemType(
typeName = "BIGINT",
typeCode = JDBCType.BIGINT.vendorTypeNumber,
precision = 64,
scale = 0,
),
nullable = false,
),
JdbcMetadataQuerier.ColumnMetadata(
name = "K",
label = "K",
type =
SystemType(
typeName = "INTEGER",
typeCode = JDBCType.INTEGER.vendorTypeNumber,
precision = 32,
scale = 0,
),
nullable = false,
),
JdbcMetadataQuerier.ColumnMetadata(
name = "V",
label = "V",
type =
SystemType(
typeName = "CHARACTER VARYING",
typeCode = JDBCType.VARCHAR.vendorTypeNumber,
precision = 60,
scale = 0,
),
nullable = true,
),
)
val tableName = (mdq as JdbcMetadataQuerier).findTableName("KV", "PUBLIC")
Assertions.assertNotNull(tableName)
Assertions.assertEquals(expectedColumnMetadata, mdq.columnMetadata(tableName!!))
Assertions.assertEquals(listOf(listOf("K")), mdq.primaryKeys("KV", "PUBLIC"))
}
}
}

View File

@@ -0,0 +1,90 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.jdbc
import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.cdk.source.Field
import io.airbyte.cdk.source.select.SelectQuerier
import io.airbyte.cdk.source.select.SelectQuery
import io.airbyte.cdk.test.source.FakeSourceConfiguration
import io.airbyte.cdk.test.source.FakeSourceConfigurationFactory
import io.airbyte.cdk.test.source.FakeSourceConfigurationJsonObject
import io.airbyte.cdk.util.Jsons
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class JdbcSelectQuerierTest {
val h2 = H2TestFixture()
init {
h2.execute(
"""CREATE TABLE kv (
|k INT PRIMARY KEY,
|v VARCHAR(60))
|
"""
.trimMargin()
.replace('\n', ' '),
)
h2.execute("INSERT INTO kv (k, v) VALUES (1, 'foo'), (2, 'bar'), (3, NULL);")
}
val columns: List<Field> = listOf(Field("k", IntFieldType), Field("v", StringFieldType))
@Test
fun testVanilla() {
runTest(
SelectQuery("SELECT k, v FROM kv", columns, listOf()),
"""{"k":1, "v":"foo"}""",
"""{"k":2, "v":"bar"}""",
"""{"k":3, "v":null}""",
)
}
@Test
fun testBindings() {
runTest(
SelectQuery(
"SELECT k, v FROM kv WHERE k < ?",
columns,
listOf(SelectQuery.Binding(Jsons.numberNode(2), IntFieldType)),
),
"""{"k":1, "v":"foo"}""",
)
runTest(
SelectQuery(
"SELECT k, v FROM kv WHERE k > ? AND k < ?",
columns,
listOf(
SelectQuery.Binding(Jsons.numberNode(1), IntFieldType),
SelectQuery.Binding(Jsons.numberNode(3), IntFieldType),
),
),
"""{"k":2, "v":"bar"}""",
)
}
@Test
fun testProjection() {
runTest(
SelectQuery("SELECT v FROM kv", columns.drop(1), listOf()),
"""{"v":"foo"}""",
"""{"v":"bar"}""",
"""{"v":null}""",
)
}
private fun runTest(
q: SelectQuery,
vararg expected: String,
) {
val configPojo: FakeSourceConfigurationJsonObject =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
}
val config: FakeSourceConfiguration = FakeSourceConfigurationFactory().make(configPojo)
val querier: SelectQuerier = JdbcSelectQuerier(JdbcConnectionFactory(config))
val actual: List<ObjectNode> = querier.executeQuery(q).use { it.asSequence().toList() }
Assertions.assertIterableEquals(expected.toList().map(Jsons::readTree), actual)
}
}

View File

@@ -0,0 +1,587 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ArrayNode
import io.airbyte.cdk.TestClockFactory
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.consumers.BufferingOutputConsumer
import io.airbyte.cdk.source.PartitionReadCheckpoint
import io.airbyte.cdk.source.PartitionReader
import io.airbyte.cdk.source.PartitionsCreator
import io.airbyte.cdk.source.PartitionsCreatorFactory
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteMessage
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.AirbyteStreamStatusTraceMessage
import io.airbyte.protocol.models.v0.AirbyteTraceMessage
import io.airbyte.protocol.models.v0.SyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import java.lang.RuntimeException
import java.time.Duration
import kotlin.random.Random
import kotlin.time.toKotlinDuration
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.delay
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.sync.Semaphore
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.DynamicNode
import org.junit.jupiter.api.DynamicTest
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestFactory
import org.junit.jupiter.api.Timeout
const val TEST_TIMEOUT_SECONDS = 10L
@Timeout(TEST_TIMEOUT_SECONDS)
class RootReaderIntegrationTest {
val testCases: List<TestCase> =
listOf(
TestCase(
"simple-1",
Create(Read),
Create(),
),
TestCase(
"simple-3",
Create(Read, Read, Read),
Create(),
),
TestCase(
"simple-3-2",
Create(Read, Read, Read),
Create(Read, Read),
Create(),
),
TestCase(
"backoff-1",
CreatorBackOff(Create(Read)),
CreatorBackOff(CreatorBackOff(Create())),
),
TestCase(
"backoff-2",
Create(ReaderBackOff(ReaderBackOff(Read)), ReaderBackOff(Read)),
Create(),
),
TestCase(
"failure-1-c",
Create(Read),
CreatorFailure,
),
TestCase(
"failure-1-1r",
Create(Read),
Create(ReaderFailure),
),
TestCase(
"failure-2-2r",
Create(Read, Read),
Create(ReaderFailure, Read),
),
TestCase(
"failure-2-3r",
Create(Read, Read),
Create(Read, ReaderFailure, Read),
),
)
/** Simulates a READ operation for each test case, which corresponds to a one-stream catalog. */
@TestFactory
fun testConcurrent(): Iterable<DynamicNode> =
testCases.map { DynamicTest.dynamicTest(it.name, it::run) }
/** Similar to [testConcurrent] but with a resource which forces serial execution. */
@TestFactory
fun testSerial(): Iterable<DynamicNode> =
testCases.map { it.copy(resource = 1) }.map { DynamicTest.dynamicTest(it.name, it::run) }
/**
* Similar to [testConcurrent] but with a resource which forces execution on at most 2 threads.
*/
@TestFactory
fun testConstrained(): Iterable<DynamicNode> =
testCases
.map { it.copy(resource = CONSTRAINED) }
.map { DynamicTest.dynamicTest(it.name, it::run) }
/**
 * Simulates a READ operation with a catalog containing all the streams in [testCases]. Some of these
* fail, so this test checks that a failure in one stream propagates to the others properly.
*/
@Test
fun testAllStreamsNonGlobal() {
val stateManager =
StateManager(initialStreamStates = testCases.associate { it.stream to null })
val testOutputConsumer = BufferingOutputConsumer(TestClockFactory().fixed())
val rootReader =
RootReader(
stateManager,
slowHeartbeat,
excessiveTimeout,
testOutputConsumer,
TestPartitionsCreatorFactory(Semaphore(CONSTRAINED), *testCases.toTypedArray()),
)
Assertions.assertThrows(RuntimeException::class.java) {
runBlocking(Dispatchers.Default) { rootReader.read() }
}
val log = KotlinLogging.logger {}
for (msg in testOutputConsumer.messages()) {
log.info { Jsons.writeValueAsString(msg) }
}
for (testCase in testCases) {
log.info { "checking stream feed for ${testCase.name}" }
val traceMessages: List<AirbyteTraceMessage> =
testOutputConsumer.traces().filter {
it.streamStatus.streamDescriptor.name == testCase.name
}
testCase.verifyTraces(traceMessages)
val stateMessages: List<AirbyteStateMessage> =
testOutputConsumer.states().filter {
it.stream.streamDescriptor.name == testCase.name
}
testCase.verifyStates(stateMessages)
}
}
/**
* Similar to [testAllStreamsNonGlobal] but with a global feed. This test checks that the global
* feed gets cancelled when one of its stream feeds fails. Otherwise, the test times out.
*/
@Test
fun testAllStreamsGlobal() {
val stateManager =
StateManager(
global = Global(testCases.map { it.stream }),
initialGlobalState = null,
initialStreamStates = testCases.associate { it.stream to null },
)
val testOutputConsumer = BufferingOutputConsumer(TestClockFactory().fixed())
val rootReader =
RootReader(
stateManager,
slowHeartbeat,
excessiveTimeout,
testOutputConsumer,
TestPartitionsCreatorFactory(Semaphore(CONSTRAINED), *testCases.toTypedArray()),
)
Assertions.assertThrows(RuntimeException::class.java) {
runBlocking(Dispatchers.Default) { rootReader.read() }
}
val log = KotlinLogging.logger {}
for (msg in testOutputConsumer.messages()) {
log.info { Jsons.writeValueAsString(msg) }
}
for (testCase in testCases) {
log.info { "checking stream feed for ${testCase.name}" }
val traceMessages: List<AirbyteTraceMessage> =
testOutputConsumer.traces().filter {
it.streamStatus.streamDescriptor.name == testCase.name
}
testCase.verifyTraces(traceMessages)
val streamStateMessages: List<AirbyteStateMessage> =
testOutputConsumer.states().filter {
it.stream?.streamDescriptor?.name == testCase.name
}
Assertions.assertTrue(streamStateMessages.isEmpty())
}
log.info { "checking global feed" }
val globalStateMessages: List<AirbyteStateMessage> =
testOutputConsumer.states().filter {
it.type == AirbyteStateMessage.AirbyteStateType.GLOBAL
}
Assertions.assertFalse(globalStateMessages.isEmpty())
}
companion object {
const val CONSTRAINED = 2
}
}
/** Each [TestCase] encodes a scenario for how a READ operation might proceed for a [Stream]. */
data class TestCase(
val name: String,
val creatorCases: List<CreatorCase>,
val resource: Int = 100_000, // some arbitrary large value by default
) {
constructor(
name: String,
vararg creatorCases: CreatorCase,
) : this(name, creatorCases.toList())
val stream: Stream =
Stream(
name = name,
namespace = "test",
fields = listOf(),
primaryKeyCandidates = listOf(),
configuredSyncMode = SyncMode.FULL_REFRESH,
configuredPrimaryKey = null,
configuredCursor = null,
)
fun run() {
val testOutputConsumer = BufferingOutputConsumer(TestClockFactory().fixed())
val rootReader =
RootReader(
StateManager(initialStreamStates = mapOf(stream to null)),
slowHeartbeat,
excessiveTimeout,
testOutputConsumer,
TestPartitionsCreatorFactory(Semaphore(resource), this),
)
try {
runBlocking(Dispatchers.Default) { rootReader.read() }
log.info { "read completed for $name" }
Assertions.assertTrue(isSuccessful, name)
} catch (e: Exception) {
Assertions.assertFalse(isSuccessful, name)
log.info(e) { "read failed for $name" }
}
for (msg in testOutputConsumer.messages()) {
log.info { Jsons.writeValueAsString(msg) }
}
verify(testOutputConsumer)
}
private val log = KotlinLogging.logger {}
fun verify(output: BufferingOutputConsumer) {
var inTracePrefix = true
var inTraceSuffix = false
for (msg in output.messages()) {
val json: String by lazy { Jsons.writeValueAsString(msg) }
when (msg.type) {
AirbyteMessage.Type.TRACE ->
if (!inTracePrefix && !inTraceSuffix) inTraceSuffix = true
AirbyteMessage.Type.STATE -> {
if (inTracePrefix) inTracePrefix = false
Assertions.assertFalse(
inTraceSuffix,
"unexpected STATE message $json in case $name",
)
}
else ->
Assertions.fail(
"Unexpected Airbyte message type ${msg.type} in $json in case $name",
)
}
}
verifyTraces(output.traces())
verifyStates(output.states())
}
fun verifyTraces(traceMessages: List<AirbyteTraceMessage>) {
var hasStarted = false
var hasCompleted = false
for (trace in traceMessages) {
when (trace.type) {
AirbyteTraceMessage.Type.STREAM_STATUS -> {
Assertions.assertEquals(name, trace.streamStatus?.streamDescriptor?.name)
when (trace.streamStatus.status) {
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED -> {
hasStarted = true
Assertions.assertFalse(hasCompleted)
}
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE -> {
hasCompleted = true
Assertions.assertTrue(hasStarted)
}
else ->
Assertions.fail(
"unexpected TRACE message status ${trace.streamStatus.status} " +
"in case $name",
)
}
}
else ->
Assertions.fail(
"unexpected TRACE message type ${trace.type} in case $name",
)
}
}
Assertions.assertTrue(hasStarted)
Assertions.assertEquals(isSuccessful, hasCompleted)
}
fun verifyStates(stateMessages: List<AirbyteStateMessage>) {
val actualStates = mutableListOf<MutableSet<JsonNode>>()
var previousPartitionsCreatorID: Long = -1L
for (stateMessage in stateMessages) {
Assertions.assertEquals(name, stateMessage.stream?.streamDescriptor?.name)
Assertions.assertEquals(true, stateMessage.stream?.streamState?.isArray, name)
val stateValue: ArrayNode = stateMessage.stream.streamState as ArrayNode
val currentID: Long = stateValue.get(0)?.asLong()!!
if (currentID == previousPartitionsCreatorID) {
actualStates.last().add(stateValue)
} else {
actualStates.add(mutableSetOf(stateValue))
previousPartitionsCreatorID = currentID
}
}
log.info { "actual states for $name: $actualStates" }
// Compare expected and actual states.
// Actual states are sensitive to timing within a partition creation round.
// This means that a direct comparison is not possible.
val maxID: Long = expectedStates.size.coerceAtLeast(actualStates.size).toLong()
for (partitionsCreatorID in 1L..maxID) {
val expected: Set<JsonNode>? = expectedStates.getOrNull(partitionsCreatorID.toInt() - 1)
val actual: Set<JsonNode>? = actualStates.getOrNull(partitionsCreatorID.toInt() - 1)
if (expected == null) {
Assertions.assertNull(
actual,
"expected nothing in round $partitionsCreatorID, got $actual",
)
break
}
Assertions.assertNotNull(
actual,
"expected $expected in round $partitionsCreatorID, got nothing",
)
for (actualState in actual!!) {
Assertions.assertTrue(
actualState in expected,
"$actualState should be in $expected",
)
}
}
}
/** [isSuccessful] represents whether the READ operation is expected to be successful. */
val isSuccessful: Boolean = creatorCases.all { it.isSuccessful() }
private fun CreatorCase.isSuccessful(): Boolean =
when (this) {
is CreatorBackOff -> next.isSuccessful()
CreatorFailure -> false
is Create -> readerCases.all { it.isSuccessful() }
}
private fun ReaderCase.isSuccessful(): Boolean =
when (this) {
is ReaderBackOff -> next.isSuccessful()
ReaderFailure -> false
Read -> true
}
/**
* [expectedStates] represent the expected state values in the messages. Sets are formed by the
* state values emitted by concurrent [PartitionReader] instances.
*/
val expectedStates: List<Set<JsonNode>> =
mutableListOf<Set<JsonNode>>().apply {
creatorCases.forEachIndexed { creatorIndex, creatorCase ->
val set = creatorCase.states(1L + creatorIndex)
val trimmed = set.takeWhile { it.isArray }.toSet()
if (trimmed.isEmpty()) return@apply
add(trimmed)
if (trimmed.size < set.size) return@apply
}
}
private fun CreatorCase.states(creatorID: Long): Set<JsonNode> =
when (this) {
is CreatorBackOff -> next.states(creatorID)
CreatorFailure -> setOf()
is Create ->
readerCases
.mapIndexed { idx, readerCase -> readerCase.state(creatorID, 1L + idx) }
.toSet()
}
private fun ReaderCase.state(
creatorID: Long,
readerID: Long,
): JsonNode =
when (this) {
is ReaderBackOff -> next.state(creatorID, readerID)
ReaderFailure -> Jsons.nullNode()
Read -> Jsons.arrayNode().add(creatorID).add(readerID)
}
}
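// For illustration (not part of this change): for TestCase("simple-3-2",
// Create(Read, Read, Read), Create(Read, Read), Create()), expectedStates is
// [ {[1,1],[1,2],[1,3]}, {[2,1],[2,2]} ]: one set per partition creation round, one
// [creatorID, readerID] state value per successful reader; the empty third round adds nothing.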
/** A [CreatorCase] specifies how the [TestPartitionsCreator] should behave. */
sealed interface CreatorCase
data class CreatorBackOff(
val next: CreatorCase,
) : CreatorCase
data object CreatorFailure : CreatorCase
data class Create(
val readerCases: List<ReaderCase>,
) : CreatorCase {
constructor(vararg readerCases: ReaderCase) : this(readerCases.toList())
}
/** A [ReaderCase] specifies how the [TestPartitionReader] should behave. */
sealed interface ReaderCase
data class ReaderBackOff(
val next: ReaderCase,
) : ReaderCase
data object ReaderFailure : ReaderCase
data object Read : ReaderCase
class TestPartitionsCreator(
private val creatorID: Long,
private var case: CreatorCase,
private val resource: Semaphore,
) : PartitionsCreator {
private val log = KotlinLogging.logger {}
override fun tryAcquireResources(): PartitionsCreator.TryAcquireResourcesStatus =
when (val case = case) {
is CreatorBackOff -> {
log.info { "failed resource acquisition due to deliberate backoff" }
this.case = case.next
PartitionsCreator.TryAcquireResourcesStatus.RETRY_LATER
}
else -> {
if (resource.tryAcquire()) {
log.info { "resource acquisition successful" }
PartitionsCreator.TryAcquireResourcesStatus.READY_TO_RUN
} else {
log.info { "failed resource acquisition due to resource starvation" }
PartitionsCreator.TryAcquireResourcesStatus.RETRY_LATER
}
}
}
override fun releaseResources() {
resource.release()
}
override suspend fun run(): List<PartitionReader> {
while (true) {
when (val case = case) {
is CreatorBackOff -> TODO("unreachable code")
CreatorFailure -> {
log.info { "deliberately failing the partitioning" }
throw RuntimeException("boom")
}
is Create -> {
val partitionReaders: List<PartitionReader> =
case.readerCases.mapIndexed { idx: Int, readerCase: ReaderCase ->
TestPartitionReader(creatorID, 1L + idx, readerCase, resource)
}
log.info { "successfully created ${partitionReaders.size} partition(s)" }
return partitionReaders
}
}
}
}
}
class TestPartitionReader(
private val creatorID: Long,
private val readerID: Long,
private var case: ReaderCase,
private val resource: Semaphore,
) : PartitionReader {
private val log = KotlinLogging.logger {}
override fun tryAcquireResources(): PartitionReader.TryAcquireResourcesStatus =
when (val case = case) {
is ReaderBackOff -> {
log.info { "failed resource acquisition due to deliberate backoff" }
this.case = case.next
PartitionReader.TryAcquireResourcesStatus.RETRY_LATER
}
else -> {
if (resource.tryAcquire()) {
log.info { "resource acquisition successful" }
PartitionReader.TryAcquireResourcesStatus.READY_TO_RUN
} else {
log.info { "failed resource acquisition due to resource starvation" }
PartitionReader.TryAcquireResourcesStatus.RETRY_LATER
}
}
}
override fun releaseResources() {
resource.release()
}
override suspend fun run() {
when (case) {
is ReaderBackOff -> TODO("unreachable code")
is ReaderFailure -> {
log.info { "deliberately failing the read" }
throw RuntimeException("boom")
}
is Read -> {
delay(readDelay().toKotlinDuration())
log.info { "partition read successful" }
}
}
}
override fun checkpoint(): PartitionReadCheckpoint =
PartitionReadCheckpoint(
opaqueStateValue = Jsons.arrayNode().add(creatorID).add(readerID),
numRecords = 0L,
)
}
class TestPartitionsCreatorFactory(
val resource: Semaphore,
vararg val testCases: TestCase,
) : PartitionsCreatorFactory {
private val log = KotlinLogging.logger {}
override fun make(
stateQuerier: StateQuerier,
feed: Feed,
): PartitionsCreator {
if (feed is Global) {
// For a global feed, return a bogus PartitionsCreator which backs off forever.
// This tests that the corresponding coroutine gets canceled properly.
return object : PartitionsCreator {
override fun tryAcquireResources(): PartitionsCreator.TryAcquireResourcesStatus {
log.info { "failed to acquire resources for global feed, as always" }
return PartitionsCreator.TryAcquireResourcesStatus.RETRY_LATER
}
override suspend fun run(): List<PartitionReader> {
TODO("unreachable code")
}
override fun releaseResources() {
TODO("unreachable code")
}
}
}
        // For a stream feed, pick the CreatorCase in the corresponding TestCase
        // which succeeds the one whose state is currently checkpointed in the StateQuerier.
val testCase: TestCase = testCases.find { it.name == (feed as Stream).name }!!
val checkpointedPartitionCreatorID: Long =
when (val opaqueStateValue: OpaqueStateValue? = stateQuerier.current(feed)) {
null -> 0L
is ArrayNode -> opaqueStateValue.get(0).asLong()
else -> TODO("unreachable code")
}
val nextCreatorCaseIndex: Int =
checkpointedPartitionCreatorID.toInt() // the ID is off by 1 so this works
if (nextCreatorCaseIndex >= testCase.creatorCases.size) TODO("unreachable code")
return TestPartitionsCreator(
1L + checkpointedPartitionCreatorID,
testCase.creatorCases[nextCreatorCaseIndex],
resource,
)
}
}
/** Tests should succeed and not time out. */
val excessiveTimeout: Duration = Duration.ofSeconds(TEST_TIMEOUT_SECONDS * 2)
/** The heartbeat duration is set to allow enough room to back off many times. */
val slowHeartbeat: Duration = Duration.ofSeconds(TEST_TIMEOUT_SECONDS).dividedBy(100L)
fun readDelay(): Duration =
Duration.ofSeconds(TEST_TIMEOUT_SECONDS)
.dividedBy(1000L)
.multipliedBy(Random.Default.nextLong(1L, 10L))
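// For illustration: with TEST_TIMEOUT_SECONDS = 10, readDelay() yields between 10ms and
// 90ms (the upper bound of nextLong is exclusive), slowHeartbeat is 100ms, and
// excessiveTimeout is 20s, far beyond any simulated read.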

View File

@@ -0,0 +1,228 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.InputState
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.consumers.BufferingCatalogValidationFailureHandler
import io.airbyte.cdk.consumers.CatalogValidationFailure
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.SyncMode
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(rebuildContext = true)
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "airbyte.connector.config.cursor.cursor_method", value = "cdc")
@Property(name = "metadata.resource", value = "read/metadata.json")
class StateManagerGlobalStatesTest {
@Inject lateinit var config: SourceConfiguration
@Inject lateinit var configuredCatalog: ConfiguredAirbyteCatalog
@Inject lateinit var inputState: InputState
@Inject lateinit var stateManagerFactory: StateManagerFactory
@Inject lateinit var handler: BufferingCatalogValidationFailureHandler
val stateManager: StateManager by lazy {
stateManagerFactory.create(config, configuredCatalog, inputState)
}
@Test
@Property(name = "airbyte.connector.catalog.resource", value = "read/cdc-catalog.json")
@Property(
name = "airbyte.connector.state.json",
value =
"""
{"type": "GLOBAL", "global": {
"shared_state": { "cdc": {} },
"stream_states": [{
"stream_descriptor": { "name": "BAR", "namespace": "FOO" },
"stream_state": { "primary_key": {} }
}]}}""",
)
fun testStreamInStateButNotInCatalog() {
prelude()
Assertions.assertEquals(listOf<CatalogValidationFailure>(), handler.get())
}
@Test
@Property(name = "airbyte.connector.catalog.resource", value = "read/cdc-catalog.json")
@Property(name = "airbyte.connector.state.json", value = "[]")
fun testColdStart() {
val streams: Streams = prelude()
// test current state
Assertions.assertNull(stateManager.scoped(streams.global).current())
Assertions.assertNull(stateManager.scoped(streams.kv).current())
Assertions.assertNull(stateManager.scoped(streams.events).current())
Assertions.assertEquals(listOf<CatalogValidationFailure>(), handler.get())
// update state manager with fake work results
stateManager.scoped(streams.global).set(Jsons.readTree("{\"cdc\":\"starting\"}"), 0L)
stateManager.scoped(streams.kv).set(Jsons.readTree("{\"initial_sync\":\"ongoing\"}"), 123L)
stateManager
.scoped(streams.events)
.set(Jsons.readTree("{\"full_refresh\":\"ongoing\"}"), 456L)
// test checkpoint messages
val checkpoint: List<AirbyteStateMessage> = stateManager.checkpoint()
Assertions.assertEquals(
listOf(
"""{
|"type":"GLOBAL",
|"global":{"shared_state":{"cdc":"starting"},
|"stream_states":[
|{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"},
|"stream_state":{"initial_sync":"ongoing"}}
|]},
|"sourceStats":{"recordCount":123.0}
|}
""".trimMargin(),
"""{
|"type":"STREAM",
|"stream":{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"},
|"stream_state":{"full_refresh":"ongoing"}},
|"sourceStats":{"recordCount":456.0}
|}
""".trimMargin(),
)
.map { Jsons.readTree(it) },
checkpoint.map { Jsons.valueToTree<JsonNode>(it) },
)
}
@Test
@Property(name = "airbyte.connector.catalog.resource", value = "read/cdc-catalog.json")
@Property(
name = "airbyte.connector.state.json",
value =
"""
{"type": "GLOBAL", "global": {
"shared_state": { "cdc": "starting" },
"stream_states": [{
"stream_descriptor": { "name": "KV", "namespace": "PUBLIC" },
"stream_state": { "initial_sync": "ongoing" }
}]}}""",
)
fun testInitialSyncWarmStart() {
val streams: Streams = prelude()
// test current state
Assertions.assertEquals(
Jsons.readTree("{ \"cdc\": \"starting\" }"),
stateManager.scoped(streams.global).current(),
)
Assertions.assertEquals(
Jsons.readTree("{ \"initial_sync\": \"ongoing\" }"),
stateManager.scoped(streams.kv).current(),
)
Assertions.assertNull(stateManager.scoped(streams.events).current())
Assertions.assertEquals(listOf<CatalogValidationFailure>(), handler.get())
// update state manager with fake work results
stateManager
.scoped(streams.kv)
.set(Jsons.readTree("{\"initial_sync\":\"completed\"}"), 789L)
// test checkpoint messages
val checkpoint: List<AirbyteStateMessage> = stateManager.checkpoint()
Assertions.assertEquals(
listOf(
"""{
|"type":"GLOBAL",
|"global":{"shared_state":{"cdc":"starting"},
|"stream_states":[
|{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"},
|"stream_state":{"initial_sync":"completed"}}
|]},"sourceStats":{"recordCount":789.0}
|}
""".trimMargin(),
)
.map { Jsons.readTree(it) },
checkpoint.map { Jsons.valueToTree<JsonNode>(it) },
)
}
@Test
@Property(name = "airbyte.connector.catalog.resource", value = "read/cdc-catalog.json")
@Property(
name = "airbyte.connector.state.json",
value =
"""
{"type": "GLOBAL", "global": {
"shared_state": { "cdc": "starting" },
"stream_states": [{
"stream_descriptor": { "name": "KV", "namespace": "PUBLIC" },
"stream_state": { "initial_sync": "completed" }
}]}}""",
)
fun testIncrementalWarmStart() {
val streams: Streams = prelude()
// test current state
Assertions.assertEquals(
Jsons.readTree("{ \"cdc\": \"starting\" }"),
stateManager.scoped(streams.global).current(),
)
Assertions.assertEquals(
Jsons.readTree("{ \"initial_sync\": \"completed\" }"),
stateManager.scoped(streams.kv).current(),
)
Assertions.assertNull(stateManager.scoped(streams.events).current())
Assertions.assertEquals(listOf<CatalogValidationFailure>(), handler.get())
// update state manager with fake work results
stateManager.scoped(streams.global).set(Jsons.readTree("{\"cdc\":\"ongoing\"}"), 741L)
// test checkpoint messages
val checkpoint: List<AirbyteStateMessage> = stateManager.checkpoint()
Assertions.assertEquals(
listOf(
"""{
|"type":"GLOBAL",
|"global":{"shared_state":{"cdc":"ongoing"},
|"stream_states":[
|{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"},
|"stream_state":{"initial_sync":"completed"}}
|]},
|"sourceStats":{"recordCount":741.0}
|}
""".trimMargin(),
)
.map { Jsons.readTree(it) },
checkpoint.map { Jsons.valueToTree<JsonNode>(it) },
)
}
private fun prelude(): Streams {
val globals: List<Global> = stateManager.feeds.mapNotNull { it as? Global }
Assertions.assertEquals(1, globals.size)
val global: Global = globals.first()
val streams: List<Stream> = stateManager.feeds.mapNotNull { it as? Stream }
Assertions.assertEquals(2, streams.size)
Assertions.assertEquals(1, global.streams.size)
val kv: Stream = global.streams.first()
Assertions.assertEquals("KV", kv.name)
Assertions.assertEquals(listOf("V", "K"), kv.fields.map { it.id })
Assertions.assertEquals(
listOf(listOf("K")),
kv.primaryKeyCandidates.map { col -> col.map { it.id } },
)
Assertions.assertEquals(SyncMode.INCREMENTAL, kv.configuredSyncMode)
val events: Stream = streams.filter { it.namePair != kv.namePair }.first()
Assertions.assertEquals("EVENTS", events.name)
Assertions.assertEquals(listOf("MSG", "ID", "TS"), events.fields.map { it.id })
Assertions.assertEquals(
listOf(listOf("ID")),
events.primaryKeyCandidates.map { col -> col.map { it.id } },
)
Assertions.assertEquals(SyncMode.FULL_REFRESH, events.configuredSyncMode)
return Streams(global, kv, events)
}
data class Streams(
val global: Global,
val kv: Stream,
val events: Stream,
)
}

View File

@@ -0,0 +1,228 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.InputState
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.consumers.BufferingCatalogValidationFailureHandler
import io.airbyte.cdk.consumers.CatalogValidationFailure
import io.airbyte.cdk.consumers.StreamHasNoFields
import io.airbyte.cdk.consumers.StreamNotFound
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteStateMessage
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.SyncMode
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(rebuildContext = true)
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "airbyte.connector.config.cursor.cursor_method", value = "user_defined")
@Property(name = "metadata.resource", value = "read/metadata.json")
class StateManagerStreamStatesTest {
@Inject lateinit var config: SourceConfiguration
@Inject lateinit var configuredCatalog: ConfiguredAirbyteCatalog
@Inject lateinit var inputState: InputState
@Inject lateinit var stateManagerFactory: StateManagerFactory
@Inject lateinit var handler: BufferingCatalogValidationFailureHandler
val stateManager: StateManager by lazy {
stateManagerFactory.create(config, configuredCatalog, inputState)
}
@Test
@Property(
name = "airbyte.connector.catalog.json",
value =
"""
{"streams": [{
"stream": {
"name": "BLAH",
"json_schema": { "type": "object","properties": {} },
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
}]}""",
)
@Property(name = "airbyte.connector.state.json", value = "[]")
fun testBadStreamName() {
// test current state
Assertions.assertEquals(listOf<Feed>(), stateManager.feeds)
Assertions.assertEquals(listOf(StreamNotFound("BLAH", "PUBLIC")), handler.get())
}
@Test
@Property(
name = "airbyte.connector.catalog.json",
value =
"""
{"streams": [{
"stream": {
"name": "EVENTS",
"json_schema": { "type": "object","properties": {} },
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
"sync_mode": "full_refresh",
"destination_sync_mode": "overwrite"
}]}""",
)
@Property(name = "airbyte.connector.state.json", value = "[]")
fun testBadSchema() {
// test current state
Assertions.assertEquals(listOf<Feed>(), stateManager.feeds)
Assertions.assertEquals(listOf(StreamHasNoFields("EVENTS", "PUBLIC")), handler.get())
}
@Test
@Property(
name = "airbyte.connector.catalog.json",
value =
"""
{"streams": [{
"stream": $STREAM,
"sync_mode": "full_refresh",
"primary_key": [["ID"]],
"destination_sync_mode": "overwrite"
}]}""",
)
@Property(name = "airbyte.connector.state.json", value = "[]")
fun testFullRefreshColdStart() {
// test current state
val stream: Stream = prelude(SyncMode.FULL_REFRESH, listOf("ID"))
Assertions.assertNull(stateManager.scoped(stream).current())
Assertions.assertEquals(listOf<CatalogValidationFailure>(), handler.get())
// update state manager with fake work result
stateManager
.scoped(stream)
.set(Jsons.readTree("{\"cursor_incremental\":\"initial_sync_ongoing\"}"), 123L)
// test checkpoint messages
val checkpoint: List<AirbyteStateMessage> = stateManager.checkpoint()
Assertions.assertEquals(
listOf(
"""{
|"type":"STREAM",
|"stream":{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"},
|"stream_state":{"cursor_incremental":"initial_sync_ongoing"}},
|"sourceStats":{"recordCount":123.0}
|}
""".trimMargin(),
)
.map { Jsons.readTree(it) },
checkpoint.map { Jsons.valueToTree<JsonNode>(it) },
)
}
@Test
@Property(
name = "airbyte.connector.catalog.json",
value =
"""
{"streams": [{
"stream": $STREAM,
"sync_mode": "full_refresh",
"primary_key": [["ID"]],
"destination_sync_mode": "overwrite"
}]}""",
)
@Property(
name = "airbyte.connector.state.json",
value =
"""
[{"type": "STREAM", "stream": {
"stream_descriptor": { "name": "EVENTS", "namespace": "PUBLIC" },
"stream_state": { "cursor_incremental": "initial_sync_ongoing" }
}}]""",
)
fun testFullRefreshWarmStart() {
// test current state
val stream: Stream = prelude(SyncMode.FULL_REFRESH, listOf("ID"))
Assertions.assertEquals(
Jsons.readTree("{\"cursor_incremental\": \"initial_sync_ongoing\"}"),
stateManager.scoped(stream).current(),
)
Assertions.assertEquals(listOf<CatalogValidationFailure>(), handler.get())
// update state manager with fake work result
stateManager
.scoped(stream)
.set(Jsons.readTree("{\"cursor_incremental\":\"cursor_checkpoint\"}"), 456)
// test checkpoint messages
val checkpoint: List<AirbyteStateMessage> = stateManager.checkpoint()
Assertions.assertEquals(
listOf(
"""{
|"type":"STREAM",
|"stream":{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"},
|"stream_state":{"cursor_incremental":"cursor_checkpoint"}},
|"sourceStats":{"recordCount":456.0}
|}
""".trimMargin(),
)
.map { Jsons.readTree(it) },
checkpoint.map { Jsons.valueToTree<JsonNode>(it) },
)
}
private fun prelude(
expectedSyncMode: SyncMode,
expectedPrimaryKey: List<String>? = null,
expectedCursor: String? = null,
): Stream {
Assertions.assertEquals(1, stateManager.feeds.size)
Assertions.assertEquals(1, stateManager.feeds.mapNotNull { it as? Stream }.size)
val eventsStream: Stream = stateManager.feeds.mapNotNull { it as? Stream }.first()
Assertions.assertEquals("EVENTS", eventsStream.name)
Assertions.assertEquals(listOf("MSG", "ID", "TS"), eventsStream.fields.map { it.id })
Assertions.assertEquals(
listOf(listOf("ID")),
eventsStream.primaryKeyCandidates.map { col -> col.map { it.id } },
)
Assertions.assertEquals(expectedSyncMode, eventsStream.configuredSyncMode)
Assertions.assertEquals(
expectedPrimaryKey,
eventsStream.configuredPrimaryKey?.map { it.id },
)
Assertions.assertEquals(expectedCursor, eventsStream.configuredCursor?.id)
return eventsStream
}
companion object {
const val STREAM =
"""
{
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"MSG": {
"type": "string"
},
"ID": {
"type": "string"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": ["ID", "TS"],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
}"""
}
}
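// Taken together, these tests pin down the per-stream state lifecycle: a cold
// start (state input "[]") begins with current() == null, a warm start begins
// with the deserialized input state, and in both cases checkpoint() emits one
// STREAM message carrying the latest set() value and the record count passed
// alongside it.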

View File

@@ -0,0 +1,193 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.read.streams
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.TestClockFactory
import io.airbyte.cdk.consumers.BufferingCatalogValidationFailureHandler
import io.airbyte.cdk.consumers.BufferingOutputConsumer
import io.airbyte.cdk.jdbc.H2TestFixture
import io.airbyte.cdk.jdbc.IntFieldType
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.cdk.jdbc.JdbcSelectQuerier
import io.airbyte.cdk.jdbc.StringFieldType
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.read.stream.MemoryFetchSizeEstimator
import io.airbyte.cdk.read.stream.MemoryFetchSizeEstimator.Companion.DEFAULT_FETCH_SIZE
import io.airbyte.cdk.read.stream.Sample
import io.airbyte.cdk.read.stream.Sample.Kind
import io.airbyte.cdk.read.stream.StreamPartitionReader
import io.airbyte.cdk.read.stream.StreamPartitionsCreator
import io.airbyte.cdk.read.stream.StreamPartitionsCreatorUtils
import io.airbyte.cdk.read.stream.StreamReadContextManager
import io.airbyte.cdk.source.Field
import io.airbyte.cdk.source.select.From
import io.airbyte.cdk.source.select.OrderBy
import io.airbyte.cdk.source.select.SelectColumns
import io.airbyte.cdk.source.select.SelectQuerySpec
import io.airbyte.cdk.test.source.FakeSourceConfiguration
import io.airbyte.cdk.test.source.FakeSourceConfigurationFactory
import io.airbyte.cdk.test.source.FakeSourceConfigurationJsonObject
import io.airbyte.cdk.test.source.FakeSourceOperations
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.SyncMode
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class StreamPartitionsCreatorUtilsTest {
val h2 = H2TestFixture()
init {
h2.execute(
"""CREATE TABLE kv (
|k INT PRIMARY KEY,
|v VARCHAR(60))
|
"""
.trimMargin()
.replace('\n', ' '),
)
h2.execute(
"INSERT INTO kv (k, v) " +
"VALUES (1, 'foo'), (2, 'bar'), (3, NULL), (4, 'baz'), (5, 'quux');",
)
}
val k = Field("k", IntFieldType)
val v = Field("v", StringFieldType)
val stream =
Stream(
name = "kv",
namespace = "public",
fields = listOf(k, v),
primaryKeyCandidates = listOf(listOf(k)),
configuredSyncMode = SyncMode.FULL_REFRESH,
configuredPrimaryKey = listOf(k),
configuredCursor = null,
)
val querySpec =
SelectQuerySpec(
SelectColumns(listOf(k)),
From("kv", "public"),
orderBy = OrderBy(listOf(k)),
)
val testParameters =
StreamPartitionsCreator.Parameters(
preferParallelized = true,
tableSampleSize = 2,
throughputBytesPerSecond = 10L,
)
@Test
fun testCollectSample() {
val utils: StreamPartitionsCreatorUtils = createUtils(testParameters)
val sample = utils.collectSample(querySpec) {}
Assertions.assertEquals(Kind.SMALL, sample.kind)
}
@Test
fun testCollectTinySample() {
val utils: StreamPartitionsCreatorUtils =
createUtils(testParameters.copy(tableSampleSize = 100))
val sample = utils.collectSample(querySpec) {}
Assertions.assertEquals(Kind.TINY, sample.kind)
}
@Test
fun testCollectEmptySample() {
h2.execute("TRUNCATE TABLE kv")
val utils: StreamPartitionsCreatorUtils = createUtils(testParameters)
val sample = utils.collectSample(querySpec) {}
Assertions.assertEquals(Kind.EMPTY, sample.kind)
}
@Test
fun testCollectSampleInLargeTable() {
h2.execute("INSERT INTO kv(k, v) SELECT X, NULL FROM SYSTEM_RANGE(6, 100000)")
val utils: StreamPartitionsCreatorUtils =
createUtils(testParameters.copy(tableSampleSize = 100))
val sample = utils.collectSample(querySpec) {}
Assertions.assertEquals(Kind.SMALL, sample.kind)
}
@Test
fun testMemoryFetchSizeEstimator() {
Assertions.assertEquals(
14000,
MemoryFetchSizeEstimator(700_000, 1).apply(Sample(listOf(10, 20, 30), Kind.SMALL, 0L)),
)
Assertions.assertEquals(
7000,
MemoryFetchSizeEstimator(700_000, 2).apply(Sample(listOf(10, 20, 30), Kind.SMALL, 0L)),
)
Assertions.assertEquals(
DEFAULT_FETCH_SIZE,
MemoryFetchSizeEstimator(700_000, 2).apply(Sample(listOf(), Kind.MEDIUM, 0L)),
)
}
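    // The expected values above are consistent with
    //   fetchSize = maxMemoryBytes * r / concurrency / median(sampled row bytes)
    // for a capacity ratio r = 0.4: 700_000 * 0.4 / 1 / 20 = 14_000 and
    // 700_000 * 0.4 / 2 / 20 = 7_000, with DEFAULT_FETCH_SIZE as the fallback
    // for a sample that yields no row sizes. The 0.4 ratio is inferred from
    // these assertions, not read off the estimator's implementation.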
@Test
fun testCursorUpperBound() {
val utils: StreamPartitionsCreatorUtils = createUtils(testParameters)
utils.computeCursorUpperBound(k)
Assertions.assertEquals(
"5",
utils.ctx.transientCursorUpperBoundState.get()?.toString(),
)
}
@Test
fun testSplitPrimaryKey() {
val utils: StreamPartitionsCreatorUtils = createUtils(testParameters)
val input =
StreamPartitionReader.SnapshotInput(
primaryKey = listOf(k),
primaryKeyLowerBound = null,
primaryKeyUpperBound = null,
)
val splits: List<Pair<List<JsonNode>?, List<JsonNode>?>> =
utils.split(input, input.primaryKeyLowerBound, input.primaryKeyUpperBound)
val actual: String = splits.joinToString { (l, r) -> "]${l?.first()}, ${r?.first()}]" }
Assertions.assertEquals("]null, 1], ]1, 2], ]2, null]", actual)
}
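    // The "]a, b]" rendering above denotes left-open, right-closed partitions;
    // null marks an unbounded endpoint, so the three primary-key splits cover
    // the whole key space without gaps or overlap.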
@Test
fun testSplitCursor() {
val utils: StreamPartitionsCreatorUtils = createUtils(testParameters)
val input =
StreamPartitionReader.CursorIncrementalInput(
cursor = k,
cursorLowerBound = Jsons.numberNode(1),
cursorUpperBound = Jsons.numberNode(4),
)
val splits: List<Pair<List<JsonNode>?, List<JsonNode>?>> =
utils.split(input, listOf(input.cursorLowerBound), listOf(input.cursorUpperBound))
val actual: String = splits.joinToString { (l, r) -> "]${l?.first()}, ${r?.first()}]" }
Assertions.assertEquals("]1, 2], ]2, 4]", actual)
}
private fun createUtils(
params: StreamPartitionsCreator.Parameters,
): StreamPartitionsCreatorUtils {
val configPojo: FakeSourceConfigurationJsonObject =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
timeout = "PT1S"
}
val config: FakeSourceConfiguration = FakeSourceConfigurationFactory().make(configPojo)
val ctxManager =
StreamReadContextManager(
config,
BufferingCatalogValidationFailureHandler(),
FakeSourceOperations(),
JdbcSelectQuerier(JdbcConnectionFactory(config)),
BufferingOutputConsumer(TestClockFactory().fixed()),
)
val ctx = ctxManager[stream]
ctx.resetStream()
return StreamPartitionsCreatorUtils(ctx, params)
}
}

View File

@@ -0,0 +1,11 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.AirbyteSourceRunner
/** A fake source database connector, vaguely compatible with the H2 database. */
class FakeSource {
    companion object {
        // Must be static (companion + @JvmStatic) to serve as the JVM entry point.
        @JvmStatic
        fun main(args: Array<String>) {
            AirbyteSourceRunner.run(*args)
        }
    }
}
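// Illustrative invocation sketch. The flags follow the standard Airbyte
// connector CLI contract and are assumptions here, not verified against
// AirbyteSourceRunner:
fun demoInvocations() {
    FakeSource.main(arrayOf("--spec"))
    FakeSource.main(arrayOf("--check", "--config", "secrets/config.json"))
}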

View File

@@ -0,0 +1,81 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.consumers.BufferingOutputConsumer
import io.airbyte.cdk.operation.CheckOperation
import io.airbyte.cdk.operation.Operation
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(environments = ["source"], rebuildContext = true)
@Property(name = Operation.PROPERTY, value = "check")
class FakeSourceCheckTest {
@Inject lateinit var checkOperation: CheckOperation<FakeSourceConfigurationJsonObject>
@Inject lateinit var outputConsumer: BufferingOutputConsumer
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.port", value = "-1")
@Property(name = "airbyte.connector.config.database", value = "testdb")
fun testConfigBadPort() {
assertFailed(" must have a minimum value of 0".toRegex())
}
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "metadata.resource", value = "test/source/metadata-valid.json")
fun testSuccess() {
assertSucceeded()
}
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "metadata.resource", value = "test/source/metadata-empty.json")
fun testBadSchema() {
assertFailed("Discovered zero tables".toRegex())
}
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "metadata.resource", value = "test/source/metadata-column-query-fails.json")
fun testBadTables() {
assertFailed("Unable to query any of the [0-9]+ discovered table".toRegex())
}
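    // Note: assertSucceeded() and assertFailed() delegate to the member
    // assert(Regex?) defined below, not to kotlin.assert(); the member
    // function shadows the stdlib top-level one here.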
fun assertSucceeded() {
assert(null)
}
fun assertFailed(regex: Regex) {
assert(regex)
}
fun assert(failureRegex: Regex?) {
checkOperation.execute()
val statuses: List<AirbyteConnectionStatus> = outputConsumer.statuses()
Assertions.assertEquals(1, statuses.size, statuses.toString())
val actual: AirbyteConnectionStatus = statuses.first()
if (failureRegex == null) {
Assertions.assertEquals(
AirbyteConnectionStatus.Status.SUCCEEDED,
actual.status,
actual.toString(),
)
} else {
Assertions.assertEquals(
AirbyteConnectionStatus.Status.FAILED,
actual.status,
actual.toString(),
)
Assertions.assertTrue(failureRegex.containsMatchIn(actual.message), actual.message)
}
}
}

View File

@@ -0,0 +1,61 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.command.JdbcSourceConfiguration
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.command.SourceConfigurationFactory
import io.airbyte.cdk.ssh.SshConnectionOptions
import io.airbyte.cdk.ssh.SshNoTunnelMethod
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
import io.micronaut.context.annotation.Requires
import io.micronaut.context.annotation.Secondary
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
import java.time.Duration
/** [SourceConfiguration] implementation for [FakeSource]. */
data class FakeSourceConfiguration(
override val realHost: String,
override val realPort: Int,
override val sshTunnel: SshTunnelMethodConfiguration,
override val sshConnectionOptions: SshConnectionOptions,
override val jdbcUrlFmt: String,
override val schemas: Set<String>,
val cursor: CursorConfiguration,
val resumablePreferred: Boolean,
override val maxConcurrency: Int,
override val checkpointTargetInterval: Duration,
) : JdbcSourceConfiguration {
override val global: Boolean = cursor is CdcCursor
override val jdbcProperties: Map<String, String> = mapOf()
override val resourceAcquisitionHeartbeat: Duration
get() = Duration.ofMillis(10)
}
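// Note: `global` is what flips the connector between STREAM and GLOBAL state
// handling; selecting the CDC cursor makes every checkpoint a GLOBAL message,
// as exercised by the state manager tests in this change.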
/** [SourceConfigurationFactory] implementation for [FakeSource]. */
@Singleton
@Requires(env = [Environment.TEST])
@Secondary
class FakeSourceConfigurationFactory :
SourceConfigurationFactory<FakeSourceConfigurationJsonObject, FakeSourceConfiguration> {
override fun makeWithoutExceptionHandling(
pojo: FakeSourceConfigurationJsonObject,
): FakeSourceConfiguration {
val sshConnectionOptions: SshConnectionOptions =
SshConnectionOptions.fromAdditionalProperties(pojo.getAdditionalProperties())
return FakeSourceConfiguration(
realHost = pojo.host,
realPort = pojo.port,
sshTunnel = pojo.getTunnelMethodValue() ?: SshNoTunnelMethod,
sshConnectionOptions = sshConnectionOptions,
jdbcUrlFmt = "jdbc:h2:tcp://%s:%d/mem:${pojo.database}",
schemas = pojo.schemas?.takeUnless { it.isEmpty() }?.toSet() ?: setOf("PUBLIC"),
cursor = pojo.getCursorConfigurationValue() ?: UserDefinedCursor,
resumablePreferred = pojo.resumablePreferred != false,
maxConcurrency = 1,
checkpointTargetInterval = Duration.parse(pojo.timeout).takeIf { it.isPositive }
?: Duration.ofDays(100L),
)
}
}
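// Minimal usage sketch for the factory above; the literal values are
// illustrative only:
fun exampleConfiguration(): FakeSourceConfiguration =
    FakeSourceConfigurationFactory()
        .make(
            FakeSourceConfigurationJsonObject().apply {
                host = "localhost"
                port = 9092
                database = "testdb"
                timeout = "PT10M" // non-positive durations fall back to 100 days
            },
        )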

View File

@@ -0,0 +1,160 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import com.fasterxml.jackson.annotation.JsonAnyGetter
import com.fasterxml.jackson.annotation.JsonAnySetter
import com.fasterxml.jackson.annotation.JsonGetter
import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.annotation.JsonPropertyDescription
import com.fasterxml.jackson.annotation.JsonPropertyOrder
import com.fasterxml.jackson.annotation.JsonSetter
import com.fasterxml.jackson.annotation.JsonSubTypes
import com.fasterxml.jackson.annotation.JsonTypeInfo
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaArrayWithUniqueItems
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDefault
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import io.airbyte.cdk.command.CONNECTOR_CONFIG_PREFIX
import io.airbyte.cdk.command.ConfigurationJsonObjectBase
import io.airbyte.cdk.exceptions.ConfigErrorException
import io.airbyte.cdk.ssh.MicronautPropertiesFriendlySshTunnelMethodConfigurationJsonObject
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
import io.micronaut.context.annotation.ConfigurationBuilder
import io.micronaut.context.annotation.ConfigurationProperties
import io.micronaut.context.annotation.Secondary
import jakarta.inject.Singleton
/** [ConfigurationJsonObjectBase] implementation for [FakeSource]. */
@JsonSchemaTitle("Test Source Spec")
@JsonPropertyOrder(
value =
[
"host",
"port",
"database",
"schemas",
"tunnel_method",
"cursor",
],
)
@Singleton
@Secondary
@ConfigurationProperties(CONNECTOR_CONFIG_PREFIX)
class FakeSourceConfigurationJsonObject : ConfigurationJsonObjectBase() {
@JsonProperty("host")
@JsonSchemaTitle("Host")
@JsonSchemaInject(json = """{"order":1}""")
@JsonSchemaDefault("localhost")
@JsonPropertyDescription("Hostname of the database.")
var host: String = "localhost"
@JsonProperty("port")
@JsonSchemaTitle("Port")
@JsonSchemaInject(json = """{"order":2,"minimum": 0,"maximum": 65536}""")
@JsonSchemaDefault("9092")
@JsonPropertyDescription("Port of the database.")
var port: Int = 9092
@JsonProperty("database")
@JsonSchemaTitle("Database")
@JsonPropertyDescription("Name of the database.")
@JsonSchemaInject(json = """{"order":3}""")
lateinit var database: String
@JsonProperty("schemas")
@JsonSchemaTitle("Schemas")
@JsonSchemaArrayWithUniqueItems("schemas")
@JsonPropertyDescription("The list of schemas to sync from. Defaults to PUBLIC.")
@JsonSchemaInject(json = """{"order":4,"minItems":1,"uniqueItems":true}""")
var schemas: List<String>? = null
@JsonIgnore
@ConfigurationBuilder(configurationPrefix = "tunnel_method")
val tunnelMethod = MicronautPropertiesFriendlySshTunnelMethodConfigurationJsonObject()
@JsonIgnore var tunnelMethodJson: SshTunnelMethodConfiguration? = null
@JsonSetter("tunnel_method")
fun setTunnelMethodValue(value: SshTunnelMethodConfiguration?) {
tunnelMethodJson = value
}
@JsonGetter("tunnel_method")
@JsonSchemaTitle("SSH Tunnel Method")
@JsonPropertyDescription(
"Whether to initiate an SSH tunnel before connecting to the database," +
" and if so, which kind of authentication to use.",
)
@JsonSchemaInject(json = """{"order":5}""")
fun getTunnelMethodValue(): SshTunnelMethodConfiguration? =
tunnelMethodJson ?: tunnelMethod.asSshTunnelMethod()
@JsonIgnore
@ConfigurationBuilder(configurationPrefix = "cursor")
val cursor = MicronautPropertiesFriendlyCursorConfiguration()
@JsonIgnore var cursorJson: CursorConfiguration? = null
@JsonSetter("cursor")
fun setCursorMethodValue(value: CursorConfiguration?) {
cursorJson = value
}
@JsonGetter("cursor")
@JsonSchemaTitle("Update Method")
@JsonPropertyDescription("Configures how data is extracted from the database.")
@JsonSchemaInject(json = """{"order":6,"display_type":"radio"}""")
fun getCursorConfigurationValue(): CursorConfiguration? =
cursorJson ?: cursor.asCursorConfiguration()
@JsonProperty("resumable_preferred")
@JsonSchemaDefault("true")
@JsonSchemaInject(json = """{"order":7,"display_type":"check"}""")
var resumablePreferred: Boolean? = true
@JsonProperty("timeout")
@JsonSchemaDefault("PT0S")
@JsonSchemaInject(json = """{"order":8}""")
var timeout: String? = "PT0S"
@JsonIgnore var additionalPropertiesMap = mutableMapOf<String, Any>()
@JsonAnyGetter fun getAdditionalProperties(): Map<String, Any> = additionalPropertiesMap
@JsonAnySetter
fun setAdditionalProperty(
name: String,
value: Any,
) {
additionalPropertiesMap[name] = value
}
}
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "cursor_method")
@JsonSubTypes(
JsonSubTypes.Type(value = UserDefinedCursor::class, name = "user_defined"),
JsonSubTypes.Type(value = CdcCursor::class, name = "cdc"),
)
@JsonSchemaTitle("Update Method")
@JsonSchemaDescription("Configures how data is extracted from the database.")
sealed interface CursorConfiguration
@JsonSchemaTitle("Scan Changes with User Defined Cursor")
data object UserDefinedCursor : CursorConfiguration
@JsonSchemaTitle("Read Changes using Change Data Capture (CDC)")
data object CdcCursor : CursorConfiguration
@ConfigurationProperties("$CONNECTOR_CONFIG_PREFIX.cursor")
class MicronautPropertiesFriendlyCursorConfiguration {
var cursorMethod: String = "user_defined"
fun asCursorConfiguration(): CursorConfiguration =
when (cursorMethod) {
"user_defined" -> UserDefinedCursor
"cdc" -> CdcCursor
else -> throw ConfigErrorException("invalid value $cursorMethod")
}
}
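// Sketch of the cursor_method mapping above, exercised directly rather than
// through a Micronaut context:
fun demoCursorMapping() {
    val cfg = MicronautPropertiesFriendlyCursorConfiguration()
    check(cfg.asCursorConfiguration() == UserDefinedCursor) // the default
    cfg.cursorMethod = "cdc"
    check(cfg.asCursorConfiguration() == CdcCursor)
}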

View File

@@ -0,0 +1,51 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.ssh.SshConnectionOptions
import io.airbyte.cdk.ssh.SshPasswordAuthTunnelMethod
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import kotlin.time.Duration
import kotlin.time.Duration.Companion.milliseconds
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(rebuildContext = true)
class FakeSourceConfigurationTest {
@Inject lateinit var actual: SourceConfiguration
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "airbyte.connector.config.schemas", value = "PUBLIC,TESTSCHEMA")
@Property(
name = "airbyte.connector.config.tunnel_method.tunnel_method",
value = "SSH_PASSWORD_AUTH",
)
@Property(name = "airbyte.connector.config.tunnel_method.tunnel_host", value = "localhost")
@Property(name = "airbyte.connector.config.tunnel_method.tunnel_port", value = "22")
@Property(name = "airbyte.connector.config.tunnel_method.tunnel_user", value = "sshuser")
@Property(
name = "airbyte.connector.config.tunnel_method.tunnel_user_password",
value = "secret",
)
fun testVanilla() {
val expected =
FakeSourceConfiguration(
realHost = "localhost",
realPort = 9092,
sshTunnel = SshPasswordAuthTunnelMethod("localhost", 22, "sshuser", "secret"),
sshConnectionOptions =
SshConnectionOptions(1_000.milliseconds, 2_000.milliseconds, Duration.ZERO),
jdbcUrlFmt = "jdbc:h2:tcp://%s:%d/mem:testdb",
schemas = setOf("PUBLIC", "TESTSCHEMA"),
cursor = UserDefinedCursor,
resumablePreferred = true,
maxConcurrency = 1,
checkpointTargetInterval = java.time.Duration.ofDays(100L),
)
Assertions.assertEquals(expected, actual)
}
}

View File

@@ -0,0 +1,110 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.consumers.BufferingOutputConsumer
import io.airbyte.cdk.operation.DiscoverOperation
import io.airbyte.cdk.operation.Operation
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteCatalog
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.SyncMode
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(environments = ["source"], rebuildContext = true)
@Property(name = Operation.PROPERTY, value = "discover")
class FakeSourceDiscoverTest {
@Inject lateinit var discoverOperation: DiscoverOperation
@Inject lateinit var outputConsumer: BufferingOutputConsumer
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "airbyte.connector.config.cursor", value = "user_defined")
@Property(name = "metadata.resource", value = "test/source/metadata-valid.json")
fun testCursorBasedIncremental() {
val events =
AirbyteStream()
.withName("EVENTS")
.withNamespace("PUBLIC")
.withJsonSchema(Jsons.readTree(EVENTS_SCHEMA))
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(listOf(listOf("ID")))
val kv =
AirbyteStream()
.withName("KV")
.withNamespace("PUBLIC")
.withJsonSchema(Jsons.readTree(KV_SCHEMA))
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(listOf(listOf("K")))
val expected = AirbyteCatalog().withStreams(listOf(events, kv))
discoverOperation.execute()
Assertions.assertEquals(listOf(expected), outputConsumer.catalogs())
}
@Test
@Property(name = "airbyte.connector.config.host", value = "localhost")
@Property(name = "airbyte.connector.config.database", value = "testdb")
@Property(name = "airbyte.connector.config.cursor", value = "cdc")
@Property(name = "metadata.resource", value = "test/source/metadata-valid.json")
fun testCdcIncremental() {
val events =
AirbyteStream()
.withName("EVENTS")
.withNamespace("PUBLIC")
.withJsonSchema(Jsons.readTree(EVENTS_SCHEMA))
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(listOf(listOf("ID")))
val kv =
AirbyteStream()
.withName("KV")
.withNamespace("PUBLIC")
.withJsonSchema(Jsons.readTree(KV_SCHEMA))
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(listOf(listOf("K")))
val expected = AirbyteCatalog().withStreams(listOf(events, kv))
discoverOperation.execute()
Assertions.assertEquals(listOf(expected), outputConsumer.catalogs())
}
companion object {
const val EVENTS_SCHEMA =
"""
{
"type": "object",
"properties": {
"MSG": {
"type": "string"
},
"ID": {
"type": "string"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
}
"""
const val KV_SCHEMA =
"""
{
"type": "object",
"properties": {
"V": {
"type": "string"
},
"K": {
"type": "number",
"airbyte_type": "integer"
}
}
}
"""
}
}

View File

@@ -0,0 +1,169 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.command.SyncsTestFixture
import io.airbyte.cdk.jdbc.H2TestFixture
import io.airbyte.cdk.ssh.SshBastionContainer
import io.airbyte.cdk.testcontainers.DOCKER_HOST_FROM_WITHIN_CONTAINER
import java.sql.Connection
import java.sql.Statement
import org.junit.jupiter.api.Test
import org.testcontainers.Testcontainers
class FakeSourceIntegrationTest {
@Test
fun testSpec() {
SyncsTestFixture.testSpec("test/source/expected-spec.json")
}
@Test
fun testCheckFailBadConfig() {
SyncsTestFixture.testCheck(
FakeSourceConfigurationJsonObject().apply {
port = -1
database = ""
},
"Could not connect with provided configuration",
)
}
@Test
fun testCheckFailNoDatabase() {
H2TestFixture().use { h2: H2TestFixture ->
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database + "_garbage"
}
SyncsTestFixture.testCheck(configPojo, "Error code: 90149")
}
}
@Test
fun testCheckFailNoTables() {
H2TestFixture().use { h2: H2TestFixture ->
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
}
SyncsTestFixture.testCheck(configPojo, "Discovered zero tables")
}
}
@Test
fun testCheckSuccess() {
H2TestFixture().use { h2: H2TestFixture ->
h2.createConnection().use(::prelude)
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
}
SyncsTestFixture.testCheck(configPojo)
}
}
@Test
fun testCheckSshTunnel() {
H2TestFixture().use { h2: H2TestFixture ->
h2.createConnection().use(::prelude)
Testcontainers.exposeHostPorts(h2.port)
SshBastionContainer(tunnelingToHostPort = h2.port).use { ssh: SshBastionContainer ->
val configPojo =
FakeSourceConfigurationJsonObject().apply {
host =
                        DOCKER_HOST_FROM_WITHIN_CONTAINER // required only because the SSH bastion runs in a container
port = h2.port
database = h2.database
}
configPojo.setTunnelMethodValue(ssh.outerKeyAuthTunnelMethod)
SyncsTestFixture.testCheck(configPojo)
configPojo.setTunnelMethodValue(ssh.outerPasswordAuthTunnelMethod)
SyncsTestFixture.testCheck(configPojo)
}
}
}
@Test
fun testDiscover() {
H2TestFixture().use { h2: H2TestFixture ->
h2.createConnection().use(::prelude)
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
}
SyncsTestFixture.testDiscover(configPojo, "test/source/expected-cursor-catalog.json")
}
}
@Test
fun testReadGlobal() {
H2TestFixture().use { h2: H2TestFixture ->
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
setCursorMethodValue(CdcCursor)
resumablePreferred = false
}
SyncsTestFixture.testSyncs(
configPojo,
h2::createConnection,
::prelude,
"test/source/expected-cdc-catalog.json",
"test/source/cdc-catalog.json",
SyncsTestFixture.AfterRead.Companion.fromExpectedMessages(
"test/source/expected-messages-global-cold-start.json",
),
)
}
}
@Test
fun testReadStreams() {
H2TestFixture().use { h2: H2TestFixture ->
val configPojo =
FakeSourceConfigurationJsonObject().apply {
port = h2.port
database = h2.database
resumablePreferred = true
}
SyncsTestFixture.testSyncs(
configPojo,
h2::createConnection,
::prelude,
"test/source/expected-cursor-catalog.json",
"test/source/cursor-catalog.json",
SyncsTestFixture.AfterRead.Companion.fromExpectedMessages(
"test/source/expected-messages-stream-cold-start.json",
),
SyncsTestFixture.AfterRead.Companion.fromExpectedMessages(
"test/source/expected-messages-stream-warm-start.json",
),
)
}
}
companion object {
@JvmStatic
fun prelude(connection: Connection) {
for (sql in listOf(CREATE_KV, INSERT_KV, CREATE_EVENTS, INSERT_EVENTS)) {
connection.createStatement().use { stmt: Statement -> stmt.execute(sql) }
}
}
const val CREATE_KV = "CREATE TABLE kv (k INT PRIMARY KEY, v VARCHAR(60));"
const val INSERT_KV = "INSERT INTO kv (k, v) VALUES (1, 'foo'), (2, 'bar'), (3, NULL);"
const val CREATE_EVENTS =
"CREATE TABLE events (" +
"id UUID PRIMARY KEY," +
"ts TIMESTAMP WITH TIME ZONE NOT NULL," +
"msg VARCHAR(60));"
const val INSERT_EVENTS =
"INSERT INTO events (id, ts, msg) VALUES " +
"('cc449902-30da-5ea8-c4d3-02732e5bfce9', '2024-04-29T00:00:00-04:00', 'bar')," +
"('dd55aa13-41eb-6fb4-d5e4-13843f6c0dfa', '2024-04-30T00:00:00-04:00', NULL);"
}
}

View File

@@ -0,0 +1,185 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.jdbc.ArrayFieldType
import io.airbyte.cdk.jdbc.BigDecimalFieldType
import io.airbyte.cdk.jdbc.BigIntegerFieldType
import io.airbyte.cdk.jdbc.BinaryStreamFieldType
import io.airbyte.cdk.jdbc.BooleanFieldType
import io.airbyte.cdk.jdbc.ByteFieldType
import io.airbyte.cdk.jdbc.BytesFieldType
import io.airbyte.cdk.jdbc.ClobFieldType
import io.airbyte.cdk.jdbc.DoubleFieldType
import io.airbyte.cdk.jdbc.FloatFieldType
import io.airbyte.cdk.jdbc.IntFieldType
import io.airbyte.cdk.jdbc.JdbcMetadataQuerier
import io.airbyte.cdk.jdbc.LocalDateFieldType
import io.airbyte.cdk.jdbc.LocalDateTimeFieldType
import io.airbyte.cdk.jdbc.LocalTimeFieldType
import io.airbyte.cdk.jdbc.LosslessJdbcFieldType
import io.airbyte.cdk.jdbc.NClobFieldType
import io.airbyte.cdk.jdbc.NStringFieldType
import io.airbyte.cdk.jdbc.NullFieldType
import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType
import io.airbyte.cdk.jdbc.OffsetTimeFieldType
import io.airbyte.cdk.jdbc.PokemonFieldType
import io.airbyte.cdk.jdbc.ShortFieldType
import io.airbyte.cdk.jdbc.StringFieldType
import io.airbyte.cdk.jdbc.UrlFieldType
import io.airbyte.cdk.jdbc.XmlFieldType
import io.airbyte.cdk.source.FieldType
import io.airbyte.cdk.source.select.And
import io.airbyte.cdk.source.select.Equal
import io.airbyte.cdk.source.select.From
import io.airbyte.cdk.source.select.FromNode
import io.airbyte.cdk.source.select.FromSample
import io.airbyte.cdk.source.select.Greater
import io.airbyte.cdk.source.select.Lesser
import io.airbyte.cdk.source.select.LesserOrEqual
import io.airbyte.cdk.source.select.Limit
import io.airbyte.cdk.source.select.LimitNode
import io.airbyte.cdk.source.select.NoFrom
import io.airbyte.cdk.source.select.NoLimit
import io.airbyte.cdk.source.select.NoOrderBy
import io.airbyte.cdk.source.select.NoWhere
import io.airbyte.cdk.source.select.Or
import io.airbyte.cdk.source.select.OrderBy
import io.airbyte.cdk.source.select.OrderByNode
import io.airbyte.cdk.source.select.SelectColumnMaxValue
import io.airbyte.cdk.source.select.SelectColumns
import io.airbyte.cdk.source.select.SelectNode
import io.airbyte.cdk.source.select.SelectQuery
import io.airbyte.cdk.source.select.SelectQueryGenerator
import io.airbyte.cdk.source.select.SelectQuerySpec
import io.airbyte.cdk.source.select.Where
import io.airbyte.cdk.source.select.WhereClauseLeafNode
import io.airbyte.cdk.source.select.WhereClauseNode
import io.airbyte.cdk.source.select.WhereNode
import io.micronaut.context.annotation.Requires
import io.micronaut.context.annotation.Secondary
import io.micronaut.context.env.Environment
import jakarta.inject.Singleton
import java.sql.JDBCType
/** Stateless connector-specific logic for [FakeSource]. */
@Singleton
@Requires(env = [Environment.TEST])
@Secondary
class FakeSourceOperations : JdbcMetadataQuerier.FieldTypeMapper, SelectQueryGenerator {
override fun toFieldType(c: JdbcMetadataQuerier.ColumnMetadata): FieldType =
when (c.type.jdbcType) {
JDBCType.BIT,
JDBCType.BOOLEAN, -> BooleanFieldType
JDBCType.TINYINT -> ByteFieldType
JDBCType.SMALLINT -> ShortFieldType
JDBCType.INTEGER -> IntFieldType
JDBCType.BIGINT -> BigIntegerFieldType
JDBCType.FLOAT -> FloatFieldType
JDBCType.DOUBLE -> DoubleFieldType
JDBCType.REAL,
JDBCType.NUMERIC,
JDBCType.DECIMAL, -> BigDecimalFieldType
JDBCType.CHAR,
JDBCType.VARCHAR,
JDBCType.LONGVARCHAR, -> StringFieldType
JDBCType.NCHAR,
JDBCType.NVARCHAR,
JDBCType.LONGNVARCHAR, -> NStringFieldType
JDBCType.DATE -> LocalDateFieldType
JDBCType.TIME -> LocalTimeFieldType
JDBCType.TIMESTAMP -> LocalDateTimeFieldType
JDBCType.TIME_WITH_TIMEZONE -> OffsetTimeFieldType
JDBCType.TIMESTAMP_WITH_TIMEZONE -> OffsetDateTimeFieldType
JDBCType.BLOB -> BinaryStreamFieldType
JDBCType.BINARY,
JDBCType.VARBINARY,
JDBCType.LONGVARBINARY, -> BytesFieldType
JDBCType.CLOB -> ClobFieldType
JDBCType.NCLOB -> NClobFieldType
JDBCType.DATALINK -> UrlFieldType
JDBCType.SQLXML -> XmlFieldType
JDBCType.ARRAY -> ArrayFieldType(StringFieldType)
JDBCType.NULL -> NullFieldType
JDBCType.OTHER,
JDBCType.JAVA_OBJECT,
JDBCType.DISTINCT,
JDBCType.STRUCT,
JDBCType.REF,
JDBCType.ROWID,
JDBCType.REF_CURSOR,
null, -> PokemonFieldType
}
override fun generate(ast: SelectQuerySpec): SelectQuery =
SelectQuery(ast.sql(), ast.select.columns, ast.bindings())
fun SelectQuerySpec.sql(): String {
val components: List<String> =
listOf(select.sql(), from.sql(), where.sql(), orderBy.sql(), limit.sql())
return components.filter { it.isNotBlank() }.joinToString(" ")
}
fun SelectNode.sql(): String =
when (this) {
is SelectColumns -> "SELECT " + columns.map { it.id }.joinToString(", ")
is SelectColumnMaxValue -> "SELECT MAX(${column.id})"
}
fun FromNode.sql(): String =
when (this) {
NoFrom -> ""
is From -> if (namespace == null) "FROM $name" else "FROM $namespace.$name"
is FromSample -> {
val innerFrom: String = From(name, namespace).sql()
val innerWhere = "WHERE MOD(ROWNUM(), $sampleRateInv) = 0 "
val innerLimit = "LIMIT $sampleSize"
"FROM (SELECT * $innerFrom $innerWhere $innerLimit)"
}
}
fun WhereNode.sql(): String =
when (this) {
NoWhere -> ""
is Where -> "WHERE ${clause.sql()}"
}
fun WhereClauseNode.sql(): String =
when (this) {
is And -> conj.map { it.sql() }.joinToString(") AND (", "(", ")")
is Or -> disj.map { it.sql() }.joinToString(") OR (", "(", ")")
is Equal -> "${column.id} = ?"
is Greater -> "${column.id} > ?"
is LesserOrEqual -> "${column.id} <= ?"
is Lesser -> "${column.id} < ?"
}
fun OrderByNode.sql(): String =
when (this) {
NoOrderBy -> ""
is OrderBy -> "ORDER BY " + columns.map { it.id }.joinToString(", ")
}
fun LimitNode.sql(): String =
when (this) {
NoLimit -> ""
is Limit -> "LIMIT $n"
}
fun SelectQuerySpec.bindings(): List<SelectQuery.Binding> = where.bindings()
fun WhereNode.bindings(): List<SelectQuery.Binding> =
when (this) {
is NoWhere -> listOf()
is Where -> clause.bindings()
}
fun WhereClauseNode.bindings(): List<SelectQuery.Binding> =
when (this) {
is And -> conj.flatMap { it.bindings() }
is Or -> disj.flatMap { it.bindings() }
is WhereClauseLeafNode -> {
val type = column.type as LosslessJdbcFieldType<*, *>
listOf(SelectQuery.Binding(bindingValue, type))
}
}
}
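// Illustrative round trip through the generator above, yielding
// "SELECT k FROM public.kv WHERE k > ? ORDER BY k" with one binding.
// The constructor argument order for Where/Greater is assumed from the sql()
// and bindings() helpers, and the snippet additionally assumes imports of
// io.airbyte.cdk.source.Field, io.airbyte.cdk.jdbc.IntFieldType and
// io.airbyte.cdk.util.Jsons:
fun demoGenerate(): SelectQuery {
    val k = Field("k", IntFieldType)
    val spec =
        SelectQuerySpec(
            SelectColumns(listOf(k)),
            From("kv", "public"),
            Where(Greater(k, Jsons.numberNode(1))),
            orderBy = OrderBy(listOf(k)),
        )
    return FakeSourceOperations().generate(spec)
}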

View File

@@ -0,0 +1,41 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.read.Feed
import io.airbyte.cdk.read.Global
import io.airbyte.cdk.read.StateQuerier
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.read.stream.StreamPartitionReader
import io.airbyte.cdk.read.stream.StreamPartitionsCreator
import io.airbyte.cdk.read.stream.StreamReadContext
import io.airbyte.cdk.read.stream.StreamReadContextManager
import io.airbyte.cdk.read.stream.streamPartitionsCreatorInput
import io.airbyte.cdk.source.CreateNoPartitions
import io.airbyte.cdk.source.PartitionsCreator
import io.airbyte.cdk.source.PartitionsCreatorFactory
import jakarta.inject.Singleton
@Singleton
class FakeSourcePartitionsCreatorFactory(
val streamReadContextManager: StreamReadContextManager,
) : PartitionsCreatorFactory {
override fun make(
stateQuerier: StateQuerier,
feed: Feed,
): PartitionsCreator {
val opaqueStateValue: OpaqueStateValue? = stateQuerier.current(feed)
return when (feed) {
is Global -> CreateNoPartitions
is Stream -> {
val ctx: StreamReadContext = streamReadContextManager[feed]
StreamPartitionsCreator(
ctx,
opaqueStateValue.streamPartitionsCreatorInput(ctx),
StreamPartitionsCreator.Parameters(preferParallelized = false),
StreamPartitionReader.Parameters(preferResumable = false),
)
}
}
}
}
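// Dispatch summary: Global feeds yield CreateNoPartitions because the fake
// source implements no CDC reader, while each Stream feed gets a
// non-parallelized, non-resumable StreamPartitionsCreator seeded from the
// feed's current opaque state value.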

View File

@@ -0,0 +1,46 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.cdk.test.source
import com.deblock.jsondiff.DiffGenerator
import com.deblock.jsondiff.diff.JsonDiff
import com.deblock.jsondiff.matcher.CompositeJsonMatcher
import com.deblock.jsondiff.matcher.JsonMatcher
import com.deblock.jsondiff.matcher.LenientJsonObjectPartialMatcher
import com.deblock.jsondiff.matcher.StrictJsonArrayPartialMatcher
import com.deblock.jsondiff.matcher.StrictPrimitivePartialMatcher
import com.deblock.jsondiff.viewer.OnlyErrorDiffViewer
import io.airbyte.cdk.consumers.BufferingOutputConsumer
import io.airbyte.cdk.operation.Operation
import io.airbyte.cdk.operation.SpecOperation
import io.airbyte.cdk.util.Jsons
import io.airbyte.cdk.util.ResourceUtils
import io.micronaut.context.annotation.Property
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(environments = ["source"], rebuildContext = true)
@Property(name = Operation.PROPERTY, value = "spec")
@Property(name = "airbyte.connector.metadata.documentation-url", value = "https://docs.airbyte.com")
class FakeSourceSpecTest {
@Inject lateinit var specOperation: SpecOperation
@Inject lateinit var outputConsumer: BufferingOutputConsumer
@Test
fun testSpec() {
val expected: String = ResourceUtils.readResource("test/source/expected-spec.json")
specOperation.execute()
val actual: String = Jsons.writeValueAsString(outputConsumer.specs().last())
val jsonMatcher: JsonMatcher =
CompositeJsonMatcher(
StrictJsonArrayPartialMatcher(),
LenientJsonObjectPartialMatcher(),
StrictPrimitivePartialMatcher(),
)
val diff: JsonDiff = DiffGenerator.diff(expected, actual, jsonMatcher)
Assertions.assertEquals("", OnlyErrorDiffViewer.from(diff).toString())
}
}
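// The composite matcher pairs strict array and primitive comparison with
// lenient, partial object matching, which presumably lets the expected
// resource omit object properties; only genuine mismatches surface through
// OnlyErrorDiffViewer.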

View File

@@ -0,0 +1,209 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Test Source Spec",
"type": "object",
"additionalProperties": true,
"properties": {
"host": {
"type": "string",
"default": "localhost",
"description": "Hostname of the database.",
"title": "Host",
"order": 1
},
"port": {
"type": "integer",
"default": 9092,
"description": "Port of the database.",
"title": "Port",
"order": 2,
"minimum": 0,
"maximum": 65536
},
"database": {
"type": "string",
"description": "Name of the database.",
"title": "Database",
"order": 3
},
"schemas": {
"type": "array",
"items": {
"type": "string"
},
"description": "The list of schemas to sync from. Defaults to PUBLIC.",
"title": "Schemas",
"order": 4,
"minItems": 1,
"uniqueItems": true
},
"tunnel_method": {
"oneOf": [
{
"title": "No Tunnel",
"type": "object",
"additionalProperties": true,
"description": "No ssh tunnel needed to connect to database",
"properties": {
"tunnel_method": {
"type": "string",
"enum": ["NO_TUNNEL"],
"default": "NO_TUNNEL"
}
},
"required": ["tunnel_method"]
},
{
"title": "SSH Key Authentication",
"type": "object",
"additionalProperties": true,
"description": "Connect through a jump server tunnel host using username and ssh key",
"properties": {
"tunnel_method": {
"type": "string",
"enum": ["SSH_KEY_AUTH"],
"default": "SSH_KEY_AUTH"
},
"tunnel_host": {
"type": "string",
"description": "Hostname of the jump server host that allows inbound ssh tunnel.",
"title": "SSH Tunnel Jump Server Host",
"order": 1
},
"tunnel_port": {
"type": "integer",
"default": 22,
"description": "Port on the proxy/jump server that accepts inbound ssh connections.",
"title": "SSH Connection Port",
"order": 2,
"minimum": 0,
"maximum": 65536
},
"tunnel_user": {
"type": "string",
"description": "OS-level username for logging into the jump server host",
"title": "SSH Login Username",
"order": 3
},
"ssh_key": {
"type": "string",
"description": "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )",
"title": "SSH Private Key",
"order": 4,
"multiline": true,
"airbyte_secret": true
}
},
"required": [
"tunnel_method",
"tunnel_host",
"tunnel_port",
"tunnel_user",
"ssh_key"
]
},
{
"title": "Password Authentication",
"type": "object",
"additionalProperties": true,
"description": "Connect through a jump server tunnel host using username and password authentication",
"properties": {
"tunnel_method": {
"type": "string",
"enum": ["SSH_PASSWORD_AUTH"],
"default": "SSH_PASSWORD_AUTH"
},
"tunnel_host": {
"type": "string",
"description": "Hostname of the jump server host that allows inbound ssh tunnel.",
"title": "SSH Tunnel Jump Server Host",
"order": 1
},
"tunnel_port": {
"type": "integer",
"default": 22,
"description": "Port on the proxy/jump server that accepts inbound ssh connections.",
"title": "SSH Connection Port",
"order": 2,
"minimum": 0,
"maximum": 65536
},
"tunnel_user": {
"type": "string",
"description": "OS-level username for logging into the jump server host",
"title": "SSH Login Username",
"order": 3
},
"tunnel_user_password": {
"type": "string",
"description": "OS-level password for logging into the jump server host",
"title": "Password",
"order": 4,
"airbyte_secret": true
}
},
"required": [
"tunnel_method",
"tunnel_host",
"tunnel_port",
"tunnel_user",
"tunnel_user_password"
]
}
],
"description": "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use.",
"title": "SSH Tunnel Method",
"order": 5,
"type": "object"
},
"cursor": {
"oneOf": [
{
"title": "Scan Changes with User Defined Cursor",
"type": "object",
"additionalProperties": true,
"description": "Configures how data is extracted from the database.",
"properties": {
"cursor_method": {
"type": "string",
"enum": ["user_defined"],
"default": "user_defined"
}
},
"required": ["cursor_method"]
},
{
"title": "Read Changes using Change Data Capture (CDC)",
"type": "object",
"additionalProperties": true,
"description": "Configures how data is extracted from the database.",
"properties": {
"cursor_method": {
"type": "string",
"enum": ["cdc"],
"default": "cdc"
}
},
"required": ["cursor_method"]
}
],
"description": "Configures how data is extracted from the database.",
"title": "Update Method",
"order": 6,
"display_type": "radio",
"type": "object"
},
"resumable_preferred": {
"type": "boolean",
"default": true,
"order": 7,
"display_type": "check"
},
"timeout": {
"type": "string",
"default": "PT0S",
"order": 8
}
},
"required": ["host", "port", "database"]
}

View File

@@ -0,0 +1,4 @@
---
data:
dockerRepository: "airbyte/fake-source"
documentationUrl: "https://docs.airbyte.com"

View File

@@ -0,0 +1,58 @@
{
"streams": [
{
"stream": {
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"MSG": {
"type": "string"
},
"ID": {
"type": "string"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": ["ID", "TS"],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
"sync_mode": "full_refresh",
"cursor_field": [],
"destination_sync_mode": "overwrite",
"primary_key": [["ID"]]
},
{
"stream": {
"name": "KV",
"json_schema": {
"type": "object",
"properties": {
"V": {
"type": "string"
},
"K": {
"type": "number",
"airbyte_type": "integer"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": ["K"],
"source_defined_primary_key": [["K"]],
"namespace": "PUBLIC"
},
"sync_mode": "incremental",
"cursor_field": ["_ab_cdc_lsn"],
"destination_sync_mode": "overwrite",
"primary_key": [["K"]]
}
]
}

View File

@@ -0,0 +1,58 @@
{
"streams": [
{
"stream": {
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"MSG": {
"type": "string"
},
"ID": {
"type": "string"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
"sync_mode": "incremental",
"cursor_field": ["TS"],
"destination_sync_mode": "overwrite",
"primary_key": [["ID"]]
},
{
"stream": {
"name": "KV",
"json_schema": {
"type": "object",
"properties": {
"V": {
"type": "string"
},
"K": {
"type": "number",
"airbyte_type": "integer"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [["K"]],
"namespace": "PUBLIC"
},
"sync_mode": "full_refresh",
"cursor_field": [],
"destination_sync_mode": "overwrite",
"primary_key": []
}
]
}

View File

@@ -0,0 +1,25 @@
[
{
"name": "EVENTS",
"namespace": "PUBLIC",
"metadata": {
"columns": {
"ID": "io.airbyte.cdk.jdbc.StringFieldType",
"TS": "io.airbyte.cdk.jdbc.OffsetDateTimeFieldType",
"MSG": "io.airbyte.cdk.jdbc.StringFieldType"
},
"primaryKeys": [["ID"]]
}
},
{
"name": "KV",
"namespace": "PUBLIC",
"metadata": {
"columns": {
"K": "io.airbyte.cdk.jdbc.IntFieldType",
"V": "io.airbyte.cdk.jdbc.StringFieldType"
},
"primaryKeys": [["K"]]
}
}
]

View File

@@ -0,0 +1,59 @@
{
"streams": [
{
"stream": {
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"ID": {
"type": "string",
"contentEncoding": "base64"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
},
"MSG": {
"type": "string"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": ["ID", "TS"],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
"sync_mode": "full_refresh",
"cursor_field": [],
"destination_sync_mode": "overwrite",
"primary_key": [["ID"]]
},
{
"stream": {
"name": "KV",
"json_schema": {
"type": "object",
"properties": {
"K": {
"type": "number",
"airbyte_type": "integer"
},
"V": {
"type": "string"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": ["K"],
"source_defined_primary_key": [["K"]],
"namespace": "PUBLIC"
},
"sync_mode": "incremental",
"cursor_field": ["_ab_cdc_lsn"],
"destination_sync_mode": "overwrite",
"primary_key": [["K"]]
}
]
}

View File

@@ -0,0 +1,59 @@
{
"streams": [
{
"stream": {
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"ID": {
"type": "string",
"contentEncoding": "base64"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
},
"MSG": {
"type": "string"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
"sync_mode": "incremental",
"cursor_field": ["TS"],
"destination_sync_mode": "overwrite",
"primary_key": [["ID"]]
},
{
"stream": {
"name": "KV",
"json_schema": {
"type": "object",
"properties": {
"K": {
"type": "number",
"airbyte_type": "integer"
},
"V": {
"type": "string"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [["K"]],
"namespace": "PUBLIC"
},
"sync_mode": "full_refresh",
"cursor_field": [],
"destination_sync_mode": "overwrite",
"primary_key": []
}
]
}

View File

@@ -0,0 +1,83 @@
{
"streams": [
{
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"MSG": {
"type": "string"
},
"_ROWID_": {
"type": "number",
"airbyte_type": "integer"
},
"ID": {
"type": "string",
"contentEncoding": "base64"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
},
"_ab_cdc_lsn": {
"type": "string"
},
"_ab_cdc_updated_at": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
},
"_ab_cdc_deleted_at": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_cursor": true,
"default_cursor_field": ["_ab_cdc_lsn"],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
{
"name": "KV",
"json_schema": {
"type": "object",
"properties": {
"_ROWID_": {
"type": "number",
"airbyte_type": "integer"
},
"V": {
"type": "string"
},
"K": {
"type": "number",
"airbyte_type": "integer"
},
"_ab_cdc_lsn": {
"type": "string"
},
"_ab_cdc_updated_at": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
},
"_ab_cdc_deleted_at": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_cursor": true,
"default_cursor_field": ["_ab_cdc_lsn"],
"source_defined_primary_key": [["K"]],
"namespace": "PUBLIC"
}
]
}

View File

@@ -0,0 +1,55 @@
{
"streams": [
{
"name": "EVENTS",
"json_schema": {
"type": "object",
"properties": {
"MSG": {
"type": "string"
},
"_ROWID_": {
"type": "number",
"airbyte_type": "integer"
},
"ID": {
"type": "string",
"contentEncoding": "base64"
},
"TS": {
"type": "string",
"format": "date-time",
"airbyte_type": "timestamp_with_timezone"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [["ID"]],
"namespace": "PUBLIC"
},
{
"name": "KV",
"json_schema": {
"type": "object",
"properties": {
"_ROWID_": {
"type": "number",
"airbyte_type": "integer"
},
"V": {
"type": "string"
},
"K": {
"type": "number",
"airbyte_type": "integer"
}
}
},
"supported_sync_modes": ["full_refresh", "incremental"],
"default_cursor_field": [],
"source_defined_primary_key": [["K"]],
"namespace": "PUBLIC"
}
]
}

View File

@@ -0,0 +1,185 @@
[
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"status": "STARTED"
}
}
},
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "EVENTS",
"namespace": "PUBLIC"
},
"status": "STARTED"
}
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "KV",
"data": {
"K": 1,
"V": "foo"
},
"emitted_at": 3133641600000
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "KV",
"data": {
"K": 2,
"V": "bar"
},
"emitted_at": 3133641600000
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "KV",
"data": {
"K": 3,
"V": null
},
"emitted_at": 3133641600000
}
},
{
"type": "STATE",
"state": {
"type": "GLOBAL",
"global": {
"stream_states": [
{
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"stream_state": {
"primary_key": {},
"cursors": {}
}
}
]
},
"sourceStats": {
"recordCount": 3.0
}
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "EVENTS",
"data": {
"ID": "zESZAjDaXqjE0wJzLlv86Q==",
"TS": "2024-04-29T00:00:00.000000-04:00",
"MSG": "bar"
},
"emitted_at": 3133641600000
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "EVENTS",
"data": {
"ID": "3VWqE0Hrb7TV5BOEP2wN+g==",
"TS": "2024-04-30T00:00:00.000000-04:00",
"MSG": null
},
"emitted_at": 3133641600000
}
},
{
"type": "STATE",
"state": {
"type": "GLOBAL",
"global": {
"stream_states": [
{
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"stream_state": {
"primary_key": {},
"cursors": {}
}
}
]
},
"sourceStats": {
"recordCount": 0.0
}
}
},
{
"type": "STATE",
"state": {
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "EVENTS",
"namespace": "PUBLIC"
},
"stream_state": {
"primary_key": {},
"cursors": {}
}
},
"sourceStats": {
"recordCount": 2.0
}
}
},
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"status": "COMPLETE"
}
}
},
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "EVENTS",
"namespace": "PUBLIC"
},
"status": "COMPLETE"
}
}
}
]

View File

@@ -0,0 +1,160 @@
[
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "EVENTS",
"namespace": "PUBLIC"
},
"status": "STARTED"
}
}
},
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"status": "STARTED"
}
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "EVENTS",
"data": {
"ID": "zESZAjDaXqjE0wJzLlv86Q==",
"TS": "2024-04-29T00:00:00.000000-04:00",
"MSG": "bar"
},
"emitted_at": 3133641600000
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "EVENTS",
"data": {
"ID": "3VWqE0Hrb7TV5BOEP2wN+g==",
"TS": "2024-04-30T00:00:00.000000-04:00",
"MSG": null
},
"emitted_at": 3133641600000
}
},
{
"type": "STATE",
"state": {
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "EVENTS",
"namespace": "PUBLIC"
},
"stream_state": {
"primary_key": {},
"cursors": {
"TS": "2024-04-30T00:00:00.000000-04:00"
}
}
},
"sourceStats": {
"recordCount": 2.0
}
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "KV",
"data": {
"K": 1,
"V": "foo"
},
"emitted_at": 3133641600000
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "KV",
"data": {
"K": 2,
"V": "bar"
},
"emitted_at": 3133641600000
}
},
{
"type": "RECORD",
"record": {
"namespace": "PUBLIC",
"stream": "KV",
"data": {
"K": 3,
"V": null
},
"emitted_at": 3133641600000
}
},
{
"type": "STATE",
"state": {
"type": "STREAM",
"stream": {
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"stream_state": {
"primary_key": {},
"cursors": {}
}
},
"sourceStats": {
"recordCount": 3.0
}
}
},
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "EVENTS",
"namespace": "PUBLIC"
},
"status": "COMPLETE"
}
}
},
{
"type": "TRACE",
"trace": {
"type": "STREAM_STATUS",
"emitted_at": 3.1336416e12,
"stream_status": {
"stream_descriptor": {
"name": "KV",
"namespace": "PUBLIC"
},
"status": "COMPLETE"
}
}
}
]
