
Destination Redshift: pull in cdk update for refreshes bugfix (#42506)

Edward Gao, 2024-08-20 08:12:03 -07:00 (committed by GitHub)
parent b20365a390
commit 6b902f5091
12 changed files with 80 additions and 20 deletions

View File

@@ -4,7 +4,7 @@ plugins {
 }

 airbyteJavaConnector {
-    cdkVersionRequired = '0.41.4'
+    cdkVersionRequired = '0.44.14'
     features = ['db-destinations', 's3-destinations', 'typing-deduping']
     useLocalCdk = false
 }

View File

@@ -5,7 +5,7 @@ data:
   connectorSubtype: database
   connectorType: destination
   definitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc
-  dockerImageTag: 3.4.1
+  dockerImageTag: 3.4.2
   dockerRepository: airbyte/destination-redshift
   documentationUrl: https://docs.airbyte.com/integrations/destinations/redshift
   githubIssueLabel: destination-redshift

View File

@@ -51,6 +51,7 @@ import io.airbyte.integrations.base.destination.operation.DefaultFlush
 import io.airbyte.integrations.base.destination.operation.DefaultSyncOperation
 import io.airbyte.integrations.base.destination.typing_deduping.CatalogParser
 import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialStatus
+import io.airbyte.integrations.base.destination.typing_deduping.ImportType
 import io.airbyte.integrations.base.destination.typing_deduping.InitialRawTableStatus
 import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog
 import io.airbyte.integrations.base.destination.typing_deduping.Sql
@@ -72,7 +73,6 @@ import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta
 import io.airbyte.protocol.models.v0.AirbyteStreamStatusTraceMessage.AirbyteStreamStatus
 import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
 import io.airbyte.protocol.models.v0.ConnectorSpecification
-import io.airbyte.protocol.models.v0.DestinationSyncMode
 import java.sql.SQLException
 import java.time.Duration
 import java.util.Objects
@@ -145,7 +145,7 @@ class RedshiftDestination : BaseConnector(), Destination {
         val streamConfig =
             StreamConfig(
                 id = streamId,
-                destinationSyncMode = DestinationSyncMode.APPEND,
+                postImportAction = ImportType.APPEND,
                 primaryKey = listOf(),
                 cursor = Optional.empty(),
                 columns = linkedMapOf(),
@@ -199,6 +199,8 @@ class RedshiftDestination : BaseConnector(), Destination {
                     isAirbyteMetaPresentInRaw = true,
                     isGenerationIdPresent = true,
                 ),
+                finalTableGenerationId = 1,
+                finalTempTableGenerationId = 1,
             ),
             FileUploadFormat.CSV,
             destinationColumns,
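
The hunks above swap the protocol-level DestinationSyncMode enum for a destination-internal ImportType in StreamConfig (the field is renamed from destinationSyncMode to postImportAction), and wire generation IDs into the initial-status object used by the connection check. A minimal sketch of the rename, assuming ImportType is an enum carrying the two values this diff exercises; the mapping function below is illustrative, not CDK source:

enum class ImportType { APPEND, DEDUPE }

// Mapping inferred from this commit's substitutions:
// APPEND -> APPEND, APPEND_DEDUP -> DEDUPE.
fun importTypeFor(destinationSyncMode: String): ImportType =
    when (destinationSyncMode) {
        "append" -> ImportType.APPEND
        "append_dedup" -> ImportType.DEDUPE
        else -> error("unmapped sync mode: $destinationSyncMode")
    }

fun main() {
    println(importTypeFor("append"))       // APPEND
    println(importTypeFor("append_dedup")) // DEDUPE
}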

View File

@@ -100,7 +100,7 @@ class RedshiftStagingStorageOperation(
             return null
         }
-        return generation.first()[JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID].asLong()
+        return generation.first()[JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID]?.asLong() ?: 0
     }

     override fun writeToStage(
@@ -114,7 +114,12 @@ class RedshiftStagingStorageOperation(
             "Uploading records to for ${streamId.rawNamespace}.${streamId.rawName} to path $objectPath"
         }
         val filename =
-            s3StorageOperations.uploadRecordsToBucket(data, streamId.rawNamespace, objectPath)
+            s3StorageOperations.uploadRecordsToBucket(
+                data,
+                streamId.rawNamespace,
+                objectPath,
+                streamConfig.generationId
+            )
         log.info {
             "Starting copy to target table from stage: ${streamId.rawName}$suffix in destination from stage: $objectPath/$filename."

View File

@@ -23,7 +23,7 @@ import org.jooq.SQLDialect

 private val log = KotlinLogging.logger {}

 class RedshiftDestinationHandler(
-    databaseName: String?,
+    databaseName: String,
     jdbcDatabase: JdbcDatabase,
     rawNamespace: String
 ) :
@@ -31,7 +31,8 @@ class RedshiftDestinationHandler(
         databaseName,
         jdbcDatabase,
         rawNamespace,
-        SQLDialect.DEFAULT
+        SQLDialect.DEFAULT,
+        generationHandler = RedshiftGenerationHandler(databaseName)
     ) {
     override fun createNamespaces(schemas: Set<String>) {
         // SHOW SCHEMAS will fail with a "schema ... does not exist" error
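
Making databaseName non-null matters because the new RedshiftGenerationHandler (next file) passes it to DatabaseMetaData.getColumns as the JDBC catalog, where a null catalog means "match any catalog" rather than "this database". A small illustration using only the standard java.sql API; the helper name is hypothetical:

import java.sql.Connection

// Probes table metadata for a specific column. A null catalog argument to
// getColumns would widen the search across catalogs instead of pinning it to
// one database, which is why the handler now requires a non-null name.
fun hasColumn(conn: Connection, catalog: String, schema: String, table: String, column: String): Boolean =
    conn.metaData.getColumns(catalog, schema, table, column).use { it.next() }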

View File

@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.redshift.typing_deduping
+
+import io.airbyte.cdk.db.jdbc.JdbcDatabase
+import io.airbyte.cdk.integrations.base.JavaBaseConstants
+import io.airbyte.cdk.integrations.destination.jdbc.JdbcGenerationHandler
+
+class RedshiftGenerationHandler(private val databaseName: String) : JdbcGenerationHandler {
+    override fun getGenerationIdInTable(
+        database: JdbcDatabase,
+        namespace: String,
+        name: String
+    ): Long? {
+        val tableExistsWithGenerationId =
+            database.executeMetadataQuery {
+                // Find a column named _airbyte_generation_id
+                // in the relevant table.
+                val resultSet =
+                    it.getColumns(
+                        databaseName,
+                        namespace,
+                        name,
+                        JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID
+                    )
+                // Check if there were any such columns.
+                resultSet.next()
+            }
+        // The table doesn't exist, or exists but doesn't have generation id
+        if (!tableExistsWithGenerationId) {
+            return null
+        }
+        // The table exists and has generation ID. Query it.
+        val queryResult =
+            database.queryJsons(
+                """
+                SELECT ${JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID}
+                FROM "$namespace"."$name"
+                LIMIT 1
+                """.trimIndent()
+            )
+        return queryResult
+            .firstOrNull()
+            ?.get(JavaBaseConstants.COLUMN_NAME_AB_GENERATION_ID)
+            ?.asLong()
+            ?: 0
+    }
+}
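
This new handler distinguishes three outcomes: null when the table is missing or predates generation tracking, 0 when the table exists with the column but yields no generation value, and the actual ID otherwise. A hypothetical caller (illustrative only, not this PR's code) might read the result like this:

// Branches mirror the handler's return paths above; the refresh logic that
// actually consumes this answer lives in the CDK, not in this sketch.
fun describeGeneration(generationId: Long?): String =
    when (generationId) {
        null -> "table is missing, or has no _airbyte_generation_id column"
        0L -> "table is empty, or its first row has a null generation ID"
        else -> "table holds data from generation $generationId"
    }

fun main() {
    listOf(null, 0L, 43L).forEach { println(describeGeneration(it)) }
}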

View File

@@ -13,10 +13,10 @@ import io.airbyte.commons.json.Jsons.emptyObject
 import io.airbyte.commons.json.Jsons.jsonNode
 import io.airbyte.commons.json.Jsons.serialize
 import io.airbyte.integrations.base.destination.typing_deduping.ColumnId
+import io.airbyte.integrations.base.destination.typing_deduping.ImportType
 import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog
 import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta
 import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
-import io.airbyte.protocol.models.v0.DestinationSyncMode
 import io.airbyte.protocol.models.v0.StreamDescriptor
 import io.github.oshai.kotlinlogging.KotlinLogging
 import java.nio.charset.StandardCharsets
@@ -77,7 +77,7 @@ class RedshiftSuperLimitationTransformer(
         // convert List<ColumnId> to Set<ColumnId> for faster lookup
         val primaryKeys =
             streamConfig.primaryKey.stream().map(ColumnId::originalName).collect(Collectors.toSet())
-        val syncMode = streamConfig.destinationSyncMode
+        val syncMode = streamConfig.postImportAction
         val transformationInfo =
             transformNodes(jsonNode, DEFAULT_PREDICATE_VARCHAR_GREATER_THAN_64K)
         val originalBytes = transformationInfo.originalBytes
@@ -97,7 +97,7 @@
             transformedBytes
         )
         val minimalNode = constructMinimalJsonWithPks(jsonNode, primaryKeys, cursorField)
-        if (minimalNode.isEmpty && syncMode == DestinationSyncMode.APPEND_DEDUP) {
+        if (minimalNode.isEmpty && syncMode == ImportType.DEDUPE) {
            // Fail the sync if PKs are missing in DEDUPE, no point sending an empty record to
            // destination.
            throw RuntimeException(
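
For context: when a record exceeds Redshift's 64K SUPER/VARCHAR limit, this transformer strips it down to primary keys and cursor, and the renamed check fails the sync if even that minimal record is empty under DEDUPE, since such a record could never be matched for deduplication. A simplified sketch, where ImportType stands in for the CDK enum and a map filter stands in for constructMinimalJsonWithPks:

enum class ImportType { APPEND, DEDUPE }

fun minimalWithPks(record: Map<String, Any?>, pks: Set<String>): Map<String, Any?> =
    record.filterKeys { it in pks }

fun transformOversized(record: Map<String, Any?>, pks: Set<String>, mode: ImportType): Map<String, Any?> {
    val minimal = minimalWithPks(record, pks)
    if (minimal.isEmpty() && mode == ImportType.DEDUPE) {
        // No PKs survived: an empty record can never be deduped downstream.
        throw RuntimeException("Record exceeds size limit and carries no primary keys")
    }
    return minimal
}

fun main() {
    // Keeps only the PK column; an empty pks set under DEDUPE would throw.
    println(transformOversized(mapOf("id1" to 1, "name" to "Alice"), setOf("id1"), ImportType.DEDUPE))
}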

View File

@@ -18,6 +18,7 @@ import io.airbyte.commons.exceptions.ConfigErrorException
 import io.airbyte.commons.json.Jsons
 import io.airbyte.commons.string.Strings
 import io.airbyte.integrations.base.destination.operation.AbstractStreamOperation.Companion.TMP_TABLE_SUFFIX
+import io.airbyte.integrations.base.destination.typing_deduping.ImportType
 import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig
 import io.airbyte.integrations.base.destination.typing_deduping.StreamId
 import io.airbyte.integrations.destination.redshift.operation.RedshiftStagingStorageOperation
@@ -26,7 +27,6 @@ import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlG
 import io.airbyte.integrations.destination.redshift.util.RedshiftUtil
 import io.airbyte.protocol.models.v0.AirbyteMessage.Type
 import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta
-import io.airbyte.protocol.models.v0.DestinationSyncMode
 import java.nio.file.Files
 import java.nio.file.Path
 import java.util.Optional
@@ -54,7 +54,7 @@ class RedshiftS3StagingStorageOperationTest {
     private val streamConfig =
         StreamConfig(
             streamId,
-            DestinationSyncMode.APPEND,
+            ImportType.APPEND,
             emptyList(),
             Optional.empty(),
             LinkedHashMap(),

View File

@@ -7,4 +7,4 @@
 // And append the records from the second sync
 {"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}, "_airbyte_generation_id": 43, "_airbyte_meta": {"sync_id": 42, "changes": []}}
 {"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}, "_airbyte_generation_id": 43, "_airbyte_meta": {"sync_id": 42, "changes": []}}
-{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z", "_airbyte_generation_id": 43, "_airbyte_meta": {"sync_id": 42, "changes": []}}}
+{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}, "_airbyte_generation_id": 43, "_airbyte_meta": {"sync_id": 42, "changes": []}}

View File

@@ -8,11 +8,11 @@ import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolT
 import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType
 import io.airbyte.integrations.base.destination.typing_deduping.Array
 import io.airbyte.integrations.base.destination.typing_deduping.ColumnId
+import io.airbyte.integrations.base.destination.typing_deduping.ImportType
 import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig
 import io.airbyte.integrations.base.destination.typing_deduping.StreamId
 import io.airbyte.integrations.base.destination.typing_deduping.Struct
 import io.airbyte.integrations.destination.redshift.RedshiftSQLNameTransformer
-import io.airbyte.protocol.models.v0.DestinationSyncMode
 import java.io.IOException
 import java.time.Instant
 import java.util.Arrays
@@ -73,7 +73,7 @@ class RedshiftSqlGeneratorTest {
         incrementalDedupStream =
             StreamConfig(
                 streamId!!,
-                DestinationSyncMode.APPEND_DEDUP,
+                ImportType.DEDUPE,
                 primaryKey,
                 Optional.of(cursor),
                 columns,
@@ -84,7 +84,7 @@ class RedshiftSqlGeneratorTest {
         incrementalAppendStream =
             StreamConfig(
                 streamId!!,
-                DestinationSyncMode.APPEND,
+                ImportType.APPEND,
                 primaryKey,
                 Optional.of(cursor),
                 columns,
@@ -164,7 +164,7 @@ class RedshiftSqlGeneratorTest {
             redshiftSqlGenerator.updateTable(
                 StreamConfig(
                     streamId!!,
-                    DestinationSyncMode.APPEND_DEDUP,
+                    ImportType.DEDUPE,
                     primaryKey,
                     Optional.of(cursor),
                     columns,

View File

@@ -11,13 +11,13 @@ import io.airbyte.commons.resources.MoreResources.readResource
 import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType
 import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType
 import io.airbyte.integrations.base.destination.typing_deduping.ColumnId
+import io.airbyte.integrations.base.destination.typing_deduping.ImportType
 import io.airbyte.integrations.base.destination.typing_deduping.ParsedCatalog
 import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig
 import io.airbyte.integrations.base.destination.typing_deduping.StreamId
 import io.airbyte.integrations.destination.redshift.RedshiftSQLNameTransformer
 import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta
 import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
-import io.airbyte.protocol.models.v0.DestinationSyncMode
 import io.airbyte.protocol.models.v0.StreamDescriptor
 import java.io.IOException
 import java.nio.charset.StandardCharsets
@@ -53,7 +53,7 @@ class RedshiftSuperLimitationTransformerTest {
     val streamConfig =
         StreamConfig(
             streamId,
-            DestinationSyncMode.APPEND_DEDUP,
+            ImportType.DEDUPE,
             primaryKey,
             Optional.empty(),
             columns,

View File

@@ -222,6 +222,7 @@ Each stream will be output into its own raw table in Redshift. Each table will c
 | Version | Date       | Pull Request                                             | Subject                                                                                     |
 | :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------------ |
+| 3.4.2   | 2024-08-15 | [42506](https://github.com/airbytehq/airbyte/pull/42506) | Fix bug in refreshes logic (already mitigated in platform, just fixing protocol compliance) |
 | 3.4.1   | 2024-08-13 | [xxx](https://github.com/airbytehq/airbyte/pull/xxx)     | Simplify Redshift Options                                                                   |
 | 3.4.0   | 2024-07-23 | [42445](https://github.com/airbytehq/airbyte/pull/42445) | Respect the `drop cascade` option on raw tables                                             |
 | 3.3.1   | 2024-07-15 | [41968](https://github.com/airbytehq/airbyte/pull/41968) | Don't hang forever on empty stream list; shorten error message on INCOMPLETE stream status |