1
0
mirror of synced 2025-12-25 02:09:19 -05:00

Destination snowflake: Add config option to enable time travel (#35754)

This commit is contained in:
Edward Gao
2024-04-10 12:54:02 -07:00
committed by GitHub
parent 1815e38dd8
commit 4fc7f1a82d
11 changed files with 55 additions and 12 deletions

View File

@@ -5,7 +5,7 @@ data:
connectorSubtype: database
connectorType: destination
definitionId: 424892c4-daac-4491-b35d-c6688ba547ba
dockerImageTag: 3.6.6
dockerImageTag: 3.7.0
dockerRepository: airbyte/destination-snowflake
documentationUrl: https://docs.airbyte.com/integrations/destinations/snowflake
githubIssueLabel: destination-snowflake

View File

@@ -160,7 +160,10 @@ public class SnowflakeInternalStagingDestination extends AbstractJdbcDestination
}
}
final SnowflakeSqlGenerator sqlGenerator = new SnowflakeSqlGenerator();
final int retentionPeriodDays = SnowflakeSqlOperations.getRetentionPeriodDays(
config.get(SnowflakeSqlOperations.RETENTION_PERIOD_DAYS_CONFIG_KEY));
final SnowflakeSqlGenerator sqlGenerator = new SnowflakeSqlGenerator(retentionPeriodDays);
final ParsedCatalog parsedCatalog;
final TyperDeduper typerDeduper;
final JdbcDatabase database = getDatabase(getDataSource(config));

View File

@@ -6,6 +6,7 @@ package io.airbyte.integrations.destination.snowflake;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.integrations.base.DestinationConfig;
import io.airbyte.cdk.integrations.base.JavaBaseConstants;
import io.airbyte.cdk.integrations.destination.async.partial_messages.PartialAirbyteMessage;
import io.airbyte.cdk.integrations.destination.jdbc.JdbcSqlOperations;
@@ -21,7 +22,9 @@ import net.snowflake.client.jdbc.SnowflakeSQLException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
class SnowflakeSqlOperations extends JdbcSqlOperations implements SqlOperations {
public class SnowflakeSqlOperations extends JdbcSqlOperations implements SqlOperations {
public static final String RETENTION_PERIOD_DAYS_CONFIG_KEY = "retention_period_days";
private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeSqlOperations.class);
private static final int MAX_FILES_IN_LOADING_QUERY_LIMIT = 1000;
@@ -46,6 +49,7 @@ class SnowflakeSqlOperations extends JdbcSqlOperations implements SqlOperations
@Override
public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) {
int retentionPeriodDays = getRetentionPeriodDaysFromConfigSingleton();
return String.format(
"""
CREATE TABLE IF NOT EXISTS "%s"."%s" (
@@ -53,13 +57,34 @@ class SnowflakeSqlOperations extends JdbcSqlOperations implements SqlOperations
"%s" TIMESTAMP WITH TIME ZONE DEFAULT current_timestamp(),
"%s" TIMESTAMP WITH TIME ZONE DEFAULT NULL,
"%s" VARIANT
) data_retention_time_in_days = 0;""",
) data_retention_time_in_days = %d;""",
schemaName,
tableName,
JavaBaseConstants.COLUMN_NAME_AB_RAW_ID,
JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT,
JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT,
JavaBaseConstants.COLUMN_NAME_DATA);
JavaBaseConstants.COLUMN_NAME_DATA,
retentionPeriodDays);
}
/**
* Sort of hacky. The problem is that SnowflakeSqlOperations is constructed in the
* SnowflakeDestination constructor, but we don't have the JsonNode config until we try to call
* check/getSerializedConsumer on the SnowflakeDestination. So we can't actually inject the config
* normally. Instead, we just use the singleton object. :(
*/
private static int getRetentionPeriodDaysFromConfigSingleton() {
return getRetentionPeriodDays(DestinationConfig.getInstance().getNodeValue(RETENTION_PERIOD_DAYS_CONFIG_KEY));
}
public static int getRetentionPeriodDays(final JsonNode node) {
int retentionPeriodDays;
if (node == null || node.isNull()) {
retentionPeriodDays = 1;
} else {
retentionPeriodDays = node.asInt();
}
return retentionPeriodDays;
}
@Override

View File

@@ -49,6 +49,12 @@ public class SnowflakeSqlGenerator implements SqlGenerator {
"LOCALTIME",
"LOCALTIMESTAMP");
private final int retentionPeriodDays;
public SnowflakeSqlGenerator(int retentionPeriodDays) {
this.retentionPeriodDays = retentionPeriodDays;
}
@Override
public StreamId buildStreamId(final String namespace, final String name, final String rawNamespaceOverride) {
return new StreamId(
@@ -119,14 +125,15 @@ public class SnowflakeSqlGenerator implements SqlGenerator {
return Sql.of(new StringSubstitutor(Map.of(
"final_table_id", stream.id().finalTableId(QUOTE, suffix.toUpperCase()),
"force_create_table", forceCreateTable,
"column_declarations", columnDeclarations)).replace(
"column_declarations", columnDeclarations,
"retention_period_days", retentionPeriodDays)).replace(
"""
CREATE ${force_create_table} TABLE ${final_table_id} (
"_AIRBYTE_RAW_ID" TEXT NOT NULL,
"_AIRBYTE_EXTRACTED_AT" TIMESTAMP_TZ NOT NULL,
"_AIRBYTE_META" VARIANT NOT NULL
${column_declarations}
);
) data_retention_time_in_days = ${retention_period_days};
"""));
}

View File

@@ -181,6 +181,13 @@
"description": "When enabled your data will load into your final tables incrementally while your data is still being synced. When Disabled (the default), your data loads into your final tables once at the end of a sync. Note that this option only applies if you elect to create Final tables",
"title": "Enable Loading Data Incrementally to Final Tables",
"order": 12
},
"retention_period_days": {
"type": "integer",
"default": 1,
"description": "The number of days of Snowflake Time Travel to enable on the tables. See <a href=\"https://docs.snowflake.com/en/user-guide/data-time-travel#data-retention-period\">Snowflake's documentation</a> for more information. Setting a nonzero value will incur increased storage costs in your Snowflake instance.",
"title": "Data Retention Period (days)",
"order": 13
}
}
},

View File

@@ -122,7 +122,7 @@ public class SnowflakeInsertDestinationAcceptanceTest extends DestinationAccepta
final String namespace,
final JsonNode streamSchema)
throws Exception {
final StreamId streamId = new SnowflakeSqlGenerator().buildStreamId(namespace, streamName, JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE);
final StreamId streamId = new SnowflakeSqlGenerator(0).buildStreamId(namespace, streamName, JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE);
return retrieveRecordsFromTable(streamId.rawName(), streamId.rawNamespace())
.stream()
.map(r -> r.get(JavaBaseConstants.COLUMN_NAME_DATA))

View File

@@ -111,7 +111,7 @@ public abstract class AbstractSnowflakeTypingDedupingTest extends BaseTypingDedu
@Override
protected SqlGenerator getSqlGenerator() {
return new SnowflakeSqlGenerator();
return new SnowflakeSqlGenerator(0);
}
@Override

View File

@@ -69,7 +69,7 @@ public class SnowflakeSqlGeneratorIntegrationTest extends BaseSqlGeneratorIntegr
@Override
protected SnowflakeSqlGenerator getSqlGenerator() {
return new SnowflakeSqlGenerator();
return new SnowflakeSqlGenerator(0);
}
@Override

View File

@@ -44,7 +44,7 @@ class SnowflakeSqlOperationsTest {
"%s" TIMESTAMP WITH TIME ZONE DEFAULT current_timestamp(),
"%s" TIMESTAMP WITH TIME ZONE DEFAULT NULL,
"%s" VARIANT
) data_retention_time_in_days = 0;""",
) data_retention_time_in_days = 1;""",
SCHEMA_NAME,
TABLE_NAME,
JavaBaseConstants.COLUMN_NAME_AB_RAW_ID,

View File

@@ -24,7 +24,7 @@ import org.junit.jupiter.api.Test;
public class SnowflakeSqlGeneratorTest {
private final SnowflakeSqlGenerator generator = new SnowflakeSqlGenerator();
private final SnowflakeSqlGenerator generator = new SnowflakeSqlGenerator(0);
@Test
void columnNameSpecialCharacterHandling() {

View File

@@ -276,6 +276,7 @@ desired namespace.
| Version | Date | Pull Request | Subject |
|:----------------|:-----------|:-------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 3.7.0 | 2024-04-08 | [\#35754](https://github.com/airbytehq/airbyte/pull/35754) | Allow configuring `data_retention_time_in_days`; apply to both raw and final tables. *Note*: Existing tables will not be affected; you must manually alter them.|
| 3.6.6 | 2024-03-26 | [\#36466](https://github.com/airbytehq/airbyte/pull/36466) | Correctly hhandle instances with `QUOTED_IDENTIFIERS_IGNORE_CASE` enabled globally |
| 3.6.5 | 2024-03-25 | [\#36461](https://github.com/airbytehq/airbyte/pull/36461) | Internal code change (use published CDK artifact instead of source dependency) |
| 3.6.4 | 2024-03-25 | [\#36396](https://github.com/airbytehq/airbyte/pull/36396) | Handle instances with `QUOTED_IDENTIFIERS_IGNORE_CASE` enabled globally |