1
0
mirror of synced 2026-01-07 00:05:48 -05:00

Destination Bigquery 1s1t: handle cursor change (#28721)

* handle new cursor column

* sync2 is actually weird, apparently

* logistics

* Automated Commit - Format and Process Resources Changes

---------

Co-authored-by: edgao <edgao@users.noreply.github.com>
This commit is contained in:
Edward Gao
2023-07-26 17:36:15 -07:00
committed by GitHub
parent d9f2444c9e
commit 9f6963ccfc
11 changed files with 90 additions and 6 deletions

View File

@@ -71,7 +71,8 @@ public abstract class BaseTypingDedupingTest {
private static final RecordDiffer DIFFER = new RecordDiffer(
Pair.of("id1", AirbyteProtocolType.INTEGER),
Pair.of("id2", AirbyteProtocolType.INTEGER),
Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE));
Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE),
Pair.of("old_cursor", AirbyteProtocolType.INTEGER));
private String randomSuffix;
private JsonNode config;
@@ -488,6 +489,55 @@ public abstract class BaseTypingDedupingTest {
// evolutions)
}
/**
 * Run two syncs whose cursor columns differ: the first sync is configured with
 * {@code old_cursor} (and a schema that has no {@code updated_at}), while the second sync
 * switches to the unmodified schema with {@code updated_at} as its cursor. After each sync, the
 * raw and final records are compared against the expected fixture files.
 * <p>
 * In effect, this checks that the destination connector treats a NULL cursor as older than any
 * non-NULL cursor.
 */
@Test
public void incrementalDedupChangeCursor() throws Exception {
  // Schema for the first sync: drop updated_at and add an integer old_cursor column instead.
  final JsonNode firstSyncSchema = SCHEMA.deepCopy();
  final ObjectNode firstSyncProperties = (ObjectNode) firstSyncSchema.get("properties");
  firstSyncProperties.remove("updated_at");
  firstSyncProperties.set(
      "old_cursor",
      Jsons.deserialize(
          """
          {"type": "integer"}
          """));

  final AirbyteStream stream = new AirbyteStream()
      .withNamespace(streamNamespace)
      .withName(streamName)
      .withJsonSchema(firstSyncSchema);
  final ConfiguredAirbyteStream configuredStream = new ConfiguredAirbyteStream()
      .withSyncMode(SyncMode.INCREMENTAL)
      .withCursorField(List.of("old_cursor"))
      .withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP)
      .withPrimaryKey(List.of(List.of("id1"), List.of("id2")))
      .withStream(stream);
  final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStream));

  // Sync 1: cursor is old_cursor.
  final List<AirbyteMessage> firstSyncMessages = readMessages("sync1_cursorchange_messages.jsonl");
  runSync(catalog, firstSyncMessages);

  final List<JsonNode> firstSyncExpectedRaw = readRecords("sync1_cursorchange_expectedrecords_dedup_raw.jsonl");
  final List<JsonNode> firstSyncExpectedFinal = readRecords("sync1_cursorchange_expectedrecords_dedup_final.jsonl");
  verifySyncResult(firstSyncExpectedRaw, firstSyncExpectedFinal);

  // Sync 2: the catalog holds a reference to configuredStream, so mutating it here switches the
  // same catalog over to the original schema with updated_at as the cursor.
  final List<AirbyteMessage> secondSyncMessages = readMessages("sync2_messages.jsonl");
  configuredStream.getStream().setJsonSchema(SCHEMA);
  configuredStream.setCursorField(List.of("updated_at"));
  runSync(catalog, secondSyncMessages);

  final List<JsonNode> secondSyncExpectedRaw = readRecords("sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl");
  final List<JsonNode> secondSyncExpectedFinal = readRecords("sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl");
  verifySyncResult(secondSyncExpectedRaw, secondSyncExpectedFinal);
}
@Test
@Disabled("Not yet implemented")
public void testSyncWithLargeRecordBatch() throws Exception {

View File

@@ -0,0 +1,3 @@
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "old_cursor": 1, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie"}

View File

@@ -0,0 +1,3 @@
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}

View File

@@ -0,0 +1,4 @@
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}}
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}}
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}}
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}}

View File

@@ -0,0 +1,3 @@
{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}
// Charlie wasn't reemitted with updated_at, so it still has a null cursor
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "name": "Charlie"}

View File

@@ -0,0 +1,4 @@
{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}}
{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"}}
// Charlie wasn't reemitted in sync2. This record still has an old_cursor value.
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}