✨Destination Bigquery 1s1t: handle cursor change (#28721)
* handle new cursor column * sync2 is actually weird, apparently * logistics * Automated Commit - Format and Process Resources Changes --------- Co-authored-by: edgao <edgao@users.noreply.github.com>
This commit is contained in:
@@ -71,7 +71,8 @@ public abstract class BaseTypingDedupingTest {
|
||||
private static final RecordDiffer DIFFER = new RecordDiffer(
|
||||
Pair.of("id1", AirbyteProtocolType.INTEGER),
|
||||
Pair.of("id2", AirbyteProtocolType.INTEGER),
|
||||
Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE));
|
||||
Pair.of("updated_at", AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE),
|
||||
Pair.of("old_cursor", AirbyteProtocolType.INTEGER));
|
||||
|
||||
private String randomSuffix;
|
||||
private JsonNode config;
|
||||
@@ -488,6 +489,55 @@ public abstract class BaseTypingDedupingTest {
|
||||
// evolutions)
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the cursor column in the second sync to a column that doesn't exist in the first sync.
|
||||
* Verify that we overwrite everything correctly.
|
||||
* <p>
|
||||
* This essentially verifies that the destination connector correctly recognizes NULL cursors as
|
||||
* older than non-NULL cursors.
* <p>
* Flow: the first sync runs against a copy of {@code SCHEMA} in which {@code updated_at} has been
* replaced by an integer {@code old_cursor} property, with {@code old_cursor} as the cursor field.
* The second sync restores {@code SCHEMA} and switches the cursor field to {@code updated_at}.
* Both syncs use incremental / append-dedup mode with the composite primary key
* ({@code id1}, {@code id2}), and each sync's raw and final tables are compared against
* expected-record fixture files.
|
||||
*/
|
||||
@Test
|
||||
public void incrementalDedupChangeCursor() throws Exception {
|
||||
// Schema for the first sync: copy SCHEMA so the shared constant is not modified, drop "updated_at",
// and add an integer "old_cursor" property.
JsonNode mangledSchema = SCHEMA.deepCopy();
|
||||
((ObjectNode) mangledSchema.get("properties")).remove("updated_at");
|
||||
((ObjectNode) mangledSchema.get("properties")).set(
|
||||
"old_cursor",
|
||||
Jsons.deserialize(
|
||||
"""
|
||||
{"type": "integer"}
|
||||
"""));
|
||||
// Incremental + append-dedup stream keyed on (id1, id2), initially using "old_cursor" as its cursor.
ConfiguredAirbyteStream configuredStream = new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withCursorField(List.of("old_cursor"))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND_DEDUP)
|
||||
.withPrimaryKey(List.of(List.of("id1"), List.of("id2")))
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace(streamNamespace)
|
||||
.withName(streamName)
|
||||
.withJsonSchema(mangledSchema));
|
||||
// The same catalog object is passed to both syncs; configuredStream is mutated in between rather
// than building a second catalog.
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStream));
|
||||
|
||||
// First sync: "old_cursor" is the cursor and "updated_at" is absent from the stream schema.
|
||||
final List<AirbyteMessage> messages1 = readMessages("sync1_cursorchange_messages.jsonl");
|
||||
|
||||
runSync(catalog, messages1);
|
||||
|
||||
final List<JsonNode> expectedRawRecords1 = readRecords("sync1_cursorchange_expectedrecords_dedup_raw.jsonl");
|
||||
final List<JsonNode> expectedFinalRecords1 = readRecords("sync1_cursorchange_expectedrecords_dedup_final.jsonl");
|
||||
verifySyncResult(expectedRawRecords1, expectedFinalRecords1);
|
||||
|
||||
// Second sync: restore the original SCHEMA and switch the cursor to "updated_at" before running.
|
||||
final List<AirbyteMessage> messages2 = readMessages("sync2_messages.jsonl");
|
||||
configuredStream.getStream().setJsonSchema(SCHEMA);
|
||||
configuredStream.setCursorField(List.of("updated_at"));
|
||||
|
||||
runSync(catalog, messages2);
|
||||
|
||||
final List<JsonNode> expectedRawRecords2 = readRecords("sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl");
|
||||
final List<JsonNode> expectedFinalRecords2 = readRecords("sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl");
|
||||
verifySyncResult(expectedRawRecords2, expectedFinalRecords2);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Disabled("Not yet implemented")
|
||||
public void testSyncWithLargeRecordBatch() throws Exception {
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "old_cursor": 1, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie"}
|
||||
@@ -0,0 +1,3 @@
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}
|
||||
@@ -0,0 +1,4 @@
|
||||
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}}}
|
||||
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}}}
|
||||
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}}}
|
||||
{"type": "RECORD", "record": {"emitted_at": 1000, "data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}}
|
||||
@@ -0,0 +1,3 @@
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}
|
||||
// Charlie wasn't reemitted with updated_at, so it still has a null cursor
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "name": "Charlie"}
|
||||
@@ -0,0 +1,4 @@
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}}
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:02Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01:00:00:00Z"}}
|
||||
// Charlie wasn't reemitted in sync2. This record still has an old_cursor value.
|
||||
{"_airbyte_extracted_at": "1970-01-01T00:00:01Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}}
|
||||
Reference in New Issue
Block a user