
Migrate source mssql from old CDK to new CDK (#63731)

Wenqi Hu
2025-10-28 23:56:33 -07:00
committed by GitHub
parent 008dead99a
commit a9afe9546b
86 changed files with 7041 additions and 10605 deletions


@@ -1,33 +0,0 @@
# MsSQL (SQL Server) Source
## Performance Test
To run performance tests from the command line:
```shell
./gradlew :airbyte-integrations:connectors:source-mssql:performanceTest [--cpulimit=cpulimit/<limit>] [--memorylimit=memorylimit/<limit>]
```
In a pull request:
```shell
/test-performance connector=connectors/source-mssql [--cpulimit=cpulimit/<limit>] [--memorylimit=memorylimit/<limit>]
```
- `cpulimit`: Limit the number of CPUs. The minimum is `2`. E.g. `--cpulimit=cpulimit/2`.
- `memorylimit`: Limit the size of the memory. Must include the unit at the end (e.g. `MB`, `GB`). The minimum size is `6MB`. E.g. `--memorylimit=memorylimit/4GB`.
- When neither a CPU nor a memory limit is provided, the performance tests run without CPU or memory limitations. The available resources are then bounded by those specified in `ResourceRequirements.java`.
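For example, to run the suite with a 2-CPU and 4GB cap (values taken from the flag descriptions above):
```shell
./gradlew :airbyte-integrations:connectors:source-mssql:performanceTest --cpulimit=cpulimit/2 --memorylimit=memorylimit/4GB
```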
### Use MsSQL script to populate the benchmark database
To create a database with a given number of tables, and a given number of records in each of them, follow these steps.
1. Create a new database.
2. Follow the TODOs in [create_mssql_benchmarks.sql](src/test-performance/sql/create_mssql_benchmarks.sql) to change the number of tables, and the number of records of different sizes.
3. Execute the script with your changes against the new database. You can run the script with the `sqlcmd` command-line utility:
```bash
cd airbyte-integrations/connectors/source-mssql
sqlcmd -S Serverinstance -E -i src/test-performance/sql/create_mssql_benchmarks.sql
```
4. After the script finishes, the database will contain the number of tables specified in the script, named **test_0** through **test\_(the number of tables minus 1)**.
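To verify the result, you can list the generated tables; a quick sanity check, assuming the default `test_` naming from the script:
```sql
SELECT name FROM sys.tables WHERE name LIKE 'test\_%' ESCAPE '\' ORDER BY name;
```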


@@ -1,9 +0,0 @@
# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference)
# for more information about how to configure these tests
connector_image: airbyte/source-mssql:dev
tests:
spec:
- spec_path: "src/test-integration/resources/expected_spec.json"
config_path: "src/test-integration/resources/dummy_config.json"
backward_compatibility_tests_config:
disable_for_version: "0.4.25"


@@ -1,43 +1,32 @@
plugins {
id 'airbyte-java-connector'
id 'airbyte-bulk-connector'
id "io.airbyte.gradle.docker"
id 'airbyte-connector-docker-convention'
}
airbyteJavaConnector {
cdkVersionRequired = '0.48.18'
features = ['db-sources']
useLocalCdk = false
}
java {
// TODO: rewrite code to avoid javac warnings in the first place
compileJava {
options.compilerArgs += "-Xlint:-try,-rawtypes"
}
compileTestFixturesJava {
options.compilerArgs += "-Xlint:-this-escape"
}
}
application {
mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource'
applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0']
mainClass = 'io.airbyte.integrations.source.mssql.MsSqlServerSource'
}
airbyteBulkConnector {
core = 'extract'
toolkits = ['extract-jdbc', 'extract-cdc']
}
dependencies {
implementation 'com.microsoft.sqlserver:mssql-jdbc:12.10.0.jre11'
implementation 'io.debezium:debezium-embedded:3.0.7.Final'
implementation 'io.debezium:debezium-connector-sqlserver:3.0.7.Final'
implementation 'com.microsoft.sqlserver:mssql-jdbc:12.10.1.jre11'
implementation 'com.azure:azure-identity:1.15.3'
implementation 'io.debezium:debezium-embedded:3.3.0.Final'
implementation 'io.debezium:debezium-connector-sqlserver:3.3.0.Final'
implementation 'org.codehaus.plexus:plexus-utils:3.4.2'
testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0'
api 'org.apache.commons:commons-lang3:3.18.0'
implementation 'org.apache.commons:commons-lang3:3.18.0'
testImplementation 'org.awaitility:awaitility:4.2.0'
testImplementation 'org.hamcrest:hamcrest-all:1.3'
testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0'
testImplementation 'org.testcontainers:mssqlserver:1.19.0'
}
compileKotlin {
testImplementation 'com.zaxxer:HikariCP:5.1.0'
testImplementation("io.mockk:mockk:1.12.0")
api 'com.google.guava:guava:33.4.0-jre'
}


@@ -1,2 +1,3 @@
testExecutionConcurrency=-1
JunitMethodExecutionTimeout=5 m
testExecutionConcurrency=1
JunitMethodExecutionTimeout=5m
cdkVersion=0.1.58


@@ -1,17 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import pytest
pytest_plugins = ("connector_acceptance_test.plugin",)
@pytest.fixture(scope="session", autouse=True)
def connector_setup():
"""This fixture is a placeholder for external resources that acceptance test might require."""
# TODO: setup test dependencies if needed. otherwise remove the TODO comments
yield
# TODO: clean up test dependencies


@@ -1,228 +0,0 @@
CREATE
DATABASE MSSQL_BASIC;
USE MSSQL_BASIC;
CREATE
TABLE
dbo.TEST_DATASET(
id INTEGER PRIMARY KEY,
test_column_1 BIGINT,
test_column_10 FLOAT,
test_column_11 REAL,
test_column_12 DATE,
test_column_13 smalldatetime,
test_column_14 datetime,
test_column_15 datetime2,
test_column_16 TIME,
test_column_18 CHAR,
test_column_2 INT,
test_column_20 text,
test_column_21 nchar,
test_column_22 nvarchar(MAX),
test_column_23 ntext,
test_column_25 VARBINARY(3),
test_column_3 SMALLINT,
test_column_4 tinyint,
test_column_6 DECIMAL(
5,
2
),
test_column_7 NUMERIC
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
1,
- 9223372036854775808,
'123',
'123',
'0001-01-01',
'1900-01-01',
'1753-01-01',
'0001-01-01',
'13:00:01',
'a',
- 2147483648,
'a',
'a',
'a',
'a',
CAST(
'ABC' AS VARBINARY
),
- 32768,
0,
999.33,
'99999'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
2,
9223372036854775807,
'1234567890.1234567',
'1234567890.1234567',
'9999-12-31',
'2079-06-06',
'9999-12-31',
'9999-12-31',
'13:00:04Z',
'*',
2147483647,
'abc',
'*',
'abc',
'abc',
CAST(
'ABC' AS VARBINARY
),
32767,
255,
999.33,
'99999'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
3,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'*',
2147483647,
'Some test text 123$%^&*()_',
N'ї',
N'Миші йдуть на південь, не питай чому;',
N'Миші йдуть на південь, не питай чому;',
CAST(
'ABC' AS VARBINARY
),
32767,
255,
999.33,
'99999'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
4,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'*',
2147483647,
'',
N'ї',
N'櫻花分店',
N'櫻花分店',
CAST(
'ABC' AS VARBINARY
),
32767,
255,
999.33,
'99999'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
5,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'*',
2147483647,
'',
N'ї',
'',
'',
CAST(
'ABC' AS VARBINARY
),
32767,
255,
999.33,
'99999'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
6,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'*',
2147483647,
'',
N'ї',
N'\xF0\x9F\x9A\x80',
N'\xF0\x9F\x9A\x80',
CAST(
'ABC' AS VARBINARY
),
32767,
255,
999.33,
'99999'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
7,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'*',
2147483647,
'',
N'ї',
N'\xF0\x9F\x9A\x80',
N'\xF0\x9F\x9A\x80',
CAST(
'ABC' AS VARBINARY
),
32767,
255,
999.33,
'99999'
);


@@ -1,320 +0,0 @@
CREATE
DATABASE MSSQL_FULL;
USE MSSQL_FULL;
CREATE
TABLE
dbo.TEST_DATASET(
id INTEGER PRIMARY KEY,
test_column_1 BIGINT,
test_column_10 FLOAT,
test_column_11 REAL,
test_column_12 DATE,
test_column_13 smalldatetime,
test_column_14 datetime,
test_column_15 datetime2,
test_column_16 TIME,
test_column_17 datetimeoffset,
test_column_18 CHAR,
test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8,
test_column_2 INT,
test_column_20 text,
test_column_21 nchar,
test_column_22 nvarchar(MAX),
test_column_23 ntext,
test_column_24 BINARY,
test_column_25 VARBINARY(3),
test_column_26 geometry,
test_column_27 uniqueidentifier,
test_column_28 xml,
test_column_29 geography,
test_column_3 SMALLINT,
test_column_30 hierarchyid,
test_column_31 sql_variant,
test_column_4 tinyint,
test_column_5 bit,
test_column_6 DECIMAL(
5,
2
),
test_column_7 NUMERIC,
test_column_8 money,
test_column_9 smallmoney
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
1,
- 9223372036854775808,
'123',
'123',
'0001-01-01',
'1900-01-01',
'1753-01-01',
'0001-01-01',
NULL,
'0001-01-10 00:00:00 +01:00',
'a',
'a',
NULL,
'a',
'a',
'a',
'a',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'<user><user_id>1</user_id></user>',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
NULL,
'/1/1/',
'a',
NULL,
NULL,
999.33,
'99999',
NULL,
NULL
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
2,
9223372036854775807,
'1234567890.1234567',
'1234567890.1234567',
'9999-12-31',
'2079-06-06',
'9999-12-31',
'9999-12-31',
'13:00:01',
'9999-01-10 00:00:00 +01:00',
'*',
'abc',
- 2147483648,
'abc',
'*',
'abc',
'abc',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
- 32768,
NULL,
'abc',
0,
0,
NULL,
NULL,
'9990000.3647',
'-214748.3648'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
3,
0,
NULL,
NULL,
'1999-01-08',
NULL,
'9999-12-31T13:00:04Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04Z',
NULL,
NULL,
N'Миші йдуть на південь, не питай чому;',
2147483647,
'Some test text 123$%^&*()_',
N'ї',
N'Миші йдуть на південь, не питай чому;',
N'Миші йдуть на південь, не питай чому;',
NULL,
NULL,
NULL,
NULL,
'',
NULL,
32767,
NULL,
N'Миші йдуть на південь, не питай чому;',
255,
1,
NULL,
NULL,
NULL,
214748.3647
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
4,
NULL,
NULL,
NULL,
NULL,
NULL,
'9999-12-31T13:00:04.123Z',
NULL,
'13:00:04.123456Z',
NULL,
NULL,
N'櫻花分店',
NULL,
'',
NULL,
N'櫻花分店',
N'櫻花分店',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
N'櫻花分店',
NULL,
'true',
NULL,
NULL,
NULL,
NULL
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
5,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
'',
NULL,
NULL,
NULL,
'',
'',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
'',
NULL,
'false',
NULL,
NULL,
NULL,
NULL
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
6,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
7,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
N'\xF0\x9F\x9A\x80',
NULL,
NULL,
NULL,
N'\xF0\x9F\x9A\x80',
N'\xF0\x9F\x9A\x80',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
N'\xF0\x9F\x9A\x80',
NULL,
NULL,
NULL,
NULL,
NULL,
NULL
);


@@ -1,380 +0,0 @@
CREATE
DATABASE MSSQL_FULL_NN;
USE MSSQL_FULL_NN;
CREATE
TABLE
dbo.TEST_DATASET(
id INTEGER PRIMARY KEY,
test_column_1 BIGINT,
test_column_10 FLOAT,
test_column_11 REAL,
test_column_12 DATE,
test_column_13 smalldatetime,
test_column_14 datetime,
test_column_15 datetime2,
test_column_16 TIME,
test_column_17 datetimeoffset,
test_column_18 CHAR,
test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8,
test_column_2 INT,
test_column_20 text,
test_column_21 nchar,
test_column_22 nvarchar(MAX),
test_column_23 ntext,
test_column_24 BINARY,
test_column_25 VARBINARY(3),
test_column_26 geometry,
test_column_27 uniqueidentifier,
test_column_28 xml,
test_column_29 geography,
test_column_3 SMALLINT,
test_column_30 hierarchyid,
test_column_31 sql_variant,
test_column_4 tinyint,
test_column_5 bit,
test_column_6 DECIMAL(
5,
2
),
test_column_7 NUMERIC,
test_column_8 money,
test_column_9 smallmoney
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
1,
- 9223372036854775808,
'123',
'123',
'0001-01-01',
'1900-01-01',
'1753-01-01',
'0001-01-01',
'13:00:01',
'0001-01-10 00:00:00 +01:00',
'a',
'a',
- 2147483648,
'a',
'a',
'a',
'a',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'<user><user_id>1</user_id></user>',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
- 32768,
'/1/1/',
'a',
0,
0,
999.33,
'99999',
'9990000.3647',
'-214748.3648'
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
2,
9223372036854775807,
'1234567890.1234567',
'1234567890.1234567',
'9999-12-31',
'2079-06-06',
'9999-12-31',
'9999-12-31',
'13:00:04Z',
'9999-01-10 00:00:00 +01:00',
'*',
'abc',
2147483647,
'abc',
'*',
'abc',
'abc',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
32767,
'/1/1/',
'abc',
255,
1,
999.33,
'99999',
'9990000.3647',
214748.3647
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
3,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'9999-01-10 00:00:00 +01:00',
'*',
N'Миші йдуть на південь, не питай чому;',
2147483647,
'Some test text 123$%^&*()_',
N'ї',
N'Миші йдуть на південь, не питай чому;',
N'Миші йдуть на південь, не питай чому;',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
32767,
'/1/1/',
N'Миші йдуть на південь, не питай чому;',
255,
'true',
999.33,
'99999',
'9990000.3647',
214748.3647
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
4,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'9999-01-10 00:00:00 +01:00',
'*',
N'櫻花分店',
2147483647,
'',
N'ї',
N'櫻花分店',
N'櫻花分店',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
32767,
'/1/1/',
N'櫻花分店',
255,
'false',
999.33,
'99999',
'9990000.3647',
214748.3647
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
5,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'9999-01-10 00:00:00 +01:00',
'*',
'',
2147483647,
'',
N'ї',
'',
'',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
32767,
'/1/1/',
'',
255,
'false',
999.33,
'99999',
'9990000.3647',
214748.3647
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
6,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'9999-01-10 00:00:00 +01:00',
'*',
N'\xF0\x9F\x9A\x80',
2147483647,
'',
N'ї',
N'\xF0\x9F\x9A\x80',
N'\xF0\x9F\x9A\x80',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
32767,
'/1/1/',
N'\xF0\x9F\x9A\x80',
255,
'false',
999.33,
'99999',
'9990000.3647',
214748.3647
);
INSERT
INTO
dbo.TEST_DATASET
VALUES(
7,
0,
'1234567890.1234567',
'1234567890.1234567',
'1999-01-08',
'2079-06-06',
'9999-12-31T13:00:04.123Z',
'9999-12-31T13:00:04.123456Z',
'13:00:04.123456Z',
'9999-01-10 00:00:00 +01:00',
'*',
N'\xF0\x9F\x9A\x80',
2147483647,
'',
N'ї',
N'\xF0\x9F\x9A\x80',
N'\xF0\x9F\x9A\x80',
CAST(
'A' AS BINARY(1)
),
CAST(
'ABC' AS VARBINARY
),
geometry::STGeomFromText(
'LINESTRING (100 100, 20 180, 180 180)',
0
),
'375CFC44-CAE3-4E43-8083-821D2DF0E626',
'',
geography::STGeomFromText(
'LINESTRING(-122.360 47.656, -122.343 47.656 )',
4326
),
32767,
'/1/1/',
N'\xF0\x9F\x9A\x80',
255,
'false',
999.33,
'99999',
'9990000.3647',
214748.3647
);


@@ -9,24 +9,33 @@ data:
connectorSubtype: database
connectorType: source
definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1
dockerImageTag: 4.2.6
dockerImageTag: 4.3.0-rc.1
dockerRepository: airbyte/source-mssql
documentationUrl: https://docs.airbyte.com/integrations/sources/mssql
githubIssueLabel: source-mssql
icon: mssql.svg
icon: icon.svg
license: ELv2
maxSecondsBetweenMessages: 7200
name: Microsoft SQL Server (MSSQL)
connectorBuildOptions:
baseImage: docker.io/airbyte/java-connector-base:2.0.1@sha256:ec89bd1a89e825514dd2fc8730ba299a3ae1544580a078df0e35c5202c2085b3
connectorIPCOptions:
dataChannel:
version: "0.0.2"
supportedSerialization: ["JSONL", "PROTOBUF"]
supportedTransport: ["SOCKET", "STDIO"]
registryOverrides:
cloud:
enabled: true
oss:
enabled: true
releaseStage: generally_available
releaseStage: alpha
supportLevel: certified
tags:
- language:java
releases:
rolloutConfiguration:
enableProgressiveRollout: true
breakingChanges:
4.0.0:
message: "We have overhauled our MSSQL source connector and it is now supported by the Airbyte team! To benefit from new features, including terabyte-sized table support, reliability improvements, expanded datetime data types, and various bug fixes, please opt in to the 4.0.0 version."
@@ -37,32 +46,8 @@ data:
2.0.0:
message: "Add default cursor for cdc"
upgradeDeadline: "2023-08-23"
connectorBuildOptions:
baseImage: docker.io/airbyte/java-connector-base:2.0.1@sha256:ec89bd1a89e825514dd2fc8730ba299a3ae1544580a078df0e35c5202c2085b3
connectorTestSuitesOptions:
- suite: unitTests
- suite: integrationTests
testSecrets:
- name: SECRET_SOURCE-MSSQL__CREDS
fileName: config.json
secretStore:
type: GSM
alias: airbyte-connector-testing-secret-store
- name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS
fileName: performance-config.json
secretStore:
type: GSM
alias: airbyte-connector-testing-secret-store
- suite: acceptanceTests
testSecrets:
- name: SECRET_SOURCE-MSSQL__CREDS
fileName: config.json
secretStore:
type: GSM
alias: airbyte-connector-testing-secret-store
- name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS
fileName: performance-config.json
secretStore:
type: GSM
alias: airbyte-connector-testing-secret-store
metadataSpecVersion: "1.0"


@@ -1,15 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
// Constants defined in
// airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json.
public class MsSqlSpecConstants {
public static final String INVALID_CDC_CURSOR_POSITION_PROPERTY = "invalid_cdc_cursor_position_behavior";
public static final String FAIL_SYNC_OPTION = "Fail sync";
public static final String RESYNC_DATA_OPTION = "Re-sync data";
}


@@ -1,75 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
import java.time.Instant;
import java.util.concurrent.atomic.AtomicLong;
public class MssqlCdcConnectorMetadataInjector implements CdcMetadataInjector<MssqlDebeziumStateAttributes> {
private final long emittedAtConverted;
// This now makes this class stateful. Please make sure to use the same instance within a sync
private final AtomicLong recordCounter = new AtomicLong(1);
private static final long ONE_HUNDRED_MILLION = 100_000_000;
private static MssqlCdcConnectorMetadataInjector mssqlCdcConnectorMetadataInjector;
private MssqlCdcConnectorMetadataInjector(final Instant emittedAt) {
this.emittedAtConverted = emittedAt.getEpochSecond() * ONE_HUNDRED_MILLION;
}
public static MssqlCdcConnectorMetadataInjector getInstance(final Instant emittedAt) {
if (mssqlCdcConnectorMetadataInjector == null) {
mssqlCdcConnectorMetadataInjector = new MssqlCdcConnectorMetadataInjector(emittedAt);
}
return mssqlCdcConnectorMetadataInjector;
}
@Override
public void addMetaData(final ObjectNode event, final JsonNode source) {
final String commitLsn = source.get("commit_lsn").asText();
final String eventSerialNo = source.get("event_serial_no").asText();
event.put(CDC_LSN, commitLsn);
event.put(CDC_EVENT_SERIAL_NO, eventSerialNo);
event.put(CDC_DEFAULT_CURSOR, getCdcDefaultCursor());
}
@Override
public void addMetaDataToRowsFetchedOutsideDebezium(final ObjectNode record,
final String transactionTimestamp,
final MssqlDebeziumStateAttributes debeziumStateAttributes) {
record.put(CDC_UPDATED_AT, transactionTimestamp);
record.put(CDC_EVENT_SERIAL_NO, "1");
record.put(CDC_LSN, debeziumStateAttributes.lsn().toString());
record.put(CDC_DELETED_AT, (String) null);
record.put(CDC_DEFAULT_CURSOR, getCdcDefaultCursor());
}
@Override
public String namespace(final JsonNode source) {
return source.get("schema").asText();
}
@Override
public String name(JsonNode source) {
return source.get("table").asText();
}
private Long getCdcDefaultCursor() {
return this.emittedAtConverted + this.recordCounter.getAndIncrement();
}
}
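The default CDC cursor above is the sync's emitted-at timestamp (in seconds) scaled by 10^8, plus a per-instance record counter, which keeps cursor values strictly increasing within a sync. A minimal, standalone sketch of that arithmetic (not the connector's API):
```java
import java.time.Instant;

public class CdcCursorSketch {

  public static void main(String[] args) {
    // Same scheme as MssqlCdcConnectorMetadataInjector: scaling by one hundred
    // million leaves room for 10^8 records per second before cursors collide.
    final long ONE_HUNDRED_MILLION = 100_000_000L;
    final Instant emittedAt = Instant.parse("2023-01-01T00:00:00Z");
    final long base = emittedAt.getEpochSecond() * ONE_HUNDRED_MILLION;
    long counter = 1;
    // The first three records of the sync get strictly increasing cursor values.
    for (int i = 0; i < 3; i++) {
      System.out.println(base + counter++);
    }
  }
}
```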


@@ -1,230 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.SyncMode;
import java.time.Duration;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.codehaus.plexus.util.StringUtils;
public class MssqlCdcHelper {
// legacy replication method config before version 0.4.0
// it is an enum with possible values: STANDARD and CDC
public static final String LEGACY_REPLICATION_FIELD = "replication_method";
// new replication method config since version 0.4.0
// it is an oneOf object
public static final String REPLICATION_FIELD = "replication";
public static final String REPLICATION_TYPE_FIELD = "replication_type";
public static final String METHOD_FIELD = "method";
private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L);
// Test execution latency is lower when heartbeats are more frequent.
private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L);
private static final Duration POLL_INTERVAL = Duration.ofSeconds(5L);
// The poll.interval.ms must be lower than the heartbeat.interval.ms
private static final Duration POLL_INTERVAL_IN_TESTS = Duration.ofMillis(500L);
public enum ReplicationMethod {
STANDARD,
CDC
}
@VisibleForTesting
static boolean isCdc(final JsonNode config) {
if (config != null) {
// new replication method config since version 0.4.0
if (config.hasNonNull(LEGACY_REPLICATION_FIELD) && config.get(LEGACY_REPLICATION_FIELD).isObject()) {
final JsonNode replicationConfig = config.get(LEGACY_REPLICATION_FIELD);
return ReplicationMethod.valueOf(replicationConfig.get(METHOD_FIELD).asText()) == ReplicationMethod.CDC;
}
// legacy replication method config before version 0.4.0
if (config.hasNonNull(LEGACY_REPLICATION_FIELD) && config.get(LEGACY_REPLICATION_FIELD).isTextual()) {
return ReplicationMethod.valueOf(config.get(LEGACY_REPLICATION_FIELD).asText()) == ReplicationMethod.CDC;
}
if (config.hasNonNull(REPLICATION_FIELD)) {
final JsonNode replicationConfig = config.get(REPLICATION_FIELD);
return ReplicationMethod.valueOf(replicationConfig.get(REPLICATION_TYPE_FIELD).asText()) == ReplicationMethod.CDC;
}
}
return false;
}
public static Properties getDebeziumProperties(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final boolean isSnapshot) {
final JsonNode config = database.getSourceConfig();
final JsonNode dbConfig = database.getDatabaseConfig();
final Properties props = new Properties();
props.setProperty("connector.class", "io.debezium.connector.sqlserver.SqlServerConnector");
// https://debezium.io/documentation/reference/2.2/connectors/sqlserver.html#sqlserver-property-include-schema-changes
props.setProperty("include.schema.changes", "false");
// https://debezium.io/documentation/reference/2.2/connectors/sqlserver.html#sqlserver-property-provide-transaction-metadata
props.setProperty("provide.transaction.metadata", "false");
props.setProperty("converters", "mssql_converter");
props.setProperty("mssql_converter.type", MssqlDebeziumConverter.class.getName());
// If new stream(s) are added after a previously successful sync,
// the snapshot.mode needs to be initial_only since we don't want to continue streaming changes
// https://debezium.io/documentation/reference/stable/connectors/sqlserver.html#sqlserver-property-snapshot-mode
if (isSnapshot) {
props.setProperty("snapshot.mode", "initial_only");
} else {
// If not in snapshot mode, initial will make sure that a snapshot is taken if the transaction log
// is rotated out. This will also end up reading streaming changes from the transaction log.
props.setProperty("snapshot.mode", "when_needed");
}
props.setProperty("snapshot.isolation.mode", "read_committed");
props.setProperty("schema.include.list", getSchema(catalog));
props.setProperty("table.include.list", getTableIncludeList(catalog));
props.setProperty("database.names", config.get(JdbcUtils.DATABASE_KEY).asText());
final String msgKeyColumns = getMessageKeyColumnValue(catalog);
System.out.println("msgKeyColumns: " + msgKeyColumns);
if (isCdc(config) && !msgKeyColumns.isEmpty()) {
// If the replication method is CDC, we need to set the message key columns
props.setProperty("message.key.columns", msgKeyColumns);
}
final Duration heartbeatInterval =
(database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean())
? HEARTBEAT_INTERVAL_IN_TESTS
: HEARTBEAT_INTERVAL;
props.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis()));
// Set poll.interval.ms to 5s. This parameter will determine how long Debezium will wait before
// querying for new data. It must be lower than heartbeat.interval.ms
final Duration pollInterval =
(database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean())
? POLL_INTERVAL_IN_TESTS
: POLL_INTERVAL;
props.setProperty("poll.interval.ms", Long.toString(pollInterval.toMillis()));
if (config.has("ssl_method")) {
final JsonNode sslConfig = config.get("ssl_method");
final String sslMethod = sslConfig.get("ssl_method").asText();
if ("unencrypted".equals(sslMethod)) {
props.setProperty("database.encrypt", "false");
props.setProperty("driver.trustServerCertificate", "true");
} else if ("encrypted_trust_server_certificate".equals(sslMethod)) {
props.setProperty("driver.encrypt", "true");
props.setProperty("driver.trustServerCertificate", "true");
} else if ("encrypted_verify_certificate".equals(sslMethod)) {
props.setProperty("driver.encrypt", "true");
props.setProperty("driver.trustServerCertificate", "false");
if (dbConfig.has("trustStore") && !dbConfig.get("trustStore").asText().isEmpty()) {
props.setProperty("database.trustStore", dbConfig.get("trustStore").asText());
}
if (dbConfig.has("trustStorePassword") && !dbConfig.get("trustStorePassword").asText().isEmpty()) {
props.setProperty("database.trustStorePassword", dbConfig.get("trustStorePassword").asText());
}
if (dbConfig.has("hostNameInCertificate") && !dbConfig.get("hostNameInCertificate").asText().isEmpty()) {
props.setProperty("database.hostNameInCertificate", dbConfig.get("hostNameInCertificate").asText());
}
}
} else {
props.setProperty("driver.trustServerCertificate", "true");
}
return props;
}
private static String getSchema(final ConfiguredAirbyteCatalog catalog) {
return catalog.getStreams().stream()
.filter(s -> s.getSyncMode() == SyncMode.INCREMENTAL)
.map(ConfiguredAirbyteStream::getStream)
.map(AirbyteStream::getNamespace)
// debezium needs commas escaped to split properly
.map(x -> StringUtils.escape(x, new char[] {','}, "\\,"))
.collect(Collectors.joining(","));
}
/**
* Returns a comma-separated list of fully-qualified table identifiers (schema.table) for Debezium's
* table.include.list property. This ensures only explicitly selected tables are captured by CDC,
* not all CDC-enabled tables in the schema.
*
* @param catalog the configured airbyte catalog
* @return a comma-separated list of schema.table identifiers with proper escaping for Debezium
*/
@VisibleForTesting
static String getTableIncludeList(final ConfiguredAirbyteCatalog catalog) {
return catalog.getStreams().stream()
.filter(s -> s.getSyncMode() == SyncMode.INCREMENTAL)
.map(ConfiguredAirbyteStream::getStream)
.map(stream -> {
final String schema = stream.getNamespace();
final String table = stream.getName();
final String fullTableId = schema + "." + table;
// Use Pattern.quote to escape special regex characters, then escape commas for Debezium
return StringUtils.escape(Pattern.quote(fullTableId), new char[] {','}, "\\,");
})
.collect(Collectors.joining(","));
}
/**
* Escapes the following special characters in the input string: comma (,), period (.), semicolon
* (;), and colon (:). Each special character is prefixed with a backslash.
*
* @param input the string to escape
* @return the escaped string
*/
private static String escapeSpecialChars(String input) {
if (input == null) {
return null;
}
StringBuilder sb = new StringBuilder();
for (char c : input.toCharArray()) {
if (c == ',' || c == '.' || c == ';' || c == ':') {
sb.append('\\');
}
sb.append(c);
}
return sb.toString();
}
/**
* Returns a string representation of the message key columns for the streams in the catalog. The
* format is "schema1.table1:keyCol1,keyCol2;schema2.table2:keyCol1,keyCol2". This is used to set
* the message key columns in the debezium properties. The method filters the streams to only
* include those with incremental sync mode and user-defined primary keys.
*
* @param catalog the configured airbyte catalog
* @return a string representation of the message key columns
*/
private static String getMessageKeyColumnValue(final ConfiguredAirbyteCatalog catalog) {
return catalog.getStreams().stream()
.filter(s -> s.getSyncMode() == SyncMode.INCREMENTAL)
.filter(s -> !s.getPrimaryKey().isEmpty())
.map(s -> {
final String tableId = escapeSpecialChars(s.getStream().getNamespace()) + "." + escapeSpecialChars(s.getStream().getName());
final String keyCols = s.getPrimaryKey().get(0).stream()
.map(col -> escapeSpecialChars(col))
.collect(Collectors.joining(","));
return tableId + ":" + keyCols;
})
.collect(Collectors.joining(";"));
}
}
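For reference, isCdc above accepts three configuration shapes: the legacy textual replication_method, the object form of replication_method with a method field, and the newer replication object with replication_type. A hedged sketch of those JSON shapes using plain Jackson (the snippets are illustrative, not full connector configs):
```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class ReplicationConfigShapes {

  public static void main(String[] args) throws Exception {
    final ObjectMapper mapper = new ObjectMapper();
    // Legacy (pre-0.4.0): replication_method is a plain enum string.
    final JsonNode legacy = mapper.readTree("{\"replication_method\": \"CDC\"}");
    // Object form: replication_method carries a "method" field.
    final JsonNode objectForm = mapper.readTree("{\"replication_method\": {\"method\": \"CDC\"}}");
    // Newer form: a "replication" object with "replication_type".
    final JsonNode newer = mapper.readTree("{\"replication\": {\"replication_type\": \"CDC\"}}");
    // Each of these shapes would make MssqlCdcHelper.isCdc(...) return true.
    System.out.println(legacy + "\n" + objectForm + "\n" + newer);
  }
}
```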


@@ -1,41 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.debezium.CdcSavedInfoFetcher;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import java.util.Optional;
public class MssqlCdcSavedInfoFetcher implements CdcSavedInfoFetcher {
private final JsonNode savedOffset;
private final JsonNode savedSchemaHistory;
private final boolean isSavedSchemaHistoryCompressed;
public MssqlCdcSavedInfoFetcher(final CdcState savedState) {
final boolean savedStatePresent = savedState != null && savedState.getState() != null;
this.savedOffset = savedStatePresent ? savedState.getState().get(MSSQL_CDC_OFFSET) : null;
this.savedSchemaHistory = savedStatePresent ? savedState.getState().get(MSSQL_DB_HISTORY) : null;
this.isSavedSchemaHistoryCompressed =
savedStatePresent && savedState.getState().has(IS_COMPRESSED) && savedState.getState().get(IS_COMPRESSED).asBoolean();
}
@Override
public JsonNode getSavedOffset() {
return savedOffset;
}
@Override
public SchemaHistory<Optional<JsonNode>> getSavedSchemaHistory() {
return new SchemaHistory<>(Optional.ofNullable(savedSchemaHistory), isSavedSchemaHistoryCompressed);
}
}


@@ -1,76 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.debezium.CdcStateHandler;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlCdcStateHandler implements CdcStateHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCdcStateHandler.class);
private final StateManager stateManager;
public MssqlCdcStateHandler(final StateManager stateManager) {
this.stateManager = stateManager;
}
@Override
public boolean isCdcCheckpointEnabled() {
return true;
}
@Override
public AirbyteMessage saveState(final Map<String, String> offset, final SchemaHistory<String> dbHistory) {
final Map<String, Object> state = new HashMap<>();
state.put(MSSQL_CDC_OFFSET, offset);
state.put(MSSQL_DB_HISTORY, dbHistory.getSchema());
state.put(IS_COMPRESSED, dbHistory.isCompressed());
final JsonNode asJson = Jsons.jsonNode(state);
LOGGER.info("debezium state offset: {}", Jsons.jsonNode(offset));
final CdcState cdcState = new CdcState().withState(asJson);
stateManager.getCdcStateManager().setCdcState(cdcState);
/*
* Namespace pair is ignored by the global state manager, but is needed to satisfy the API contract.
* Therefore, provide an empty optional.
*/
final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty());
return new AirbyteMessage().withType(Type.STATE).withState(stateMessage);
}
@Override
public AirbyteMessage saveStateAfterCompletionOfSnapshotOfNewStreams() {
LOGGER.info("Snapshot of new tables is complete, saving state");
/*
* Namespace pair is ignored by the global state manager, but is needed to satisfy the API contract.
* Therefore, provide an empty optional.
*/
final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty());
return new AirbyteMessage().withType(Type.STATE).withState(stateMessage);
}
@Override
public boolean compressSchemaHistoryForState() {
return true;
}
}
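saveState above packs the Debezium offset map, the (optionally compressed) schema history, and a compression flag into the CDC state blob keyed by MssqlSource.MSSQL_CDC_OFFSET, MSSQL_DB_HISTORY, and IS_COMPRESSED. A rough sketch of the resulting shape; the literal key strings below are placeholders for those constants, not their confirmed values:
```java
import java.util.HashMap;
import java.util.Map;

public class CdcStateShapeSketch {

  public static void main(String[] args) {
    final Map<String, Object> state = new HashMap<>();
    // Placeholder keys standing in for MssqlSource.MSSQL_CDC_OFFSET,
    // MSSQL_DB_HISTORY, and IS_COMPRESSED (actual constants not shown here).
    state.put("mssql_cdc_offset", Map.of("partition-json", "offset-json"));
    state.put("mssql_db_history", "schema history, possibly compressed");
    state.put("is_compressed", true);
    System.out.println(state);
  }
}
```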


@@ -1,144 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.debezium.CdcTargetPosition;
import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata;
import io.airbyte.cdk.integrations.debezium.internals.SnapshotMetadata;
import io.airbyte.commons.json.Jsons;
import io.debezium.connector.sqlserver.Lsn;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlCdcTargetPosition implements CdcTargetPosition<Lsn> {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCdcTargetPosition.class);
public final Lsn targetLsn;
public MssqlCdcTargetPosition(final Lsn targetLsn) {
this.targetLsn = targetLsn;
}
@Override
public boolean reachedTargetPosition(final ChangeEventWithMetadata changeEventWithMetadata) {
if (changeEventWithMetadata.isSnapshotEvent()) {
return false;
} else if (SnapshotMetadata.LAST == changeEventWithMetadata.getSnapshotMetadata()) {
LOGGER.info("Signalling close because Snapshot is complete");
return true;
} else {
final Lsn recordLsn = extractLsn(changeEventWithMetadata.getEventValueAsJson());
final boolean isEventLSNAfter = targetLsn.compareTo(recordLsn) <= 0;
if (isEventLSNAfter) {
LOGGER.info("Signalling close because record's LSN : " + recordLsn + " is after target LSN : " + targetLsn);
}
return isEventLSNAfter;
}
}
@Override
public Lsn extractPositionFromHeartbeatOffset(final Map<String, ?> sourceOffset) {
final Object commitLsnValue = sourceOffset.get("commit_lsn");
return (commitLsnValue == null) ? Lsn.NULL : Lsn.valueOf(commitLsnValue.toString());
}
private Lsn extractLsn(final JsonNode valueAsJson) {
return Optional.ofNullable(valueAsJson.get("source"))
.flatMap(source -> Optional.ofNullable(source.get("commit_lsn").asText()))
.map(Lsn::valueOf)
.orElseThrow(() -> new IllegalStateException("Could not find LSN"));
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final MssqlCdcTargetPosition that = (MssqlCdcTargetPosition) o;
return targetLsn.equals(that.targetLsn);
}
@Override
public int hashCode() {
return targetLsn.hashCode();
}
public static MssqlCdcTargetPosition getTargetPosition(final JdbcDatabase database, final String dbName) {
try {
// We might have to wait a bit before querying the max_lsn to give the CDC capture job
// a chance to catch up. This is important in tests, where reads might occur in quick succession
// which might leave the CT tables (which Debezium consumes) in a stale state.
final JsonNode sourceConfig = database.getSourceConfig();
final String maxLsnQuery = """
USE [%s];
SELECT sys.fn_cdc_get_max_lsn() AS max_lsn;
""".formatted(dbName);
// Query the high-water mark.
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(
connection -> connection.createStatement().executeQuery(maxLsnQuery),
JdbcUtils.getDefaultSourceOperations()::rowToJson);
Preconditions.checkState(jsonNodes.size() == 1);
final Lsn maxLsn;
if (jsonNodes.get(0).get("max_lsn") != null) {
maxLsn = Lsn.valueOf(jsonNodes.get(0).get("max_lsn").binaryValue());
} else {
maxLsn = Lsn.NULL;
}
LOGGER.info("identified target lsn: " + maxLsn);
return new MssqlCdcTargetPosition(maxLsn);
} catch (final SQLException | IOException e) {
throw new RuntimeException(e);
}
}
@Override
public boolean isHeartbeatSupported() {
return true;
}
@Override
public boolean reachedTargetPosition(Lsn positionFromHeartbeat) {
return positionFromHeartbeat.compareTo(targetLsn) >= 0;
}
@Override
public boolean isEventAheadOffset(Map<String, String> offset, ChangeEventWithMetadata event) {
if (offset == null || offset.size() != 1) {
return false;
}
final Lsn eventLsn = extractLsn(event.getEventValueAsJson());
final Lsn offsetLsn = offsetToLsn(offset);
return eventLsn.compareTo(offsetLsn) > 0;
}
@Override
public boolean isSameOffset(Map<String, String> offsetA, Map<String, String> offsetB) {
if ((offsetA == null || offsetA.size() != 1) || (offsetB == null || offsetB.size() != 1)) {
return false;
}
return offsetToLsn(offsetA).equals(offsetToLsn(offsetB));
}
private Lsn offsetToLsn(Map<String, String> offset) {
final JsonNode offsetJson = Jsons.deserialize((String) offset.values().toArray()[0]);
final JsonNode commitLsnJson = offsetJson.get("commit_lsn");
return (commitLsnJson == null || commitLsnJson.isNull()) ? Lsn.NULL : Lsn.valueOf(commitLsnJson.asText());
}
}
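reachedTargetPosition above declares the sync caught up once a record's commit LSN is at or beyond the target LSN captured before the read. A small sketch of that comparison using Debezium's Lsn directly (the LSN literals are illustrative):
```java
import io.debezium.connector.sqlserver.Lsn;

public class LsnComparisonSketch {

  public static void main(String[] args) {
    // Target LSN captured up front via sys.fn_cdc_get_max_lsn().
    final Lsn target = Lsn.valueOf("00000027:00000ac0:0002");
    final Lsn behind = Lsn.valueOf("00000027:00000ac0:0001");
    final Lsn ahead = Lsn.valueOf("00000027:00000ac0:0003");
    // Same rule as MssqlCdcTargetPosition: close once record LSN >= target.
    System.out.println(target.compareTo(behind) <= 0); // false: keep reading
    System.out.println(target.compareTo(ahead) <= 0); // true: signal close
  }
}
```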


@@ -1,215 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.microsoft.sqlserver.jdbc.Geography;
import com.microsoft.sqlserver.jdbc.Geometry;
import com.microsoft.sqlserver.jdbc.SQLServerException;
import io.airbyte.cdk.db.DataTypeUtils;
import io.airbyte.cdk.db.jdbc.DateTimeConverter;
import io.airbyte.cdk.integrations.debezium.internals.DebeziumConverterUtils;
import io.debezium.spi.converter.CustomConverter;
import io.debezium.spi.converter.RelationalColumn;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.time.*;
import java.time.format.DateTimeFormatter;
import java.util.*;
import microsoft.sql.DateTimeOffset;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlDebeziumConverter implements CustomConverter<SchemaBuilder, RelationalColumn> {
private final Logger LOGGER = LoggerFactory.getLogger(MssqlDebeziumConverter.class);
private final Set<String> BINARY = Set.of("VARBINARY", "BINARY");
private final Set<String> DATETIME_TYPES = Set.of("DATETIME", "DATETIME2", "SMALLDATETIME");
private final String DATE = "DATE";
private static final String DATETIMEOFFSET = "DATETIMEOFFSET";
private static final String TIME_TYPE = "TIME";
private static final String SMALLMONEY_TYPE = "SMALLMONEY";
private static final String GEOMETRY = "GEOMETRY";
private static final String GEOGRAPHY = "GEOGRAPHY";
private static final String DATETIME_FORMAT_MICROSECONDS = "yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]";
@Override
public void configure(final Properties props) {}
@Override
public void converterFor(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
if (DATE.equalsIgnoreCase(field.typeName())) {
registerDate(field, registration);
} else if (DATETIME_TYPES.contains(field.typeName().toUpperCase())) {
registerDatetime(field, registration);
} else if (SMALLMONEY_TYPE.equalsIgnoreCase(field.typeName())) {
registerMoney(field, registration);
} else if (BINARY.contains(field.typeName().toUpperCase())) {
registerBinary(field, registration);
} else if (GEOMETRY.equalsIgnoreCase(field.typeName())) {
registerGeometry(field, registration);
} else if (GEOGRAPHY.equalsIgnoreCase(field.typeName())) {
registerGeography(field, registration);
} else if (TIME_TYPE.equalsIgnoreCase(field.typeName())) {
registerTime(field, registration);
} else if (DATETIMEOFFSET.equalsIgnoreCase(field.typeName())) {
registerDateTimeOffSet(field, registration);
}
}
private void registerGeometry(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof byte[]) {
try {
return Geometry.deserialize((byte[]) input).toString();
} catch (final SQLServerException e) {
LOGGER.error(e.getMessage());
}
}
LOGGER.warn("Uncovered Geometry class type '{}'. Use default converter",
input.getClass().getName());
return input.toString();
});
}
private void registerGeography(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof byte[]) {
try {
return Geography.deserialize((byte[]) input).toString();
} catch (final SQLServerException e) {
LOGGER.error(e.getMessage());
}
}
LOGGER.warn("Uncovered Geography class type '{}'. Use default converter",
input.getClass().getName());
return input.toString();
});
}
private void registerDate(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (field.typeName().equalsIgnoreCase("DATE")) {
return DateTimeConverter.convertToDate(input);
}
return DateTimeConverter.convertToTimestamp(input);
});
}
private void registerDateTimeOffSet(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof DateTimeOffset) {
var offsetDateTime = ((DateTimeOffset) input).getOffsetDateTime();
return offsetDateTime.format(DataTypeUtils.TIMESTAMPTZ_FORMATTER);
}
LOGGER.warn("Uncovered DateTimeOffSet class type '{}'. Use default converter",
input.getClass().getName());
return input.toString();
});
}
private void registerDatetime(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(),
input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof final Timestamp d) {
final LocalDateTime localDateTime = d.toLocalDateTime();
return localDateTime.format(DateTimeFormatter.ofPattern(DATETIME_FORMAT_MICROSECONDS));
}
if (input instanceof final Long d) {
// During schema history creation, datetime input arrives in the form of epoch nanoseconds.
// This is needed for example for a column defined as:
// [TransactionDate] DATETIME2 (7) DEFAULT ('2024-01-01T00:00:00.0000000') NOT NULL
final Instant instant = Instant.ofEpochMilli(d / 1000 / 1000);
final LocalDateTime localDateTime = LocalDateTime.ofInstant(instant, ZoneId.of("UTC"));
return localDateTime.format(DateTimeFormatter.ofPattern(DATETIME_FORMAT_MICROSECONDS));
}
return input.toString();
});
}
private void registerTime(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof Timestamp) {
return DataTypeUtils.toISOTimeString(((Timestamp) input).toLocalDateTime());
}
LOGGER.warn("Uncovered time class type '{}'. Use default converter",
input.getClass().getName());
return input.toString();
});
}
private void registerMoney(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.float64(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof BigDecimal) {
return ((BigDecimal) input).doubleValue();
}
LOGGER.warn("Uncovered money class type '{}'. Use default converter",
input.getClass().getName());
return input.toString();
});
}
private void registerBinary(final RelationalColumn field,
final ConverterRegistration<SchemaBuilder> registration) {
registration.register(SchemaBuilder.string(), input -> {
if (Objects.isNull(input)) {
return DebeziumConverterUtils.convertDefaultValue(field);
}
if (input instanceof byte[]) {
return Base64.getEncoder().encodeToString((byte[]) input);
}
LOGGER.warn("Uncovered binary class type '{}'. Use default converter",
input.getClass().getName());
return input.toString();
});
}
}
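The DATETIME/DATETIME2/SMALLDATETIME branch above formats values with an optional microsecond fraction. A quick illustration of what that pattern produces (the timestamp is illustrative):
```java
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

public class DatetimePatternSketch {

  public static void main(String[] args) {
    // Same pattern string as MssqlDebeziumConverter.DATETIME_FORMAT_MICROSECONDS.
    final DateTimeFormatter fmt =
        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]");
    final LocalDateTime ts = LocalDateTime.of(2024, 1, 1, 13, 0, 4, 123_456_000);
    System.out.println(ts.format(fmt)); // 2024-01-01T13:00:04.123456
  }
}
```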


@@ -1,304 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifierList;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getIdentifierWithQuoting;
import static io.airbyte.integrations.source.mssql.MssqlSource.HIERARCHYID;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import java.math.BigDecimal;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility class to define constants related to querying mssql
*/
public class MssqlQueryUtils {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlQueryUtils.class);
private static final String MAX_OC_VALUE_QUERY =
"""
SELECT MAX(%s) as %s FROM %s;
""";
public record TableSizeInfo(Long tableSize, Long avgRowLength) {}
private static final String MAX_CURSOR_VALUE_QUERY =
"""
SELECT TOP 1 %s, COUNT(*) AS %s FROM %s WHERE %s = (SELECT MAX(%s) FROM %s) GROUP BY %s;
""";
public static final String INDEX_QUERY = "EXEC sp_helpindex N'%s'";
public record Index(
@JsonProperty("index_name") String name,
@JsonProperty("index_description") String description,
@JsonProperty("index_keys") String keys) {}
public static final String TABLE_ESTIMATE_QUERY =
"""
EXEC sp_spaceused N'"%s"."%s"'
""";
public static final String MAX_OC_COL = "max_oc";
public static final String DATA_SIZE_HUMAN_READABLE = "data";
public static final String NUM_ROWS = "rows";
public static void getIndexInfoForStreams(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final String quoteString) {
for (final ConfiguredAirbyteStream stream : catalog.getStreams()) {
final String streamName = stream.getStream().getName();
final String schemaName = stream.getStream().getNamespace();
final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, streamName, quoteString);
LOGGER.info("Discovering indexes for table {}", fullTableName);
try {
final String query = INDEX_QUERY.formatted(fullTableName);
LOGGER.debug("Index lookup query: {}", query);
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(query).executeQuery(),
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
if (jsonNodes != null) {
jsonNodes.stream().map(node -> Jsons.convertValue(node, Index.class))
.forEach(i -> LOGGER.info("Index {}", i));
}
} catch (final Exception ex) {
LOGGER.info("Failed to get index for {}", fullTableName);
}
}
}
public static String getMaxOcValueForStream(final JdbcDatabase database,
final ConfiguredAirbyteStream stream,
final String ocFieldName,
final String quoteString) {
final String name = stream.getStream().getName();
final String namespace = stream.getStream().getNamespace();
final String fullTableName =
getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString);
final String maxOcQuery = String.format(MAX_OC_VALUE_QUERY,
getIdentifierWithQuoting(ocFieldName, quoteString),
MAX_OC_COL,
fullTableName);
LOGGER.info("Querying for max oc value: {}", maxOcQuery);
try {
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(maxOcQuery).executeQuery(),
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
Preconditions.checkState(jsonNodes.size() == 1);
if (jsonNodes.get(0).get(MAX_OC_COL) == null) {
LOGGER.info("Max PK is null for table {} - this could indicate an empty table", fullTableName);
return null;
}
return jsonNodes.get(0).get(MAX_OC_COL).asText();
} catch (final SQLException e) {
throw new RuntimeException(e);
}
}
private static long toBytes(final String filesize) {
long returnValue = -1;
final Pattern patt = Pattern.compile("([\\d.]+)[\s+]*([GMK]B)", Pattern.CASE_INSENSITIVE);
final Matcher matcher = patt.matcher(filesize);
Map<String, Integer> powerMap = new HashMap<String, Integer>();
powerMap.put("GB", 3);
powerMap.put("MB", 2);
powerMap.put("KB", 1);
if (matcher.find()) {
String number = matcher.group(1).trim();
int pow = powerMap.get(matcher.group(2).toUpperCase());
BigDecimal bytes = new BigDecimal(number);
bytes = bytes.multiply(BigDecimal.valueOf(1024).pow(pow));
returnValue = bytes.longValue();
}
return returnValue;
}
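// Worked example (illustrative): toBytes("4.50 MB") captures number "4.50" and unit "MB",
// so bytes = 4.50 * 1024^2 = 4718592; input without a recognized unit returns -1.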
public static Map<AirbyteStreamNameNamespacePair, TableSizeInfo> getTableSizeInfoForStreams(final JdbcDatabase database,
final List<ConfiguredAirbyteStream> streams,
final String quoteString) {
final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap = new HashMap<>();
streams.forEach(stream -> {
try {
final String name = stream.getStream().getName();
final String namespace = stream.getStream().getNamespace();
final String fullTableName =
getFullyQualifiedTableNameWithQuoting(name, namespace, quoteString);
final List<JsonNode> tableEstimateResult = getTableEstimate(database, namespace, name);
if (tableEstimateResult != null
&& tableEstimateResult.size() == 1
&& tableEstimateResult.get(0).get(DATA_SIZE_HUMAN_READABLE) != null
&& tableEstimateResult.get(0).get(NUM_ROWS) != null) {
final long tableEstimateBytes = toBytes(tableEstimateResult.get(0).get(DATA_SIZE_HUMAN_READABLE).asText());
final long numRows = tableEstimateResult.get(0).get(NUM_ROWS).asLong();
final long avgTableRowSizeBytes = numRows > 0 ? tableEstimateBytes / numRows : 0;
LOGGER.info("Stream {} size estimate is {}, average row size estimate is {}", fullTableName, tableEstimateBytes, avgTableRowSizeBytes);
final TableSizeInfo tableSizeInfo = new TableSizeInfo(tableEstimateBytes, avgTableRowSizeBytes);
final AirbyteStreamNameNamespacePair namespacePair =
new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
tableSizeInfoMap.put(namespacePair, tableSizeInfo);
}
} catch (final Exception e) {
LOGGER.warn("Error occurred while attempting to estimate sync size", e);
}
});
return tableSizeInfoMap;
}
/**
* Iterates through each stream and finds the max cursor value and the record count that has that
* value, based on the cursor field provided by the customer for each stream. This information is
* saved in a HashMap, mapping AirbyteStreamNameNamespacePair -> CursorBasedStatus.
*
* @param database the source db
* @param streams streams to be synced
* @param stateManager stream stateManager
* @return Map of streams to statuses
*/
public static Map<io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair, CursorBasedStatus> getCursorBasedSyncStatusForStreams(final JdbcDatabase database,
final List<ConfiguredAirbyteStream> streams,
final StateManager stateManager,
final String quoteString) {
final Map<io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair, CursorBasedStatus> cursorBasedStatusMap = new HashMap<>();
streams.forEach(stream -> {
final String name = stream.getStream().getName();
final String namespace = stream.getStream().getNamespace();
final String fullTableName =
getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString);
final Optional<CursorInfo> cursorInfoOptional =
stateManager.getCursorInfo(new AirbyteStreamNameNamespacePair(name, namespace));
if (cursorInfoOptional.isEmpty()) {
throw new RuntimeException(String.format("Stream %s was not provided with an appropriate cursor", stream.getStream().getName()));
}
final CursorBasedStatus cursorBasedStatus = new CursorBasedStatus();
final Optional<String> maybeCursorField = Optional.ofNullable(cursorInfoOptional.get().getCursorField());
maybeCursorField.ifPresent(cursorField -> {
LOGGER.info("Cursor {}. Querying max cursor value for {}.{}", cursorField, namespace, name);
final String quotedCursorField = getIdentifierWithQuoting(cursorField, quoteString);
final String counterField = cursorField + "_count";
final String quotedCounterField = getIdentifierWithQuoting(counterField, quoteString);
final String cursorBasedSyncStatusQuery = String.format(MAX_CURSOR_VALUE_QUERY,
quotedCursorField,
quotedCounterField,
fullTableName,
quotedCursorField,
quotedCursorField,
fullTableName,
quotedCursorField);
final List<JsonNode> jsonNodes;
try {
jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(cursorBasedSyncStatusQuery).executeQuery(),
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
} catch (SQLException e) {
throw new RuntimeException("Failed to read max cursor value from %s.%s".formatted(namespace, name), e);
}
cursorBasedStatus.setCursorField(ImmutableList.of(cursorField));
if (!jsonNodes.isEmpty()) {
final JsonNode result = jsonNodes.get(0);
LOGGER.info("Max cursor value for {}.{} is {}", namespace, fullTableName, result);
cursorBasedStatus.setCursor(result.get(cursorField).asText());
cursorBasedStatus.setCursorRecordCount(result.get(counterField).asLong());
}
cursorBasedStatus.setStateType(StateType.CURSOR_BASED);
cursorBasedStatus.setVersion(2L);
cursorBasedStatus.setStreamName(name);
cursorBasedStatus.setStreamNamespace(namespace);
cursorBasedStatusMap.put(new AirbyteStreamNameNamespacePair(name, namespace), cursorBasedStatus);
});
});
return cursorBasedStatusMap;
}
private static List<JsonNode> getTableEstimate(final JdbcDatabase database, final String namespace, final String name)
throws SQLException {
// Construct the table estimate query.
final String tableEstimateQuery =
String.format(TABLE_ESTIMATE_QUERY, namespace, name);
LOGGER.info("Querying for table estimate size: {}", tableEstimateQuery);
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.createStatement().executeQuery(tableEstimateQuery),
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
Preconditions.checkState(jsonNodes.size() == 1);
LOGGER.debug("Estimate: {}", jsonNodes);
return jsonNodes;
}
public static String prettyPrintConfiguredAirbyteStreamList(final List<ConfiguredAirbyteStream> streamList) {
return streamList.stream().map(s -> "%s.%s".formatted(s.getStream().getNamespace(), s.getStream().getName())).collect(Collectors.joining(", "));
}
/**
* There is no support for hierarchyid even in the native SQL Server JDBC driver. Its value can be
* converted to an nvarchar(4000) data type by calling the ToString() method. So we make a separate
* query to get the table's metadata, check whether there are any hierarchyid columns, and wrap the
* required fields with the ToString() function in the final SELECT query. Reference:
* https://docs.microsoft.com/en-us/sql/t-sql/data-types/hierarchyid-data-type-method-reference?view=sql-server-ver15#data-type-conversion
* Note: This holds the main logic of the same-named method in MssqlSource; it was extracted so that
* it can also be used in MssqlInitialLoadRecordIterator.
*
* @return the list of column names, updated to wrap hierarchyid columns (if any) properly
*/
public static String getWrappedColumnNames(
final JdbcDatabase database,
final String quoteString,
final List<String> columnNames,
final String schemaName,
final String tableName) {
final List<String> hierarchyIdColumns = new ArrayList<>();
try {
final String identifierQuoteString = database.getMetaData().getIdentifierQuoteString();
final SQLServerResultSetMetaData sqlServerResultSetMetaData = (SQLServerResultSetMetaData) database
.queryMetadata(String
.format("SELECT TOP 1 %s FROM %s", // only first row is enough to get field's type
enquoteIdentifierList(columnNames, quoteString),
getFullyQualifiedTableNameWithQuoting(schemaName, tableName, quoteString)));
// metadata will be null if table doesn't contain records
if (sqlServerResultSetMetaData != null) {
for (int i = 1; i <= sqlServerResultSetMetaData.getColumnCount(); i++) {
if (HIERARCHYID.equals(sqlServerResultSetMetaData.getColumnTypeName(i))) {
hierarchyIdColumns.add(sqlServerResultSetMetaData.getColumnName(i));
}
}
}
// Iterate through the column names and wrap each hierarchyid field with the ToString() function
// in the query. The resulting select list looks like: testColumn.ToString() as "testColumn".
// ToString() is the only way in SQL Server to get a human-readable value instead of the
// MSSQL-specific HEX value.
return String.join(", ", columnNames.stream()
.map(
el -> hierarchyIdColumns.contains(el) ? String.format("%s.ToString() as %s%s%s", el, identifierQuoteString, el, identifierQuoteString)
: getIdentifierWithQuoting(el, quoteString))
.toList());
} catch (final SQLException e) {
LOGGER.error("Failed to fetch metadata to prepare a proper request.", e);
throw new RuntimeException(e);
}
}
}
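
As an illustration of the wrapping described in the javadoc above, a minimal self-contained sketch; the `WrapDemo` class, its column names, and the quote character are hypothetical:

```java
import java.util.List;

// Minimal sketch of the select-list wrapping performed by getWrappedColumnNames:
// hierarchyid columns are selected via ToString() so the value is human-readable,
// all other columns are simply quoted. Names and quoting are illustrative.
public final class WrapDemo {

  public static void main(final String[] args) {
    final List<String> columns = List.of("id", "orgNode");
    final List<String> hierarchyIdColumns = List.of("orgNode"); // pretend metadata said so
    final String selectList = String.join(", ", columns.stream()
        .map(c -> hierarchyIdColumns.contains(c)
            ? "%s.ToString() as \"%s\"".formatted(c, c)
            : "\"%s\"".formatted(c))
        .toList());
    System.out.println(selectList); // "id", orgNode.ToString() as "orgNode"
  }
}
```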

View File

@@ -1,719 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.db.DataTypeUtils.TIMESTAMPTZ_FORMATTER;
import static io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler.isAnyStreamIncrementalSyncMode;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.*;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbReadUtil.identifyStreamsForCursorBased;
import static io.airbyte.integrations.source.mssql.MssqlCdcHelper.*;
import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getCursorBasedSyncStatusForStreams;
import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getTableSizeInfoForStreams;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.*;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
import static java.util.stream.Collectors.toList;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import io.airbyte.cdk.db.factory.DatabaseDriver;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig;
import io.airbyte.cdk.db.util.SSLCertificateUtils;
import io.airbyte.cdk.integrations.base.IntegrationRunner;
import io.airbyte.cdk.integrations.base.Source;
import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner;
import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource;
import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource;
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadHandler;
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
import io.airbyte.cdk.integrations.source.relationaldb.state.NonResumableStateMessageProducer;
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateGeneratorUtils;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory;
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
import io.airbyte.commons.exceptions.ConfigErrorException;
import io.airbyte.commons.features.EnvVariableFeatureFlags;
import io.airbyte.commons.features.FeatureFlags;
import io.airbyte.commons.functional.CheckedConsumer;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.cursor_based.MssqlCursorBasedStateManager;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStreamStateManager;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.CursorBasedStreams;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams;
import io.airbyte.protocol.models.CommonField;
import io.airbyte.protocol.models.v0.*;
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
import java.io.IOException;
import java.net.URI;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.sql.*;
import java.time.*;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.RandomStringUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlSource extends AbstractJdbcSource<JDBCType> implements Source {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSource.class);
public static final String DESCRIBE_TABLE_QUERY =
"""
sp_columns "%s"
""";
public static final String NULL_CURSOR_VALUE_WITH_SCHEMA_QUERY =
"""
SELECT CASE WHEN (SELECT TOP 1 1 FROM "%s"."%s" WHERE "%s" IS NULL)=1 then 1 else 0 end as %s
""";
public static final String DRIVER_CLASS = DatabaseDriver.MSSQLSERVER.getDriverClassName();
public static final String MSSQL_CDC_OFFSET = "mssql_cdc_offset";
public static final String MSSQL_DB_HISTORY = "mssql_db_history";
public static final String IS_COMPRESSED = "is_compressed";
public static final String CDC_LSN = "_ab_cdc_lsn";
public static final String CDC_EVENT_SERIAL_NO = "_ab_cdc_event_serial_no";
public static final String HIERARCHYID = "hierarchyid";
private static final int INTERMEDIATE_STATE_EMISSION_FREQUENCY = 10_000;
public static final String CDC_DEFAULT_CURSOR = "_ab_cdc_cursor";
public static final String TUNNEL_METHOD = "tunnel_method";
public static final String NO_TUNNEL = "NO_TUNNEL";
public static final String SSL_METHOD = "ssl_method";
public static final String SSL_METHOD_UNENCRYPTED = "unencrypted";
private MssqlInitialLoadStateManager initialLoadStateManager = null;
public static final String JDBC_DELIMITER = ";";
private List<String> schemas;
private int stateEmissionFrequency;
private final FeatureFlags featureFlags;
public static final String REPLICATION_INCREMENTAL_EXCLUDE_TODAYS = "exclude_todays_data";
public static Source sshWrappedSource(final MssqlSource source) {
return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY);
}
public MssqlSource() {
this(new EnvVariableFeatureFlags());
}
public MssqlSource(final FeatureFlags featureFlags) {
super(DRIVER_CLASS, AdaptiveStreamingQueryConfig::new, new MssqlSourceOperations());
this.featureFlags = featureFlags;
this.stateEmissionFrequency = INTERMEDIATE_STATE_EMISSION_FREQUENCY;
}
@Override
public FeatureFlags getFeatureFlags() {
return featureFlags;
}
@Override
protected AirbyteStateType getSupportedStateType(final JsonNode config) {
return MssqlCdcHelper.isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM;
}
@Override
public AirbyteConnectionStatus check(final JsonNode config) throws Exception {
// #15808 Disallow connecting to db with disable, prefer or allow SSL mode when connecting directly
// and not over SSH tunnel
if (cloudDeploymentMode()) {
if (config.has(TUNNEL_METHOD)
&& config.get(TUNNEL_METHOD).has(TUNNEL_METHOD)
&& config.get(TUNNEL_METHOD).get(TUNNEL_METHOD).asText().equals(NO_TUNNEL)) {
// If no SSH tunnel.
if (config.has(SSL_METHOD) && config.get(SSL_METHOD).has(SSL_METHOD) &&
SSL_METHOD_UNENCRYPTED.equalsIgnoreCase(config.get(SSL_METHOD).get(SSL_METHOD).asText())) {
// Fail in case SSL method is unencrypted.
return new AirbyteConnectionStatus()
.withStatus(AirbyteConnectionStatus.Status.FAILED)
.withMessage("Unsecured connection not allowed. " +
"If no SSH Tunnel set up, please use one of the following SSL methods: " +
"encrypted_trust_server_certificate, encrypted_verify_certificate.");
}
}
}
return super.check(config);
}
/**
* See {@link MssqlQueryUtils#getWrappedColumnNames}
*/
@Override
protected String getWrappedColumnNames(final JdbcDatabase database,
final Connection connection,
final List<String> columnNames,
final String schemaName,
final String tableName) {
return MssqlQueryUtils.getWrappedColumnNames(database, getQuoteString(), columnNames, schemaName, tableName);
}
@Override
public JsonNode toDatabaseConfig(final JsonNode mssqlConfig) {
final List<String> additionalParameters = new ArrayList<>();
final StringBuilder jdbcUrl = new StringBuilder(
String.format("jdbc:sqlserver://%s:%s;databaseName=%s;",
mssqlConfig.get(JdbcUtils.HOST_KEY).asText(),
mssqlConfig.get(JdbcUtils.PORT_KEY).asText(),
mssqlConfig.get(JdbcUtils.DATABASE_KEY).asText()));
if (mssqlConfig.has("schemas") && mssqlConfig.get("schemas").isArray()) {
schemas = new ArrayList<>();
for (final JsonNode schema : mssqlConfig.get("schemas")) {
schemas.add(schema.asText());
}
}
if (mssqlConfig.has("ssl_method")) {
readSsl(mssqlConfig, additionalParameters);
} else {
additionalParameters.add("trustServerCertificate=true");
}
if (!additionalParameters.isEmpty()) {
jdbcUrl.append(String.join(";", additionalParameters));
}
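// Example resulting URL (illustrative values):
// jdbc:sqlserver://localhost:1433;databaseName=mydb;encrypt=true;trustServerCertificate=true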
final ImmutableMap.Builder<Object, Object> configBuilder = ImmutableMap.builder()
.put(JdbcUtils.USERNAME_KEY, mssqlConfig.get(JdbcUtils.USERNAME_KEY).asText())
.put(JdbcUtils.PASSWORD_KEY, mssqlConfig.get(JdbcUtils.PASSWORD_KEY).asText())
.put(JdbcUtils.JDBC_URL_KEY, jdbcUrl.toString());
if (mssqlConfig.has(JdbcUtils.JDBC_URL_PARAMS_KEY)) {
configBuilder.put(JdbcUtils.CONNECTION_PROPERTIES_KEY, mssqlConfig.get(JdbcUtils.JDBC_URL_PARAMS_KEY));
}
final Map<String, String> additionalParams = new HashMap<>();
additionalParameters.forEach(param -> {
final int i = param.indexOf('=');
additionalParams.put(param.substring(0, i), param.substring(i + 1));
});
configBuilder.putAll(additionalParams);
return Jsons.jsonNode(configBuilder.build());
}
@Override
public Set<String> getExcludedInternalNameSpaces() {
return Set.of(
"INFORMATION_SCHEMA",
"sys",
"spt_fallback_db",
"spt_monitor",
"spt_values",
"spt_fallback_usg",
"MSreplication_options",
"spt_fallback_dev",
"cdc"); // is this actually ok? what if the user wants cdc schema for some reason?
}
@Override
public AirbyteCatalog discover(final JsonNode config) {
final AirbyteCatalog catalog = super.discover(config);
if (MssqlCdcHelper.isCdc(config)) {
final List<AirbyteStream> streams = catalog.getStreams().stream()
.map(MssqlSource::overrideSyncModes)
.map(MssqlSource::setIncrementalToSourceDefined)
.map(MssqlSource::setDefaultCursorFieldForCdc)
.map(MssqlSource::addCdcMetadataColumns)
.collect(toList());
catalog.setStreams(streams);
}
return catalog;
}
@Override
public List<TableInfo<CommonField<JDBCType>>> discoverInternal(final JdbcDatabase database) throws Exception {
if (schemas != null && !schemas.isEmpty()) {
return schemas.stream().flatMap(schema -> {
LOGGER.info("Get columns for schema: {}", schema);
try {
return super.discoverInternal(database, schema).stream();
} catch (final Exception e) {
throw new ConfigErrorException(String.format("Error getting columns for schema: %s", schema), e);
}
}).collect(toList());
} else {
LOGGER.info("No schemas explicitly set on UI to process, so will process all of existing schemas in DB");
return super.discoverInternal(database);
}
}
@Override
protected boolean verifyCursorColumnValues(final JdbcDatabase database, final String schema, final String tableName, final String columnName)
throws SQLException {
boolean nullValExist = false;
final String resultColName = "nullValue";
final String descQuery = String.format(DESCRIBE_TABLE_QUERY, tableName);
final Optional<JsonNode> field = database.bufferedResultSetQuery(conn -> conn.createStatement()
.executeQuery(descQuery),
resultSet -> JdbcUtils.getDefaultSourceOperations().rowToJson(resultSet))
.stream()
.peek(x -> LOGGER.info("MsSQL Table Structure {}, {}, {}", x.toString(), schema, tableName))
.filter(x -> x.get("TABLE_OWNER") != null)
.filter(x -> x.get("COLUMN_NAME") != null)
.filter(x -> x.get("TABLE_OWNER").asText().equals(schema))
.filter(x -> x.get("COLUMN_NAME").asText().equalsIgnoreCase(columnName))
.findFirst();
if (field.isPresent()) {
final JsonNode jsonNode = field.get();
final JsonNode isNullable = jsonNode.get("IS_NULLABLE");
if (isNullable != null) {
if (isNullable.asText().equalsIgnoreCase("YES")) {
final String query = String.format(NULL_CURSOR_VALUE_WITH_SCHEMA_QUERY,
schema, tableName, columnName, resultColName);
LOGGER.debug("null value query: {}", query);
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.createStatement().executeQuery(query),
resultSet -> JdbcUtils.getDefaultSourceOperations().rowToJson(resultSet));
Preconditions.checkState(jsonNodes.size() == 1);
nullValExist = jsonNodes.get(0).get(resultColName).booleanValue();
LOGGER.info("null cursor value for MsSQL source : {}, shema {} , tableName {}, columnName {} ", nullValExist, schema, tableName,
columnName);
}
}
}
// return !nullValExist;
// This will be enabled after we have sent comms to the users this affects.
return true;
}
@Override
public List<CheckedConsumer<JdbcDatabase, Exception>> getCheckOperations(final JsonNode config)
throws Exception {
final List<CheckedConsumer<JdbcDatabase, Exception>> checkOperations = new ArrayList<>(
super.getCheckOperations(config));
if (MssqlCdcHelper.isCdc(config)) {
checkOperations.add(database -> assertCdcEnabledInDb(config, database));
checkOperations.add(database -> assertCdcSchemaQueryable(config, database));
checkOperations.add(database -> assertSqlServerAgentRunning(database));
}
return checkOperations;
}
protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase database)
throws SQLException {
final List<JsonNode> queryResponse = database.queryJsons(connection -> {
final String sql = "SELECT name, is_cdc_enabled FROM sys.databases WHERE name = ?";
final PreparedStatement ps = connection.prepareStatement(sql);
ps.setString(1, config.get(JdbcUtils.DATABASE_KEY).asText());
LOGGER.info(String.format("Checking that cdc is enabled on database '%s' using the query: '%s'",
config.get(JdbcUtils.DATABASE_KEY).asText(), sql));
return ps;
}, sourceOperations::rowToJson);
if (queryResponse.size() < 1) {
throw new RuntimeException(String.format(
"Couldn't find '%s' in sys.databases table. Please check the spelling and that the user has relevant permissions (see docs).",
config.get(JdbcUtils.DATABASE_KEY).asText()));
}
if (!(queryResponse.get(0).get("is_cdc_enabled").asBoolean())) {
throw new RuntimeException(String.format(
"Detected that CDC is not enabled for database '%s'. Please check the documentation on how to enable CDC on MS SQL Server.",
config.get(JdbcUtils.DATABASE_KEY).asText()));
}
}
protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabase database)
throws SQLException {
final List<JsonNode> queryResponse = database.queryJsons(connection -> {
boolean isAzureSQL = false;
try (final Statement stmt = connection.createStatement();
final ResultSet editionRS = stmt.executeQuery("SELECT ServerProperty('Edition')")) {
isAzureSQL = editionRS.next() && "SQL Azure".equals(editionRS.getString(1));
}
// Azure SQL does not support USE clause
final String sql =
isAzureSQL ? "SELECT * FROM cdc.change_tables"
: "USE [" + config.get(JdbcUtils.DATABASE_KEY).asText() + "]; SELECT * FROM cdc.change_tables";
final PreparedStatement ps = connection.prepareStatement(sql);
LOGGER.info(String.format(
"Checking user '%s' can query the cdc schema and that we have at least 1 cdc enabled table using the query: '%s'",
config.get(JdbcUtils.USERNAME_KEY).asText(), sql));
return ps;
}, sourceOperations::rowToJson);
// Ensure at least one available CDC table
if (queryResponse.size() < 1) {
throw new RuntimeException(
"No cdc-enabled tables found. Please check the documentation on how to enable CDC on MS SQL Server.");
}
}
// todo: ensure this works for Azure managed SQL (since it uses different sql server agent)
protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws SQLException {
try {
// EngineEdition property values can be found at
// https://learn.microsoft.com/en-us/sql/t-sql/functions/serverproperty-transact-sql?view=sql-server-ver16
// SQL Server Agent is always running on SQL Managed Instance:
// https://learn.microsoft.com/en-us/azure/azure-sql/managed-instance/transact-sql-tsql-differences-sql-server?view=azuresql#sql-server-agent
final Integer engineEdition = database.queryInt("SELECT ServerProperty('EngineEdition')");
if (engineEdition == 8) {
LOGGER.info(String.format("SQL Server Agent is assumed to be running when EngineEdition == '%s'", engineEdition));
} else {
final List<JsonNode> queryResponse = database.queryJsons(connection -> {
final String sql =
"SELECT status_desc FROM sys.dm_server_services WHERE [servicename] LIKE 'SQL Server Agent%' OR [servicename] LIKE 'SQL Server 代理%' ";
final PreparedStatement ps = connection.prepareStatement(sql);
LOGGER.info(String.format("Checking that the SQL Server Agent is running using the query: '%s'", sql));
return ps;
}, sourceOperations::rowToJson);
if (!(queryResponse.get(0).get("status_desc").toString().contains("Running"))) {
throw new RuntimeException(String.format(
"The SQL Server Agent is not running. Current state: '%s'. Please check the documentation on ensuring SQL Server Agent is running.",
queryResponse.get(0).get("status_desc").toString()));
}
}
} catch (final Exception e) {
if (e.getCause() != null && e.getCause().getClass().equals(com.microsoft.sqlserver.jdbc.SQLServerException.class)) {
LOGGER.warn(String.format(
"Skipping check for whether the SQL Server Agent is running, SQLServerException thrown: '%s'",
e.getMessage()));
} else {
throw e;
}
}
}
@Override
public @NotNull List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(final JdbcDatabase database,
final @NotNull ConfiguredAirbyteCatalog catalog,
final @NotNull Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final StateManager stateManager,
final @NotNull Instant emittedAt) {
final JsonNode sourceConfig = database.getSourceConfig();
if (MssqlCdcHelper.isCdc(sourceConfig) && isAnyStreamIncrementalSyncMode(catalog)) {
LOGGER.info("using OC + CDC");
return MssqlInitialReadUtil.getCdcReadIterators(database, catalog, tableNameToTable, stateManager, initialLoadStateManager, emittedAt,
getQuoteString());
} else {
if (isAnyStreamIncrementalSyncMode(catalog)) {
LOGGER.info("Syncing via Primary Key");
final MssqlCursorBasedStateManager cursorBasedStateManager = new MssqlCursorBasedStateManager(stateManager.getRawStateMessages(), catalog);
if (isExcludeTodayDateForCursorIncremental(sourceConfig)) {
setCutoffCursorTime(tableNameToTable, cursorBasedStateManager.getPairToCursorInfoMap());
}
final InitialLoadStreams initialLoadStreams =
filterStreamInIncrementalMode(streamsForInitialOrderedColumnLoad(cursorBasedStateManager, catalog));
final Map<AirbyteStreamNameNamespacePair, CursorBasedStatus> pairToCursorBasedStatus =
getCursorBasedSyncStatusForStreams(database, initialLoadStreams.streamsForInitialLoad(), stateManager, getQuoteString());
final CursorBasedStreams cursorBasedStreams =
new CursorBasedStreams(identifyStreamsForCursorBased(catalog, initialLoadStreams.streamsForInitialLoad()), pairToCursorBasedStatus);
logStreamSyncStatus(initialLoadStreams.streamsForInitialLoad(), "Primary Key");
logStreamSyncStatus(cursorBasedStreams.streamsForCursorBased(), "Cursor");
final MssqlInitialLoadHandler initialLoadHandler =
new MssqlInitialLoadHandler(sourceConfig, database, new MssqlSourceOperations(), getQuoteString(), initialLoadStateManager,
Optional.of(namespacePair -> Jsons.jsonNode(pairToCursorBasedStatus.get(namespacePair))),
getTableSizeInfoForStreams(database, initialLoadStreams.streamsForInitialLoad(), getQuoteString()));
// Cursor based incremental iterators are decorated with start and complete status traces
final List<AutoCloseableIterator<AirbyteMessage>> initialLoadIterator = new ArrayList<>(initialLoadHandler.getIncrementalIterators(
new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()),
tableNameToTable,
emittedAt, true, true, Optional.empty()));
// Build Cursor based iterator
final List<AutoCloseableIterator<AirbyteMessage>> cursorBasedIterator =
new ArrayList<>(super.getIncrementalIterators(database,
new ConfiguredAirbyteCatalog().withStreams(
cursorBasedStreams.streamsForCursorBased()),
tableNameToTable,
cursorBasedStateManager, emittedAt));
return Stream.of(initialLoadIterator, cursorBasedIterator).flatMap(Collection::stream).collect(Collectors.toList());
}
}
LOGGER.info("using CDC: {}", false);
return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt);
}
private static void setCutoffCursorTime(@NotNull Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
@NotNull Map<AirbyteStreamNameNamespacePair, CursorInfo> pairToCursorInfoMap) {
LOGGER.info("Excluding Today's Date for incremental streams with temporal cursors");
pairToCursorInfoMap.forEach((pair, cursorInfo) -> {
final TableInfo<CommonField<JDBCType>> tableInfo = tableNameToTable.get("%s.%s".formatted(pair.getNamespace(), pair.getName()));
final Optional<CommonField<JDBCType>> maybeCursorField =
tableInfo.getFields().stream().filter(f -> f.getName().equals(cursorInfo.getCursorField()))
.findFirst();
maybeCursorField.ifPresent(f -> {
LOGGER.info("Setting cutoff time for stream {} with cursor field {} ({}) to exclude today's data", pair, f.getName(), f.getType());
setCursorCutoffInfoForValue(cursorInfo, f, Instant.now());
LOGGER.info("Set cutoff time for stream {} with cursor field {} to {}", pair, f.getName(), cursorInfo.getCutoffTime());
});
});
}
@VisibleForTesting
static void setCursorCutoffInfoForValue(CursorInfo cursorInfo, @NotNull CommonField<JDBCType> f, Instant nowInstant) {
switch (f.getType()) {
case JDBCType.DATE -> {
final var instant = nowInstant.atOffset(ZoneOffset.UTC);
cursorInfo.setCutoffTime(ISO_LOCAL_DATE.format(instant));
}
case JDBCType.TIMESTAMP -> {
final var instant = nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS);
cursorInfo.setCutoffTime(DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(instant));
}
case JDBCType.TIMESTAMP_WITH_TIMEZONE -> {
final var instant = nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS);
cursorInfo.setCutoffTime(TIMESTAMPTZ_FORMATTER.format(instant));
}
default -> LOGGER.warn("Only temporal cursors can exclude today's data. Cursor {} of JDBC type {} cannot exclude today's data", f.getName(),
f.getType());
}
}
@Override
protected int getStateEmissionFrequency() {
return this.stateEmissionFrequency;
}
@VisibleForTesting
protected void setStateEmissionFrequencyForDebug(final int stateEmissionFrequency) {
this.stateEmissionFrequency = stateEmissionFrequency;
}
@Override
protected void checkUserHasPrivileges(final JsonNode config, final JdbcDatabase database) {}
private static AirbyteStream overrideSyncModes(final AirbyteStream stream) {
return stream.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL));
}
// Note: in place mutation.
private static AirbyteStream setIncrementalToSourceDefined(final AirbyteStream stream) {
if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) {
stream.setSourceDefinedCursor(true);
}
return stream;
}
/*
* To prepare for Destination v2, CDC streams must have a default cursor field. Cursor format: the
* airbyte [emittedAt] + [sync-wide record counter]
*/
private static AirbyteStream setDefaultCursorFieldForCdc(final AirbyteStream stream) {
if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) {
stream.setDefaultCursorField(ImmutableList.of(CDC_DEFAULT_CURSOR));
}
return stream;
}
// Note: in place mutation.
private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) {
final ObjectNode jsonSchema = (ObjectNode) stream.getJsonSchema();
final ObjectNode properties = (ObjectNode) jsonSchema.get("properties");
final JsonNode airbyteIntegerType = Jsons.jsonNode(ImmutableMap.of("type", "number", "airbyte_type", "integer"));
final JsonNode stringType = Jsons.jsonNode(ImmutableMap.of("type", "string"));
properties.set(CDC_LSN, stringType);
properties.set(CDC_UPDATED_AT, stringType);
properties.set(CDC_DELETED_AT, stringType);
properties.set(CDC_EVENT_SERIAL_NO, stringType);
properties.set(CDC_DEFAULT_CURSOR, airbyteIntegerType);
return stream;
}
private void readSsl(final JsonNode sslMethod, final List<String> additionalParameters) {
final JsonNode config = sslMethod.get("ssl_method");
switch (config.get("ssl_method").asText()) {
case "unencrypted" -> {
additionalParameters.add("encrypt=false");
additionalParameters.add("trustServerCertificate=true");
}
case "encrypted_trust_server_certificate" -> {
additionalParameters.add("encrypt=true");
additionalParameters.add("trustServerCertificate=true");
}
case "encrypted_verify_certificate" -> {
additionalParameters.add("encrypt=true");
additionalParameters.add("trustServerCertificate=false");
if (config.has("certificate")) {
final String certificate = config.get("certificate").asText();
final String password = RandomStringUtils.secure().nextAlphanumeric(100);
final URI keyStoreUri;
try {
keyStoreUri = SSLCertificateUtils.keyStoreFromCertificate(certificate, password, null, null);
} catch (final IOException | KeyStoreException | NoSuchAlgorithmException | CertificateException e) {
throw new RuntimeException(e);
}
additionalParameters
.add("trustStore=" + keyStoreUri.getPath());
additionalParameters
.add("trustStorePassword=" + password);
}
if (config.has("hostNameInCertificate")) {
additionalParameters
.add("hostNameInCertificate=" + config.get("hostNameInCertificate").asText());
}
}
}
}
@Override
public Collection<AutoCloseableIterator<AirbyteMessage>> readStreams(final JsonNode config,
final ConfiguredAirbyteCatalog catalog,
final JsonNode state)
throws Exception {
final AirbyteStateType supportedType = getSupportedStateType(config);
final StateManager stateManager = StateManagerFactory.createStateManager(supportedType,
StateGeneratorUtils.deserializeInitialState(state, supportedType), catalog);
final Instant emittedAt = Instant.now();
final JdbcDatabase database = createDatabase(config);
final Map<String, TableInfo<CommonField<JDBCType>>> fullyQualifiedTableNameToInfo =
discoverWithoutSystemTables(database)
.stream()
.collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()),
Function
.identity()));
initializeForStateManager(database, catalog, fullyQualifiedTableNameToInfo, stateManager);
logPreSyncDebugData(database, catalog);
return super.readStreams(config, catalog, state);
}
private boolean cloudDeploymentMode() {
return AdaptiveSourceRunner.CLOUD_MODE.equalsIgnoreCase(getFeatureFlags().deploymentMode());
}
public Duration getConnectionTimeoutMssql(final Map<String, String> connectionProperties) {
return getConnectionTimeout(connectionProperties);
}
@Override
public JdbcDatabase createDatabase(final JsonNode sourceConfig) throws SQLException {
return createDatabase(sourceConfig, JDBC_DELIMITER);
}
public static void main(final String[] args) throws Exception {
final Source source = MssqlSource.sshWrappedSource(new MssqlSource());
final MSSqlSourceExceptionHandler exceptionHandler = new MSSqlSourceExceptionHandler();
LOGGER.info("starting source: {}", MssqlSource.class);
new IntegrationRunner(source).run(args, exceptionHandler);
LOGGER.info("completed source: {}", MssqlSource.class);
}
@Override
protected void logPreSyncDebugData(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog) throws SQLException {
super.logPreSyncDebugData(database, catalog);
MssqlQueryUtils.getIndexInfoForStreams(database, catalog, getQuoteString());
}
@Override
protected void initializeForStateManager(final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final StateManager stateManager) {
if (initialLoadStateManager != null) {
return;
}
final var sourceConfig = database.getSourceConfig();
if (isCdc(sourceConfig)) {
initialLoadStateManager = getMssqlInitialLoadGlobalStateManager(database, catalog, stateManager, tableNameToTable, getQuoteString());
} else {
final MssqlCursorBasedStateManager cursorBasedStateManager = new MssqlCursorBasedStateManager(stateManager.getRawStateMessages(), catalog);
final InitialLoadStreams initialLoadStreams = streamsForInitialOrderedColumnLoad(cursorBasedStateManager, catalog);
initialLoadStateManager = new MssqlInitialLoadStreamStateManager(catalog, initialLoadStreams,
initPairToOrderedColumnInfoMap(database, catalog, tableNameToTable, getQuoteString()));
}
}
@Nullable
@Override
public InitialLoadHandler<JDBCType> getInitialLoadHandler(final JdbcDatabase database,
final ConfiguredAirbyteStream airbyteStream,
final ConfiguredAirbyteCatalog catalog,
final StateManager stateManager) {
final var sourceConfig = database.getSourceConfig();
if (isCdc(sourceConfig)) {
return getMssqlFullRefreshInitialLoadHandler(database, catalog, initialLoadStateManager, stateManager, airbyteStream, Instant.now(),
getQuoteString())
.get();
} else {
return new MssqlInitialLoadHandler(sourceConfig, database, new MssqlSourceOperations(), getQuoteString(), initialLoadStateManager,
Optional.empty(),
getTableSizeInfoForStreams(database, catalog.getStreams(), getQuoteString()));
}
}
@Override
public boolean supportResumableFullRefresh(final JdbcDatabase database, final ConfiguredAirbyteStream airbyteStream) {
return airbyteStream.getStream() != null && airbyteStream.getStream().getSourceDefinedPrimaryKey() != null
&& !airbyteStream.getStream().getSourceDefinedPrimaryKey().isEmpty();
}
@Override
protected SourceStateMessageProducer<AirbyteMessage> getSourceStateProducerForNonResumableFullRefreshStream(final JdbcDatabase database) {
return new NonResumableStateMessageProducer<>(isCdc(database.getSourceConfig()), initialLoadStateManager);
}
@NotNull
@Override
public AutoCloseableIterator<AirbyteMessage> augmentWithStreamStatus(@NotNull final ConfiguredAirbyteStream airbyteStream,
@NotNull final AutoCloseableIterator<AirbyteMessage> streamIterator) {
final var pair =
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
final var starterStatus =
new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED));
final var completeStatus =
new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE));
return AutoCloseableIterators.concatWithEagerClose(starterStatus, streamIterator, completeStatus);
}
private boolean isExcludeTodayDateForCursorIncremental(@NotNull JsonNode config) {
if (config.hasNonNull(LEGACY_REPLICATION_FIELD)) {
final JsonNode replicationConfig = config.get(LEGACY_REPLICATION_FIELD);
if (MssqlCdcHelper.ReplicationMethod.valueOf(replicationConfig.get(METHOD_FIELD).asText()) == ReplicationMethod.STANDARD) {
if (replicationConfig.hasNonNull(REPLICATION_INCREMENTAL_EXCLUDE_TODAYS)) {
return replicationConfig.get(REPLICATION_INCREMENTAL_EXCLUDE_TODAYS).asBoolean(false);
}
}
}
return false;
}
}
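
As a quick illustration of the cutoff strings produced by `setCursorCutoffInfoForValue` above, a self-contained sketch; the `CutoffDemo` class is hypothetical, while the formatter choices mirror the method:

```java
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;

// Sketch of the cutoff values computed when "exclude_todays_data" is enabled:
// a DATE cursor is cut off at today's date, timestamp cursors at midnight UTC.
public final class CutoffDemo {

  public static void main(final String[] args) {
    final Instant now = Instant.now();
    final String dateCutoff =
        DateTimeFormatter.ISO_LOCAL_DATE.format(now.atOffset(ZoneOffset.UTC));
    final String timestampCutoff =
        DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(now.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS));
    System.out.println(dateCutoff);      // e.g. 2025-10-28
    System.out.println(timestampCutoff); // e.g. 2025-10-28T00:00:00Z
  }
}
```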

View File

@@ -1,199 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.db.DataTypeUtils.TIMESTAMPTZ_FORMATTER;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_SCHEMA_NAME;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_TABLE_NAME;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.microsoft.sqlserver.jdbc.Geography;
import com.microsoft.sqlserver.jdbc.Geometry;
import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData;
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
import io.airbyte.cdk.db.jdbc.JdbcSourceOperations;
import io.airbyte.integrations.source.mssql.initialsync.CdcMetadataInjector;
import io.airbyte.protocol.models.JsonSchemaType;
import java.sql.JDBCType;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Base64;
import java.util.Optional;
import microsoft.sql.DateTimeOffset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlSourceOperations extends JdbcSourceOperations {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSourceOperations.class);
private final Optional<CdcMetadataInjector> metadataInjector;
public MssqlSourceOperations() {
super();
this.metadataInjector = Optional.empty();
}
public MssqlSourceOperations(final Optional<CdcMetadataInjector> metadataInjector) {
super();
this.metadataInjector = metadataInjector;
}
@Override
public AirbyteRecordData convertDatabaseRowToAirbyteRecordData(final ResultSet queryContext) throws SQLException {
final AirbyteRecordData recordData = super.convertDatabaseRowToAirbyteRecordData(queryContext);
final ObjectNode jsonNode = (ObjectNode) recordData.rawRowData();
if (!metadataInjector.isPresent()) {
return recordData;
}
metadataInjector.get().inject(jsonNode);
return new AirbyteRecordData(jsonNode, recordData.meta());
}
/**
* The method is used to set a JSON value by type. It needs to be overridden because MSSQL has some
* of its own specific types (e.g. Geometry, Geography, Hierarchyid, etc.)
*
* @throws SQLException
*/
@Override
public void copyToJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json)
throws SQLException {
final SQLServerResultSetMetaData metadata = (SQLServerResultSetMetaData) resultSet
.getMetaData();
final String columnName = metadata.getColumnName(colIndex);
final String columnTypeName = metadata.getColumnTypeName(colIndex);
// Attempt to access the column. This allows us to know whether it is null before we do
// type-specific parsing. If the column is null, we populate the null value and skip attempting
// to parse the column value.
resultSet.getObject(colIndex);
if (resultSet.wasNull()) {
json.putNull(columnName);
} else if (columnTypeName.equalsIgnoreCase("time")) {
putTime(json, columnName, resultSet, colIndex);
} else if (columnTypeName.equalsIgnoreCase("geometry")) {
putGeometry(json, columnName, resultSet, colIndex);
} else if (columnTypeName.equalsIgnoreCase("geography")) {
putGeography(json, columnName, resultSet, colIndex);
} else if (columnTypeName.equalsIgnoreCase("datetimeoffset")) {
// JDBC will recognize such columns as VARCHAR. Thus we need special handling for them.
putTimestampWithTimezone(json, columnName, resultSet, colIndex);
} else {
super.copyToJsonField(resultSet, colIndex, json);
}
}
@Override
public JDBCType getDatabaseFieldType(final JsonNode field) {
try {
final String typeName = field.get(INTERNAL_COLUMN_TYPE_NAME).asText();
if (typeName.equalsIgnoreCase("geography")
|| typeName.equalsIgnoreCase("geometry")
|| typeName.equalsIgnoreCase("hierarchyid")) {
return JDBCType.VARCHAR;
}
if (typeName.equalsIgnoreCase("datetime")) {
return JDBCType.TIMESTAMP;
}
if (typeName.equalsIgnoreCase("datetimeoffset")) {
return JDBCType.TIMESTAMP_WITH_TIMEZONE;
}
if (typeName.equalsIgnoreCase("real")) {
return JDBCType.REAL;
}
return JDBCType.valueOf(field.get(INTERNAL_COLUMN_TYPE).asInt());
} catch (final IllegalArgumentException ex) {
LOGGER.warn(String.format("Could not convert column: %s from table: %s.%s with type: %s. Casting to VARCHAR.",
field.get(INTERNAL_COLUMN_NAME),
field.get(INTERNAL_SCHEMA_NAME),
field.get(INTERNAL_TABLE_NAME),
field.get(INTERNAL_COLUMN_TYPE)));
return JDBCType.VARCHAR;
}
}
@Override
protected void putBinary(final ObjectNode node,
final String columnName,
final ResultSet resultSet,
final int index)
throws SQLException {
final byte[] bytes = resultSet.getBytes(index);
final String value = Base64.getEncoder().encodeToString(bytes);
node.put(columnName, value);
}
protected void putGeometry(final ObjectNode node,
final String columnName,
final ResultSet resultSet,
final int index)
throws SQLException {
node.put(columnName, Geometry.deserialize(resultSet.getBytes(index)).toString());
}
protected void putGeography(final ObjectNode node,
final String columnName,
final ResultSet resultSet,
final int index)
throws SQLException {
node.put(columnName, Geography.deserialize(resultSet.getBytes(index)).toString());
}
@Override
protected void putTimestamp(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException {
final DateTimeFormatter microsecondsFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]");
node.put(columnName, getObject(resultSet, index, LocalDateTime.class).format(microsecondsFormatter));
}
@Override
public JsonSchemaType getAirbyteType(final JDBCType jdbcType) {
return switch (jdbcType) {
case TINYINT, SMALLINT, INTEGER, BIGINT -> JsonSchemaType.INTEGER;
case DOUBLE, DECIMAL, FLOAT, NUMERIC, REAL -> JsonSchemaType.NUMBER;
case BOOLEAN, BIT -> JsonSchemaType.BOOLEAN;
case NULL -> JsonSchemaType.NULL;
case BLOB, BINARY, VARBINARY, LONGVARBINARY -> JsonSchemaType.STRING_BASE_64;
case TIME -> JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE;
case TIMESTAMP_WITH_TIMEZONE -> JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE;
case TIMESTAMP -> JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE;
case DATE -> JsonSchemaType.STRING_DATE;
default -> JsonSchemaType.STRING;
};
}
@Override
protected void setTimestampWithTimezone(final PreparedStatement preparedStatement, final int parameterIndex, final String value)
throws SQLException {
try {
final OffsetDateTime offsetDateTime = OffsetDateTime.parse(value, TIMESTAMPTZ_FORMATTER);
final Timestamp timestamp = Timestamp.valueOf(offsetDateTime.atZoneSameInstant(offsetDateTime.getOffset()).toLocalDateTime());
// Final step of conversion from
// OffsetDateTime (a Java construct) object -> Timestamp (a Java construct) ->
// DateTimeOffset (a Microsoft.sql specific construct)
// and provide the offset in minutes to the converter
final DateTimeOffset datetimeoffset = DateTimeOffset.valueOf(timestamp, offsetDateTime.getOffset().getTotalSeconds() / 60);
preparedStatement.setObject(parameterIndex, datetimeoffset);
} catch (final DateTimeParseException e) {
throw new RuntimeException(e);
}
}
}
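
For reference, a minimal sketch of the `datetimeoffset` binding performed in `setTimestampWithTimezone` above, assuming the mssql-jdbc driver is on the classpath; the `DateTimeOffsetDemo` class is hypothetical:

```java
import java.sql.Timestamp;
import java.time.OffsetDateTime;
import microsoft.sql.DateTimeOffset;

// Sketch: the wall-clock part of an OffsetDateTime becomes a java.sql.Timestamp,
// and the zone offset is passed separately to the driver in minutes.
public final class DateTimeOffsetDemo {

  public static void main(final String[] args) {
    final OffsetDateTime odt = OffsetDateTime.parse("2023-06-01T12:30:00+02:00");
    final Timestamp ts = Timestamp.valueOf(odt.toLocalDateTime());
    final DateTimeOffset dto = DateTimeOffset.valueOf(ts, odt.getOffset().getTotalSeconds() / 60);
    System.out.println(dto); // e.g. 2023-06-01 12:30:00 +02:00
  }
}
```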

View File

@@ -1,13 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.cdc;
public class MssqlCdcStateConstants {
public static final String MSSQL_CDC_OFFSET = "mssql_cdc_offset";
public static final String MSSQL_DB_HISTORY = "mssql_db_history";
public static final String IS_COMPRESSED = "is_compressed";
}
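
For orientation, a sketch of the state map assembled with these keys (see `serialize(...)` in MssqlDebeziumStateUtil below); the `CdcStateShapeDemo` class and the placeholder values are hypothetical:

```java
import io.airbyte.integrations.source.mssql.cdc.MssqlCdcStateConstants;
import java.util.HashMap;
import java.util.Map;

// Sketch of the CDC state blob: the Debezium offset map, the schema history
// string, and a compression flag, keyed by the constants above.
public final class CdcStateShapeDemo {

  public static void main(final String[] args) {
    final Map<String, Object> state = new HashMap<>();
    state.put(MssqlCdcStateConstants.MSSQL_CDC_OFFSET, Map.of("<offset key>", "<offset value>"));
    state.put(MssqlCdcStateConstants.MSSQL_DB_HISTORY, "<schema history>");
    state.put(MssqlCdcStateConstants.IS_COMPRESSED, false);
    System.out.println(state);
  }
}
```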

View File

@@ -1,306 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.cdc;
import static io.debezium.relational.RelationalDatabaseConnectorConfig.DATABASE_NAME;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteFileOffsetBackingStore;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager;
import io.airbyte.cdk.integrations.debezium.internals.DebeziumRecordPublisher;
import io.airbyte.cdk.integrations.debezium.internals.DebeziumStateUtil;
import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil;
import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumPropertiesManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.debezium.config.Configuration;
import io.debezium.connector.common.OffsetReader;
import io.debezium.connector.sqlserver.Lsn;
import io.debezium.connector.sqlserver.SqlServerConnectorConfig;
import io.debezium.connector.sqlserver.SqlServerOffsetContext;
import io.debezium.connector.sqlserver.SqlServerOffsetContext.Loader;
import io.debezium.connector.sqlserver.SqlServerPartition;
import io.debezium.engine.ChangeEvent;
import io.debezium.pipeline.spi.Offsets;
import io.debezium.pipeline.spi.Partition;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.apache.kafka.connect.storage.FileOffsetBackingStore;
import org.apache.kafka.connect.storage.OffsetStorageReaderImpl;
import org.codehaus.plexus.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlDebeziumStateUtil implements DebeziumStateUtil {
// Testing is done concurrently so initialState is cached in a thread local variable
// in order to provide each test thread with its own correct initial state
private static ThreadLocal<JsonNode> initialState = new ThreadLocal<>();
final static String LSN_OFFSET_INCLUDED_QUERY = """
DECLARE @saved_lsn BINARY(10), @min_lsn BINARY(10), @max_lsn BINARY(10), @res BIT
-- Set @saved_lsn = 0x0000DF7C000006A80006
Set @saved_lsn = ?
SELECT @min_lsn = MIN(start_lsn) FROM cdc.change_tables
SELECT @max_lsn = sys.fn_cdc_get_max_lsn()
IF (@saved_lsn >= @min_lsn)
Set @res = 1
ELSE
Set @res = 0
select @res as [included], @min_lsn as [min], @max_lsn as [max]
""";
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlDebeziumStateUtil.class);
/**
* Generate initial state for debezium state.
*/
public static synchronized JsonNode constructInitialDebeziumState(final Properties properties,
final ConfiguredAirbyteCatalog catalog,
final JdbcDatabase database) {
// There is no need to construct an initial state after it was already constructed in this run
// Starting and stopping mssql debezium too many times causes it to hang during shutdown
if (initialState.get() == null) {
LOGGER.info("No initial state was found. Running Debezium state initialization...");
properties.setProperty("heartbeat.interval.ms", "0");
final JsonNode highWaterMark = constructLsnSnapshotState(database, database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText());
final AirbyteFileOffsetBackingStore emptyOffsetManager = AirbyteFileOffsetBackingStore.initializeState(null,
Optional.empty());
final AirbyteSchemaHistoryStorage schemaHistoryStorage =
AirbyteSchemaHistoryStorage.initializeDBHistory(new SchemaHistory<>(Optional.empty(), false), false);
final LinkedBlockingQueue<ChangeEvent<String, String>> queue = new LinkedBlockingQueue<>();
final Instant engineStartTime = Instant.now();
boolean schemaHistoryRead = false;
SchemaHistory<String> schemaHistory = null;
final var debeziumPropertiesManager =
new RelationalDbDebeziumPropertiesManager(properties, database.getSourceConfig(), catalog, Collections.emptyList());
try {
final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(debeziumPropertiesManager);
publisher.start(queue, emptyOffsetManager, Optional.of(schemaHistoryStorage));
while (!publisher.hasClosed()) {
final ChangeEvent<String, String> event = queue.poll(10, TimeUnit.SECONDS);
// If there is no event (e.g. for an empty table), generating the schema history may take a few
// cycles, depending on the size of the history.
schemaHistory = schemaHistoryStorage.read();
schemaHistoryRead = Objects.nonNull(schemaHistory) && StringUtils.isNotBlank(schemaHistory.getSchema());
if (event != null || schemaHistoryRead) {
publisher.close();
break;
}
Duration initialWaitingDuration = Duration.ofMinutes(5L);
// If an initial waiting time is configured and it is greater than 5 minutes, use that value
// instead of the default.
final Duration configuredDuration = RecordWaitTimeUtil.getFirstRecordWaitTime(database.getSourceConfig());
if (configuredDuration.compareTo(initialWaitingDuration) > 0) {
initialWaitingDuration = configuredDuration;
}
if (Duration.between(engineStartTime, Instant.now()).compareTo(initialWaitingDuration) > 0) {
LOGGER.error("Schema history not constructed after {} seconds of waiting, closing the engine", initialWaitingDuration.getSeconds());
publisher.close();
throw new RuntimeException(
"Building schema history has timed out. Please consider increasing the debezium wait time in advanced options.");
}
}
} catch (final InterruptedException ine) {
LOGGER.debug("Interrupted during closing of publisher");
} catch (final Exception e) {
throw new RuntimeException(e);
}
final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState(highWaterMark,
Optional.empty());
final Map<String, String> offset = offsetManager.read();
if (!schemaHistoryRead) {
schemaHistory = schemaHistoryStorage.read();
}
assert !offset.isEmpty();
assert Objects.nonNull(schemaHistory);
assert Objects.nonNull(schemaHistory.getSchema());
final JsonNode asJson = serialize(offset, schemaHistory);
LOGGER.info("Initial Debezium state constructed. offset={}", Jsons.jsonNode(offset));
if (asJson.get(MssqlCdcStateConstants.MSSQL_DB_HISTORY).asText().isBlank()) {
throw new RuntimeException("Schema history snapshot returned empty history.");
}
initialState.set(asJson);
}
return initialState.get();
}
public static void disposeInitialState() {
LOGGER.debug("Dispose initial state cached for {}", Thread.currentThread());
initialState.remove();
}
private static JsonNode serialize(final Map<String, String> offset, final SchemaHistory<String> dbHistory) {
final Map<String, Object> state = new HashMap<>();
state.put(MssqlCdcStateConstants.MSSQL_CDC_OFFSET, offset);
state.put(MssqlCdcStateConstants.MSSQL_DB_HISTORY, dbHistory.getSchema());
state.put(MssqlCdcStateConstants.IS_COMPRESSED, dbHistory.isCompressed());
return Jsons.jsonNode(state);
}
public static MssqlDebeziumStateAttributes getStateAttributesFromDB(final JdbcDatabase database) {
try (final Stream<MssqlDebeziumStateAttributes> stream = database.unsafeResultSetQuery(
connection -> connection.createStatement().executeQuery("select sys.fn_cdc_get_max_lsn()"),
resultSet -> {
final byte[] lsnBinary = resultSet.getBytes(1);
final Lsn lsn = Lsn.valueOf(lsnBinary);
return new MssqlDebeziumStateAttributes(lsn);
})) {
final List<MssqlDebeziumStateAttributes> stateAttributes = stream.toList();
assert stateAttributes.size() == 1;
return stateAttributes.get(0);
} catch (final SQLException e) {
throw new RuntimeException(e);
}
}
public record MssqlDebeziumStateAttributes(Lsn lsn) {}
/**
* Method to construct initial Debezium state which can be passed onto Debezium engine to make it
* process binlogs from a specific file and position and skip snapshot phase Example:
* ["test",{"server":"test","database":"test"}]" :
* "{"transaction_id":null,"event_serial_no":1,"commit_lsn":"00000644:00002ff8:0099","change_lsn":"0000062d:00017ff0:016d"}"
*/
static JsonNode constructLsnSnapshotState(final JdbcDatabase database, final String dbName) {
return format(getStateAttributesFromDB(database), dbName);
}
@VisibleForTesting
public static JsonNode format(final MssqlDebeziumStateAttributes attributes, final String dbName) {
final String key = "[\"" + dbName + "\",{\"server\":\"" + dbName + "\",\"database\":\"" + dbName + "\"}]";
final String value =
"{\"commit_lsn\":\"" + attributes.lsn.toString() + "\",\"snapshot\":true,\"snapshot_completed\":true"
+ "}";
final Map<String, String> result = new HashMap<>();
result.put(key, value);
final JsonNode jsonNode = Jsons.jsonNode(result);
LOGGER.info("Initial Debezium state offset constructed: {}", jsonNode);
return jsonNode;
}
public Optional<MssqlDebeziumStateAttributes> savedOffset(final Properties baseProperties,
final ConfiguredAirbyteCatalog catalog,
final JsonNode cdcOffset,
final JsonNode config) {
if (Objects.isNull(cdcOffset)) {
return Optional.empty();
}
final var offsetManager = AirbyteFileOffsetBackingStore.initializeState(cdcOffset, Optional.empty());
final DebeziumPropertiesManager debeziumPropertiesManager =
new RelationalDbDebeziumPropertiesManager(baseProperties, config, catalog, Collections.emptyList());
final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager);
return parseSavedOffset(debeziumProperties);
}
private Optional<MssqlDebeziumStateAttributes> parseSavedOffset(final Properties properties) {
FileOffsetBackingStore fileOffsetBackingStore = null;
OffsetStorageReaderImpl offsetStorageReader = null;
try {
fileOffsetBackingStore = getFileOffsetBackingStore(properties);
offsetStorageReader = getOffsetStorageReader(fileOffsetBackingStore, properties);
final SqlServerConnectorConfig connectorConfig = new SqlServerConnectorConfig(Configuration.from(properties));
final SqlServerOffsetContext.Loader loader = new Loader(connectorConfig);
final Set<Partition> partitions =
Collections.singleton(new SqlServerPartition(connectorConfig.getLogicalName(), properties.getProperty(DATABASE_NAME.name())));
final OffsetReader<Partition, SqlServerOffsetContext, Loader> offsetReader = new OffsetReader<>(offsetStorageReader, loader);
final Map<Partition, SqlServerOffsetContext> offsets = offsetReader.offsets(partitions);
return extractStateAttributes(partitions, offsets);
} finally {
LOGGER.info("Closing offsetStorageReader and fileOffsetBackingStore");
if (offsetStorageReader != null) {
offsetStorageReader.close();
}
if (fileOffsetBackingStore != null) {
fileOffsetBackingStore.stop();
}
}
}
private Optional<MssqlDebeziumStateAttributes> extractStateAttributes(final Set<Partition> partitions,
final Map<Partition, SqlServerOffsetContext> offsets) {
boolean found = false;
for (final Partition partition : partitions) {
final SqlServerOffsetContext mssqlOffsetContext = offsets.get(partition);
if (mssqlOffsetContext != null) {
found = true;
LOGGER.info("Found previous partition offset {}: {}", partition, mssqlOffsetContext.getOffset());
}
}
if (!found) {
LOGGER.info("No previous offsets found");
return Optional.empty();
}
final Offsets<Partition, SqlServerOffsetContext> of = Offsets.of(offsets);
final SqlServerOffsetContext previousOffset = of.getTheOnlyOffset();
return Optional.of(new MssqlDebeziumStateAttributes(previousOffset.getChangePosition().getCommitLsn()));
}
public boolean savedOffsetStillPresentOnServer(final JdbcDatabase database, final MssqlDebeziumStateAttributes savedState) {
final Lsn savedLsn = savedState.lsn();
try (final Stream<Boolean> stream = database.unsafeResultSetQuery(
connection -> {
PreparedStatement stmt = connection.prepareStatement(LSN_OFFSET_INCLUDED_QUERY);
stmt.setBytes(1, savedLsn.getBinary());
return stmt.executeQuery();
},
resultSet -> {
final byte[] minLsnBinary = resultSet.getBytes(2);
final Lsn minLsn = Lsn.valueOf(minLsnBinary);
final byte[] maxLsnBinary = resultSet.getBytes(3);
final Lsn maxLsn = Lsn.valueOf(maxLsnBinary);
final Boolean included = resultSet.getBoolean(1);
LOGGER.info("{} lsn exists on server: [{}]. (min server lsn: {} max server lsn: {})", savedLsn.toString(), included, minLsn.toString(),
maxLsn.toString());
return included;
})) {
final List<Boolean> results = stream.toList();
assert results.size() == 1;
return results.get(0);
} catch (final SQLException e) {
throw new RuntimeException(e);
}
}
}
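
A small usage sketch of the public `format(...)` helper above; the `OffsetFormatDemo` class is hypothetical, and the LSN literal is taken from the javadoc example:

```java
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
import io.debezium.connector.sqlserver.Lsn;

// Sketch: building the per-database offset entry that Debezium resumes from.
// The key encodes the logical server/database name; the value pins the commit
// LSN and marks the snapshot as already completed.
public final class OffsetFormatDemo {

  public static void main(final String[] args) {
    final var attributes = new MssqlDebeziumStateAttributes(Lsn.valueOf("00000644:00002ff8:0099"));
    System.out.println(MssqlDebeziumStateUtil.format(attributes, "test"));
    // {"[\"test\",{\"server\":\"test\",\"database\":\"test\"}]":
    //   "{\"commit_lsn\":\"00000644:00002ff8:0099\",\"snapshot\":true,\"snapshot_completed\":true}"}
  }
}
```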

View File

@@ -1,88 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.cursor_based;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.MSSQL_STATE_VERSION;
import com.google.common.collect.Lists;
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
import io.airbyte.cdk.integrations.source.relationaldb.state.StreamStateManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.v0.AirbyteStreamState;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.StreamDescriptor;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlCursorBasedStateManager extends StreamStateManager {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCursorBasedStateManager.class);
public MssqlCursorBasedStateManager(final List<AirbyteStateMessage> airbyteStateMessages, final ConfiguredAirbyteCatalog catalog) {
super(airbyteStateMessages, catalog);
}
@Override
public AirbyteStateMessage toState(final Optional<AirbyteStreamNameNamespacePair> pair) {
if (pair.isPresent()) {
final Map<AirbyteStreamNameNamespacePair, CursorInfo> pairToCursorInfoMap = getPairToCursorInfoMap();
final Optional<CursorInfo> cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair.get()));
if (cursorInfo.isPresent()) {
LOGGER.debug("Generating state message for {}...", pair);
return new AirbyteStateMessage()
.withType(AirbyteStateType.STREAM)
// Temporarily include legacy state for backwards compatibility with the platform
.withStream(generateStreamState(pair.get(), cursorInfo.get()));
} else {
LOGGER.warn("Cursor information could not be located in state for stream {}. Returning a new, empty state message...", pair);
return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState());
}
} else {
LOGGER.warn("Stream not provided. Returning a new, empty state message...");
return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState());
}
}
/**
* Generates the stream state for the given stream and cursor information.
*
* @param airbyteStreamNameNamespacePair The stream.
* @param cursorInfo The current cursor.
* @return The {@link AirbyteStreamState} representing the current state of the stream.
*/
private AirbyteStreamState generateStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair,
final CursorInfo cursorInfo) {
return new AirbyteStreamState()
.withStreamDescriptor(
new StreamDescriptor().withName(airbyteStreamNameNamespacePair.getName()).withNamespace(airbyteStreamNameNamespacePair.getNamespace()))
.withStreamState(Jsons.jsonNode(generateDbStreamState(airbyteStreamNameNamespacePair, cursorInfo)));
}
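// For illustration only, the per-stream state JSON produced below looks roughly like:
// { "state_type": "cursor_based", "version": 2, "stream_name": "users", "stream_namespace": "dbo",
//   "cursor_field": ["updated_at"], "cursor": "2023-01-01T00:00:00Z", "cursor_record_count": 1 }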
private CursorBasedStatus generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair,
final CursorInfo cursorInfo) {
final CursorBasedStatus state = new CursorBasedStatus();
state.setStateType(StateType.CURSOR_BASED);
state.setVersion(MSSQL_STATE_VERSION);
state.setStreamName(airbyteStreamNameNamespacePair.getName());
state.setStreamNamespace(airbyteStreamNameNamespacePair.getNamespace());
state.setCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField()));
state.setCursor(cursorInfo.getCursor());
if (cursorInfo.getCursorRecordCount() > 0L) {
state.setCursorRecordCount(cursorInfo.getCursorRecordCount());
}
return state;
}
}

View File

@@ -1,29 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.integrations.source.mssql.MssqlCdcConnectorMetadataInjector;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
public class CdcMetadataInjector {
private final String transactionTimestamp;
private final MssqlDebeziumStateAttributes stateAttributes;
private final MssqlCdcConnectorMetadataInjector metadataInjector;
public CdcMetadataInjector(final String transactionTimestamp,
final MssqlDebeziumStateAttributes stateAttributes,
final MssqlCdcConnectorMetadataInjector metadataInjector) {
this.transactionTimestamp = transactionTimestamp;
this.stateAttributes = stateAttributes;
this.metadataInjector = metadataInjector;
}
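// Stamps records read outside Debezium (i.e. during the initial snapshot) with the same CDC
// metadata columns that Debezium-sourced records carry (e.g. _ab_cdc_updated_at and the LSN from
// the captured state attributes), so downstream consumers see a uniform record shape.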
public void inject(final ObjectNode record) {
metadataInjector.addMetaDataToRowsFetchedOutsideDebezium(record, transactionTimestamp, stateAttributes);
}
}

View File

@@ -1,173 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
import io.airbyte.protocol.models.v0.*;
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlInitialLoadGlobalStateManager extends MssqlInitialLoadStateManager {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadGlobalStateManager.class);
private StateManager stateManager;
private final CdcState initialCdcState;
// Only one global state is emitted, which is fanned out into many entries in the DB by the
// platform. As a result, we need to keep track of streams that have completed the snapshot.
private Set<AirbyteStreamNameNamespacePair> streamsThatHaveCompletedSnapshot;
// No special handling for resumable full refresh streams. We will report the cursor as it is.
private Set<AirbyteStreamNameNamespacePair> resumableFullRefreshStreams;
private Set<AirbyteStreamNameNamespacePair> nonResumableFullRefreshStreams;
private Set<AirbyteStreamNameNamespacePair> completedNonResumableFullRefreshStreams;
public MssqlInitialLoadGlobalStateManager(final InitialLoadStreams initialLoadStreams,
final Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo,
final StateManager stateManager,
final ConfiguredAirbyteCatalog catalog,
final CdcState initialCdcState) {
this.pairToOrderedColLoadStatus = MssqlInitialLoadStateManager.initPairToOrderedColumnLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus());
this.pairToOrderedColInfo = pairToOrderedColInfo;
this.stateManager = stateManager;
this.initialCdcState = initialCdcState;
this.streamStateForIncrementalRunSupplier = pair -> Jsons.emptyObject();
initStreams(initialLoadStreams, catalog);
}
private AirbyteGlobalState generateGlobalState(final List<AirbyteStreamState> streamStates) {
CdcState cdcState = stateManager.getCdcStateManager().getCdcState();
if (cdcState == null || cdcState.getState() == null) {
cdcState = initialCdcState;
}
final AirbyteGlobalState globalState = new AirbyteGlobalState();
globalState.setSharedState(Jsons.jsonNode(cdcState));
globalState.setStreamStates(streamStates);
return globalState;
}
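// In a GLOBAL state message, the shared state carries the Debezium (CDC) offset for the whole
// connection, while the per-stream states carry each stream's snapshot progress; both travel
// together in every checkpoint.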
private void initStreams(final InitialLoadStreams initialLoadStreams,
final ConfiguredAirbyteCatalog catalog) {
this.streamsThatHaveCompletedSnapshot = new HashSet<>();
this.resumableFullRefreshStreams = new HashSet<>();
this.nonResumableFullRefreshStreams = new HashSet<>();
this.completedNonResumableFullRefreshStreams = new HashSet<>();
catalog.getStreams().forEach(configuredAirbyteStream -> {
var pairInStream =
new AirbyteStreamNameNamespacePair(configuredAirbyteStream.getStream().getName(), configuredAirbyteStream.getStream().getNamespace());
if (!initialLoadStreams.streamsForInitialLoad().contains(configuredAirbyteStream)
&& configuredAirbyteStream.getSyncMode() == SyncMode.INCREMENTAL) {
this.streamsThatHaveCompletedSnapshot.add(pairInStream);
}
if (configuredAirbyteStream.getSyncMode() == SyncMode.FULL_REFRESH) {
if (configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey() != null
&& !configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey().isEmpty()) {
this.resumableFullRefreshStreams.add(pairInStream);
} else {
this.nonResumableFullRefreshStreams.add(pairInStream);
}
}
});
}
@Override
public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream airbyteStream) {
final List<AirbyteStreamState> streamStates = new ArrayList<>();
streamsThatHaveCompletedSnapshot.forEach(stream -> {
final DbStreamState state = getFinalState(stream);
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(state)));
});
resumableFullRefreshStreams.forEach(stream -> {
var ocStatus = getOrderedColumnLoadStatus(stream);
if (ocStatus != null) {
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(ocStatus)));
}
});
completedNonResumableFullRefreshStreams.forEach(stream -> {
streamStates.add(new AirbyteStreamState()
.withStreamDescriptor(
new StreamDescriptor().withName(stream.getName()).withNamespace(stream.getNamespace())));
});
if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) {
AirbyteStreamNameNamespacePair pair =
new AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
var ocStatus = getOrderedColumnLoadStatus(pair);
streamStates.add(getAirbyteStreamState(pair, Jsons.jsonNode(ocStatus)));
}
return new AirbyteStateMessage()
.withType(AirbyteStateType.GLOBAL)
.withGlobal(generateGlobalState(streamStates));
}
private AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) {
Preconditions.checkNotNull(pair);
Preconditions.checkNotNull(pair.getName());
Preconditions.checkNotNull(pair.getNamespace());
return new AirbyteStreamState()
.withStreamDescriptor(
new StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace()))
.withStreamState(stateData);
}
@Override
public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream airbyteStream) {
final io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair pair = new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(
airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) {
streamsThatHaveCompletedSnapshot.add(pair);
} else if (nonResumableFullRefreshStreams.contains(pair)) {
completedNonResumableFullRefreshStreams.add(pair);
}
final List<AirbyteStreamState> streamStates = new ArrayList<>();
streamsThatHaveCompletedSnapshot.forEach(stream -> {
final DbStreamState state = getFinalState(stream);
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(state)));
});
resumableFullRefreshStreams.forEach(stream -> {
var ocStatus = getOrderedColumnLoadStatus(stream);
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(ocStatus)));
});
completedNonResumableFullRefreshStreams.forEach(stream -> {
streamStates.add(new AirbyteStreamState()
.withStreamDescriptor(
new StreamDescriptor().withName(stream.getName()).withNamespace(stream.getNamespace())));
});
return new AirbyteStateMessage()
.withType(AirbyteStateType.GLOBAL)
.withGlobal(generateGlobalState(streamStates));
}
private DbStreamState getFinalState(final AirbyteStreamNameNamespacePair pair) {
Preconditions.checkNotNull(pair);
Preconditions.checkNotNull(pair.getName());
Preconditions.checkNotNull(pair.getNamespace());
return new DbStreamState()
.withStreamName(pair.getName())
.withStreamNamespace(pair.getNamespace())
.withCursorField(Collections.emptyList())
.withCursor(null);
}
}

View File

@@ -1,261 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import static io.airbyte.cdk.db.jdbc.JdbcConstants.*;
import static io.airbyte.cdk.db.jdbc.JdbcUtils.getFullyQualifiedTableName;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION_PROPERTY;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.cdk.db.SqlDatabase;
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants;
import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil;
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadHandler;
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateIterator;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateEmitFrequency;
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
import io.airbyte.commons.stream.AirbyteStreamUtils;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.MssqlQueryUtils.TableSizeInfo;
import io.airbyte.integrations.source.mssql.MssqlSourceOperations;
import io.airbyte.protocol.models.CommonField;
import io.airbyte.protocol.models.v0.*;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import java.sql.*;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlInitialLoadHandler implements InitialLoadHandler<JDBCType> {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadHandler.class);
private static final long RECORD_LOGGING_SAMPLE_RATE = 1_000_000;
private final JsonNode config;
private final JdbcDatabase database;
private final MssqlSourceOperations sourceOperations;
private final String quoteString;
private final MssqlInitialLoadStateManager initialLoadStateManager;
private final Optional<Function<AirbyteStreamNameNamespacePair, JsonNode>> streamStateForIncrementalRunSupplier;
private static final long QUERY_TARGET_SIZE_GB = 1_073_741_824;
private static final long DEFAULT_CHUNK_SIZE = 1_000_000;
final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap;
public MssqlInitialLoadHandler(
final JsonNode config,
final JdbcDatabase database,
final MssqlSourceOperations sourceOperations,
final String quoteString,
final MssqlInitialLoadStateManager initialLoadStateManager,
final Optional<Function<AirbyteStreamNameNamespacePair, JsonNode>> streamStateForIncrementalRunSupplier,
final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap) {
this.config = config;
this.database = database;
this.sourceOperations = sourceOperations;
this.quoteString = quoteString;
this.initialLoadStateManager = initialLoadStateManager;
this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier;
this.tableSizeInfoMap = tableSizeInfoMap;
}
private static String getCatalog(final SqlDatabase database) {
return (database.getSourceConfig().has(JdbcUtils.DATABASE_KEY) ? database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText() : null);
}
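// SQL Server physically orders table rows by the clustered index, so chunked initial-load queries
// ordered by that column avoid sorts; the primary key is only used as a fallback when no
// clustered index can be discovered.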
public static Map<String, List<String>> discoverClusteredIndexForStream(final JdbcDatabase database,
final AirbyteStream stream) {
Map<String, List<String>> clusteredIndexes = new HashMap<>();
try {
// Look up index metadata for this stream's table and keep only the clustered index entries
clusteredIndexes = aggregateClusteredIndexes(database.bufferedResultSetQuery(
connection -> connection.getMetaData().getIndexInfo(getCatalog(database), stream.getNamespace(), stream.getName(), true, false),
r -> {
if (r.getShort(JDBC_COLUMN_TYPE) == DatabaseMetaData.tableIndexClustered) {
final String schemaName =
r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME);
final String streamName = getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME));
final String columnName = r.getString(JDBC_COLUMN_COLUMN_NAME);
return new ClusteredIndexAttributesFromDb(streamName, columnName);
} else {
return null;
}
}));
} catch (final SQLException e) {
LOGGER.debug(String.format("Could not retrieve clustered indexes without a table name (%s), not blocking, fall back to use pk.", e));
}
LOGGER.debug("Clustered Indexes: {}", clusteredIndexes);
return clusteredIndexes.isEmpty() ? null : clusteredIndexes;
}
@VisibleForTesting
public record ClusteredIndexAttributesFromDb(String streamName,
String columnName) {}
/**
 * Aggregates the given entries of stream name and clustered index column name.
 *
 * @param entries pairs of stream name and clustered index column name
 * @return a map from stream name to the columns of its clustered index. If the clustered index
 *         has multiple columns, only the first column is used.
 */
@VisibleForTesting
static Map<String, List<String>> aggregateClusteredIndexes(final List<ClusteredIndexAttributesFromDb> entries) {
final Map<String, List<String>> result = new HashMap<>();
entries.forEach(entry -> {
if (entry == null) {
return;
}
if (!result.containsKey(entry.streamName())) {
result.put(entry.streamName(), new ArrayList<>());
}
// Store the column name in a list to support composite clustered indexes.
result.get(entry.streamName()).add(entry.columnName());
});
return result;
}
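// Example: entries [(dbo.users, id), (dbo.users, created_at)] aggregate to
// {"dbo.users": ["id", "created_at"]}; per the javadoc above, only the first column is used when
// the clustered index is composite.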
public List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(
final ConfiguredAirbyteCatalog catalog,
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final Instant emittedAt,
final boolean decorateWithStartedStatus,
final boolean decorateWithCompletedStatus,
@NotNull final Optional<Duration> cdcInitialLoadTimeout) {
final List<AutoCloseableIterator<AirbyteMessage>> iteratorList = new ArrayList<>();
for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) {
final AirbyteStream stream = airbyteStream.getStream();
final String streamName = stream.getName();
final String namespace = stream.getNamespace();
// TODO: need to select column according to indexing status of table. may not be primary key
final var pair = new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(streamName, namespace);
if (airbyteStream.getSyncMode().equals(SyncMode.INCREMENTAL)) {
final String fullyQualifiedTableName = DbSourceDiscoverUtil.getFullyQualifiedTableName(namespace, streamName);
// Grab the selected fields to sync
final TableInfo<CommonField<JDBCType>> table = tableNameToTable.get(fullyQualifiedTableName);
if (decorateWithStartedStatus) {
iteratorList.add(
new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)));
}
iteratorList.add(getIteratorForStream(airbyteStream, table, emittedAt, cdcInitialLoadTimeout));
if (decorateWithCompletedStatus) {
iteratorList.add(new StreamStatusTraceEmitterIterator(
new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)));
}
}
}
return iteratorList;
}
@NotNull
@Override
public AutoCloseableIterator<AirbyteMessage> getIteratorForStream(@NotNull final ConfiguredAirbyteStream airbyteStream,
@NotNull final TableInfo<CommonField<JDBCType>> table,
@NotNull final Instant emittedAt,
@NotNull final Optional<Duration> cdcInitialLoadTimeout) {
final AirbyteStream stream = airbyteStream.getStream();
final String streamName = stream.getName();
final String namespace = stream.getNamespace();
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace);
final List<String> selectedDatabaseFields = table.getFields()
.stream()
.map(CommonField::getName)
.filter(CatalogHelpers.getTopLevelFieldNames(airbyteStream)::contains)
.toList();
final AutoCloseableIterator<AirbyteRecordData> queryStream =
new MssqlInitialLoadRecordIterator(database, sourceOperations, quoteString, initialLoadStateManager, selectedDatabaseFields, pair,
calculateChunkSize(tableSizeInfoMap.get(pair), pair), isCompositePrimaryKey(airbyteStream), emittedAt, cdcInitialLoadTimeout);
final AutoCloseableIterator<AirbyteMessage> recordIterator =
getRecordIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli());
final AutoCloseableIterator<AirbyteMessage> recordAndMessageIterator = augmentWithState(recordIterator, airbyteStream);
return augmentWithLogs(recordAndMessageIterator, pair, streamName);
}
// Transforms the given iterator to create an {@link AirbyteRecordMessage}
private AutoCloseableIterator<AirbyteMessage> getRecordIterator(
final AutoCloseableIterator<AirbyteRecordData> recordIterator,
final String streamName,
final String namespace,
final long emittedAt) {
return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage()
.withType(Type.RECORD)
.withRecord(new AirbyteRecordMessage()
.withStream(streamName)
.withNamespace(namespace)
.withEmittedAt(emittedAt)
.withData(r.rawRowData())
.withMeta(isMetaChangesEmptyOrNull(r.meta()) ? null : r.meta())));
}
private boolean isMetaChangesEmptyOrNull(AirbyteRecordMessageMeta meta) {
return meta == null || meta.getChanges() == null || meta.getChanges().isEmpty();
}
// Augments the given iterator with record count logs.
private AutoCloseableIterator<AirbyteMessage> augmentWithLogs(final AutoCloseableIterator<AirbyteMessage> iterator,
final AirbyteStreamNameNamespacePair pair,
final String streamName) {
final AtomicLong recordCount = new AtomicLong();
return AutoCloseableIterators.transform(iterator,
AirbyteStreamUtils.convertFromNameAndNamespace(pair.getName(), pair.getNamespace()),
r -> {
final long count = recordCount.incrementAndGet();
if (count % RECORD_LOGGING_SAMPLE_RATE == 0) {
LOGGER.info("Reading stream {}. Records read: {}", streamName, count);
}
return r;
});
}
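// Wraps the record iterator in a SourceStateIterator so that state messages are checkpointed
// periodically, bounded by the record-count and duration thresholds read from the source config
// below (falling back to the CDK defaults in DebeziumIteratorConstants).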
private AutoCloseableIterator<AirbyteMessage> augmentWithState(final AutoCloseableIterator<AirbyteMessage> recordIterator,
final ConfiguredAirbyteStream airbyteStream) {
final AirbyteStreamNameNamespacePair pair =
new AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
final Duration syncCheckpointDuration =
config.get(SYNC_CHECKPOINT_DURATION_PROPERTY) != null
? Duration.ofSeconds(config.get(SYNC_CHECKPOINT_DURATION_PROPERTY).asLong())
: DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION;
final Long syncCheckpointRecords = config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY) != null ? config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY).asLong()
: DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS;
streamStateForIncrementalRunSupplier.ifPresent(initialLoadStateManager::setStreamStateForIncrementalRunSupplier);
return AutoCloseableIterators.transformIterator(
r -> new SourceStateIterator<>(r, airbyteStream, initialLoadStateManager,
new StateEmitFrequency(syncCheckpointRecords, syncCheckpointDuration)),
recordIterator, new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()));
}
private static boolean isCompositePrimaryKey(final ConfiguredAirbyteStream stream) {
return stream.getStream().getSourceDefinedPrimaryKey().size() > 1;
}
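// Chunk sizing targets roughly QUERY_TARGET_SIZE_GB (1 GiB, expressed in bytes) of data per
// query: for example, with an average row length of 500 bytes, 1_073_741_824 / 500 yields a chunk
// of about 2.1 million rows.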
public static long calculateChunkSize(final TableSizeInfo tableSizeInfo, final AirbyteStreamNameNamespacePair pair) {
// If table size info could not be calculated, a default chunk size will be provided.
if (tableSizeInfo == null || tableSizeInfo.tableSize() == 0 || tableSizeInfo.avgRowLength() == 0) {
LOGGER.info("Chunk size could not be determined for pair: {}, defaulting to {} rows", pair, DEFAULT_CHUNK_SIZE);
return DEFAULT_CHUNK_SIZE;
}
final long avgRowLength = tableSizeInfo.avgRowLength();
final long chunkSize = QUERY_TARGET_SIZE_GB / avgRowLength;
LOGGER.info("Chunk size determined for pair: {}, is {}", pair, chunkSize);
return chunkSize;
}
}

View File

@@ -1,200 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcSnapshotForceShutdownMessage;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier;
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting;
import com.google.common.collect.AbstractIterator;
import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
import io.airbyte.commons.exceptions.TransientErrorException;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.MssqlQueryUtils;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import java.sql.Connection;
import java.sql.JDBCType;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
import javax.annotation.CheckForNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@SuppressWarnings("try")
public class MssqlInitialLoadRecordIterator extends AbstractIterator<AirbyteRecordData>
implements AutoCloseableIterator<AirbyteRecordData> {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadRecordIterator.class);
private AutoCloseableIterator<AirbyteRecordData> currentIterator;
private final JdbcDatabase database;
private int numSubqueries = 0;
private final String quoteString;
private final JdbcCompatibleSourceOperations<JDBCType> sourceOperations;
private final List<String> columnNames;
private final AirbyteStreamNameNamespacePair pair;
private final MssqlInitialLoadStateManager initialLoadStateManager;
private final long chunkSize;
private final OrderedColumnInfo ocInfo;
private final boolean isCompositeKeyLoad;
private final Instant startInstant;
private Optional<Duration> cdcInitialLoadTimeout;
private boolean isCdcSync;
MssqlInitialLoadRecordIterator(
final JdbcDatabase database,
final JdbcCompatibleSourceOperations<JDBCType> sourceOperations,
final String quoteString,
final MssqlInitialLoadStateManager initialLoadStateManager,
final List<String> columnNames,
final AirbyteStreamNameNamespacePair pair,
final long chunkSize,
final boolean isCompositeKeyLoad,
final Instant startInstant,
final Optional<Duration> cdcInitialLoadTimeout) {
this.database = database;
this.sourceOperations = sourceOperations;
this.quoteString = quoteString;
this.initialLoadStateManager = initialLoadStateManager;
this.columnNames = columnNames;
this.pair = pair;
this.chunkSize = chunkSize;
this.ocInfo = initialLoadStateManager.getOrderedColumnInfo(pair);
this.isCompositeKeyLoad = isCompositeKeyLoad;
this.startInstant = startInstant;
this.cdcInitialLoadTimeout = cdcInitialLoadTimeout;
this.isCdcSync = isCdcSync(initialLoadStateManager);
}
@CheckForNull
@Override
protected AirbyteRecordData computeNext() {
if (isCdcSync && cdcInitialLoadTimeout.isPresent()
&& Duration.between(startInstant, Instant.now()).compareTo(cdcInitialLoadTimeout.get()) > 0) {
final String cdcInitialLoadTimeoutMessage = String.format(
"Initial load has taken longer than %s hours, Canceling sync so that CDC replication can catch-up on subsequent attempt, and then initial snapshotting will resume",
cdcInitialLoadTimeout.get().toHours());
LOGGER.info(cdcInitialLoadTimeoutMessage);
AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcSnapshotForceShutdownMessage());
throw new TransientErrorException(cdcInitialLoadTimeoutMessage);
}
if (shouldBuildNextSubquery()) {
try {
// We will only issue one query for a composite key load. If we have already processed all the data
// associated with this
// query, we should indicate that we are done processing for the given stream.
if (isCompositeKeyLoad && numSubqueries >= 1) {
return endOfData();
}
// Previous stream (and connection) must be manually closed in this iterator.
if (currentIterator != null) {
currentIterator.close();
}
LOGGER.info("Subquery number : {}", numSubqueries);
final Stream<AirbyteRecordData> stream = database.unsafeQuery(
this::getOcPreparedStatement, sourceOperations::convertDatabaseRowToAirbyteRecordData);
currentIterator = AutoCloseableIterators.fromStream(stream,
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()));
numSubqueries++;
// If the current subquery has no records associated with it, the entire stream has been read.
if (!currentIterator.hasNext()) {
return endOfData();
}
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
return currentIterator.next();
}
private boolean shouldBuildNextSubquery() {
// The next subquery should be built if (i) it is the first subquery in the sequence, or (ii) the
// previous subquery has finished.
return (currentIterator == null || !currentIterator.hasNext());
}
private PreparedStatement getOcPreparedStatement(final Connection connection) {
try {
final String tableName = pair.getName();
final String schemaName = pair.getNamespace();
final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, tableName,
quoteString);
LOGGER.info("Preparing query for table: {}", fullTableName);
final String wrappedColumnNames = MssqlQueryUtils.getWrappedColumnNames(database, quoteString, columnNames, schemaName, tableName);
final OrderedColumnLoadStatus ocLoadStatus = initialLoadStateManager.getOrderedColumnLoadStatus(pair);
if (ocLoadStatus == null) {
final String quotedCursorField = enquoteIdentifier(ocInfo.ocFieldName(), quoteString);
final String sql;
if (isCompositeKeyLoad) {
sql = "SELECT %s FROM %s ORDER BY %s".formatted(wrappedColumnNames, fullTableName, quotedCursorField);
} else {
sql = "SELECT TOP %s %s FROM %s ORDER BY %s".formatted(chunkSize, wrappedColumnNames, fullTableName, quotedCursorField);
}
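// For illustration (identifiers and quoting are examples; actual quoting follows quoteString):
//   SELECT TOP 1000000 [id], [name] FROM [dbo].[users] ORDER BY [id]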
final PreparedStatement preparedStatement = connection.prepareStatement(sql);
LOGGER.info("Executing query for table {}: {}", tableName, sql);
return preparedStatement;
} else {
LOGGER.info("ocLoadStatus value is : {}", ocLoadStatus.getOrderedColVal());
final String quotedCursorField = enquoteIdentifier(ocInfo.ocFieldName(), quoteString);
final String sql;
if (isCompositeKeyLoad) {
sql = "SELECT %s FROM %s WHERE %s >= ? ORDER BY %s".formatted(wrappedColumnNames, fullTableName,
quotedCursorField, quotedCursorField);
} else {
// The ordered column max value could be null; this can happen for empty tables. In that case,
// we can just issue a query without any chunking.
if (ocInfo.ocMaxValue() != null) {
sql = "SELECT TOP %s %s FROM %s WHERE %s > ? AND %s <= ? ORDER BY %s".formatted(chunkSize, wrappedColumnNames, fullTableName,
quotedCursorField, quotedCursorField, quotedCursorField);
} else {
sql = "SELECT %s FROM %s WHERE %s > ? ORDER BY %s".formatted(wrappedColumnNames, fullTableName,
quotedCursorField, quotedCursorField);
}
}
final PreparedStatement preparedStatement = connection.prepareStatement(sql);
final JDBCType cursorFieldType = ocInfo.fieldType();
sourceOperations.setCursorField(preparedStatement, 1, cursorFieldType, ocLoadStatus.getOrderedColVal());
if (!isCompositeKeyLoad && ocInfo.ocMaxValue() != null) {
sourceOperations.setCursorField(preparedStatement, 2, cursorFieldType, ocInfo.ocMaxValue());
}
LOGGER.info("Executing query for table {}: {}", tableName, sql);
return preparedStatement;
}
} catch (final SQLException e) {
throw new RuntimeException(e);
}
}
@Override
public void close() throws Exception {
if (currentIterator != null) {
currentIterator.close();
}
}
private boolean isCdcSync(MssqlInitialLoadStateManager initialLoadStateManager) {
if (initialLoadStateManager instanceof MssqlInitialLoadGlobalStateManager) {
LOGGER.info("Running a cdc sync");
return true;
} else {
LOGGER.info("Not running a cdc sync");
return false;
}
}
}

View File

@@ -1,112 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
public abstract class MssqlInitialLoadStateManager implements SourceStateMessageProducer<AirbyteMessage> {
public static final long MSSQL_STATE_VERSION = 2;
public static final String STATE_TYPE_KEY = "state_type";
public static final String ORDERED_COL_STATE_TYPE = "ordered_column";
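// STATE_TYPE_KEY / ORDERED_COL_STATE_TYPE tag each per-stream state blob so that a resumed sync
// can tell an in-progress ordered-column snapshot apart from cursor-based incremental state.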
protected Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToOrderedColLoadStatus;
protected Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo;
private OrderedColumnLoadStatus ocStatus;
protected Function<AirbyteStreamNameNamespacePair, JsonNode> streamStateForIncrementalRunSupplier;
void setStreamStateForIncrementalRunSupplier(final Function<AirbyteStreamNameNamespacePair, JsonNode> streamStateForIncrementalRunSupplier) {
this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier;
}
/**
* Updates the {@link OrderedColumnLoadStatus} for the state associated with the given pair.
*
* @param pair pair
* @param ocLoadStatus updated status
*/
public void updateOrderedColumnLoadState(final AirbyteStreamNameNamespacePair pair, final OrderedColumnLoadStatus ocLoadStatus) {
pairToOrderedColLoadStatus.put(pair, ocLoadStatus);
}
/**
* Returns the previous state emitted. Represented as a {@link OrderedColumnLoadStatus} associated
* with the stream.
*
* @param pair pair
* @return load status
*/
public OrderedColumnLoadStatus getOrderedColumnLoadStatus(final AirbyteStreamNameNamespacePair pair) {
return pairToOrderedColLoadStatus.get(pair);
}
/**
 * Returns the current {@link OrderedColumnInfo} associated with the stream. This includes the
 * data type and the name of the ordered column.
 *
 * @param pair pair
 * @return ordered column info
 */
public OrderedColumnInfo getOrderedColumnInfo(final AirbyteStreamNameNamespacePair pair) {
return pairToOrderedColInfo.get(pair);
}
static Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> initPairToOrderedColumnLoadStatusMap(
final Map<io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToOcStatus) {
return pairToOcStatus.entrySet().stream()
.collect(Collectors.toMap(
e -> new AirbyteStreamNameNamespacePair(e.getKey().getName(), e.getKey().getNamespace()),
Entry::getValue));
}
protected JsonNode getIncrementalState(final AirbyteStreamNameNamespacePair pair) {
final OrderedColumnLoadStatus currentOcLoadStatus = getOrderedColumnLoadStatus(pair);
return (currentOcLoadStatus == null || currentOcLoadStatus.getIncrementalState() == null) ? streamStateForIncrementalRunSupplier.apply(pair)
: currentOcLoadStatus.getIncrementalState();
}
@Override
public AirbyteMessage processRecordMessage(final ConfiguredAirbyteStream stream, final AirbyteMessage message) {
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
final String ocFieldName = getOrderedColumnInfo(pair).ocFieldName();
final String lastOcVal = message.getRecord().getData().get(ocFieldName).asText();
ocStatus = new OrderedColumnLoadStatus()
.withVersion(MSSQL_STATE_VERSION)
.withStateType(StateType.ORDERED_COLUMN)
.withOrderedCol(ocFieldName)
.withOrderedColVal(lastOcVal)
.withIncrementalState(getIncrementalState(stream));
updateOrderedColumnLoadState(pair, ocStatus);
return message;
}
@Override
public boolean shouldEmitStateMessage(final ConfiguredAirbyteStream stream) {
return Objects.nonNull(getOrderedColumnInfo(new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace())));
}
private JsonNode getIncrementalState(final ConfiguredAirbyteStream stream) {
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
final OrderedColumnLoadStatus currentOcLoadStatus = getOrderedColumnLoadStatus(pair);
return (currentOcLoadStatus == null || currentOcLoadStatus.getIncrementalState() == null)
? streamStateForIncrementalRunSupplier.apply(pair)
: currentOcLoadStatus.getIncrementalState();
}
}

View File

@@ -1,75 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
import io.airbyte.protocol.models.v0.*;
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * This state manager enables writing the state_type and version keys to the stream state as
 * records go through the iterator. Once we have verified that StreamStateManager itself can be
 * expanded to include this functionality, this class will be removed.
 */
public class MssqlInitialLoadStreamStateManager extends MssqlInitialLoadStateManager {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadStreamStateManager.class);
public MssqlInitialLoadStreamStateManager(final ConfiguredAirbyteCatalog catalog,
final InitialLoadStreams initialLoadStreams,
final Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo) {
this.pairToOrderedColInfo = pairToOrderedColInfo;
this.pairToOrderedColLoadStatus = MssqlInitialLoadStateManager.initPairToOrderedColumnLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus());
this.streamStateForIncrementalRunSupplier = pair -> Jsons.emptyObject();
}
@Override
public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream stream) {
AirbyteStreamNameNamespacePair pair =
new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
final JsonNode incrementalState = getIncrementalState(pair);
// If there is no incremental state, save the latest OC state
// Such as in the case of full refresh
final JsonNode finalState;
if (incrementalState == null || incrementalState.isEmpty()) {
finalState = Jsons.jsonNode(getOrderedColumnLoadStatus(pair));
} else {
finalState = incrementalState;
}
return new AirbyteStateMessage()
.withType(AirbyteStateType.STREAM)
.withStream(getAirbyteStreamState(pair, finalState));
}
@Override
public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream stream) {
AirbyteStreamNameNamespacePair pair =
new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
var ocStatus = getOrderedColumnLoadStatus(pair);
return new AirbyteStateMessage()
.withType(AirbyteStateType.STREAM)
.withStream(getAirbyteStreamState(pair, Jsons.jsonNode(ocStatus)));
}
protected AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) {
Preconditions.checkNotNull(pair);
Preconditions.checkNotNull(pair.getName());
Preconditions.checkNotNull(pair.getNamespace());
LOGGER.debug("State data for {}: {}", pair.getNamespace().concat("_").concat(pair.getName()), stateData);
return new AirbyteStreamState()
.withStreamDescriptor(
new StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace()))
.withStreamState(stateData);
}
}

View File

@@ -1,585 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql.initialsync;
import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcCursorInvalidMessage;
import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcResyncMessage;
import static io.airbyte.cdk.db.DbAnalyticsUtils.wassOccurrenceMessage;
import static io.airbyte.cdk.db.jdbc.JdbcUtils.getFullyQualifiedTableName;
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.FAIL_SYNC_OPTION;
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.INVALID_CDC_CURSOR_POSITION_PROPERTY;
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.RESYNC_DATA_OPTION;
import static io.airbyte.integrations.source.mssql.MssqlCdcHelper.getDebeziumProperties;
import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getTableSizeInfoForStreams;
import static io.airbyte.integrations.source.mssql.cdc.MssqlCdcStateConstants.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.ORDERED_COL_STATE_TYPE;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler;
import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter;
import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil;
import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumEventConverter;
import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumPropertiesManager;
import io.airbyte.cdk.integrations.source.relationaldb.CdcStateManager;
import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil;
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadTimeoutUtil;
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
import io.airbyte.commons.exceptions.ConfigErrorException;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.*;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
import io.airbyte.protocol.models.CommonField;
import io.airbyte.protocol.models.v0.*;
import io.debezium.connector.sqlserver.Lsn;
import java.sql.JDBCType;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlInitialReadUtil {
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialReadUtil.class);
private static final int MIN_QUEUE_SIZE = 1000;
private static final int MAX_QUEUE_SIZE = 10000;
public record InitialLoadStreams(List<ConfiguredAirbyteStream> streamsForInitialLoad,
Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToInitialLoadStatus) {
}
public record CursorBasedStreams(List<ConfiguredAirbyteStream> streamsForCursorBased,
Map<AirbyteStreamNameNamespacePair, CursorBasedStatus> pairToCursorBasedStatus) {
}
public record OrderedColumnInfo(String ocFieldName, JDBCType fieldType, String ocMaxValue) {}
public static Optional<MssqlInitialLoadHandler> getMssqlFullRefreshInitialLoadHandler(final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final MssqlInitialLoadStateManager initialLoadStateManager,
final StateManager stateManager,
final ConfiguredAirbyteStream fullRefreshStream,
final Instant emittedAt,
final String quoteString) {
final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager);
final InitialLoadStreams initialLoadStreams =
cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer);
// The state manager needs to know all streams in order to produce a state message,
// but the initial load handler should only produce an iterator for the single full refresh stream.
if (!initialLoadStreams.streamsForInitialLoad().isEmpty()) {
// Filter on initialLoadStream
var pair = new AirbyteStreamNameNamespacePair(fullRefreshStream.getStream().getName(), fullRefreshStream.getStream().getNamespace());
var ocStatus = initialLoadStreams.pairToInitialLoadStatus.get(pair);
Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> fullRefreshOcStatus;
if (ocStatus == null) {
fullRefreshOcStatus = Map.of();
} else {
fullRefreshOcStatus = Map.of(pair, ocStatus);
}
var fullRefreshStreamInitialLoad = new InitialLoadStreams(List.of(fullRefreshStream), fullRefreshOcStatus);
return Optional
.of(getMssqlInitialLoadHandler(database, emittedAt, quoteString, fullRefreshStreamInitialLoad, initialLoadStateManager, Optional.empty()));
}
return Optional.empty();
}
private static MssqlInitialLoadHandler getMssqlInitialLoadHandler(final JdbcDatabase database,
final Instant emittedAt,
final String quoteString,
final InitialLoadStreams initialLoadStreams,
final MssqlInitialLoadStateManager initialLoadStateManager,
final Optional<CdcMetadataInjector> metadataInjector) {
final JsonNode sourceConfig = database.getSourceConfig();
final MssqlSourceOperations sourceOperations = new MssqlSourceOperations(metadataInjector);
return new MssqlInitialLoadHandler(sourceConfig, database,
sourceOperations, quoteString, initialLoadStateManager,
Optional.empty(),
getTableSizeInfoForStreams(database, initialLoadStreams.streamsForInitialLoad(), quoteString));
}
private static CdcState getCdcState(final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final StateManager stateManager,
final boolean savedOffsetStillPresentOnServer) {
if (!savedOffsetStillPresentOnServer || (stateManager.getCdcStateManager().getCdcState() == null
|| stateManager.getCdcStateManager().getCdcState().getState() == null)) {
// Construct the initial state for MSSQL. If state already exists, we use it instead, since it
// carries the Debezium state from the initial sync.
final JsonNode initialDebeziumState = MssqlDebeziumStateUtil.constructInitialDebeziumState(
getDebeziumProperties(database, catalog, false), catalog, database);
return new CdcState().withState(initialDebeziumState);
} else {
return stateManager.getCdcStateManager().getCdcState();
}
}
public static boolean isSavedOffsetStillPresentOnServer(final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final StateManager stateManager) {
final MssqlDebeziumStateUtil mssqlDebeziumStateUtil = new MssqlDebeziumStateUtil();
final JsonNode sourceConfig = database.getSourceConfig();
final JsonNode state =
(stateManager.getCdcStateManager().getCdcState() == null || stateManager.getCdcStateManager().getCdcState().getState() == null)
? MssqlDebeziumStateUtil.constructInitialDebeziumState(getDebeziumProperties(database, catalog, false), catalog, database)
: Jsons.clone(stateManager.getCdcStateManager().getCdcState().getState());
final Optional<MssqlDebeziumStateAttributes> savedOffset = mssqlDebeziumStateUtil.savedOffset(
getDebeziumProperties(database, catalog, true), catalog, state.get(MSSQL_CDC_OFFSET), sourceConfig);
final boolean savedOffsetStillPresentOnServer =
savedOffset.isPresent() && mssqlDebeziumStateUtil.savedOffsetStillPresentOnServer(database, savedOffset.get());
if (!savedOffsetStillPresentOnServer) {
AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcCursorInvalidMessage());
if (!sourceConfig.get("replication_method").has(INVALID_CDC_CURSOR_POSITION_PROPERTY) || sourceConfig.get("replication_method").get(
INVALID_CDC_CURSOR_POSITION_PROPERTY).asText().equals(FAIL_SYNC_OPTION)) {
throw new ConfigErrorException(
"Saved offset no longer present on the server. Please reset the connection, and then increase binlog retention and/or increase sync frequency.");
} else if (sourceConfig.get("replication_method").get(INVALID_CDC_CURSOR_POSITION_PROPERTY).asText().equals(RESYNC_DATA_OPTION)) {
AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcResyncMessage());
LOGGER.warn("Saved offset no longer present on the server, Airbyte is going to trigger a sync from scratch");
}
}
return savedOffsetStillPresentOnServer;
}
public static MssqlInitialLoadGlobalStateManager getMssqlInitialLoadGlobalStateManager(final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final StateManager stateManager,
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final String quoteString) {
final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager);
final InitialLoadStreams initialLoadStreams =
cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer);
final CdcState initialStateToBeUsed = getCdcState(database, catalog, stateManager, savedOffsetStillPresentOnServer);
return new MssqlInitialLoadGlobalStateManager(initialLoadStreams,
initPairToOrderedColumnInfoMap(database, catalog, tableNameToTable, quoteString),
stateManager, catalog, initialStateToBeUsed);
}
public static List<AutoCloseableIterator<AirbyteMessage>> getCdcReadIterators(final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final StateManager stateManager,
final MssqlInitialLoadStateManager initialLoadStateManager,
final Instant emittedAt,
final String quoteString) {
final JsonNode sourceConfig = database.getSourceConfig();
final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig);
LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds());
final int queueSize = getQueueSize(sourceConfig);
LOGGER.info("Queue size: {}", queueSize);
final Duration initialLoadTimeout = InitialLoadTimeoutUtil.getInitialLoadTimeout(sourceConfig);
// Determine the streams that need to be loaded via an initial ordered column load.
final List<AutoCloseableIterator<AirbyteMessage>> initialLoadIterator = new ArrayList<>();
final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager);
final InitialLoadStreams initialLoadStreams =
cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer);
final MssqlCdcConnectorMetadataInjector metadataInjector = MssqlCdcConnectorMetadataInjector.getInstance(emittedAt);
final CdcState stateToBeUsed = getCdcState(database, catalog, stateManager, savedOffsetStillPresentOnServer);
// Debezium is started for streams whose initial load has already started, that is, streams that
// have been partially or fully completed.
final var startedCdcStreamList = catalog.getStreams().stream()
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
.filter(stream -> isStreamPartiallyOrFullyCompleted(stream, initialLoadStreams))
.map(stream -> stream.getStream().getNamespace() + "." + stream.getStream().getName()).toList();
final var allCdcStreamList = catalog.getStreams().stream()
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
.map(stream -> stream.getStream().getNamespace() + "." + stream.getStream().getName()).toList();
// If there are streams to sync via ordered column load, build the relevant iterators.
if (!initialLoadStreams.streamsForInitialLoad().isEmpty()) {
final MssqlDebeziumStateAttributes stateAttributes = MssqlDebeziumStateUtil.getStateAttributesFromDB(database);
final MssqlInitialLoadHandler initialLoadHandler =
getMssqlInitialLoadHandler(database, emittedAt, quoteString, initialLoadStreams, initialLoadStateManager,
Optional.of(new CdcMetadataInjector(emittedAt.toString(), stateAttributes, metadataInjector)));
// Because initial load streams will be followed by a CDC read of those streams, we only decorate
// with the complete status trace after the CDC read is done.
initialLoadIterator.addAll(initialLoadHandler.getIncrementalIterators(
new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()),
tableNameToTable,
emittedAt, false, false, Optional.empty()));
}
final List<AutoCloseableIterator<AirbyteMessage>> cdcStreamsStartStatusEmitters = catalog.getStreams().stream()
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
.map(stream -> (AutoCloseableIterator<AirbyteMessage>) new StreamStatusTraceEmitterIterator(
new AirbyteStreamStatusHolder(
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()),
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)))
.toList();
final List<AutoCloseableIterator<AirbyteMessage>> cdcStreamsEndStatusEmitters = catalog.getStreams().stream()
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
.map(stream -> (AutoCloseableIterator<AirbyteMessage>) new StreamStatusTraceEmitterIterator(
new AirbyteStreamStatusHolder(
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()),
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)))
.toList();
// Build the incremental CDC iterators.
final var targetPosition = MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText());
final AirbyteDebeziumHandler<Lsn> handler = new AirbyteDebeziumHandler<>(
sourceConfig,
targetPosition,
true,
firstRecordWaitTime,
queueSize,
false);
final var eventConverter = new RelationalDbDebeziumEventConverter(metadataInjector, emittedAt);
if (startedCdcStreamList.isEmpty()) {
LOGGER.info("First sync - no cdc streams have been completed or started");
/*
 * This is the first run case: no initial loads have been started. In this case, we want to run the
 * iterators in the following order: 1. Run the initial load iterators. This step will time out and
 * throw a transient error if run for too long (> 8hrs by default). 2. Run the Debezium iterators
 * with ALL of the incremental streams configured. This is because if step 1 completes, the initial
 * load can be considered finished.
*/
final var propertiesManager =
new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, allCdcStreamList);
final Supplier<AutoCloseableIterator<AirbyteMessage>> incrementalIteratorsSupplier = getCdcIncrementalIteratorsSupplier(handler,
propertiesManager, eventConverter, stateToBeUsed, stateManager);
return Collections.singletonList(
AutoCloseableIterators.concatWithEagerClose(
Stream
.of(
cdcStreamsStartStatusEmitters,
initialLoadIterator,
Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorsSupplier, null)),
cdcStreamsEndStatusEmitters)
.flatMap(Collection::stream)
.collect(Collectors.toList()),
AirbyteTraceMessageUtility::emitStreamStatusTrace));
} else if (initialLoadIterator.isEmpty()) {
LOGGER.info("Initial load has finished completely - only reading the binlog");
/*
 * In this case, the initial load has completed and only Debezium should be run. The iterators
 * should be run in the following order: 1. Run the Debezium iterators with ALL of the incremental
* streams configured.
*/
final var propertiesManager =
new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, allCdcStreamList);
final Supplier<AutoCloseableIterator<AirbyteMessage>> incrementalIteratorSupplier = getCdcIncrementalIteratorsSupplier(handler,
propertiesManager, eventConverter, stateToBeUsed, stateManager);
return Collections.singletonList(
AutoCloseableIterators.concatWithEagerClose(
Stream
.of(
cdcStreamsStartStatusEmitters,
Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorSupplier, null)),
cdcStreamsEndStatusEmitters)
.flatMap(Collection::stream)
.collect(Collectors.toList()),
AirbyteTraceMessageUtility::emitStreamStatusTrace));
} else {
LOGGER.info("Initial load is in progress - reading binlog first and then resuming with initial load.");
/*
* In this case, the initial load has partially completed (WASS case). The iterators should be run
* in the following order: 1. Run the Debezium iterators configured with only the incremental
* streams that have been fully or partially completed. 2. Resume the initial load for partially
* completed and not-yet-started streams. This step will time out and throw a transient error if
* run for too long (> 8hrs by default).
*/
AirbyteTraceMessageUtility.emitAnalyticsTrace(wassOccurrenceMessage());
final var propertiesManager =
new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, startedCdcStreamList);
final Supplier<AutoCloseableIterator<AirbyteMessage>> incrementalIteratorSupplier = getCdcIncrementalIteratorsSupplier(handler,
propertiesManager, eventConverter, stateToBeUsed, stateManager);
return Collections.singletonList(
AutoCloseableIterators.concatWithEagerClose(
Stream
.of(
cdcStreamsStartStatusEmitters,
Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorSupplier, null)),
initialLoadIterator,
cdcStreamsEndStatusEmitters)
.flatMap(Collection::stream)
.collect(Collectors.toList()),
AirbyteTraceMessageUtility::emitStreamStatusTrace));
}
}
public static InitialLoadStreams cdcStreamsForInitialOrderedColumnLoad(final CdcStateManager stateManager,
final ConfiguredAirbyteCatalog fullCatalog,
final boolean savedOffsetStillPresentOnServer) {
if (!savedOffsetStillPresentOnServer) {
// Add a filter here to identify resumable full refresh streams.
return new InitialLoadStreams(
fullCatalog.getStreams()
.stream()
.collect(Collectors.toList()),
new HashMap<>());
}
final AirbyteStateMessage airbyteStateMessage = stateManager.getRawStateMessage();
final Set<AirbyteStreamNameNamespacePair> streamsStillInOcSync = new HashSet<>();
// Build a map of stream <-> initial load status for streams that currently have an initial
// ordered column load in progress.
final Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToInitialLoadStatus = new HashMap<>();
if (airbyteStateMessage != null && airbyteStateMessage.getGlobal() != null && airbyteStateMessage.getGlobal().getStreamStates() != null) {
LOGGER.info("Trying to extract streams need initial oc sync. State message: {}", airbyteStateMessage);
airbyteStateMessage.getGlobal().getStreamStates().forEach(stateMessage -> {
LOGGER.info("State message in this stream: {}", stateMessage);
final JsonNode streamState = stateMessage.getStreamState();
final StreamDescriptor streamDescriptor = stateMessage.getStreamDescriptor();
if (streamState == null || streamDescriptor == null) {
return;
}
if (streamState.has(STATE_TYPE_KEY)) {
if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(ORDERED_COL_STATE_TYPE)) {
final OrderedColumnLoadStatus orderedColumnLoadStatus = Jsons.object(streamState, OrderedColumnLoadStatus.class);
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(),
streamDescriptor.getNamespace());
pairToInitialLoadStatus.put(pair, orderedColumnLoadStatus);
streamsStillInOcSync.add(pair);
}
}
});
}
final List<ConfiguredAirbyteStream> streamsForOcSync = new ArrayList<>();
fullCatalog.getStreams().stream()
.filter(stream -> streamsStillInOcSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())))
.map(Jsons::clone)
.forEach(streamsForOcSync::add);
final List<ConfiguredAirbyteStream> newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog, stateManager.getInitialStreamsSynced());
streamsForOcSync.addAll(newlyAddedStreams);
return new InitialLoadStreams(streamsForOcSync, pairToInitialLoadStatus);
}
public static Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> initPairToOrderedColumnInfoMap(
final JdbcDatabase database,
final ConfiguredAirbyteCatalog catalog,
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final String quoteString) {
final Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOcInfoMap = new HashMap<>();
// For every stream that is in an initial ordered column sync, we want to maintain the current
// ordered column info associated with the stream.
catalog.getStreams().forEach(stream -> {
final AirbyteStreamNameNamespacePair pair =
new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
final Optional<OrderedColumnInfo> ocInfo = getOrderedColumnInfo(database, stream, tableNameToTable, quoteString);
if (ocInfo.isPresent()) {
pairToOcInfoMap.put(pair, ocInfo.get());
}
});
return pairToOcInfoMap;
}
static Optional<OrderedColumnInfo> getOrderedColumnInfo(final JdbcDatabase database,
final ConfiguredAirbyteStream stream,
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
final String quoteString) {
final String fullyQualifiedTableName =
DbSourceDiscoverUtil.getFullyQualifiedTableName(stream.getStream().getNamespace(), stream.getStream().getName());
final TableInfo<CommonField<JDBCType>> table = tableNameToTable
.get(fullyQualifiedTableName);
return getOrderedColumnInfo(database, stream, table, quoteString);
}
static Optional<OrderedColumnInfo> getOrderedColumnInfo(final JdbcDatabase database,
final ConfiguredAirbyteStream stream,
final TableInfo<CommonField<JDBCType>> table,
final String quoteString) {
// For cursor-based syncs, we cannot always assume an ordered column field exists. We need to
// handle the case where it does not exist when we support cursor-based syncs.
// if (stream.getStream().getSourceDefinedPrimaryKey().size() > 1) {
// LOGGER.info("Composite primary key detected for {namespace, stream} : {}, {}",
// stream.getStream().getNamespace(), stream.getStream().getName());
// }
Optional<String> ocFieldNameOpt = selectOcFieldName(database, stream);
if (ocFieldNameOpt.isEmpty()) {
LOGGER.info("No primary key or clustered index found for stream: " + stream.getStream().getName());
return Optional.empty();
}
String ocFieldName = ocFieldNameOpt.get();
LOGGER.info("selected ordered column field name: " + ocFieldName);
final JDBCType ocFieldType = table.getFields().stream()
.filter(field -> field.getName().equals(ocFieldName))
.findFirst().get().getType();
final String ocMaxValue = MssqlQueryUtils.getMaxOcValueForStream(database, stream, ocFieldName, quoteString);
return Optional.of(new OrderedColumnInfo(ocFieldName, ocFieldType, ocMaxValue));
}
@VisibleForTesting
public static Optional<String> selectOcFieldName(final JdbcDatabase database,
final ConfiguredAirbyteStream stream) {
final Map<String, List<String>> clusteredIndexField = MssqlInitialLoadHandler.discoverClusteredIndexForStream(database, stream.getStream());
final String streamName = getFullyQualifiedTableName(stream.getStream().getNamespace(), stream.getStream().getName());
List<List<String>> primaryKey = stream.getStream().getSourceDefinedPrimaryKey();
if (primaryKey.isEmpty()) {
LOGGER.info("Stream does not have source defined primary key: " + stream.getStream().getName());
LOGGER.info("Trying to use logical primary key.");
primaryKey = stream.getPrimaryKey();
}
final String ocFieldName;
final List<String> clusterColumns = Optional.ofNullable(clusteredIndexField)
.map(map -> map.get(streamName))
.orElse(new ArrayList<>());
// Use the clustered index unless it is composite. Otherwise, default to the primary key.
if (clusterColumns.size() == 1) {
ocFieldName = clusterColumns.getFirst();
} else if (!primaryKey.isEmpty()) {
LOGGER.info("Clustered index is empty or composite. Defaulting to primary key.");
ocFieldName = primaryKey.getFirst().getFirst();
} else {
return Optional.empty();
}
return Optional.of(ocFieldName);
}
public static List<ConfiguredAirbyteStream> identifyStreamsToSnapshot(final ConfiguredAirbyteCatalog catalog,
final Set<AirbyteStreamNameNamespacePair> alreadySyncedStreams) {
final Set<AirbyteStreamNameNamespacePair> allStreams = AirbyteStreamNameNamespacePair.fromConfiguredCatalog(catalog);
final Set<AirbyteStreamNameNamespacePair> newlyAddedStreams = new HashSet<>(Sets.difference(allStreams, alreadySyncedStreams));
// Add a filter here to identify resumable full refresh streams.
return catalog.getStreams().stream()
.filter(stream -> newlyAddedStreams.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())))
.map(Jsons::clone)
.collect(Collectors.toList());
}
public static InitialLoadStreams streamsForInitialOrderedColumnLoad(final StateManager stateManager,
final ConfiguredAirbyteCatalog fullCatalog) {
final List<AirbyteStateMessage> rawStateMessages = stateManager.getRawStateMessages();
final Set<AirbyteStreamNameNamespacePair> streamsStillInOrderedColumnSync = new HashSet<>();
final Set<AirbyteStreamNameNamespacePair> alreadySeenStreamPairs = new HashSet<>();
// Build a map of stream <-> initial load status for streams that currently have an initial
// ordered column load in progress.
final Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToInitialLoadStatus = new HashMap<>();
LOGGER.info("raw state message: " + rawStateMessages);
if (rawStateMessages != null) {
rawStateMessages.forEach(stateMessage -> {
final AirbyteStreamState stream = stateMessage.getStream();
final JsonNode streamState = stream.getStreamState();
final StreamDescriptor streamDescriptor = stateMessage.getStream().getStreamDescriptor();
if (streamState == null || streamDescriptor == null) {
return;
}
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(),
streamDescriptor.getNamespace());
if (streamState.has(STATE_TYPE_KEY)) {
if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(ORDERED_COL_STATE_TYPE)) {
final OrderedColumnLoadStatus orderedColumnLoadStatus = Jsons.object(streamState, OrderedColumnLoadStatus.class);
pairToInitialLoadStatus.put(pair, orderedColumnLoadStatus);
streamsStillInOrderedColumnSync.add(pair);
}
}
alreadySeenStreamPairs.add(new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), streamDescriptor.getNamespace()));
});
}
final List<ConfiguredAirbyteStream> streamsForOcSync = new ArrayList<>();
LOGGER.info("alreadySeenStreamPairs: {}", alreadySeenStreamPairs);
fullCatalog.getStreams().stream()
.filter(stream -> streamsStillInOrderedColumnSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())))
.map(Jsons::clone)
.forEach(streamsForOcSync::add);
final List<ConfiguredAirbyteStream> newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog,
Collections.unmodifiableSet(alreadySeenStreamPairs));
streamsForOcSync.addAll(newlyAddedStreams);
LOGGER.info("streamsForOcSync: {}", streamsForOcSync);
return new InitialLoadStreams(
streamsForOcSync.stream()
.filter(stream -> !stream.getStream().getSourceDefinedPrimaryKey().isEmpty())
.collect(Collectors.toList()),
pairToInitialLoadStatus);
}
private static OptionalInt extractQueueSizeFromConfig(final JsonNode config) {
final JsonNode replicationMethod = config.get("replication_method");
if (replicationMethod != null && replicationMethod.has("queue_size")) {
final int queueSize = config.get("replication_method").get("queue_size").asInt();
return OptionalInt.of(queueSize);
}
return OptionalInt.empty();
}
@SuppressWarnings("unchecked")
private static Supplier<AutoCloseableIterator<AirbyteMessage>> getCdcIncrementalIteratorsSupplier(AirbyteDebeziumHandler handler,
RelationalDbDebeziumPropertiesManager propertiesManager,
DebeziumEventConverter eventConverter,
CdcState stateToBeUsed,
StateManager stateManager) {
return () -> handler.getIncrementalIterators(
propertiesManager, eventConverter, new MssqlCdcSavedInfoFetcher(stateToBeUsed), new MssqlCdcStateHandler(stateManager));
}
private static boolean isStreamPartiallyOrFullyCompleted(ConfiguredAirbyteStream stream, InitialLoadStreams initialLoadStreams) {
boolean isStreamCompleted = !initialLoadStreams.streamsForInitialLoad.contains(stream);
// A stream has been partially completed if an initial load status exists.
boolean isStreamPartiallyCompleted = (initialLoadStreams.pairToInitialLoadStatus
.get(new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()))) != null;
return isStreamCompleted || isStreamPartiallyCompleted;
}
public static int getQueueSize(final JsonNode config) {
final OptionalInt sizeFromConfig = extractQueueSizeFromConfig(config);
if (sizeFromConfig.isPresent()) {
final int size = sizeFromConfig.getAsInt();
if (size < MIN_QUEUE_SIZE) {
LOGGER.warn("Queue size is overridden to {} , which is the min allowed for safety.",
MIN_QUEUE_SIZE);
return MIN_QUEUE_SIZE;
} else if (size > MAX_QUEUE_SIZE) {
LOGGER.warn("Queue size is overridden to {} , which is the max allowed for safety.",
MAX_QUEUE_SIZE);
return MAX_QUEUE_SIZE;
}
return size;
}
return MAX_QUEUE_SIZE;
}
public static InitialLoadStreams filterStreamInIncrementalMode(final InitialLoadStreams stream) {
return new InitialLoadStreams(
stream.streamsForInitialLoad.stream().filter(airbyteStream -> airbyteStream.getSyncMode() == SyncMode.INCREMENTAL)
.collect(Collectors.toList()),
stream.pairToInitialLoadStatus);
}
}

View File

@@ -1,91 +0,0 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.integrations.util.ConnectorErrorProfile
import io.airbyte.cdk.integrations.util.ConnectorExceptionHandler
import io.airbyte.cdk.integrations.util.FailureType
class MSSqlSourceExceptionHandler : ConnectorExceptionHandler() {
override fun initializeErrorDictionary() {
// include common error profiles
super.initializeErrorDictionary()
// adding connector specific error profiles
add(
ConnectorErrorProfile(
errorClass = "MSSQL Exception",
regexMatchingPattern =
"(?i).*returned an incomplete response. The connection has been closed.*",
failureType = FailureType.TRANSIENT,
externalMessage =
"SQL Server returned an incomplete response and the connection was closed, will retry.",
sampleInternalMessage =
"com.microsoft.sqlserver.jdbc.SQLServerException: SQL Server returned an incomplete response. The connection has been closed.",
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6623")
),
)
add(
ConnectorErrorProfile(
errorClass = "MSSQL Exception",
regexMatchingPattern =
"(?i).*SQL Server did not return a response. The connection has been closed.*",
failureType = FailureType.TRANSIENT,
externalMessage =
"Encountered an error while reading from the database, will retry",
sampleInternalMessage =
"com.microsoft.sqlserver.jdbc.SQLServerException: SQL Server did not return a response. The connection has been closed.",
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/7757")
),
)
add(
ConnectorErrorProfile(
errorClass = "MSSQL Exception",
regexMatchingPattern = "(?i).*The connection is closed.*",
failureType = FailureType.TRANSIENT,
externalMessage = "The SQL Server connection was unexpectedly closed, will retry.",
sampleInternalMessage =
"com.microsoft.sqlserver.jdbc.SQLServerException: The connection is closed.",
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6438")
),
)
add(
// Error 1205
// https://learn.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-1205-database-engine-error
ConnectorErrorProfile(
errorClass = "MSSQL Exception",
regexMatchingPattern =
"(?i).*was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction.*",
failureType = FailureType.TRANSIENT,
externalMessage =
"Transaction conflicted with another process and was terminated, will retry.",
sampleInternalMessage =
"com.microsoft.sqlserver.jdbc.SQLServerException: " +
"Transaction (Process ID 63) was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction.",
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6287")
),
)
// This error occurs when Debezium encounters an exception.
// We classify it as TRANSIENT since it may be resolved through automatic retries but can
// also require investigation and manual intervention.
add(
ConnectorErrorProfile(
errorClass = "Connect Exception",
regexMatchingPattern = "(?i).*exception occurred in the change event producer.*",
failureType = FailureType.TRANSIENT,
externalMessage =
"The sync encountered an unexpected error in the change event producer and has stopped. Please check the logs for details and troubleshoot accordingly.",
sampleInternalMessage =
"java.lang.RuntimeException: org.apache.kafka.connect.errors.ConnectException: " +
"An exception occurred in the change event producer. This connector will be stopped.",
referenceLinks =
listOf(
"https://docs.oracle.com/javase/9/docs/api/java/lang/RuntimeException.html"
)
),
)
}
}
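
// Illustrative sketch (not part of the connector): how a profile's
// regexMatchingPattern is meant to relate to its sampleInternalMessage.
// The pattern and sample below mirror the deadlock profile registered above;
// a match is what causes the failure to be classified as TRANSIENT and retried.
fun main() {
    val pattern =
        Regex(
            "(?i).*was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction.*"
        )
    val sample =
        "com.microsoft.sqlserver.jdbc.SQLServerException: " +
            "Transaction (Process ID 63) was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction."
    check(pattern.matches(sample)) // true: the profile matches this exception
}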

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.util.Jsons
data class MsSqlServerCdcInitialSnapshotStateValue(
@JsonProperty("pk_val") val pkVal: String? = null,
@JsonProperty("pk_name") val pkName: String? = null,
@JsonProperty("version") val version: Int? = null,
@JsonProperty("state_type") val stateType: String? = null,
@JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
@JsonProperty("stream_name") val streamName: String? = null,
@JsonProperty("cursor_field") val cursorField: List<String>? = null,
@JsonProperty("stream_namespace") val streamNamespace: String? = null,
) {
companion object {
/** Value representing the completion of a FULL_REFRESH snapshot. */
fun getSnapshotCompletedState(stream: Stream): OpaqueStateValue =
Jsons.valueToTree(
MsSqlServerCdcInitialSnapshotStateValue(
streamName = stream.name,
cursorField = listOf(),
streamNamespace = stream.namespace
)
)
/** Value representing the progress of an ongoing snapshot. */
fun snapshotCheckpoint(
primaryKey: List<Field>,
primaryKeyCheckpoint: List<JsonNode>,
): OpaqueStateValue {
val primaryKeyField = primaryKey.first()
return when (primaryKeyCheckpoint.first().isNull) {
true -> Jsons.nullNode()
false ->
Jsons.valueToTree(
MsSqlServerCdcInitialSnapshotStateValue(
pkName = primaryKeyField.id,
pkVal = primaryKeyCheckpoint.first().asText(),
stateType = "primary_key",
)
)
}
}
}
}
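
// A minimal sketch (not connector code) of what a mid-snapshot checkpoint
// built by snapshotCheckpoint(...) serializes to. "id" and "42" are
// hypothetical values; the connector serializes via the CDK's Jsons, as here.
fun main() {
    val state =
        MsSqlServerCdcInitialSnapshotStateValue(
            pkName = "id",
            pkVal = "42",
            stateType = "primary_key",
        )
    // Prints the JSON stored as the stream's opaque state value, e.g.
    // {"pk_val":"42","pk_name":"id",...} with the remaining fields null.
    println(Jsons.valueToTree<JsonNode>(state))
}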

View File

@@ -0,0 +1,116 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.Instant
import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoUnit
private val log = KotlinLogging.logger {}
/**
* Utility object to calculate the cutoff time for the "Exclude Today's Data" feature. This ensures
* that incremental syncs using temporal cursor fields only include data strictly before the start
* of the current day (midnight UTC).
*/
object MsSqlServerCursorCutoffTimeProvider {
private val ISO_LOCAL_DATE: DateTimeFormatter = DateTimeFormatter.ISO_LOCAL_DATE
private val ISO_OFFSET_DATE_TIME: DateTimeFormatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME
private val TIMESTAMPTZ_FORMATTER: DateTimeFormatter =
DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS xxx")
private val SQL_SERVER_DATETIME_FORMATTER: DateTimeFormatter =
MsSqlServerJdbcPartitionFactory.outputDateFormatter
/**
* Calculates the cutoff time for a cursor field based on its type.
*
* @param cursorField The cursor field
* @param nowInstant The current instant (for testing)
* @return The cutoff time as JsonNode, or null if not applicable
*/
fun getCutoffTime(cursorField: Field, nowInstant: Instant = Instant.now()): JsonNode? {
return when (cursorField.type.airbyteSchemaType) {
is LeafAirbyteSchemaType -> {
when (cursorField.type.airbyteSchemaType as LeafAirbyteSchemaType) {
LeafAirbyteSchemaType.DATE -> {
// For DATE fields, exclude today by setting cutoff to today's date
// This means we include records < today's date (i.e., yesterday and before)
val today = nowInstant.atOffset(ZoneOffset.UTC).toLocalDate()
val cutoffValue = Jsons.valueToTree<JsonNode>(ISO_LOCAL_DATE.format(today))
log.info {
"DATE cutoff for field '${cursorField.id}': ${cutoffValue.asText()}"
}
cutoffValue
}
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE -> {
// For TIMESTAMP fields, set cutoff to start of today (00:00:00)
// Use local datetime without timezone for SQL Server DATETIME
val startOfToday =
nowInstant
.atOffset(ZoneOffset.UTC)
.truncatedTo(ChronoUnit.DAYS)
.toLocalDateTime()
val cutoffValue =
Jsons.valueToTree<JsonNode>(
SQL_SERVER_DATETIME_FORMATTER.format(startOfToday)
)
log.info {
"TIMESTAMP_WITHOUT_TIMEZONE cutoff for field '${cursorField.id}': ${cutoffValue.asText()}"
}
cutoffValue
}
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE -> {
// For TIME fields, set cutoff to start of today (00:00:00)
val startOfToday =
nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS)
Jsons.valueToTree(ISO_OFFSET_DATE_TIME.format(startOfToday))
}
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> {
// For TIMESTAMP WITH TIMEZONE fields, set cutoff to start of today
// (00:00:00)
val startOfToday =
nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS)
Jsons.valueToTree(TIMESTAMPTZ_FORMATTER.format(startOfToday))
}
else -> {
log.warn {
"Only temporal cursors can exclude today's data. " +
"Field '${cursorField.id}' has type '${cursorField.type}' which is not supported."
}
null
}
}
}
else -> {
log.warn {
"Only temporal cursors can exclude today's data. " +
"Field '${cursorField.id}' has non-leaf type '${cursorField.type}' which is not supported."
}
null
}
}
}
/** Checks if a cursor field type supports the "Exclude Today's Data" feature. */
fun isTemporalType(cursorField: Field): Boolean {
val schemaType = cursorField.type.airbyteSchemaType
return schemaType is LeafAirbyteSchemaType &&
schemaType in
listOf(
LeafAirbyteSchemaType.DATE,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE,
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
)
}
}
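
// A minimal sketch of the cutoff arithmetic above, using only java.time and a
// fixed, illustrative "now": the DATE branch yields today's date, while the
// TIMESTAMP branches yield midnight UTC of the same day.
fun main() {
    val now = Instant.parse("2024-06-15T13:45:00Z")
    val dateCutoff = now.atOffset(ZoneOffset.UTC).toLocalDate()
    val timestampCutoff = now.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS)
    println(dateCutoff) // 2024-06-15 -> only records strictly before today are included
    println(timestampCutoff) // 2024-06-15T00:00Z -> start-of-day cutoff for timestamp cursors
}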

View File

@@ -0,0 +1,346 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.debezium.spi.converter.CustomConverter
import io.debezium.spi.converter.RelationalColumn
import java.math.BigDecimal
import java.time.Instant
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import java.time.format.DateTimeParseException
import java.util.*
import microsoft.sql.DateTimeOffset
import org.apache.kafka.connect.data.SchemaBuilder
import org.slf4j.LoggerFactory
class MsSqlServerDebeziumConverter : CustomConverter<SchemaBuilder, RelationalColumn> {
companion object {
private val logger = LoggerFactory.getLogger(MsSqlServerDebeziumConverter::class.java)
private const val MSSQL_DATE_TYPE = "DATE"
private const val MSSQL_DATETIME_TYPE = "DATETIME"
private const val MSSQL_DATETIME2_TYPE = "DATETIME2"
private const val MSSQL_SMALLDATETIME_TYPE = "SMALLDATETIME"
private const val MSSQL_DATETIMEOFFSET_TYPE = "DATETIMEOFFSET"
private const val MSSQL_TIME_TYPE = "TIME"
private const val MSSQL_SMALLMONEY_TYPE = "SMALLMONEY"
private const val MSSQL_MONEY_TYPE = "MONEY"
private const val MSSQL_BINARY_TYPE = "BINARY"
private const val MSSQL_VARBINARY_TYPE = "VARBINARY"
private const val MSSQL_IMAGE_TYPE = "IMAGE"
private const val MSSQL_GEOMETRY_TYPE = "GEOMETRY"
private const val MSSQL_GEOGRAPHY_TYPE = "GEOGRAPHY"
private const val MSSQL_UNIQUEIDENTIFIER_TYPE = "UNIQUEIDENTIFIER"
private const val MSSQL_XML_TYPE = "XML"
private const val MSSQL_HIERARCHYID_TYPE = "HIERARCHYID"
private const val MSSQL_SQL_VARIANT_TYPE = "SQL_VARIANT"
}
override fun configure(properties: Properties) {
// No configuration needed
}
override fun converterFor(
field: RelationalColumn,
registration: CustomConverter.ConverterRegistration<SchemaBuilder>
) {
val typeName = field.typeName().uppercase()
when (typeName) {
MSSQL_DATE_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertDate)
}
MSSQL_DATETIME_TYPE,
MSSQL_DATETIME2_TYPE,
MSSQL_SMALLDATETIME_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertDateTime)
}
MSSQL_DATETIMEOFFSET_TYPE -> {
registration.register(
SchemaBuilder.string().optional(),
this::convertDateTimeOffset
)
}
MSSQL_TIME_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertTime)
}
MSSQL_SMALLMONEY_TYPE,
MSSQL_MONEY_TYPE -> {
registration.register(SchemaBuilder.float64().optional(), this::convertMoney)
}
MSSQL_BINARY_TYPE,
MSSQL_VARBINARY_TYPE,
MSSQL_IMAGE_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertBinary)
}
MSSQL_GEOMETRY_TYPE -> {
registration.register(SchemaBuilder.string().optional()) { value ->
convertSpatial(value, isGeography = false)
}
}
MSSQL_GEOGRAPHY_TYPE -> {
registration.register(SchemaBuilder.string().optional()) { value ->
convertSpatial(value, isGeography = true)
}
}
MSSQL_UNIQUEIDENTIFIER_TYPE -> {
registration.register(
SchemaBuilder.string().optional(),
this::convertUniqueIdentifier
)
}
MSSQL_XML_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertXml)
}
MSSQL_HIERARCHYID_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertHierarchyId)
}
MSSQL_SQL_VARIANT_TYPE -> {
registration.register(SchemaBuilder.string().optional(), this::convertSqlVariant)
}
else -> {
// For unhandled types, no converter is registered, so Debezium's default handling applies
logger.debug("Unhandled SQL Server type: {}", typeName)
}
}
}
private fun convertDate(value: Any?): Any? {
if (value == null) return null
return try {
when (value) {
is LocalDate -> value.toString()
is String -> {
// Try to parse and reformat to ensure consistent format
val date = LocalDate.parse(value, DateTimeFormatter.ISO_LOCAL_DATE)
date.toString()
}
is java.sql.Date -> value.toLocalDate().toString()
else -> value.toString()
}
} catch (e: DateTimeParseException) {
logger.warn("Failed to parse date value: {}", value, e)
value.toString()
}
}
private fun convertDateTime(value: Any?): Any? {
if (value == null) return null
return try {
val formatter = MsSqlServerJdbcPartitionFactory.outputDateFormatter
when (value) {
is LocalDateTime -> value.format(formatter)
is String -> {
// Try to parse as LocalDateTime first
val dateTime = LocalDateTime.parse(value.replace(" ", "T"))
dateTime.format(formatter)
}
is java.sql.Timestamp -> value.toLocalDateTime().format(formatter)
is Instant -> LocalDateTime.ofInstant(value, ZoneOffset.UTC).format(formatter)
else -> value.toString()
}
} catch (e: DateTimeParseException) {
logger.warn("Failed to parse datetime value: {}", value, e)
value.toString()
}
}
private fun convertDateTimeOffset(value: Any?): Any? {
if (value == null) return null
return try {
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX")
when (value) {
is DateTimeOffset -> value.offsetDateTime.format(formatter)
is OffsetDateTime -> value.format(formatter)
is String -> {
// Try to parse as OffsetDateTime
val offsetDateTime = OffsetDateTime.parse(value)
offsetDateTime.format(formatter)
}
else -> value.toString()
}
} catch (e: Exception) {
logger.warn("Failed to parse datetimeoffset value: {}", value, e)
value.toString()
}
}
private fun convertTime(value: Any?): Any? {
if (value == null) return null
return try {
val formatter = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSS")
when (value) {
is LocalTime -> value.format(formatter)
is String -> {
// Handle SQL Server TIME values that come as datetime strings
if (value.contains(" ")) {
// Extract time part from "1900-01-01 13:00:01.0" format
val timePart = value.substringAfter(" ")
val time = LocalTime.parse(timePart)
time.format(formatter)
} else {
val time = LocalTime.parse(value)
time.format(formatter)
}
}
is java.sql.Time -> value.toLocalTime().format(formatter)
else -> {
// Handle other cases where TIME might come as datetime string
val stringValue = value.toString()
if (stringValue.contains(" ")) {
val timePart = stringValue.substringAfter(" ")
val time = LocalTime.parse(timePart)
time.format(formatter)
} else {
stringValue
}
}
}
} catch (e: Exception) {
logger.warn("Failed to parse time value: {}", value, e)
value.toString()
}
}
private fun convertMoney(value: Any?): Any? {
if (value == null) return null
return try {
when (value) {
is BigDecimal -> value.toDouble()
is Double -> value
is String -> value.toBigDecimal().toDouble()
is Number -> value.toDouble()
else -> value.toString().toBigDecimal().toDouble()
}
} catch (e: Exception) {
logger.warn("Failed to parse money value: {}", value, e)
null
}
}
private fun convertBinary(value: Any?): Any? {
if (value == null) return null
return when (value) {
is ByteArray -> Base64.getEncoder().encodeToString(value)
is String -> value // Already base64 encoded
else -> {
logger.warn("Unexpected binary type: {}", value.javaClass.name)
value.toString()
}
}
}
private fun convertSpatial(value: Any?, isGeography: Boolean): Any? {
if (value == null) return null
return try {
when (value) {
is String -> {
// If already a string (WKT format), check if it's base64
if (value.matches(Regex("^[A-Za-z0-9+/]+=*$"))) {
// It's base64, decode and convert
try {
val bytes = Base64.getDecoder().decode(value)
convertSpatialBytes(bytes, isGeography)
} catch (e: Exception) {
logger.warn("Failed to decode base64 spatial value: {}", e.message)
value
}
} else {
// Already WKT format
value
}
}
is ByteArray -> convertSpatialBytes(value, isGeography)
else -> value.toString()
}
} catch (e: Exception) {
logger.warn("Failed to convert spatial value: {}", value, e)
value.toString()
}
}
private fun convertSpatialBytes(bytes: ByteArray, isGeography: Boolean): String {
return try {
if (isGeography) {
// Deserialize as Geography
com.microsoft.sqlserver.jdbc.Geography.deserialize(bytes).toString()
} else {
// Deserialize as Geometry
com.microsoft.sqlserver.jdbc.Geometry.deserialize(bytes).toString()
}
} catch (e: Exception) {
logger.warn(
"Failed to deserialize spatial binary as ${if (isGeography) "Geography" else "Geometry"}: {}",
e.message
)
// Fallback to base64 if deserialization fails
Base64.getEncoder().encodeToString(bytes)
}
}
private fun convertUniqueIdentifier(value: Any?): Any? {
if (value == null) return null
return try {
when (value) {
is String -> {
UUID.fromString(value).toString().uppercase()
}
else -> value.toString().uppercase()
}
} catch (e: Exception) {
logger.warn("Failed to convert UUID value: {}", value, e)
value.toString()
}
}
private fun convertXml(value: Any?): Any? {
if (value == null) return null
return try {
// XML is stored as string in Airbyte
value.toString()
} catch (e: Exception) {
logger.warn("Failed to convert XML value: {}", value, e)
value.toString()
}
}
private fun convertHierarchyId(value: Any?): Any? {
if (value == null) return null
return try {
// HierarchyID is stored as string representation
value.toString()
} catch (e: Exception) {
logger.warn("Failed to convert HierarchyID value: {}", value, e)
value.toString()
}
}
private fun convertSqlVariant(value: Any?): Any? {
if (value == null) return null
return try {
// SQL_VARIANT can hold various types - store as string
value.toString()
} catch (e: Exception) {
logger.warn("Failed to convert SQL_VARIANT value: {}", value, e)
value.toString()
}
}
}
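
// A minimal sketch of the TIME handling above: SQL Server TIME values can
// surface through Debezium as "1900-01-01 13:00:01.0" (a hypothetical value);
// the converter keeps only the time-of-day part and reformats it to HH:mm:ss.SSSSSS.
fun main() {
    val raw = "1900-01-01 13:00:01.0"
    val formatter = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSS")
    val time = LocalTime.parse(raw.substringAfter(" "))
    println(time.format(formatter)) // 13:00:01.000000
}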

View File

@@ -0,0 +1,698 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.cdk.data.BigDecimalCodec
import io.airbyte.cdk.data.BinaryCodec
import io.airbyte.cdk.data.DoubleCodec
import io.airbyte.cdk.data.FloatCodec
import io.airbyte.cdk.data.IntCodec
import io.airbyte.cdk.data.JsonCodec
import io.airbyte.cdk.data.JsonEncoder
import io.airbyte.cdk.data.LongCodec
import io.airbyte.cdk.data.NullCodec
import io.airbyte.cdk.data.TextCodec
import io.airbyte.cdk.discover.CommonMetaField
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.cdk.output.sockets.FieldValueEncoder
import io.airbyte.cdk.output.sockets.NativeRecordPayload
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.read.cdc.AbortDebeziumWarmStartState
import io.airbyte.cdk.read.cdc.CdcPartitionReaderDebeziumOperations
import io.airbyte.cdk.read.cdc.CdcPartitionsCreatorDebeziumOperations
import io.airbyte.cdk.read.cdc.DebeziumOffset
import io.airbyte.cdk.read.cdc.DebeziumPropertiesBuilder
import io.airbyte.cdk.read.cdc.DebeziumPropertiesBuilder.Companion.AIRBYTE_HEARTBEAT_TIMEOUT_SECONDS
import io.airbyte.cdk.read.cdc.DebeziumRecordKey
import io.airbyte.cdk.read.cdc.DebeziumRecordValue
import io.airbyte.cdk.read.cdc.DebeziumSchemaHistory
import io.airbyte.cdk.read.cdc.DebeziumWarmStartState
import io.airbyte.cdk.read.cdc.DeserializedRecord
import io.airbyte.cdk.read.cdc.InvalidDebeziumWarmStartState
import io.airbyte.cdk.read.cdc.ResetDebeziumWarmStartState
import io.airbyte.cdk.read.cdc.ValidDebeziumWarmStartState
import io.airbyte.cdk.ssh.TunnelSession
import io.airbyte.cdk.util.Jsons
import io.debezium.connector.sqlserver.Lsn
import io.debezium.connector.sqlserver.SqlServerConnector
import io.debezium.document.DocumentReader
import io.debezium.document.DocumentWriter
import io.debezium.relational.history.HistoryRecord
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.sql.Connection
import java.time.Instant
import java.time.OffsetDateTime
import java.time.ZoneOffset
import java.util.concurrent.atomic.AtomicLong
import java.util.zip.GZIPInputStream
import java.util.zip.GZIPOutputStream
import kotlin.collections.plus
import org.apache.kafka.connect.source.SourceRecord
import org.apache.mina.util.Base64
data class MsSqlServerCdcPosition(val lsn: String) : Comparable<MsSqlServerCdcPosition> {
override fun compareTo(other: MsSqlServerCdcPosition): Int {
return lsn.compareTo(other.lsn)
}
}
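
// Note: plain string comparison is sufficient above because SQL Server LSNs are
// fixed-width, zero-padded hexadecimal (e.g. "0000003a:00001c90:0003" - a made-up
// example), so lexicographic order coincides with numeric order.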
@Singleton
class MsSqlServerDebeziumOperations(
private val jdbcConnectionFactory: JdbcConnectionFactory,
private val configuration: MsSqlServerSourceConfiguration
) :
CdcPartitionsCreatorDebeziumOperations<MsSqlServerCdcPosition>,
CdcPartitionReaderDebeziumOperations<MsSqlServerCdcPosition> {
// Generates globally unique cursor values for CDC records by combining
// current timestamp with an incrementing counter. This ensures monotonically
// increasing values across sync restarts and avoids collisions.
val cdcCursorGenerator = AtomicLong(Instant.now().toEpochMilli() * 10_000_000 + 1)
private val log = KotlinLogging.logger {}
@Suppress("UNCHECKED_CAST")
override fun deserializeRecord(
key: DebeziumRecordKey,
value: DebeziumRecordValue,
stream: Stream,
): DeserializedRecord {
val before: JsonNode = value.before
val after: JsonNode = value.after
val source: JsonNode = value.source
val isDelete: Boolean = after.isNull
// Use either `before` or `after` as the record data, depending on the nature of the change.
val recordData: JsonNode = if (isDelete) before else after
// Convert JsonNode to NativeRecordPayload based on stream schema
val resultRow: NativeRecordPayload = mutableMapOf()
// Process fields based on stream schema (following MySQL pattern)
for (field in stream.schema) {
val fieldValue = recordData[field.id] ?: continue
when {
fieldValue.isNull -> {
resultRow[field.id] = FieldValueEncoder(null, NullCodec)
}
else -> {
// Use the field's jsonEncoder if available, otherwise fall back to TextCodec
val codec: JsonCodec<*> = field.type.jsonEncoder as? JsonCodec<*> ?: TextCodec
// Handle numeric and binary values from Debezium (can come as JSON strings or
// numbers)
val decodedValue =
when {
// BigDecimal: handle both string and number
fieldValue.isTextual && codec is BigDecimalCodec ->
java.math.BigDecimal(fieldValue.asText())
fieldValue.isNumber && codec is BigDecimalCodec ->
fieldValue.decimalValue()
// Int: handle both string and number
fieldValue.isTextual && codec is IntCodec -> fieldValue.asText().toInt()
fieldValue.isNumber && codec is IntCodec -> fieldValue.intValue()
// Long: handle both string and number
fieldValue.isTextual && codec is LongCodec ->
fieldValue.asText().toLong()
fieldValue.isNumber && codec is LongCodec -> fieldValue.longValue()
// Float: handle both string and number
fieldValue.isTextual && codec is FloatCodec ->
fieldValue.asText().toFloat()
fieldValue.isNumber && codec is FloatCodec -> fieldValue.floatValue()
// Double: handle both string and number
fieldValue.isTextual && codec is DoubleCodec ->
fieldValue.asText().toDouble()
fieldValue.isNumber && codec is DoubleCodec -> fieldValue.doubleValue()
// Binary: handle base64 string
fieldValue.isTextual && codec is BinaryCodec ->
java.nio.ByteBuffer.wrap(
java.util.Base64.getDecoder().decode(fieldValue.asText())
)
else -> codec.decode(fieldValue)
}
resultRow[field.id] = FieldValueEncoder(decodedValue, codec as JsonCodec<Any>)
}
}
}
// Set CDC meta-field values
val transactionMillis: Long = source["ts_ms"].asLong()
val transactionOffsetDateTime: OffsetDateTime =
OffsetDateTime.ofInstant(Instant.ofEpochMilli(transactionMillis), ZoneOffset.UTC)
resultRow[CommonMetaField.CDC_UPDATED_AT.id] =
FieldValueEncoder(
transactionOffsetDateTime,
CommonMetaField.CDC_UPDATED_AT.type.jsonEncoder as JsonEncoder<Any>
)
resultRow[CommonMetaField.CDC_DELETED_AT.id] =
FieldValueEncoder(
if (isDelete) transactionOffsetDateTime else null,
(if (isDelete) CommonMetaField.CDC_DELETED_AT.type.jsonEncoder else NullCodec)
as JsonEncoder<Any>
)
// Set MSSQL-specific CDC meta-fields
val commitLsn = source["commit_lsn"].asText()
resultRow[MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_LSN.id] =
FieldValueEncoder(
commitLsn,
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_LSN.type.jsonEncoder
as JsonEncoder<Any>
)
resultRow[MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_CURSOR.id] =
FieldValueEncoder(
cdcCursorGenerator.getAndIncrement(),
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_CURSOR.type.jsonEncoder
as JsonEncoder<Any>
)
val eventSerialNo = source["event_serial_no"]?.asInt()?.let { "$it" } ?: "0"
resultRow[MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO.id] =
FieldValueEncoder(
eventSerialNo,
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO.type.jsonEncoder
as JsonEncoder<Any>
)
// Return a DeserializedRecord instance.
return DeserializedRecord(resultRow, changes = emptyMap())
}
override fun position(recordValue: DebeziumRecordValue): MsSqlServerCdcPosition? {
val commitLsn = recordValue.source["commit_lsn"]?.asText()
return commitLsn?.let { MsSqlServerCdcPosition(it) }
}
override fun position(sourceRecord: SourceRecord): MsSqlServerCdcPosition? {
val commitLsn: String =
sourceRecord.sourceOffset()["commit_lsn"]?.toString() ?: return null
return MsSqlServerCdcPosition(commitLsn)
}
override fun position(offset: DebeziumOffset): MsSqlServerCdcPosition {
if (offset.wrapped.size != 1) {
throw IllegalArgumentException("Expected exactly 1 key in $offset")
}
val offsetValue = offset.wrapped.values.first() as ObjectNode
val commitLsn = offsetValue["commit_lsn"].asText()
return MsSqlServerCdcPosition(commitLsn)
}
override fun serializeState(
offset: DebeziumOffset,
schemaHistory: DebeziumSchemaHistory?
): JsonNode {
// Sanitize offset before saving to state to fix heartbeat corruption
val sanitizedOffset = sanitizeOffset(offset)
val stateNode: ObjectNode = Jsons.objectNode()
// Serialize offset.
val offsetNode: JsonNode =
Jsons.objectNode().apply {
for ((k, v) in sanitizedOffset.wrapped) {
put(Jsons.writeValueAsString(k), Jsons.writeValueAsString(v))
}
}
stateNode.set<JsonNode>(MSSQL_CDC_OFFSET, offsetNode)
val realSchemaHistory: List<HistoryRecord>? = schemaHistory?.wrapped
if (realSchemaHistory != null) {
val uncompressedString: String =
realSchemaHistory.joinToString(separator = "\n") {
DocumentWriter.defaultWriter().write(it.document())
}
if (uncompressedString.length <= MSSQL_MAX_UNCOMPRESSED_LENGTH) {
stateNode.put(MSSQL_DB_HISTORY, uncompressedString)
stateNode.put(MSSQL_IS_COMPRESSED, false)
} else {
stateNode.put(MSSQL_IS_COMPRESSED, true)
val baos = ByteArrayOutputStream()
val builder = StringBuilder()
GZIPOutputStream(baos).writer(Charsets.UTF_8).use { it.write(uncompressedString) }
builder.append("\"")
builder.append(Base64.encodeBase64(baos.toByteArray()).toString(Charsets.UTF_8))
builder.append("\"")
stateNode.put(MSSQL_DB_HISTORY, builder.toString())
}
}
return Jsons.objectNode().apply { set<JsonNode>(MSSQL_STATE, stateNode) }
}
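
    // Example of the serialized shape produced by serializeState above (all values hypothetical):
    // { "state": { "mssql_cdc_offset": { "[\"db\",{...}]": "{\"commit_lsn\":\"...\"}" },
    //              "mssql_db_history": "...", "is_compressed": false } }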
override fun deserializeState(opaqueStateValue: JsonNode): DebeziumWarmStartState {
val stateNode = opaqueStateValue[MSSQL_STATE]
val offsetNode = stateNode[MSSQL_CDC_OFFSET] as JsonNode
val offsetMap: Map<JsonNode, JsonNode> =
offsetNode
.fieldNames()
.asSequence()
.map { k -> Jsons.readTree(k) to Jsons.readTree(offsetNode[k].textValue()) }
.toMap()
// Handle legacy state with multiple offset keys (e.g., different database name casings)
val finalOffsetMap =
when {
offsetMap.size == 1 -> offsetMap
offsetMap.size > 1 -> {
log.warn {
"Found ${offsetMap.size} offset keys in saved state. This may be from a legacy connector version. " +
"Selecting the offset with the highest LSN (most recent position)."
}
// Select the offset with the highest LSN
val selectedEntry =
offsetMap.entries.maxByOrNull { (_, value) ->
val offsetValue = value as ObjectNode
val commitLsn = offsetValue["commit_lsn"]?.asText()
try {
commitLsn?.let { Lsn.valueOf(it) } ?: Lsn.NULL
} catch (e: Exception) {
log.warn(e) { "Failed to parse LSN from offset value: $value" }
Lsn.NULL
}
}
if (selectedEntry == null) {
throw RuntimeException(
"Unable to select valid offset from multiple keys in $opaqueStateValue"
)
}
log.info {
"Selected offset key with commit_lsn='${(selectedEntry.value as ObjectNode)["commit_lsn"]?.asText()}' " +
"from ${offsetMap.size} available offset keys."
}
mapOf(selectedEntry.key to selectedEntry.value)
}
else ->
throw RuntimeException(
"Offset object must have at least 1 key in $opaqueStateValue"
)
}
val offset = DebeziumOffset(finalOffsetMap)
// Check if the saved LSN is valid
val savedLsn =
try {
val offsetValue = offset.wrapped.values.first() as ObjectNode
val commitLsn = offsetValue["commit_lsn"].asText()
Lsn.valueOf(commitLsn)
} catch (e: Exception) {
log.error(e) { "Failed to parse saved LSN from offset: $offset" }
return abortCdcSync("Invalid LSN format in saved offset")
}
// Validate the saved LSN is still available in SQL Server
val isLsnValid =
try {
validateLsnStillAvailable(savedLsn)
} catch (e: Exception) {
log.error(e) { "Failed to validate LSN availability: ${savedLsn}" }
false
}
if (!isLsnValid) {
return abortCdcSync(
"Saved LSN '${savedLsn}' is no longer available in SQL Server transaction logs"
)
}
val historyNode = stateNode[MSSQL_DB_HISTORY]
val schemaHistory: DebeziumSchemaHistory? =
historyNode?.let {
val isCompressed: Boolean = stateNode[MSSQL_IS_COMPRESSED]?.asBoolean() ?: false
val uncompressedString: String =
if (isCompressed) {
val textValue: String = it.textValue()
val compressedBytes: ByteArray =
textValue.substring(1, textValue.length - 1).toByteArray(Charsets.UTF_8)
val decoded = Base64.decodeBase64(compressedBytes)
GZIPInputStream(ByteArrayInputStream(decoded))
.reader(Charsets.UTF_8)
.readText()
} else {
it.textValue()
}
val schemaHistoryList: List<HistoryRecord> =
uncompressedString
.lines()
.filter { it.isNotBlank() }
.map { HistoryRecord(DocumentReader.defaultReader().read(it)) }
DebeziumSchemaHistory(schemaHistoryList)
}
// Store the loaded offset for heartbeat sanitization comparison
lastLoadedOffset = offset
return ValidDebeziumWarmStartState(offset, schemaHistory)
}
// Track the last loaded offset to detect heartbeat corruption
@Volatile private var lastLoadedOffset: DebeziumOffset? = null
/**
* Sanitizes the offset before saving to state to fix heartbeat-induced corruption. SQL Server
* heartbeats reset event_serial_no to 0 and change_lsn to NULL, causing duplicate record
* emission on subsequent syncs.
*
* Compares the current offset (read from Debezium) against the offset that was loaded at the
* start of the sync.
*/
private fun sanitizeOffset(currentOffset: DebeziumOffset): DebeziumOffset {
val startingOffset = lastLoadedOffset ?: return currentOffset
if (startingOffset.wrapped.size != 1 || currentOffset.wrapped.size != 1) {
return currentOffset
}
val offsetKey = currentOffset.wrapped.keys.first()
val startValue =
startingOffset.wrapped.values.first() as? ObjectNode ?: return currentOffset
val currentValue =
currentOffset.wrapped.values.first() as? ObjectNode ?: return currentOffset
val startLsn = startValue["commit_lsn"]?.asText()
val currentLsn = currentValue["commit_lsn"]?.asText()
// If LSN has progressed, the current offset is valid
if (startLsn == null || currentLsn == null || startLsn != currentLsn) {
return currentOffset
}
// LSN hasn't progressed - check for heartbeat regression
val startEventSerialNo = startValue["event_serial_no"]?.asInt()
val currentEventSerialNo = currentValue["event_serial_no"]?.asInt()
val startChangeLsn = startValue["change_lsn"]
val currentChangeLsn = currentValue["change_lsn"]
val eventSerialNoRegressed =
startEventSerialNo != null &&
startEventSerialNo > 0 &&
(currentEventSerialNo == null || currentEventSerialNo == 0)
// Check if change_lsn has regressed to NULL (either JSON null or string "NULL")
val changeLsnRegressed =
startChangeLsn != null &&
!startChangeLsn.isNull &&
(currentChangeLsn == null ||
currentChangeLsn.isNull ||
(currentChangeLsn.isTextual && currentChangeLsn.asText() == "NULL"))
if (!eventSerialNoRegressed && !changeLsnRegressed) {
return currentOffset
}
// Heartbeat has corrupted the offset - restore starting values
log.info {
"Detected heartbeat offset regression at LSN $currentLsn. " +
"Preserving event_serial_no=$startEventSerialNo and change_lsn=${startChangeLsn?.asText()} " +
"from starting offset (current had event_serial_no=$currentEventSerialNo, change_lsn=${currentChangeLsn?.asText()})"
}
val sanitizedValue = currentValue.deepCopy()
if (eventSerialNoRegressed && startEventSerialNo != null) {
sanitizedValue.put("event_serial_no", startEventSerialNo)
}
if (changeLsnRegressed && !startChangeLsn.isNull) {
sanitizedValue.set<JsonNode>("change_lsn", startChangeLsn)
}
return DebeziumOffset(mapOf(offsetKey to sanitizedValue))
}
/**
* Validates if the given LSN is still available in SQL Server transaction logs. Returns true if
* the LSN is available, false otherwise.
*/
private fun validateLsnStillAvailable(lsn: Lsn): Boolean {
// Use jdbcConnectionFactory which handles SSH tunneling
jdbcConnectionFactory.get().use { connection: Connection ->
connection.createStatement().use { statement ->
// Check if the LSN is within the available range
// sys.fn_cdc_get_min_lsn returns the minimum available LSN for a capture instance
// sys.fn_cdc_get_max_lsn returns the current maximum LSN
val query =
"""
SELECT
sys.fn_cdc_get_min_lsn('') AS min_lsn,
sys.fn_cdc_get_max_lsn() AS max_lsn
""".trimIndent()
statement.executeQuery(query).use { resultSet ->
if (resultSet.next()) {
val minLsnBytes = resultSet.getBytes("min_lsn")
val maxLsnBytes = resultSet.getBytes("max_lsn")
if (minLsnBytes == null || maxLsnBytes == null) {
log.warn { "CDC is not enabled or no LSN range available" }
return false
}
val minLsn = Lsn.valueOf(minLsnBytes)
val maxLsn = Lsn.valueOf(maxLsnBytes)
// Check if saved LSN is within the valid range
val isValid = lsn.compareTo(minLsn) >= 0 && lsn.compareTo(maxLsn) <= 0
if (!isValid) {
log.warn {
"Saved LSN '$lsn' is outside the available range [min: $minLsn, max: $maxLsn]. " +
"Transaction logs may have been truncated."
}
}
return isValid
}
return false
}
}
}
}
/**
* Handles invalid CDC cursor position based on configured behavior. Either fails the sync or
* resets to start fresh from current position.
*/
private fun abortCdcSync(reason: String): InvalidDebeziumWarmStartState {
val cdcConfig =
configuration.incrementalReplicationConfiguration as CdcIncrementalConfiguration
return when (cdcConfig.invalidCdcCursorPositionBehavior) {
InvalidCdcCursorPositionBehavior.FAIL_SYNC ->
AbortDebeziumWarmStartState(
"Saved offset no longer present on the server, please reset the connection. " +
"To prevent this, increase transaction log retention and/or increase sync frequency. " +
"$reason."
)
InvalidCdcCursorPositionBehavior.RESET_SYNC ->
ResetDebeziumWarmStartState(
"Saved offset no longer present on the server. " +
"Automatically resetting to current position. " +
"WARNING: Any changes between the saved position and current position will be lost. " +
"$reason."
)
}
}
/**
* Gets the current maximum LSN from SQL Server for CDC cold start. This follows the pattern
* from the old MSSQL connector and returns the Debezium Lsn type for type safety.
*
* @return Lsn object representing the current maximum LSN
* @throws IllegalStateException if CDC is not enabled or LSN cannot be retrieved
*/
private fun getCurrentMaxLsn(): Lsn {
// Use jdbcConnectionFactory which handles SSH tunneling
jdbcConnectionFactory.get().use { connection: Connection ->
connection.createStatement().use { statement ->
// Query sys.fn_cdc_get_max_lsn() - no need for USE statement since connection is
// already to the right database
val query = "SELECT sys.fn_cdc_get_max_lsn() AS max_lsn"
statement.executeQuery(query).use { resultSet ->
if (resultSet.next()) {
val lsnBytes = resultSet.getBytes("max_lsn")
if (lsnBytes != null && lsnBytes.isNotEmpty()) {
// Use Debezium's Lsn class for proper validation and formatting
return Lsn.valueOf(lsnBytes)
} else {
throw IllegalStateException(
"CDC is not enabled or no max LSN available for database '${configuration.databaseName}'. " +
"Please ensure: 1) CDC is enabled on the database, 2) At least one table has CDC enabled, " +
"3) The user has necessary permissions to query CDC functions."
)
}
} else {
throw IllegalStateException(
"Failed to query max LSN from database '${configuration.databaseName}'. " +
"The query returned no results."
)
}
}
}
}
}
override fun generateColdStartOffset(): DebeziumOffset {
val currentLsn = getCurrentMaxLsn()
val databaseName = configuration.databaseName
// Create offset structure that matches SQL Server Debezium connector format
val key =
Jsons.arrayNode().apply {
add(databaseName)
add(
Jsons.objectNode().apply {
put("server", databaseName)
put("database", databaseName)
}
)
}
val value =
Jsons.objectNode().apply {
put("commit_lsn", currentLsn.toString())
put("snapshot", true)
put("snapshot_completed", true)
}
val offset = DebeziumOffset(mapOf(key to value))
log.info { "Constructed SQL Server CDC cold start offset with LSN: $currentLsn" }
return offset
}
override fun generateColdStartProperties(streams: List<Stream>): Map<String, String> {
return generateCommonDebeziumProperties(streams) + ("snapshot.mode" to "recovery")
}
override fun generateWarmStartProperties(streams: List<Stream>): Map<String, String> {
return generateCommonDebeziumProperties(streams) + ("snapshot.mode" to "when_needed")
}
private fun generateCommonDebeziumProperties(streams: List<Stream>): Map<String, String> {
val databaseName = configuration.databaseName
val schemaList = streams.map { it.namespace }.distinct().joinToString(",")
val messageKeyColumns = buildMessageKeyColumns(streams)
val tunnelSession: TunnelSession = jdbcConnectionFactory.ensureTunnelSession()
return DebeziumPropertiesBuilder()
.withDefault()
.withConnector(SqlServerConnector::class.java)
.withDebeziumName(databaseName)
.withHeartbeats(configuration.debeziumHeartbeatInterval)
.withOffset()
.withSchemaHistory()
.withStreams(streams)
.with("include.schema.changes", "false")
.with("provide.transaction.metadata", "false")
.with("snapshot.isolation.mode", "read_committed")
.with("schema.include.list", schemaList)
.let { builder ->
if (messageKeyColumns.isNotEmpty()) {
builder.with("message.key.columns", messageKeyColumns)
} else {
builder
}
}
.withDatabase("hostname", tunnelSession.address.hostName)
.withDatabase("port", tunnelSession.address.port.toString())
.withDatabase("user", configuration.jdbcProperties["user"].toString())
.withDatabase("password", configuration.jdbcProperties["password"].toString())
.withDatabase("dbname", databaseName)
.withDatabase("names", databaseName)
.with("database.encrypt", configuration.jdbcProperties["encrypt"] ?: "false")
.with(
"driver.trustServerCertificate",
configuration.jdbcProperties["trustServerCertificate"] ?: "true"
)
// Register the MSSQL custom converter
.with("converters", "mssql_converter")
.with("mssql_converter.type", MsSqlServerDebeziumConverter::class.java.name)
.with("binary.handling.mode", "base64")
.with("snapshot.locking.mode", "none")
// Set poll.interval.ms to control how often Debezium queries for new data
// This value is now configurable and validated to be smaller than heartbeat.interval.ms
.with(
"poll.interval.ms",
(configuration.incrementalReplicationConfiguration as CdcIncrementalConfiguration)
.pollIntervalMs
.toString()
)
// Enable heartbeat timeout for MSSQL to detect idle database states
.with(
AIRBYTE_HEARTBEAT_TIMEOUT_SECONDS,
configuration.incrementalReplicationConfiguration.initialWaitingSeconds
.toSeconds()
.toString()
)
.buildMap()
}
override fun findStreamName(key: DebeziumRecordKey, value: DebeziumRecordValue): String? {
return value.source["table"]?.asText()
}
override fun findStreamNamespace(key: DebeziumRecordKey, value: DebeziumRecordValue): String? {
return value.source["schema"]?.asText()
}
/**
* Builds the message.key.columns property value for Debezium. Format:
* "schema1.table1:keyCol1,keyCol2;schema2.table2:keyCol1,keyCol2" This replicates the logic
* from the old MSSQL connector's getMessageKeyColumnValue method.
*/
private fun buildMessageKeyColumns(streams: List<Stream>): String {
return streams
.filter { it.configuredPrimaryKey?.isNotEmpty() == true }
.joinToString(";") { stream ->
val tableId =
"${escapeSpecialChars(stream.namespace)}.${escapeSpecialChars(stream.name)}"
val keyCols =
stream.configuredPrimaryKey!!.joinToString(",") { escapeSpecialChars(it.id) }
"$tableId:$keyCols"
}
}
/**
* Escapes special characters for Debezium message key columns. Escapes: comma (,), period (.),
* semicolon (;), and colon (:) This replicates the logic from the old MSSQL connector's
* escapeSpecialChars method.
*/
private fun escapeSpecialChars(input: String?): String {
if (input == null) return ""
return input
.map { char ->
when (char) {
',',
'.',
';',
':' -> "\\${char}"
else -> char.toString()
}
}
.joinToString("")
}
companion object {
const val MSSQL_MAX_UNCOMPRESSED_LENGTH = 1024 * 1024
const val MSSQL_STATE = "state"
const val MSSQL_CDC_OFFSET = "mssql_cdc_offset"
const val MSSQL_DB_HISTORY = "mssql_db_history"
const val MSSQL_IS_COMPRESSED = "is_compressed"
}
}
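
// A minimal sketch (not connector code) of the heartbeat-regression check in
// sanitizeOffset above: commit_lsn is unchanged, but event_serial_no fell back
// to 0 and change_lsn became the literal string "NULL", so the starting offset's
// values must be preserved. The LSN values are made up for illustration.
fun main() {
    val start =
        Jsons.objectNode()
            .put("commit_lsn", "0000003a:00001c90:0003")
            .put("event_serial_no", 2)
            .put("change_lsn", "0000003a:00001c90:0002")
    val current =
        Jsons.objectNode()
            .put("commit_lsn", "0000003a:00001c90:0003")
            .put("event_serial_no", 0)
            .put("change_lsn", "NULL")
    val sameLsn = start["commit_lsn"].asText() == current["commit_lsn"].asText()
    val regressed =
        sameLsn && start["event_serial_no"].asInt() > 0 && current["event_serial_no"].asInt() == 0
    println(regressed) // true -> restore event_serial_no and change_lsn from the starting offset
}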

View File

@@ -0,0 +1,689 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.BinaryNode
import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.data.OffsetDateTimeCodec
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.read.And
import io.airbyte.cdk.read.DefaultJdbcStreamState
import io.airbyte.cdk.read.Equal
import io.airbyte.cdk.read.From
import io.airbyte.cdk.read.FromSample
import io.airbyte.cdk.read.Greater
import io.airbyte.cdk.read.GreaterOrEqual
import io.airbyte.cdk.read.JdbcCursorPartition
import io.airbyte.cdk.read.JdbcPartition
import io.airbyte.cdk.read.JdbcSplittablePartition
import io.airbyte.cdk.read.Lesser
import io.airbyte.cdk.read.LesserOrEqual
import io.airbyte.cdk.read.Limit
import io.airbyte.cdk.read.NoWhere
import io.airbyte.cdk.read.Or
import io.airbyte.cdk.read.OrderBy
import io.airbyte.cdk.read.SelectColumnMaxValue
import io.airbyte.cdk.read.SelectColumns
import io.airbyte.cdk.read.SelectQuery
import io.airbyte.cdk.read.SelectQueryGenerator
import io.airbyte.cdk.read.SelectQuerySpec
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.read.Where
import io.airbyte.cdk.read.WhereClauseLeafNode
import io.airbyte.cdk.read.WhereClauseNode
import io.airbyte.cdk.read.optimize
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.LocalDateTime
import java.time.OffsetDateTime
import java.time.ZoneOffset
import java.time.format.DateTimeParseException
import java.util.Base64
private val log = KotlinLogging.logger {}
/**
* Converts a state value string to a JsonNode based on the field type. This function handles type
* conversions and date formatting for state checkpoints.
*/
fun stateValueToJsonNode(field: Field, stateValue: String?): JsonNode {
when (field.type.airbyteSchemaType) {
is LeafAirbyteSchemaType ->
return when (field.type.airbyteSchemaType as LeafAirbyteSchemaType) {
LeafAirbyteSchemaType.INTEGER -> {
Jsons.valueToTree(stateValue?.toBigInteger())
}
LeafAirbyteSchemaType.NUMBER -> {
Jsons.valueToTree(stateValue?.toDouble())
}
LeafAirbyteSchemaType.BINARY -> {
val ba = Base64.getDecoder().decode(stateValue!!)
Jsons.valueToTree<BinaryNode>(ba)
}
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE -> {
try {
val parsedDate =
LocalDateTime.parse(
stateValue,
MsSqlServerJdbcPartitionFactory.inputDateFormatter
)
val dateAsString =
parsedDate.format(MsSqlServerJdbcPartitionFactory.outputDateFormatter)
Jsons.textNode(dateAsString)
} catch (e: DateTimeParseException) {
                        // Not parseable with the legacy format; assume the value is
                        // already in the new format and pass it through.
Jsons.valueToTree(stateValue)
}
}
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> {
try {
if (stateValue == null || stateValue.isEmpty()) {
return Jsons.nullNode()
}
// Normalize: remove spaces before timezone indicators
val normalizedValue =
stateValue.trim().replace(Regex("\\s+(?=[+\\-]|Z)"), "")
// Try parsing with timezone first, then fall back to assuming UTC
val offsetDateTime =
try {
OffsetDateTime.parse(
normalizedValue,
MsSqlServerJdbcPartitionFactory.timestampWithTimezoneParser
)
} catch (e: DateTimeParseException) {
// No timezone info - parse as LocalDateTime and assume UTC
LocalDateTime.parse(
normalizedValue,
MsSqlServerJdbcPartitionFactory
.timestampWithoutTimezoneParser
)
.atOffset(ZoneOffset.UTC)
}
// Format using standard codec formatter (6 decimal places, Z or offset)
Jsons.valueToTree(offsetDateTime.format(OffsetDateTimeCodec.formatter))
} catch (e: DateTimeParseException) {
// If all parsing fails, return as-is (already in new format)
Jsons.valueToTree(stateValue)
}
}
else -> Jsons.valueToTree(stateValue)
}
else ->
throw IllegalStateException(
"PK field must be leaf type but is ${field.type.airbyteSchemaType}."
)
}
}
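// Illustrative conversions (hedged; exact fractional-second padding depends on
// OffsetDateTimeCodec.formatter):
//   TIMESTAMP_WITH_TIMEZONE "2024-03-05T14:30:00 +02:00" -> "2024-03-05T14:30:00.000000+02:00"
//   TIMESTAMP_WITH_TIMEZONE "2024-03-05T14:30:00"        -> "2024-03-05T14:30:00.000000Z" (UTC assumed)
//   INTEGER "42" -> 42 (numeric node)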
sealed class MsSqlServerJdbcPartition(
val selectQueryGenerator: SelectQueryGenerator,
streamState: DefaultJdbcStreamState,
) : JdbcPartition<DefaultJdbcStreamState> {
val stream: Stream = streamState.stream
val from = From(stream.name, stream.namespace)
override val nonResumableQuery: SelectQuery
get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize())
open val nonResumableQuerySpec = SelectQuerySpec(SelectColumns(stream.fields), from)
override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery {
val sampleSize: Int = streamState.sharedState.maxSampleSize
val querySpec =
SelectQuerySpec(
SelectColumns(stream.fields),
From(stream.name, stream.namespace),
limit = Limit(sampleSize.toLong()),
)
return selectQueryGenerator.generate(querySpec.optimize())
}
}
class MsSqlServerJdbcNonResumableSnapshotPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
) : MsSqlServerJdbcPartition(selectQueryGenerator, streamState) {
override val completeState: OpaqueStateValue = MsSqlServerJdbcStreamStateValue.snapshotCompleted
}
class MsSqlServerJdbcNonResumableSnapshotWithCursorPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
val cursor: Field,
val cursorCutoffTime: JsonNode? = null,
) :
MsSqlServerJdbcPartition(selectQueryGenerator, streamState),
JdbcCursorPartition<DefaultJdbcStreamState> {
override val completeState: OpaqueStateValue
get() =
MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
cursor,
cursorCheckpoint = streamState.cursorUpperBound!!,
)
override val cursorUpperBoundQuery: SelectQuery
get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize())
val cursorUpperBoundQuerySpec: SelectQuerySpec
get() =
if (cursorCutoffTime != null) {
// When excluding today's data, apply cutoff constraint to upper bound query too
SelectQuerySpec(
SelectColumnMaxValue(cursor),
from,
Where(Lesser(cursor, cursorCutoffTime))
)
} else {
SelectQuerySpec(SelectColumnMaxValue(cursor), from)
}
override val nonResumableQuerySpec: SelectQuerySpec
get() {
// Add cutoff time constraint if present
return if (cursorCutoffTime != null) {
SelectQuerySpec(
SelectColumns(stream.fields),
from,
Where(Lesser(cursor, cursorCutoffTime))
)
} else {
SelectQuerySpec(SelectColumns(stream.fields), from)
}
}
}
sealed class MsSqlServerJdbcResumablePartition(
selectQueryGenerator: SelectQueryGenerator,
streamState: DefaultJdbcStreamState,
val checkpointColumns: List<Field>,
) :
MsSqlServerJdbcPartition(selectQueryGenerator, streamState),
JdbcSplittablePartition<DefaultJdbcStreamState> {
abstract val lowerBound: List<JsonNode>?
abstract val upperBound: List<JsonNode>?
override val nonResumableQuery: SelectQuery
get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize())
override val nonResumableQuerySpec: SelectQuerySpec
get() = SelectQuerySpec(SelectColumns(stream.fields), from, where)
override fun resumableQuery(limit: Long): SelectQuery {
val querySpec =
SelectQuerySpec(
SelectColumns((stream.fields + checkpointColumns).distinct()),
from,
where,
OrderBy(checkpointColumns),
Limit(limit),
)
return selectQueryGenerator.generate(querySpec.optimize())
}
override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery {
val sampleSize: Int = streamState.sharedState.maxSampleSize
val querySpec =
SelectQuerySpec(
SelectColumns(stream.fields + checkpointColumns),
FromSample(stream.name, stream.namespace, sampleRateInvPow2, sampleSize),
NoWhere,
OrderBy(checkpointColumns),
Limit(sampleSize.toLong())
)
return selectQueryGenerator.generate(querySpec.optimize())
}
val where: Where
get() {
val zippedLowerBound: List<Pair<Field, JsonNode>> =
lowerBound?.let { checkpointColumns.zip(it) } ?: listOf()
val lowerBoundDisj: List<WhereClauseNode> =
zippedLowerBound.mapIndexed { idx: Int, (gtCol: Field, gtValue: JsonNode) ->
val lastLeaf: WhereClauseLeafNode =
if (isLowerBoundIncluded && idx == checkpointColumns.size - 1) {
GreaterOrEqual(gtCol, gtValue)
} else {
Greater(gtCol, gtValue)
}
And(
zippedLowerBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) ->
Equal(eqCol, eqValue)
} + listOf(lastLeaf),
)
}
val zippedUpperBound: List<Pair<Field, JsonNode>> =
upperBound?.let { checkpointColumns.zip(it) } ?: listOf()
val upperBoundDisj: List<WhereClauseNode> =
zippedUpperBound.mapIndexed { idx: Int, (leqCol: Field, leqValue: JsonNode) ->
val lastLeaf: WhereClauseLeafNode =
if (idx < zippedUpperBound.size - 1) {
Lesser(leqCol, leqValue)
} else {
LesserOrEqual(leqCol, leqValue)
}
And(
zippedUpperBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) ->
Equal(eqCol, eqValue)
} + listOf(lastLeaf),
)
}
val baseClause = And(Or(lowerBoundDisj), Or(upperBoundDisj))
// Add additional where clause if present
val additional = additionalWhereClause
return if (additional != null) {
Where(And(baseClause, additional))
} else {
Where(baseClause)
}
}
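    // Illustrative shape (hypothetical key columns (a, b), lower bound (x, y),
    // upper bound (u, v)) -- a lexicographic range predicate:
    //   ((a > x) OR (a = x AND b > y)) AND ((a < u) OR (a = u AND b <= v))
    // where "b > y" relaxes to "b >= y" when isLowerBoundIncluded is true.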
open val isLowerBoundIncluded: Boolean = false
open val additionalWhereClause: WhereClauseNode? = null
}
/** RFR for cursor based read. */
class MsSqlServerJdbcRfrSnapshotPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
primaryKey: List<Field>,
override val lowerBound: List<JsonNode>?,
override val upperBound: List<JsonNode>?,
) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
// TODO: this needs to reflect lastRecord. Complete state needs to have last primary key value
// in RFR case.
override val completeState: OpaqueStateValue
get() =
when (upperBound) {
null -> MsSqlServerJdbcStreamStateValue.snapshotCompleted
else ->
MsSqlServerJdbcStreamStateValue.snapshotCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = upperBound,
)
}
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
MsSqlServerJdbcStreamStateValue.snapshotCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
)
}
/** RFR for CDC. */
class MsSqlServerJdbcCdcRfrSnapshotPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
primaryKey: List<Field>,
override val lowerBound: List<JsonNode>?,
override val upperBound: List<JsonNode>?,
) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
override val completeState: OpaqueStateValue
get() =
when (upperBound) {
null -> MsSqlServerCdcInitialSnapshotStateValue.getSnapshotCompletedState(stream)
else ->
MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = upperBound,
)
}
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
)
}
/**
* Implementation of a [JdbcPartition] for a CDC snapshot partition. Used for incremental CDC
* initial sync.
*/
class MsSqlServerJdbcCdcSnapshotPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
primaryKey: List<Field>,
override val lowerBound: List<JsonNode>?
) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
override val upperBound: List<JsonNode>? = null
override val completeState: OpaqueStateValue
get() = MsSqlServerCdcInitialSnapshotStateValue.getSnapshotCompletedState(stream)
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
)
}
sealed class MsSqlServerJdbcCursorPartition(
selectQueryGenerator: SelectQueryGenerator,
streamState: DefaultJdbcStreamState,
checkpointColumns: List<Field>,
val cursor: Field,
private val explicitCursorUpperBound: JsonNode?,
val cursorCutoffTime: JsonNode? = null,
) :
MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, checkpointColumns),
JdbcCursorPartition<DefaultJdbcStreamState> {
val cursorUpperBound: JsonNode
get() = explicitCursorUpperBound ?: streamState.cursorUpperBound!!
override val cursorUpperBoundQuery: SelectQuery
get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize())
val cursorUpperBoundQuerySpec: SelectQuerySpec
get() =
if (cursorCutoffTime != null && checkpointColumns.contains(cursor)) {
// When excluding today's data, apply cutoff constraint to upper bound query too
SelectQuerySpec(
SelectColumnMaxValue(cursor),
from,
Where(Lesser(cursor, cursorCutoffTime))
)
} else {
SelectQuerySpec(SelectColumnMaxValue(cursor), from)
}
override val additionalWhereClause: WhereClauseNode?
get() =
if (cursorCutoffTime != null && checkpointColumns.contains(cursor)) {
// Add an additional constraint for the cutoff time
Lesser(cursor, cursorCutoffTime)
} else {
null
}
}
class MsSqlServerJdbcSnapshotWithCursorPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
primaryKey: List<Field>,
override val lowerBound: List<JsonNode>?,
cursor: Field,
cursorUpperBound: JsonNode?,
cursorCutoffTime: JsonNode? = null,
) :
MsSqlServerJdbcCursorPartition(
selectQueryGenerator,
streamState,
primaryKey,
cursor,
cursorUpperBound,
cursorCutoffTime
) {
// UpperBound is always null for the initial partition that gets split
override val upperBound: List<JsonNode>? = null
override val completeState: OpaqueStateValue
get() {
// Handle cursor cutoff time first
val effectiveCursorCheckpoint =
if (
cursorCutoffTime != null &&
!cursorCutoffTime.isNull &&
!cursorUpperBound.isNull &&
cursorCutoffTime.asText() < cursorUpperBound.asText()
) {
cursorCutoffTime
} else {
cursorUpperBound
}
// Since this is the initial partition (that can be split),
// completion means moving to cursor incremental mode
return MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
cursor,
effectiveCursorCheckpoint,
)
}
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
cursor,
)
}
class MsSqlServerJdbcSplittableSnapshotWithCursorPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
primaryKey: List<Field>,
override val lowerBound: List<JsonNode>?,
override val upperBound: List<JsonNode>?,
cursor: Field,
cursorUpperBound: JsonNode?,
cursorCutoffTime: JsonNode? = null,
) :
MsSqlServerJdbcCursorPartition(
selectQueryGenerator,
streamState,
primaryKey,
cursor,
cursorUpperBound,
cursorCutoffTime
) {
override val completeState: OpaqueStateValue
get() {
// Handle cursor cutoff time first
val effectiveCursorCheckpoint =
if (
cursorCutoffTime != null &&
!cursorCutoffTime.isNull &&
!cursorUpperBound.isNull &&
cursorCutoffTime.asText() < cursorUpperBound.asText()
) {
cursorCutoffTime
} else {
cursorUpperBound
}
return when (upperBound) {
null ->
MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
cursor,
effectiveCursorCheckpoint,
)
else ->
MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = upperBound,
cursor,
)
}
}
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
primaryKey = checkpointColumns,
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
cursor,
)
}
/**
* Default implementation of a [JdbcPartition] for a cursor incremental partition. These are always
* splittable.
*/
class MsSqlServerJdbcCursorIncrementalPartition(
selectQueryGenerator: SelectQueryGenerator,
override val streamState: DefaultJdbcStreamState,
cursor: Field,
val cursorLowerBound: JsonNode,
override val isLowerBoundIncluded: Boolean,
cursorUpperBound: JsonNode?,
cursorCutoffTime: JsonNode? = null,
) :
MsSqlServerJdbcCursorPartition(
selectQueryGenerator,
streamState,
listOf(cursor),
cursor,
cursorUpperBound,
cursorCutoffTime
) {
override val lowerBound: List<JsonNode> = listOf(cursorLowerBound)
override val upperBound: List<JsonNode>
get() = listOf(cursorUpperBound)
override val completeState: OpaqueStateValue
get() {
// When we have a cutoff time that's less than the upper bound,
// use the cutoff as the checkpoint since that's where we actually stopped reading
val effectiveCheckpoint =
if (
cursorCutoffTime != null &&
!cursorCutoffTime.isNull &&
!cursorUpperBound.isNull &&
cursorCutoffTime.asText() < cursorUpperBound.asText()
) {
cursorCutoffTime
} else {
cursorUpperBound
}
return MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
cursor,
cursorCheckpoint = effectiveCheckpoint,
)
}
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
cursor,
cursorCheckpoint = lastRecord[cursor.id] ?: Jsons.nullNode(),
)
}
// Extension methods for splitting MSSQL partitions
fun MsSqlServerJdbcRfrSnapshotPartition.split(
opaqueStateValues: List<OpaqueStateValue>
): List<MsSqlServerJdbcRfrSnapshotPartition> {
val splitPointValues: List<MsSqlServerJdbcStreamStateValue> =
opaqueStateValues.map { MsSqlServerStateMigration.parseStateValue(it) }
val inners: List<List<JsonNode>> =
splitPointValues.mapNotNull { sv ->
val pkField = checkpointColumns.firstOrNull()
if (pkField != null && sv.pkValue != null) {
listOf(stateValueToJsonNode(pkField, sv.pkValue))
} else null
}
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
MsSqlServerJdbcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
checkpointColumns,
lowerBound,
upperBound,
)
}
}
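// Illustrative split (hypothetical PK values): with lowerBound = null,
// upperBound = [1000], and split points [250, 700], zipping lbs with ubs yields
// three contiguous sub-partitions covering (-inf, 250], (250, 700], (700, 1000].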
fun MsSqlServerJdbcCdcRfrSnapshotPartition.split(
opaqueStateValues: List<OpaqueStateValue>
): List<MsSqlServerJdbcCdcRfrSnapshotPartition> {
val splitPointValues: List<MsSqlServerCdcInitialSnapshotStateValue> =
opaqueStateValues.map {
Jsons.treeToValue(it, MsSqlServerCdcInitialSnapshotStateValue::class.java)
}
val inners: List<List<JsonNode>> =
splitPointValues.mapNotNull { sv ->
val pkField = checkpointColumns.firstOrNull()
if (pkField != null && sv.pkVal != null) {
listOf(stateValueToJsonNode(pkField, sv.pkVal))
} else null
}
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
MsSqlServerJdbcCdcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
checkpointColumns,
lowerBound,
upperBound,
)
}
}
fun MsSqlServerJdbcCdcSnapshotPartition.split(
opaqueStateValues: List<OpaqueStateValue>
): List<MsSqlServerJdbcCdcRfrSnapshotPartition> {
val splitPointValues: List<MsSqlServerCdcInitialSnapshotStateValue> =
opaqueStateValues.map {
Jsons.treeToValue(it, MsSqlServerCdcInitialSnapshotStateValue::class.java)
}
val inners: List<List<JsonNode>> =
splitPointValues.mapNotNull { sv ->
val pkField = checkpointColumns.firstOrNull()
if (pkField != null && sv.pkVal != null) {
listOf(stateValueToJsonNode(pkField, sv.pkVal))
} else null
}
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
MsSqlServerJdbcCdcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
checkpointColumns,
lowerBound,
upperBound,
)
}
}
fun MsSqlServerJdbcSnapshotWithCursorPartition.split(
opaqueStateValues: List<OpaqueStateValue>
): List<MsSqlServerJdbcSplittableSnapshotWithCursorPartition> {
val splitPointValues: List<MsSqlServerJdbcStreamStateValue> =
opaqueStateValues.map { MsSqlServerStateMigration.parseStateValue(it) }
val inners: List<List<JsonNode>> =
splitPointValues.mapNotNull { sv ->
val pkField = checkpointColumns.firstOrNull()
if (pkField != null && sv.pkValue != null) {
listOf(stateValueToJsonNode(pkField, sv.pkValue))
} else null
}
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
MsSqlServerJdbcSplittableSnapshotWithCursorPartition(
selectQueryGenerator,
streamState,
checkpointColumns,
lowerBound,
upperBound,
cursor,
cursorUpperBound,
cursorCutoffTime,
)
}
}

View File

@@ -0,0 +1,354 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.StreamIdentifier
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.cdk.jdbc.JdbcFieldType
import io.airbyte.cdk.read.ConfiguredSyncMode
import io.airbyte.cdk.read.DefaultJdbcSharedState
import io.airbyte.cdk.read.DefaultJdbcStreamState
import io.airbyte.cdk.read.From
import io.airbyte.cdk.read.JdbcPartitionFactory
import io.airbyte.cdk.read.SelectColumnMaxValue
import io.airbyte.cdk.read.SelectQuerySpec
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.read.StreamFeedBootstrap
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Primary
import java.time.format.DateTimeFormatter
import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.ChronoField
import java.util.concurrent.ConcurrentHashMap
import javax.inject.Singleton
@Primary
@Singleton
class MsSqlServerJdbcPartitionFactory(
override val sharedState: DefaultJdbcSharedState,
val selectQueryGenerator: MsSqlSourceOperations,
val config: MsSqlServerSourceConfiguration,
) :
JdbcPartitionFactory<
DefaultJdbcSharedState,
DefaultJdbcStreamState,
MsSqlServerJdbcPartition,
> {
private val log = KotlinLogging.logger {}
private val streamStates = ConcurrentHashMap<StreamIdentifier, DefaultJdbcStreamState>()
override fun streamState(streamFeedBootstrap: StreamFeedBootstrap): DefaultJdbcStreamState =
streamStates.getOrPut(streamFeedBootstrap.feed.id) {
DefaultJdbcStreamState(sharedState, streamFeedBootstrap)
}
private fun findPkUpperBound(stream: Stream, pkChosenFromCatalog: List<Field>): JsonNode {
// find upper bound using maxPk query
val jdbcConnectionFactory = JdbcConnectionFactory(config)
val from = From(stream.name, stream.namespace)
val maxPkQuery = SelectQuerySpec(SelectColumnMaxValue(pkChosenFromCatalog[0]), from)
jdbcConnectionFactory.get().use { connection ->
val stmt = connection.prepareStatement(selectQueryGenerator.generate(maxPkQuery).sql)
val rs = stmt.executeQuery()
if (rs.next()) {
val jdbcFieldType = pkChosenFromCatalog[0].type as JdbcFieldType<*>
val pkUpperBound: JsonNode = jdbcFieldType.get(rs, 1)
return pkUpperBound
} else {
                // Table might be empty, thus there is no max PK value.
return Jsons.nullNode()
}
}
}
private fun coldStart(streamState: DefaultJdbcStreamState): MsSqlServerJdbcPartition {
val stream: Stream = streamState.stream
val pkChosenFromCatalog: List<Field> = stream.configuredPrimaryKey ?: listOf()
if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
if (pkChosenFromCatalog.isEmpty()) {
return MsSqlServerJdbcNonResumableSnapshotPartition(
selectQueryGenerator,
streamState,
)
}
val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
return if (sharedState.configuration.global) {
MsSqlServerJdbcCdcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = null,
upperBound = listOf(upperBound),
)
} else {
MsSqlServerJdbcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = null,
upperBound = listOf(upperBound),
)
}
}
if (sharedState.configuration.global) {
return MsSqlServerJdbcCdcSnapshotPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = null,
)
}
val cursorChosenFromCatalog: Field =
stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor")
// Calculate cutoff time for cursor if exclude today's data is enabled
val cursorCutoffTime = getCursorCutoffTime(cursorChosenFromCatalog)
if (pkChosenFromCatalog.isEmpty()) {
return MsSqlServerJdbcNonResumableSnapshotWithCursorPartition(
selectQueryGenerator,
streamState,
cursorChosenFromCatalog,
cursorCutoffTime = cursorCutoffTime,
)
}
return MsSqlServerJdbcSnapshotWithCursorPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = null,
cursorChosenFromCatalog,
cursorUpperBound = null,
cursorCutoffTime = cursorCutoffTime,
)
}
/**
* Flowchart:
 * 1. If the input state is null, use cold start.
* ```
* a. If it's global but without PK, use non-resumable snapshot.
* b. If it's global with PK, use snapshot.
* c. If it's not global, use snapshot with cursor.
* ```
* 2. If the input state is not null -
* ```
* a. If it's in global mode, JdbcPartitionFactory will not handle this. (TODO)
* b. If it's cursor based, it could be either in PK read phase (initial read) or
* cursor read phase (incremental read). This is differentiated by the stateType.
* i. In PK read phase, use snapshot with cursor. If no PKs were found,
* use non-resumable snapshot with cursor.
* ii. In cursor read phase, use cursor incremental.
* ```
*/
override fun create(streamFeedBootstrap: StreamFeedBootstrap): MsSqlServerJdbcPartition? {
val stream: Stream = streamFeedBootstrap.feed
val streamState: DefaultJdbcStreamState = streamState(streamFeedBootstrap)
val opaqueStateValue: OpaqueStateValue =
streamFeedBootstrap.currentState ?: return coldStart(streamState)
val isCursorBased: Boolean = !sharedState.configuration.global
val pkChosenFromCatalog: List<Field> = stream.configuredPrimaryKey ?: listOf()
if (
pkChosenFromCatalog.isEmpty() &&
stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH
) {
if (
streamState.streamFeedBootstrap.currentState ==
MsSqlServerJdbcStreamStateValue.snapshotCompleted
) {
return null
}
return MsSqlServerJdbcNonResumableSnapshotPartition(
selectQueryGenerator,
streamState,
)
}
if (!isCursorBased) {
val sv: MsSqlServerCdcInitialSnapshotStateValue =
Jsons.treeToValue(
opaqueStateValue,
MsSqlServerCdcInitialSnapshotStateValue::class.java
)
if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
if (sv.pkVal == upperBound.asText()) {
return null
}
val pkLowerBound: JsonNode = stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkVal)
return MsSqlServerJdbcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
upperBound = listOf(upperBound)
)
}
if (sv.pkName == null) {
                // This indicates the initial snapshot has been completed. CDC snapshot will be handled
// by CDCPartitionFactory.
// Nothing to do here.
return null
} else {
                // This branch indicates the snapshot is incomplete. We need to resume based on the previous
// snapshot state.
val pkField = pkChosenFromCatalog.first()
val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkVal)
return MsSqlServerJdbcCdcSnapshotPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
listOf(pkLowerBound),
)
}
} else {
val sv: MsSqlServerJdbcStreamStateValue =
MsSqlServerStateMigration.parseStateValue(opaqueStateValue)
if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
if (sv.pkValue == upperBound.asText()) {
return null
}
val pkLowerBound: JsonNode =
stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkValue)
return MsSqlServerJdbcRfrSnapshotPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
upperBound = listOf(upperBound)
)
}
if (sv.stateType != StateType.CURSOR_BASED.stateType) {
                // Loading value from catalog. Note: there could be unexpected behavior if the
                // user updates their schema but does not reset their state.
val pkField = pkChosenFromCatalog.first()
val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkValue)
val cursorChosenFromCatalog: Field =
stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor")
// in a state where it's still in primary_key read part.
return MsSqlServerJdbcSnapshotWithCursorPartition(
selectQueryGenerator,
streamState,
pkChosenFromCatalog,
lowerBound = listOf(pkLowerBound),
cursorChosenFromCatalog,
cursorUpperBound = null,
cursorCutoffTime = getCursorCutoffTime(cursorChosenFromCatalog),
)
}
// resume back to cursor based increment.
            val cursor: Field =
                stream.fields.find { it.id == sv.cursorField.first() } as? Field
                    ?: throw ConfigErrorException(
                        "cursor field '${sv.cursorField.first()}' not found in stream fields"
                    )
val cursorCheckpoint: JsonNode = stateValueToJsonNode(cursor, sv.cursor)
            // If the cursor checkpoint has already reached the cursor upper bound, the
            // incremental read is complete.
if (cursorCheckpoint.toString() == streamState.cursorUpperBound?.toString()) {
// Incremental complete.
return null
}
return MsSqlServerJdbcCursorIncrementalPartition(
selectQueryGenerator,
streamState,
cursor,
cursorLowerBound = cursorCheckpoint,
isLowerBoundIncluded = false,
cursorUpperBound = streamState.cursorUpperBound,
cursorCutoffTime = getCursorCutoffTime(cursor),
)
}
}
private fun getCursorCutoffTime(cursorField: Field): JsonNode? {
val incrementalConfig = config.incrementalReplicationConfiguration
return if (
incrementalConfig is UserDefinedCursorIncrementalConfiguration &&
incrementalConfig.excludeTodaysData &&
MsSqlServerCursorCutoffTimeProvider.isTemporalType(
cursorField,
)
) {
val cutoffTime = MsSqlServerCursorCutoffTimeProvider.getCutoffTime(cursorField)
log.info { "Using cursor cutoff time: $cutoffTime for field '${cursorField.id}'" }
cutoffTime
} else {
null
}
}
override fun split(
unsplitPartition: MsSqlServerJdbcPartition,
opaqueStateValues: List<OpaqueStateValue>
): List<MsSqlServerJdbcPartition> {
return when (unsplitPartition) {
is MsSqlServerJdbcRfrSnapshotPartition -> unsplitPartition.split(opaqueStateValues)
is MsSqlServerJdbcCdcRfrSnapshotPartition -> unsplitPartition.split(opaqueStateValues)
is MsSqlServerJdbcCdcSnapshotPartition -> unsplitPartition.split(opaqueStateValues)
is MsSqlServerJdbcSnapshotWithCursorPartition ->
unsplitPartition.split(opaqueStateValues)
is MsSqlServerJdbcSplittableSnapshotWithCursorPartition -> listOf(unsplitPartition)
is MsSqlServerJdbcCursorIncrementalPartition -> listOf(unsplitPartition)
is MsSqlServerJdbcNonResumableSnapshotPartition -> listOf(unsplitPartition)
is MsSqlServerJdbcNonResumableSnapshotWithCursorPartition -> listOf(unsplitPartition)
}
}
companion object {
const val DATETIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
val outputDateFormatter: DateTimeFormatter = DateTimeFormatter.ofPattern(DATETIME_PATTERN)
        const val TIMESTAMP_WITHOUT_FRACT_SECOND_PATTERN = "yyyy-MM-dd'T'HH:mm:ss"
val inputDateFormatter: DateTimeFormatter =
DateTimeFormatterBuilder()
.appendPattern(TIMESTAMP_WITHOUT_FRACT_SECOND_PATTERN)
.optionalStart()
.appendFraction(ChronoField.NANO_OF_SECOND, 1, 6, true)
.optionalEnd()
.toFormatter()
// Parser for timestamps without timezone info
val timestampWithoutTimezoneParser: DateTimeFormatter =
DateTimeFormatterBuilder()
.appendPattern("yyyy-MM-dd'T'HH:mm:ss")
.optionalStart()
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
.optionalEnd()
.toFormatter()
// Parser for timestamps with timezone info
val timestampWithTimezoneParser: DateTimeFormatter =
DateTimeFormatterBuilder()
.appendPattern("yyyy-MM-dd'T'HH:mm:ss")
.optionalStart()
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
.optionalEnd()
.appendOffset("+HH:MM", "Z")
.toFormatter()
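        // Illustrative inputs (hedged): inputDateFormatter accepts
        // "2024-01-02T03:04:05" and "2024-01-02T03:04:05.123456";
        // timestampWithTimezoneParser additionally requires an offset,
        // e.g. "2024-01-02T03:04:05.123+05:30" or "2024-01-02T03:04:05Z".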
}
}

View File

@@ -0,0 +1,99 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
private val log = KotlinLogging.logger {}
data class MsSqlServerJdbcStreamStateValue(
@JsonProperty("cursor") val cursor: String = "",
@JsonProperty("version") val version: Int = CURRENT_VERSION,
@JsonProperty("state_type") val stateType: String = StateType.CURSOR_BASED.stateType,
@JsonProperty("cursor_field") val cursorField: List<String> = listOf(),
@JsonProperty("cursor_record_count") val cursorRecordCount: Int = 0,
@JsonProperty("pk_name") val pkName: String? = null,
@JsonProperty("pk_val") val pkValue: String? = null,
@JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
) {
companion object {
/** Current state version used by the new CDK MSSQL connector */
const val CURRENT_VERSION = 3
/** Legacy state version used by the old CDK MSSQL connector */
const val LEGACY_VERSION = 2
/**
* Determines if a given version number represents a legacy state format
* @param version The version number to check (null is considered legacy)
* @return true if the version is legacy and needs migration
*/
fun isLegacy(version: Int?): Boolean = version == null || version <= LEGACY_VERSION
/** Value representing the completion of a FULL_REFRESH snapshot. */
val snapshotCompleted: OpaqueStateValue
get() = Jsons.valueToTree(MsSqlServerJdbcStreamStateValue(stateType = "primary_key"))
/** Value representing the progress of an ongoing incremental cursor read. */
fun cursorIncrementalCheckpoint(
cursor: Field,
cursorCheckpoint: JsonNode,
): OpaqueStateValue {
return Jsons.valueToTree(
MsSqlServerJdbcStreamStateValue(
cursorField = listOf(cursor.id),
cursor = cursorCheckpoint.asText(),
)
)
}
/** Value representing the progress of an ongoing snapshot not involving cursor columns. */
fun snapshotCheckpoint(
primaryKey: List<Field>,
primaryKeyCheckpoint: List<JsonNode>,
): OpaqueStateValue {
val primaryKeyField = primaryKey.first()
return Jsons.valueToTree(
MsSqlServerJdbcStreamStateValue(
pkName = primaryKeyField.id,
pkValue = primaryKeyCheckpoint.first().asText(),
stateType = StateType.PRIMARY_KEY.stateType,
)
)
}
/** Value representing the progress of an ongoing snapshot involving cursor columns. */
fun snapshotWithCursorCheckpoint(
primaryKey: List<Field>,
primaryKeyCheckpoint: List<JsonNode>,
cursor: Field,
): OpaqueStateValue {
val primaryKeyField = primaryKey.first()
return Jsons.valueToTree(
MsSqlServerJdbcStreamStateValue(
pkName = primaryKeyField.id,
pkValue = primaryKeyCheckpoint.first().asText(),
stateType = StateType.PRIMARY_KEY.stateType,
incrementalState =
Jsons.valueToTree(
MsSqlServerJdbcStreamStateValue(
cursorField = listOf(cursor.id),
)
),
)
)
}
}
}
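// Illustrative serialized checkpoint (hypothetical cursor value; null-field
// inclusion depends on the mapper configuration), as produced by
// cursorIncrementalCheckpoint:
//   {"cursor":"2024-01-01T00:00:00.000000","version":3,"state_type":"cursor_based",
//    "cursor_field":["updated_at"],"cursor_record_count":0}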
enum class StateType(val stateType: String) {
PRIMARY_KEY("primary_key"),
CURSOR_BASED("cursor_based"),
}

View File

@@ -0,0 +1,14 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.AirbyteSourceRunner
object MsSqlServerSource {
@JvmStatic
fun main(args: Array<String>) {
AirbyteSourceRunner.run(*args)
}
}

View File

@@ -0,0 +1,273 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.command.*
import io.airbyte.cdk.jdbc.SSLCertificateUtils
import io.airbyte.cdk.output.DataChannelMedium
import io.airbyte.cdk.output.sockets.DATA_CHANNEL_PROPERTY_PREFIX
import io.airbyte.cdk.ssh.SshConnectionOptions
import io.airbyte.cdk.ssh.SshNoTunnelMethod
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Value
import jakarta.inject.Inject
import jakarta.inject.Singleton
import java.net.URLDecoder
import java.nio.charset.StandardCharsets
import java.time.Duration
import org.apache.commons.lang3.RandomStringUtils
private val log = KotlinLogging.logger {}
class MsSqlServerSourceConfiguration(
override val realHost: String,
override val realPort: Int,
override val sshTunnel: SshTunnelMethodConfiguration?,
override val sshConnectionOptions: SshConnectionOptions,
override val jdbcUrlFmt: String,
override val jdbcProperties: Map<String, String>,
override val namespaces: Set<String>,
override val maxConcurrency: Int,
override val resourceAcquisitionHeartbeat: Duration = Duration.ofMillis(100L),
override val checkpointTargetInterval: Duration,
override val checkPrivileges: Boolean,
override val debeziumHeartbeatInterval: Duration = Duration.ofSeconds(10),
val incrementalReplicationConfiguration: IncrementalConfiguration,
val databaseName: String,
) : JdbcSourceConfiguration, CdcSourceConfiguration {
override val global = incrementalReplicationConfiguration is CdcIncrementalConfiguration
override val maxSnapshotReadDuration: Duration? =
(incrementalReplicationConfiguration as? CdcIncrementalConfiguration)?.initialLoadTimeout
/** Required to inject [MsSqlServerSourceConfiguration] directly. */
@Factory
private class MicronautFactory {
@Singleton
fun mssqlServerSourceConfig(
factory:
SourceConfigurationFactory<
MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration>,
supplier:
ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>,
): MsSqlServerSourceConfiguration = factory.make(supplier.get())
}
}
sealed interface IncrementalConfiguration
data class UserDefinedCursorIncrementalConfiguration(val excludeTodaysData: Boolean = false) :
IncrementalConfiguration
data class CdcIncrementalConfiguration(
val initialWaitingSeconds: Duration,
val invalidCdcCursorPositionBehavior: InvalidCdcCursorPositionBehavior,
val initialLoadTimeout: Duration,
val pollIntervalMs: Int
) : IncrementalConfiguration
enum class InvalidCdcCursorPositionBehavior {
FAIL_SYNC,
RESET_SYNC,
}
@Singleton
class MsSqlServerSourceConfigurationFactory
@Inject
constructor(
val featureFlags: Set<FeatureFlag>,
@Value("\${${DATA_CHANNEL_PROPERTY_PREFIX}.medium}")
val dataChannelMedium: String = DataChannelMedium.STDIO.name,
@Value("\${${DATA_CHANNEL_PROPERTY_PREFIX}.socket-paths}")
val socketPaths: List<String> = emptyList(),
) :
SourceConfigurationFactory<
MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration> {
constructor() : this(emptySet(), DataChannelMedium.STDIO.name, emptyList())
override fun makeWithoutExceptionHandling(
pojo: MsSqlServerSourceConfigurationSpecification,
): MsSqlServerSourceConfiguration {
val incrementalSpec = pojo.getIncrementalValue()
val incrementalReplicationConfiguration =
when (incrementalSpec) {
is UserDefinedCursor -> {
UserDefinedCursorIncrementalConfiguration(
excludeTodaysData = incrementalSpec.excludeTodaysData ?: false
)
}
is Cdc -> {
val initialWaitingSeconds: Duration =
Duration.ofSeconds(incrementalSpec.initialWaitingSeconds?.toLong() ?: 300L)
val initialLoadTimeout: Duration =
Duration.ofHours(incrementalSpec.initialLoadTimeoutHours?.toLong() ?: 8L)
val invalidCdcCursorPositionBehavior: InvalidCdcCursorPositionBehavior =
if (incrementalSpec.invalidCdcCursorPositionBehavior == "Fail sync") {
InvalidCdcCursorPositionBehavior.FAIL_SYNC
} else {
InvalidCdcCursorPositionBehavior.RESET_SYNC
}
// Validate poll interval vs heartbeat interval
val pollIntervalMs = incrementalSpec.pollIntervalMs ?: 500
val heartbeatIntervalMs =
MsSqlServerSourceConfigurationSpecification.DEFAULT_HEARTBEAT_INTERVAL_MS
if (pollIntervalMs >= heartbeatIntervalMs) {
throw ConfigErrorException(
"Poll interval ($pollIntervalMs ms) must be smaller than heartbeat interval ($heartbeatIntervalMs ms). " +
"Please reduce the poll interval to a value less than $heartbeatIntervalMs ms."
)
}
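                    // E.g. (illustrative): the default pollIntervalMs of 500 passes
                    // against the 15000 ms heartbeat; a value of 20000 would raise
                    // a ConfigErrorException here.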
CdcIncrementalConfiguration(
initialWaitingSeconds,
invalidCdcCursorPositionBehavior,
initialLoadTimeout,
pollIntervalMs,
)
}
}
val sshTunnel: SshTunnelMethodConfiguration? = pojo.getTunnelMethodValue()
// Check if encryption was explicitly set in JSON (encryptionJson != null)
// vs using the default value (encryptionJson == null).
// Old connector used "ssl_method" field which was optional, so legacy configs
// won't have ssl_mode at all, resulting in encryptionJson being null.
val isLegacyConfig = pojo.encryptionJson == null
val jdbcEncryption =
when (val encryptionSpec: EncryptionSpecification? = pojo.getEncryptionValue()) {
is MsSqlServerEncryptionDisabledConfigurationSpecification -> {
// For legacy configs without ssl_mode field, allow unencrypted for backward
// compatibility
// even in cloud deployments. This handles migration from old connector
// versions.
if (isLegacyConfig) {
log.warn {
"No encryption configuration found in JSON. " +
"This appears to be a legacy configuration migrated from an older connector version. " +
"Consider adding SSL encryption for better security."
}
mapOf("encrypt" to "false", "trustServerCertificate" to "true")
} else {
// Explicitly disabled encryption (user set ssl_mode.mode = "unencrypted")
// should fail in cloud without SSH tunnel
if (
featureFlags.contains(FeatureFlag.AIRBYTE_CLOUD_DEPLOYMENT) &&
sshTunnel is SshNoTunnelMethod
) {
throw ConfigErrorException(
"Connection from Airbyte Cloud requires " +
"SSL encryption or an SSH tunnel."
)
} else {
mapOf("encrypt" to "false", "trustServerCertificate" to "true")
}
}
}
null -> {
// This should never happen since getEncryptionValue() has a default
mapOf("encrypt" to "false", "trustServerCertificate" to "true")
}
is MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification ->
mapOf("encrypt" to "true", "trustServerCertificate" to "true")
is SslVerifyCertificate -> {
val certificate = encryptionSpec.certificate
val trustStoreProperties =
if (certificate == null) {
emptyMap()
} else {
val password = RandomStringUtils.secure().next(100)
val keyStoreUri =
SSLCertificateUtils.keyStoreFromCertificate(certificate, password)
mapOf(
"trustStore" to keyStoreUri.path,
"trustStorePassword" to password
)
}
val hostNameInCertificate = encryptionSpec.hostNameInCertificate
val hostNameProperties =
if (hostNameInCertificate == null) {
emptyMap()
} else {
mapOf("hostNameInCertificate" to hostNameInCertificate)
}
trustStoreProperties +
hostNameProperties +
mapOf("encrypt" to "true", "trustServerCertificate" to "false")
}
}
// Parse JDBC URL parameters
val jdbcProperties = mutableMapOf<String, String>()
jdbcProperties["user"] = pojo.username
jdbcProperties["password"] = pojo.password
// Parse URL parameters from jdbcUrlParams
val pattern = "^([^=]+)=(.*)$".toRegex()
for (pair in (pojo.jdbcUrlParams ?: "").trim().split("&".toRegex())) {
if (pair.isBlank()) {
continue
}
val result: MatchResult? = pattern.matchEntire(pair)
if (result == null) {
log.warn { "ignoring invalid JDBC URL param '$pair'" }
} else {
val key: String = result.groupValues[1].trim()
val urlEncodedValue: String = result.groupValues[2].trim()
jdbcProperties[key] = URLDecoder.decode(urlEncodedValue, StandardCharsets.UTF_8)
}
}
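        // Illustrative parse (hypothetical params): "loginTimeout=30&applicationName=My%20App"
        // yields jdbcProperties entries "loginTimeout" -> "30" and
        // "applicationName" -> "My App" (values are URL-decoded).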
jdbcProperties.putAll(jdbcEncryption)
// Validate and process configuration values
val checkpointTargetInterval: Duration =
Duration.ofSeconds(pojo.checkpointTargetIntervalSeconds?.toLong() ?: 300L)
if (!checkpointTargetInterval.isPositive) {
throw ConfigErrorException("Checkpoint Target Interval should be positive")
}
var maxConcurrency: Int? = pojo.concurrency
log.info { "maxConcurrency: $maxConcurrency. socket paths: ${socketPaths.size}" }
// If maxConcurrency is set, we use it.
// Otherwise, we use the number of socket paths provided for speed mode
// Or 1 for legacy mode
maxConcurrency =
when (DataChannelMedium.valueOf(dataChannelMedium)) {
DataChannelMedium.STDIO -> maxConcurrency ?: 1
DataChannelMedium.SOCKET -> maxConcurrency ?: socketPaths.size
}
log.info { "Effective concurrency: $maxConcurrency" }
if (maxConcurrency <= 0) {
throw ConfigErrorException("Concurrency setting should be positive")
}
return MsSqlServerSourceConfiguration(
realHost = pojo.host,
realPort = pojo.port,
sshTunnel = sshTunnel,
sshConnectionOptions = SshConnectionOptions.fromAdditionalProperties(emptyMap()),
checkpointTargetInterval = checkpointTargetInterval,
jdbcUrlFmt = "jdbc:sqlserver://%s:%d;databaseName=${pojo.database}",
namespaces = pojo.schemas?.toSet() ?: setOf("dbo"),
jdbcProperties = jdbcProperties,
maxConcurrency = maxConcurrency,
checkPrivileges = pojo.checkPrivileges ?: true,
debeziumHeartbeatInterval =
Duration.ofMillis(
MsSqlServerSourceConfigurationSpecification.DEFAULT_HEARTBEAT_INTERVAL_MS
),
resourceAcquisitionHeartbeat = Duration.ofSeconds(15),
incrementalReplicationConfiguration = incrementalReplicationConfiguration,
databaseName = pojo.database
)
}
}

View File

@@ -0,0 +1,348 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.annotation.JsonAnyGetter
import com.fasterxml.jackson.annotation.JsonAnySetter
import com.fasterxml.jackson.annotation.JsonGetter
import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.annotation.JsonPropertyDescription
import com.fasterxml.jackson.annotation.JsonPropertyOrder
import com.fasterxml.jackson.annotation.JsonSetter
import com.fasterxml.jackson.annotation.JsonSubTypes
import com.fasterxml.jackson.annotation.JsonTypeInfo
import com.fasterxml.jackson.annotation.JsonValue
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDefault
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.command.CONNECTOR_CONFIG_PREFIX
import io.airbyte.cdk.command.ConfigurationSpecification
import io.airbyte.cdk.ssh.MicronautPropertiesFriendlySshTunnelMethodConfigurationSpecification
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
import io.micronaut.context.annotation.ConfigurationBuilder
import io.micronaut.context.annotation.ConfigurationProperties
import jakarta.inject.Singleton
/**
* The object which is mapped to the MS SQL Server source configuration JSON.
*
* Use [MsSqlServerSourceConfiguration] instead wherever possible. This object also allows injecting
* values through Micronaut properties, this is made possible by the classes named
* `MicronautPropertiesFriendly.*`.
*/
@JsonSchemaTitle("MSSQL Source Spec")
@JsonPropertyOrder(
value = ["host", "port", "database", "username", "replication_method"],
)
@Singleton
@ConfigurationProperties(CONNECTOR_CONFIG_PREFIX)
@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
class MsSqlServerSourceConfigurationSpecification : ConfigurationSpecification() {
@JsonProperty("host")
@JsonSchemaTitle("Host")
@JsonSchemaInject(json = """{"order":0}""")
@JsonPropertyDescription("The hostname of the database.")
lateinit var host: String
@JsonProperty("port")
@JsonSchemaTitle("Port")
@JsonSchemaInject(json = """{"order":1,"minimum": 0,"maximum": 65536, "examples":["1433"]}""")
@JsonSchemaDefault("1433")
@JsonPropertyDescription(
"The port of the database.",
)
var port: Int = 1433
@JsonProperty("database")
@JsonSchemaTitle("Database")
@JsonPropertyDescription("The name of the database.")
@JsonSchemaInject(json = """{"order":2, "examples":["master"]}""")
lateinit var database: String
@JsonProperty("schemas")
@JsonSchemaTitle("Schemas")
@JsonPropertyDescription("The list of schemas to sync from. Defaults to user. Case sensitive.")
@JsonSchemaInject(json = """{"order":3, "default":["dbo"], "minItems":0, "uniqueItems":true}""")
var schemas: Array<String>? = arrayOf("dbo")
@JsonProperty("username")
@JsonSchemaTitle("Username")
@JsonPropertyDescription("The username which is used to access the database.")
@JsonSchemaInject(json = """{"order":4}""")
lateinit var username: String
@JsonProperty("password")
@JsonSchemaTitle("Password")
@JsonPropertyDescription("The password associated with the username.")
@JsonSchemaInject(json = """{"order":5,"airbyte_secret":true}""")
lateinit var password: String
@JsonProperty("jdbc_url_params")
@JsonSchemaTitle("JDBC URL Params")
@JsonPropertyDescription(
"Additional properties to pass to the JDBC URL string when connecting to the database " +
"formatted as 'key=value' pairs separated by the symbol '&'. " +
"(example: key1=value1&key2=value2&key3=value3).",
)
@JsonSchemaInject(json = """{"order":6}""")
var jdbcUrlParams: String? = null
@JsonIgnore
@ConfigurationBuilder(configurationPrefix = "ssl_mode")
var encryption = MicronautPropertiesFriendlyEncryptionSpecification()
@JsonIgnore var encryptionJson: EncryptionSpecification? = null
@JsonSetter("ssl_mode")
fun setEncryptionValue(value: EncryptionSpecification) {
encryptionJson = value
}
@JsonGetter("ssl_mode")
@JsonSchemaTitle("Encryption")
@JsonPropertyDescription(
"The encryption method which is used when communicating with the database.",
)
@JsonSchemaInject(json = """{"order":8,"default":"required"}""")
fun getEncryptionValue(): EncryptionSpecification? = encryptionJson ?: encryption.asEncryption()
@JsonIgnore
@ConfigurationBuilder(configurationPrefix = "tunnel_method")
val tunnelMethod = MicronautPropertiesFriendlySshTunnelMethodConfigurationSpecification()
@JsonIgnore var tunnelMethodJson: SshTunnelMethodConfiguration? = null
@JsonSetter("tunnel_method")
fun setTunnelMethodValue(value: SshTunnelMethodConfiguration) {
tunnelMethodJson = value
}
@JsonGetter("tunnel_method")
@JsonSchemaTitle("SSH Tunnel Method")
@JsonPropertyDescription(
"Whether to initiate an SSH tunnel before connecting to the database, " +
"and if so, which kind of authentication to use.",
)
@JsonSchemaInject(json = """{"order":9}""")
fun getTunnelMethodValue(): SshTunnelMethodConfiguration? =
tunnelMethodJson ?: tunnelMethod.asSshTunnelMethod()
@JsonIgnore
@ConfigurationBuilder(configurationPrefix = "replication_method")
var replicationMethod = MicronautPropertiesFriendlyIncrementalConfigurationSpecification()
@JsonIgnore var replicationMethodJson: IncrementalConfigurationSpecification? = null
@JsonSetter("replication_method")
fun setIncrementalValue(value: IncrementalConfigurationSpecification) {
replicationMethodJson = value
}
@JsonGetter("replication_method")
@JsonSchemaTitle("Update Method")
@JsonPropertyDescription("Configures how data is extracted from the database.")
@JsonSchemaInject(json = """{"order":10,"display_type":"radio"}""")
fun getIncrementalValue(): IncrementalConfigurationSpecification =
replicationMethodJson ?: replicationMethod.asCursorMethodConfiguration()
@JsonProperty("checkpoint_target_interval_seconds")
@JsonSchemaTitle("Checkpoint Target Time Interval")
@JsonSchemaInject(json = """{"order":11}""")
@JsonSchemaDefault("300")
@JsonPropertyDescription("How often (in seconds) a stream should checkpoint, when possible.")
var checkpointTargetIntervalSeconds: Int? = 300
@JsonProperty("concurrency")
@JsonSchemaTitle("Concurrency")
@JsonSchemaInject(json = """{"order":12}""")
@JsonPropertyDescription("Maximum number of concurrent queries to the database.")
var concurrency: Int? = 1
@JsonProperty("check_privileges")
@JsonSchemaTitle("Check Table and Column Access Privileges")
@JsonSchemaInject(json = """{"order":13}""")
@JsonSchemaDefault("true")
@JsonPropertyDescription(
"When this feature is enabled, during schema discovery the connector " +
"will query each table or view individually to check access privileges " +
"and inaccessible tables, views, or columns therein will be removed. " +
"In large schemas, this might cause schema discovery to take too long, " +
"in which case it might be advisable to disable this feature.",
)
var checkPrivileges: Boolean? = true
@JsonIgnore var additionalPropertiesMap = mutableMapOf<String, Any>()
@JsonAnyGetter fun getAdditionalProperties(): Map<String, Any> = additionalPropertiesMap
@JsonAnySetter
fun setAdditionalProperty(
name: String,
value: Any,
) {
additionalPropertiesMap[name] = value
}
companion object {
const val DEFAULT_HEARTBEAT_INTERVAL_MS = 15000L
}
}
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "mode")
@JsonSubTypes(
JsonSubTypes.Type(
value = MsSqlServerEncryptionDisabledConfigurationSpecification::class,
name = "unencrypted"
),
JsonSubTypes.Type(
value =
MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification::class,
name = "encrypted_trust_server_certificate"
),
JsonSubTypes.Type(value = SslVerifyCertificate::class, name = "encrypted_verify_certificate"),
)
@JsonSchemaTitle("Encryption")
@JsonSchemaDescription("The encryption method which is used when communicating with the database.")
sealed interface EncryptionSpecification
@JsonSchemaTitle("Unencrypted")
@JsonSchemaDescription(
"Data transfer will not be encrypted.",
)
data object MsSqlServerEncryptionDisabledConfigurationSpecification : EncryptionSpecification
@JsonSchemaTitle("Encrypted (trust server certificate)")
@JsonSchemaDescription(
"Use the certificate provided by the server without verification. (For testing purposes only!)"
)
data object MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification :
EncryptionSpecification
@JsonSchemaTitle("Encrypted (verify certificate)")
@JsonSchemaDescription("Verify and use the certificate provided by the server.")
@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
class SslVerifyCertificate : EncryptionSpecification {
@JsonProperty("hostNameInCertificate")
@JsonSchemaTitle("Host Name In Certificate")
@JsonPropertyDescription(
"Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
)
@JsonSchemaInject(json = """{"order":0}""")
var hostNameInCertificate: String? = null
@JsonProperty("certificate", required = false)
@JsonSchemaTitle("Certificate")
@JsonPropertyDescription(
"certificate of the server, or of the CA that signed the server certificate",
)
@JsonSchemaInject(json = """{"order":1,"airbyte_secret":true,"multiline":true}""")
var certificate: String? = null
}
@ConfigurationProperties("$CONNECTOR_CONFIG_PREFIX.ssl_mode")
class MicronautPropertiesFriendlyEncryptionSpecification {
var mode: String = "unencrypted"
var sslCertificate: String? = null
@JsonValue
fun asEncryption(): EncryptionSpecification =
when (mode) {
"unencrypted" -> MsSqlServerEncryptionDisabledConfigurationSpecification
"Encrypted (trust server certificate)" ->
MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification
"Encrypted (verify certificate)" ->
                SslVerifyCertificate().also { it.certificate = sslCertificate }
else -> throw ConfigErrorException("invalid value $mode")
}
}
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "method")
@JsonSubTypes(
JsonSubTypes.Type(value = UserDefinedCursor::class, name = "STANDARD"),
JsonSubTypes.Type(value = Cdc::class, name = "CDC")
)
@JsonSchemaTitle("Update Method")
@JsonSchemaDescription("Configures how data is extracted from the database.")
sealed interface IncrementalConfigurationSpecification
@JsonSchemaTitle("Scan Changes with User Defined Cursor")
@JsonSchemaDescription(
"Incrementally detects new inserts and updates using the " +
"<a href=\"https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/" +
"#user-defined-cursor\">cursor column</a> chosen when configuring a connection " +
"(e.g. created_at, updated_at).",
)
class UserDefinedCursor : IncrementalConfigurationSpecification {
@JsonProperty("exclude_todays_data")
@JsonSchemaTitle("Exclude Today's Data")
@JsonPropertyDescription(
"When enabled incremental syncs using a cursor of a temporal type (date or datetime) will include cursor values only up until the previous midnight UTC"
)
@JsonSchemaDefault("false")
@JsonSchemaInject(json = """{"order":1,"always_show":true}""")
var excludeTodaysData: Boolean? = false
}
@JsonSchemaTitle("Read Changes using Change Data Capture (CDC)")
@JsonSchemaDescription(
"<i>Recommended</i> - " +
"Incrementally reads new inserts, updates, and deletes using MSSQL's <a href=" +
"\"https://docs.airbyte.com/integrations/sources/mssql/#change-data-capture-cdc\"" +
"> change data capture feature</a>. This must be enabled on your database.",
)
class Cdc : IncrementalConfigurationSpecification {
@JsonProperty("initial_waiting_seconds")
@JsonSchemaTitle("Initial Waiting Time in Seconds (Advanced)")
@JsonPropertyDescription(
"The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about <a href=\"https://docs.airbyte.com/integrations/sources/mssql#setting-up-cdc-for-mssql\">initial waiting time</a>"
)
@JsonSchemaInject(json = """{"order":1,"always_show":true}""")
var initialWaitingSeconds: Int? = null
@JsonProperty("invalid_cdc_cursor_position_behavior")
@JsonSchemaTitle("Invalid CDC Position Behavior (Advanced)")
@JsonPropertyDescription(
"Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value in the mined logs. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
)
@JsonSchemaDefault("Fail sync")
@JsonSchemaInject(
json = """{"order":2,"always_show":true, "enum": ["Fail sync","Re-sync data"]}"""
)
var invalidCdcCursorPositionBehavior: String? = "Fail sync"
@JsonProperty("initial_load_timeout_hours")
@JsonSchemaTitle("Initial Load Timeout in Hours (Advanced)")
@JsonPropertyDescription(
"The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
)
@JsonSchemaDefault("8")
@JsonSchemaInject(json = """{"order":3, "max": 24, "min": 4,"always_show": true}""")
var initialLoadTimeoutHours: Int? = 8
@JsonProperty("poll_interval_ms")
@JsonSchemaTitle("Poll Interval in Milliseconds (Advanced)")
@JsonPropertyDescription(
"How often (in milliseconds) Debezium should poll for new data. Must be smaller than heartbeat interval (15000ms). Lower values provide more responsive data capture but may increase database load.",
)
@JsonSchemaDefault("500")
@JsonSchemaInject(json = """{"order":4, "max": 14999, "min": 100,"always_show": true}""")
var pollIntervalMs: Int? = 500
}
@ConfigurationProperties("$CONNECTOR_CONFIG_PREFIX.replication_method")
class MicronautPropertiesFriendlyIncrementalConfigurationSpecification {
var method: String = "STANDARD"
fun asCursorMethodConfiguration(): IncrementalConfigurationSpecification =
when (method) {
"STANDARD" -> UserDefinedCursor()
"CDC" -> Cdc()
else -> throw ConfigErrorException("invalid value $method")
}
}

View File

@@ -0,0 +1,137 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.annotation.JsonProperty
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
private val log = KotlinLogging.logger {}
/** Represents the old OrderedColumnLoadStatus format used by the legacy MSSQL connector */
data class LegacyOrderedColumnLoadStatus(
@JsonProperty("version") val version: Long? = null,
@JsonProperty("state_type") val stateType: String? = null,
@JsonProperty("ordered_col") val orderedCol: String? = null,
@JsonProperty("ordered_col_val") val orderedColVal: String? = null,
@JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
)
/** Represents the old CursorBasedStatus format used by the legacy MSSQL connector */
data class LegacyCursorBasedStatus(
@JsonProperty("version") val version: Long? = null,
@JsonProperty("state_type") val stateType: String? = null,
@JsonProperty("stream_name") val streamName: String? = null,
@JsonProperty("stream_namespace") val streamNamespace: String? = null,
@JsonProperty("cursor_field") val cursorField: List<String>? = null,
@JsonProperty("cursor") val cursor: String? = null,
@JsonProperty("cursor_record_count") val cursorRecordCount: Long? = null,
)
/** Helper class to migrate legacy MSSQL connector states to the new v2 format */
object MsSqlServerStateMigration {
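// Migration sketch (field values illustrative): a legacy state such as
//   {"version":2,"state_type":"cursor_based","stream_name":"users",
//    "cursor_field":["updated_at"],"cursor":"2024-01-01T00:00:00Z","cursor_record_count":1}
// becomes a MsSqlServerJdbcStreamStateValue with CURRENT_VERSION, state_type
// "cursor_based", the same cursor field and value, and cursorRecordCount 1.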
/** Parses state value and handles backward compatibility with legacy formats */
fun parseStateValue(opaqueStateValue: OpaqueStateValue): MsSqlServerJdbcStreamStateValue {
// Check version to detect legacy state using centralized version constants
val version = opaqueStateValue.get("version")?.asInt()
val isLegacy = MsSqlServerJdbcStreamStateValue.isLegacy(version)
return if (isLegacy) {
log.info {
"Detected legacy state (version=$version), migrating to version ${MsSqlServerJdbcStreamStateValue.CURRENT_VERSION}"
}
migrateLegacyState(opaqueStateValue)
} else {
try {
// Version 3+ states should parse directly
Jsons.treeToValue(opaqueStateValue, MsSqlServerJdbcStreamStateValue::class.java)
} catch (e: Exception) {
throw IllegalStateException(
"Failed to parse state with version $version as MsSqlServerJdbcStreamStateValue.",
e
)
}
}
}
/** Migrates legacy state formats to new MsSqlServerJdbcStreamStateValue format */
private fun migrateLegacyState(
opaqueStateValue: OpaqueStateValue
): MsSqlServerJdbcStreamStateValue {
val stateType = opaqueStateValue.get("state_type")?.asText()
return when (stateType) {
"ordered_column" -> migrateOrderedColumnLoadStatus(opaqueStateValue)
"cursor_based" -> migrateCursorBasedStatus(opaqueStateValue)
else -> {
// Try to detect format based on field presence
when {
opaqueStateValue.has("ordered_col") ->
migrateOrderedColumnLoadStatus(opaqueStateValue)
opaqueStateValue.has("cursor_field") ->
migrateCursorBasedStatus(opaqueStateValue)
else -> {
log.warn {
"Unknown legacy state format, falling back to default: $opaqueStateValue"
}
MsSqlServerJdbcStreamStateValue()
}
}
}
}
}
/** Migrates OrderedColumnLoadStatus (primary key based initial sync) to new format */
private fun migrateOrderedColumnLoadStatus(
opaqueStateValue: OpaqueStateValue
): MsSqlServerJdbcStreamStateValue {
val legacy = Jsons.treeToValue(opaqueStateValue, LegacyOrderedColumnLoadStatus::class.java)
log.info {
"Migrating OrderedColumnLoadStatus state: ordered_col=${legacy.orderedCol}, ordered_col_val=${legacy.orderedColVal}"
}
// Extract incremental state if present
val incrementalState = legacy.incrementalState?.let { migrateCursorBasedStatusFromJson(it) }
return MsSqlServerJdbcStreamStateValue(
version = MsSqlServerJdbcStreamStateValue.CURRENT_VERSION,
stateType =
StateType.PRIMARY_KEY.stateType, // Convert "ordered_column" to "primary_key"
pkName = legacy.orderedCol,
pkValue = legacy.orderedColVal,
// If there's incremental state, embed it for transition after initial sync completes
incrementalState = incrementalState?.let { Jsons.valueToTree(it) }
)
}
/** Migrates CursorBasedStatus (cursor-based incremental) to new format */
private fun migrateCursorBasedStatusFromJson(
stateValue: JsonNode
): MsSqlServerJdbcStreamStateValue {
val legacy = Jsons.treeToValue(stateValue, LegacyCursorBasedStatus::class.java)
log.info {
"Migrating CursorBasedStatus state: stream=${legacy.streamName}, cursor_field=${legacy.cursorField}, cursor=${legacy.cursor}"
}
return MsSqlServerJdbcStreamStateValue(
version = MsSqlServerJdbcStreamStateValue.CURRENT_VERSION,
stateType = StateType.CURSOR_BASED.stateType,
cursorField = legacy.cursorField ?: emptyList(),
cursor = legacy.cursor ?: "",
cursorRecordCount = legacy.cursorRecordCount?.toInt() ?: 0
)
}
private fun migrateCursorBasedStatus(
opaqueStateValue: OpaqueStateValue
): MsSqlServerJdbcStreamStateValue {
return migrateCursorBasedStatusFromJson(opaqueStateValue)
}
}

View File

@@ -0,0 +1,430 @@
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.StreamIdentifier
import io.airbyte.cdk.check.JdbcCheckQueries
import io.airbyte.cdk.command.SourceConfiguration
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.discover.JdbcMetadataQuerier
import io.airbyte.cdk.discover.MetadataQuerier
import io.airbyte.cdk.discover.TableName
import io.airbyte.cdk.jdbc.DefaultJdbcConstants
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.cdk.read.SelectQueryGenerator
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.StreamDescriptor
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Primary
import jakarta.inject.Singleton
import java.sql.ResultSet
import java.sql.SQLException
import java.sql.Statement
private val log = KotlinLogging.logger {}
/** Delegates to [JdbcMetadataQuerier] except for [fields]. */
class MsSqlSourceMetadataQuerier(
val base: JdbcMetadataQuerier,
val configuredCatalog: ConfiguredAirbyteCatalog? = null,
) : MetadataQuerier by base {
override fun extraChecks() {
base.extraChecks()
if (base.config.global) {
// Extra checks for CDC
checkSqlServerAgentRunning()
checkDatabaseCdcEnabled()
}
}
private fun checkSqlServerAgentRunning() {
try {
base.conn.createStatement().use { stmt: Statement ->
stmt
.executeQuery(
"SELECT servicename, status_desc FROM sys.dm_server_services WHERE servicename LIKE '%SQL Server Agent%'"
)
.use { rs: ResultSet ->
if (!rs.next()) {
throw ConfigErrorException(
"SQL Server Agent service is not found. CDC requires SQL Server Agent to be running."
)
}
val status = rs.getString("status_desc")
if (status != "Running") {
throw ConfigErrorException(
"SQL Server Agent is not running (status: $status). CDC requires SQL Server Agent to be running."
)
}
}
}
} catch (e: SQLException) {
throw ConfigErrorException("Failed to check SQL Server Agent status: ${e.message}")
}
}
private fun checkDatabaseCdcEnabled() {
try {
base.conn.createStatement().use { stmt: Statement ->
stmt
.executeQuery("SELECT is_cdc_enabled FROM sys.databases WHERE name = DB_NAME()")
.use { rs: ResultSet ->
if (!rs.next()) {
throw ConfigErrorException(
"Could not determine CDC status for current database"
)
}
val cdcEnabled = rs.getBoolean("is_cdc_enabled")
if (!cdcEnabled) {
throw ConfigErrorException(
"CDC is not enabled for the database. Please enable CDC with: EXEC sys.sp_cdc_enable_db"
)
}
}
}
} catch (e: SQLException) {
throw ConfigErrorException("Failed to check database CDC status: ${e.message}")
}
}
override fun fields(streamID: StreamIdentifier): List<Field> {
val table: TableName = findTableName(streamID) ?: return listOf()
if (table !in base.memoizedColumnMetadata) return listOf()
return base.memoizedColumnMetadata[table]!!.map {
Field(it.label, base.fieldTypeMapper.toFieldType(it))
}
}
override fun streamNamespaces(): List<String> = base.config.namespaces.toList()
val memoizedTableNames: List<TableName> by lazy {
log.info { "Querying SQL Server table names for catalog discovery." }
try {
val allTables = mutableSetOf<TableName>()
val dbmd = base.conn.metaData
val currentDatabase = base.conn.catalog
for (namespace in
base.config.namespaces + base.config.namespaces.map { it.uppercase() }) {
// For SQL Server with SCHEMA namespace kind, use current database as catalog
dbmd.getTables(currentDatabase, namespace, null, null).use { rs ->
while (rs.next()) {
allTables.add(
TableName(
catalog = rs.getString("TABLE_CAT"),
schema = rs.getString("TABLE_SCHEM"),
name = rs.getString("TABLE_NAME"),
type = rs.getString("TABLE_TYPE") ?: "",
),
)
}
}
}
log.info {
"Discovered ${allTables.size} table(s) in SQL Server database '$currentDatabase'."
}
return@lazy allTables.toList()
} catch (e: Exception) {
throw RuntimeException("SQL Server table discovery query failed: ${e.message}", e)
}
}
override fun streamNames(streamNamespace: String?): List<StreamIdentifier> =
memoizedTableNames
.filter { it.schema == streamNamespace }
.map { StreamDescriptor().withName(it.name).withNamespace(streamNamespace) }
.map(StreamIdentifier::from)
fun findTableName(
streamID: StreamIdentifier,
): TableName? =
memoizedTableNames.find { it.name == streamID.name && it.schema == streamID.namespace }
val memoizedClusteredIndexKeys: Map<TableName, List<List<String>>> by lazy {
val results = mutableListOf<AllClusteredIndexKeysRow>()
val schemas: List<String> = streamNamespaces()
val sql: String = CLUSTERED_INDEX_QUERY_FMTSTR.format(schemas.joinToString { "'$it'" })
log.info {
"Querying SQL Server system tables for all clustered index keys for catalog discovery."
}
try {
base.conn.createStatement().use { stmt: Statement ->
stmt.executeQuery(sql).use { rs: ResultSet ->
while (rs.next()) {
results.add(
AllClusteredIndexKeysRow(
rs.getString("table_schema"),
rs.getString("table_name"),
rs.getString("index_name"),
rs.getInt("key_ordinal").takeUnless { rs.wasNull() },
rs.getString("column_name").takeUnless { rs.wasNull() },
),
)
}
}
}
log.info {
"Discovered all clustered index keys in ${schemas.size} SQL Server schema(s)."
}
return@lazy results
.groupBy {
findTableName(
StreamIdentifier.from(
StreamDescriptor().withName(it.tableName).withNamespace(it.tableSchema),
),
)
}
.mapNotNull { (table, rowsByTable) ->
if (table == null) return@mapNotNull null
val clusteredIndexRows: List<AllClusteredIndexKeysRow> =
rowsByTable
.groupBy { it.indexName }
.filterValues { rowsByIndex: List<AllClusteredIndexKeysRow> ->
rowsByIndex.all { it.keyOrdinal != null && it.columnName != null }
}
.values
.firstOrNull()
?: return@mapNotNull null
val clusteredIndexColumnNames: List<List<String>> =
clusteredIndexRows
.sortedBy { it.keyOrdinal }
.mapNotNull { it.columnName }
.map { listOf(it) }
table to clusteredIndexColumnNames
}
.toMap()
} catch (e: Exception) {
throw RuntimeException(
"SQL Server clustered index discovery query failed: ${e.message}",
e
)
}
}
/**
* The logic flow:
* 1. Check for clustered index
* 2. If single-column clustered index exists → Use it
* 3. If composite clustered index exists → Use primary key
* 4. If no clustered index exists → Use primary key
* 5. If no primary key exists → Check configured catalog for user-defined logical PK
* 6. If no logical PK exists → Return empty list
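 * Example (illustrative): a table with a single-column clustered index on (id) yields
 * [["id"]], while a composite clustered index on (tenant_id, id) falls back to the
 * table's primary key.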
*/
override fun primaryKey(
streamID: StreamIdentifier,
): List<List<String>> {
val table: TableName = findTableName(streamID) ?: return listOf()
// First try to get clustered index keys
val clusteredIndexKeys = memoizedClusteredIndexKeys[table]
// Use clustered index if it exists and is a single column
// For composite clustered indexes, fall back to primary key
val databasePK =
when {
clusteredIndexKeys != null && clusteredIndexKeys.size == 1 -> {
log.info {
"Using single-column clustered index for table ${table.schema}.${table.name}"
}
clusteredIndexKeys
}
clusteredIndexKeys != null && clusteredIndexKeys.size > 1 -> {
log.info {
"Clustered index is composite for table ${table.schema}.${table.name}. Falling back to primary key."
}
memoizedPrimaryKeys[table]
}
else -> {
log.info {
"No clustered index found for table ${table.schema}.${table.name}. Using primary key."
}
memoizedPrimaryKeys[table]
}
}
// If we found a database PK, use it
if (!databasePK.isNullOrEmpty()) {
return databasePK
}
// Fall back to user-defined logical PK from configured catalog
// This handles migration from old connector where tables without physical PKs
// could have logical PKs configured in the UI
val logicalPK = getUserDefinedPrimaryKey(streamID)
if (logicalPK.isNotEmpty()) {
log.info {
"No physical primary key found for table ${table.schema}.${table.name}. " +
"Using user-defined logical primary key from configured catalog: $logicalPK"
}
return logicalPK
}
return listOf()
}
/**
* Gets the user-defined logical primary key from the configured catalog. This is used for
* backward compatibility with the old connector where users could configure logical PKs for
* tables without physical PKs.
*/
private fun getUserDefinedPrimaryKey(streamID: StreamIdentifier): List<List<String>> {
if (configuredCatalog == null) {
return listOf()
}
val configuredStream: ConfiguredAirbyteStream? =
configuredCatalog.streams.find {
it.stream.name == streamID.name && it.stream.namespace == streamID.namespace
}
return configuredStream?.primaryKey ?: listOf()
}
val memoizedPrimaryKeys: Map<TableName, List<List<String>>> by lazy {
val results = mutableListOf<AllPrimaryKeysRow>()
val schemas: List<String> = streamNamespaces()
val sql: String = PK_QUERY_FMTSTR.format(schemas.joinToString { "'$it'" })
log.info { "Querying SQL Server system tables for all primary keys for catalog discovery." }
try {
// Get primary keys for the specified table
base.conn.createStatement().use { stmt: Statement ->
stmt.executeQuery(sql).use { rs: ResultSet ->
while (rs.next()) {
results.add(
AllPrimaryKeysRow(
rs.getString("table_schema"),
rs.getString("table_name"),
rs.getString("constraint_name"),
rs.getInt("ordinal_position").takeUnless { rs.wasNull() },
rs.getString("column_name").takeUnless { rs.wasNull() },
),
)
}
}
}
log.info { "Discovered all primary keys in ${schemas.size} SQL Server schema(s)." }
return@lazy results
.groupBy {
findTableName(
StreamIdentifier.from(
StreamDescriptor().withName(it.tableName).withNamespace(it.tableSchema),
),
)
}
.mapNotNull { (table, rowsByTable) ->
if (table == null) return@mapNotNull null
val pkRows: List<AllPrimaryKeysRow> =
rowsByTable
.groupBy { it.constraintName }
.filterValues { rowsByPK: List<AllPrimaryKeysRow> ->
rowsByPK.all { it.position != null && it.columnName != null }
}
.values
.firstOrNull()
?: return@mapNotNull null
val pkColumnNames: List<List<String>> =
pkRows
.sortedBy { it.position }
.mapNotNull { it.columnName }
.map { listOf(it) }
table to pkColumnNames
}
.toMap()
} catch (e: Exception) {
throw RuntimeException("SQL Server primary key discovery query failed: ${e.message}", e)
}
}
private data class AllClusteredIndexKeysRow(
val tableSchema: String,
val tableName: String,
val indexName: String,
val keyOrdinal: Int?,
val columnName: String?,
)
private data class AllPrimaryKeysRow(
val tableSchema: String,
val tableName: String,
val constraintName: String,
val position: Int?,
val columnName: String?,
)
companion object {
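// Usage sketch: CLUSTERED_INDEX_QUERY_FMTSTR.format("'dbo', 'sales'") expands the
// IN-list placeholder below to: s.name IN ('dbo', 'sales'). PK_QUERY_FMTSTR is
// formatted the same way by memoizedPrimaryKeys.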
const val CLUSTERED_INDEX_QUERY_FMTSTR =
"""
SELECT
s.name as table_schema,
t.name as table_name,
i.name as index_name,
ic.key_ordinal,
c.name as column_name
FROM
sys.tables t
INNER JOIN
sys.schemas s ON t.schema_id = s.schema_id
INNER JOIN
sys.indexes i ON t.object_id = i.object_id
INNER JOIN
sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
INNER JOIN
sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
WHERE
s.name IN (%s)
AND i.type = 1 -- Clustered index
AND ic.is_included_column = 0 -- Only key columns, not included columns
ORDER BY
s.name, t.name, ic.key_ordinal;
"""
const val PK_QUERY_FMTSTR =
"""
SELECT
kcu.TABLE_SCHEMA as table_schema,
kcu.TABLE_NAME as table_name,
kcu.COLUMN_NAME as column_name,
kcu.ORDINAL_POSITION as ordinal_position,
kcu.CONSTRAINT_NAME as constraint_name
FROM
INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
INNER JOIN
INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
ON
kcu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME
AND kcu.TABLE_SCHEMA = tc.TABLE_SCHEMA
WHERE
kcu.TABLE_SCHEMA IN (%s)
AND tc.CONSTRAINT_TYPE = 'PRIMARY KEY';
"""
}
/** SQL Server implementation of [MetadataQuerier.Factory]. */
@Singleton
@Primary
class Factory(
val constants: DefaultJdbcConstants,
val selectQueryGenerator: SelectQueryGenerator,
val fieldTypeMapper: JdbcMetadataQuerier.FieldTypeMapper,
val checkQueries: JdbcCheckQueries,
val configuredCatalog: ConfiguredAirbyteCatalog? = null,
) : MetadataQuerier.Factory<MsSqlServerSourceConfiguration> {
/** The [SourceConfiguration] is deliberately not injected in order to support tests. */
override fun session(config: MsSqlServerSourceConfiguration): MetadataQuerier {
val jdbcConnectionFactory = JdbcConnectionFactory(config)
val base =
JdbcMetadataQuerier(
constants,
config,
selectQueryGenerator,
fieldTypeMapper,
checkQueries,
jdbcConnectionFactory,
)
return MsSqlSourceMetadataQuerier(base, configuredCatalog)
}
}
}

View File

@@ -0,0 +1,437 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ObjectNode
import com.microsoft.sqlserver.jdbc.Geography
import com.microsoft.sqlserver.jdbc.Geometry
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.data.FloatCodec
import io.airbyte.cdk.data.JsonEncoder
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.data.TextCodec
import io.airbyte.cdk.discover.CdcIntegerMetaFieldType
import io.airbyte.cdk.discover.CdcOffsetDateTimeMetaFieldType
import io.airbyte.cdk.discover.CdcStringMetaFieldType
import io.airbyte.cdk.discover.CommonMetaField
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.discover.FieldType
import io.airbyte.cdk.discover.JdbcAirbyteStreamFactory
import io.airbyte.cdk.discover.JdbcMetadataQuerier
import io.airbyte.cdk.discover.MetaField
import io.airbyte.cdk.discover.SystemType
import io.airbyte.cdk.jdbc.*
import io.airbyte.cdk.jdbc.LosslessJdbcFieldType
import io.airbyte.cdk.output.sockets.FieldValueEncoder
import io.airbyte.cdk.output.sockets.NativeRecordPayload
import io.airbyte.cdk.read.*
import io.airbyte.cdk.read.SelectQueryGenerator
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Primary
import jakarta.inject.Singleton
import java.sql.JDBCType
import java.sql.PreparedStatement
import java.sql.ResultSet
import java.time.OffsetDateTime
private val log = KotlinLogging.logger {}
@Singleton
@Primary
class MsSqlSourceOperations :
JdbcMetadataQuerier.FieldTypeMapper, SelectQueryGenerator, JdbcAirbyteStreamFactory {
override fun toFieldType(c: JdbcMetadataQuerier.ColumnMetadata): FieldType {
when (val type = c.type) {
is SystemType -> {
val retVal = leafType(type)
return retVal
}
else -> {
return PokemonFieldType
}
}
}
private fun leafType(type: SystemType): JdbcFieldType<*> {
val retVal =
MsSqlServerSqlType.fromName(type.typeName)?.jdbcType
?: when (type.jdbcType) {
JDBCType.BIT,
JDBCType.BOOLEAN -> BooleanFieldType
JDBCType.TINYINT,
JDBCType.SMALLINT -> ShortFieldType
JDBCType.INTEGER -> IntFieldType
JDBCType.BIGINT -> BigIntegerFieldType
JDBCType.FLOAT -> FloatFieldType
JDBCType.REAL ->
// according to
// https://learn.microsoft.com/en-us/sql/t-sql/data-types/float-and-real-transact-sql?view=sql-server-ver16,
// when precision is less than 25, the value is stored in a 4-byte
// structure, which corresponds to a float in Java. Between 25 and 53,
// it's stored in an 8-byte structure, which corresponds to a double in Java.
// The correspondence between SQL Server and Java types was determined mostly
// by experimentation, and the sizes match.
if (type.precision!! < 25) FloatFieldType else DoubleFieldType
JDBCType.DOUBLE -> DoubleFieldType
JDBCType.NUMERIC,
JDBCType.DECIMAL -> BigDecimalFieldType
JDBCType.CHAR,
JDBCType.VARCHAR,
JDBCType.LONGVARCHAR,
JDBCType.NCHAR,
JDBCType.NVARCHAR,
JDBCType.LONGNVARCHAR -> StringFieldType
JDBCType.DATE -> LocalDateFieldType
JDBCType.TIME -> LocalTimeFieldType
JDBCType.TIMESTAMP -> LocalDateTimeFieldType
JDBCType.BINARY,
JDBCType.VARBINARY,
JDBCType.LONGVARBINARY -> BytesFieldType
JDBCType.BLOB -> BinaryStreamFieldType
JDBCType.CLOB,
JDBCType.NCLOB -> CharacterStreamFieldType
JDBCType.TIME_WITH_TIMEZONE -> OffsetTimeFieldType
JDBCType.TIMESTAMP_WITH_TIMEZONE -> OffsetDateTimeFieldType
JDBCType.NULL -> NullFieldType
JDBCType.SQLXML -> XmlFieldType
JDBCType.OTHER,
JDBCType.JAVA_OBJECT,
JDBCType.DISTINCT,
JDBCType.STRUCT,
JDBCType.ARRAY,
JDBCType.REF,
JDBCType.DATALINK,
JDBCType.ROWID,
JDBCType.REF_CURSOR,
null -> PokemonFieldType
}
return retVal
}
data object MsSqlServerFloatAccessor : JdbcAccessor<Float> {
override fun get(
rs: ResultSet,
colIdx: Int,
): Float? {
val retVal = rs.getFloat(colIdx).takeUnless { rs.wasNull() }
return retVal
}
override fun set(
stmt: PreparedStatement,
paramIdx: Int,
value: Float,
) {
stmt.setFloat(paramIdx, value)
}
}
data object MsSqlServerFloatFieldType :
SymmetricJdbcFieldType<Float>(
LeafAirbyteSchemaType.NUMBER,
MsSqlServerFloatAccessor,
FloatCodec,
)
data object MsSqlServerGeographyFieldType :
SymmetricJdbcFieldType<String>(
LeafAirbyteSchemaType.STRING,
MsSqlServerGeographyAccessor,
TextCodec,
)
data object MsSqlServerGeographyAccessor : JdbcAccessor<String> {
override fun get(
rs: ResultSet,
colIdx: Int,
): String? {
val bytes = rs.getBytes(colIdx)
if (rs.wasNull() || bytes == null) return null
return Geography.deserialize(bytes).toString()
}
override fun set(
stmt: PreparedStatement,
paramIdx: Int,
value: String,
) {
stmt.setBytes(paramIdx, Geography.parse(value).serialize())
}
}
data object MsSqlServerGeometryFieldType :
SymmetricJdbcFieldType<String>(
LeafAirbyteSchemaType.STRING,
MsSqlServerGeometryAccessor,
TextCodec,
)
data object MsSqlServerGeometryAccessor : JdbcAccessor<String> {
override fun get(
rs: ResultSet,
colIdx: Int,
): String? {
val bytes = rs.getBytes(colIdx)
if (rs.wasNull() || bytes == null) return null
return Geometry.deserialize(bytes).toString()
}
override fun set(
stmt: PreparedStatement,
paramIdx: Int,
value: String,
) {
stmt.setBytes(paramIdx, Geometry.parse(value).serialize())
}
}
data object MsSqlServerHierarchyFieldType :
SymmetricJdbcFieldType<String>(
LeafAirbyteSchemaType.STRING,
StringAccessor,
TextCodec,
)
enum class MsSqlServerSqlType(
val names: List<String>,
val jdbcType: JdbcFieldType<*>,
) {
BINARY_FIELD(BinaryStreamFieldType, "VARBINARY", "BINARY"),
DATETIME_TYPES(LocalDateTimeFieldType, "DATETIME", "DATETIME2", "SMALLDATETIME"),
DATE(LocalDateFieldType, "DATE"),
DATETIMEOFFSET(OffsetDateTimeFieldType, "DATETIMEOFFSET"),
TIME_TYPE(LocalTimeFieldType, "TIME"),
GEOMETRY(MsSqlServerGeometryFieldType, "GEOMETRY"),
GEOGRAPHY(MsSqlServerGeographyFieldType, "GEOGRAPHY"),
DOUBLE(DoubleFieldType, "MONEY", "SMALLMONEY"),
HIERARCHY(MsSqlServerHierarchyFieldType, "HIERARCHYID"),
;
constructor(
jdbcType: JdbcFieldType<*>,
vararg names: String,
) : this(names.toList(), jdbcType)
companion object {
private val nameToValue =
MsSqlServerSqlType.entries
.flatMap { msSqlServerSqlType ->
msSqlServerSqlType.names.map { name ->
name.uppercase() to msSqlServerSqlType
}
}
.toMap()
fun fromName(name: String?): MsSqlServerSqlType? {
val retVal = nameToValue[name?.uppercase()]
return retVal
}
}
}
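// Usage sketch: MsSqlServerSqlType.fromName("datetime2") resolves to DATETIME_TYPES, so
// DATETIME2 columns are read as LocalDateTimeFieldType; unknown type names return null
// and fall through to the JDBCType-based mapping in leafType().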
override fun generate(ast: SelectQuerySpec): SelectQuery =
SelectQuery(ast.sql(), ast.select.columns, ast.bindings())
fun SelectQuerySpec.sql(): String {
val components: List<String> =
listOf(sql(select, limit), from.sql(), where.sql(), orderBy.sql())
val sql: String = components.filter { it.isNotBlank() }.joinToString(" ")
return sql
}
fun sql(
selectNode: SelectNode,
limit: LimitNode,
): String {
val topClause: String =
when (limit) {
NoLimit -> ""
Limit(0) -> "TOP 0 "
is Limit -> "TOP ${limit.n} "
}
return "SELECT $topClause" +
when (selectNode) {
is SelectColumns -> selectNode.columns.joinToString(", ") { it.sql() }
is SelectColumnMaxValue -> "MAX(${selectNode.column.sql()})"
}
}
fun Field.sql(): String = if (type is MsSqlServerHierarchyFieldType) "$id.ToString()" else "$id"
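// HIERARCHYID is the one type selected through a method call: emitting $id.ToString()
// yields the readable '/1/1/' path form instead of the raw varbinary representation.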
fun FromNode.sql(): String =
when (this) {
NoFrom -> ""
is From -> if (this.namespace == null) "FROM $name" else "FROM $namespace.$name"
is FromSample -> {
if (sampleRateInv == 1L) {
if (namespace == null) "FROM $name" else "FROM $namespace.$name"
} else {
val tableName = if (namespace == null) name else "$namespace.$name"
val samplePercent = sampleRatePercentage.toPlainString()
"FROM (SELECT TOP $sampleSize * FROM $tableName TABLESAMPLE ($samplePercent PERCENT) ORDER BY NEWID()) AS randomly_sampled"
}
}
}
fun WhereNode.sql(): String =
when (this) {
NoWhere -> ""
is Where -> "WHERE ${clause.sql()}"
}
fun WhereClauseNode.sql(): String =
when (this) {
is And -> conj.joinToString(") AND (", "(", ")") { it.sql() }
is Or -> disj.joinToString(") OR (", "(", ")") { it.sql() }
is Equal -> "${column.sql()} = ?"
is Greater -> "${column.sql()} > ?"
is GreaterOrEqual -> "${column.sql()} >= ?"
is LesserOrEqual -> "${column.sql()} <= ?"
is Lesser -> "${column.sql()} < ?"
}
fun OrderByNode.sql(): String =
when (this) {
NoOrderBy -> ""
is OrderBy -> "ORDER BY " + columns.joinToString(", ") { it.sql() }
}
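// Rendering sketch (illustrative table "users" in schema "dbo" with columns id, name):
// a spec with SelectColumns(id, name), From(users, dbo), Where(Greater(id, v)),
// OrderBy(id), and Limit(1000) renders as:
//   SELECT TOP 1000 id, name FROM dbo.users WHERE id > ? ORDER BY id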
fun SelectQuerySpec.bindings(): List<SelectQuery.Binding> = where.bindings() + limit.bindings()
fun WhereNode.bindings(): List<SelectQuery.Binding> =
when (this) {
is NoWhere -> listOf()
is Where -> clause.bindings()
}
fun WhereClauseNode.bindings(): List<SelectQuery.Binding> =
when (this) {
is And -> conj.flatMap { it.bindings() }
is Or -> disj.flatMap { it.bindings() }
is WhereClauseLeafNode -> {
val type = column.type as LosslessJdbcFieldType<*, *>
listOf(SelectQuery.Binding(bindingValue, type))
}
}
fun LimitNode.bindings(): List<SelectQuery.Binding> =
when (this) {
NoLimit,
Limit(0),
is Limit -> emptyList()
}
override val globalCursor: MetaField = MsSqlServerCdcMetaFields.CDC_CURSOR
override val globalMetaFields: Set<MetaField> =
setOf(
CommonMetaField.CDC_UPDATED_AT,
CommonMetaField.CDC_DELETED_AT,
MsSqlServerCdcMetaFields.CDC_CURSOR,
MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO,
MsSqlServerCdcMetaFields.CDC_LSN,
)
override fun decorateRecordData(
timestamp: OffsetDateTime,
globalStateValue: OpaqueStateValue?,
stream: Stream,
recordData: ObjectNode,
) {
recordData.set<JsonNode>(
CommonMetaField.CDC_UPDATED_AT.id,
CdcOffsetDateTimeMetaFieldType.jsonEncoder.encode(timestamp),
)
recordData.set<JsonNode>(
MsSqlServerCdcMetaFields.CDC_LSN.id,
CdcStringMetaFieldType.jsonEncoder.encode(""),
)
if (globalStateValue == null) {
return
}
// Deserialize the CDC state to extract the commit LSN; if the offset is absent
// or unparseable, the empty default LSN set above is kept.
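// Assumed offset shape (illustrative), keyed by Debezium partition:
//   {"state":{"mssql_cdc_offset":{"<partition>":"{\"commit_lsn\":\"0000002d:00000758:0003\"}"}}}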
try {
val stateNode = globalStateValue["state"] as? ObjectNode
if (stateNode != null) {
val offsetNode = stateNode["mssql_cdc_offset"] as? ObjectNode
if (offsetNode != null && offsetNode.size() > 0) {
// Extract LSN from the offset if available
val offsetValue = offsetNode.values().asSequence().first()
val lsn = Jsons.readTree(offsetValue.textValue())["commit_lsn"]?.asText()
if (lsn != null) {
recordData.set<JsonNode>(
MsSqlServerCdcMetaFields.CDC_LSN.id,
CdcStringMetaFieldType.jsonEncoder.encode(lsn),
)
}
}
}
} catch (e: Exception) {
log.warn(e) {
"Failed to extract LSN from CDC state for stream ${stream.name}. Using empty LSN value."
}
}
}
@Suppress("UNCHECKED_CAST")
override fun decorateRecordData(
timestamp: OffsetDateTime,
globalStateValue: OpaqueStateValue?,
stream: Stream,
recordData: NativeRecordPayload
) {
// Add CDC_UPDATED_AT field
recordData[CommonMetaField.CDC_UPDATED_AT.id] =
FieldValueEncoder(
timestamp,
CommonMetaField.CDC_UPDATED_AT.type.jsonEncoder as JsonEncoder<Any>
)
// Add CDC_LSN field with empty string as default
var lsnValue = ""
if (globalStateValue != null) {
// For MSSQL, extract the LSN from the state if available
try {
val stateNode = globalStateValue["state"] as? ObjectNode
if (stateNode != null) {
val offsetNode = stateNode["mssql_cdc_offset"] as? ObjectNode
if (offsetNode != null && offsetNode.size() > 0) {
// Extract LSN from the offset if available
val offsetValue = offsetNode.values().asSequence().first()
val lsn = Jsons.readTree(offsetValue.textValue())["commit_lsn"]?.asText()
if (lsn != null) {
lsnValue = lsn
}
}
}
} catch (e: Exception) {
log.warn(e) {
"Failed to extract LSN from CDC state for stream ${stream.name}. Using empty LSN value."
}
}
}
recordData[MsSqlServerCdcMetaFields.CDC_LSN.id] =
FieldValueEncoder(
lsnValue,
MsSqlServerCdcMetaFields.CDC_LSN.type.jsonEncoder as JsonEncoder<Any>
)
}
enum class MsSqlServerCdcMetaFields(override val type: FieldType) : MetaField {
CDC_CURSOR(CdcIntegerMetaFieldType),
CDC_LSN(CdcStringMetaFieldType),
CDC_EVENT_SERIAL_NO(CdcStringMetaFieldType);
override val id: String
get() = MetaField.META_PREFIX + name.lowercase()
}
}

View File

@@ -0,0 +1,165 @@
---
airbyte:
connector:
data-channel:
medium: ${DATA_CHANNEL_MEDIUM:STDIO}
format: ${DATA_CHANNEL_FORMAT:JSONL}
socket-paths: ${DATA_CHANNEL_SOCKET_PATHS}
output:
buffer-byte-size-threshold-for-flush: 8192
extract:
jdbc:
mode: concurrent
with-sampling: true
table-sample-size: 1024
throughput-bytes-per-second: 10000000
min-fetch-size: 10
default-fetch-size: 1024
max-fetch-size: 1000000000
memory-capacity-ratio: 0.6
estimated-record-overhead-bytes: 16
estimated-field-overhead-bytes: 16
check:
jdbc:
queries:
- >-
SELECT 1 WHERE 1 = 0;
exception-classifiers:
regex:
# The following rules are for the RegexExceptionClassifier [0] which are applied
# sequentially on a Throwable's message [1] and its nested messages by cause [2].
#
# This classifier's rules are applied ahead of the JdbcExceptionClassifier's further down.
#
# [0] https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/bulk/core/base/src/main/kotlin/io/airbyte/cdk/output/ExceptionClassifier.kt
# [1] https://docs.oracle.com/javase/8/docs/api/java/lang/Throwable.html#getMessage--
# [2] https://docs.oracle.com/javase/8/docs/api/java/lang/Throwable.html#getCause--
rules:
## REGEX RULE TEMPLATE:
# pattern: Required; regex pattern, c.f. https://www.freeformatter.com/java-regex-tester.html.
# Note that regex patterns are not case-sensitive and are multiline.
# input-example: Required, string matching regex pattern.
# error: Required, one of (transient|config|system).
# group: Optional, string prefixing user-facing error message.
# output: Optional, user-facing error message; when not set, the exception message is used instead.
# reference-links: Optional, list of URLs appended to user-facing message after a newline.
- pattern: (?i).*connection is not available, request timed out after.*
input-example: >-
java.sql.SQLTransientConnectionException: HikariPool-x -
Connection is not available, request timed out after 10 ms
error: transient
group: Hikari Connection Pool Timeout
output: The sync encountered a database read failure due to a connection timeout, will retry.
reference-links: https://docs.oracle.com/javase/9/docs/api/java/sql/SQLTransientConnectionException.html
- pattern: (?i).*the tcp\/ip connection to the host.*has failed.*
input-example: >-
com.microsoft.sqlserver.jdbc.SQLServerException: The TCP/IP connection to the host localhost, port 1433 has failed.
error: transient
group: SQL Server Connection Error
output: The sync encountered a network connection issue while connecting to the SQL Server, will retry.
reference-links: https://docs.microsoft.com/en-us/sql/connect/jdbc/troubleshooting-connectivity
- pattern: (?i).*login failed for user.*
input-example: >-
com.microsoft.sqlserver.jdbc.SQLServerException: Login failed for user 'sa'.
error: config
group: SQL Server Authentication Error
output: >-
The sync failed because the provided credentials are invalid.
Please verify your username and password configuration.
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/security/authentication-access/troubleshoot-connecting-to-the-sql-server-database-engine
- pattern: (?i).*cannot open database.*requested by the login.*
input-example: >-
com.microsoft.sqlserver.jdbc.SQLServerException: Cannot open database "testdb" requested by the login. The login failed.
error: config
group: SQL Server Database Access Error
output: >-
The sync failed because the specified database cannot be accessed with the provided credentials.
Please verify your database name and user permissions.
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/security/authentication-access/database-level-roles
- pattern: (?i).*invalid object name.*
input-example: >-
com.microsoft.sqlserver.jdbc.SQLServerException: Invalid object name 'dbo.test_table'.
error: config
group: SQL Server Object Error
output: >-
The sync failed because a required table or view does not exist in the database.
Please verify your table/view names and schema configuration.
reference-links: https://docs.microsoft.com/en-us/sql/t-sql/language-elements/database-identifiers
- pattern: (?i).*(timeout expired|query has timed out).*
input-example: >-
com.microsoft.sqlserver.jdbc.SQLServerException: The query has timed out.
error: transient
group: SQL Server Query Timeout
output: The sync was aborted because the query took too long to return results, will retry.
reference-links: https://docs.microsoft.com/en-us/sql/connect/jdbc/setting-the-connection-properties
- pattern: (?i).*an exception occurred in the change event producer.*
input-example: >-
java.lang.RuntimeException: org.apache.kafka.connect.errors.ConnectException:
An exception occurred in the change event producer. This connector will be stopped.
error: config
group: SQL Server CDC Error
output: >-
The sync encountered an unexpected error in the change event producer and has stopped.
Please ensure CDC is properly configured and the SQL Server Agent is running.
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/track-changes/enable-and-disable-change-data-capture-sql-server
jdbc:
# The following rules are for the JdbcExceptionClassifier [0] which are applied on a
# SQL Server's error code [1]. The vendor error code is printed in the exception
# message, and is not to be confused with the SQLState [2] which is also in the message.
#
# [0] https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/output/JdbcExceptionClassifier.kt
# [1] https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/database-engine-events-and-errors
# [2] https://en.wikipedia.org/wiki/SQLSTATE
#
rules:
## JDBC RULE TEMPLATE
# code: Required, SQL Server vendor error code.
# error: Required, one of (transient|config|system).
# output: Optional, user-facing error message; the exception message is used instead when this is not defined.
# reference-links: Optional, list of URLs appended to user-facing message after newline.
- code: 18456
error: config
output: >-
The sync failed because the provided credentials are invalid.
Please verify your username and password configuration.
group: SQL Server Authentication Error
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/security/authentication-access/troubleshoot-connecting-to-the-sql-server-database-engine
- code: 4060
error: config
output: >-
The sync failed because the specified database cannot be accessed.
Please verify your database name and connection configuration.
group: SQL Server Database Access Error
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-4060-database-engine-error
- code: 208
error: config
output: >-
The sync failed because a required table or view does not exist in the database.
Please verify your table/view names and schema configuration.
group: SQL Server Object Error
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-208-database-engine-error
- code: 2
error: transient
output: The sync encountered a network connection issue while connecting to the SQL Server, will retry.
group: SQL Server Network Error
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-2-database-engine-error
- code: 1205
error: transient
output: The sync was aborted due to a deadlock, will retry.
group: SQL Server Deadlock
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-1205-database-engine-error

View File

@@ -1,196 +0,0 @@
{
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/mssql",
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "MSSQL Source Spec",
"type": "object",
"required": ["host", "port", "database", "username", "password"],
"properties": {
"host": {
"description": "The hostname of the database.",
"title": "Host",
"type": "string",
"order": 0
},
"port": {
"description": "The port of the database.",
"title": "Port",
"type": "integer",
"minimum": 0,
"maximum": 65536,
"examples": ["1433"],
"order": 1
},
"database": {
"description": "The name of the database.",
"title": "Database",
"type": "string",
"examples": ["master"],
"order": 2
},
"schemas": {
"title": "Schemas",
"description": "The list of schemas to sync from. Defaults to user. Case sensitive.",
"type": "array",
"items": {
"type": "string"
},
"minItems": 0,
"uniqueItems": true,
"default": ["dbo"],
"order": 3
},
"username": {
"description": "The username which is used to access the database.",
"title": "Username",
"type": "string",
"order": 4
},
"password": {
"description": "The password associated with the username.",
"title": "Password",
"type": "string",
"airbyte_secret": true,
"order": 5
},
"jdbc_url_params": {
"title": "JDBC URL Params",
"description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).",
"type": "string",
"order": 6
},
"ssl_method": {
"title": "SSL Method",
"type": "object",
"description": "The encryption method which is used when communicating with the database.",
"order": 7,
"oneOf": [
{
"title": "Unencrypted",
"description": "Data transfer will not be encrypted.",
"required": ["ssl_method"],
"properties": {
"ssl_method": {
"type": "string",
"const": "unencrypted"
}
}
},
{
"title": "Encrypted (trust server certificate)",
"description": "Use the certificate provided by the server without verification. (For testing purposes only!)",
"required": ["ssl_method"],
"properties": {
"ssl_method": {
"type": "string",
"const": "encrypted_trust_server_certificate"
}
}
},
{
"title": "Encrypted (verify certificate)",
"description": "Verify and use the certificate provided by the server.",
"required": ["ssl_method"],
"properties": {
"ssl_method": {
"type": "string",
"const": "encrypted_verify_certificate"
},
"hostNameInCertificate": {
"title": "Host Name In Certificate",
"type": "string",
"description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
"order": 0
},
"certificate": {
"title": "Certificate",
"type": "string",
"description": "certificate of the server, or of the CA that signed the server certificate",
"order": 1,
"airbyte_secret": true,
"multiline": true
}
}
}
]
},
"replication_method": {
"type": "object",
"title": "Update Method",
"description": "Configures how data is extracted from the database.",
"default": "CDC",
"display_type": "radio",
"order": 8,
"oneOf": [
{
"title": "Read Changes using Change Data Capture (CDC)",
"description": "<i>Recommended</i> - Incrementally reads new inserts, updates, and deletes using the SQL Server's <a href=\"https://docs.airbyte.com/integrations/sources/mssql/#change-data-capture-cdc\">change data capture feature</a>. This must be enabled on your database.",
"required": ["method"],
"properties": {
"method": {
"type": "string",
"const": "CDC",
"order": 0
},
"initial_waiting_seconds": {
"type": "integer",
"title": "Initial Waiting Time in Seconds (Advanced)",
"description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about <a href=\"https://docs.airbyte.com/integrations/sources/mysql/#change-data-capture-cdc\">initial waiting time</a>.",
"default": 300,
"min": 120,
"max": 3600,
"order": 3
},
"invalid_cdc_cursor_position_behavior": {
"type": "string",
"title": "Invalid CDC position behavior (Advanced)",
"description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
"enum": ["Fail sync", "Re-sync data"],
"default": "Fail sync",
"order": 4
},
"queue_size": {
"type": "integer",
"title": "Size of the queue (Advanced)",
"description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.",
"default": 10000,
"order": 5,
"min": 1000,
"max": 10000
},
"initial_load_timeout_hours": {
"type": "integer",
"title": "Initial Load Timeout in Hours (Advanced)",
"description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
"default": 8,
"min": 4,
"max": 24,
"order": 6
}
}
},
{
"title": "Scan Changes with User Defined Cursor",
"description": "Incrementally detects new inserts and updates using the <a href=\"https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor\">cursor column</a> chosen when configuring a connection (e.g. created_at, updated_at).",
"required": ["method"],
"properties": {
"method": {
"type": "string",
"const": "STANDARD",
"order": 0
},
"exclude_todays_data": {
"title": "Exclude Today's Data",
"description": "When enabled incremental syncs using a cursor of a temporal types (date or datetime) will include cursor values only up until last midnight (Advanced)",
"default": false,
"type": "boolean",
"always_show": true,
"order": 1
}
}
}
]
}
}
}
}

View File

@@ -1,360 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest;
import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.protocol.models.JsonSchemaType;
public abstract class AbstractMssqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest {
protected MsSQLTestDatabase testdb;
@Override
protected String getNameSpace() {
return "dbo";
}
@Override
protected String getImageName() {
return "airbyte/source-mssql:dev";
}
@Override
protected void tearDown(final TestDestinationEnv testEnv) {
testdb.close();
}
protected static final String CREATE_TABLE_SQL = "CREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)";
@Override
protected void initTests() {
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("bigint")
.airbyteType(JsonSchemaType.INTEGER)
.addInsertValues("-9223372036854775808", "9223372036854775807", "0", "null")
.addExpectedValues("-9223372036854775808", "9223372036854775807", "0", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("int")
.airbyteType(JsonSchemaType.INTEGER)
.addInsertValues("null", "-2147483648", "2147483647")
.addExpectedValues(null, "-2147483648", "2147483647")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("smallint")
.airbyteType(JsonSchemaType.INTEGER)
.addInsertValues("null", "-32768", "32767")
.addExpectedValues(null, "-32768", "32767")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("tinyint")
.airbyteType(JsonSchemaType.INTEGER)
.addInsertValues("null", "0", "255")
.addExpectedValues(null, "0", "255")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("bit")
.airbyteType(JsonSchemaType.BOOLEAN)
.addInsertValues("null", "0", "1", "'true'", "'false'")
.addExpectedValues(null, "false", "true", "true", "false")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("decimal")
.fullSourceDataType("DECIMAL(5,2)")
.airbyteType(JsonSchemaType.NUMBER)
.addInsertValues("999.33", "null")
.addExpectedValues("999.33", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("numeric")
.airbyteType(JsonSchemaType.NUMBER)
.addInsertValues("'99999'", "null")
.addExpectedValues("99999", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("money")
.airbyteType(JsonSchemaType.NUMBER)
.addInsertValues("null", "'9990000.3647'")
.addExpectedValues(null, "9990000.3647")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("smallmoney")
.airbyteType(JsonSchemaType.NUMBER)
.addInsertValues("null", "'-214748.3648'", "214748.3647")
.addExpectedValues(null, "-214748.3648", "214748.3647")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("float")
.airbyteType(JsonSchemaType.NUMBER)
.addInsertValues("'123'", "'1234567890.1234567'", "null")
.addExpectedValues("123.0", "1.2345678901234567E9", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(TestDataHolder.builder()
.sourceType("real")
.airbyteType(JsonSchemaType.NUMBER)
.addInsertValues("'123'", "'1234567890.1234567'", "null")
.addExpectedValues("123.0", "1.234568E9", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("date")
.airbyteType(JsonSchemaType.STRING_DATE)
.addInsertValues("'0001-01-01'", "'9999-12-31'", "'1999-01-08'", "null")
.addExpectedValues("0001-01-01", "9999-12-31", "1999-01-08", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("smalldatetime")
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE)
.addInsertValues("'1900-01-01'", "'2079-06-06'", "null")
.addExpectedValues("1900-01-01T00:00:00.000000", "2079-06-06T00:00:00.000000", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("datetime")
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE)
.addInsertValues("'1753-01-01'", "'9999-12-31'", "'9999-12-31T13:00:04'",
"'9999-12-31T13:00:04.123'", "null")
.addExpectedValues("1753-01-01T00:00:00.000000", "9999-12-31T00:00:00.000000", "9999-12-31T13:00:04.000000",
"9999-12-31T13:00:04.123000", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("datetime2")
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE)
.addInsertValues("'0001-01-01'", "'9999-12-31'", "'9999-12-31T13:00:04.123456'", "null", "'2023-11-08T01:20:11.3733338'")
.addExpectedValues("0001-01-01T00:00:00.000000", "9999-12-31T00:00:00.000000", "9999-12-31T13:00:04.123456", null,
"2023-11-08T01:20:11.373333")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("time")
.airbyteType(JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE)
.addInsertValues("null", "'13:00:01'", "'13:00:04Z'", "'13:00:04.123456Z'")
.addExpectedValues(null, "13:00:01", "13:00:04", "13:00:04.123456")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("datetimeoffset")
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)
.addInsertValues("'2001-01-10 00:00:00 +01:00'", "'9999-01-10 00:00:00 +01:00'", "null", "'2024-05-10 19:00:01.604805 +03:00'",
"'2024-03-02 19:08:07.1234567 +09:00'", "'2024-03-02 19:08:07.12345678 +09:00'",
"'0001-01-01 00:00:00.0000000 +00:00'")
.addExpectedValues("2001-01-10T00:00:00.000000+01:00",
"9999-01-10T00:00:00.000000+01:00", null, "2024-05-10T19:00:01.604805+03:00", "2024-03-02T19:08:07.123456+09:00",
"2024-03-02T19:08:07.123456+09:00", "0001-01-01T00:00:00.000000Z")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("char")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'*'", "null")
.addExpectedValues("a", "*", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("varchar")
.fullSourceDataType("varchar(max) COLLATE Latin1_General_100_CI_AI_SC_UTF8")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
null, "\\xF0\\x9F\\x9A\\x80")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("text")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'abc'", "'Some test text 123$%^&*()_'", "''", "null")
.addExpectedValues("a", "abc", "Some test text 123$%^&*()_", "", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("nchar")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'*'", "N'ї'", "null")
.addExpectedValues("a", "*", "ї", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("nvarchar")
.fullSourceDataType("nvarchar(max)")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
null, "\\xF0\\x9F\\x9A\\x80")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("ntext")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
null, "\\xF0\\x9F\\x9A\\x80")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("binary")
.airbyteType(JsonSchemaType.STRING_BASE_64)
.addInsertValues("CAST( 'A' AS BINARY(1))", "null")
.addExpectedValues("QQ==", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("varbinary")
.fullSourceDataType("varbinary(3)")
.airbyteType(JsonSchemaType.STRING_BASE_64)
.addInsertValues("CAST( 'ABC' AS VARBINARY)", "null")
.addExpectedValues("QUJD", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
// Proper select query example: SELECT test_column.STAsText() from dbo_1_geometry;
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("geometry")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)",
"null")
.addExpectedValues("LINESTRING(100 100, 20 180, 180 180)", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("uniqueidentifier")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'375CFC44-CAE3-4E43-8083-821D2DF0E626'", "null")
.addExpectedValues("375CFC44-CAE3-4E43-8083-821D2DF0E626", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("xml")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues(
"'<user><user_id>1</user_id></user>'", "null", "''")
.addExpectedValues("<user><user_id>1</user_id></user>", null, "")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
// Proper select query example: SELECT test_column.STAsText() from dbo_1_geography;
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("geography")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues(
"geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326)",
"null")
.addExpectedValues("LINESTRING(-122.36 47.656, -122.343 47.656)", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
// sql_variant is not supported by Debezium and always comes back null, so it only works for regular syncs.
// hierarchyid is returned in binary form, but MSSQL doesn't provide any parsers for it.
// On a regular sync we do a pre-flight request and then additionally wrap the SQL query when the
// table has a hierarchyid column. But this option is not available when using the third-party tool
// "Debezium" as the CDC client.
if (this instanceof MssqlSourceDatatypeTest) {
// create table dbo_1_hierarchyid1 (test_column hierarchyid);
// insert dbo_1_hierarchyid1 values ('/1/1/');
// select test_column ,test_column.ToString() AS [Node Text],test_column.GetLevel() [Node Level]
// from dbo_1_hierarchyid1;
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("hierarchyid")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'/1/1/'", "null")
.addExpectedValues("/1/1/", null)
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("sql_variant")
.airbyteType(JsonSchemaType.STRING)
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
null, "\\xF0\\x9F\\x9A\\x80")
.createTablePatternSql(CREATE_TABLE_SQL)
.build());
}
addDataTypeTestData(
TestDataHolder.builder()
.sourceType("int")
.airbyteType(JsonSchemaType.INTEGER)
.addInsertValues("null", "1234", "7878")
.addExpectedValues(null, "1234", "7878")
.createTablePatternSql("CREATE TABLE %1$s(%2$s INTEGER NULL DEFAULT ((7878)), %3$s %4$s)")
.build());
}
}

View File

@@ -1,152 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.Lists;
import io.airbyte.cdk.db.Database;
import io.airbyte.cdk.db.factory.DSLContextFactory;
import io.airbyte.cdk.db.factory.DatabaseDriver;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.commons.functional.CheckedFunction;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.ConnectorSpecification;
import io.airbyte.protocol.models.v0.DestinationSyncMode;
import io.airbyte.protocol.models.v0.SyncMode;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.List;
import org.jooq.SQLDialect;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class AbstractSshMssqlSourceAcceptanceTest extends SourceAcceptanceTest {
private static final Logger LOGGER = LoggerFactory.getLogger(AbstractSshMssqlSourceAcceptanceTest.class);
private static final String SCHEMA_NAME = "dbo";
private static final String STREAM_NAME = "id_and_name";
private static final String STREAM_NAME2 = "starships";
public abstract SshTunnel.TunnelMethod getTunnelMethod();
private final SshBastionContainer bastion = new SshBastionContainer();
private MsSQLTestDatabase testdb;
@Override
protected JsonNode getConfig() {
try {
return testdb.integrationTestConfigBuilder()
.withoutSsl()
.with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), true))
.build();
} catch (IOException e) {
throw new UncheckedIOException(e);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
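// Seeds the id_and_name table through the SSH tunnel: sshWrap stands up the tunnel against
// the bastion, rewrites the host/port entries of the config, and hands the mangled config
// to getDatabaseFromConfig below.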
private void populateDatabaseTestData() throws Exception {
final var outerConfig = testdb.integrationTestConfigBuilder()
.withSchemas("public")
.withoutSsl()
.with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false))
.build();
SshTunnel.sshWrap(
outerConfig,
JdbcUtils.HOST_LIST_KEY,
JdbcUtils.PORT_LIST_KEY,
(CheckedFunction<JsonNode, List<JsonNode>, Exception>) mangledConfig -> getDatabaseFromConfig(mangledConfig)
.query(ctx -> {
ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));");
ctx.fetch("INSERT INTO id_and_name (id, name, born) VALUES " +
"(1, 'picard', '2124-03-04T01:01:01Z'), " +
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
"(3, 'vash', '2124-03-04T01:01:01Z');");
return null;
}));
}
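// Builds a jOOQ Database directly from a (possibly tunnel-mangled) config; TLS is disabled
// for this in-network hop via encrypt=false and trustServerCertificate=true.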
private static Database getDatabaseFromConfig(final JsonNode config) {
return new Database(
DSLContextFactory.create(
config.get(JdbcUtils.USERNAME_KEY).asText(),
config.get(JdbcUtils.PASSWORD_KEY).asText(),
DatabaseDriver.MSSQLSERVER.getDriverClassName(),
String.format(DatabaseDriver.MSSQLSERVER.getUrlFormatString(),
config.get(JdbcUtils.HOST_KEY).asText(),
config.get(JdbcUtils.PORT_KEY).asInt(),
config.get(JdbcUtils.DATABASE_KEY).asText()) + ";encrypt=false;trustServerCertificate=true",
SQLDialect.DEFAULT));
}
@Override
protected void setupEnvironment(final TestDestinationEnv environment) throws Exception {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
LOGGER.info("starting bastion");
bastion.initAndStartBastion(testdb.getContainer().getNetwork());
LOGGER.info("bastion started");
populateDatabaseTestData();
}
@Override
protected void tearDown(final TestDestinationEnv testEnv) {
bastion.stopAndClose();
}
@Override
protected String getImageName() {
return "airbyte/source-mssql:dev";
}
@Override
protected ConnectorSpecification getSpec() throws Exception {
return SshHelpers.getSpecAndInjectSsh();
}
@Override
protected ConfiguredAirbyteCatalog getConfiguredCatalog() {
return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME, SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))),
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME2, SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)))));
}
@Override
protected JsonNode getState() {
return Jsons.jsonNode(new HashMap<>());
}
}

View File

@@ -1,247 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.protocol.models.v0.SyncMode.FULL_REFRESH;
import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL;
import static org.junit.jupiter.api.Assertions.*;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import io.airbyte.protocol.models.v0.AirbyteStreamState;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.ConnectorSpecification;
import io.airbyte.protocol.models.v0.DestinationSyncMode;
import io.airbyte.protocol.models.v0.SyncMode;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.TestInstance.Lifecycle;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
@TestInstance(Lifecycle.PER_METHOD)
@Execution(ExecutionMode.CONCURRENT)
public class CdcMssqlSourceAcceptanceTest extends SourceAcceptanceTest {
private static final String SCHEMA_NAME = "dbo";
private static final String STREAM_NAME = "id_and_name";
private static final String STREAM_NAME2 = "starships";
private static final String CDC_ROLE_NAME = "cdc_selector";
private static final String STREAM_NAME3 = "stream3";
private MsSQLTestDatabase testdb;
@Override
protected String getImageName() {
return "airbyte/source-mssql:dev";
}
@Override
protected ConnectorSpecification getSpec() throws Exception {
return SshHelpers.getSpecAndInjectSsh();
}
@Override
protected JsonNode getConfig() {
return testdb.integrationTestConfigBuilder()
.withCdcReplication()
.withoutSsl()
.build();
}
@Override
protected ConfiguredAirbyteCatalog getConfiguredCatalog() {
return new ConfiguredAirbyteCatalog().withStreams(getConfiguredAirbyteStreams());
}
protected List<ConfiguredAirbyteStream> getConfiguredAirbyteStreams() {
return Lists.newArrayList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME, SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withSourceDefinedCursor(true)
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))),
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME2, SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withSourceDefinedCursor(true)
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))));
}
@Override
protected JsonNode getState() {
return null;
}
@Override
protected void setupEnvironment(final TestDestinationEnv environment) {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT);
testdb
.withWaitUntilAgentRunning()
.withCdc()
// create tables
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME)
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
// populate tables
.with("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", SCHEMA_NAME, STREAM_NAME)
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", SCHEMA_NAME, STREAM_NAME2)
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3)
// enable cdc on tables for designated role
.withCdcForTable(SCHEMA_NAME, STREAM_NAME, CDC_ROLE_NAME)
.withCdcForTable(SCHEMA_NAME, STREAM_NAME2, CDC_ROLE_NAME)
.withCdcForTable(SCHEMA_NAME, STREAM_NAME3, CDC_ROLE_NAME)
// revoke user permissions
.with("REVOKE ALL FROM %s CASCADE;", testdb.getUserName())
.with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testdb.getUserName())
// grant user permissions
.with("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testdb.getUserName())
.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName())
.with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testdb.getUserName())
.withWaitUntilMaxLsnAvailable();
}
@Override
protected void tearDown(final TestDestinationEnv testEnv) {
testdb.close();
}
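// Exercises the global-state handoff: sync a single stream, then resume from its last state
// message with a second stream added, and assert that both stream states are tracked.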
@Test
void testAddNewStreamToExistingSync() throws Exception {
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
new ConfiguredAirbyteCatalog().withStreams(List.of(getConfiguredAirbyteStreams().get(0)));
// Start a sync with one stream
final List<AirbyteMessage> messages = runRead(configuredCatalogWithOneStream);
final List<AirbyteRecordMessage> recordMessages = filterRecords(messages);
final List<AirbyteStateMessage> stateMessages = filterStateMessages(messages);
final List<AirbyteStreamState> streamStates = stateMessages.get(0).getGlobal().getStreamStates();
assertEquals(3, recordMessages.size());
assertEquals(2, stateMessages.size());
assertEquals(1, streamStates.size());
assertEquals(STREAM_NAME, streamStates.get(0).getStreamDescriptor().getName());
assertEquals(SCHEMA_NAME, streamStates.get(0).getStreamDescriptor().getNamespace());
final AirbyteStateMessage lastStateMessage = Iterables.getLast(stateMessages);
final ConfiguredAirbyteCatalog configuredCatalogWithTwoStreams = configuredCatalogWithOneStream.withStreams(getConfiguredAirbyteStreams());
// Start another sync with a newly added stream
final List<AirbyteMessage> messages2 = runRead(configuredCatalogWithTwoStreams, Jsons.jsonNode(List.of(lastStateMessage)));
final List<AirbyteRecordMessage> recordMessages2 = filterRecords(messages2);
final List<AirbyteStateMessage> stateMessages2 = filterStateMessages(messages2);
assertEquals(3, recordMessages2.size());
assertEquals(2, stateMessages2.size());
final AirbyteStateMessage lastStateMessage2 = Iterables.getLast(stateMessages2);
final List<AirbyteStreamState> streamStates2 = lastStateMessage2.getGlobal().getStreamStates();
assertEquals(2, streamStates2.size());
assertEquals(STREAM_NAME, streamStates2.get(0).getStreamDescriptor().getName());
assertEquals(SCHEMA_NAME, streamStates2.get(0).getStreamDescriptor().getNamespace());
assertEquals(STREAM_NAME2, streamStates2.get(1).getStreamDescriptor().getName());
assertEquals(SCHEMA_NAME, streamStates2.get(1).getStreamDescriptor().getNamespace());
}
private List<AirbyteStateMessage> filterStateMessages(final List<AirbyteMessage> messages) {
return messages.stream().filter(r -> r.getType() == AirbyteMessage.Type.STATE).map(AirbyteMessage::getState)
.collect(Collectors.toList());
}
@Test
protected void testNullValueConversion() throws Exception {
final List<ConfiguredAirbyteStream> configuredAirbyteStreams =
Lists.newArrayList(new ConfiguredAirbyteStream()
.withSyncMode(INCREMENTAL)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(STREAM_NAME3,
SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING),
Field.of("userid", JsonSchemaType.NUMBER))
.withSourceDefinedCursor(true)
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
.withSupportedSyncModes(Lists.newArrayList(FULL_REFRESH, INCREMENTAL))));
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0)));
final List<AirbyteMessage> airbyteMessages = runRead(configuredCatalogWithOneStream, getState());
final List<AirbyteRecordMessage> recordMessages = filterRecords(airbyteMessages);
final List<AirbyteStateMessage> stateMessages = airbyteMessages
.stream()
.filter(m -> m.getType() == AirbyteMessage.Type.STATE)
.map(AirbyteMessage::getState)
.collect(Collectors.toList());
assertEquals(1, recordMessages.size());
assertFalse(stateMessages.isEmpty(), "Expected the sync to emit at least one state message.");
ObjectMapper mapper = new ObjectMapper();
assertEquals(
mapper.readTree("{\"id\":4, \"name\":\"voyager\", \"userid\":null}"),
cdcFieldsOmitted(recordMessages.get(0).getData()));
// Insert a new row, then run another incremental sync from the latest state message; only the
// newly inserted record should be emitted.
JsonNode latestState = extractLatestState(stateMessages);
testdb.getDatabase().query(c -> c.query("INSERT INTO %s.%s (id, name) VALUES (5,'deep space nine')".formatted(SCHEMA_NAME, STREAM_NAME3)))
.execute();
Objects.requireNonNull(latestState);
final List<AirbyteRecordMessage> secondSyncRecords = filterRecords(runRead(configuredCatalogWithOneStream, latestState));
assertFalse(
secondSyncRecords.isEmpty(),
"Expected the second incremental sync to produce records.");
assertEquals(
mapper.readTree("{\"id\":5, \"name\":\"deep space nine\", \"userid\":null}"),
cdcFieldsOmitted(secondSyncRecords.get(0).getData()));
}
private JsonNode cdcFieldsOmitted(final JsonNode node) {
ObjectMapper mapper = new ObjectMapper();
ObjectNode object = mapper.createObjectNode();
node.fieldNames().forEachRemaining(name -> {
if (!name.toLowerCase().startsWith("_ab_cdc_")) {
object.set(name, node.get(name));
}
});
return object;
}
}

View File

@@ -1,79 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.Database;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.TestInstance.Lifecycle;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
@TestInstance(Lifecycle.PER_METHOD)
@Execution(ExecutionMode.CONCURRENT)
public class CdcMssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest {
private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
@Override
protected JsonNode getConfig() {
return testdb.integrationTestConfigBuilder()
.withCdcReplication()
.withoutSsl()
.build();
}
@Override
protected Database setupDatabase() {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT)
.withCdc();
return testdb.getDatabase();
}
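// Fans the setup out over the thread pool in two phases: first every CREATE TABLE statement,
// then CDC enablement for each newly created table.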
protected void createTables() throws Exception {
List<Callable<MsSQLTestDatabase>> createTableTasks = new ArrayList<>();
List<Callable<MsSQLTestDatabase>> enableCdcForTableTasks = new ArrayList<>();
for (var test : testDataHolders) {
createTableTasks.add(() -> testdb.with(test.getCreateSqlQuery()));
enableCdcForTableTasks.add(() -> testdb.withCdcForTable(test.getNameSpace(), test.getNameWithTestPrefix(), null));
}
executor.invokeAll(createTableTasks);
executor.invokeAll(enableCdcForTableTasks);
}
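// Inserts run in parallel as well; the second phase blocks until the expected number of CDC
// change records is visible for every table.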
protected void populateTables() throws Exception {
List<Callable<MsSQLTestDatabase>> insertTasks = new ArrayList<>();
List<Callable<MsSQLTestDatabase>> waitForCdcRecordsTasks = new ArrayList<>();
for (var test : testDataHolders) {
insertTasks.add(() -> {
this.database.query((ctx) -> {
List<String> sql = test.getInsertSqlQueries();
Objects.requireNonNull(ctx);
sql.forEach(ctx::fetch);
return null;
});
return null;
});
waitForCdcRecordsTasks.add(() -> testdb.waitForCdcRecords(test.getNameSpace(), test.getNameWithTestPrefix(), test.getExpectedValues().size()));
}
executor.invokeAll(insertTasks);
executor.invokeAll(waitForCdcRecordsTasks);
}
@Override
public boolean testCatalog() {
return true;
}
}

View File

@@ -1,48 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.commons.features.FeatureFlags;
import io.airbyte.commons.features.FeatureFlagsWrapper;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
public class CloudDeploymentSslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest {
@Override
protected void setupEnvironment(final TestDestinationEnv environment) {
final var container = new MsSQLContainerFactory().shared(BaseImage.MSSQL_2022.reference);
testdb = new MsSQLTestDatabase(container);
testdb = testdb
.withConnectionProperty("encrypt", "true")
.withConnectionProperty("trustServerCertificate", "true")
.withConnectionProperty("databaseName", testdb.getDatabaseName())
.initialized()
.with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));")
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
.with("INSERT INTO id_and_name (id, name, born) VALUES " +
"(1,'picard', '2124-03-04T01:01:01Z'), " +
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
"(3, 'vash', '2124-03-04T01:01:01Z');")
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2)
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3);
}
@Override
protected FeatureFlags featureFlags() {
return FeatureFlagsWrapper.overridingDeploymentMode(super.featureFlags(), "CLOUD");
}
@Override
protected JsonNode getConfig() {
return testdb.integrationTestConfigBuilder()
.withEncrytedTrustServerCertificate()
.build();
}
}

View File

@@ -1,215 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import io.airbyte.protocol.models.v0.AirbyteStreamState;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.ConnectorSpecification;
import io.airbyte.protocol.models.v0.DestinationSyncMode;
import io.airbyte.protocol.models.v0.SyncMode;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
public class MssqlSourceAcceptanceTest extends SourceAcceptanceTest {
protected static final String SCHEMA_NAME = "dbo";
protected static final String STREAM_NAME = "id_and_name";
protected static final String STREAM_NAME2 = "starships";
protected static final String STREAM_NAME3 = "stream3";
protected MsSQLTestDatabase testdb;
@Override
protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022)
.with("CREATE TABLE %s.%s (id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));", SCHEMA_NAME, STREAM_NAME)
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
.with("INSERT INTO id_and_name (id, name, born) VALUES " +
"(1, 'picard', '2124-03-04T01:01:01Z'), " +
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
"(3, 'vash', '2124-03-04T01:01:01Z');")
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2)
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3);
}
@Override
protected void tearDown(final TestDestinationEnv testEnv) {
testdb.close();
}
@Override
protected String getImageName() {
return "airbyte/source-mssql:dev";
}
@Override
protected ConnectorSpecification getSpec() throws Exception {
return SshHelpers.getSpecAndInjectSsh();
}
@Override
protected JsonNode getConfig() {
return testdb.integrationTestConfigBuilder()
.withoutSsl()
.build();
}
@Override
protected ConfiguredAirbyteCatalog getConfiguredCatalog() {
return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
new ConfiguredAirbyteStream()
.withSyncMode(INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME, SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, INCREMENTAL))),
new ConfiguredAirbyteStream()
.withSyncMode(INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME2, SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, INCREMENTAL)))));
}
@Override
protected JsonNode getState() {
return Jsons.jsonNode(new HashMap<>());
}
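// Runs the same add-a-stream scenario as the CDC acceptance test, but against per-stream,
// cursor-based state rather than global CDC state.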
@Test
protected void testAddNewStreamToExistingSync() throws Exception {
final List<ConfiguredAirbyteStream> configuredAirbyteStreams =
Lists.newArrayList(CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME,
SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(INCREMENTAL)
.withCursorField(List.of("id")),
CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME2,
SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(INCREMENTAL)
.withCursorField(List.of("id")));
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0)));
// Start a sync with one stream
final List<AirbyteMessage> messages = runRead(withSourceDefinedCursors(configuredCatalogWithOneStream));
final List<AirbyteRecordMessage> recordMessages = filterRecords(messages);
final List<AirbyteStateMessage> stateMessages = filterStateMessages(messages);
final AirbyteStateMessage lastStateMessage = Iterables.getLast(stateMessages);
final AirbyteStreamState streamState = lastStateMessage.getStream();
assertEquals(3, recordMessages.size());
assertEquals(1, stateMessages.size());
assertEquals(STREAM_NAME, streamState.getStreamDescriptor().getName());
assertEquals(SCHEMA_NAME, streamState.getStreamDescriptor().getNamespace());
final ConfiguredAirbyteCatalog configuredCatalogWithTwoStreams =
new ConfiguredAirbyteCatalog().withStreams(configuredAirbyteStreams);
// Start another sync with a newly added stream
final List<AirbyteMessage> messages2 = runRead(configuredCatalogWithTwoStreams, Jsons.jsonNode(List.of(lastStateMessage)));
final List<AirbyteRecordMessage> recordMessages2 = filterRecords(messages2);
final List<AirbyteStateMessage> stateMessages2 = filterStateMessages(messages2);
assertEquals(4, recordMessages2.size());
assertEquals(2, stateMessages2.size());
assertEquals(STREAM_NAME, stateMessages2.get(0).getStream().getStreamDescriptor().getName());
assertEquals(SCHEMA_NAME, stateMessages2.get(0).getStream().getStreamDescriptor().getNamespace());
assertEquals(STREAM_NAME2, stateMessages2.get(1).getStream().getStreamDescriptor().getName());
assertEquals(SCHEMA_NAME, stateMessages2.get(1).getStream().getStreamDescriptor().getNamespace());
}
@Test
protected void testNullValueConversion() throws Exception {
final List<ConfiguredAirbyteStream> configuredAirbyteStreams =
Lists.newArrayList(CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME3,
SCHEMA_NAME,
Field.of("id", JsonSchemaType.NUMBER),
Field.of("name", JsonSchemaType.STRING),
Field.of("userid", JsonSchemaType.NUMBER))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(INCREMENTAL)
.withCursorField(List.of("id")));
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0)));
final List<AirbyteMessage> airbyteMessages = runRead(configuredCatalogWithOneStream, getState());
final List<AirbyteRecordMessage> recordMessages = filterRecords(airbyteMessages);
final List<AirbyteStateMessage> stateMessages = airbyteMessages
.stream()
.filter(m -> m.getType() == AirbyteMessage.Type.STATE)
.map(AirbyteMessage::getState)
.collect(Collectors.toList());
assertEquals(1, recordMessages.size());
assertFalse(stateMessages.isEmpty(), "Expected the sync to emit at least one state message.");
ObjectMapper mapper = new ObjectMapper();
assertEquals(
mapper.readTree("{\"id\":4, \"name\":\"voyager\", \"userid\":null}"),
recordMessages.get(0).getData());
// Insert a new row, then run another incremental sync from the latest state message; only the
// newly inserted record should be emitted.
JsonNode latestState = extractLatestState(stateMessages);
testdb.getDatabase().query(c -> {
return c.query("INSERT INTO %s.%s (id, name) VALUES (5,'deep space nine');".formatted(SCHEMA_NAME, STREAM_NAME3));
}).execute();
Objects.requireNonNull(latestState);
final List<AirbyteRecordMessage> secondSyncRecords = filterRecords(runRead(configuredCatalogWithOneStream, latestState));
assertFalse(
secondSyncRecords.isEmpty(),
"Expected the second incremental sync to produce records.");
assertEquals(
mapper.readTree("{\"id\":5, \"name\":\"deep space nine\", \"userid\":null}"),
secondSyncRecords.get(0).getData());
}
private List<AirbyteStateMessage> filterStateMessages(final List<AirbyteMessage> messages) {
return messages.stream().filter(r -> r.getType() == AirbyteMessage.Type.STATE).map(AirbyteMessage::getState)
.collect(Collectors.toList());
}
}

View File

@@ -1,31 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.Database;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
public class MssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest {
@Override
protected Database setupDatabase() {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
return testdb.getDatabase();
}
@Override
protected JsonNode getConfig() {
return testdb.integrationTestConfigBuilder()
.withoutSsl()
.build();
}
@Override
public boolean testCatalog() {
return true;
}
}

View File

@@ -1,94 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import java.sql.Connection;
import java.sql.JDBCType;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class MssqlSourceOperationsTest {
private final MssqlSourceOperations mssqlSourceOperations = new MssqlSourceOperations();
private MsSQLTestDatabase testdb;
private final String cursorColumn = "cursor_column";
@BeforeEach
public void init() {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
}
@AfterEach
public void tearDown() {
testdb.close();
}
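// Verifies that a DATETIMEOFFSET column can act as an incremental cursor: the operations
// class must bind the anchor value into the prepared statement and read the column back
// into JSON without losing the offset.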
@Test
public void setDateTimeOffsetColumnAsCursor() throws SQLException {
final String tableName = "datetimeoffset_table";
final String createTableQuery = String.format("CREATE TABLE %s(id INTEGER PRIMARY KEY IDENTITY(1,1), %s DATETIMEOFFSET(7));",
tableName,
cursorColumn);
executeQuery(createTableQuery);
final List<JsonNode> expectedRecords = new ArrayList<>();
for (int i = 1; i <= 4; i++) {
final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap());
// Manually generate DATETIMEOFFSET data
final String cursorValue = String.format("'2023-0%s-10T10:00:00.100000Z'", i);
jsonNode.put("id", i);
// Remove single quotes from string since the date being retrieved will not have quotes
jsonNode.put(cursorColumn, cursorValue.replaceAll("\'", ""));
final String insertQuery = String.format("INSERT INTO %s (%s) VALUES (CAST(%s as DATETIMEOFFSET))", tableName, cursorColumn, cursorValue);
executeQuery(insertQuery);
expectedRecords.add(jsonNode);
}
final String cursorAnchorValue = "2023-01-01T00:00:00.000000+00:00";
final List<JsonNode> actualRecords = new ArrayList<>();
try (final Connection connection = testdb.getContainer().createConnection("")) {
final PreparedStatement preparedStatement = connection.prepareStatement(
"SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?");
mssqlSourceOperations.setCursorField(preparedStatement,
1,
JDBCType.TIMESTAMP_WITH_TIMEZONE,
cursorAnchorValue);
try (final ResultSet resultSet = preparedStatement.executeQuery()) {
final int columnCount = resultSet.getMetaData().getColumnCount();
while (resultSet.next()) {
final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap());
for (int i = 1; i <= columnCount; i++) {
mssqlSourceOperations.copyToJsonField(resultSet, i, jsonNode);
}
actualRecords.add(jsonNode);
}
}
}
assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray()));
}
protected void executeQuery(final String query) throws SQLException {
try (final Connection connection = testdb.getContainer().createConnection("")) {
connection.createStatement().execute(query);
}
}
}

View File

@@ -1,16 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import io.airbyte.cdk.integrations.base.ssh.SshTunnel.TunnelMethod;
public class SshKeyMssqlSourceAcceptanceTest extends AbstractSshMssqlSourceAcceptanceTest {
@Override
public TunnelMethod getTunnelMethod() {
return TunnelMethod.SSH_KEY_AUTH;
}
}

View File

@@ -1,16 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import io.airbyte.cdk.integrations.base.ssh.SshTunnel.TunnelMethod;
public class SshPasswordMssqlSourceAcceptanceTest extends AbstractSshMssqlSourceAcceptanceTest {
@Override
public TunnelMethod getTunnelMethod() {
return TunnelMethod.SSH_PASSWORD_AUTH;
}
}

View File

@@ -1,41 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
public class SslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest {
@Override
protected JsonNode getConfig() {
return testdb.integrationTestConfigBuilder()
.withEncrytedTrustServerCertificate()
.build();
}
@Override
protected void setupEnvironment(final TestDestinationEnv environment) {
final var container = new MsSQLContainerFactory().shared(BaseImage.MSSQL_2022.reference);
testdb = new MsSQLTestDatabase(container);
testdb = testdb
.withConnectionProperty("encrypt", "true")
.withConnectionProperty("trustServerCertificate", "true")
.withConnectionProperty("databaseName", testdb.getDatabaseName())
.initialized()
.with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));")
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
.with("INSERT INTO id_and_name (id, name, born) VALUES " +
"(1, 'picard', '2124-03-04T01:01:01Z'), " +
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
"(3, 'vash', '2124-03-04T01:01:01Z');")
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2)
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3);
}
}

View File

@@ -1,7 +0,0 @@
{
"host": "default",
"port": 5555,
"database": "default",
"username": "default",
"password": "default"
}

View File

@@ -1,313 +0,0 @@
{
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/mssql",
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "MSSQL Source Spec",
"type": "object",
"required": ["host", "port", "database", "username", "password"],
"properties": {
"host": {
"description": "The hostname of the database.",
"title": "Host",
"type": "string",
"order": 0
},
"port": {
"description": "The port of the database.",
"title": "Port",
"type": "integer",
"minimum": 0,
"maximum": 65536,
"examples": ["1433"],
"order": 1
},
"database": {
"description": "The name of the database.",
"title": "Database",
"type": "string",
"examples": ["master"],
"order": 2
},
"schemas": {
"title": "Schemas",
"description": "The list of schemas to sync from. Defaults to user. Case sensitive.",
"type": "array",
"items": {
"type": "string"
},
"minItems": 0,
"uniqueItems": true,
"default": ["dbo"],
"order": 3
},
"username": {
"description": "The username which is used to access the database.",
"title": "Username",
"type": "string",
"order": 4
},
"password": {
"description": "The password associated with the username.",
"title": "Password",
"type": "string",
"airbyte_secret": true,
"order": 5
},
"jdbc_url_params": {
"title": "JDBC URL Params",
"description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).",
"type": "string",
"order": 6
},
"ssl_method": {
"title": "SSL Method",
"type": "object",
"description": "The encryption method which is used when communicating with the database.",
"order": 7,
"oneOf": [
{
"title": "Unencrypted",
"description": "Data transfer will not be encrypted.",
"required": ["ssl_method"],
"properties": {
"ssl_method": {
"type": "string",
"const": "unencrypted"
}
}
},
{
"title": "Encrypted (trust server certificate)",
"description": "Use the certificate provided by the server without verification. (For testing purposes only!)",
"required": ["ssl_method"],
"properties": {
"ssl_method": {
"type": "string",
"const": "encrypted_trust_server_certificate"
}
}
},
{
"title": "Encrypted (verify certificate)",
"description": "Verify and use the certificate provided by the server.",
"required": ["ssl_method"],
"properties": {
"ssl_method": {
"type": "string",
"const": "encrypted_verify_certificate"
},
"hostNameInCertificate": {
"title": "Host Name In Certificate",
"type": "string",
"description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
"order": 0
},
"certificate": {
"title": "Certificate",
"type": "string",
"description": "certificate of the server, or of the CA that signed the server certificate",
"order": 1,
"airbyte_secret": true,
"multiline": true
}
}
}
]
},
"replication_method": {
"type": "object",
"title": "Update Method",
"description": "Configures how data is extracted from the database.",
"default": "CDC",
"display_type": "radio",
"order": 8,
"oneOf": [
{
"title": "Read Changes using Change Data Capture (CDC)",
"description": "<i>Recommended</i> - Incrementally reads new inserts, updates, and deletes using the SQL Server's <a href=\"https://docs.airbyte.com/integrations/sources/mssql/#change-data-capture-cdc\">change data capture feature</a>. This must be enabled on your database.",
"required": ["method"],
"properties": {
"method": {
"type": "string",
"const": "CDC",
"order": 0
},
"initial_waiting_seconds": {
"type": "integer",
"title": "Initial Waiting Time in Seconds (Advanced)",
"description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about <a href=\"https://docs.airbyte.com/integrations/sources/mysql/#change-data-capture-cdc\">initial waiting time</a>.",
"default": 300,
"min": 120,
"max": 3600,
"order": 3
},
"invalid_cdc_cursor_position_behavior": {
"type": "string",
"title": "Invalid CDC position behavior (Advanced)",
"description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
"enum": ["Fail sync", "Re-sync data"],
"default": "Fail sync",
"order": 4
},
"queue_size": {
"type": "integer",
"title": "Size of the queue (Advanced)",
"description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.",
"default": 10000,
"order": 5,
"min": 1000,
"max": 10000
},
"initial_load_timeout_hours": {
"type": "integer",
"title": "Initial Load Timeout in Hours (Advanced)",
"description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
"default": 8,
"min": 4,
"max": 24,
"order": 6
}
}
},
{
"title": "Scan Changes with User Defined Cursor",
"description": "Incrementally detects new inserts and updates using the <a href=\"https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor\">cursor column</a> chosen when configuring a connection (e.g. created_at, updated_at).",
"required": ["method"],
"properties": {
"method": {
"type": "string",
"const": "STANDARD",
"order": 0
},
"exclude_todays_data": {
"title": "Exclude Today's Data",
"description": "When enabled incremental syncs using a cursor of a temporal types (date or datetime) will include cursor values only up until last midnight (Advanced)",
"default": false,
"type": "boolean",
"always_show": true,
"order": 1
}
}
}
]
},
"tunnel_method": {
"type": "object",
"title": "SSH Tunnel Method",
"description": "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use.",
"oneOf": [
{
"title": "No Tunnel",
"required": ["tunnel_method"],
"properties": {
"tunnel_method": {
"description": "No ssh tunnel needed to connect to database",
"type": "string",
"const": "NO_TUNNEL",
"order": 0
}
}
},
{
"title": "SSH Key Authentication",
"required": [
"tunnel_method",
"tunnel_host",
"tunnel_port",
"tunnel_user",
"ssh_key"
],
"properties": {
"tunnel_method": {
"description": "Connect through a jump server tunnel host using username and ssh key",
"type": "string",
"const": "SSH_KEY_AUTH",
"order": 0
},
"tunnel_host": {
"title": "SSH Tunnel Jump Server Host",
"description": "Hostname of the jump server host that allows inbound ssh tunnel.",
"type": "string",
"order": 1
},
"tunnel_port": {
"title": "SSH Connection Port",
"description": "Port on the proxy/jump server that accepts inbound ssh connections.",
"type": "integer",
"minimum": 0,
"maximum": 65536,
"default": 22,
"examples": ["22"],
"order": 2
},
"tunnel_user": {
"title": "SSH Login Username",
"description": "OS-level username for logging into the jump server host.",
"type": "string",
"order": 3
},
"ssh_key": {
"title": "SSH Private Key",
"description": "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )",
"type": "string",
"airbyte_secret": true,
"multiline": true,
"order": 4
}
}
},
{
"title": "Password Authentication",
"required": [
"tunnel_method",
"tunnel_host",
"tunnel_port",
"tunnel_user",
"tunnel_user_password"
],
"properties": {
"tunnel_method": {
"description": "Connect through a jump server tunnel host using username and password authentication",
"type": "string",
"const": "SSH_PASSWORD_AUTH",
"order": 0
},
"tunnel_host": {
"title": "SSH Tunnel Jump Server Host",
"description": "Hostname of the jump server host that allows inbound ssh tunnel.",
"type": "string",
"order": 1
},
"tunnel_port": {
"title": "SSH Connection Port",
"description": "Port on the proxy/jump server that accepts inbound ssh connections.",
"type": "integer",
"minimum": 0,
"maximum": 65536,
"default": 22,
"examples": ["22"],
"order": 2
},
"tunnel_user": {
"title": "SSH Login Username",
"description": "OS-level username for logging into the jump server host",
"type": "string",
"order": 3
},
"tunnel_user_password": {
"title": "Password",
"description": "OS-level password for logging into the jump server host",
"type": "string",
"airbyte_secret": true,
"order": 4
}
}
}
]
}
}
},
"supportsNormalization": false,
"supportsDBT": false,
"supported_destination_sync_modes": []
}

View File

@@ -1,82 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.cdk.db.Database;
import io.airbyte.cdk.db.factory.DSLContextFactory;
import io.airbyte.cdk.db.factory.DatabaseDriver;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.cdk.integrations.standardtest.source.performancetest.AbstractSourceFillDbWithTestData;
import io.airbyte.commons.json.Jsons;
import java.util.stream.Stream;
import org.jooq.DSLContext;
import org.junit.jupiter.params.provider.Arguments;
public class FillMsSqlTestDbScriptTest extends AbstractSourceFillDbWithTestData {
private JsonNode config;
private DSLContext dslContext;
@Override
protected JsonNode getConfig() {
return config;
}
@Override
protected void tearDown(final TestDestinationEnv testEnv) {}
@Override
protected String getImageName() {
return "airbyte/source-mssql:dev";
}
@Override
protected Database setupDatabase(final String dbName) {
final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder()
.put("method", "Standard")
.build());
config = Jsons.jsonNode(ImmutableMap.builder()
.put(JdbcUtils.HOST_KEY, "your_host")
.put(JdbcUtils.PORT_KEY, 1433)
.put(JdbcUtils.DATABASE_KEY, dbName) // set your db name
.put(JdbcUtils.USERNAME_KEY, "your_username")
.put(JdbcUtils.PASSWORD_KEY, "your_pass")
.put("replication_method", replicationMethod)
.build());
dslContext = DSLContextFactory.create(
config.get(JdbcUtils.USERNAME_KEY).asText(),
config.get(JdbcUtils.PASSWORD_KEY).asText(),
DatabaseDriver.MSSQLSERVER.getDriverClassName(),
String.format("jdbc:sqlserver://%s:%s;databaseName=%s;",
config.get(JdbcUtils.HOST_KEY).asText(),
config.get(JdbcUtils.PORT_KEY).asInt(),
dbName),
null);
return new Database(dslContext);
}
/**
 * Data provider for the fill-DB script. Each group of arguments is run as a separate test.
 * 1st arg - name of the DB used in the JDBC connection string. 2nd arg - schema name used as
 * the namespace in the configured Airbyte catalog. 3rd arg - expected number of records
 * retrieved in each stream. 4th arg - number of message batches
 * (numberOfMessages*numberOfBatches, e.g. 100*2=200 messages in total in each stream).
 * 5th arg - number of columns in each stream/table used for the Airbyte catalog
 * configuration. 6th arg - number of streams to read in the configured Airbyte catalog.
 * Each stream/table in the DB should be named "test_0", "test_1", ..., "test_n".
 */
@Override
protected Stream<Arguments> provideParameters() {
return Stream.of(Arguments.of("your_db_name", "dbo", 100, 2, 240, 1000) // "dbo" is a default schema name in MsSQl DB
);
}
}

View File

@@ -1,55 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.standardtest.source.performancetest.AbstractSourcePerformanceTest;
import io.airbyte.commons.io.IOs;
import io.airbyte.commons.json.Jsons;
import java.nio.file.Path;
import java.util.stream.Stream;
import org.junit.jupiter.params.provider.Arguments;
public class MssqlSourcePerformanceTest extends AbstractSourcePerformanceTest {
private static final String PERFORMANCE_SECRET_CREDS = "secrets/performance-config.json";
@Override
protected String getImageName() {
return "airbyte/source-mssql:dev";
}
@Override
protected void setupDatabase(final String dbName) {
final JsonNode plainConfig = Jsons.deserialize(IOs.readFile(Path.of(PERFORMANCE_SECRET_CREDS)));
setConfig(Jsons.jsonNode(ImmutableMap.builder()
.put(JdbcUtils.HOST_KEY, plainConfig.get(JdbcUtils.HOST_KEY))
.put(JdbcUtils.PORT_KEY, plainConfig.get(JdbcUtils.PORT_KEY))
.put(JdbcUtils.DATABASE_KEY, dbName)
.put(JdbcUtils.USERNAME_KEY, plainConfig.get(JdbcUtils.USERNAME_KEY))
.put(JdbcUtils.PASSWORD_KEY, plainConfig.get(JdbcUtils.PASSWORD_KEY))
.build()));
}
/**
 * Data provider for the performance tests. Each group of arguments is run as a separate
 * test. 1st arg - name of the DB used in the JDBC connection string. 2nd arg - schema name
 * used as the namespace in the configured Airbyte catalog. 3rd arg - expected number of
 * records retrieved in each stream. 4th arg - number of columns in each stream/table used
 * for the Airbyte catalog configuration. 5th arg - number of streams to read in the
 * configured Airbyte catalog. Each stream/table in the DB should be named "test_0",
 * "test_1", ..., "test_n".
 */
@Override
protected Stream<Arguments> provideParameters() {
return Stream.of(
Arguments.of("t1000_c240_r200", "dbo", 200, 240, 1000),
Arguments.of("t25_c8_r50k_s10kb", "dbo", 50000, 8, 25),
Arguments.of("t1000_c8_r10k_s500b", "dbo", 10000, 8, 1000));
}
}

View File

@@ -1,305 +0,0 @@
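-- table_copy clones the seeded "test" table into test_1 .. test_(@tablecount - 1) via
-- SELECT ... INTO; the original "test" table is renamed to test_0 at the end of the script.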
CREATE
PROCEDURE table_copy(
@tablecount INT
) AS BEGIN
SET
nocount ON;
DECLARE @v_max_table INT;
DECLARE @v_counter_table INT;
DECLARE @tnamee VARCHAR(255);
SET
@v_max_table = @tablecount;
SET
@v_counter_table = 1;
while @v_counter_table < @v_max_table BEGIN
SET
@tnamee = concat(
'SELECT * INTO test_',
@v_counter_table,
' FROM test;'
);
EXEC(@tnamee);
SET
@v_counter_table = @v_counter_table + 1;
END;
END;
GO --
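-- insert_rows seeds the "test" table with @allrows rows of @value, batching @insertcount
-- rows per multi-values INSERT and issuing one final, smaller INSERT for the remainder.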
CREATE
PROCEDURE insert_rows(
@allrows INT,
@insertcount INT,
@value NVARCHAR(MAX)
) AS BEGIN
SET
nocount ON;
DECLARE @dummyIpsum VARCHAR(255) DECLARE @fieldText NVARCHAR(MAX)
SET
@fieldText = @value DECLARE @vmax INT;
DECLARE @vmaxx INT;
DECLARE @vmaxoneinsert INT;
DECLARE @counter INT;
DECLARE @lastinsertcounter INT;
DECLARE @lastinsert INT;
DECLARE @fullloop INT;
DECLARE @fullloopcounter INT;
SET
@vmax = @allrows;
SET
@vmaxx = @allrows;
SET
@vmaxoneinsert = @insertcount;
SET
@counter = 1;
SET
@lastinsertcounter = 1;
SET
@lastinsert = 0;
SET
@fullloop = 0;
SET
@fullloopcounter = 0;
SET
@dummyIpsum = '''dummy_ipsum''' while @vmaxx <= @vmaxoneinsert BEGIN
SET
@vmaxoneinsert = @vmaxx;
SET
@fullloop = @fullloop + 1;
SET
@vmaxx = @vmaxx + 1;
END;
while @vmax > @vmaxoneinsert BEGIN
SET
@fullloop = @fullloop + 1;
SET
@vmax = @vmax - @vmaxoneinsert;
SET
@lastinsert = @vmax;
END;
DECLARE @insertTable NVARCHAR(MAX)
SET
@insertTable = CONVERT(
NVARCHAR(MAX),
'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values ('
);
while @counter < @vmaxoneinsert BEGIN
SET
@insertTable = CONVERT(
NVARCHAR(MAX),
concat(
@insertTable,
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@fieldText,
', CURRENT_TIMESTAMP), ('
)
);
SET
@counter = @counter + 1;
END;
SET
@insertTable = CONVERT(
NVARCHAR(MAX),
concat(
@insertTable,
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@fieldText,
', CURRENT_TIMESTAMP);'
)
);
while @vmax < 1 BEGIN
SET
@fullloop = 0
SET
@vmax = 1
END;
while @fullloopcounter < @fullloop BEGIN EXEC(@insertTable);
SET
@fullloopcounter = @fullloopcounter + 1;
END;
DECLARE @insertTableLasted NVARCHAR(MAX);
SET
@insertTableLasted = CONVERT(
NVARCHAR(MAX),
'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values ('
);
while @lastinsertcounter < @lastinsert BEGIN
SET
@insertTableLasted = CONVERT(
NVARCHAR(MAX),
concat(
@insertTableLasted,
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@fieldText,
', CURRENT_TIMESTAMP), ('
)
);
SET
@lastinsertcounter = @lastinsertcounter + 1;
END;
SET
@insertTableLasted = CONVERT(
NVARCHAR(MAX),
concat(
@insertTableLasted,
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@dummyIpsum,
', ',
@fieldText,
', CURRENT_TIMESTAMP);'
)
);
while @lastinsert > 0 BEGIN EXEC(@insertTableLasted);
SET
@lastinsert = 0;
END;
END;
GO --
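-- table_create creates the "test" table and seeds it, via insert_rows, with records of
-- roughly 50B, 500B, 10KB and 100KB payloads; the @allrows arguments below control how many
-- records of each size are generated.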
CREATE
PROCEDURE table_create(
@val INT
) AS BEGIN
SET
nocount ON;
CREATE
TABLE
test(
id INT CHECK(
id > 0
) NOT NULL IDENTITY PRIMARY KEY,
varchar1 VARCHAR(255),
varchar2 VARCHAR(255),
varchar3 VARCHAR(255),
varchar4 VARCHAR(255),
varchar5 VARCHAR(255),
longtextfield nvarchar(MAX),
timestampfield datetime2(0)
);
DECLARE @extraSmallText NVARCHAR(MAX);
DECLARE @smallText NVARCHAR(MAX);
DECLARE @regularText NVARCHAR(MAX);
DECLARE @largeText NVARCHAR(MAX);
DECLARE @someText nvarchar(MAX);
SELECT
@someText = N'some text, some text, ';
SET
@extraSmallText = N'''test weight 50b - some text, some text, some text''';
SET
@smallText = N'''test weight 500b - ';
SET
@regularText = N'''test weight 10kb - ';
SET
@largeText = N'''test weight 100kb - ';
SELECT
@smallText = @smallText + REPLICATE(
@someText,
20
)+ N'''';
SELECT
@regularText = @regularText + REPLICATE(
@someText,
590
)+ N'some text''';
SELECT
@largeText = @largeText + REPLICATE(
@someText,
4450
)+ N'some text''';
-- TODO: change the following @allrows to control the number of records with different sizes
-- number of 50B records
EXEC insert_rows @allrows = 0,
@insertcount = 998,
@value = @extraSmallText -- number of 500B records
EXEC insert_rows @allrows = 0,
@insertcount = 998,
@value = @smallText -- number of 10Kb records
EXEC insert_rows @allrows = 0,
@insertcount = 998,
@value = @regularText -- number of 100Kb records
EXEC insert_rows @allrows = 0,
@insertcount = 98,
@value = @largeText
END;
GO --
EXEC table_create @val = 0;
DROP
PROCEDURE IF EXISTS insert_rows;
DROP
PROCEDURE IF EXISTS table_create;
-- TODO: change the value to control the number of tables
EXEC table_copy @tablecount = 1;
DROP
PROCEDURE IF EXISTS table_copy;
EXEC sp_rename 'test',
'test_0';

View File

@@ -1,749 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.ORDERED_COL_STATE_TYPE;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY;
import static org.awaitility.Awaitility.await;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import io.airbyte.cdk.db.factory.DataSourceFactory;
import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase;
import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig;
import io.airbyte.cdk.integrations.JdbcConnector;
import io.airbyte.cdk.integrations.debezium.CdcSourceTest;
import io.airbyte.cdk.integrations.debezium.CdcTargetPosition;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.util.AutoCloseableIterator;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.*;
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
import io.debezium.connector.sqlserver.Lsn;
import java.sql.SQLException;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import javax.sql.DataSource;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.TestInstance.Lifecycle;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@TestInstance(Lifecycle.PER_METHOD)
@Execution(ExecutionMode.CONCURRENT)
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH")
public class CdcMssqlSourceTest extends CdcSourceTest<MssqlSource, MsSQLTestDatabase> {
private static final Logger LOGGER = LoggerFactory.getLogger(CdcMssqlSourceTest.class);
static private final String CDC_ROLE_NAME = "cdc_selector";
static private final String TEST_USER_NAME_PREFIX = "cdc_test_user";
private DataSource testDataSource;
protected final String testUserName() {
return testdb.withNamespace(TEST_USER_NAME_PREFIX);
}
@Override
protected AirbyteCatalog expectedCatalogForDiscover() {
final String COL_ID = "id";
final String COL_MAKE_ID = "make_id";
final String COL_MODEL = "model";
final String MODELS_STREAM_NAME_2 = "models_stream_2";
final String MODELS_STREAM_NAME = "models";
AirbyteCatalog expectedCatalog = new AirbyteCatalog()
.withStreams(
java.util.List.of(
CatalogHelpers.createAirbyteStream(
MODELS_STREAM_NAME,
modelsSchema(),
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
Field.of(COL_MODEL, JsonSchemaType.STRING))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(
java.util.List.of(java.util.List.of(COL_ID)))
.withIsResumable(true)));
Map<String, String> columns = ImmutableMap.of(
COL_ID, "INTEGER",
COL_MAKE_ID, "INTEGER",
COL_MODEL, "VARCHAR(200)");
testdb.with(
createTableSqlFmt(),
modelsSchema(),
MODELS_STREAM_NAME_2,
columnClause(columns, Optional.empty()));
List<AirbyteStream> streams = new ArrayList<>(expectedCatalog.getStreams());
// stream with PK
streams.get(0).setSourceDefinedCursor(true);
streams.get(0).setIsResumable(true);
addCdcMetadataColumns(streams.get(0));
addCdcDefaultCursorField(streams.get(0));
AirbyteStream streamWithoutPK = CatalogHelpers.createAirbyteStream(
MODELS_STREAM_NAME_2,
modelsSchema(),
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
Field.of(COL_MODEL, JsonSchemaType.STRING));
streamWithoutPK.setSourceDefinedPrimaryKey(Collections.emptyList());
streamWithoutPK.setSupportedSyncModes(java.util.List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL));
streamWithoutPK.setSourceDefinedCursor(true);
addCdcDefaultCursorField(streamWithoutPK);
addCdcMetadataColumns(streamWithoutPK);
addIsResumableFlagForNonPkTable(streamWithoutPK);
AirbyteStream randomStream = CatalogHelpers.createAirbyteStream(
RANDOM_TABLE_NAME,
randomSchema(),
Field.of(COL_ID + "_random", JsonSchemaType.INTEGER),
Field.of(COL_MAKE_ID + "_random", JsonSchemaType.INTEGER),
Field.of(COL_MODEL + "_random", JsonSchemaType.STRING))
.withSourceDefinedCursor(true)
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(
java.util.List.of(java.util.List.of(COL_ID + "_random")))
.withIsResumable(true);
addCdcDefaultCursorField(randomStream);
addCdcMetadataColumns(randomStream);
streams.add(streamWithoutPK);
streams.add(randomStream);
expectedCatalog.withStreams(streams);
return expectedCatalog;
}
@Override
protected MsSQLTestDatabase createTestDatabase() {
return MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT)
.withWaitUntilAgentRunning()
.withCdc();
}
@Override
protected MssqlSource source() {
return new MssqlSource();
}
@Override
protected JsonNode config() {
return testdb.configBuilder()
.withHostAndPort()
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testUserName())
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
.withSchemas(modelsSchema(), randomSchema())
.withCdcReplication()
.withoutSsl()
.with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
.build();
}
@Override
protected void assertExpectedStateMessageCountMatches(final List<? extends AirbyteStateMessage> stateMessages, long totalCount) {
AtomicLong count = new AtomicLong(0L);
stateMessages.stream().forEach(
stateMessage -> count.addAndGet(stateMessage.getSourceStats() != null ? stateMessage.getSourceStats().getRecordCount().longValue() : 0L));
assertEquals(totalCount, count.get());
}
@Override
@BeforeEach
protected void setup() {
testdb = createTestDatabase();
createTables();
// Enable CDC on the models and random tables, giving CDC_ROLE_NAME select access.
testdb
.withCdcForTable(modelsSchema(), MODELS_STREAM_NAME, CDC_ROLE_NAME)
.withCdcForTable(randomSchema(), RANDOM_TABLE_NAME, CDC_ROLE_NAME);
// Create a test user to be used by the source, with proper permissions.
testdb
.with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName())
.with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName())
.with("REVOKE ALL FROM %s CASCADE;", testUserName())
.with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName())
.with("GRANT SELECT ON SCHEMA :: [%s] TO %s", modelsSchema(), testUserName())
.with("GRANT SELECT ON SCHEMA :: [%s] TO %s", randomSchema(), testUserName())
.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName())
.with("USE [master]")
.with("GRANT VIEW SERVER STATE TO %s", testUserName())
.with("USE [%s]", testdb.getDatabaseName())
.with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName());
populateTables();
waitForCdcRecords();
testDataSource = createTestDataSource();
}
public void waitForCdcRecords() {
testdb.waitForCdcRecords(modelsSchema(), MODELS_STREAM_NAME, MODEL_RECORDS.size());
testdb.waitForCdcRecords(randomSchema(), RANDOM_TABLE_NAME, MODEL_RECORDS_RANDOM.size());
}
protected DataSource createTestDataSource() {
return DataSourceFactory.create(
testUserName(),
testdb.getPassword(),
testdb.getDatabaseDriver().getDriverClassName(),
testdb.getJdbcUrl(),
Map.of("encrypt", "false", "trustServerCertificate", "true"),
JdbcConnector.CONNECT_TIMEOUT_DEFAULT);
}
@Override
@AfterEach
protected void tearDown() {
try {
DataSourceFactory.close(testDataSource);
} catch (final Exception e) {
throw new RuntimeException(e);
}
super.tearDown();
}
private JdbcDatabase testDatabase() {
return new DefaultJdbcDatabase(testDataSource);
}
// TODO: Delete this override when MSSQL supports individual table snapshots.
@Override
public void newTableSnapshotTest() {
// Do nothing
}
@Override
protected void addIsResumableFlagForNonPkTable(final AirbyteStream stream) {
stream.setIsResumable(false);
}
// Utilize the setup to do test on MssqlDebeziumStateUtil.
@Test
public void testCdcSnapshot() {
JdbcDatabase testDatabase = testDatabase();
testDatabase.setSourceConfig(config());
testDatabase.setDatabaseConfig(source().toDatabaseConfig(config()));
JsonNode debeziumState =
MssqlDebeziumStateUtil.constructInitialDebeziumState(MssqlCdcHelper.getDebeziumProperties(testDatabase, getConfiguredCatalog(), true),
getConfiguredCatalog(), testDatabase);
Assertions.assertEquals(3, Jsons.object(debeziumState, Map.class).size());
Assertions.assertTrue(debeziumState.has("is_compressed"));
Assertions.assertFalse(debeziumState.get("is_compressed").asBoolean());
Assertions.assertTrue(debeziumState.has("mssql_db_history"));
Assertions.assertNotNull(debeziumState.get("mssql_db_history"));
Assertions.assertTrue(debeziumState.has("mssql_cdc_offset"));
}
// Tests that even with continuous insert operations, a CDC snapshot followed by an
// incremental load will not lose data.
@Test
@Timeout(value = 5,
unit = TimeUnit.MINUTES)
public void testCdcNotLoseDataWithConsistentWriting() throws Exception {
ExecutorService executor = Executors.newFixedThreadPool(10);
// Insert 50 records over roughly 10 seconds.
// The intention is to insert records while the first snapshot read is running, and to
// verify that the first snapshot read plus a subsequent incremental read together
// capture all of the data.
int numberOfRecordsToInsert = 50;
var insertingProcess = executor.submit(() -> {
for (int i = 0; i < numberOfRecordsToInsert; i++) {
testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');",
modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019 + i, i, "car description");
try {
Thread.sleep(200);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
});
final AutoCloseableIterator<AirbyteMessage> read1 = source()
.read(config(), getConfiguredCatalog(), null);
final List<AirbyteMessage> actualRecords1 = AutoCloseableIterators.toListAndClose(read1);
final Set<AirbyteRecordMessage> recordMessages = extractRecordMessages(actualRecords1);
final List<AirbyteStateMessage> stateMessagesFromFirstSync = extractStateMessages(actualRecords1);
final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessagesFromFirstSync.get(stateMessagesFromFirstSync.size() - 1)));
// Make sure the inserting process has finished before reading from the previous state.
insertingProcess.get();
final AutoCloseableIterator<AirbyteMessage> read2 = source()
.read(config(), getConfiguredCatalog(), state);
final List<AirbyteMessage> actualRecords2 = AutoCloseableIterators.toListAndClose(read2);
recordMessages.addAll(extractRecordMessages(actualRecords2));
final Set<Integer> ids = recordMessages.stream().map(message -> message.getData().get("id").intValue()).collect(Collectors.toSet());
// Setup originally inserted 6 records into the table.
assertEquals(numberOfRecordsToInsert + 6, ids.size());
}
@Override
protected String columnClause(final Map<String, String> columnsWithDataType, final Optional<String> primaryKey) {
final StringBuilder columnClause = new StringBuilder();
int i = 0;
for (final Map.Entry<String, String> column : columnsWithDataType.entrySet()) {
columnClause.append(column.getKey());
columnClause.append(" ");
columnClause.append(column.getValue());
if (primaryKey.isPresent() && primaryKey.get().equals(column.getKey())) {
columnClause.append(" PRIMARY KEY");
}
if (i < (columnsWithDataType.size() - 1)) {
columnClause.append(",");
columnClause.append(" ");
}
i++;
}
return columnClause.toString();
}
@Test
void testAssertCdcEnabledInDb() {
// since we enable cdc in setup, assert that we successfully pass this first
assertDoesNotThrow(() -> source().assertCdcEnabledInDb(config(), testDatabase()));
// then disable cdc and assert the check fails
testdb.withoutCdc();
assertThrows(RuntimeException.class, () -> source().assertCdcEnabledInDb(config(), testDatabase()));
}
@Test
void testAssertCdcSchemaQueryable() {
// correct access granted by setup so assert check passes
assertDoesNotThrow(() -> source().assertCdcSchemaQueryable(config(), testDatabase()));
// now revoke perms and assert that check fails
testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName());
assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class,
() -> source().assertCdcSchemaQueryable(config(), testDatabase()));
}
@Test
void testCdcCheckOperationsWithDot() throws Exception {
final String dbNameWithDot = testdb.getDatabaseName().replace("_", ".");
testdb.with("CREATE DATABASE [%s];", dbNameWithDot)
.with("USE [%s]", dbNameWithDot)
.with("EXEC sys.sp_cdc_enable_db;");
final AirbyteConnectionStatus status = source().check(config());
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED);
}
// todo: check LSN returned is actually the max LSN
// todo: check we fail as expected under certain conditions
@Test
void testGetTargetPosition() throws Exception {
// check that getTargetPosition returns higher Lsn after inserting new row
testdb.withWaitUntilMaxLsnAvailable();
final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn;
testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');",
modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car");
// Wait for Agent capture job to log CDC change.
await().atMost(Duration.ofSeconds(45)).until(() -> {
final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn;
return secondLsn.compareTo(firstLsn) > 0;
});
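    // Sketch for the TODO above, assuming direct SQL access: the target LSN could be
    // cross-checked against SQL Server's own maximum, e.g.
    //   SELECT sys.fn_cdc_get_max_lsn();
    // which returns the highest LSN currently recorded in the CDC log.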
}
// Remove all timestamp related fields in shared state. We want to make sure other information will
// not change.
private void pruneSharedStateTimestamp(final JsonNode rootNode) throws Exception {
ObjectMapper mapper = new ObjectMapper();
// Navigate to the specific node
JsonNode historyNode = rootNode.path("state").path("mssql_db_history");
if (historyNode.isMissingNode()) {
return; // Node not found, nothing to do
}
String historyJson = historyNode.asText();
JsonNode historyJsonNode = mapper.readTree(historyJson);
ObjectNode objectNode = (ObjectNode) historyJsonNode;
objectNode.remove("ts_ms");
if (objectNode.has("position") && objectNode.get("position").has("ts_sec")) {
((ObjectNode) objectNode.get("position")).remove("ts_sec");
}
JsonNode offsetNode = rootNode.path("state").path("mssql_cdc_offset");
JsonNode offsetJsonNode = mapper.readTree(offsetNode.asText());
if (offsetJsonNode.has("ts_sec")) {
((ObjectNode) offsetJsonNode).remove("ts_sec");
}
// Replace the original string with the modified one
((ObjectNode) rootNode.path("state")).put("mssql_db_history", mapper.writeValueAsString(historyJsonNode));
((ObjectNode) rootNode.path("state")).put("mssql_cdc_offset", mapper.writeValueAsString(offsetJsonNode));
}
@Test
public void testTwoStreamSync() throws Exception {
// Add another stream models_2 and read that one as well.
final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog());
final List<JsonNode> MODEL_RECORDS_2 = ImmutableList.of(
Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")),
Jsons.jsonNode(ImmutableMap.of(COL_ID, 120, COL_MAKE_ID, 1, COL_MODEL, "Focus-2")),
Jsons.jsonNode(ImmutableMap.of(COL_ID, 130, COL_MAKE_ID, 1, COL_MODEL, "Ranger-2")),
Jsons.jsonNode(ImmutableMap.of(COL_ID, 140, COL_MAKE_ID, 2, COL_MODEL, "GLA-2")),
Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")),
Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2")));
testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2",
columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID)));
for (final JsonNode recordJson : MODEL_RECORDS_2) {
writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID,
COL_MAKE_ID, COL_MODEL);
}
final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream()
.withStream(CatalogHelpers.createAirbyteStream(
MODELS_STREAM_NAME + "_2",
modelsSchema(),
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
Field.of(COL_MODEL, JsonSchemaType.STRING))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))));
airbyteStream.setSyncMode(SyncMode.INCREMENTAL);
final List<ConfiguredAirbyteStream> streams = configuredCatalog.getStreams();
streams.add(airbyteStream);
configuredCatalog.withStreams(streams);
final AutoCloseableIterator<AirbyteMessage> read1 = source()
.read(config(), configuredCatalog, null);
final List<AirbyteMessage> actualRecords1 = AutoCloseableIterators.toListAndClose(read1);
final Set<AirbyteRecordMessage> recordMessages1 = extractRecordMessages(actualRecords1);
final List<AirbyteStateMessage> stateMessages1 = extractStateMessages(actualRecords1);
assertEquals(13, stateMessages1.size());
assertExpectedStateMessagesWithTotalCount(stateMessages1, 12);
JsonNode sharedState = null;
StreamDescriptor firstStreamInState = null;
for (int i = 0; i < stateMessages1.size(); i++) {
final AirbyteStateMessage stateMessage = stateMessages1.get(i);
assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType());
final AirbyteGlobalState global = stateMessage.getGlobal();
assertNotNull(global.getSharedState());
if (Objects.isNull(sharedState)) {
ObjectMapper mapper = new ObjectMapper();
sharedState = mapper.valueToTree(global.getSharedState());
pruneSharedStateTimestamp(sharedState);
} else {
ObjectMapper mapper = new ObjectMapper();
var newSharedState = mapper.valueToTree(global.getSharedState());
pruneSharedStateTimestamp(newSharedState);
assertEquals(sharedState, newSharedState);
}
if (Objects.isNull(firstStreamInState)) {
assertEquals(1, global.getStreamStates().size());
firstStreamInState = global.getStreamStates().get(0).getStreamDescriptor();
}
if (i <= 4) {
// The first 5 state messages (indices 0-4) are pk state.
assertEquals(1, global.getStreamStates().size());
final AirbyteStreamState streamState = global.getStreamStates().get(0);
assertTrue(streamState.getStreamState().has(STATE_TYPE_KEY));
assertEquals(ORDERED_COL_STATE_TYPE, streamState.getStreamState().get(STATE_TYPE_KEY).asText());
} else if (i == 5) {
// The 6th state message (index 5) is the final state message emitted for the stream.
assertEquals(1, global.getStreamStates().size());
final AirbyteStreamState streamState = global.getStreamStates().get(0);
assertFalse(streamState.getStreamState().has(STATE_TYPE_KEY));
} else if (i <= 10) {
// Messages at indices 6-10 carry the primary_key state for the 2nd stream alongside the
// final state for the 1st stream.
assertEquals(2, global.getStreamStates().size());
final StreamDescriptor finalFirstStreamInState = firstStreamInState;
global.getStreamStates().forEach(c -> {
if (c.getStreamDescriptor().equals(finalFirstStreamInState)) {
assertFalse(c.getStreamState().has(STATE_TYPE_KEY));
} else {
assertTrue(c.getStreamState().has(STATE_TYPE_KEY));
assertEquals(ORDERED_COL_STATE_TYPE, c.getStreamState().get(STATE_TYPE_KEY).asText());
}
});
} else {
// The last 2 state messages don't contain primary_key info because the primary_key sync is complete.
assertEquals(2, global.getStreamStates().size());
global.getStreamStates().forEach(c -> assertFalse(c.getStreamState().has(STATE_TYPE_KEY)));
}
}
final Set<String> names = new HashSet<>(STREAM_NAMES);
names.add(MODELS_STREAM_NAME + "_2");
assertExpectedRecords(Streams.concat(MODEL_RECORDS_2.stream(), MODEL_RECORDS.stream())
.collect(Collectors.toSet()),
recordMessages1,
names,
names,
modelsSchema());
assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()), firstStreamInState);
// Triggering a sync with a primary_key state for 1 stream and complete state for other stream
final AutoCloseableIterator<AirbyteMessage> read2 = source()
.read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6))));
final List<AirbyteMessage> actualRecords2 = AutoCloseableIterators.toListAndClose(read2);
final List<AirbyteStateMessage> stateMessages2 = extractStateMessages(actualRecords2);
assertEquals(6, stateMessages2.size());
// The state was reset to the 7th state message, so the 5 remaining records are expected to be reloaded.
assertExpectedStateMessagesWithTotalCount(stateMessages2, 5);
for (int i = 0; i < stateMessages2.size(); i++) {
final AirbyteStateMessage stateMessage = stateMessages2.get(i);
assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType());
final AirbyteGlobalState global = stateMessage.getGlobal();
assertNotNull(global.getSharedState());
assertEquals(2, global.getStreamStates().size());
if (i <= 4) {
final StreamDescriptor finalFirstStreamInState = firstStreamInState;
global.getStreamStates().forEach(c -> {
// First 5 state messages are primary_key state for the stream that didn't complete primary_key sync
// the first time
if (c.getStreamDescriptor().equals(finalFirstStreamInState)) {
assertFalse(c.getStreamState().has(STATE_TYPE_KEY));
} else {
assertTrue(c.getStreamState().has(STATE_TYPE_KEY));
assertEquals(ORDERED_COL_STATE_TYPE, c.getStreamState().get(STATE_TYPE_KEY).asText());
}
});
} else {
// The last state message doesn't contain primary_key info because the primary_key sync is complete.
global.getStreamStates().forEach(c -> assertFalse(c.getStreamState().has(STATE_TYPE_KEY)));
}
}
final Set<AirbyteRecordMessage> recordMessages2 = extractRecordMessages(actualRecords2);
assertEquals(5, recordMessages2.size());
assertExpectedRecords(new HashSet<>(MODEL_RECORDS_2.subList(1, MODEL_RECORDS_2.size())),
recordMessages2,
names,
names,
modelsSchema());
}
protected void assertExpectedStateMessagesWithTotalCount(final List<AirbyteStateMessage> stateMessages, final long totalRecordCount) {
long actualRecordCount = 0L;
for (final AirbyteStateMessage message : stateMessages) {
actualRecordCount += message.getSourceStats().getRecordCount();
}
assertEquals(totalRecordCount, actualRecordCount);
}
@Override
protected void removeCDCColumns(final ObjectNode data) {
data.remove(CDC_LSN);
data.remove(CDC_UPDATED_AT);
data.remove(CDC_DELETED_AT);
data.remove(CDC_EVENT_SERIAL_NO);
data.remove(CDC_DEFAULT_CURSOR);
}
@Override
protected MssqlCdcTargetPosition cdcLatestTargetPosition() {
testdb.withWaitUntilMaxLsnAvailable();
final JdbcDatabase jdbcDatabase = new StreamingJdbcDatabase(
testDataSource,
new MssqlSourceOperations(),
AdaptiveStreamingQueryConfig::new);
return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, testdb.getDatabaseName());
}
@Override
protected MssqlCdcTargetPosition extractPosition(final JsonNode record) {
return new MssqlCdcTargetPosition(Lsn.valueOf(record.get(CDC_LSN).asText()));
}
@Override
protected void assertNullCdcMetaData(final JsonNode data) {
assertNull(data.get(CDC_LSN));
assertNull(data.get(CDC_UPDATED_AT));
assertNull(data.get(CDC_DELETED_AT));
assertNull(data.get(CDC_EVENT_SERIAL_NO));
assertNull(data.get(CDC_DEFAULT_CURSOR));
}
@Override
protected void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull) {
assertNotNull(data.get(CDC_LSN));
assertNotNull(data.get(CDC_EVENT_SERIAL_NO));
assertNotNull(data.get(CDC_UPDATED_AT));
assertNotNull(data.get(CDC_DEFAULT_CURSOR));
if (deletedAtNull) {
assertTrue(data.get(CDC_DELETED_AT).isNull());
} else {
assertFalse(data.get(CDC_DELETED_AT).isNull());
}
}
@Override
protected void addCdcMetadataColumns(final AirbyteStream stream) {
final ObjectNode jsonSchema = (ObjectNode) stream.getJsonSchema();
final ObjectNode properties = (ObjectNode) jsonSchema.get("properties");
final JsonNode airbyteIntegerType = Jsons.jsonNode(ImmutableMap.of("type", "number", "airbyte_type", "integer"));
final JsonNode stringType = Jsons.jsonNode(ImmutableMap.of("type", "string"));
properties.set(CDC_LSN, stringType);
properties.set(CDC_UPDATED_AT, stringType);
properties.set(CDC_DELETED_AT, stringType);
properties.set(CDC_EVENT_SERIAL_NO, stringType);
properties.set(CDC_DEFAULT_CURSOR, airbyteIntegerType);
}
@Override
protected void addCdcDefaultCursorField(final AirbyteStream stream) {
if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) {
stream.setDefaultCursorField(ImmutableList.of(CDC_DEFAULT_CURSOR));
}
}
@Override
protected void assertExpectedStateMessages(final List<? extends AirbyteStateMessage> stateMessages) {
assertEquals(7, stateMessages.size());
assertStateTypes(stateMessages, 4);
}
@Override
protected void assertExpectedStateMessagesFromIncrementalSync(final List<? extends AirbyteStateMessage> stateMessages) {
assertEquals(1, stateMessages.size());
assertNotNull(stateMessages.get(0).getData());
for (final AirbyteStateMessage stateMessage : stateMessages) {
assertNotNull(stateMessage.getData().get("cdc_state").get("state").get(MSSQL_CDC_OFFSET));
assertNotNull(stateMessage.getData().get("cdc_state").get("state").get(MSSQL_DB_HISTORY));
}
}
@Override
protected void assertExpectedStateMessagesForNoData(final List<? extends AirbyteStateMessage> stateMessages) {
assertEquals(2, stateMessages.size());
}
@Override
protected void assertExpectedStateMessagesForRecordsProducedDuringAndAfterSync(final List<? extends AirbyteStateMessage> stateAfterFirstBatch) {
assertEquals(27, stateAfterFirstBatch.size());
assertStateTypes(stateAfterFirstBatch, 24);
}
private void assertStateTypes(final List<? extends AirbyteStateMessage> stateMessages, final int indexTillWhichExpectOcState) {
JsonNode sharedState = null;
LOGGER.info("*** states to assert: {}", Arrays.deepToString(stateMessages.toArray()));
for (int i = 0; i < stateMessages.size(); i++) {
final AirbyteStateMessage stateMessage = stateMessages.get(i);
assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType());
final AirbyteGlobalState global = stateMessage.getGlobal();
assertNotNull(global.getSharedState());
if (Objects.isNull(sharedState)) {
sharedState = global.getSharedState();
} else {
assertEquals(sharedState, global.getSharedState(), "states were " + Arrays.deepToString(stateMessages.toArray()));
// assertEquals(sharedState.toString().replaceAll("ts_ms\\\\\":\\d+", ""),
// global.getSharedState().toString().replaceAll("ts_ms\\\\\":\\d+", ""));
}
assertEquals(1, global.getStreamStates().size());
final AirbyteStreamState streamState = global.getStreamStates().get(0);
if (i <= indexTillWhichExpectOcState) {
assertTrue(streamState.getStreamState().has(STATE_TYPE_KEY));
assertEquals(ORDERED_COL_STATE_TYPE, streamState.getStreamState().get(STATE_TYPE_KEY).asText());
} else {
assertFalse(streamState.getStreamState().has(STATE_TYPE_KEY));
}
}
}
@Override
protected void compareTargetPositionFromTheRecordsWithTargetPostionGeneratedBeforeSync(final CdcTargetPosition targetPosition,
final AirbyteRecordMessage record) {
// The LSN from the records should be equal to or greater than the position value from
// before the sync started. Since we're using shared containers, the current LSN can move
// forward without any data modifications (INSERT, UPDATE, DELETE) in the current DB.
assert targetPosition instanceof MssqlCdcTargetPosition;
assertTrue(extractPosition(record.getData()).targetLsn.compareTo(((MssqlCdcTargetPosition) targetPosition).targetLsn) >= 0);
}
protected void waitForCdcRecords(String schemaName, String tableName, int recordCount)
throws Exception {
testdb.waitForCdcRecords(schemaName, tableName, recordCount);
}
protected void deleteCommand(final String streamName) {
String selectCountSql = "SELECT COUNT(*) FROM %s.%s".formatted(modelsSchema(), streamName);
try {
int rowCount = testdb.query(ctx -> ctx.fetch(selectCountSql)).get(0).get(0, Integer.class);
LOGGER.info("deleting all {} rows from table {}.{}", rowCount, modelsSchema(), streamName);
super.deleteCommand(streamName);
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
@Override
protected boolean supportResumableFullRefresh() {
return true;
}
@Override
protected void assertExpectedStateMessagesForFullRefresh(final List<? extends AirbyteStateMessage> stateMessages) {
// Full refresh will only send 6 state messages - one for each record (including the final one).
assertEquals(6, stateMessages.size());
}
}

View File

@@ -1,71 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.factory.DataSourceFactory;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.JdbcConnector;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.CertificateKey;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Map;
import javax.sql.DataSource;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.TestInstance.Lifecycle;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
@TestInstance(Lifecycle.PER_METHOD)
@Execution(ExecutionMode.CONCURRENT)
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH")
public class CdcMssqlSslSourceTest extends CdcMssqlSourceTest {
@Override
final protected MsSQLTestDatabase createTestDatabase() {
final var testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT, ContainerModifier.WITH_SSL_CERTIFICATES);
return testdb.withWaitUntilAgentRunning()
.withCdc();
}
@Override
protected DataSource createTestDataSource() {
return DataSourceFactory.create(
testUserName(),
testdb.getPassword(),
testdb.getDatabaseDriver().getDriverClassName(),
testdb.getJdbcUrl(),
Map.of("encrypt", "true", "databaseName", testdb.getDatabaseName(), "trustServerCertificate", "true"),
JdbcConnector.CONNECT_TIMEOUT_DEFAULT);
}
@Override
protected JsonNode config() {
final String containerIp;
try {
containerIp = InetAddress.getByName(testdb.getContainer().getHost())
.getHostAddress();
} catch (final UnknownHostException e) {
throw new RuntimeException(e);
}
final String certificate = testdb.getCertificate(CertificateKey.SERVER);
return testdb.configBuilder()
.withEncrytedVerifyServerCertificate(certificate, testdb.getContainer().getHost())
.with(JdbcUtils.HOST_KEY, containerIp)
.with(JdbcUtils.PORT_KEY, testdb.getContainer().getFirstMappedPort())
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testUserName())
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
.withSchemas(modelsSchema(), randomSchema())
.withCdcReplication()
.with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
.build();
}
}

View File

@@ -1,266 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateGeneratorUtils;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.util.AutoCloseableIterators;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.AirbyteCatalog;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.SyncMode;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class CdcStateCompressionTest {
private static final Logger LOGGER = LoggerFactory.getLogger(CdcStateCompressionTest.class);
static private final String CDC_ROLE_NAME = "cdc_selector";
static private final String TEST_USER_NAME_PREFIX = "cdc_test_user";
static private final String TEST_SCHEMA = "test_schema";
static private final int TEST_TABLES = 4;
// SQL Server tables can't have more than 1,024 columns.
static private final int ADDED_COLUMNS = 1000;
private MsSQLTestDatabase testdb;
private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
private static final String ALTER_TABLE_ADD_COLUMN_SQL;
static {
StringBuilder sb = new StringBuilder();
sb.append("ALTER TABLE ").append(TEST_SCHEMA).append(".%s ADD");
for (int j = 0; j < ADDED_COLUMNS; j++) {
sb.append((j > 0) ? ", " : " ")
// SQL Server column names can't be longer than 128 characters
.append("rather_long_column_name_________________________________________________________________________________________").append(j)
.append(" INT NULL");
}
ALTER_TABLE_ADD_COLUMN_SQL = sb.toString();
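    // The resulting statement (sketch, names abbreviated) looks like:
    //   ALTER TABLE test_schema.%s ADD rather_long_column_name_..._0 INT NULL, rather_long_column_name_..._1 INT NULL, ...
    // with ADDED_COLUMNS entries in total, each column name made unique by its numeric suffix.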
}
@BeforeEach
public void setup() throws Exception {
testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022, MsSQLTestDatabase.ContainerModifier.AGENT)
.withWaitUntilAgentRunning()
.withCdc();
// Create a test schema and a bunch of test tables with CDC enabled.
// Insert one row in each table so that they're not empty.
testdb.with("CREATE SCHEMA %s;", TEST_SCHEMA);
List<Callable<MsSQLTestDatabase>> createAndPopulateTableTasks = new ArrayList<>();
List<Callable<MsSQLTestDatabase>> waitForCdcRecordTasks = new ArrayList<>();
List<Callable<MsSQLTestDatabase>> alterTableTasks = new ArrayList<>();
List<Callable<MsSQLTestDatabase>> enableTableCdcTasks = new ArrayList<>();
List<Callable<MsSQLTestDatabase>> disableTableCdcTasks = new ArrayList<>();
for (int i = 0; i < TEST_TABLES; i++) {
String tableName = "test_table_%d".formatted(i);
String initialCdcInstanceName = "capture_instance_%d_%d".formatted(i, 1);
String finalCdcInstanceName = "capture_instance_%d_%d".formatted(i, 2);
createAndPopulateTableTasks.add(() -> testdb
.with("CREATE TABLE %s.%s (id INT IDENTITY(1,1) PRIMARY KEY);", TEST_SCHEMA, tableName)
.withCdcForTable(TEST_SCHEMA, tableName, CDC_ROLE_NAME, initialCdcInstanceName)
.with("INSERT INTO %s.%s DEFAULT VALUES", TEST_SCHEMA, tableName));
waitForCdcRecordTasks.add(() -> testdb.waitForCdcRecords(TEST_SCHEMA, tableName, initialCdcInstanceName, 1));
// Increase schema history size to trigger state compression.
// We do this by adding lots of columns with long names,
// then migrating to a new CDC capture instance for each table.
// This is admittedly somewhat awkward and perhaps could be improved.
alterTableTasks.add(() -> testdb.with(ALTER_TABLE_ADD_COLUMN_SQL.formatted(tableName)));
enableTableCdcTasks.add(() -> testdb.withCdcForTable(TEST_SCHEMA, tableName, CDC_ROLE_NAME, finalCdcInstanceName));
disableTableCdcTasks.add(() -> testdb.withCdcDisabledForTable(TEST_SCHEMA, tableName, initialCdcInstanceName));
}
executor.invokeAll(createAndPopulateTableTasks);
executor.invokeAll(waitForCdcRecordTasks);
// Create a test user to be used by the source, with proper permissions.
testdb
.with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName())
.with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName())
.with("REVOKE ALL FROM %s CASCADE;", testUserName())
.with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName())
.with("GRANT SELECT ON SCHEMA :: [%s] TO %s", TEST_SCHEMA, testUserName())
.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName())
.with("USE [master]")
.with("GRANT VIEW SERVER STATE TO %s", testUserName())
.with("USE [%s]", testdb.getDatabaseName())
.with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName());
executor.invokeAll(alterTableTasks);
executor.invokeAll(enableTableCdcTasks);
executor.invokeAll(disableTableCdcTasks);
}
private AirbyteCatalog getCatalog() {
final var streams = new ArrayList<AirbyteStream>();
for (int i = 0; i < TEST_TABLES; i++) {
streams.add(CatalogHelpers.createAirbyteStream(
"test_table_%d".formatted(i),
TEST_SCHEMA,
Field.of("id", JsonSchemaType.INTEGER))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of("id"))));
}
return new AirbyteCatalog().withStreams(streams);
}
private ConfiguredAirbyteCatalog getConfiguredCatalog() {
final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog());
configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL));
return configuredCatalog;
}
private MssqlSource source() {
return new MssqlSource();
}
private JsonNode config() {
return testdb.configBuilder()
.withHostAndPort()
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testUserName())
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
.withSchemas(TEST_SCHEMA)
.withoutSsl()
// Configure for CDC replication but with a higher timeout than usual.
// This is because Debezium requires more time than usual to build the initial snapshot.
.with("is_test", true)
.with("replication_method", Map.of(
"method", "CDC",
"initial_waiting_seconds", 20))
.build();
}
private String testUserName() {
return testdb.withNamespace(TEST_USER_NAME_PREFIX);
}
/**
* This test is similar in principle to CdcMysqlSourceTest.testCompressedSchemaHistory.
*/
@Test
public void testCompressedSchemaHistory() throws Exception {
// First sync.
final var firstBatchIterator = source().read(config(), getConfiguredCatalog(), null);
final var dataFromFirstBatch = AutoCloseableIterators.toListAndClose(firstBatchIterator);
final AirbyteStateMessage lastStateMessageFromFirstBatch =
StateGeneratorUtils.convertLegacyStateToGlobalState(Iterables.getLast(extractStateMessages(dataFromFirstBatch)));
assertNotNull(lastStateMessageFromFirstBatch.getGlobal().getSharedState());
final var lastSharedStateFromFirstBatch = lastStateMessageFromFirstBatch.getGlobal().getSharedState().get("state");
assertNotNull(lastSharedStateFromFirstBatch);
assertNotNull(lastSharedStateFromFirstBatch.get(MSSQL_DB_HISTORY));
assertNotNull(lastSharedStateFromFirstBatch.get(MSSQL_CDC_OFFSET));
assertNotNull(lastSharedStateFromFirstBatch.get(IS_COMPRESSED));
assertTrue(lastSharedStateFromFirstBatch.get(IS_COMPRESSED).asBoolean());
final var recordsFromFirstBatch = extractRecordMessages(dataFromFirstBatch);
assertEquals(TEST_TABLES, recordsFromFirstBatch.size());
for (final var record : recordsFromFirstBatch) {
assertEquals("1", record.getData().get("id").toString());
}
LOGGER.info("inserting new data into test tables");
List<Callable<MsSQLTestDatabase>> waitForCdcTasks = new ArrayList<>();
// Insert a bunch of records (1 per table, again).
for (int i = 0; i < TEST_TABLES; i++) {
String tableName = "test_table_%d".formatted(i);
String cdcInstanceName = "capture_instance_%d_%d".formatted(i, 2);
testdb.with("INSERT %s.%s DEFAULT VALUES;", TEST_SCHEMA, tableName);
waitForCdcTasks.add(() -> testdb.waitForCdcRecords(TEST_SCHEMA, tableName, cdcInstanceName, 1));
}
LOGGER.info("waiting for CDC records");
executor.invokeAll(waitForCdcTasks);
LOGGER.info("starting second sync");
// Second sync.
final var secondBatchStateForRead = Jsons.jsonNode(Collections.singletonList(Iterables.getLast(extractStateMessages(dataFromFirstBatch))));
final var secondBatchIterator = source().read(config(), getConfiguredCatalog(), secondBatchStateForRead);
final var dataFromSecondBatch = AutoCloseableIterators.toListAndClose(secondBatchIterator);
final AirbyteStateMessage lastStateMessageFromSecondBatch =
StateGeneratorUtils.convertLegacyStateToGlobalState(Iterables.getLast(extractStateMessages(dataFromSecondBatch)));
assertNotNull(lastStateMessageFromSecondBatch.getGlobal().getSharedState());
final var lastSharedStateFromSecondBatch = lastStateMessageFromSecondBatch.getGlobal().getSharedState().get("state");
assertNotNull(lastSharedStateFromSecondBatch);
assertNotNull(lastSharedStateFromSecondBatch.get(MSSQL_DB_HISTORY));
assertEquals(lastSharedStateFromFirstBatch.get(MSSQL_DB_HISTORY), lastSharedStateFromSecondBatch.get(MSSQL_DB_HISTORY));
assertNotNull(lastSharedStateFromSecondBatch.get(MSSQL_CDC_OFFSET));
assertNotNull(lastSharedStateFromSecondBatch.get(IS_COMPRESSED));
assertTrue(lastSharedStateFromSecondBatch.get(IS_COMPRESSED).asBoolean());
final var recordsFromSecondBatch = extractRecordMessages(dataFromSecondBatch);
assertEquals(TEST_TABLES, recordsFromSecondBatch.size());
for (final var record : recordsFromSecondBatch) {
assertEquals("2", record.getData().get("id").toString());
}
}
@AfterEach
public void tearDown() {
testdb.close();
}
private Set<AirbyteRecordMessage> extractRecordMessages(final List<AirbyteMessage> messages) {
final var recordsPerStream = extractRecordMessagesStreamWise(messages);
return recordsPerStream.values().stream().flatMap(Set::stream).collect(Collectors.toSet());
}
private Map<String, Set<AirbyteRecordMessage>> extractRecordMessagesStreamWise(final List<AirbyteMessage> messages) {
final var recordsPerStream = messages.stream()
.filter(m -> m.getType() == Type.RECORD)
.map(AirbyteMessage::getRecord)
.collect(Collectors.groupingBy(AirbyteRecordMessage::getStream));
final Map<String, Set<AirbyteRecordMessage>> recordsPerStreamWithNoDuplicates = new HashMap<>();
for (final var entry : recordsPerStream.entrySet()) {
final var set = new HashSet<>(entry.getValue());
recordsPerStreamWithNoDuplicates.put(entry.getKey(), set);
assertEquals(entry.getValue().size(), set.size(), "duplicate records in sync for " + entry.getKey());
}
return recordsPerStreamWithNoDuplicates;
}
private List<AirbyteStateMessage> extractStateMessages(final List<AirbyteMessage> messages) {
return messages.stream()
.filter(r -> r.getType() == Type.STATE)
.map(AirbyteMessage::getState)
.toList();
}
}

View File

@@ -1,122 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.google.common.collect.ImmutableMap;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.base.Source;
import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner;
import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
import io.airbyte.commons.features.EnvVariableFeatureFlags;
import io.airbyte.commons.features.FeatureFlagsWrapper;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
@Execution(ExecutionMode.CONCURRENT)
public class CloudDeploymentMssqlTest {
private MsSQLTestDatabase createTestDatabase(String... containerFactoryMethods) {
final var container = new MsSQLContainerFactory().shared(
BaseImage.MSSQL_2022.reference, containerFactoryMethods);
final var testdb = new MsSQLTestDatabase(container);
return testdb
.withConnectionProperty("encrypt", "true")
.withConnectionProperty("trustServerCertificate", "true")
.withConnectionProperty("databaseName", testdb.getDatabaseName())
.initialized();
}
private Source source() {
final var source = new MssqlSource(FeatureFlagsWrapper.overridingDeploymentMode(
new EnvVariableFeatureFlags(), AdaptiveSourceRunner.CLOUD_MODE));
return MssqlSource.sshWrappedSource(source);
}
@Test
void testStrictSSLUnsecuredNoTunnel() throws Exception {
try (final var testdb = createTestDatabase()) {
final var config = testdb.configBuilder()
.withHostAndPort()
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
.with(JdbcUtils.PASSWORD_KEY, "fake")
.withoutSsl()
.with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build())
.build();
final AirbyteConnectionStatus actual = source().check(config);
assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
assertTrue(actual.getMessage().contains("Unsecured connection not allowed"), actual.getMessage());
}
}
@Test
void testStrictSSLSecuredNoTunnel() throws Exception {
try (final var testdb = createTestDatabase()) {
final var config = testdb.testConfigBuilder()
.withEncrytedTrustServerCertificate()
.with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build())
.build();
final AirbyteConnectionStatus actual = source().check(config);
assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus());
}
}
@Test
void testStrictSSLSecuredWithTunnel() throws Exception {
try (final var testdb = createTestDatabase()) {
final var config = testdb.configBuilder()
.withHostAndPort()
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
.with(JdbcUtils.PASSWORD_KEY, "fake")
.withEncrytedTrustServerCertificate()
.with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build())
.build();
final AirbyteConnectionStatus actual = source().check(config);
assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage());
}
}
@Test
void testStrictSSLUnsecuredWithTunnel() throws Exception {
try (final var testdb = createTestDatabase()) {
final var config = testdb.configBuilder()
.withHostAndPort()
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
.with(JdbcUtils.PASSWORD_KEY, "fake")
.withEncrytedTrustServerCertificate()
.with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build())
.build();
final AirbyteConnectionStatus actual = source().check(config);
assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage());
}
}
@Test
void testCheckWithSslModeDisabled() throws Exception {
try (final var testdb = createTestDatabase("withNetwork")) {
try (final SshBastionContainer bastion = new SshBastionContainer()) {
bastion.initAndStartBastion(testdb.getContainer().getNetwork());
final var config = testdb.integrationTestConfigBuilder()
.with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false))
.withoutSsl()
.build();
final AirbyteConnectionStatus actual = source().check(config);
assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus());
}
}
}
}

View File

@@ -1,121 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.factory.DataSourceFactory;
import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.JdbcConnector;
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
import java.util.Map;
import javax.sql.DataSource;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.MSSQLServerContainer;
public class MssqlAgentStateTest {
private static MsSQLTestDatabase testdb;
private static DataSource testDataSource;
private static MSSQLServerContainer privateContainer;
@BeforeAll
public static void setup() {
privateContainer = new MsSQLContainerFactory().exclusive(
MsSQLTestDatabase.BaseImage.MSSQL_2022.reference,
MsSQLTestDatabase.ContainerModifier.AGENT);
testdb = new MsSQLTestDatabase(privateContainer);
testdb
.withConnectionProperty("encrypt", "false")
.withConnectionProperty("trustServerCertificate", "true")
.withConnectionProperty("databaseName", testdb.getDatabaseName())
.initialized()
.withWaitUntilAgentRunning()
.withCdc();
testDataSource = DataSourceFactory.create(
testdb.getUserName(),
testdb.getPassword(),
testdb.getDatabaseDriver().getDriverClassName(),
testdb.getJdbcUrl(),
Map.of("encrypt", "false", "trustServerCertificate", "true"),
JdbcConnector.CONNECT_TIMEOUT_DEFAULT);
}
@AfterAll
static void tearDown() {
try {
DataSourceFactory.close(testDataSource);
testdb.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
privateContainer.close();
}
protected MssqlSource source() {
return new MssqlSource();
}
private JdbcDatabase testDatabase() {
return new DefaultJdbcDatabase(testDataSource);
}
protected JsonNode config() {
return testdb.configBuilder()
.withHostAndPort()
.withDatabase()
.with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
.withCdcReplication()
.withoutSsl()
.with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
.build();
}
@Test
void testAssertSqlServerAgentRunning() throws Exception {
testdb.withAgentStopped().withWaitUntilAgentStopped();
// assert expected failure if sql server agent stopped
assertThrows(RuntimeException.class,
() -> source().assertSqlServerAgentRunning(testDatabase()));
// assert success if sql server agent running
testdb.withAgentStarted().withWaitUntilAgentRunning();
assertDoesNotThrow(() -> source().assertSqlServerAgentRunning(testDatabase()));
}
// Ensure the CDC check operations are included when CDC is enabled
// todo: make this better by checking the returned checkOperations from source.getCheckOperations
@Test
void testCdcCheckOperations() throws Exception {
// assertCdcEnabledInDb
testdb.withoutCdc();
AirbyteConnectionStatus status = source().check(config());
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
testdb.withCdc();
// assertCdcSchemaQueryable
testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName());
status = source().check(config());
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
testdb.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName());
// assertSqlServerAgentRunning
testdb.withAgentStopped().withWaitUntilAgentStopped();
status = source().check(config());
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
testdb.withAgentStarted().withWaitUntilAgentRunning();
status = source().check(config());
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
}
}

View File

@@ -1,165 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.SyncMode;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
class MssqlCdcHelperTest {
private static final JsonNode LEGACY_NON_CDC_CONFIG = Jsons.jsonNode(Map.of("replication_method", "STANDARD"));
private static final JsonNode LEGACY_CDC_CONFIG = Jsons.jsonNode(Map.of("replication_method", "CDC"));
@Test
public void testIsCdc() {
// legacy replication method config before version 0.4.0
assertFalse(MssqlCdcHelper.isCdc(LEGACY_NON_CDC_CONFIG));
assertTrue(MssqlCdcHelper.isCdc(LEGACY_CDC_CONFIG));
// new replication method config since version 0.4.0
final JsonNode newNonCdc = Jsons.jsonNode(Map.of("replication_method",
Jsons.jsonNode(Map.of("method", "STANDARD"))));
assertFalse(MssqlCdcHelper.isCdc(newNonCdc));
final JsonNode newCdc = Jsons.jsonNode(Map.of("replication_method",
Jsons.jsonNode(Map.of(
"method", "CDC"))));
assertTrue(MssqlCdcHelper.isCdc(newCdc));
// migration from legacy to new config
final JsonNode mixNonCdc = Jsons.jsonNode(Map.of(
"replication_method", Jsons.jsonNode(Map.of("method", "STANDARD")),
"replication", Jsons.jsonNode(Map.of("replication_type", "CDC"))));
assertFalse(MssqlCdcHelper.isCdc(mixNonCdc));
final JsonNode mixCdc = Jsons.jsonNode(Map.of(
"replication", Jsons.jsonNode(Map.of(
"replication_type", "Standard")),
"replication_method", Jsons.jsonNode(Map.of(
"method", "CDC"))));
assertTrue(MssqlCdcHelper.isCdc(mixCdc));
}
@Test
public void testGetTableIncludeListSingleTable() {
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog()
.withStreams(Collections.singletonList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("users"))));
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
// Pattern.quote wraps "dbo.users" in \Q...\E, yielding "\Qdbo.users\E"
assertEquals("\\Qdbo.users\\E", result);
}
@Test
public void testGetTableIncludeListMultipleTables() {
final List<ConfiguredAirbyteStream> streams = Arrays.asList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("users")),
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("orders")),
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("sales")
.withName("products")));
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(streams);
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
// Should generate a comma-separated list of escaped table identifiers
assertEquals("\\Qdbo.users\\E,\\Qdbo.orders\\E,\\Qsales.products\\E", result);
}
@Test
public void testGetTableIncludeListFiltersNonIncrementalStreams() {
final List<ConfiguredAirbyteStream> streams = Arrays.asList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("users")),
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.FULL_REFRESH)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("logs")));
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(streams);
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
// Should only include INCREMENTAL streams
assertEquals("\\Qdbo.users\\E", result);
}
@Test
public void testGetTableIncludeListWithSpecialCharactersInTableName() {
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog()
.withStreams(Collections.singletonList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("table$with_special-chars"))));
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
// Pattern.quote should escape special characters
assertEquals("\\Qdbo.table$with_special-chars\\E", result);
}
@Test
public void testGetTableIncludeListWithCommaInTableName() {
final List<ConfiguredAirbyteStream> streams = Arrays.asList(
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("table,with,commas")),
new ConfiguredAirbyteStream()
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(new AirbyteStream()
.withNamespace("dbo")
.withName("normal_table")));
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(streams);
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
// Commas in table names should be escaped with backslash
assertEquals("\\Qdbo.table\\,with\\,commas\\E,\\Qdbo.normal_table\\E", result);
}
@Test
public void testGetTableIncludeListEmptyCatalog() {
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog()
.withStreams(Collections.emptyList());
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
assertEquals("", result);
}
}

View File

@@ -1,35 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import com.zaxxer.hikari.HikariDataSource;
import io.airbyte.cdk.db.factory.DataSourceFactory;
import java.util.Map;
import javax.sql.DataSource;
import org.junit.jupiter.api.Test;
public class MssqlDataSourceFactoryTest {
@Test
protected void testCreatingDataSourceWithConnectionTimeoutSetBelowDefault() {
try (var testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022)) {
final Map<String, String> connectionProperties = Map.of("loginTimeout", String.valueOf(5));
final DataSource dataSource = DataSourceFactory.create(
testdb.getUserName(),
testdb.getPassword(),
testdb.getDatabaseDriver().getDriverClassName(),
testdb.getJdbcUrl(),
connectionProperties,
new MssqlSource().getConnectionTimeoutMssql(connectionProperties));
assertNotNull(dataSource);
assertEquals(HikariDataSource.class, dataSource.getClass());
assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout());
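      // loginTimeout is specified in seconds, while HikariCP exposes connectionTimeout in
      // milliseconds, hence 5 -> 5000 above.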
}
}
}

View File

@@ -1,29 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.junit.jupiter.api.Assertions.assertEquals;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
import io.debezium.connector.sqlserver.Lsn;
import org.junit.jupiter.api.Test;
public class MssqlDebeziumStateUtilTest {
private static String DB_NAME = "db_name";
private static String LSN_STRING = "0000062d:00017ff0:016d";
private static Lsn LSN = Lsn.valueOf(LSN_STRING);
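// The offset key asserted below follows Debezium's SQL Server partition format: a JSON array of the server name and a {server, database} map.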
@Test
void generateCorrectFormat() {
MssqlDebeziumStateAttributes attributes = new MssqlDebeziumStateAttributes(LSN);
JsonNode formatResult = MssqlDebeziumStateUtil.format(attributes, DB_NAME);
assertEquals("{\"commit_lsn\":\"0000062d:00017ff0:016d\",\"snapshot\":true,\"snapshot_completed\":true}",
formatResult.get("[\"db_name\",{\"server\":\"db_name\",\"database\":\"db_name\"}]").asText());
}
}

View File

@@ -1,36 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.junit.jupiter.api.Assertions.assertEquals;
import io.airbyte.integrations.source.mssql.MssqlQueryUtils.TableSizeInfo;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import org.junit.jupiter.api.Test;
public class MssqlInitialLoadHandlerTest {
private static final long ONE_GB = 1_073_741_824;
private static final long ONE_MB = 1_048_576;
@Test
void testInvalidOrNullTableSizeInfo() {
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair("table_name", "schema_name");
assertEquals(MssqlInitialLoadHandler.calculateChunkSize(null, pair), 1_000_000L);
final TableSizeInfo invalidRowLengthInfo = new TableSizeInfo(ONE_GB, 0L);
assertEquals(MssqlInitialLoadHandler.calculateChunkSize(invalidRowLengthInfo, pair), 1_000_000L);
final TableSizeInfo invalidTableSizeInfo = new TableSizeInfo(0L, 0L);
assertEquals(MssqlInitialLoadHandler.calculateChunkSize(invalidTableSizeInfo, pair), 1_000_000L);
}
@Test
void testTableSizeInfo() {
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair("table_name", "schema_name");
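// The expected values are consistent with targeting ~1 GB of data per chunk: 1 GB / 2 MB rows = 512 rows, and 1 GB / 200-byte rows = 5,368,709 rows.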
assertEquals(MssqlInitialLoadHandler.calculateChunkSize(new TableSizeInfo(ONE_GB, 2 * ONE_MB), pair), 512L);
assertEquals(MssqlInitialLoadHandler.calculateChunkSize(new TableSizeInfo(ONE_GB, 200L), pair), 5368709L);
}
}

View File

@@ -1,482 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY;
import static java.util.stream.Collectors.toList;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest;
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState;
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.util.MoreIterators;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.AirbyteCatalog;
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
import io.airbyte.protocol.models.v0.AirbyteStateStats;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.AirbyteStreamState;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.DestinationSyncMode;
import io.airbyte.protocol.models.v0.StreamDescriptor;
import io.airbyte.protocol.models.v0.SyncMode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH")
public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest<MssqlSource, MsSQLTestDatabase> {
protected static final String USERNAME_WITHOUT_PERMISSION = "new_user";
protected static final String PASSWORD_WITHOUT_PERMISSION = "password_3435!";
static {
// In mssql, timestamp is generated automatically, so we need to use
// the datetime type instead so that we can set the value manually.
COL_TIMESTAMP_TYPE = "DATETIME2";
}
@Override
protected JsonNode config() {
return testdb.testConfigBuilder()
.withoutSsl()
.build();
}
@Override
protected MssqlSource source() {
return new MssqlSource();
}
@Override
protected MsSQLTestDatabase createTestDatabase() {
return MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
}
@Override
public boolean supportsSchemas() {
return true;
}
@Override
protected void maybeSetShorterConnectionTimeout(final JsonNode config) {
((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1");
}
@Test
void testCheckIncorrectPasswordFailure() throws Exception {
final var config = config();
maybeSetShorterConnectionTimeout(config);
((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake");
final AirbyteConnectionStatus status = source().check(config);
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage());
}
@Test
public void testCheckIncorrectUsernameFailure() throws Exception {
final var config = config();
maybeSetShorterConnectionTimeout(config);
((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake");
final AirbyteConnectionStatus status = source().check(config);
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage());
}
@Test
public void testCheckIncorrectHostFailure() throws Exception {
final var config = config();
maybeSetShorterConnectionTimeout(config);
((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2");
final AirbyteConnectionStatus status = source().check(config);
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage());
}
@Test
public void testCheckIncorrectPortFailure() throws Exception {
final var config = config();
maybeSetShorterConnectionTimeout(config);
((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000");
final AirbyteConnectionStatus status = source().check(config);
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage());
}
@Test
public void testCheckIncorrectDataBaseFailure() throws Exception {
final var config = config();
maybeSetShorterConnectionTimeout(config);
((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase");
final AirbyteConnectionStatus status = source().check(config);
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage());
}
@Test
public void testUserHasNoPermissionToDataBase() throws Exception {
final var config = config();
maybeSetShorterConnectionTimeout(config);
testdb.with("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION);
((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, USERNAME_WITHOUT_PERMISSION);
((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION);
final AirbyteConnectionStatus status = source().check(config);
assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage());
}
@Test
@Override
protected void testReadMultipleTablesIncrementally() throws Exception {
final var config = config();
((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1);
final String streamOneName = TABLE_NAME + "one";
// Create a fresh first table
testdb.with("CREATE TABLE %s (\n"
+ " id INT NOT NULL PRIMARY KEY,\n"
+ " name VARCHAR(50) NOT NULL,\n"
+ " updated_at DATE NOT NULL\n"
+ ");", getFullyQualifiedTableName(streamOneName))
.with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')",
getFullyQualifiedTableName(streamOneName))
.with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')",
getFullyQualifiedTableName(streamOneName))
.with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')",
getFullyQualifiedTableName(streamOneName));
// Create a fresh second table
final String streamTwoName = TABLE_NAME + "two";
final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName);
// Insert records into second table
testdb.with("CREATE TABLE %s (\n"
+ " id INT NOT NULL PRIMARY KEY,\n"
+ " name VARCHAR(50) NOT NULL,\n"
+ " updated_at DATE NOT NULL\n"
+ ");", streamTwoFullyQualifiedName)
.with("INSERT INTO %s (id, name, updated_at) VALUES (40, 'Jean Luc','2006-10-19')",
streamTwoFullyQualifiedName)
.with("INSERT INTO %s (id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')",
streamTwoFullyQualifiedName)
.with("INSERT INTO %s (id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')",
streamTwoFullyQualifiedName);
final List<AirbyteMessage> streamOneExpectedRecords = Arrays.asList(
createRecord(streamOneName, getDefaultNamespace(), Map
.of(COL_ID, ID_VALUE_1,
COL_NAME, "picard",
COL_UPDATED_AT, "2004-10-19")),
createRecord(streamOneName, getDefaultNamespace(), Map
.of(COL_ID, ID_VALUE_2,
COL_NAME, "crusher",
COL_UPDATED_AT,
"2005-10-19")),
createRecord(streamOneName, getDefaultNamespace(), Map
.of(COL_ID, ID_VALUE_3,
COL_NAME, "vash",
COL_UPDATED_AT, "2006-10-19")));
// Create records list that we expect to see in the state message
final List<AirbyteMessage> streamTwoExpectedRecords = Arrays.asList(
createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(
COL_ID, 40,
COL_NAME, "Jean Luc",
COL_UPDATED_AT, "2006-10-19")),
createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(
COL_ID, 41,
COL_NAME, "Groot",
COL_UPDATED_AT, "2006-10-19")),
createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(
COL_ID, 42,
COL_NAME, "Thanos",
COL_UPDATED_AT, "2006-10-19")));
// Prep and create a configured catalog to perform sync
final AirbyteStream streamOne = getAirbyteStream(streamOneName, getDefaultNamespace());
final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, getDefaultNamespace());
final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
new AirbyteCatalog().withStreams(List.of(streamOne, streamTwo)));
configuredCatalog.getStreams().forEach(airbyteStream -> {
airbyteStream.setSyncMode(SyncMode.INCREMENTAL);
airbyteStream.setCursorField(List.of(COL_ID));
airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND);
airbyteStream.withPrimaryKey(List.of(List.of(COL_ID)));
});
// Perform initial sync
final List<AirbyteMessage> messagesFromFirstSync = MoreIterators
.toList(source().read(config, configuredCatalog, null));
final List<AirbyteMessage> recordsFromFirstSync = filterRecords(messagesFromFirstSync);
setEmittedAtToNull(messagesFromFirstSync);
// All records in the 2 configured streams should be present
assertThat(recordsFromFirstSync).containsExactlyElementsOf(
Stream.concat(streamOneExpectedRecords.stream().parallel(),
streamTwoExpectedRecords.stream().parallel()).collect(toList()));
final List<AirbyteStateMessage> actualFirstSyncState = extractStateMessage(messagesFromFirstSync);
// Since we are emitting a state message after each record, we should have 1 state for each record -
// 3 from stream1 and 3 from stream2
assertEquals(6, actualFirstSyncState.size());
// The expected state types are two ordered_column states followed by a final cursor_based state
final List<String> expectedStateTypesFromFirstSync = List.of("ordered_column", "ordered_column", "cursor_based");
final List<String> stateTypeOfStreamOneStatesFromFirstSync =
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, STATE_TYPE_KEY);
final List<String> stateTypeOfStreamTwoStatesFromFirstSync =
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamTwoName, STATE_TYPE_KEY);
// It should be the same for stream1 and stream2
assertEquals(stateTypeOfStreamOneStatesFromFirstSync, expectedStateTypesFromFirstSync);
assertEquals(stateTypeOfStreamTwoStatesFromFirstSync, expectedStateTypesFromFirstSync);
// Create the expected ordered_column values that we should see
final List<String> expectedOrderedColumnValueFromFirstSync = List.of("1", "2");
final List<String> orderedColumnValuesOfStreamOneFromFirstSync =
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, "ordered_col_val");
final List<String> orderedColumnValuesOfStreamTwoFromFirstSync =
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamTwoName, "ordered_col_val");
// Verify that each element matches at its index.
// Only the first 2 elements are checked, since the last state_type has already been verified
// to be "cursor_based".
assertEquals(expectedOrderedColumnValueFromFirstSync.get(0), orderedColumnValuesOfStreamOneFromFirstSync.get(0));
assertEquals(expectedOrderedColumnValueFromFirstSync.get(1), orderedColumnValuesOfStreamOneFromFirstSync.get(1));
assertEquals(expectedOrderedColumnValueFromFirstSync.get(0), orderedColumnValuesOfStreamTwoFromFirstSync.get(0));
assertEquals(expectedOrderedColumnValueFromFirstSync.get(1), orderedColumnValuesOfStreamTwoFromFirstSync.get(1));
// Extract only state messages for each stream
final List<AirbyteStateMessage> streamOneStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamOneName);
final List<AirbyteStateMessage> streamTwoStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamTwoName);
// Extract the incremental states of each stream's first and second state message
final List<JsonNode> streamOneIncrementalStatesFromFirstSync =
List.of(streamOneStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"),
streamOneStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state"));
final JsonNode streamOneFinalStreamStateFromFirstSync = streamOneStateMessagesFromFirstSync.get(2).getStream().getStreamState();
final List<JsonNode> streamTwoIncrementalStatesFromFirstSync =
List.of(streamTwoStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"),
streamTwoStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state"));
final JsonNode streamTwoFinalStreamStateFromFirstSync = streamTwoStateMessagesFromFirstSync.get(2).getStream().getStreamState();
// The incremental_state of each stream's first and second incremental states is expected
// to be identical to the stream_state of the final state message for each stream
assertEquals(streamOneIncrementalStatesFromFirstSync.get(0), streamOneFinalStreamStateFromFirstSync);
assertEquals(streamOneIncrementalStatesFromFirstSync.get(1), streamOneFinalStreamStateFromFirstSync);
assertEquals(streamTwoIncrementalStatesFromFirstSync.get(0), streamTwoFinalStreamStateFromFirstSync);
assertEquals(streamTwoIncrementalStatesFromFirstSync.get(1), streamTwoFinalStreamStateFromFirstSync);
// Sync should work with an ordered_column state AND a cursor-based state from each stream.
// Forcing a sync with:
// - stream one state still being the first record read via ordered column.
// - stream two state being the last ordered column state emitted before the switch to the cursor.
final List<AirbyteMessage> messagesFromSecondSyncWithMixedStates = MoreIterators
.toList(source().read(config, configuredCatalog,
Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0),
streamTwoStateMessagesFromFirstSync.get(1)))));
// Extract only state messages for each stream after second sync
final List<AirbyteStateMessage> streamOneStateMessagesFromSecondSync =
extractStateMessage(messagesFromSecondSyncWithMixedStates, streamOneName);
final List<String> stateTypeOfStreamOneStatesFromSecondSync =
extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, streamOneName, STATE_TYPE_KEY);
final List<AirbyteStateMessage> streamTwoStateMessagesFromSecondSync =
extractStateMessage(messagesFromSecondSyncWithMixedStates, streamTwoName);
final List<String> stateTypeOfStreamTwoStatesFromSecondSync =
extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, streamTwoName, STATE_TYPE_KEY);
// Stream One states after the second sync are expected to have 2 stream states
// - 1 with ordered_column state_type and 1 with cursor_based state_type
assertEquals(2, streamOneStateMessagesFromSecondSync.size());
assertEquals(List.of("ordered_column", "cursor_based"), stateTypeOfStreamOneStatesFromSecondSync);
// Stream Two states after the second sync are expected to have 1 stream state
// - The state that is of cursorBased state type
assertEquals(1, streamTwoStateMessagesFromSecondSync.size());
assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromSecondSync);
// Add some data to each table and perform a third read.
// Expect to see all records be synced via cursorBased method and not ordered_column
testdb.with("INSERT INTO %s (id, name, updated_at) VALUES (4,'Hooper','2006-10-19')",
getFullyQualifiedTableName(streamOneName))
.with("INSERT INTO %s (id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')",
streamTwoFullyQualifiedName);
final List<AirbyteMessage> messagesFromThirdSync = MoreIterators
.toList(source().read(config, configuredCatalog,
Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1),
streamTwoStateMessagesFromSecondSync.get(0)))));
// Extract only state messages, state type, and cursor for each stream after second sync
final List<AirbyteStateMessage> streamOneStateMessagesFromThirdSync =
extractStateMessage(messagesFromThirdSync, streamOneName);
final List<String> stateTypeOfStreamOneStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, STATE_TYPE_KEY);
final List<String> cursorOfStreamOneStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, "cursor");
final List<AirbyteStateMessage> streamTwoStateMessagesFromThirdSync =
extractStateMessage(messagesFromThirdSync, streamTwoName);
final List<String> stateTypeOfStreamTwoStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, STATE_TYPE_KEY);
final List<String> cursorOfStreamTwoStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, "cursor");
// Both streams should now be synced via standard cursor and have updated max cursor values
// cursor: 4 for stream one
// cursor: 43 for stream two
assertEquals(1, streamOneStateMessagesFromThirdSync.size());
assertEquals(List.of("cursor_based"), stateTypeOfStreamOneStatesFromThirdSync);
assertEquals(List.of("4"), cursorOfStreamOneStatesFromThirdSync);
assertEquals(1, streamTwoStateMessagesFromThirdSync.size());
assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromThirdSync);
assertEquals(List.of("43"), cursorOfStreamTwoStatesFromThirdSync);
}
private AirbyteStream getAirbyteStream(final String tableName, final String namespace) {
return CatalogHelpers.createAirbyteStream(
tableName,
namespace,
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID)));
}
@Override
protected AirbyteCatalog getCatalog(final String defaultNamespace) {
return new AirbyteCatalog().withStreams(List.of(
CatalogHelpers.createAirbyteStream(
TABLE_NAME,
defaultNamespace,
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID)))
.withIsResumable(true),
CatalogHelpers.createAirbyteStream(
TABLE_NAME_WITHOUT_PK,
defaultNamespace,
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(Collections.emptyList())
.withIsResumable(false),
CatalogHelpers.createAirbyteStream(
TABLE_NAME_COMPOSITE_PK,
defaultNamespace,
Field.of(COL_FIRST_NAME, JsonSchemaType.STRING),
Field.of(COL_LAST_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(
List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME)))
.withIsResumable(true)));
}
@Override
protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configuredAirbyteStream,
final String cursorField,
final String cursorValue) {
return new CursorBasedStatus().withStateType(StateType.CURSOR_BASED).withVersion(2L)
.withStreamName(configuredAirbyteStream.getStream().getName())
.withStreamNamespace(configuredAirbyteStream.getStream().getNamespace())
.withCursorField(List.of(cursorField))
.withCursor(cursorValue)
.withCursorRecordCount(1L);
}
// Override from parent class as we're no longer including the legacy Data field.
@Override
protected List<AirbyteMessage> createExpectedTestMessages(final List<? extends DbStreamState> states, final long numRecords) {
return states.stream()
.map(s -> new AirbyteMessage().withType(Type.STATE)
.withState(
new AirbyteStateMessage().withType(AirbyteStateType.STREAM)
.withStream(new AirbyteStreamState()
.withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName()))
.withStreamState(Jsons.jsonNode(s)))
.withSourceStats(new AirbyteStateStats().withRecordCount((double) numRecords))))
.collect(
Collectors.toList());
}
@Override
protected JsonNode getStateData(final AirbyteMessage airbyteMessage, final String streamName) {
final JsonNode streamState = airbyteMessage.getState().getStream().getStreamState();
if (streamState.get("stream_name").asText().equals(streamName)) {
return streamState;
}
throw new IllegalArgumentException("Stream not found in state message: " + streamName);
}
@Override
protected List<AirbyteMessage> getExpectedAirbyteMessagesSecondSync(final String namespace) {
final List<AirbyteMessage> expectedMessages = new ArrayList<>();
expectedMessages.addAll(List.of(createRecord(streamName(), namespace, ImmutableMap
.of(COL_ID, ID_VALUE_4,
COL_NAME, "riker",
COL_UPDATED_AT, "2006-10-19")),
createRecord(streamName(), namespace, ImmutableMap
.of(COL_ID, ID_VALUE_5,
COL_NAME, "data",
COL_UPDATED_AT, "2006-10-19"))));
final DbStreamState state = new CursorBasedStatus()
.withStateType(StateType.CURSOR_BASED)
.withVersion(2L)
.withStreamName(streamName())
.withStreamNamespace(namespace)
.withCursorField(ImmutableList.of(COL_ID))
.withCursor("5")
.withCursorRecordCount(1L);
expectedMessages.addAll(createExpectedTestMessages(List.of(state), 2L));
return expectedMessages;
}
@Override
protected void validateFullRefreshStateMessageReadSuccess(final List<? extends AirbyteStateMessage> stateMessages) {
var finalStateMessage = stateMessages.get(stateMessages.size() - 1);
assertEquals(
finalStateMessage.getStream().getStreamState().get("state_type").textValue(),
"ordered_column");
assertEquals(finalStateMessage.getStream().getStreamState().get("ordered_col").textValue(), "id");
assertEquals(finalStateMessage.getStream().getStreamState().get("ordered_col_val").textValue(), "3");
}
}

View File

@@ -1,305 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.assertj.core.api.AssertionsForClassTypes.catchThrowable;
import static org.junit.jupiter.api.Assertions.*;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.Lists;
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
import io.airbyte.commons.exceptions.ConfigErrorException;
import io.airbyte.commons.util.MoreIterators;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil;
import io.airbyte.protocol.models.CommonField;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.*;
import java.sql.JDBCType;
import java.sql.SQLException;
import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.junit.jupiter.api.*;
class MssqlSourceTest {
private static final String STREAM_NAME = "id_and_name";
private static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(Lists.newArrayList(CatalogHelpers.createAirbyteStream(
STREAM_NAME,
"dbo",
Field.of("id", JsonSchemaType.INTEGER),
Field.of("name", JsonSchemaType.STRING),
Field.of("born", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
.withIsResumable(true)));
private MsSQLTestDatabase testdb;
private MssqlSource source() {
return new MssqlSource();
}
// How to interact with the mssql test container manually:
// 1. exec into the mssql container (not the test runner container)
// 2. /opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "A_Str0ng_Required_Password"
@BeforeEach
void setup() {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022)
.with("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));")
.with("INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', " +
"'2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');");
}
@AfterEach
void cleanUp() {
testdb.close();
}
private JsonNode getConfig() {
return testdb.testConfigBuilder()
.withoutSsl()
.build();
}
// If a column in mssql is used as a primary key and in a separate index, the discover query returns
// the column twice. We now de-duplicate it (pr: https://github.com/airbytehq/airbyte/pull/983).
// This tests that the de-duplication is successful.
@Test
void testDiscoverWithPk() {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
}
@Test
void testDiscoverWithoutPk() {
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(STREAM_NAME, actual.getStreams().get(0).getName());
assertEquals(false, actual.getStreams().get(0).getIsResumable());
}
@Test
@Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out")
public void testTableWithNullCursorValueShouldThrowException() throws Exception {
testdb
.with("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL")
.with("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)");
ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME,
testdb.getDatabaseName(),
Field.of("id", JsonSchemaType.INTEGER),
Field.of("name", JsonSchemaType.STRING),
Field.of("born", JsonSchemaType.STRING))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of("id"))));
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(
Collections.singletonList(configuredAirbyteStream));
final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(
source().read(getConfig(), catalog, null)));
assertThat(throwable).isInstanceOf(ConfigErrorException.class)
.hasMessageContaining(
"The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}");
}
@Test
void testDiscoverWithNonClusteredPk() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE UNIQUE CLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
final Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db,
new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace()));
String firstOcKey = oc.entrySet().iterator().next().getKey();
List<String> ocValues = oc.get(firstOcKey);
assertEquals(1, ocValues.size());
assertEquals("name", ocValues.get(0));
}
@Test
void testDiscoverWithNonUniqueClusteredIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE CLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
final Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db,
new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace()));
assertNull(oc);
}
@Test
void testDiscoverWithNonClusteredIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE NONCLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
final Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db,
new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace()));
assertNull(oc);
}
@Test
void testDiscoverWithClusteredCompositeIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE UNIQUE CLUSTERED INDEX n1 ON id_and_name (id, name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace())
.withSourceDefinedPrimaryKey(actual.getStreams().get(0).getSourceDefinedPrimaryKey());
Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db, stream);
String firstOcKey = oc.entrySet().iterator().next().getKey();
List<String> ocValues = oc.get(firstOcKey);
assertEquals(2, ocValues.size());
}
@Test
void testUsingPkWhenClusteredCompositeIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE CLUSTERED INDEX n1 ON id_and_name (id, name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().getFirst().getName()).withNamespace(actual.getStreams().getFirst().getNamespace())
.withSourceDefinedPrimaryKey(actual.getStreams().getFirst().getSourceDefinedPrimaryKey());
ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(stream);
final List<List<String>> primaryKey = configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey();
Optional<String> oc = MssqlInitialReadUtil.selectOcFieldName(db, configuredAirbyteStream);
assertEquals(primaryKey.getFirst().getFirst(), oc.orElse("No oc"));
}
@Test
void testNonClusteredIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().getFirst().getName()).withNamespace(actual.getStreams().getFirst().getNamespace())
.withSourceDefinedPrimaryKey(actual.getStreams().getFirst().getSourceDefinedPrimaryKey());
ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(stream);
Optional<String> oc = MssqlInitialReadUtil.selectOcFieldName(db, configuredAirbyteStream);
final List<List<String>> primaryKey = configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey();
assertEquals(primaryKey.getFirst().getFirst(), oc.orElse("No oc"));
}
@Test
void testNonClusteredIndexNoPK() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE NONCLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().getFirst().getName()).withNamespace(actual.getStreams().getFirst().getNamespace());
ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(stream);
Optional<String> oc = MssqlInitialReadUtil.selectOcFieldName(db, configuredAirbyteStream);
assertTrue(oc.isEmpty());
}
@Test
void testSetCursorCutoffInfoForValue() {
CursorInfo cursorInfo = new CursorInfo(null, null, null, null);
Instant now = Instant.parse("2024-06-01T12:34:56Z");
// DATE
CommonField<JDBCType> dateField = new CommonField<>("date_col", JDBCType.DATE);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, dateField, now);
assertEquals("2024-06-01", cursorInfo.getCutoffTime());
// TIMESTAMP
cursorInfo = new CursorInfo(null, null, null, null);
CommonField<JDBCType> tsField = new CommonField<>("ts_col", JDBCType.TIMESTAMP);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, tsField, now);
assertEquals("2024-06-01T00:00:00Z", cursorInfo.getCutoffTime()); // ISO_OFFSET_DATE_TIME
// TIMESTAMP_WITH_TIMEZONE
cursorInfo = new CursorInfo(null, null, null, null);
CommonField<JDBCType> tsTzField = new CommonField<>("ts_tz_col", JDBCType.TIMESTAMP_WITH_TIMEZONE);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, tsTzField, now);
assertEquals("2024-06-01T00:00:00.000000Z", cursorInfo.getCutoffTime());
// Non-temporal type
cursorInfo = new CursorInfo(null, null, null, null);
CommonField<JDBCType> intField = new CommonField<>("int_col", JDBCType.INTEGER);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, intField, now);
assertNull(cursorInfo.getCutoffTime());
}
}

View File

@@ -1,122 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static org.junit.jupiter.api.Assertions.fail;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.commons.exceptions.ConnectionErrorException;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.CertificateKey;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import io.airbyte.protocol.models.v0.AirbyteCatalog;
import java.net.InetAddress;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MssqlSslSourceTest {
private MsSQLTestDatabase testDb;
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSslSourceTest.class);
@BeforeEach
void setup() {
testDb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT, ContainerModifier.WITH_SSL_CERTIFICATES);
}
@AfterEach
public void tearDown() {
testDb.close();
}
@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateTrustHostnameWithValidCertificates(CertificateKey certificateKey) throws Exception {
if (!certificateKey.isValid) {
return;
}
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.testConfigBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate))
.build();
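// Discover should complete without throwing for a valid certificate; any SSL failure would surface as an exception.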
AirbyteCatalog catalog = new MssqlSource().discover(config);
}
@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateTrustHostnameWithInvalidCertificates(CertificateKey certificateKey) throws Exception {
if (certificateKey.isValid) {
return;
}
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.testConfigBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate))
.build();
try {
AirbyteCatalog catalog = new MssqlSource().discover(config);
} catch (ConnectionErrorException e) {
if (!e.getCause().getCause().getMessage().contains("PKIX path validation") &&
!e.getCause().getCause().getMessage().contains("PKIX path building failed")) {
throw e;
}
}
}
@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateNoTrustHostnameWrongHostname(CertificateKey certificateKey) throws Throwable {
if (!certificateKey.isValid) {
return;
}
String containerIp = InetAddress.getByName(testDb.getContainer().getHost()).getHostAddress();
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.configBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate))
.with(JdbcUtils.HOST_KEY, containerIp)
.with(JdbcUtils.PORT_KEY, testDb.getContainer().getFirstMappedPort())
.withCredentials()
.withDatabase()
.build();
try {
AirbyteCatalog catalog = new MssqlSource().discover(config);
fail("discover should have failed!");
} catch (ConnectionErrorException e) {
String expectedMessage =
"Failed to validate the server name \"" + containerIp + "\"in a certificate during Secure Sockets Layer (SSL) initialization.";
if (!e.getExceptionMessage().contains(expectedMessage)) {
fail("exception message was " + e.getExceptionMessage() + "\n expected: " + expectedMessage);
}
}
}
@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateNoTrustHostnameAlternateHostname(CertificateKey certificateKey) throws Exception {
final String containerIp = InetAddress.getByName(testDb.getContainer().getHost()).getHostAddress();
if (certificateKey.isValid) {
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.configBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate,
"hostNameInCertificate", testDb.getContainer().getHost()))
.with(JdbcUtils.HOST_KEY, containerIp)
.with(JdbcUtils.PORT_KEY, testDb.getContainer().getFirstMappedPort())
.withCredentials()
.withDatabase()
.build();
AirbyteCatalog catalog = new MssqlSource().discover(config);
}
}
}

View File

@@ -1,46 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource;
import io.airbyte.cdk.integrations.source.jdbc.test.JdbcStressTest;
import java.sql.JDBCType;
import java.util.Optional;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
@Disabled
public class MssqlStressTest extends JdbcStressTest {
private MsSQLTestDatabase testdb;
@BeforeEach
public void setup() throws Exception {
testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022);
super.setup();
}
@Override
public Optional<String> getDefaultSchemaName() {
return Optional.of("dbo");
}
@Override
public JsonNode getConfig() {
return testdb.testConfigBuilder().with("is_test", true).build();
}
@Override
public AbstractJdbcSource<JDBCType> getSource() {
return new MssqlSource();
}
@Override
public String getDriverClass() {
return MssqlSource.DRIVER_CLASS;
}
}

View File

@@ -0,0 +1,80 @@
/* Copyright (c) 2025 Airbyte, Inc., all rights reserved. */
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.testcontainers.TestContainerFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import org.testcontainers.containers.MSSQLServerContainer
import org.testcontainers.containers.Network
import org.testcontainers.utility.DockerImageName
object MsSqlServerContainerFactory {
const val COMPATIBLE_NAME = "mcr.microsoft.com/mssql/server:2022-latest"
private val log = KotlinLogging.logger {}
init {
TestContainerFactory.register(COMPATIBLE_NAME) { imageName: DockerImageName ->
MSSQLServerContainer(imageName).acceptLicense()
}
}
sealed interface MsSqlServerContainerModifier :
TestContainerFactory.ContainerModifier<MSSQLServerContainer<*>>
data object WithNetwork : MsSqlServerContainerModifier {
override fun modify(container: MSSQLServerContainer<*>) {
container.withNetwork(Network.newNetwork())
}
}
data object WithTestDatabase : MsSqlServerContainerModifier {
override fun modify(container: MSSQLServerContainer<*>) {
container.start()
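// Create the "test" database via sqlcmd; -C trusts the server's self-signed certificate, since mssql-tools18 encrypts connections by default.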
container.execInContainer(
"/opt/mssql-tools18/bin/sqlcmd",
"-S",
"localhost",
"-U",
container.username,
"-P",
container.password,
"-Q",
"CREATE DATABASE test",
"-C"
)
}
}
fun exclusive(
imageName: String,
vararg modifiers: MsSqlServerContainerModifier,
): MSSQLServerContainer<*> {
val dockerImageName =
DockerImageName.parse(imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME)
return TestContainerFactory.exclusive(dockerImageName, *modifiers)
}
fun shared(
imageName: String,
vararg modifiers: MsSqlServerContainerModifier,
): MSSQLServerContainer<*> {
val dockerImageName =
DockerImageName.parse(imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME)
return TestContainerFactory.shared(dockerImageName, *modifiers)
}
@JvmStatic
fun config(
msSQLContainer: MSSQLServerContainer<*>
): MsSqlServerSourceConfigurationSpecification =
MsSqlServerSourceConfigurationSpecification().apply {
host = msSQLContainer.host
port = msSQLContainer.getMappedPort(MSSQLServerContainer.MS_SQL_SERVER_PORT)
username = msSQLContainer.username
password = msSQLContainer.password
jdbcUrlParams = ""
database = "test" // Connect to test database
checkpointTargetIntervalSeconds = 60
concurrency = 1
setIncrementalValue(UserDefinedCursor())
}
}

View File

@@ -0,0 +1,630 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.data.AirbyteSchemaType
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.discover.MetaField
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.cdk.read.DatatypeTestCase
import io.airbyte.cdk.read.DatatypeTestOperations
import io.airbyte.cdk.read.DynamicDatatypeTestFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import java.sql.Connection
import org.junit.jupiter.api.BeforeAll
import org.junit.jupiter.api.DynamicNode
import org.junit.jupiter.api.TestFactory
import org.junit.jupiter.api.Timeout
import org.testcontainers.containers.MSSQLServerContainer
class MsSqlServerDatatypeIntegrationTest {
@TestFactory
@Timeout(300)
fun syncTests(): Iterable<DynamicNode> =
DynamicDatatypeTestFactory(MsSqlServerDatatypeTestOperations).build(dbContainer)
companion object {
lateinit var dbContainer: MSSQLServerContainer<*>
@JvmStatic
@BeforeAll
@Timeout(value = 300)
fun startAndProvisionTestContainer() {
dbContainer =
MsSqlServerContainerFactory.shared(
"mcr.microsoft.com/mssql/server:2022-latest",
MsSqlServerContainerFactory.WithNetwork,
MsSqlServerContainerFactory.WithTestDatabase
)
}
}
}
object MsSqlServerDatatypeTestOperations :
DatatypeTestOperations<
MSSQLServerContainer<*>,
MsSqlServerSourceConfigurationSpecification,
MsSqlServerSourceConfiguration,
MsSqlServerSourceConfigurationFactory,
MsSqlServerDatatypeTestCase
> {
private val log = KotlinLogging.logger {}
override val withGlobal: Boolean = true
override val globalCursorMetaField: MetaField =
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_CURSOR
override fun streamConfigSpec(
container: MSSQLServerContainer<*>
): MsSqlServerSourceConfigurationSpecification =
MsSqlServerContainerFactory.config(container).also {
it.setIncrementalValue(UserDefinedCursor())
}
override fun globalConfigSpec(
container: MSSQLServerContainer<*>
): MsSqlServerSourceConfigurationSpecification =
MsSqlServerContainerFactory.config(container).also { it.setIncrementalValue(Cdc()) }
override val configFactory: MsSqlServerSourceConfigurationFactory =
MsSqlServerSourceConfigurationFactory()
override fun createStreams(config: MsSqlServerSourceConfiguration) {
JdbcConnectionFactory(config).get().use { connection: Connection ->
connection.isReadOnly = false
// Enable CDC on the database (required before enabling CDC on tables)
try {
val enableDbCdcSql = "EXEC sys.sp_cdc_enable_db"
log.info { "Enabling CDC on database: $enableDbCdcSql" }
connection.createStatement().use { stmt -> stmt.execute(enableDbCdcSql) }
log.info { "Successfully enabled CDC on database" }
} catch (e: Exception) {
log.warn {
"Failed to enable CDC on database (may already be enabled): ${e.message}"
}
}
// Activate CDC to ensure initial LSN is available for testing
activateCdcWithInitialLsn(connection)
for ((_, case) in testCases) {
for (ddl in case.ddl) {
log.info { "test case ${case.id}: executing $ddl" }
connection.createStatement().use { stmt -> stmt.execute(ddl) }
}
// Enable CDC for tables that support it (CDC-compatible data types)
if (case.isGlobal) {
try {
val enableCdcSql =
"EXEC sys.sp_cdc_enable_table @source_schema = 'dbo', @source_name = '${case.id}', @role_name = 'CDC'"
log.info { "test case ${case.id}: enabling CDC with $enableCdcSql" }
connection.createStatement().use { stmt -> stmt.execute(enableCdcSql) }
log.info { "test case ${case.id}: successfully enabled CDC on table" }
} catch (e: Exception) {
log.warn { "test case ${case.id}: failed to enable CDC: ${e.message}" }
}
}
}
}
}
override fun populateStreams(config: MsSqlServerSourceConfiguration) {
JdbcConnectionFactory(config).get().use { connection: Connection ->
connection.isReadOnly = false
for ((_, case) in testCases) {
for (dml in case.dml) {
log.info { "test case ${case.id}: executing $dml" }
connection.createStatement().use { stmt -> stmt.execute(dml) }
}
}
}
// Open a NEW connection to force CDC scan after commit
JdbcConnectionFactory(config).get().use { connection: Connection ->
try {
connection.createStatement().use { stmt ->
// Manually run the CDC scan to capture all pending changes
stmt.execute("EXEC sys.sp_cdc_scan")
log.info {
"Executed sp_cdc_scan in new connection to capture committed changes"
}
}
} catch (e: Exception) {
log.error { "Failed to force CDC scan after data population: ${e.message}" }
}
}
}
/**
* Activates CDC and generates initial LSN required for testing. Creates a dummy table, enables
* CDC on it, inserts data, and ensures LSN is available.
*/
private fun activateCdcWithInitialLsn(connection: Connection) {
try {
connection.createStatement().use { stmt ->
// Drop and recreate dummy table to ensure clean state
stmt.execute("DROP TABLE IF EXISTS dbo.cdc_dummy")
stmt.execute("CREATE TABLE dbo.cdc_dummy (id INT PRIMARY KEY)")
stmt.execute(
"EXEC sys.sp_cdc_enable_table @source_schema = 'dbo', @source_name = 'cdc_dummy', @role_name = NULL"
)
// Insert data to generate LSN
stmt.execute(
"INSERT INTO dbo.cdc_dummy (id) SELECT COALESCE(MAX(id), 0) + 1 FROM dbo.cdc_dummy"
)
// Start CDC capture job and trigger scan
try {
stmt.execute("EXEC sys.sp_cdc_start_job @job_type = 'capture'")
Thread.sleep(2000)
} catch (e: Exception) {
log.debug { "CDC capture job start failed: ${e.message}" }
}
try {
stmt.execute("EXEC sys.sp_cdc_scan")
Thread.sleep(1000)
} catch (e: Exception) {
log.debug { "Manual CDC scan failed: ${e.message}" }
}
log.info { "CDC activated with dummy data for testing" }
}
} catch (e: Exception) {
log.warn { "CDC activation failed: ${e.message}" }
}
}
// Data type test values
val booleanValues =
mapOf(
"0" to "false",
"1" to "true",
"'true'" to "true",
"'false'" to "false",
"NULL" to "null",
)
val integerValues =
mapOf(
"10" to "10",
"100000000" to "100000000",
"200000000" to "200000000",
"-2147483648" to "-2147483648",
"2147483647" to "2147483647",
"NULL" to "null",
)
val bigintValues =
mapOf(
"-9223372036854775808" to "-9223372036854775808",
"9223372036854775807" to "9223372036854775807",
"0" to "0",
"NULL" to "null",
)
val smallintValues =
mapOf(
"-32768" to "-32768",
"32767" to "32767",
"NULL" to "null",
)
val tinyintValues =
mapOf(
"0" to "0",
"255" to "255",
"NULL" to "null",
)
val decimalValues =
mapOf(
"999.33" to "999.33",
"NULL" to "null",
)
val numericValues =
mapOf(
"'99999'" to "99999",
"NULL" to "null",
)
val moneyValues =
mapOf(
"'9990000.3647'" to "9990000.3647",
"NULL" to "null",
)
val smallmoneyValues =
mapOf(
"'-214748.3648'" to "-214748.3648",
"214748.3647" to "214748.3647",
"NULL" to "null",
)
val floatValues =
mapOf(
"'123'" to "123.0",
"'1234567890.1234567'" to "1234567890.1234567",
"NULL" to "null",
)
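// REAL is a 4-byte float, so the 17-digit literal below loses precision and is emitted as 1234568000.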
val realValues =
mapOf(
"'123'" to "123.0",
"'1234567890.1234567'" to "1234568000",
"NULL" to "null",
)
val dateValues =
mapOf(
"'0001-01-01'" to """"0001-01-01"""",
"'9999-12-31'" to """"9999-12-31"""",
"'1999-01-08'" to """"1999-01-08"""",
"NULL" to "null",
)
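// SMALLDATETIME covers 1900-01-01 through 2079-06-06 at one-minute precision; the values below exercise both ends of the range.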
val smalldatetimeValues =
mapOf(
"'1900-01-01'" to """"1900-01-01T00:00:00.000000"""",
"'2079-06-06'" to """"2079-06-06T00:00:00.000000"""",
"NULL" to "null",
)
val datetimeValues =
mapOf(
"'1753-01-01'" to """"1753-01-01T00:00:00.000000"""",
"'9999-12-31'" to """"9999-12-31T00:00:00.000000"""",
"'9999-12-31T13:00:04'" to """"9999-12-31T13:00:04.000000"""",
"'9999-12-31T13:00:04.123'" to """"9999-12-31T13:00:04.123000"""",
"NULL" to "null",
)
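// DATETIME2 stores up to 7 fractional digits, but emitted values are truncated to microsecond (6-digit) precision.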
val datetime2Values =
mapOf(
"'0001-01-01'" to """"0001-01-01T00:00:00.000000"""",
"'9999-12-31'" to """"9999-12-31T00:00:00.000000"""",
"'9999-12-31T13:00:04.123456'" to """"9999-12-31T13:00:04.123456"""",
"'2023-11-08T01:20:11.3733338'" to """"2023-11-08T01:20:11.373333"""",
"NULL" to "null",
)
val timeValues =
mapOf(
"'13:00:01'" to """"13:00:01.000000"""",
"'13:00:04Z'" to """"13:00:04.000000"""",
"'13:00:04.123456Z'" to """"13:00:04.123456"""",
"NULL" to "null",
)
val datetimeoffsetValues =
mapOf(
"'2001-01-10 00:00:00 +01:00'" to """"2001-01-10T00:00:00.000000+01:00"""",
"'9999-01-10 00:00:00 +01:00'" to """"9999-01-10T00:00:00.000000+01:00"""",
"'2024-05-10 19:00:01.604805 +03:00'" to """"2024-05-10T19:00:01.604805+03:00"""",
"'2024-03-02 19:08:07.1234567 +09:00'" to """"2024-03-02T19:08:07.123456+09:00"""",
"'0001-01-01 00:00:00.0000000 +00:00'" to """"0001-01-01T00:00:00.000000Z"""",
"NULL" to "null",
)
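// CHAR(50) is fixed-width, so emitted values are right-padded with spaces to 50 characters.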
val charValues =
mapOf(
"'a'" to """"a """",
"'*'" to """"* """",
"'abc'" to """"abc """",
"'Hello World!'" to """"Hello World! """",
"'Test123'" to """"Test123 """",
"''" to """" """",
"NULL" to "null",
)
val varcharValues =
mapOf(
"''" to """""""",
"'*'" to """"*"""",
"'a'" to """"a"""",
"'abc'" to """"abc"""",
"N'Миші йдуть на південь, не питай чому;'" to
""""Миші йдуть на південь, не питай чому;"""",
"N'櫻花分店'" to """"櫻花分店"""",
"NULL" to "null",
)
val textValues =
mapOf(
"''" to """""""",
"'Some test text 123\$%^&*()_'" to """"Some test text 123$%^&*()_"""",
"'a'" to """"a"""",
"'abc'" to """"abc"""",
"NULL" to "null",
)
val ncharValues =
mapOf(
"'a'" to """"a """",
"'*'" to """"* """",
"'abc'" to """"abc """",
"N'Миші йдуть на південь, не питай чому;'" to
""""Миші йдуть на південь, не питай чому; """",
"N'櫻花分店'" to """"櫻花分店 """",
"''" to """" """",
"NULL" to "null",
)
val nvarcharValues =
mapOf(
"''" to """""""",
"'*'" to """"*"""",
"'a'" to """"a"""",
"'abc'" to """"abc"""",
"N'Миші йдуть на південь, не питай чому;'" to
""""Миші йдуть на південь, не питай чому;"""",
"N'櫻花分店'" to """"櫻花分店"""",
"NULL" to "null",
)
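// Binary columns are emitted Base64-encoded: 'A' (0x41) becomes "QQ==" and 'ABC' becomes "QUJD".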
val binaryValues =
mapOf(
"CAST( 'A' AS BINARY(1))" to """"QQ=="""",
"NULL" to "null",
)
val varbinaryValues =
mapOf(
"CAST( 'ABC' AS VARBINARY)" to """"QUJD"""",
"NULL" to "null",
)
val uniqueidentifierValues =
mapOf(
"'375CFC44-CAE3-4E43-8083-821D2DF0E626'" to
""""375CFC44-CAE3-4E43-8083-821D2DF0E626"""",
"NULL" to "null",
)
val xmlValues =
mapOf(
"''" to """""""",
"'<user><user_id>1</user_id></user>'" to """"<user><user_id>1</user_id></user>"""",
"NULL" to "null",
)
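// Spatial types are emitted as their WKT text representation.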
val geometryValues =
mapOf(
"geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)" to
""""LINESTRING(100 100, 20 180, 180 180)"""",
"NULL" to "null",
)
val geographyValues =
mapOf(
"geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326)" to
""""LINESTRING(-122.36 47.656, -122.343 47.656)"""",
"NULL" to "null",
)
val hierarchyidValues =
mapOf(
"'/1/1/'" to """"/1/1/"""",
"NULL" to "null",
)
override val testCases: Map<String, MsSqlServerDatatypeTestCase> =
listOf(
// Integer types
MsSqlServerDatatypeTestCase(
"BIGINT",
bigintValues,
LeafAirbyteSchemaType.INTEGER,
),
MsSqlServerDatatypeTestCase(
"INT",
integerValues,
LeafAirbyteSchemaType.INTEGER,
),
MsSqlServerDatatypeTestCase(
"SMALLINT",
smallintValues,
LeafAirbyteSchemaType.INTEGER,
),
MsSqlServerDatatypeTestCase(
"TINYINT",
tinyintValues,
LeafAirbyteSchemaType.INTEGER,
),
// Boolean type
MsSqlServerDatatypeTestCase(
"BIT",
booleanValues,
LeafAirbyteSchemaType.BOOLEAN,
),
// Decimal types
MsSqlServerDatatypeTestCase(
"DECIMAL(5,2)",
decimalValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"NUMERIC",
numericValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"MONEY",
moneyValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"SMALLMONEY",
smallmoneyValues,
LeafAirbyteSchemaType.NUMBER,
),
// Float types
MsSqlServerDatatypeTestCase(
"FLOAT",
floatValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"REAL",
realValues,
LeafAirbyteSchemaType.NUMBER,
),
// Date/Time types
MsSqlServerDatatypeTestCase(
"DATE",
dateValues,
LeafAirbyteSchemaType.DATE,
),
MsSqlServerDatatypeTestCase(
"SMALLDATETIME",
smalldatetimeValues,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"DATETIME",
datetimeValues,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"DATETIME2",
datetime2Values,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"TIME",
timeValues,
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"DATETIMEOFFSET",
datetimeoffsetValues,
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE,
),
// String types
MsSqlServerDatatypeTestCase(
"CHAR(50)",
charValues,
LeafAirbyteSchemaType.STRING,
),
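            // NOTE: declared as NVARCHAR(MAX) rather than VARCHAR(MAX) so the Unicode literals in
            // varcharValues round-trip intact; its generated id collides with the NVARCHAR(MAX)
            // case below, and associateBy keeps only the last entry for a duplicated id.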
MsSqlServerDatatypeTestCase(
"NVARCHAR(MAX)",
varcharValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"TEXT",
textValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NCHAR(50)",
ncharValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NVARCHAR(MAX)",
nvarcharValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NTEXT",
nvarcharValues,
LeafAirbyteSchemaType.STRING,
),
// Binary types
MsSqlServerDatatypeTestCase(
"BINARY(1)",
binaryValues,
LeafAirbyteSchemaType.BINARY,
),
MsSqlServerDatatypeTestCase(
"VARBINARY(3)",
varbinaryValues,
LeafAirbyteSchemaType.BINARY,
),
// Special types
MsSqlServerDatatypeTestCase(
"UNIQUEIDENTIFIER",
uniqueidentifierValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"XML",
xmlValues,
LeafAirbyteSchemaType.STRING,
),
// Spatial types
MsSqlServerDatatypeTestCase(
"GEOMETRY",
geometryValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"GEOGRAPHY",
geographyValues,
LeafAirbyteSchemaType.STRING,
),
// Hierarchy type - only for non-CDC tests
MsSqlServerDatatypeTestCase(
"HIERARCHYID",
hierarchyidValues,
LeafAirbyteSchemaType.STRING,
isGlobal = false, // CDC doesn't support hierarchyid properly
),
)
.associateBy { it.id }
}
data class MsSqlServerDatatypeTestCase(
val sqlType: String,
val sqlToAirbyte: Map<String, String>,
override val expectedAirbyteSchemaType: AirbyteSchemaType,
override val isGlobal: Boolean = true,
) : DatatypeTestCase {
override val isStream: Boolean
get() = true
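    // Derives a snake_case identifier from the SQL type, e.g. "DECIMAL(5,2)" -> "decimal_5_2",
    // giving table id "tbl_decimal_5_2" and column name "col_decimal_5_2".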
private val typeName: String
get() =
sqlType
.replace("[^a-zA-Z0-9]".toRegex(), " ")
.trim()
.replace(" +".toRegex(), "_")
.lowercase()
override val id: String
get() = "tbl_$typeName"
override val fieldName: String
get() = "col_$typeName"
override val expectedData: List<String>
get() = sqlToAirbyte.values.map { """{"${fieldName}":$it}""" }
val ddl: List<String>
get() =
listOf(
"DROP TABLE IF EXISTS $id",
"CREATE TABLE $id " + "(pk INT IDENTITY(1,1) PRIMARY KEY, $fieldName $sqlType)",
)
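    // NULL keys are inserted via DEFAULT VALUES, so only the IDENTITY pk is generated and the
    // data column falls back to its implicit NULL default; all other keys are inserted verbatim.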
val dml: List<String>
get() =
sqlToAirbyte.keys.map {
if (it == "NULL") {
"INSERT INTO $id DEFAULT VALUES"
} else {
"INSERT INTO $id ($fieldName) VALUES ($it)"
}
}
}

View File

@@ -0,0 +1,604 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.CliRunner
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.protocol.models.v0.CatalogHelpers
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.SyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import java.sql.Connection
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import org.junit.jupiter.api.AfterAll
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.BeforeAll
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.Timeout
import org.testcontainers.containers.MSSQLServerContainer
private val log = KotlinLogging.logger {}
class MsSqlServerExcludeTodaysDataIntegrationTest {
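    // Exercises the exclude_todays_data option end to end: with a temporal cursor
    // (DATE/DATETIME/DATETIME2) rows dated today are filtered from both initial and incremental
    // syncs, while non-temporal cursors (INT, VARCHAR) leave the option inert.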
private val createdTables = mutableListOf<String>()
@AfterEach
fun cleanupTables() {
// Clean up all tables created during the test
if (createdTables.isNotEmpty()) {
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
createdTables.forEach { tableName ->
try {
connection.createStatement().use { stmt ->
stmt.execute("DROP TABLE IF EXISTS $tableName")
}
log.info { "Dropped test table: $tableName" }
} catch (e: Exception) {
log.warn(e) { "Failed to drop table $tableName" }
}
}
}
createdTables.clear()
}
}
@Test
@Timeout(60)
fun testExcludeTodaysDataWithDateColumn() {
// Setup: Create a table with records from different dates
val tableName = "test_exclude_today_date"
val today = LocalDate.now()
val yesterday = today.minusDays(1)
val twoDaysAgo = today.minusDays(2)
setupDateTable(tableName, today, yesterday, twoDaysAgo)
// Test with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithExclude = performSync(configWithExclude, tableName, "order_date")
// Verify: Today's records should be excluded
val recordDates = extractDates(recordsWithExclude, "order_date")
Assertions.assertFalse(
recordDates.contains(today.toString()),
"Today's records should be excluded when exclude_todays_data is true"
)
Assertions.assertTrue(
recordDates.contains(yesterday.toString()),
"Yesterday's records should be included"
)
Assertions.assertTrue(
recordDates.contains(twoDaysAgo.toString()),
"Records from two days ago should be included"
)
// Test with exclude_todays_data = false
val configWithoutExclude = createConfig(excludeTodaysData = false)
val recordsWithoutExclude = performSync(configWithoutExclude, tableName, "order_date")
// Verify: Today's records should be included
val allRecordDates = extractDates(recordsWithoutExclude, "order_date")
Assertions.assertTrue(
allRecordDates.contains(today.toString()),
"Today's records should be included when exclude_todays_data is false"
)
}
@Test
@Timeout(60)
fun testExcludeTodaysDataWithDateTimeColumn() {
// Setup: Create a table with datetime records
val tableName = "test_exclude_today_datetime"
val now = LocalDateTime.now()
val todayMorning = now.withHour(9).withMinute(0).withSecond(0)
val todayEvening = now.withHour(18).withMinute(30).withSecond(0)
val yesterdayNoon = now.minusDays(1).withHour(12).withMinute(0).withSecond(0)
val lastMidnight = now.toLocalDate().atStartOfDay()
val beforeMidnight = lastMidnight.minusMinutes(1)
setupDateTimeTable(tableName, todayMorning, todayEvening, yesterdayNoon, beforeMidnight)
// Test with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithExclude = performSync(configWithExclude, tableName, "created_at")
// Verify: Records from today (after midnight) should be excluded
val timestamps = extractTimestamps(recordsWithExclude, "created_at")
Assertions.assertFalse(
timestamps.any {
it.contains(todayMorning.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME))
},
"Today morning's records should be excluded"
)
Assertions.assertFalse(
timestamps.any {
it.contains(todayEvening.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME))
},
"Today evening's records should be excluded"
)
Assertions.assertTrue(
timestamps.any { ts -> ts.startsWith(yesterdayNoon.toLocalDate().toString()) },
"Yesterday's records should be included"
)
Assertions.assertTrue(
timestamps.any { ts -> ts.startsWith(beforeMidnight.toLocalDate().toString()) },
"Records from just before midnight should be included"
)
}
@Test
@Timeout(120)
fun testExcludeTodaysDataWithCursorBasedIncremental() {
// Setup: Create a table with date column for cursor-based incremental sync
val tableName = "test_exclude_today_incremental"
val today = LocalDate.now()
val yesterday = today.minusDays(1)
val twoDaysAgo = today.minusDays(2)
val threeDaysAgo = today.minusDays(3)
// Initial data setup with records from 3 days ago and 2 days ago
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
// Create table
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
order_date DATE,
amount DECIMAL(10,2),
status VARCHAR(50)
)
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(createTable) }
// Insert initial data (only old records)
val insertInitialData =
"""
INSERT INTO $tableName (order_date, amount, status) VALUES
('$threeDaysAgo', 100.00, 'initial'),
('$twoDaysAgo', 200.00, 'initial');
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertInitialData) }
log.info { "Created table $tableName with initial data" }
createdTables.add(tableName)
}
// First sync: Initial snapshot with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val initialRecords = performSync(configWithExclude, tableName, "order_date")
// Verify initial sync contains only old records
val initialDates = extractDates(initialRecords, "order_date")
Assertions.assertEquals(2, initialRecords.size, "Initial sync should have 2 records")
Assertions.assertTrue(
initialDates.contains(threeDaysAgo.toString()),
"Initial sync should include records from 3 days ago"
)
Assertions.assertTrue(
initialDates.contains(twoDaysAgo.toString()),
"Initial sync should include records from 2 days ago"
)
// Add new records including yesterday and today
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
val insertNewData =
"""
INSERT INTO $tableName (order_date, amount, status) VALUES
('$yesterday', 300.00, 'incremental'),
('$yesterday', 350.00, 'incremental'),
('$today', 400.00, 'incremental_today'),
('$today', 450.00, 'incremental_today');
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertNewData) }
log.info { "Added new records including yesterday and today" }
}
// Second sync: Incremental sync should exclude today's data
val incrementalRecords = performSync(configWithExclude, tableName, "order_date")
// Extract only the new records from incremental sync
val allDates = extractDates(incrementalRecords, "order_date")
// Count records by date
val todayCount = allDates.count { it == today.toString() }
val yesterdayCount = allDates.count { it == yesterday.toString() }
// Verify: Today's records should be excluded in incremental sync
Assertions.assertEquals(
0,
todayCount,
"Today's records should be excluded during incremental sync when exclude_todays_data is true"
)
// Verify: Yesterday's records should be included
Assertions.assertTrue(
yesterdayCount >= 2,
"Yesterday's records should be included in incremental sync"
)
// Test without exclude_todays_data to confirm today's records exist
val configWithoutExclude = createConfig(excludeTodaysData = false)
val allRecordsIncludingToday = performSync(configWithoutExclude, tableName, "order_date")
val allDatesWithToday = extractDates(allRecordsIncludingToday, "order_date")
val todayCountWithoutExclude = allDatesWithToday.count { it == today.toString() }
Assertions.assertEquals(
2,
todayCountWithoutExclude,
"Today's records should be included when exclude_todays_data is false"
)
log.info { "Incremental sync test completed successfully" }
}
@Test
@Timeout(120)
fun testExcludeTodaysDataNotTriggeredForNonTemporalCursor() {
// Setup: Create a table with non-temporal cursor field (INTEGER and VARCHAR)
val tableName = "test_exclude_today_non_temporal"
val today = LocalDate.now()
val yesterday = today.minusDays(1)
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
// Create table with INTEGER primary key as cursor and date column for verification
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
order_date DATE,
status VARCHAR(50),
amount DECIMAL(10,2)
)
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(createTable) }
// Insert test data with today's and yesterday's dates
// IDs will be 1, 2, 3, 4, 5
val insertData =
"""
INSERT INTO $tableName (order_date, status, amount) VALUES
('$yesterday', 'old', 100.00),
('$yesterday', 'old', 150.00),
('$today', 'new', 200.00),
('$today', 'new', 250.00),
('$today', 'new', 300.00);
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertData) }
log.info { "Created table $tableName with non-temporal cursor test data" }
createdTables.add(tableName)
}
// Test 1: Using INTEGER cursor field with exclude_todays_data = true
// The feature should NOT be triggered, all records should be returned
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithIntCursor = performSync(configWithExclude, tableName, "id")
// Verify: All records should be included (feature not triggered for INTEGER cursor)
Assertions.assertEquals(
5,
recordsWithIntCursor.size,
"All 5 records should be included when cursor is INTEGER, even with exclude_todays_data = true"
)
// Verify today's records are included
val dates = extractDates(recordsWithIntCursor, "order_date")
val todayCount = dates.count { it == today.toString() }
Assertions.assertEquals(
3,
todayCount,
"Today's 3 records should be included when cursor is INTEGER type"
)
// Test 2: Using VARCHAR cursor field with exclude_todays_data = true
val recordsWithStringCursor = performSync(configWithExclude, tableName, "status")
// Verify: All records should be included (feature not triggered for VARCHAR cursor)
Assertions.assertEquals(
5,
recordsWithStringCursor.size,
"All 5 records should be included when cursor is VARCHAR, even with exclude_todays_data = true"
)
// Test 3: Incremental sync with non-temporal cursor should also include all new records
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
// Add more records with today's date
val insertNewData =
"""
INSERT INTO $tableName (order_date, status, amount) VALUES
('$today', 'newer', 350.00),
('$today', 'newer', 400.00);
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertNewData) }
log.info { "Added new records with today's date" }
}
// Perform incremental sync with INTEGER cursor
val incrementalRecords = performSync(configWithExclude, tableName, "id")
// Verify: All 7 records should be present (5 original + 2 new)
Assertions.assertEquals(
7,
incrementalRecords.size,
"All records including new today's records should be included in incremental sync with non-temporal cursor"
)
// Verify new today's records are included
val allDates = extractDates(incrementalRecords, "order_date")
val finalTodayCount = allDates.count { it == today.toString() }
Assertions.assertEquals(
5,
finalTodayCount,
"All 5 of today's records should be included after incremental sync with non-temporal cursor"
)
log.info {
"Non-temporal cursor test completed successfully - exclude_todays_data feature was correctly NOT triggered"
}
}
@Test
@Timeout(60)
fun testExcludeTodaysDataWithDatetime2Column() {
// Setup: Create a table with datetime2 column for higher precision
val tableName = "test_exclude_today_datetime2"
val now = LocalDateTime.now()
val todayWithMicros = now.withNano(0) // Remove nanoseconds
val yesterdayWithMicros = now.minusDays(1).withNano(0) // Remove nanoseconds
setupDateTime2Table(tableName, todayWithMicros, yesterdayWithMicros)
// Test with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithExclude = performSync(configWithExclude, tableName, "updated_at")
// Verify: Today's high-precision records should be excluded
val timestamps = extractTimestamps(recordsWithExclude, "updated_at")
Assertions.assertEquals(1, timestamps.size, "Only yesterday's record should be included")
Assertions.assertTrue(
timestamps.any { ts -> ts.startsWith(yesterdayWithMicros.toLocalDate().toString()) },
"Yesterday's high-precision record should be included"
)
}
private fun setupDateTable(
tableName: String,
today: LocalDate,
yesterday: LocalDate,
twoDaysAgo: LocalDate
) {
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
// Create table
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
order_date DATE,
amount DECIMAL(10,2)
)
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(createTable) }
// Insert test data
val insertData =
"""
INSERT INTO $tableName (order_date, amount) VALUES
('$today', 100.00),
('$today', 150.00),
('$yesterday', 200.00),
('$yesterday', 250.00),
('$twoDaysAgo', 300.00);
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertData) }
log.info { "Created table $tableName with test data" }
createdTables.add(tableName)
}
}
private fun setupDateTimeTable(
tableName: String,
todayMorning: LocalDateTime,
todayEvening: LocalDateTime,
yesterdayNoon: LocalDateTime,
beforeMidnight: LocalDateTime
) {
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
// Create table using regular DATETIME (this test is for DATETIME columns)
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
created_at DATETIME,
description VARCHAR(100)
)
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(createTable) }
            // Insert test data without fractional seconds, sidestepping DATETIME's ~3 ms rounding
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
val insertData =
"""
INSERT INTO $tableName (created_at, description) VALUES
('${todayMorning.format(formatter)}', 'Today morning'),
('${todayEvening.format(formatter)}', 'Today evening'),
('${yesterdayNoon.format(formatter)}', 'Yesterday noon'),
('${beforeMidnight.format(formatter)}', 'Just before midnight');
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertData) }
log.info { "Created table $tableName with datetime test data (precision workaround)" }
createdTables.add(tableName)
}
}
private fun setupDateTime2Table(
tableName: String,
todayWithMicros: LocalDateTime,
yesterdayWithMicros: LocalDateTime
) {
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
// Create table using DATETIME2(6) to handle precision mismatch
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
updated_at DATETIME2(6),
status VARCHAR(50)
)
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(createTable) }
            // Insert test data without fractional seconds for precision-stable comparisons
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
val todayFormatted = todayWithMicros.format(formatter)
val yesterdayFormatted = yesterdayWithMicros.format(formatter)
log.info { "Inserting today: $todayFormatted, yesterday: $yesterdayFormatted" }
val insertData =
"""
INSERT INTO $tableName (updated_at, status) VALUES
('$todayFormatted', 'Today with microseconds'),
('$yesterdayFormatted', 'Yesterday with microseconds');
""".trimIndent()
connection.createStatement().use { stmt -> stmt.execute(insertData) }
log.info { "Created table $tableName with datetime2 test data (precision workaround)" }
createdTables.add(tableName)
}
}
private fun createConfig(
excludeTodaysData: Boolean
): MsSqlServerSourceConfigurationSpecification {
val config = MsSqlServerContainerFactory.config(dbContainer)
config.setIncrementalValue(
UserDefinedCursor().apply { this.excludeTodaysData = excludeTodaysData }
)
return config
}
private fun performSync(
config: MsSqlServerSourceConfigurationSpecification,
tableName: String,
cursorField: String
): List<JsonNode> {
// Discover catalog
val discoverOutput = CliRunner.source("discover", config).run()
val catalog =
discoverOutput.catalogs().firstOrNull()
?: throw IllegalStateException("No catalog discovered")
val stream =
catalog.streams.find { it.name == tableName }
?: throw IllegalStateException("Table $tableName not found in catalog")
// Configure stream for incremental sync with cursor
val configuredStream =
CatalogHelpers.toDefaultConfiguredStream(stream).apply {
syncMode = SyncMode.INCREMENTAL
this.cursorField = listOf(cursorField)
}
val configuredCatalog = ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream))
// Perform sync
val syncOutput = CliRunner.source("read", config, configuredCatalog).run()
val records = syncOutput.records().mapNotNull { it.data }
log.info { "Synced ${records.size} records from $tableName" }
return records
}
private fun extractDates(records: List<JsonNode>, fieldName: String): List<String> {
return records.mapNotNull { record -> record.get(fieldName)?.asText() }
}
private fun extractTimestamps(records: List<JsonNode>, fieldName: String): List<String> {
return records.mapNotNull { record -> record.get(fieldName)?.asText() }
}
companion object {
lateinit var dbContainer: MSSQLServerContainer<*>
val connectionFactory: JdbcConnectionFactory by lazy {
JdbcConnectionFactory(
MsSqlServerSourceConfigurationFactory()
.make(MsSqlServerContainerFactory.config(dbContainer))
)
}
@JvmStatic
@BeforeAll
@Timeout(value = 300)
fun startContainer() {
dbContainer =
MsSqlServerContainerFactory.exclusive(
"mcr.microsoft.com/mssql/server:2022-latest",
MsSqlServerContainerFactory.WithNetwork,
MsSqlServerContainerFactory.WithTestDatabase
)
// Ensure test schema exists
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
connection.createStatement().use { stmt ->
stmt.execute(
"IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = 'dbo') BEGIN EXEC('CREATE SCHEMA dbo') END"
)
}
}
}
@JvmStatic
@AfterAll
fun stopContainer() {
if (::dbContainer.isInitialized) {
dbContainer.stop()
}
}
}
}

View File

@@ -0,0 +1,439 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.node.BinaryNode
import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.cdk.ClockFactory
import io.airbyte.cdk.StreamIdentifier
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.discover.MetaField
import io.airbyte.cdk.discover.MetaFieldDecorator
import io.airbyte.cdk.jdbc.BinaryStreamFieldType
import io.airbyte.cdk.jdbc.DefaultJdbcConstants
import io.airbyte.cdk.jdbc.IntFieldType
import io.airbyte.cdk.jdbc.LocalDateTimeFieldType
import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType
import io.airbyte.cdk.output.BufferingOutputConsumer
import io.airbyte.cdk.output.DataChannelFormat
import io.airbyte.cdk.output.DataChannelMedium
import io.airbyte.cdk.output.sockets.NativeRecordPayload
import io.airbyte.cdk.read.ConcurrencyResource
import io.airbyte.cdk.read.ConfiguredSyncMode
import io.airbyte.cdk.read.DefaultJdbcSharedState
import io.airbyte.cdk.read.ResourceAcquirer
import io.airbyte.cdk.read.SelectQuerier
import io.airbyte.cdk.read.StateManager
import io.airbyte.cdk.read.Stream
import io.airbyte.cdk.read.StreamFeedBootstrap
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.StreamDescriptor
import io.mockk.mockk
import java.time.OffsetDateTime
import java.util.Base64
import kotlin.test.assertNull
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.CsvSource
class MsSqlServerJdbcPartitionFactoryTest {
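    // Partition routing under test: cold starts yield snapshot partitions (with-cursor for
    // cursor streams, CDC snapshot in CDC mode); incoming state values are routed by state_type
    // to incremental or resumed-snapshot partitions, and a completed CDC snapshot yields null.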
companion object {
private val selectQueryGenerator = MsSqlSourceOperations()
private val sharedState = sharedState()
private val cdcSharedState = sharedState(global = true)
private val config = mockk<MsSqlServerSourceConfiguration>(relaxed = true)
val msSqlServerJdbcPartitionFactory =
MsSqlServerJdbcPartitionFactory(sharedState, selectQueryGenerator, config)
val msSqlServerCdcJdbcPartitionFactory =
MsSqlServerJdbcPartitionFactory(cdcSharedState, selectQueryGenerator, config)
val fieldId = Field("id", IntFieldType)
val stream =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("test_table")
),
schema = setOf(fieldId),
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
configuredPrimaryKey = listOf(fieldId),
configuredCursor = fieldId,
)
val timestampFieldId = Field("created_at", OffsetDateTimeFieldType)
val timestampStream =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("timestamp_table")
),
schema = setOf(timestampFieldId),
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
configuredPrimaryKey = listOf(timestampFieldId),
configuredCursor = timestampFieldId,
)
val binaryFieldId = Field("binary_col", BinaryStreamFieldType)
val binaryStream =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("binary_table")
),
schema = setOf(binaryFieldId),
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
configuredPrimaryKey = listOf(binaryFieldId),
configuredCursor = binaryFieldId,
)
val datetimeFieldId = Field("datetime_col", LocalDateTimeFieldType)
val datetimeStream =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("datetime_table")
),
schema = setOf(datetimeFieldId),
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
configuredPrimaryKey = listOf(datetimeFieldId),
configuredCursor = datetimeFieldId,
)
private fun sharedState(
global: Boolean = false,
): DefaultJdbcSharedState {
val configSpec =
MsSqlServerSourceConfigurationSpecification().apply {
host = "localhost"
port = 1433
username = "sa"
password = "Password123!"
database = "master"
}
if (global) {
configSpec.setIncrementalValue(Cdc())
} else {
configSpec.setIncrementalValue(UserDefinedCursor())
}
val configFactory = MsSqlServerSourceConfigurationFactory()
val configuration = configFactory.make(configSpec)
val mockSelectQuerier = mockk<SelectQuerier>()
return DefaultJdbcSharedState(
configuration,
mockSelectQuerier,
DefaultJdbcConstants(),
ConcurrencyResource(configuration),
ResourceAcquirer(emptyList())
)
}
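        // Builds a minimal StreamFeedBootstrap: a buffering output consumer, a no-op meta-field
        // decorator, and a StateManager seeded with the optional incumbent state for the stream.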
private fun streamFeedBootstrap(
stream: Stream,
incumbentStateValue: OpaqueStateValue? = null
) =
StreamFeedBootstrap(
outputConsumer = BufferingOutputConsumer(ClockFactory().fixed()),
metaFieldDecorator =
object : MetaFieldDecorator {
override val globalCursor: MetaField? = null
override val globalMetaFields: Set<MetaField> = emptySet()
override fun decorateRecordData(
timestamp: OffsetDateTime,
globalStateValue: OpaqueStateValue?,
stream: Stream,
recordData: ObjectNode
) {}
override fun decorateRecordData(
timestamp: OffsetDateTime,
globalStateValue: OpaqueStateValue?,
stream: Stream,
recordData: NativeRecordPayload
) {
// no-op
}
},
stateManager =
StateManager(initialStreamStates = mapOf(stream to incumbentStateValue)),
stream,
DataChannelFormat.JSONL,
DataChannelMedium.STDIO,
8192,
ClockFactory().fixed(),
)
}
@Test
fun testColdStartWithPkCursorBased() {
val jdbcPartition = msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(stream))
assertTrue(jdbcPartition is MsSqlServerJdbcSnapshotWithCursorPartition)
}
@Test
fun testColdStartWithPkCdc() {
val jdbcPartition = msSqlServerCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream))
assertTrue(jdbcPartition is MsSqlServerJdbcCdcSnapshotPartition)
}
@Test
fun testColdStartWithoutPk() {
val streamWithoutPk =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("no_pk_table")
),
schema = setOf(fieldId),
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
configuredPrimaryKey = listOf(),
configuredCursor = fieldId,
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(streamWithoutPk))
assertTrue(jdbcPartition is MsSqlServerJdbcNonResumableSnapshotWithCursorPartition)
}
@Test
fun testResumeFromCompletedCursorBasedRead() {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"cursor": "12345",
"version": 3,
"state_type": "cursor_based",
"stream_name": "test_table",
"cursor_field": [
"id"
],
"stream_namespace": "dbo",
"cursor_record_count": 1
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue))
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
}
@ParameterizedTest
@CsvSource(
"'2025-01-20T10:30:45', '2025-01-20T10:30:45.000000Z'",
"'2025-01-20T10:30:45.0', '2025-01-20T10:30:45.000000Z'",
"'2025-01-20T10:30:45.1', '2025-01-20T10:30:45.100000Z'",
"'2025-01-20T10:30:45.123', '2025-01-20T10:30:45.123000Z'",
"'2025-01-20T10:30:45.123456789', '2025-01-20T10:30:45.123456Z'",
"'2025-01-20T10:30:45.123+00:00', '2025-01-20T10:30:45.123000Z'",
"'2025-01-20T10:30:45Z', '2025-01-20T10:30:45.000000Z'",
"'2025-01-20T10:30:45 Z', '2025-01-20T10:30:45.000000Z'",
"'2025-01-20T10:30:45.12345 -05:00', '2025-01-20T10:30:45.123450-05:00'",
)
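    // Expected lower bounds are normalized to microsecond precision: zone-less inputs are
    // treated as UTC ("Z"), explicit offsets are preserved, and extra nanosecond digits are
    // truncated.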
fun testResumeFromCompletedCursorBasedReadTimestamp(
cursorVal: String,
expectedLowerBound: String
) {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"cursor": "$cursorVal",
"version": 3,
"state_type": "cursor_based",
"stream_name": "timestamp_table",
"cursor_field": [
"created_at"
],
"stream_namespace": "dbo",
"cursor_record_count": 1
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(
streamFeedBootstrap(timestampStream, incomingStateValue)
)
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
assertEquals(
Jsons.valueToTree(expectedLowerBound),
(jdbcPartition as MsSqlServerJdbcCursorIncrementalPartition).cursorLowerBound
)
}
@Test
fun testResumeFromCompletedCursorBasedReadTimestampWithoutTimezone() {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"cursor": "2025-01-20T10:30:45.123",
"version": 3,
"state_type": "cursor_based",
"stream_name": "datetime_table",
"cursor_field": [
"datetime_col"
],
"stream_namespace": "dbo",
"cursor_record_count": 1
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(
streamFeedBootstrap(datetimeStream, incomingStateValue)
)
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
assertEquals(
Jsons.valueToTree("2025-01-20T10:30:45.123000"),
(jdbcPartition as MsSqlServerJdbcCursorIncrementalPartition).cursorLowerBound
)
}
@Test
fun testResumeFromCursorBasedReadInitialRead() {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"pk_val": "100000",
"pk_name": "id",
"version": 3,
"state_type": "primary_key",
"incremental_state": {}
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue))
assertTrue(jdbcPartition is MsSqlServerJdbcSnapshotWithCursorPartition)
}
@Test
fun testResumeFromCdcInitialRead() {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"pk_val": "50000",
"pk_name": "id",
"version": 3,
"state_type": "primary_key",
"incremental_state": {}
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerCdcJdbcPartitionFactory.create(
streamFeedBootstrap(stream, incomingStateValue)
)
assertTrue(jdbcPartition is MsSqlServerJdbcCdcSnapshotPartition)
}
@Test
fun testResumeFromCdcInitialReadComplete() {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"stream_name": "test_table",
"cursor_field": [],
"stream_namespace": "dbo"
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerCdcJdbcPartitionFactory.create(
streamFeedBootstrap(stream, incomingStateValue)
)
assertNull(jdbcPartition)
}
@Test
fun testResumeFromCompletedCursorBasedReadBinary() {
val incomingStateValue: OpaqueStateValue =
Jsons.readTree(
"""
{
"cursor": "QUJDREVGRw==",
"version": 3,
"state_type": "cursor_based",
"stream_name": "binary_table",
"cursor_field": [
"binary_col"
],
"stream_namespace": "dbo",
"cursor_record_count": 1
}
""".trimIndent()
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(
streamFeedBootstrap(binaryStream, incomingStateValue)
)
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
assertEquals(
Jsons.valueToTree<BinaryNode>(Base64.getDecoder().decode("QUJDREVGRw==")),
(jdbcPartition as MsSqlServerJdbcCursorIncrementalPartition).cursorLowerBound
)
}
@Test
fun testFullRefreshMode() {
val fullRefreshStream =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("full_refresh_table")
),
schema = setOf(fieldId),
configuredSyncMode = ConfiguredSyncMode.FULL_REFRESH,
configuredPrimaryKey = listOf(), // No PK to avoid findPkUpperBound call
configuredCursor = null,
)
val jdbcPartition =
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(fullRefreshStream))
assertTrue(jdbcPartition is MsSqlServerJdbcNonResumableSnapshotPartition)
}
@Test
fun testCdcFullRefreshMode() {
val fullRefreshStream =
Stream(
id =
StreamIdentifier.from(
StreamDescriptor().withNamespace("dbo").withName("cdc_full_refresh_table")
),
schema = setOf(fieldId),
configuredSyncMode = ConfiguredSyncMode.FULL_REFRESH,
configuredPrimaryKey = listOf(), // No PK to avoid findPkUpperBound call
configuredCursor = null,
)
val jdbcPartition =
msSqlServerCdcJdbcPartitionFactory.create(streamFeedBootstrap(fullRefreshStream))
assertTrue(jdbcPartition is MsSqlServerJdbcNonResumableSnapshotPartition)
}
}

View File

@@ -0,0 +1,191 @@
/* Copyright (c) 2025 Airbyte, Inc., all rights reserved. */
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.command.ConfigurationSpecificationSupplier
import io.airbyte.cdk.ssh.SshPasswordAuthTunnelMethod
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
import io.micronaut.context.annotation.Property
import io.micronaut.context.env.Environment
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@MicronautTest(environments = [Environment.TEST], rebuildContext = true)
class MsSqlServerSourceConfigurationSpecificationTest {
@Inject
lateinit var supplier:
ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>
@Test
fun testSchemaViolation() {
Assertions.assertThrows(ConfigErrorException::class.java, supplier::get)
}
@Test
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON)
fun testJson() {
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
Assertions.assertEquals("localhost", pojo.host)
Assertions.assertEquals(1433, pojo.port)
Assertions.assertEquals("sa", pojo.username)
Assertions.assertEquals("Password123!", pojo.password)
Assertions.assertEquals("master", pojo.database)
Assertions.assertArrayEquals(arrayOf("dbo", "custom_schema"), pojo.schemas)
val encryption: EncryptionSpecification = pojo.getEncryptionValue()!!
Assertions.assertTrue(
encryption
is MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification,
encryption::class.toString()
)
val tunnelMethod: SshTunnelMethodConfiguration? = pojo.getTunnelMethodValue()
Assertions.assertTrue(
tunnelMethod is SshPasswordAuthTunnelMethod,
tunnelMethod!!::class.toString(),
)
val replicationMethod: IncrementalConfigurationSpecification = pojo.getIncrementalValue()
Assertions.assertTrue(replicationMethod is Cdc, replicationMethod::class.toString())
Assertions.assertEquals(300, pojo.checkpointTargetIntervalSeconds)
Assertions.assertEquals(2, pojo.concurrency)
Assertions.assertEquals(true, pojo.checkPrivileges)
Assertions.assertEquals(
"integratedSecurity=false&trustServerCertificate=true",
pojo.jdbcUrlParams
)
}
    /**
     * Verifies that encryption defaults to disabled when no ssl_mode is specified in the
     * MsSqlServerSourceConfigurationSpecification.
     */
@Test
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON_ENCRYPTION_CHECK)
fun testDefaultEncryption() {
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
val encryption: EncryptionSpecification = pojo.getEncryptionValue()!!
Assertions.assertTrue(
encryption is MsSqlServerEncryptionDisabledConfigurationSpecification,
encryption::class.toString()
)
}
/** Verifies that the default replication method is UserDefinedCursor when not specified. */
@Test
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON_DEFAULT_REPLICATION)
fun testDefaultReplicationMethod() {
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
val replicationMethod: IncrementalConfigurationSpecification = pojo.getIncrementalValue()
Assertions.assertTrue(
replicationMethod is UserDefinedCursor,
replicationMethod::class.toString()
)
}
/** Verifies that CDC replication method is correctly parsed. */
@Test
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON_CDC)
fun testCdcReplicationMethod() {
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
val replicationMethod: IncrementalConfigurationSpecification = pojo.getIncrementalValue()
Assertions.assertTrue(replicationMethod is Cdc, replicationMethod::class.toString())
}
companion object {
const val CONFIG_JSON: String =
"""
{
"host": "localhost",
"port": 1433,
"username": "sa",
"password": "Password123!",
"database": "master",
"schemas": ["dbo", "custom_schema"],
"ssl_mode": {
"mode": "encrypted_trust_server_certificate"
},
"tunnel_method": {
"tunnel_method": "SSH_PASSWORD_AUTH",
"tunnel_host": "localhost",
"tunnel_port": 2222,
"tunnel_user": "sshuser",
"tunnel_user_password": "sshpass"
},
"replication_method": {
"method": "CDC"
},
"checkpoint_target_interval_seconds": 300,
"jdbc_url_params": "integratedSecurity=false&trustServerCertificate=true",
"concurrency": 2,
"check_privileges": true
}
"""
const val CONFIG_JSON_ENCRYPTION_CHECK: String =
"""
{
"host": "localhost",
"port": 1433,
"username": "sa",
"password": "Password123!",
"database": "master",
"schemas": ["dbo"],
"tunnel_method": {
"tunnel_method": "SSH_PASSWORD_AUTH",
"tunnel_host": "localhost",
"tunnel_port": 2222,
"tunnel_user": "sshuser",
"tunnel_user_password": "sshpass"
},
"replication_method": {
"method": "STANDARD"
},
"checkpoint_target_interval_seconds": 300,
"jdbc_url_params": "integratedSecurity=false&trustServerCertificate=true",
"concurrency": 1
}
"""
const val CONFIG_JSON_DEFAULT_REPLICATION: String =
"""
{
"host": "localhost",
"port": 1433,
"username": "sa",
"password": "Password123!",
"database": "master",
"schemas": ["dbo"],
"ssl_mode": {
"mode": "encrypted_trust_server_certificate"
},
"replication_method": {
"method": "STANDARD"
}
}
"""
const val CONFIG_JSON_CDC: String =
"""
{
"host": "localhost",
"port": 1433,
"username": "sa",
"password": "Password123!",
"database": "master",
"schemas": ["dbo"],
"ssl_mode": {
"mode": "encrypted_trust_server_certificate"
},
"replication_method": {
"method": "CDC"
}
}
"""
}
}

View File

@@ -0,0 +1,224 @@
/* Copyright (c) 2025 Airbyte, Inc., all rights reserved. */
package io.airbyte.integrations.source.mssql
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.jdbc.DoubleFieldType
import io.airbyte.cdk.jdbc.IntFieldType
import io.airbyte.cdk.jdbc.LongFieldType
import io.airbyte.cdk.jdbc.LosslessJdbcFieldType
import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType
import io.airbyte.cdk.jdbc.StringFieldType
import io.airbyte.cdk.read.And
import io.airbyte.cdk.read.Equal
import io.airbyte.cdk.read.From
import io.airbyte.cdk.read.Greater
import io.airbyte.cdk.read.LesserOrEqual
import io.airbyte.cdk.read.Limit
import io.airbyte.cdk.read.Or
import io.airbyte.cdk.read.OrderBy
import io.airbyte.cdk.read.SelectColumnMaxValue
import io.airbyte.cdk.read.SelectColumns
import io.airbyte.cdk.read.SelectQuery
import io.airbyte.cdk.read.SelectQuerySpec
import io.airbyte.cdk.read.Where
import io.airbyte.cdk.read.optimize
import io.airbyte.cdk.util.Jsons
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class MsSqlServerSourceSelectQueryGeneratorTest {
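    // T-SQL dialect specifics under test: row limits render as SELECT TOP n rather than LIMIT,
    // tables are schema-qualified as <schema>.<table>, and bound values become ? placeholders
    // paired with their JDBC field types.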
@Test
fun testSelectLimit0() {
SelectQuerySpec(
SelectColumns(
listOf(
Field("id", IntFieldType),
Field("name", StringFieldType),
),
),
From("users", "dbo"),
limit = Limit(0),
)
.assertSqlEquals("""SELECT TOP 0 id, name FROM dbo.users""")
}
@Test
fun testSelectMaxCursor() {
SelectQuerySpec(
SelectColumnMaxValue(Field("updated_at", OffsetDateTimeFieldType)),
From("orders", "dbo"),
)
.assertSqlEquals("""SELECT MAX(updated_at) FROM dbo.orders""")
}
@Test
fun testSelectForNonResumableInitialSync() {
SelectQuerySpec(
SelectColumns(
listOf(
Field("id", IntFieldType),
Field("description", StringFieldType),
),
),
From("products", "dbo"),
)
.assertSqlEquals("""SELECT id, description FROM dbo.products""")
}
@Test
fun testSelectForResumableInitialSync() {
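        // Composite-PK keyset pagination: (pk1, pk2, pk3) > (v1, v2, v3) expands into the
        // OR-of-ANDs form below, with one bound parameter per comparison in left-to-right order.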
val k1 = Field("pk1", IntFieldType)
val v1 = Jsons.numberNode(100)
val k2 = Field("pk2", IntFieldType)
val v2 = Jsons.numberNode(200)
val k3 = Field("pk3", IntFieldType)
val v3 = Jsons.numberNode(300)
SelectQuerySpec(
SelectColumns(listOf(k1, k2, k3, Field("data", StringFieldType))),
From("composite_table", "dbo"),
Where(
Or(
listOf(
And(listOf(Greater(k1, v1))),
And(listOf(Equal(k1, v1), Greater(k2, v2))),
And(listOf(Equal(k1, v1), Equal(k2, v2), Greater(k3, v3))),
),
),
),
OrderBy(listOf(k1, k2, k3)),
Limit(1000),
)
.assertSqlEquals(
"""SELECT TOP 1000 pk1, pk2, pk3, data FROM """ +
"""dbo.composite_table WHERE (pk1 > ?) OR """ +
"""((pk1 = ?) AND (pk2 > ?)) OR """ +
"""((pk1 = ?) AND (pk2 = ?) AND (pk3 > ?)) """ +
"""ORDER BY pk1, pk2, pk3""",
v1 to IntFieldType,
v1 to IntFieldType,
v2 to IntFieldType,
v1 to IntFieldType,
v2 to IntFieldType,
v3 to IntFieldType,
)
}
@Test
fun testSelectForCursorBasedIncrementalSync() {
val c = Field("last_modified", DoubleFieldType)
val lb = Jsons.numberNode(1.5)
val ub = Jsons.numberNode(3.5)
SelectQuerySpec(
SelectColumns(listOf(Field("content", StringFieldType), c)),
From("documents", "dbo"),
Where(And(listOf(Greater(c, lb), LesserOrEqual(c, ub)))),
OrderBy(listOf(c)),
Limit(500),
)
.assertSqlEquals(
"""SELECT TOP 500 content, last_modified FROM """ +
"""dbo.documents """ +
"""WHERE (last_modified > ?) AND (last_modified <= ?) ORDER BY last_modified""",
lb to DoubleFieldType,
ub to DoubleFieldType,
)
}
@Test
fun testSelectWithHierarchyId() {
// Test special handling for hierarchyid field type in SQL Server
val hierarchyField = Field("org_node", MsSqlSourceOperations.MsSqlServerHierarchyFieldType)
SelectQuerySpec(
SelectColumns(
listOf(
Field("employee_id", IntFieldType),
hierarchyField,
Field("employee_name", StringFieldType),
),
),
From("employees", "hr"),
)
.assertSqlEquals(
"""SELECT employee_id, org_node.ToString(), employee_name FROM hr.employees"""
)
}
@Test
fun testSelectWithoutNamespace() {
// Test query generation without namespace (schema)
SelectQuerySpec(
SelectColumns(
listOf(
Field("col1", IntFieldType),
Field("col2", StringFieldType),
),
),
From("simple_table", null),
limit = Limit(10),
)
.assertSqlEquals("""SELECT TOP 10 col1, col2 FROM simple_table""")
}
@Test
fun testSelectWithLargeLimit() {
// Test with a large limit value
val cursor = Field("sequence_id", LongFieldType)
val startValue = Jsons.numberNode(1000000L)
SelectQuerySpec(
SelectColumns(listOf(cursor, Field("payload", StringFieldType))),
From("events", "dbo"),
Where(Greater(cursor, startValue)),
OrderBy(listOf(cursor)),
Limit(10000),
)
.assertSqlEquals(
"""SELECT TOP 10000 sequence_id, payload FROM dbo.events WHERE sequence_id > ? ORDER BY sequence_id""",
startValue to LongFieldType,
)
}
@Test
fun testSelectWithMultipleDateTimeFields() {
// Test with multiple datetime fields for time-based filtering
val created = Field("created_at", OffsetDateTimeFieldType)
val updated = Field("updated_at", OffsetDateTimeFieldType)
val createdAfter = Jsons.textNode("2025-01-01T00:00:00Z")
val updatedBefore = Jsons.textNode("2025-12-31T23:59:59Z")
SelectQuerySpec(
SelectColumns(listOf(Field("id", IntFieldType), created, updated)),
From("records", "dbo"),
Where(
And(
listOf(
Greater(created, createdAfter),
LesserOrEqual(updated, updatedBefore)
)
)
),
OrderBy(listOf(created, updated)),
Limit(100),
)
.assertSqlEquals(
"""SELECT TOP 100 id, created_at, updated_at FROM dbo.records """ +
"""WHERE (created_at > ?) AND (updated_at <= ?) ORDER BY created_at, updated_at""",
createdAfter to OffsetDateTimeFieldType,
updatedBefore to OffsetDateTimeFieldType,
)
}
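    // Helper: renders the spec through MsSqlSourceOperations (after optimize()) and compares the
    // generated SQL plus its ordered (value, JDBC type) bindings against expectations.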
private fun SelectQuerySpec.assertSqlEquals(
sql: String,
vararg bindings: Pair<JsonNode, LosslessJdbcFieldType<*, *>>,
) {
val expected =
SelectQuery(
sql,
select.columns,
bindings.map { SelectQuery.Binding(it.first, it.second) },
)
val actual: SelectQuery = MsSqlSourceOperations().generate(this.optimize())
Assertions.assertEquals(expected, actual)
}
}

View File

@@ -0,0 +1,205 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.util.Jsons
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test
class MsSqlServerStateMigrationTest {
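    // Legacy (version 2) "ordered_column" states carry ordered_col/ordered_col_val fields; the
    // migration remaps them to the new "primary_key" shape and bumps the version, while legacy
    // cursor_based states keep their shape and only receive the version bump.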
@Test
fun `should parse new format state correctly`() {
val newState =
"""
{
"cursor": "2024-01-01T00:00:00",
"version": 3,
"state_type": "cursor_based",
"stream_name": "users",
"cursor_field": ["created_at"],
"stream_namespace": "dataset_1tb",
"cursor_record_count": 0
}
""".trimIndent()
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(newState))
assertEquals("2024-01-01T00:00:00", parsed.cursor)
assertEquals("cursor_based", parsed.stateType)
assertEquals(listOf("created_at"), parsed.cursorField)
assertEquals(0, parsed.cursorRecordCount)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
@Test
fun `should migrate legacy OrderedColumnLoadStatus state correctly`() {
val legacyOrderedColumnState =
"""
{
"version": 2,
"state_type": "ordered_column",
"ordered_col": "id",
"ordered_col_val": "12345",
"incremental_state": {
"version": 2,
"state_type": "cursor_based",
"stream_name": "users",
"stream_namespace": "dataset_1tb",
"cursor_field": ["created_at"],
"cursor": "2024-01-01T00:00:00",
"cursor_record_count": 0
}
}
""".trimIndent()
val parsed =
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyOrderedColumnState))
// Should be converted to primary_key state
assertEquals("primary_key", parsed.stateType)
assertEquals("id", parsed.pkName)
assertEquals("12345", parsed.pkValue)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
// Should preserve incremental state
assertNotNull(parsed.incrementalState)
val incrementalState =
Jsons.treeToValue(parsed.incrementalState, MsSqlServerJdbcStreamStateValue::class.java)
assertEquals("cursor_based", incrementalState.stateType)
// Stream name and namespace are not tracked in the state value
assertEquals(listOf("created_at"), incrementalState.cursorField)
assertEquals("2024-01-01T00:00:00", incrementalState.cursor)
}
@Test
fun `should migrate legacy CursorBasedStatus state correctly`() {
val legacyCursorState =
"""
{
"version": 2,
"state_type": "cursor_based",
"stream_name": "users",
"stream_namespace": "dataset_1tb",
"cursor_field": ["created_at"],
"cursor": "2024-01-01T00:00:00",
"cursor_record_count": 1
}
""".trimIndent()
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyCursorState))
assertEquals("cursor_based", parsed.stateType)
// Stream name and namespace are not tracked in the state value
assertEquals(listOf("created_at"), parsed.cursorField)
assertEquals("2024-01-01T00:00:00", parsed.cursor)
assertEquals(1, parsed.cursorRecordCount)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
@Test
fun `should detect OrderedColumnLoadStatus by field presence`() {
val legacyStateWithoutStateType =
"""
{
"version": 2,
"ordered_col": "id",
"ordered_col_val": "12345"
}
""".trimIndent()
val parsed =
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyStateWithoutStateType))
assertEquals("primary_key", parsed.stateType)
assertEquals("id", parsed.pkName)
assertEquals("12345", parsed.pkValue)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
@Test
fun `should detect CursorBasedStatus by field presence`() {
val legacyStateWithoutStateType =
"""
{
"version": 2,
"stream_name": "users",
"cursor_field": ["created_at"],
"cursor": "2024-01-01T00:00:00"
}
""".trimIndent()
val parsed =
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyStateWithoutStateType))
assertEquals("cursor_based", parsed.stateType)
// Stream name is not tracked in the state value
assertEquals(listOf("created_at"), parsed.cursorField)
assertEquals("2024-01-01T00:00:00", parsed.cursor)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
@Test
fun `should handle unknown state format gracefully`() {
val unknownState =
"""
{
"unknown_field": "unknown_value"
}
""".trimIndent()
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(unknownState))
// Should return default state
assertEquals("cursor_based", parsed.stateType)
assertEquals("", parsed.cursor)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
@Test
fun `should migrate ordered column state without incremental_state`() {
val legacyOrderedColumnState =
"""
{
"version": 2,
"state_type": "ordered_column",
"ordered_col": "id",
"ordered_col_val": "12345"
}
""".trimIndent()
val parsed =
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyOrderedColumnState))
assertEquals("primary_key", parsed.stateType)
assertEquals("id", parsed.pkName)
assertEquals("12345", parsed.pkValue)
assertNull(parsed.incrementalState)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
@Test
fun `should handle null values in legacy state`() {
val legacyStateWithNulls =
"""
{
"version": 2,
"state_type": "cursor_based",
"stream_name": null,
"cursor_field": null,
"cursor": null
}
""".trimIndent()
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyStateWithNulls))
assertEquals("cursor_based", parsed.stateType)
// Stream name is not tracked in the state value
assertEquals(emptyList<String>(), parsed.cursorField)
assertEquals("", parsed.cursor)
assertEquals(0, parsed.cursorRecordCount)
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
}
}

View File

@@ -0,0 +1,463 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql
import io.airbyte.cdk.StreamIdentifier
import io.airbyte.cdk.check.JdbcCheckQueries
import io.airbyte.cdk.discover.JdbcMetadataQuerier
import io.airbyte.cdk.jdbc.DefaultJdbcConstants
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.airbyte.protocol.models.v0.StreamDescriptor
import io.airbyte.protocol.models.v0.SyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import java.sql.Connection
import org.junit.jupiter.api.*
import org.junit.jupiter.api.Assertions.*
import org.testcontainers.containers.MSSQLServerContainer
private val log = KotlinLogging.logger {}
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class MsSqlSourceMetadataQuerierTest {
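    // Primary-key selection precedence exercised below: a single-column clustered index wins over
    // a declared PK; a composite clustered index defers to the PK; with neither present, discovery
    // falls back to a user-defined logical PK from the configured catalog, else returns empty.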
private lateinit var dbContainer: MSSQLServerContainer<*>
private lateinit var config: MsSqlServerSourceConfiguration
private lateinit var metadataQuerier: MsSqlSourceMetadataQuerier
@BeforeAll
@Timeout(value = 300)
fun setUp() {
dbContainer =
MsSqlServerContainerFactory.shared(
"mcr.microsoft.com/mssql/server:2022-latest",
MsSqlServerContainerFactory.WithNetwork,
MsSqlServerContainerFactory.WithTestDatabase
)
val spec = MsSqlServerContainerFactory.config(dbContainer)
spec.setIncrementalValue(UserDefinedCursor())
config = MsSqlServerSourceConfigurationFactory().make(spec)
// Set up tables for testing
createTestTables()
// Create metadata querier
val jdbcConnectionFactory = JdbcConnectionFactory(config)
val sourceOperations = MsSqlSourceOperations()
val base =
JdbcMetadataQuerier(
DefaultJdbcConstants(),
config,
sourceOperations,
sourceOperations,
JdbcCheckQueries(),
jdbcConnectionFactory
)
metadataQuerier = MsSqlSourceMetadataQuerier(base)
}
private fun createTestTables() {
JdbcConnectionFactory(config).get().use { connection: Connection ->
connection.isReadOnly = false
// Clean up existing test tables
val dropStatements =
listOf(
"DROP TABLE IF EXISTS dbo.table_with_clustered_no_pk",
"DROP TABLE IF EXISTS dbo.table_with_pk_no_clustered",
"DROP TABLE IF EXISTS dbo.table_with_pk_and_single_clustered",
"DROP TABLE IF EXISTS dbo.table_with_pk_and_composite_clustered",
"DROP TABLE IF EXISTS dbo.table_no_pk_no_clustered"
)
for (ddl in dropStatements) {
connection.createStatement().use { stmt ->
try {
stmt.execute(ddl)
} catch (e: Exception) {
log.debug { "Table might not exist: ${e.message}" }
}
}
}
// Test Case 1: Table with clustered index but no primary key
// Expected: Should use the clustered index column as primary key
connection.createStatement().use { stmt ->
stmt.execute(
"""
CREATE TABLE dbo.table_with_clustered_no_pk (
id INT NOT NULL,
name NVARCHAR(100),
created_at DATETIME2
)
"""
)
stmt.execute(
"""
CREATE CLUSTERED INDEX idx_clustered_id
ON dbo.table_with_clustered_no_pk (id)
"""
)
}
// Test Case 2: Table with primary key but no clustered index
// Expected: Should use the primary key
connection.createStatement().use { stmt ->
stmt.execute(
"""
CREATE TABLE dbo.table_with_pk_no_clustered (
id INT NOT NULL,
name NVARCHAR(100),
created_at DATETIME2,
CONSTRAINT pk_table2 PRIMARY KEY NONCLUSTERED (id)
)
"""
)
}
// Test Case 3: Table with both primary key and single-column clustered index on
// different columns
// Expected: Should use the single-column clustered index
connection.createStatement().use { stmt ->
stmt.execute(
"""
CREATE TABLE dbo.table_with_pk_and_single_clustered (
id INT NOT NULL,
code NVARCHAR(50) NOT NULL,
name NVARCHAR(100),
created_at DATETIME2,
CONSTRAINT pk_table3 PRIMARY KEY NONCLUSTERED (id)
)
"""
)
stmt.execute(
"""
CREATE CLUSTERED INDEX idx_clustered_code
ON dbo.table_with_pk_and_single_clustered (code)
"""
)
}
// Test Case 4: Table with primary key and composite clustered index
// Expected: Should use the primary key (not the composite clustered index)
connection.createStatement().use { stmt ->
stmt.execute(
"""
CREATE TABLE dbo.table_with_pk_and_composite_clustered (
id INT NOT NULL,
code NVARCHAR(50) NOT NULL,
category NVARCHAR(50) NOT NULL,
name NVARCHAR(100),
created_at DATETIME2,
CONSTRAINT pk_table4 PRIMARY KEY NONCLUSTERED (id)
)
"""
)
stmt.execute(
"""
CREATE CLUSTERED INDEX idx_clustered_composite
ON dbo.table_with_pk_and_composite_clustered (code, category)
"""
)
}
// Test Case 5: Table with no primary key and no clustered index
// Expected: Should return empty list
connection.createStatement().use { stmt ->
stmt.execute(
"""
CREATE TABLE dbo.table_no_pk_no_clustered (
id INT,
name NVARCHAR(100),
created_at DATETIME2
)
"""
)
}
}
}
@Test
@DisplayName("Should use single-column clustered index when no primary key exists")
fun testClusteredIndexNoPrimaryKey() {
val streamId =
StreamIdentifier.from(
StreamDescriptor().withName("table_with_clustered_no_pk").withNamespace("dbo")
)
val primaryKey = metadataQuerier.primaryKey(streamId)
assertEquals(1, primaryKey.size, "Should have one primary key column")
assertEquals(listOf("id"), primaryKey[0], "Should use clustered index column 'id'")
}
@Test
@DisplayName("Should use primary key when no clustered index exists")
fun testPrimaryKeyNoClusteredIndex() {
val streamId =
StreamIdentifier.from(
StreamDescriptor().withName("table_with_pk_no_clustered").withNamespace("dbo")
)
val primaryKey = metadataQuerier.primaryKey(streamId)
assertEquals(1, primaryKey.size, "Should have one primary key column")
assertEquals(listOf("id"), primaryKey[0], "Should use primary key column 'id'")
}
@Test
@DisplayName("Should prefer single-column clustered index over primary key")
fun testSingleClusteredIndexOverPrimaryKey() {
val streamId =
StreamIdentifier.from(
StreamDescriptor()
.withName("table_with_pk_and_single_clustered")
.withNamespace("dbo")
)
val primaryKey = metadataQuerier.primaryKey(streamId)
assertEquals(1, primaryKey.size, "Should have one primary key column")
assertEquals(
listOf("code"),
primaryKey[0],
"Should use single-column clustered index 'code' instead of primary key 'id'"
)
}
@Test
@DisplayName("Should use primary key when clustered index is composite")
fun testPrimaryKeyWhenCompositeClusteredIndex() {
val streamId =
StreamIdentifier.from(
StreamDescriptor()
.withName("table_with_pk_and_composite_clustered")
.withNamespace("dbo")
)
val primaryKey = metadataQuerier.primaryKey(streamId)
assertEquals(1, primaryKey.size, "Should have one primary key column")
assertEquals(
listOf("id"),
primaryKey[0],
"Should use primary key 'id' instead of composite clustered index"
)
}
@Test
@DisplayName("Should return empty list when no primary key and no clustered index")
fun testNoPrimaryKeyNoClusteredIndex() {
val streamId =
StreamIdentifier.from(
StreamDescriptor().withName("table_no_pk_no_clustered").withNamespace("dbo")
)
val primaryKey = metadataQuerier.primaryKey(streamId)
assertTrue(
primaryKey.isEmpty(),
"Should return empty list when no PK and no clustered index"
)
}
@Test
@DisplayName("Verify clustered index discovery query")
fun testClusteredIndexDiscovery() {
// This test verifies that the clustered index discovery is working correctly
val memoizedClusteredIndexKeys = metadataQuerier.memoizedClusteredIndexKeys
// Find our test tables
val tables = metadataQuerier.memoizedTableNames
val testTables = tables.filter { it.name.startsWith("table_") && it.schema == "dbo" }
assertTrue(testTables.size >= 5, "Should have at least 5 test tables")
// Verify specific clustered indexes are discovered
val tableWithClusteredNoPk = testTables.find { it.name == "table_with_clustered_no_pk" }
assertNotNull(tableWithClusteredNoPk, "Should find table_with_clustered_no_pk")
val clusteredKeys = memoizedClusteredIndexKeys[tableWithClusteredNoPk]
assertNotNull(clusteredKeys, "Should have clustered index for table_with_clustered_no_pk")
assertEquals(1, clusteredKeys?.size, "Should have single column clustered index")
assertEquals(
listOf("id"),
clusteredKeys?.get(0),
"Clustered index should be on 'id' column"
)
// Verify composite clustered index
val tableWithComposite =
testTables.find { it.name == "table_with_pk_and_composite_clustered" }
assertNotNull(tableWithComposite, "Should find table_with_pk_and_composite_clustered")
val compositeKeys = memoizedClusteredIndexKeys[tableWithComposite]
assertNotNull(compositeKeys, "Should have clustered index for composite table")
assertEquals(2, compositeKeys?.size, "Should have two columns in composite clustered index")
assertEquals(listOf("code"), compositeKeys?.get(0), "First column should be 'code'")
assertEquals(
listOf("category"),
compositeKeys?.get(1),
"Second column should be 'category'"
)
}
@Test
@DisplayName("Verify primary key discovery query")
fun testPrimaryKeyDiscovery() {
// This test verifies that the primary key discovery is working correctly
val memoizedPrimaryKeys = metadataQuerier.memoizedPrimaryKeys
// Find our test tables
val tables = metadataQuerier.memoizedTableNames
val testTables = tables.filter { it.name.startsWith("table_") && it.schema == "dbo" }
// Verify primary keys are discovered correctly
val tableWithPkNoCluster = testTables.find { it.name == "table_with_pk_no_clustered" }
assertNotNull(tableWithPkNoCluster, "Should find table_with_pk_no_clustered")
val pkKeys = memoizedPrimaryKeys[tableWithPkNoCluster]
assertNotNull(pkKeys, "Should have primary key for table_with_pk_no_clustered")
assertEquals(1, pkKeys?.size, "Should have single column primary key")
assertEquals(listOf("id"), pkKeys?.get(0), "Primary key should be on 'id' column")
// Verify table without primary key
val tableNoPk = testTables.find { it.name == "table_with_clustered_no_pk" }
assertNotNull(tableNoPk, "Should find table_with_clustered_no_pk")
val noPkKeys = memoizedPrimaryKeys[tableNoPk]
assertNull(noPkKeys, "Should not have primary key for table_with_clustered_no_pk")
}
@Test
@DisplayName("Should use user-defined logical PK from catalog when no physical PK exists")
fun testUserDefinedLogicalPrimaryKey() {
val streamId =
StreamIdentifier.from(
StreamDescriptor().withName("table_no_pk_no_clustered").withNamespace("dbo")
)
// Create a ConfiguredAirbyteCatalog with a user-defined logical PK
val configuredCatalog =
ConfiguredAirbyteCatalog()
.withStreams(
listOf(
ConfiguredAirbyteStream()
.withStream(
AirbyteStream()
.withName("table_no_pk_no_clustered")
.withNamespace("dbo")
)
.withSyncMode(SyncMode.INCREMENTAL)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withPrimaryKey(listOf(listOf("name")))
)
)
// Create a new querier with the configured catalog
val jdbcConnectionFactory = JdbcConnectionFactory(config)
val sourceOperations = MsSqlSourceOperations()
val base =
JdbcMetadataQuerier(
DefaultJdbcConstants(),
config,
sourceOperations,
sourceOperations,
JdbcCheckQueries(),
jdbcConnectionFactory
)
val querierWithCatalog = MsSqlSourceMetadataQuerier(base, configuredCatalog)
// Test that it uses the user-defined logical PK
val primaryKey = querierWithCatalog.primaryKey(streamId)
assertEquals(1, primaryKey.size, "Should have one logical primary key column")
assertEquals(
listOf("name"),
primaryKey[0],
"Should use user-defined logical primary key 'name' from catalog"
)
}
@Test
@DisplayName("Should prefer physical PK over user-defined logical PK")
fun testPhysicalPrimaryKeyPreferredOverLogical() {
val streamId =
StreamIdentifier.from(
StreamDescriptor().withName("table_with_pk_no_clustered").withNamespace("dbo")
)
// Create a ConfiguredAirbyteCatalog with a different logical PK
val configuredCatalog =
ConfiguredAirbyteCatalog()
.withStreams(
listOf(
ConfiguredAirbyteStream()
.withStream(
AirbyteStream()
.withName("table_with_pk_no_clustered")
.withNamespace("dbo")
)
.withSyncMode(SyncMode.INCREMENTAL)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withPrimaryKey(listOf(listOf("name")))
)
)
// Create a new querier with the configured catalog
val jdbcConnectionFactory = JdbcConnectionFactory(config)
val sourceOperations = MsSqlSourceOperations()
val base =
JdbcMetadataQuerier(
DefaultJdbcConstants(),
config,
sourceOperations,
sourceOperations,
JdbcCheckQueries(),
jdbcConnectionFactory
)
val querierWithCatalog = MsSqlSourceMetadataQuerier(base, configuredCatalog)
// Test that it prefers the physical PK over the logical one
val primaryKey = querierWithCatalog.primaryKey(streamId)
assertEquals(1, primaryKey.size, "Should have one primary key column")
assertEquals(
listOf("id"),
primaryKey[0],
"Should use physical primary key 'id' even when logical PK 'name' is defined"
)
}
@AfterAll
fun tearDown() {
// Clean up test tables
try {
JdbcConnectionFactory(config).get().use { connection: Connection ->
connection.isReadOnly = false
val dropStatements =
listOf(
"DROP TABLE IF EXISTS dbo.table_with_clustered_no_pk",
"DROP TABLE IF EXISTS dbo.table_with_pk_no_clustered",
"DROP TABLE IF EXISTS dbo.table_with_pk_and_single_clustered",
"DROP TABLE IF EXISTS dbo.table_with_pk_and_composite_clustered",
"DROP TABLE IF EXISTS dbo.table_no_pk_no_clustered"
)
for (ddl in dropStatements) {
connection.createStatement().use { stmt ->
try {
stmt.execute(ddl)
} catch (e: Exception) {
log.debug { "Error dropping table: ${e.message}" }
}
}
}
}
} catch (e: Exception) {
log.error { "Error during teardown: ${e.message}" }
}
}
}
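
Taken together, the tests above pin down a precedence order for primary-key resolution: a single-column clustered index wins over a physical primary key, the physical primary key wins over a composite clustered index and over any user-defined logical key from the configured catalog, and an empty list is returned when nothing applies. Below is a minimal Kotlin sketch of that precedence, not the connector's actual implementation; the three nullable arguments are hypothetical stand-ins for the memoized lookups exercised above.

```kotlin
// Hedged sketch of the precedence verified by the tests above. Each key is a
// list of column paths, matching the List<List<String>> shape of primaryKey().
fun resolvePrimaryKey(
    clusteredIndexKey: List<List<String>>?, // stand-in for memoizedClusteredIndexKeys, if any
    physicalPrimaryKey: List<List<String>>?, // stand-in for memoizedPrimaryKeys, if any
    logicalPrimaryKey: List<List<String>>? // user-defined PK from the configured catalog, if any
): List<List<String>> =
    when {
        // A single-column clustered index is preferred over everything else.
        clusteredIndexKey != null && clusteredIndexKey.size == 1 -> clusteredIndexKey
        // Otherwise fall back to the physical primary key...
        physicalPrimaryKey != null -> physicalPrimaryKey
        // ...then to a user-defined logical primary key from the catalog...
        logicalPrimaryKey != null -> logicalPrimaryKey
        // ...and finally to an empty list when nothing applies.
        else -> emptyList()
    }
```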

View File

@@ -1,66 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import io.airbyte.cdk.testutils.ContainerFactory;
import org.apache.commons.lang3.StringUtils;
import org.testcontainers.containers.MSSQLServerContainer;
import org.testcontainers.containers.Network;
import org.testcontainers.utility.DockerImageName;
public class MsSQLContainerFactory extends ContainerFactory<MSSQLServerContainer<?>> {
@Override
protected MSSQLServerContainer<?> createNewContainer(DockerImageName imageName) {
imageName = imageName.asCompatibleSubstituteFor("mcr.microsoft.com/mssql/server");
var container = new MSSQLServerContainer<>(imageName).acceptLicense();
container.addEnv("MSSQL_MEMORY_LIMIT_MB", "384");
withNetwork(container);
return container;
}
/**
* Create a new network and bind it to the container.
*/
public static void withNetwork(MSSQLServerContainer<?> container) {
container.withNetwork(Network.newNetwork());
}
public static void withAgent(MSSQLServerContainer<?> container) {
container.addEnv("MSSQL_AGENT_ENABLED", "True");
}
public static void withSslCertificates(MSSQLServerContainer<?> container) {
// Yes, this is uglier than sin. There's no command to reload a SQL Server config, so all the
// necessary files must be created before the server starts. Hence this horror.
String command = StringUtils.replace(
"""
mkdir /tmp/certs/ &&
openssl req -nodes -new -x509 -sha256 -keyout /tmp/certs/ca.key -out /tmp/certs/ca.crt -subj "/CN=ca" &&
openssl req -nodes -new -x509 -sha256 -keyout /tmp/certs/dummy_ca.key -out /tmp/certs/dummy_ca.crt -subj "/CN=ca" &&
openssl req -nodes -new -sha256 -keyout /tmp/certs/server.key -out /tmp/certs/server.csr -subj "/CN={hostName}" &&
openssl req -nodes -new -sha256 -keyout /tmp/certs/dummy_server.key -out /tmp/certs/dummy_server.csr -subj "/CN={hostName}" &&
openssl x509 -req -in /tmp/certs/server.csr -CA /tmp/certs/ca.crt -CAkey /tmp/certs/ca.key -out /tmp/certs/server.crt -days 365 -sha256 &&
openssl x509 -req -in /tmp/certs/dummy_server.csr -CA /tmp/certs/ca.crt -CAkey /tmp/certs/ca.key -out /tmp/certs/dummy_server.crt -days 365 -sha256 &&
openssl x509 -req -in /tmp/certs/server.csr -CA /tmp/certs/dummy_ca.crt -CAkey /tmp/certs/dummy_ca.key -out /tmp/certs/server_dummy_ca.crt -days 365 -sha256 &&
chmod 440 /tmp/certs/* &&
{
cat > /var/opt/mssql/mssql.conf <<- EOF
[network]
tlscert = /tmp/certs/server.crt
tlskey = /tmp/certs/server.key
tlsprotocols = 1.2
forceencryption = 1
EOF
} && /opt/mssql/bin/sqlservr
""",
"{hostName}", container.getHost());
container.withCommand("bash", "-c", command)
.withUrlParam("trustServerCertificate", "true");
}
}
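
For context, these modifiers are applied through `MsSQLTestDatabase.in(...)`, defined in the file below. A hedged usage sketch (backticks are needed because `in` is a Kotlin keyword):

```kotlin
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase

// Shared container with the self-signed certificate setup applied; the CA
// certificate can then be read back for an encrypted_verify_certificate config.
val testDb = MsSQLTestDatabase.`in`(
    MsSQLTestDatabase.BaseImage.MSSQL_2022,
    MsSQLTestDatabase.ContainerModifier.WITH_SSL_CERTIFICATES,
)
val caCertificate = testDb.getCertificate(MsSQLTestDatabase.CertificateKey.CA)
```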

View File

@@ -1,429 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.INVALID_CDC_CURSOR_POSITION_PROPERTY;
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.RESYNC_DATA_OPTION;
import com.google.common.collect.Sets;
import io.airbyte.cdk.db.factory.DatabaseDriver;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.testutils.ContainerFactory.NamedContainerModifier;
import io.airbyte.cdk.testutils.TestDatabase;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.debezium.connector.sqlserver.Lsn;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.sql.SQLException;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.jooq.SQLDialect;
import org.jooq.exception.DataAccessException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.MSSQLServerContainer;
public class MsSQLTestDatabase extends TestDatabase<MSSQLServerContainer<?>, MsSQLTestDatabase, MsSQLTestDatabase.MsSQLConfigBuilder> {
static private final Logger LOGGER = LoggerFactory.getLogger(MsSQLTestDatabase.class);
// Setting this to true will create a bunch of background threads that regularly check the
// state of the database and log every time it changes. A bit verbose, but useful for debugging.
private static final boolean ENABLE_BACKGROUND_THREADS = false;
// Empirically, 240 is enough. If you feel like you need to increase it, you're probably missing a
// check somewhere.
static public final int MAX_RETRIES = 240;
public enum BaseImage {
MSSQL_2022("mcr.microsoft.com/mssql/server:2022-latest"),
;
public final String reference;
BaseImage(final String reference) {
this.reference = reference;
}
}
public enum ContainerModifier implements NamedContainerModifier<MSSQLServerContainer<?>> {
AGENT(MsSQLContainerFactory::withAgent),
WITH_SSL_CERTIFICATES(MsSQLContainerFactory::withSslCertificates),
;
public final Consumer<MSSQLServerContainer<?>> modifier;
ContainerModifier(final Consumer<MSSQLServerContainer<?>> modifier) {
this.modifier = modifier;
}
@Override
public Consumer<MSSQLServerContainer<?>> modifier() {
return modifier;
}
}
static public MsSQLTestDatabase in(final BaseImage imageName, final ContainerModifier... modifiers) {
final var container = new MsSQLContainerFactory().shared(imageName.reference, modifiers);
final MsSQLTestDatabase testdb;
if (ENABLE_BACKGROUND_THREADS) {
testdb = new MsSqlTestDatabaseWithBackgroundThreads(container);
} else {
testdb = new MsSQLTestDatabase(container);
}
return testdb
.withConnectionProperty("encrypt", "false")
.withConnectionProperty("trustServerCertificate", "true")
.withConnectionProperty("databaseName", testdb.getDatabaseName())
.initialized();
}
public MsSQLTestDatabase(final MSSQLServerContainer<?> container) {
super(container);
LOGGER.info("creating new database. databaseId=" + this.databaseId + ", databaseName=" + getDatabaseName());
}
public MsSQLTestDatabase withCdc() {
LOGGER.info("enabling CDC on database {} with id {}", getDatabaseName(), databaseId);
with("EXEC sys.sp_cdc_enable_db;");
LOGGER.info("CDC enabled on database {} with id {}", getDatabaseName(), databaseId);
return this;
}
private static final String RETRYABLE_CDC_TABLE_ENABLEMENT_ERROR_CONTENT =
"The error returned was 14258: 'Cannot perform this operation while SQLServerAgent is starting. Try again later.'";
private static final String ENABLE_CDC_SQL_FMT = """
EXEC sys.sp_cdc_enable_table
\t@source_schema = N'%s',
\t@source_name = N'%s',
\t@role_name = %s,
\t@supports_net_changes = 0,
\t@capture_instance = N'%s'""";
private final Set<String> CDC_INSTANCE_NAMES = Sets.newConcurrentHashSet();
public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName) {
return withCdcForTable(schemaName, tableName, roleName, "%s_%s".formatted(schemaName, tableName));
}
public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName, String instanceName) {
LOGGER.info(formatLogLine("enabling CDC for table {}.{} and role {}, instance {}"), schemaName, tableName, roleName, instanceName);
String sqlRoleName = roleName == null ? "NULL" : "N'%s'".formatted(roleName);
for (int tryCount = 0; tryCount < MAX_RETRIES; tryCount++) {
try {
Thread.sleep(1_000);
synchronized (getContainer()) {
LOGGER.info(formatLogLine("Trying to enable CDC for table {}.{} and role {}, instance {}, try {}/{}"), schemaName, tableName, roleName,
instanceName, tryCount, MAX_RETRIES);
with(ENABLE_CDC_SQL_FMT.formatted(schemaName, tableName, sqlRoleName, instanceName));
}
CDC_INSTANCE_NAMES.add(instanceName);
return withShortenedCapturePollingInterval();
} catch (DataAccessException e) {
if (!e.getMessage().contains(RETRYABLE_CDC_TABLE_ENABLEMENT_ERROR_CONTENT)) {
throw e;
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
throw new RuntimeException(formatLogLine("failed to enable CDC for table %s.%s within %d seconds").formatted(schemaName, tableName, MAX_RETRIES));
}
private static final String DISABLE_CDC_SQL_FMT = """
EXEC sys.sp_cdc_disable_table
\t@source_schema = N'%s',
\t@source_name = N'%s',
\t@capture_instance = N'%s'
""";
public MsSQLTestDatabase withCdcDisabledForTable(String schemaName, String tableName, String instanceName) {
LOGGER.info(formatLogLine("disabling CDC for table {}.{}, instance {}"), schemaName, tableName, instanceName);
if (!CDC_INSTANCE_NAMES.remove(instanceName)) {
throw new RuntimeException(formatLogLine("CDC was disabled for instance ") + instanceName);
}
synchronized (getContainer()) {
return with(DISABLE_CDC_SQL_FMT.formatted(schemaName, tableName, instanceName));
}
}
private static final String DISABLE_CDC_SQL = "EXEC sys.sp_cdc_disable_db;";
public MsSQLTestDatabase withoutCdc() {
CDC_INSTANCE_NAMES.clear();
synchronized (getContainer()) {
return with(DISABLE_CDC_SQL);
}
}
public MsSQLTestDatabase withAgentStarted() {
return with("EXEC master.dbo.xp_servicecontrol N'START', N'SQLServerAGENT';");
}
public MsSQLTestDatabase withAgentStopped() {
return with("EXEC master.dbo.xp_servicecontrol N'STOP', N'SQLServerAGENT';");
}
public MsSQLTestDatabase withWaitUntilAgentRunning() {
waitForAgentState(true);
return self();
}
public MsSQLTestDatabase withWaitUntilAgentStopped() {
waitForAgentState(false);
return self();
}
public MsSQLTestDatabase waitForCdcRecords(String schemaName, String tableName, int recordCount) {
return waitForCdcRecords(schemaName, tableName, "%s_%s".formatted(schemaName, tableName), recordCount);
}
public MsSQLTestDatabase waitForCdcRecords(String schemaName, String tableName, String cdcInstanceName, int recordCount) {
if (!CDC_INSTANCE_NAMES.contains(cdcInstanceName)) {
throw new RuntimeException("CDC is not enabled on instance %s".formatted(cdcInstanceName));
}
String sql = "SELECT count(*) FROM cdc.%s_ct".formatted(cdcInstanceName);
int actualRecordCount = 0;
for (int tryCount = 0; tryCount < MAX_RETRIES; tryCount++) {
LOGGER.info(formatLogLine("fetching the number of CDC records for {}.{}, instance {}"), schemaName, tableName, cdcInstanceName);
try {
Thread.sleep(1_000);
actualRecordCount = query(ctx -> ctx.fetch(sql)).get(0).get(0, Integer.class);
} catch (SQLException | DataAccessException e) {
actualRecordCount = 0;
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
LOGGER.info(formatLogLine("Found {} CDC records for {}.{} in instance {}. Expecting {}. Trying again ({}/{}"), actualRecordCount, schemaName,
tableName, cdcInstanceName,
recordCount, tryCount, MAX_RETRIES);
if (actualRecordCount >= recordCount) {
LOGGER.info(formatLogLine("found {} records after {} tries!"), actualRecordCount, tryCount);
return self();
}
}
throw new RuntimeException(formatLogLine(
"failed to find %d records after %d seconds. Only found %d!").formatted(recordCount, MAX_RETRIES, actualRecordCount));
}
private boolean shortenedPollingIntervalEnabled = false;
public MsSQLTestDatabase withShortenedCapturePollingInterval() {
if (!shortenedPollingIntervalEnabled) {
synchronized (getContainer()) {
shortenedPollingIntervalEnabled = true;
with("EXEC sys.sp_cdc_change_job @job_type = 'capture', @pollinginterval = 1;");
}
}
return this;
}
private void waitForAgentState(final boolean running) {
final String expectedValue = running ? "Running." : "Stopped.";
LOGGER.info(formatLogLine("Waiting for SQLServerAgent state to change to '{}'."), expectedValue);
for (int i = 0; i < MAX_RETRIES; i++) {
try {
Thread.sleep(1_000);
final var r = query(ctx -> ctx.fetch("EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';").get(0));
if (expectedValue.equalsIgnoreCase(r.getValue(0).toString())) {
LOGGER.info(formatLogLine("SQLServerAgent state is '{}', as expected."), expectedValue);
return;
}
LOGGER.info(formatLogLine("Retrying, SQLServerAgent state {} does not match expected '{}'."), r, expectedValue);
} catch (final SQLException e) {
LOGGER.info(formatLogLine("Retrying agent state query after catching exception {}."), e.getMessage());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
throw new RuntimeException(formatLogLine("Exhausted retry attempts while polling for agent state"));
}
public static final String MAX_LSN_QUERY = "SELECT sys.fn_cdc_get_max_lsn();";
public MsSQLTestDatabase withWaitUntilMaxLsnAvailable() {
LOGGER.info(formatLogLine("Waiting for max LSN to become available for database {}."), getDatabaseName());
for (int i = 0; i < MAX_RETRIES; i++) {
try {
Thread.sleep(1_000);
final var maxLSN = query(ctx -> ctx.fetch(MAX_LSN_QUERY).get(0).get(0, byte[].class));
if (maxLSN != null) {
LOGGER.info(formatLogLine("Max LSN available for database {}: {}"), getDatabaseName(), Lsn.valueOf(maxLSN));
return self();
}
LOGGER.info(formatLogLine("Retrying, max LSN still not available for database {}."), getDatabaseName());
} catch (final SQLException e) {
LOGGER.info(formatLogLine("Retrying max LSN query after catching exception {}"), e.getMessage());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
throw new RuntimeException("Exhausted retry attempts while polling for max LSN availability");
}
@Override
public String getPassword() {
return "S00p3rS33kr3tP4ssw0rd!";
}
@Override
public String getJdbcUrl() {
return String.format("jdbc:sqlserver://%s:%d", getContainer().getHost(), getContainer().getFirstMappedPort());
}
@Override
protected Stream<Stream<String>> inContainerBootstrapCmd() {
return Stream.of(
mssqlCmd(Stream.of(String.format("CREATE DATABASE %s", getDatabaseName()))),
mssqlCmd(Stream.of(
String.format("USE %s", getDatabaseName()),
String.format("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", getUserName(), getPassword(), getDatabaseName()),
String.format("ALTER SERVER ROLE [sysadmin] ADD MEMBER %s", getUserName()),
String.format("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", getUserName(), getUserName()),
String.format("ALTER ROLE [db_owner] ADD MEMBER %s", getUserName()))));
}
/**
* Don't drop anything when closing the test database. Instead, if cleanup is required, call
* {@link #dropDatabaseAndUser()} explicitly. Implicit cleanups may result in deadlocks and so
* aren't really worth it.
*/
@Override
protected Stream<String> inContainerUndoBootstrapCmd() {
return Stream.empty();
}
public void dropDatabaseAndUser() {
execInContainer(mssqlCmd(Stream.of(
String.format("USE master"),
String.format("ALTER DATABASE %s SET single_user WITH ROLLBACK IMMEDIATE", getDatabaseName()),
String.format("DROP DATABASE %s", getDatabaseName()))));
}
public Stream<String> mssqlCmd(final Stream<String> sql) {
return Stream.of("/opt/mssql-tools18/bin/sqlcmd",
"-U", getContainer().getUsername(),
"-P", getContainer().getPassword(),
"-Q", sql.collect(Collectors.joining("; ")),
"-b", "-e", "-C");
}
@Override
public DatabaseDriver getDatabaseDriver() {
return DatabaseDriver.MSSQLSERVER;
}
@Override
public SQLDialect getSqlDialect() {
return SQLDialect.DEFAULT;
}
public static enum CertificateKey {
CA(true),
DUMMY_CA(false),
SERVER(true),
DUMMY_SERVER(false),
SERVER_DUMMY_CA(false),
;
public final boolean isValid;
CertificateKey(final boolean isValid) {
this.isValid = isValid;
}
}
private volatile Map<CertificateKey, String> cachedCerts = new ConcurrentHashMap<>();
public String getCertificate(final CertificateKey certificateKey) {
if (!cachedCerts.containsKey(certificateKey)) {
final String certificate;
try {
final String command = "cat /tmp/certs/" + certificateKey.name().toLowerCase() + ".crt";
certificate = getContainer().execInContainer("bash", "-c", command).getStdout().trim();
} catch (final IOException e) {
throw new UncheckedIOException(e);
} catch (final InterruptedException e) {
throw new RuntimeException(e);
}
synchronized (cachedCerts) {
this.cachedCerts.put(certificateKey, certificate);
}
}
return cachedCerts.get(certificateKey);
}
@Override
public MsSQLConfigBuilder configBuilder() {
return new MsSQLConfigBuilder(this);
}
static public class MsSQLConfigBuilder extends ConfigBuilder<MsSQLTestDatabase, MsSQLConfigBuilder> {
protected MsSQLConfigBuilder(final MsSQLTestDatabase testDatabase) {
super(testDatabase);
with(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=2");
}
public MsSQLConfigBuilder withCdcReplication() {
return with("is_test", true)
.with("replication_method", Map.of(
"method", "CDC",
"initial_waiting_seconds", Duration.ofSeconds(20).getSeconds(),
INVALID_CDC_CURSOR_POSITION_PROPERTY, RESYNC_DATA_OPTION));
}
public MsSQLConfigBuilder withSchemas(final String... schemas) {
return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas));
}
@Override
public MsSQLConfigBuilder withoutSsl() {
return withSsl(Map.of("ssl_method", "unencrypted"));
}
@Deprecated
public MsSQLConfigBuilder withSsl(final Map<Object, Object> sslMode) {
return with("ssl_method", sslMode);
}
public MsSQLConfigBuilder withEncrytedTrustServerCertificate() {
return withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate"));
}
public MsSQLConfigBuilder withEncrytedVerifyServerCertificate(final String certificate, final String hostnameInCertificate) {
if (hostnameInCertificate != null) {
return withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate,
"hostNameInCertificate", hostnameInCertificate));
} else {
return withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate));
}
}
}
@Override
public void close() {
MssqlDebeziumStateUtil.disposeInitialState();
super.close();
}
}
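
A hedged sketch of how a CDC test might chain the helpers above; the table name and DDL are illustrative only, and the capture instance name defaults to `schema_table`:

```kotlin
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier

// The SQL Server Agent must be running before tables can be enabled for CDC.
val db = MsSQLTestDatabase.`in`(BaseImage.MSSQL_2022, ContainerModifier.AGENT)
    .withAgentStarted()
    .withWaitUntilAgentRunning()
    .withCdc()
    .with("CREATE TABLE dbo.users (id INT PRIMARY KEY, name NVARCHAR(100));")
    .withCdcForTable("dbo", "users", null) // capture instance defaults to "dbo_users"
    .withWaitUntilMaxLsnAvailable()
db.with("INSERT INTO dbo.users (id, name) VALUES (1, 'a');")
    .waitForCdcRecords("dbo", "users", 1)
```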

View File

@@ -1,306 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.source.mssql;
import io.airbyte.commons.logging.LoggingHelper.Color;
import io.airbyte.commons.logging.MdcScope;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.jooq.Record;
import org.jooq.exception.DataAccessException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.MSSQLServerContainer;
public class MsSqlTestDatabaseWithBackgroundThreads extends MsSQLTestDatabase {
private abstract class AbstractMssqlTestDatabaseBackgroundThread extends Thread {
protected Logger LOGGER = LoggerFactory.getLogger(this.getClass());
protected final boolean PRINT_EVERY_CALL = false;
AbstractMssqlTestDatabaseBackgroundThread() {
this.start();
}
protected volatile boolean stop = false;
protected String formatLogLine(String logLine) {
String retVal = this.getClass().getSimpleName() + " databaseId=" + databaseId + ", containerId=" + containerId + " - " + logLine;
return retVal;
}
@SuppressWarnings("try")
public void run() {
try (MdcScope mdcScope = new MdcScope.Builder().setPrefixColor(Color.PURPLE_BACKGROUND).setLogPrefix(this.getClass().getSimpleName())
.build()) {
while (!stop) {
try {
Thread.sleep(100);
innerRun();
} catch (final Throwable t) {
LOGGER.info(formatLogLine(
"got exception of type " + t.getClass() + ":" + StringUtils.replace(t.getMessage() + "\n" + formatStackTrace(t), "\n", "\\n")));
}
}
}
}
private String formatStackTrace(Throwable t) {
boolean belowCurrentCall = false;
List<String> stackToDisplay = new LinkedList<String>();
for (String stackString : ExceptionUtils.getStackFrames(t)) {
if (stackString.startsWith("\tat ")) {
if (!belowCurrentCall && stackString.contains(AbstractMssqlTestDatabaseBackgroundThread.class.getSimpleName())) {
belowCurrentCall = true;
}
} else {
belowCurrentCall = false;
}
if (!belowCurrentCall) {
stackToDisplay.add(stackString);
}
}
return StringUtils.join(stackToDisplay, "\n ");
}
public abstract void innerRun() throws Exception;
}
private class MssqlTestDatabaseBackgroundThreadAgentState extends AbstractMssqlTestDatabaseBackgroundThread {
private String previousValue = null;
@Override
public void innerRun() throws Exception {
String agentStateSql = "EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';";
final var r = query(ctx -> ctx.fetch(agentStateSql).get(0));
String agentState = r.getValue(0).toString();
if (PRINT_EVERY_CALL || !Objects.equals(agentState, previousValue)) {
LOGGER.info(formatLogLine("agentState changed from {} to {}"), previousValue, agentState);
previousValue = agentState;
}
}
}
private class MssqlTestDatabaseBackgroundThreadFnCdcGetMaxLsn extends AbstractMssqlTestDatabaseBackgroundThread {
private String previousValue = null;
@Override
public void innerRun() throws Exception {
String max_lsn;
try {
Object retVal = query(ctx -> ctx.fetch(MAX_LSN_QUERY)).get(0).getValue(0);
if (retVal instanceof byte[] bytes) {
max_lsn = new String(Base64.getEncoder().encode(bytes), StandardCharsets.UTF_8);
} else {
max_lsn = String.valueOf(retVal);
}
} catch (DataAccessException e) {
if (e.getMessage().contains("Invalid object name 'cdc.lsn_time_mapping'")) {
max_lsn = "DataAccessException " + e.getMessage();
} else {
throw e;
}
}
if (PRINT_EVERY_CALL || !Objects.equals(max_lsn, previousValue)) {
LOGGER.info(formatLogLine("sys.fn_cdc_get_max_lsn changed from {} to {}"), previousValue, max_lsn);
previousValue = max_lsn;
}
}
}
private class MssqlTestDatabaseBackgroundThreadLsnTimeMapping extends AbstractMssqlTestDatabaseBackgroundThread {
private String previousValue = null;
private static final String LSN_TIME_MAPPING_QUERY = "SELECT start_lsn, tran_begin_time, tran_end_time, tran_id FROM cdc.lsn_time_mapping;";
@Override
public void innerRun() throws Exception {
String results;
try {
results = query(ctx -> ctx.fetch(LSN_TIME_MAPPING_QUERY)).toString();
} catch (DataAccessException e) {
if (e.getMessage().contains("Invalid object name 'cdc.lsn_time_mapping'")) {
results = "DataAccessException " + e.getMessage();
} else {
throw e;
}
}
if (PRINT_EVERY_CALL || !Objects.equals(results, previousValue)) {
LOGGER.info(formatLogLine("sys.lsn_time_mapping changed from {} to {}"), previousValue, results);
previousValue = results;
}
}
}
private class MssqlTestDatabaseBackgroundThreadQueryJobsTable extends AbstractMssqlTestDatabaseBackgroundThread {
private String previousValue = null;
private int previousRowCount = -1;
private static final String JOBS_TABLE_QUERY = "SELECT * FROM msdb.dbo.cdc_jobs";
@Override
public void innerRun() throws Exception {
int resultSize = 0;
String resultsAsString;
try {
List<Record> results = query(ctx -> ctx.fetch(JOBS_TABLE_QUERY));
resultsAsString = results.toString();
resultSize = results.size();
} catch (DataAccessException e) {
if (e.getMessage().contains("Invalid object name 'msdb.dbo.cdc_jobs'")) {
resultsAsString = "DataAccessException " + e.getMessage();
} else {
throw e;
}
}
if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) {
LOGGER.info(formatLogLine("cdc.change_tables changed from {} rows\n{} to {} rows\n{}"), previousRowCount, previousValue, resultSize,
resultsAsString);
previousValue = resultsAsString;
previousRowCount = resultSize;
}
}
}
private class MssqlTestDatabaseBackgroundThreadQueryChangeTables extends AbstractMssqlTestDatabaseBackgroundThread {
private String previousValue = null;
private int previousRowCount = -1;
private static final String CHANGE_TABLES_QUERY = """
SELECT OBJECT_SCHEMA_NAME(source_object_id, DB_ID('%s')),
OBJECT_NAME(source_object_id, DB_ID('%s')),
capture_instance,
object_id,
start_lsn FROM cdc.change_tables""";
@Override
public void innerRun() throws Exception {
int resultSize = 0;
String resultsAsString;
try {
List<Record> results = query(ctx -> ctx.fetch(CHANGE_TABLES_QUERY.formatted(getDatabaseName(), getDatabaseName())));
resultsAsString = results.toString();
resultSize = results.size();
} catch (DataAccessException e) {
if (e.getMessage().contains("Invalid object name 'cdc.change_tables'")) {
resultsAsString = "DataAccessException " + e.getMessage();
} else {
throw e;
}
}
if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) {
LOGGER.info(formatLogLine("cdc.change_tables changed from {} rows\n{} to {} rows\n{}"), previousRowCount, previousValue, resultSize,
resultsAsString);
previousValue = resultsAsString;
previousRowCount = resultSize;
}
}
}
private class MssqlTestDatabaseBackgroundThreadQueryCdcTable extends AbstractMssqlTestDatabaseBackgroundThread {
private final String schemaName;
private final String tableName;
private final String instanceName;
private String previousValue = null;
private int previousRowCount = -1;
MssqlTestDatabaseBackgroundThreadQueryCdcTable(String schemaName, String tableName, String instanceName) {
this.schemaName = schemaName;
this.tableName = tableName;
this.instanceName = instanceName;
}
private static final String CDC_TABLE_SELECT_QUERY_STRING = "SELECT * FROM cdc.%s_ct";
@Override
public void innerRun() throws Exception {
int resultSize = 0;
String resultsAsString;
try {
List<Record> results = query(ctx -> ctx.fetch(CDC_TABLE_SELECT_QUERY_STRING.formatted(instanceName)));
resultsAsString = results.toString();
resultSize = results.size();
} catch (DataAccessException e) {
if (e.getMessage().contains("Invalid object name 'cdc.%s_ct'".formatted(instanceName))) {
resultsAsString = "DataAccessException " + e.getMessage();
} else {
throw e;
}
}
if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) {
LOGGER.info(formatLogLine("cdc table {} for {}.{} changed from {} rows\n{} to {} rows\n{}"), instanceName, schemaName, tableName,
previousRowCount, previousValue, resultSize,
resultsAsString);
previousValue = resultsAsString;
previousRowCount = resultSize;
}
}
}
private final List<AbstractMssqlTestDatabaseBackgroundThread> bgThreads = new ArrayList<>();
MsSqlTestDatabaseWithBackgroundThreads(MSSQLServerContainer<?> container) {
super(container);
}
@Override
public MsSQLTestDatabase initialized() {
super.initialized();
bgThreads.add(new MssqlTestDatabaseBackgroundThreadAgentState());
bgThreads.add(new MssqlTestDatabaseBackgroundThreadFnCdcGetMaxLsn());
bgThreads.add(new MssqlTestDatabaseBackgroundThreadLsnTimeMapping());
bgThreads.add(new MssqlTestDatabaseBackgroundThreadQueryChangeTables());
bgThreads.add(new MssqlTestDatabaseBackgroundThreadQueryJobsTable());
return self();
}
@Override
public void close() {
for (var bgThread : bgThreads) {
bgThread.stop = true;
}
super.close();
MssqlDebeziumStateUtil.disposeInitialState();
}
private final Map<String, MssqlTestDatabaseBackgroundThreadQueryCdcTable> bgThreadByInstance = new ConcurrentHashMap<>();
@Override
public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName, String instanceName) {
super.withCdcForTable(schemaName, tableName, roleName, instanceName);
MssqlTestDatabaseBackgroundThreadQueryCdcTable bgThread = new MssqlTestDatabaseBackgroundThreadQueryCdcTable(schemaName, tableName, instanceName);
bgThreadByInstance.put(instanceName, bgThread);
bgThreads.add(bgThread);
return this;
}
@Override
public MsSQLTestDatabase withCdcDisabledForTable(String schemaName, String tableName, String instanceName) {
bgThreadByInstance.get(instanceName).stop = true;
super.withCdcDisabledForTable(schemaName, tableName, instanceName);
return this;
}
}
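
The background threads above all share the same change-detection shape: poll on a short interval and log only when the observed value differs from the previous poll. A distilled, hedged Kotlin sketch of that pattern, with `fetchState` as a hypothetical supplier standing in for the jOOQ queries:

```kotlin
// Poll-and-log-on-change loop, mirroring AbstractMssqlTestDatabaseBackgroundThread.
class ChangeLoggingPoller(private val fetchState: () -> String) : Thread() {
    @Volatile var stop = false
    private var previous: String? = null

    override fun run() {
        while (!stop) {
            Thread.sleep(100)
            // Errors are rendered into the state string rather than killing the thread.
            val current = runCatching(fetchState).getOrElse { "error: ${it.message}" }
            if (current != previous) {
                println("state changed from $previous to $current")
                previous = current
            }
        }
    }
}
```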