Migrate source mssql from old CDK to new CDK (#63731)
@@ -1,33 +0,0 @@
# MsSQL (SQL Server) Source

## Performance Test

To run performance tests from the command line:

```shell
./gradlew :airbyte-integrations:connectors:source-mssql:performanceTest [--cpulimit=cpulimit/<limit>] [--memorylimit=memorylimit/<limit>]
```

In a pull request:

```shell
/test-performance connector=connectors/source-mssql [--cpulimit=cpulimit/<limit>] [--memorylimit=memorylimit/<limit>]
```

- `cpulimit`: Limit the number of CPUs. The minimum is `2`. E.g. `--cpulimit=cpulimit/2`.
- `memorylimit`: Limit the size of the memory. Must include the unit at the end (e.g. `MB`, `GB`). The minimum size is `6MB`. E.g. `--memorylimit=memorylimit/4GB`.
- When neither a CPU nor a memory limit is provided, the performance tests run without CPU or memory limitations. The available resources are then bounded by those specified in `ResourceRequirements.java`.
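For example, to run with both limits applied (illustrative values taken from the option descriptions above; any limits at or above the documented minimums work):

```shell
./gradlew :airbyte-integrations:connectors:source-mssql:performanceTest --cpulimit=cpulimit/2 --memorylimit=memorylimit/4GB
```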
### Use MsSQL script to populate the benchmark database

In order to create a database with a certain number of tables, and a certain number of records in each of them, follow these steps:

1. Create a new database.
2. Follow the TODOs in [create_mssql_benchmarks.sql](src/test-performance/sql/create_mssql_benchmarks.sql) to change the number of tables and the number of records of different sizes.
3. Execute the script with your changes against the new database. You can run the script with the `sqlcmd` client:
   ```bash
   cd airbyte-integrations/connectors/source-mssql
   sqlcmd -S Serverinstance -E -i src/test-performance/sql/create_mssql_benchmarks.sql
   ```
4. After the script finishes, the database will contain the number of tables specified in the script, with names starting with **test_0** and ending with **test\_(the number of tables minus 1)**.
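As a quick sanity check after the script completes, you can count the generated tables with `sqlcmd` (an illustrative query, not part of the benchmark script; substitute the name of the database you created):

```shell
sqlcmd -S Serverinstance -E -d <your_benchmark_database> -Q "SELECT COUNT(*) AS benchmark_tables FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE 'test%'"
```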
@@ -1,9 +0,0 @@
# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference)
# for more information about how to configure these tests
connector_image: airbyte/source-mssql:dev
tests:
  spec:
    - spec_path: "src/test-integration/resources/expected_spec.json"
      config_path: "src/test-integration/resources/dummy_config.json"
      backward_compatibility_tests_config:
        disable_for_version: "0.4.25"
@@ -1,43 +1,32 @@
plugins {
    id 'airbyte-java-connector'
    id 'airbyte-bulk-connector'
    id "io.airbyte.gradle.docker"
    id 'airbyte-connector-docker-convention'
}

airbyteJavaConnector {
    cdkVersionRequired = '0.48.18'
    features = ['db-sources']
    useLocalCdk = false
}

java {
    // TODO: rewrite code to avoid javac warnings in the first place
    compileJava {
        options.compilerArgs += "-Xlint:-try,-rawtypes"
    }
    compileTestFixturesJava {
        options.compilerArgs += "-Xlint:-this-escape"
    }
}

application {
    mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource'
    applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0']
    mainClass = 'io.airbyte.integrations.source.mssql.MsSqlServerSource'
}

airbyteBulkConnector {
    core = 'extract'
    toolkits = ['extract-jdbc', 'extract-cdc']
}

dependencies {
    implementation 'com.microsoft.sqlserver:mssql-jdbc:12.10.0.jre11'
    implementation 'io.debezium:debezium-embedded:3.0.7.Final'
    implementation 'io.debezium:debezium-connector-sqlserver:3.0.7.Final'
    implementation 'com.microsoft.sqlserver:mssql-jdbc:12.10.1.jre11'
    implementation 'com.azure:azure-identity:1.15.3'
    implementation 'io.debezium:debezium-embedded:3.3.0.Final'
    implementation 'io.debezium:debezium-connector-sqlserver:3.3.0.Final'
    implementation 'org.codehaus.plexus:plexus-utils:3.4.2'

    testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0'
    api 'org.apache.commons:commons-lang3:3.18.0'
    implementation 'org.apache.commons:commons-lang3:3.18.0'

    testImplementation 'org.awaitility:awaitility:4.2.0'
    testImplementation 'org.hamcrest:hamcrest-all:1.3'
    testFixturesImplementation 'org.testcontainers:mssqlserver:1.19.0'
    testImplementation 'org.testcontainers:mssqlserver:1.19.0'
}

compileKotlin {

    testImplementation 'com.zaxxer:HikariCP:5.1.0'
    testImplementation("io.mockk:mockk:1.12.0")
    api 'com.google.guava:guava:33.4.0-jre'
}
@@ -1,2 +1,3 @@
testExecutionConcurrency=-1
JunitMethodExecutionTimeout=5 m
testExecutionConcurrency=1
JunitMethodExecutionTimeout=5m
cdkVersion=0.1.58
@@ -1,17 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import pytest


pytest_plugins = ("connector_acceptance_test.plugin",)


@pytest.fixture(scope="session", autouse=True)
def connector_setup():
    """This fixture is a placeholder for external resources that acceptance test might require."""
    # TODO: setup test dependencies if needed. otherwise remove the TODO comments
    yield
    # TODO: clean up test dependencies
@@ -1,228 +0,0 @@
CREATE DATABASE MSSQL_BASIC;

USE MSSQL_BASIC;

CREATE TABLE dbo.TEST_DATASET(
    id INTEGER PRIMARY KEY,
    test_column_1 BIGINT,
    test_column_10 FLOAT,
    test_column_11 REAL,
    test_column_12 DATE,
    test_column_13 smalldatetime,
    test_column_14 datetime,
    test_column_15 datetime2,
    test_column_16 TIME,
    test_column_18 CHAR,
    test_column_2 INT,
    test_column_20 text,
    test_column_21 nchar,
    test_column_22 nvarchar(MAX),
    test_column_23 ntext,
    test_column_25 VARBINARY(3),
    test_column_3 SMALLINT,
    test_column_4 tinyint,
    test_column_6 DECIMAL(5, 2),
    test_column_7 NUMERIC
);

INSERT INTO dbo.TEST_DATASET VALUES(1, -9223372036854775808, '123', '123', '0001-01-01', '1900-01-01', '1753-01-01', '0001-01-01', '13:00:01', 'a', -2147483648, 'a', 'a', 'a', 'a', CAST('ABC' AS VARBINARY), -32768, 0, 999.33, '99999');

INSERT INTO dbo.TEST_DATASET VALUES(2, 9223372036854775807, '1234567890.1234567', '1234567890.1234567', '9999-12-31', '2079-06-06', '9999-12-31', '9999-12-31', '13:00:04Z', '*', 2147483647, 'abc', '*', 'abc', 'abc', CAST('ABC' AS VARBINARY), 32767, 255, 999.33, '99999');

INSERT INTO dbo.TEST_DATASET VALUES(3, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '*', 2147483647, 'Some test text 123$%^&*()_', N'ї', N'Миші йдуть на південь, не питай чому;', N'Миші йдуть на південь, не питай чому;', CAST('ABC' AS VARBINARY), 32767, 255, 999.33, '99999');

INSERT INTO dbo.TEST_DATASET VALUES(4, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '*', 2147483647, '', N'ї', N'櫻花分店', N'櫻花分店', CAST('ABC' AS VARBINARY), 32767, 255, 999.33, '99999');

INSERT INTO dbo.TEST_DATASET VALUES(5, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '*', 2147483647, '', N'ї', '', '', CAST('ABC' AS VARBINARY), 32767, 255, 999.33, '99999');

INSERT INTO dbo.TEST_DATASET VALUES(6, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '*', 2147483647, '', N'ї', N'\xF0\x9F\x9A\x80', N'\xF0\x9F\x9A\x80', CAST('ABC' AS VARBINARY), 32767, 255, 999.33, '99999');

INSERT INTO dbo.TEST_DATASET VALUES(7, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '*', 2147483647, '', N'ї', N'\xF0\x9F\x9A\x80', N'\xF0\x9F\x9A\x80', CAST('ABC' AS VARBINARY), 32767, 255, 999.33, '99999');
@@ -1,320 +0,0 @@
CREATE DATABASE MSSQL_FULL;

USE MSSQL_FULL;

CREATE TABLE dbo.TEST_DATASET(
    id INTEGER PRIMARY KEY,
    test_column_1 BIGINT,
    test_column_10 FLOAT,
    test_column_11 REAL,
    test_column_12 DATE,
    test_column_13 smalldatetime,
    test_column_14 datetime,
    test_column_15 datetime2,
    test_column_16 TIME,
    test_column_17 datetimeoffset,
    test_column_18 CHAR,
    test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8,
    test_column_2 INT,
    test_column_20 text,
    test_column_21 nchar,
    test_column_22 nvarchar(MAX),
    test_column_23 ntext,
    test_column_24 BINARY,
    test_column_25 VARBINARY(3),
    test_column_26 geometry,
    test_column_27 uniqueidentifier,
    test_column_28 xml,
    test_column_29 geography,
    test_column_3 SMALLINT,
    test_column_30 hierarchyid,
    test_column_31 sql_variant,
    test_column_4 tinyint,
    test_column_5 bit,
    test_column_6 DECIMAL(5, 2),
    test_column_7 NUMERIC,
    test_column_8 money,
    test_column_9 smallmoney
);

INSERT INTO dbo.TEST_DATASET VALUES(1, -9223372036854775808, '123', '123', '0001-01-01', '1900-01-01', '1753-01-01', '0001-01-01', NULL, '0001-01-10 00:00:00 +01:00', 'a', 'a', NULL, 'a', 'a', 'a', 'a', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '<user><user_id>1</user_id></user>', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), NULL, '/1/1/', 'a', NULL, NULL, 999.33, '99999', NULL, NULL);

INSERT INTO dbo.TEST_DATASET VALUES(2, 9223372036854775807, '1234567890.1234567', '1234567890.1234567', '9999-12-31', '2079-06-06', '9999-12-31', '9999-12-31', '13:00:01', '9999-01-10 00:00:00 +01:00', '*', 'abc', -2147483648, 'abc', '*', 'abc', 'abc', NULL, NULL, NULL, NULL, NULL, NULL, -32768, NULL, 'abc', 0, 0, NULL, NULL, '9990000.3647', '-214748.3648');

INSERT INTO dbo.TEST_DATASET VALUES(3, 0, NULL, NULL, '1999-01-08', NULL, '9999-12-31T13:00:04Z', '9999-12-31T13:00:04.123456Z', '13:00:04Z', NULL, NULL, N'Миші йдуть на південь, не питай чому;', 2147483647, 'Some test text 123$%^&*()_', N'ї', N'Миші йдуть на південь, не питай чому;', N'Миші йдуть на південь, не питай чому;', NULL, NULL, NULL, NULL, '', NULL, 32767, NULL, N'Миші йдуть на південь, не питай чому;', 255, 1, NULL, NULL, NULL, 214748.3647);

INSERT INTO dbo.TEST_DATASET VALUES(4, NULL, NULL, NULL, NULL, NULL, '9999-12-31T13:00:04.123Z', NULL, '13:00:04.123456Z', NULL, NULL, N'櫻花分店', NULL, '', NULL, N'櫻花分店', N'櫻花分店', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, N'櫻花分店', NULL, 'true', NULL, NULL, NULL, NULL);

INSERT INTO dbo.TEST_DATASET VALUES(5, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, '', NULL, NULL, NULL, '', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, '', NULL, 'false', NULL, NULL, NULL, NULL);

INSERT INTO dbo.TEST_DATASET VALUES(6, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

INSERT INTO dbo.TEST_DATASET VALUES(7, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, N'\xF0\x9F\x9A\x80', NULL, NULL, NULL, N'\xF0\x9F\x9A\x80', N'\xF0\x9F\x9A\x80', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, N'\xF0\x9F\x9A\x80', NULL, NULL, NULL, NULL, NULL, NULL);
@@ -1,380 +0,0 @@
CREATE DATABASE MSSQL_FULL_NN;

USE MSSQL_FULL_NN;

CREATE TABLE dbo.TEST_DATASET(
    id INTEGER PRIMARY KEY,
    test_column_1 BIGINT,
    test_column_10 FLOAT,
    test_column_11 REAL,
    test_column_12 DATE,
    test_column_13 smalldatetime,
    test_column_14 datetime,
    test_column_15 datetime2,
    test_column_16 TIME,
    test_column_17 datetimeoffset,
    test_column_18 CHAR,
    test_column_19 VARCHAR(MAX) COLLATE Latin1_General_100_CI_AI_SC_UTF8,
    test_column_2 INT,
    test_column_20 text,
    test_column_21 nchar,
    test_column_22 nvarchar(MAX),
    test_column_23 ntext,
    test_column_24 BINARY,
    test_column_25 VARBINARY(3),
    test_column_26 geometry,
    test_column_27 uniqueidentifier,
    test_column_28 xml,
    test_column_29 geography,
    test_column_3 SMALLINT,
    test_column_30 hierarchyid,
    test_column_31 sql_variant,
    test_column_4 tinyint,
    test_column_5 bit,
    test_column_6 DECIMAL(5, 2),
    test_column_7 NUMERIC,
    test_column_8 money,
    test_column_9 smallmoney
);

INSERT INTO dbo.TEST_DATASET VALUES(1, -9223372036854775808, '123', '123', '0001-01-01', '1900-01-01', '1753-01-01', '0001-01-01', '13:00:01', '0001-01-10 00:00:00 +01:00', 'a', 'a', -2147483648, 'a', 'a', 'a', 'a', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '<user><user_id>1</user_id></user>', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), -32768, '/1/1/', 'a', 0, 0, 999.33, '99999', '9990000.3647', '-214748.3648');

INSERT INTO dbo.TEST_DATASET VALUES(2, 9223372036854775807, '1234567890.1234567', '1234567890.1234567', '9999-12-31', '2079-06-06', '9999-12-31', '9999-12-31', '13:00:04Z', '9999-01-10 00:00:00 +01:00', '*', 'abc', 2147483647, 'abc', '*', 'abc', 'abc', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), 32767, '/1/1/', 'abc', 255, 1, 999.33, '99999', '9990000.3647', 214748.3647);

INSERT INTO dbo.TEST_DATASET VALUES(3, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '9999-01-10 00:00:00 +01:00', '*', N'Миші йдуть на південь, не питай чому;', 2147483647, 'Some test text 123$%^&*()_', N'ї', N'Миші йдуть на південь, не питай чому;', N'Миші йдуть на південь, не питай чому;', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), 32767, '/1/1/', N'Миші йдуть на південь, не питай чому;', 255, 'true', 999.33, '99999', '9990000.3647', 214748.3647);

INSERT INTO dbo.TEST_DATASET VALUES(4, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '9999-01-10 00:00:00 +01:00', '*', N'櫻花分店', 2147483647, '', N'ї', N'櫻花分店', N'櫻花分店', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), 32767, '/1/1/', N'櫻花分店', 255, 'false', 999.33, '99999', '9990000.3647', 214748.3647);

INSERT INTO dbo.TEST_DATASET VALUES(5, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '9999-01-10 00:00:00 +01:00', '*', '', 2147483647, '', N'ї', '', '', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), 32767, '/1/1/', '', 255, 'false', 999.33, '99999', '9990000.3647', 214748.3647);

INSERT INTO dbo.TEST_DATASET VALUES(6, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '9999-01-10 00:00:00 +01:00', '*', N'\xF0\x9F\x9A\x80', 2147483647, '', N'ї', N'\xF0\x9F\x9A\x80', N'\xF0\x9F\x9A\x80', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), 32767, '/1/1/', N'\xF0\x9F\x9A\x80', 255, 'false', 999.33, '99999', '9990000.3647', 214748.3647);

INSERT INTO dbo.TEST_DATASET VALUES(7, 0, '1234567890.1234567', '1234567890.1234567', '1999-01-08', '2079-06-06', '9999-12-31T13:00:04.123Z', '9999-12-31T13:00:04.123456Z', '13:00:04.123456Z', '9999-01-10 00:00:00 +01:00', '*', N'\xF0\x9F\x9A\x80', 2147483647, '', N'ї', N'\xF0\x9F\x9A\x80', N'\xF0\x9F\x9A\x80', CAST('A' AS BINARY(1)), CAST('ABC' AS VARBINARY), geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0), '375CFC44-CAE3-4E43-8083-821D2DF0E626', '', geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326), 32767, '/1/1/', N'\xF0\x9F\x9A\x80', 255, 'false', 999.33, '99999', '9990000.3647', 214748.3647);
@@ -9,24 +9,33 @@ data:
  connectorSubtype: database
  connectorType: source
  definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1
  dockerImageTag: 4.2.6
  dockerImageTag: 4.3.0-rc.1
  dockerRepository: airbyte/source-mssql
  documentationUrl: https://docs.airbyte.com/integrations/sources/mssql
  githubIssueLabel: source-mssql
  icon: mssql.svg
  icon: icon.svg
  license: ELv2
  maxSecondsBetweenMessages: 7200
  name: Microsoft SQL Server (MSSQL)
  connectorBuildOptions:
    baseImage: docker.io/airbyte/java-connector-base:2.0.1@sha256:ec89bd1a89e825514dd2fc8730ba299a3ae1544580a078df0e35c5202c2085b3
  connectorIPCOptions:
    dataChannel:
      version: "0.0.2"
      supportedSerialization: ["JSONL", "PROTOBUF"]
      supportedTransport: ["SOCKET", "STDIO"]
  registryOverrides:
    cloud:
      enabled: true
    oss:
      enabled: true
  releaseStage: generally_available
  releaseStage: alpha
  supportLevel: certified
  tags:
    - language:java
  releases:
    rolloutConfiguration:
      enableProgressiveRollout: true
    breakingChanges:
      4.0.0:
        message: "We have overhauled our MSSQL source connector and it is now supported by the Airbyte team! To benefit from new features, including terabyte-sized table support, reliability improvements, expanded datetime data types, and various bug fixes, please opt in to the 4.0.0 version."
@@ -37,32 +46,8 @@ data:
      2.0.0:
        message: "Add default cursor for cdc"
        upgradeDeadline: "2023-08-23"
  connectorBuildOptions:
    baseImage: docker.io/airbyte/java-connector-base:2.0.1@sha256:ec89bd1a89e825514dd2fc8730ba299a3ae1544580a078df0e35c5202c2085b3
  connectorTestSuitesOptions:
    - suite: unitTests
    - suite: integrationTests
      testSecrets:
        - name: SECRET_SOURCE-MSSQL__CREDS
          fileName: config.json
          secretStore:
            type: GSM
            alias: airbyte-connector-testing-secret-store
        - name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS
          fileName: performance-config.json
          secretStore:
            type: GSM
            alias: airbyte-connector-testing-secret-store
    - suite: acceptanceTests
      testSecrets:
        - name: SECRET_SOURCE-MSSQL__CREDS
          fileName: config.json
          secretStore:
            type: GSM
            alias: airbyte-connector-testing-secret-store
        - name: SECRET_SOURCE_MSSQL_PERFORMANCE_TEST_CREDS
          fileName: performance-config.json
          secretStore:
            type: GSM
            alias: airbyte-connector-testing-secret-store
  metadataSpecVersion: "1.0"
@@ -1,15 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

// Constants defined in
// airbyte-integrations/connectors/source-mssql/src/main/resources/spec.json.
public class MsSqlSpecConstants {

  public static final String INVALID_CDC_CURSOR_POSITION_PROPERTY = "invalid_cdc_cursor_position_behavior";
  public static final String FAIL_SYNC_OPTION = "Fail sync";
  public static final String RESYNC_DATA_OPTION = "Re-sync data";

}
@@ -1,75 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT;
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO;
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.cdk.integrations.debezium.CdcMetadataInjector;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
import java.time.Instant;
import java.util.concurrent.atomic.AtomicLong;

public class MssqlCdcConnectorMetadataInjector implements CdcMetadataInjector<MssqlDebeziumStateAttributes> {

  private final long emittedAtConverted;

  // This now makes this class stateful. Please make sure to use the same instance within a sync
  private final AtomicLong recordCounter = new AtomicLong(1);
  private static final long ONE_HUNDRED_MILLION = 100_000_000;
  private static MssqlCdcConnectorMetadataInjector mssqlCdcConnectorMetadataInjector;

  private MssqlCdcConnectorMetadataInjector(final Instant emittedAt) {
    this.emittedAtConverted = emittedAt.getEpochSecond() * ONE_HUNDRED_MILLION;
  }

  public static MssqlCdcConnectorMetadataInjector getInstance(final Instant emittedAt) {
    if (mssqlCdcConnectorMetadataInjector == null) {
      mssqlCdcConnectorMetadataInjector = new MssqlCdcConnectorMetadataInjector(emittedAt);
    }

    return mssqlCdcConnectorMetadataInjector;
  }

  @Override
  public void addMetaData(final ObjectNode event, final JsonNode source) {
    final String commitLsn = source.get("commit_lsn").asText();
    final String eventSerialNo = source.get("event_serial_no").asText();
    event.put(CDC_LSN, commitLsn);
    event.put(CDC_EVENT_SERIAL_NO, eventSerialNo);
    event.put(CDC_DEFAULT_CURSOR, getCdcDefaultCursor());
  }

  @Override
  public void addMetaDataToRowsFetchedOutsideDebezium(final ObjectNode record,
                                                      final String transactionTimestamp,
                                                      final MssqlDebeziumStateAttributes debeziumStateAttributes) {
    record.put(CDC_UPDATED_AT, transactionTimestamp);
    record.put(CDC_EVENT_SERIAL_NO, "1");
    record.put(CDC_LSN, debeziumStateAttributes.lsn().toString());
    record.put(CDC_DELETED_AT, (String) null);
    record.put(CDC_DEFAULT_CURSOR, getCdcDefaultCursor());
  }

  @Override
  public String namespace(final JsonNode source) {
    return source.get("schema").asText();
  }

  @Override
  public String name(JsonNode source) {
    return source.get("table").asText();
  }

  private Long getCdcDefaultCursor() {
    return this.emittedAtConverted + this.recordCounter.getAndIncrement();
  }

}
@@ -1,230 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.annotations.VisibleForTesting;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.SyncMode;
import java.time.Duration;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.codehaus.plexus.util.StringUtils;

public class MssqlCdcHelper {

  // legacy replication method config before version 0.4.0
  // it is an enum with possible values: STANDARD and CDC
  public static final String LEGACY_REPLICATION_FIELD = "replication_method";
  // new replication method config since version 0.4.0
  // it is a oneOf object
  public static final String REPLICATION_FIELD = "replication";
  public static final String REPLICATION_TYPE_FIELD = "replication_type";
  public static final String METHOD_FIELD = "method";

  private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L);

  // Test execution latency is lower when heartbeats are more frequent.
  private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L);

  private static final Duration POLL_INTERVAL = Duration.ofSeconds(5L);

  // The poll.interval.ms must be lower than the heartbeat.interval.ms
  private static final Duration POLL_INTERVAL_IN_TESTS = Duration.ofMillis(500L);

  public enum ReplicationMethod {
    STANDARD,
    CDC
  }

  @VisibleForTesting
  static boolean isCdc(final JsonNode config) {
    if (config != null) {
      // new replication method config since version 0.4.0
      if (config.hasNonNull(LEGACY_REPLICATION_FIELD) && config.get(LEGACY_REPLICATION_FIELD).isObject()) {
        final JsonNode replicationConfig = config.get(LEGACY_REPLICATION_FIELD);
        return ReplicationMethod.valueOf(replicationConfig.get(METHOD_FIELD).asText()) == ReplicationMethod.CDC;
      }
      // legacy replication method config before version 0.4.0
      if (config.hasNonNull(LEGACY_REPLICATION_FIELD) && config.get(LEGACY_REPLICATION_FIELD).isTextual()) {
        return ReplicationMethod.valueOf(config.get(LEGACY_REPLICATION_FIELD).asText()) == ReplicationMethod.CDC;
      }
      if (config.hasNonNull(REPLICATION_FIELD)) {
        final JsonNode replicationConfig = config.get(REPLICATION_FIELD);
        return ReplicationMethod.valueOf(replicationConfig.get(REPLICATION_TYPE_FIELD).asText()) == ReplicationMethod.CDC;
      }
    }

    return false;
  }

  public static Properties getDebeziumProperties(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final boolean isSnapshot) {
    final JsonNode config = database.getSourceConfig();
    final JsonNode dbConfig = database.getDatabaseConfig();

    final Properties props = new Properties();
    props.setProperty("connector.class", "io.debezium.connector.sqlserver.SqlServerConnector");

    // https://debezium.io/documentation/reference/2.2/connectors/sqlserver.html#sqlserver-property-include-schema-changes
    props.setProperty("include.schema.changes", "false");
    // https://debezium.io/documentation/reference/2.2/connectors/sqlserver.html#sqlserver-property-provide-transaction-metadata
    props.setProperty("provide.transaction.metadata", "false");

    props.setProperty("converters", "mssql_converter");

    props.setProperty("mssql_converter.type", MssqlDebeziumConverter.class.getName());

    // If new stream(s) are added after a previously successful sync,
    // the snapshot.mode needs to be initial_only since we don't want to continue streaming changes
    // https://debezium.io/documentation/reference/stable/connectors/sqlserver.html#sqlserver-property-snapshot-mode
    if (isSnapshot) {
      props.setProperty("snapshot.mode", "initial_only");
    } else {
      // If not in snapshot mode, when_needed will make sure that a snapshot is taken if the transaction
      // log is rotated out. This will also end up reading streamed changes from the transaction log.
      props.setProperty("snapshot.mode", "when_needed");
    }

    props.setProperty("snapshot.isolation.mode", "read_committed");

    props.setProperty("schema.include.list", getSchema(catalog));
    props.setProperty("table.include.list", getTableIncludeList(catalog));
    props.setProperty("database.names", config.get(JdbcUtils.DATABASE_KEY).asText());

    final String msgKeyColumns = getMessageKeyColumnValue(catalog);
    System.out.println("msgKeyColumns: " + msgKeyColumns);
    if (isCdc(config) && !msgKeyColumns.isEmpty()) {
      // If the replication method is CDC, we need to set the message key columns
      props.setProperty("message.key.columns", msgKeyColumns);
    }

    final Duration heartbeatInterval =
        (database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean())
            ? HEARTBEAT_INTERVAL_IN_TESTS
            : HEARTBEAT_INTERVAL;
    props.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis()));

    // Set poll.interval.ms to 5s. This parameter will determine how long Debezium will wait before
    // querying for new data. It must be lower than heartbeat.interval.ms
    final Duration pollInterval =
        (database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean())
            ? POLL_INTERVAL_IN_TESTS
            : POLL_INTERVAL;
    props.setProperty("poll.interval.ms", Long.toString(pollInterval.toMillis()));

    if (config.has("ssl_method")) {
      final JsonNode sslConfig = config.get("ssl_method");
      final String sslMethod = sslConfig.get("ssl_method").asText();
      if ("unencrypted".equals(sslMethod)) {
        props.setProperty("database.encrypt", "false");
        props.setProperty("driver.trustServerCertificate", "true");
      } else if ("encrypted_trust_server_certificate".equals(sslMethod)) {
        props.setProperty("driver.encrypt", "true");
        props.setProperty("driver.trustServerCertificate", "true");
      } else if ("encrypted_verify_certificate".equals(sslMethod)) {
        props.setProperty("driver.encrypt", "true");
        props.setProperty("driver.trustServerCertificate", "false");
        if (dbConfig.has("trustStore") && !dbConfig.get("trustStore").asText().isEmpty()) {
          props.setProperty("database.trustStore", dbConfig.get("trustStore").asText());
        }

        if (dbConfig.has("trustStorePassword") && !dbConfig.get("trustStorePassword").asText().isEmpty()) {
          props.setProperty("database.trustStorePassword", dbConfig.get("trustStorePassword").asText());
        }

        if (dbConfig.has("hostNameInCertificate") && !dbConfig.get("hostNameInCertificate").asText().isEmpty()) {
          props.setProperty("database.hostNameInCertificate", dbConfig.get("hostNameInCertificate").asText());
        }
      }
    } else {
      props.setProperty("driver.trustServerCertificate", "true");
    }

    return props;
  }

  private static String getSchema(final ConfiguredAirbyteCatalog catalog) {
    return catalog.getStreams().stream()
        .filter(s -> s.getSyncMode() == SyncMode.INCREMENTAL)
        .map(ConfiguredAirbyteStream::getStream)
        .map(AirbyteStream::getNamespace)
        // debezium needs commas escaped to split properly
        .map(x -> StringUtils.escape(x, new char[] {','}, "\\,"))
        .collect(Collectors.joining(","));
  }

  /**
   * Returns a comma-separated list of fully-qualified table identifiers (schema.table) for Debezium's
   * table.include.list property. This ensures only explicitly selected tables are captured by CDC,
   * not all CDC-enabled tables in the schema.
   *
   * @param catalog the configured airbyte catalog
   * @return a comma-separated list of schema.table identifiers with proper escaping for Debezium
   */
  @VisibleForTesting
  static String getTableIncludeList(final ConfiguredAirbyteCatalog catalog) {
    return catalog.getStreams().stream()
        .filter(s -> s.getSyncMode() == SyncMode.INCREMENTAL)
        .map(ConfiguredAirbyteStream::getStream)
        .map(stream -> {
          final String schema = stream.getNamespace();
          final String table = stream.getName();
          final String fullTableId = schema + "." + table;
          // Use Pattern.quote to escape special regex characters, then escape commas for Debezium
          return StringUtils.escape(Pattern.quote(fullTableId), new char[] {','}, "\\,");
        })
        .collect(Collectors.joining(","));
  }

  /**
   * Escapes the following special characters in the input string: comma (,), period (.), semicolon
   * (;), and colon (:). Each special character is prefixed with a backslash.
   *
   * @param input the string to escape
   * @return the escaped string
   */
  private static String escapeSpecialChars(String input) {
    if (input == null) {
      return null;
    }
    StringBuilder sb = new StringBuilder();
    for (char c : input.toCharArray()) {
      if (c == ',' || c == '.' || c == ';' || c == ':') {
        sb.append('\\');
      }
      sb.append(c);
    }
    return sb.toString();
  }

  /**
   * Returns a string representation of the message key columns for the streams in the catalog. The
   * format is "schema1.table1:keyCol1,keyCol2;schema2.table2:keyCol1,keyCol2". This is used to set
   * the message key columns in the debezium properties. The method filters the streams to only
   * include those with incremental sync mode and user-defined primary keys.
   *
   * @param catalog the configured airbyte catalog
   * @return a string representation of the message key columns
   */
  private static String getMessageKeyColumnValue(final ConfiguredAirbyteCatalog catalog) {
    return catalog.getStreams().stream()
        .filter(s -> s.getSyncMode() == SyncMode.INCREMENTAL)
        .filter(s -> !s.getPrimaryKey().isEmpty())
        .map(s -> {
          final String tableId = escapeSpecialChars(s.getStream().getNamespace()) + "." + escapeSpecialChars(s.getStream().getName());
          final String keyCols = s.getPrimaryKey().get(0).stream()
              .map(col -> escapeSpecialChars(col))
              .collect(Collectors.joining(","));
          return tableId + ":" + keyCols;
        })
        .collect(Collectors.joining(";"));
  }

}
@@ -1,41 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.debezium.CdcSavedInfoFetcher;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import java.util.Optional;

public class MssqlCdcSavedInfoFetcher implements CdcSavedInfoFetcher {

  private final JsonNode savedOffset;
  private final JsonNode savedSchemaHistory;
  private final boolean isSavedSchemaHistoryCompressed;

  public MssqlCdcSavedInfoFetcher(final CdcState savedState) {
    final boolean savedStatePresent = savedState != null && savedState.getState() != null;
    this.savedOffset = savedStatePresent ? savedState.getState().get(MSSQL_CDC_OFFSET) : null;
    this.savedSchemaHistory = savedStatePresent ? savedState.getState().get(MSSQL_DB_HISTORY) : null;
    this.isSavedSchemaHistoryCompressed =
        savedStatePresent && savedState.getState().has(IS_COMPRESSED) && savedState.getState().get(IS_COMPRESSED).asBoolean();
  }

  @Override
  public JsonNode getSavedOffset() {
    return savedOffset;
  }

  @Override
  public SchemaHistory<Optional<JsonNode>> getSavedSchemaHistory() {
    return new SchemaHistory<>(Optional.ofNullable(savedSchemaHistory), isSavedSchemaHistoryCompressed);
  }

}
@@ -1,76 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.debezium.CdcStateHandler;
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MssqlCdcStateHandler implements CdcStateHandler {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCdcStateHandler.class);
  private final StateManager stateManager;

  public MssqlCdcStateHandler(final StateManager stateManager) {
    this.stateManager = stateManager;
  }

  @Override
  public boolean isCdcCheckpointEnabled() {
    return true;
  }

  @Override
  public AirbyteMessage saveState(final Map<String, String> offset, final SchemaHistory<String> dbHistory) {
    final Map<String, Object> state = new HashMap<>();
    state.put(MSSQL_CDC_OFFSET, offset);
    state.put(MSSQL_DB_HISTORY, dbHistory.getSchema());
    state.put(IS_COMPRESSED, dbHistory.isCompressed());

    final JsonNode asJson = Jsons.jsonNode(state);
    LOGGER.info("debezium state offset: {}", Jsons.jsonNode(offset));

    final CdcState cdcState = new CdcState().withState(asJson);
    stateManager.getCdcStateManager().setCdcState(cdcState);
    /*
     * The namespace pair is ignored by the global state manager, but is needed to satisfy the API
     * contract. Therefore, provide an empty optional.
     */
    final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty());
    return new AirbyteMessage().withType(Type.STATE).withState(stateMessage);
  }

  @Override
  public AirbyteMessage saveStateAfterCompletionOfSnapshotOfNewStreams() {
    LOGGER.info("Snapshot of new tables is complete, saving state");
    /*
     * The namespace pair is ignored by the global state manager, but is needed to satisfy the API
     * contract. Therefore, provide an empty optional.
     */
    final AirbyteStateMessage stateMessage = stateManager.emit(Optional.empty());
    return new AirbyteMessage().withType(Type.STATE).withState(stateMessage);
  }

  @Override
  public boolean compressSchemaHistoryForState() {
    return true;
  }

}
@@ -1,144 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.debezium.CdcTargetPosition;
import io.airbyte.cdk.integrations.debezium.internals.ChangeEventWithMetadata;
import io.airbyte.cdk.integrations.debezium.internals.SnapshotMetadata;
import io.airbyte.commons.json.Jsons;
import io.debezium.connector.sqlserver.Lsn;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MssqlCdcTargetPosition implements CdcTargetPosition<Lsn> {

  private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCdcTargetPosition.class);

  public final Lsn targetLsn;

  public MssqlCdcTargetPosition(final Lsn targetLsn) {
    this.targetLsn = targetLsn;
  }

  @Override
  public boolean reachedTargetPosition(final ChangeEventWithMetadata changeEventWithMetadata) {
    if (changeEventWithMetadata.isSnapshotEvent()) {
      return false;
    } else if (SnapshotMetadata.LAST == changeEventWithMetadata.getSnapshotMetadata()) {
      LOGGER.info("Signalling close because Snapshot is complete");
      return true;
    } else {
      final Lsn recordLsn = extractLsn(changeEventWithMetadata.getEventValueAsJson());
      final boolean isEventLSNAfter = targetLsn.compareTo(recordLsn) <= 0;
      if (isEventLSNAfter) {
        LOGGER.info("Signalling close because record's LSN : " + recordLsn + " is after target LSN : " + targetLsn);
      }
      return isEventLSNAfter;
    }
  }

  @Override
  public Lsn extractPositionFromHeartbeatOffset(final Map<String, ?> sourceOffset) {
    final Object commitLsnValue = sourceOffset.get("commit_lsn");
    return (commitLsnValue == null) ? Lsn.NULL : Lsn.valueOf(commitLsnValue.toString());
  }

  private Lsn extractLsn(final JsonNode valueAsJson) {
    return Optional.ofNullable(valueAsJson.get("source"))
        .flatMap(source -> Optional.ofNullable(source.get("commit_lsn").asText()))
        .map(Lsn::valueOf)
        .orElseThrow(() -> new IllegalStateException("Could not find LSN"));
  }

  @Override
  public boolean equals(final Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    final MssqlCdcTargetPosition that = (MssqlCdcTargetPosition) o;
    return targetLsn.equals(that.targetLsn);
  }

  @Override
  public int hashCode() {
    return targetLsn.hashCode();
  }

  public static MssqlCdcTargetPosition getTargetPosition(final JdbcDatabase database, final String dbName) {
    try {
      // We might have to wait a bit before querying the max_lsn to give the CDC capture job
      // a chance to catch up. This is important in tests, where reads might occur in quick succession
      // which might leave the CT tables (which Debezium consumes) in a stale state.
      final JsonNode sourceConfig = database.getSourceConfig();
      final String maxLsnQuery = """
                                 USE [%s];
                                 SELECT sys.fn_cdc_get_max_lsn() AS max_lsn;
                                 """.formatted(dbName);
      // Query the high-water mark.
      final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(
          connection -> connection.createStatement().executeQuery(maxLsnQuery),
          JdbcUtils.getDefaultSourceOperations()::rowToJson);
      Preconditions.checkState(jsonNodes.size() == 1);

      final Lsn maxLsn;
      if (jsonNodes.get(0).get("max_lsn") != null) {
        maxLsn = Lsn.valueOf(jsonNodes.get(0).get("max_lsn").binaryValue());
      } else {
        maxLsn = Lsn.NULL;
      }
      LOGGER.info("identified target lsn: " + maxLsn);
      return new MssqlCdcTargetPosition(maxLsn);
    } catch (final SQLException | IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public boolean isHeartbeatSupported() {
    return true;
  }

  @Override
  public boolean reachedTargetPosition(Lsn positionFromHeartbeat) {
    return positionFromHeartbeat.compareTo(targetLsn) >= 0;
  }

  @Override
  public boolean isEventAheadOffset(Map<String, String> offset, ChangeEventWithMetadata event) {
    if (offset == null || offset.size() != 1) {
      return false;
    }
    final Lsn eventLsn = extractLsn(event.getEventValueAsJson());
    final Lsn offsetLsn = offsetToLsn(offset);
    return eventLsn.compareTo(offsetLsn) > 0;
  }

  @Override
  public boolean isSameOffset(Map<String, String> offsetA, Map<String, String> offsetB) {
    if ((offsetA == null || offsetA.size() != 1) || (offsetB == null || offsetB.size() != 1)) {
      return false;
    }
    return offsetToLsn(offsetA).equals(offsetToLsn(offsetB));
  }

  private Lsn offsetToLsn(Map<String, String> offset) {
    final JsonNode offsetJson = Jsons.deserialize((String) offset.values().toArray()[0]);
    final JsonNode commitLsnJson = offsetJson.get("commit_lsn");
    return (commitLsnJson == null || commitLsnJson.isNull()) ? Lsn.NULL : Lsn.valueOf(commitLsnJson.asText());
  }

}
@@ -1,215 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import com.microsoft.sqlserver.jdbc.Geography;
|
||||
import com.microsoft.sqlserver.jdbc.Geometry;
|
||||
import com.microsoft.sqlserver.jdbc.SQLServerException;
|
||||
import io.airbyte.cdk.db.DataTypeUtils;
|
||||
import io.airbyte.cdk.db.jdbc.DateTimeConverter;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.DebeziumConverterUtils;
|
||||
import io.debezium.spi.converter.CustomConverter;
|
||||
import io.debezium.spi.converter.RelationalColumn;
|
||||
import java.math.BigDecimal;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.*;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
import microsoft.sql.DateTimeOffset;
|
||||
import org.apache.kafka.connect.data.SchemaBuilder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlDebeziumConverter implements CustomConverter<SchemaBuilder, RelationalColumn> {
|
||||
|
||||
private final Logger LOGGER = LoggerFactory.getLogger(MssqlDebeziumConverter.class);
|
||||
|
||||
private final Set<String> BINARY = Set.of("VARBINARY", "BINARY");
|
||||
private final Set<String> DATETIME_TYPES = Set.of("DATETIME", "DATETIME2", "SMALLDATETIME");
|
||||
private final String DATE = "DATE";
|
||||
private static final String DATETIMEOFFSET = "DATETIMEOFFSET";
|
||||
private static final String TIME_TYPE = "TIME";
|
||||
private static final String SMALLMONEY_TYPE = "SMALLMONEY";
|
||||
private static final String GEOMETRY = "GEOMETRY";
|
||||
private static final String GEOGRAPHY = "GEOGRAPHY";
|
||||
|
||||
private static final String DATETIME_FORMAT_MICROSECONDS = "yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]";
|
||||
|
||||
@Override
|
||||
public void configure(final Properties props) {}
|
||||
|
||||
@Override
|
||||
public void converterFor(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
if (DATE.equalsIgnoreCase(field.typeName())) {
|
||||
registerDate(field, registration);
|
||||
} else if (DATETIME_TYPES.contains(field.typeName().toUpperCase())) {
|
||||
registerDatetime(field, registration);
|
||||
} else if (SMALLMONEY_TYPE.equalsIgnoreCase(field.typeName())) {
|
||||
registerMoney(field, registration);
|
||||
} else if (BINARY.contains(field.typeName().toUpperCase())) {
|
||||
registerBinary(field, registration);
|
||||
} else if (GEOMETRY.equalsIgnoreCase(field.typeName())) {
|
||||
registerGeometry(field, registration);
|
||||
} else if (GEOGRAPHY.equalsIgnoreCase(field.typeName())) {
|
||||
registerGeography(field, registration);
|
||||
} else if (TIME_TYPE.equalsIgnoreCase(field.typeName())) {
|
||||
registerTime(field, registration);
|
||||
} else if (DATETIMEOFFSET.equalsIgnoreCase(field.typeName())) {
|
||||
registerDateTimeOffSet(field, registration);
|
||||
}
|
||||
}
|
||||
|
||||
private void registerGeometry(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
|
||||
if (input instanceof byte[]) {
|
||||
try {
|
||||
return Geometry.deserialize((byte[]) input).toString();
|
||||
} catch (final SQLServerException e) {
|
||||
LOGGER.error(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
LOGGER.warn("Uncovered Geometry class type '{}'. Use default converter",
|
||||
input.getClass().getName());
|
||||
return input.toString();
|
||||
});
|
||||
}
|
||||
|
||||
private void registerGeography(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
|
||||
if (input instanceof byte[]) {
|
||||
try {
|
||||
return Geography.deserialize((byte[]) input).toString();
|
||||
} catch (final SQLServerException e) {
|
||||
LOGGER.error(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
LOGGER.warn("Uncovered Geography class type '{}'. Use default converter",
|
||||
input.getClass().getName());
|
||||
return input.toString();
|
||||
});
|
||||
}
|
||||
|
||||
private void registerDate(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
if (field.typeName().equalsIgnoreCase("DATE")) {
|
||||
return DateTimeConverter.convertToDate(input);
|
||||
}
|
||||
return DateTimeConverter.convertToTimestamp(input);
|
||||
});
|
||||
}
|
||||
|
||||
private void registerDateTimeOffSet(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
|
||||
if (input instanceof DateTimeOffset) {
|
||||
var offsetDateTime = ((DateTimeOffset) input).getOffsetDateTime();
|
||||
return offsetDateTime.format(DataTypeUtils.TIMESTAMPTZ_FORMATTER);
|
||||
}
|
||||
|
||||
LOGGER.warn("Uncovered DateTimeOffSet class type '{}'. Use default converter",
|
||||
input.getClass().getName());
|
||||
return input.toString();
|
||||
});
|
||||
}
|
||||
|
||||
private void registerDatetime(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(),
|
||||
input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
if (input instanceof final Timestamp d) {
|
||||
final LocalDateTime localDateTime = d.toLocalDateTime();
|
||||
return localDateTime.format(DateTimeFormatter.ofPattern(DATETIME_FORMAT_MICROSECONDS));
|
||||
}
|
||||
|
||||
if (input instanceof final Long d) {
|
||||
// During schema history creation, datetime input arrives as an epoch value in nanoseconds.
// This is needed, for example, for a column defined as:
// [TransactionDate] DATETIME2 (7) DEFAULT ('2024-01-01T00:00:00.0000000') NOT NULL
|
||||
final Instant instant = Instant.ofEpochMilli(d / 1000 / 1000);
|
||||
final LocalDateTime localDateTime = LocalDateTime.ofInstant(instant, ZoneId.of("UTC"));
|
||||
return localDateTime.format(DateTimeFormatter.ofPattern(DATETIME_FORMAT_MICROSECONDS));
|
||||
}
|
||||
|
||||
return input.toString();
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
private void registerTime(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
|
||||
if (input instanceof Timestamp) {
|
||||
return DataTypeUtils.toISOTimeString(((Timestamp) input).toLocalDateTime());
|
||||
}
|
||||
|
||||
LOGGER.warn("Uncovered time class type '{}'. Use default converter",
|
||||
input.getClass().getName());
|
||||
return input.toString();
|
||||
});
|
||||
}
|
||||
|
||||
private void registerMoney(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.float64(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
|
||||
if (input instanceof BigDecimal) {
|
||||
return ((BigDecimal) input).doubleValue();
|
||||
}
|
||||
|
||||
LOGGER.warn("Uncovered money class type '{}'. Use default converter",
|
||||
input.getClass().getName());
|
||||
return input.toString();
|
||||
});
|
||||
}
|
||||
|
||||
private void registerBinary(final RelationalColumn field,
|
||||
final ConverterRegistration<SchemaBuilder> registration) {
|
||||
registration.register(SchemaBuilder.string(), input -> {
|
||||
if (Objects.isNull(input)) {
|
||||
return DebeziumConverterUtils.convertDefaultValue(field);
|
||||
}
|
||||
|
||||
if (input instanceof byte[]) {
|
||||
return Base64.getEncoder().encodeToString((byte[]) input);
|
||||
}
|
||||
|
||||
LOGGER.warn("Uncovered binary class type '{}'. Use default converter",
|
||||
input.getClass().getName());
|
||||
return input.toString();
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,304 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifierList;
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting;
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getIdentifierWithQuoting;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.HIERARCHYID;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import java.math.BigDecimal;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
 * Utility class with constants and helper methods related to querying MSSQL.
 */
|
||||
public class MssqlQueryUtils {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlQueryUtils.class);
|
||||
private static final String MAX_OC_VALUE_QUERY =
|
||||
"""
|
||||
SELECT MAX(%s) as %s FROM %s;
|
||||
""";
|
||||
|
||||
public record TableSizeInfo(Long tableSize, Long avgRowLength) {}
|
||||
|
||||
private static final String MAX_CURSOR_VALUE_QUERY =
|
||||
"""
|
||||
SELECT TOP 1 %s, COUNT(*) AS %s FROM %s WHERE %s = (SELECT MAX(%s) FROM %s) GROUP BY %s;
|
||||
""";
public static final String INDEX_QUERY = "EXEC sp_helpindex N'%s'";
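// Illustrative example (assumed names): INDEX_QUERY.formatted("\"dbo\".\"users\"") renders as
//   EXEC sp_helpindex N'"dbo"."users"'
// which returns one row per index with index_name, index_description and index_keys.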
|
||||
|
||||
public record Index(
|
||||
@JsonProperty("index_name") String name,
|
||||
@JsonProperty("index_description") String description,
|
||||
@JsonProperty("index_keys") String keys) {}
|
||||
|
||||
public static final String TABLE_ESTIMATE_QUERY =
|
||||
"""
|
||||
EXEC sp_spaceused N'"%s"."%s"'
|
||||
""";
|
||||
|
||||
public static final String MAX_OC_COL = "max_oc";
|
||||
public static final String DATA_SIZE_HUMAN_READABLE = "data";
|
||||
public static final String NUM_ROWS = "rows";
|
||||
|
||||
public static void getIndexInfoForStreams(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog, final String quoteString) {
|
||||
for (final ConfiguredAirbyteStream stream : catalog.getStreams()) {
|
||||
final String streamName = stream.getStream().getName();
|
||||
final String schemaName = stream.getStream().getNamespace();
|
||||
final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, streamName, quoteString);
|
||||
LOGGER.info("Discovering indexes for table {}", fullTableName);
|
||||
try {
|
||||
final String query = INDEX_QUERY.formatted(fullTableName);
|
||||
LOGGER.debug("Index lookup query: {}", query);
|
||||
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(query).executeQuery(),
|
||||
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
|
||||
if (jsonNodes != null) {
|
||||
jsonNodes.stream().map(node -> Jsons.convertValue(node, Index.class))
|
||||
.forEach(i -> LOGGER.info("Index {}", i));
|
||||
}
|
||||
} catch (final Exception ex) {
|
||||
LOGGER.info("Failed to get index for {}", fullTableName);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static String getMaxOcValueForStream(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteStream stream,
|
||||
final String ocFieldName,
|
||||
final String quoteString) {
|
||||
final String name = stream.getStream().getName();
|
||||
final String namespace = stream.getStream().getNamespace();
|
||||
final String fullTableName =
|
||||
getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString);
|
||||
final String maxOcQuery = String.format(MAX_OC_VALUE_QUERY,
|
||||
getIdentifierWithQuoting(ocFieldName, quoteString),
|
||||
MAX_OC_COL,
|
||||
fullTableName);
|
||||
LOGGER.info("Querying for max oc value: {}", maxOcQuery);
|
||||
try {
|
||||
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(maxOcQuery).executeQuery(),
|
||||
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
|
||||
Preconditions.checkState(jsonNodes.size() == 1);
|
||||
if (jsonNodes.get(0).get(MAX_OC_COL) == null) {
|
||||
LOGGER.info("Max PK is null for table {} - this could indicate an empty table", fullTableName);
|
||||
return null;
|
||||
}
|
||||
return jsonNodes.get(0).get(MAX_OC_COL).asText();
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static long toBytes(final String filesize) {
|
||||
long returnValue = -1;
|
||||
final Pattern patt = Pattern.compile("([\\d.]+)[\s+]*([GMK]B)", Pattern.CASE_INSENSITIVE);
|
||||
final Matcher matcher = patt.matcher(filesize);
|
||||
Map<String, Integer> powerMap = new HashMap<String, Integer>();
|
||||
powerMap.put("GB", 3);
|
||||
powerMap.put("MB", 2);
|
||||
powerMap.put("KB", 1);
|
||||
if (matcher.find()) {
|
||||
String number = matcher.group(1).trim();
|
||||
int pow = powerMap.get(matcher.group(2).toUpperCase());
|
||||
BigDecimal bytes = new BigDecimal(number);
|
||||
bytes = bytes.multiply(BigDecimal.valueOf(1024).pow(pow));
|
||||
returnValue = bytes.longValue();
|
||||
}
|
||||
return returnValue;
|
||||
}
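// Illustrative behaviour of toBytes() (not part of the original source): toBytes("2.5 MB") returns
// 2_621_440, toBytes("1 GB") returns 1_073_741_824, and an input that does not match the pattern
// returns -1.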
|
||||
|
||||
public static Map<AirbyteStreamNameNamespacePair, TableSizeInfo> getTableSizeInfoForStreams(final JdbcDatabase database,
|
||||
final List<ConfiguredAirbyteStream> streams,
|
||||
final String quoteString) {
|
||||
final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap = new HashMap<>();
|
||||
streams.forEach(stream -> {
|
||||
try {
|
||||
final String name = stream.getStream().getName();
|
||||
final String namespace = stream.getStream().getNamespace();
|
||||
final String fullTableName =
|
||||
getFullyQualifiedTableNameWithQuoting(name, namespace, quoteString);
|
||||
final List<JsonNode> tableEstimateResult = getTableEstimate(database, namespace, name);
|
||||
|
||||
if (tableEstimateResult != null
|
||||
&& tableEstimateResult.size() == 1
|
||||
&& tableEstimateResult.get(0).get(DATA_SIZE_HUMAN_READABLE) != null
|
||||
&& tableEstimateResult.get(0).get(NUM_ROWS) != null) {
|
||||
final long tableEstimateBytes = toBytes(tableEstimateResult.get(0).get(DATA_SIZE_HUMAN_READABLE).asText());
|
||||
final long numRows = tableEstimateResult.get(0).get(NUM_ROWS).asLong();
|
||||
final long avgTableRowSizeBytes = numRows > 0 ? tableEstimateBytes / numRows : 0;
|
||||
LOGGER.info("Stream {} size estimate is {}, average row size estimate is {}", fullTableName, tableEstimateBytes, avgTableRowSizeBytes);
|
||||
final TableSizeInfo tableSizeInfo = new TableSizeInfo(tableEstimateBytes, avgTableRowSizeBytes);
|
||||
final AirbyteStreamNameNamespacePair namespacePair =
|
||||
new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
|
||||
tableSizeInfoMap.put(namespacePair, tableSizeInfo);
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
LOGGER.warn("Error occurred while attempting to estimate sync size", e);
|
||||
}
|
||||
});
|
||||
return tableSizeInfoMap;
|
||||
}
|
||||
|
||||
/**
 * Iterates through each stream and finds the max cursor value and the count of records that share
 * that value, based on the cursor field configured for the stream. The results are collected into
 * a map of AirbyteStreamNameNamespacePair -> CursorBasedStatus.
 *
 * @param database the source db
 * @param streams streams to be synced
 * @param stateManager stream stateManager
 * @return Map of streams to statuses
 */
|
||||
public static Map<io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair, CursorBasedStatus> getCursorBasedSyncStatusForStreams(final JdbcDatabase database,
|
||||
final List<ConfiguredAirbyteStream> streams,
|
||||
final StateManager stateManager,
|
||||
final String quoteString) {
|
||||
|
||||
final Map<io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair, CursorBasedStatus> cursorBasedStatusMap = new HashMap<>();
|
||||
streams.forEach(stream -> {
|
||||
final String name = stream.getStream().getName();
|
||||
final String namespace = stream.getStream().getNamespace();
|
||||
final String fullTableName =
|
||||
getFullyQualifiedTableNameWithQuoting(namespace, name, quoteString);
|
||||
|
||||
final Optional<CursorInfo> cursorInfoOptional =
|
||||
stateManager.getCursorInfo(new AirbyteStreamNameNamespacePair(name, namespace));
|
||||
if (cursorInfoOptional.isEmpty()) {
|
||||
throw new RuntimeException(String.format("Stream %s was not provided with an appropriate cursor", stream.getStream().getName()));
|
||||
}
|
||||
final CursorBasedStatus cursorBasedStatus = new CursorBasedStatus();
|
||||
final Optional<String> maybeCursorField = Optional.ofNullable(cursorInfoOptional.get().getCursorField());
|
||||
maybeCursorField.ifPresent(cursorField -> {
|
||||
LOGGER.info("Cursor {}. Querying max cursor value for {}.{}", cursorField, namespace, name);
|
||||
final String quotedCursorField = getIdentifierWithQuoting(cursorField, quoteString);
|
||||
final String counterField = cursorField + "_count";
|
||||
final String quotedCounterField = getIdentifierWithQuoting(counterField, quoteString);
|
||||
final String cursorBasedSyncStatusQuery = String.format(MAX_CURSOR_VALUE_QUERY,
|
||||
quotedCursorField,
|
||||
quotedCounterField,
|
||||
fullTableName,
|
||||
quotedCursorField,
|
||||
quotedCursorField,
|
||||
fullTableName,
|
||||
quotedCursorField);
|
||||
final List<JsonNode> jsonNodes;
|
||||
try {
|
||||
jsonNodes = database.bufferedResultSetQuery(conn -> conn.prepareStatement(cursorBasedSyncStatusQuery).executeQuery(),
|
||||
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to read max cursor value from %s.%s".formatted(namespace, name), e);
|
||||
}
|
||||
cursorBasedStatus.setCursorField(ImmutableList.of(cursorField));
|
||||
if (!jsonNodes.isEmpty()) {
|
||||
final JsonNode result = jsonNodes.get(0);
|
||||
LOGGER.info("Max cursor value for {}.{} is {}", namespace, fullTableName, result);
|
||||
cursorBasedStatus.setCursor(result.get(cursorField).asText());
|
||||
cursorBasedStatus.setCursorRecordCount(result.get(counterField).asLong());
|
||||
}
|
||||
cursorBasedStatus.setStateType(StateType.CURSOR_BASED);
|
||||
cursorBasedStatus.setVersion(2L);
|
||||
cursorBasedStatus.setStreamName(name);
|
||||
cursorBasedStatus.setStreamNamespace(namespace);
|
||||
cursorBasedStatusMap.put(new AirbyteStreamNameNamespacePair(name, namespace), cursorBasedStatus);
|
||||
});
|
||||
});
|
||||
|
||||
return cursorBasedStatusMap;
|
||||
}
|
||||
|
||||
private static List<JsonNode> getTableEstimate(final JdbcDatabase database, final String namespace, final String name)
|
||||
throws SQLException {
|
||||
// Construct the table estimate query.
|
||||
final String tableEstimateQuery =
|
||||
String.format(TABLE_ESTIMATE_QUERY, namespace, name);
|
||||
LOGGER.info("Querying for table estimate size: {}", tableEstimateQuery);
|
||||
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.createStatement().executeQuery(tableEstimateQuery),
|
||||
resultSet -> new MssqlSourceOperations().rowToJson(resultSet));
|
||||
Preconditions.checkState(jsonNodes.size() == 1);
|
||||
LOGGER.debug("Estimate: {}", jsonNodes);
|
||||
return jsonNodes;
|
||||
}
|
||||
|
||||
public static String prettyPrintConfiguredAirbyteStreamList(final List<ConfiguredAirbyteStream> streamList) {
|
||||
return streamList.stream().map(s -> "%s.%s".formatted(s.getStream().getNamespace(), s.getStream().getName())).collect(Collectors.joining(", "));
|
||||
}
|
||||
|
||||
/**
 * There is no support for hierarchyid even in the native SQL Server JDBC driver. Its value can be
 * converted to a nvarchar(4000) data type by calling the ToString() method. So we make a separate
 * query to get the table's metadata, check whether it contains any hierarchyid columns, and wrap
 * the required fields with the ToString() function in the final SELECT query. Reference:
 * https://docs.microsoft.com/en-us/sql/t-sql/data-types/hierarchyid-data-type-method-reference?view=sql-server-ver15#data-type-conversion
 * Note: this holds the main logic for the method of the same name in MssqlSource; the logic was
 * extracted so it can also be used in MssqlInitialLoadRecordIterator.
 *
 * @return the list of column names, updated to handle functions (if any) properly
 */
|
||||
public static String getWrappedColumnNames(
|
||||
final JdbcDatabase database,
|
||||
final String quoteString,
|
||||
final List<String> columnNames,
|
||||
final String schemaName,
|
||||
final String tableName) {
|
||||
final List<String> hierarchyIdColumns = new ArrayList<>();
|
||||
try {
|
||||
final String identifierQuoteString = database.getMetaData().getIdentifierQuoteString();
|
||||
final SQLServerResultSetMetaData sqlServerResultSetMetaData = (SQLServerResultSetMetaData) database
|
||||
.queryMetadata(String
|
||||
.format("SELECT TOP 1 %s FROM %s", // only first row is enough to get field's type
|
||||
enquoteIdentifierList(columnNames, quoteString),
|
||||
getFullyQualifiedTableNameWithQuoting(schemaName, tableName, quoteString)));
|
||||
|
||||
// metadata will be null if table doesn't contain records
|
||||
if (sqlServerResultSetMetaData != null) {
|
||||
for (int i = 1; i <= sqlServerResultSetMetaData.getColumnCount(); i++) {
|
||||
if (HIERARCHYID.equals(sqlServerResultSetMetaData.getColumnTypeName(i))) {
|
||||
hierarchyIdColumns.add(sqlServerResultSetMetaData.getColumnName(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate through the column names and, for hierarchyid columns, wrap the field with the
// ToString() function in the query. The resulting column looks like: testColumn.ToString() as "testColumn".
// ToString() is the only way SQL Server exposes a human-readable value rather than the
// mssql-specific HEX value.
|
||||
return String.join(", ", columnNames.stream()
|
||||
.map(
|
||||
el -> hierarchyIdColumns.contains(el) ? String.format("%s.ToString() as %s%s%s", el, identifierQuoteString, el, identifierQuoteString)
|
||||
: getIdentifierWithQuoting(el, quoteString))
|
||||
.toList());
|
||||
} catch (final SQLException e) {
|
||||
LOGGER.error("Failed to fetch metadata to prepare a proper request.", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,719 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.cdk.db.DataTypeUtils.TIMESTAMPTZ_FORMATTER;
|
||||
import static io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler.isAnyStreamIncrementalSyncMode;
|
||||
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT;
|
||||
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT;
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.*;
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbReadUtil.identifyStreamsForCursorBased;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlCdcHelper.*;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getCursorBasedSyncStatusForStreams;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getTableSizeInfoForStreams;
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.*;
|
||||
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
|
||||
import static java.util.stream.Collectors.toList;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.db.factory.DatabaseDriver;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig;
|
||||
import io.airbyte.cdk.db.util.SSLCertificateUtils;
|
||||
import io.airbyte.cdk.integrations.base.IntegrationRunner;
|
||||
import io.airbyte.cdk.integrations.base.Source;
|
||||
import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner;
|
||||
import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource;
|
||||
import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadHandler;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.NonResumableStateMessageProducer;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateGeneratorUtils;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
|
||||
import io.airbyte.commons.exceptions.ConfigErrorException;
|
||||
import io.airbyte.commons.features.EnvVariableFeatureFlags;
|
||||
import io.airbyte.commons.features.FeatureFlags;
|
||||
import io.airbyte.commons.functional.CheckedConsumer;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
|
||||
import io.airbyte.commons.util.AutoCloseableIterator;
|
||||
import io.airbyte.commons.util.AutoCloseableIterators;
|
||||
import io.airbyte.integrations.source.mssql.cursor_based.MssqlCursorBasedStateManager;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStreamStateManager;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.CursorBasedStreams;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams;
|
||||
import io.airbyte.protocol.models.CommonField;
|
||||
import io.airbyte.protocol.models.v0.*;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.security.KeyStoreException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.security.cert.CertificateException;
|
||||
import java.sql.*;
|
||||
import java.time.*;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.lang3.RandomStringUtils;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlSource extends AbstractJdbcSource<JDBCType> implements Source {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSource.class);
|
||||
public static final String DESCRIBE_TABLE_QUERY =
|
||||
"""
|
||||
sp_columns "%s"
|
||||
""";
|
||||
public static final String NULL_CURSOR_VALUE_WITH_SCHEMA_QUERY =
|
||||
"""
|
||||
SELECT CASE WHEN (SELECT TOP 1 1 FROM "%s"."%s" WHERE "%s" IS NULL)=1 then 1 else 0 end as %s
|
||||
""";
|
||||
public static final String DRIVER_CLASS = DatabaseDriver.MSSQLSERVER.getDriverClassName();
|
||||
public static final String MSSQL_CDC_OFFSET = "mssql_cdc_offset";
|
||||
public static final String MSSQL_DB_HISTORY = "mssql_db_history";
|
||||
public static final String IS_COMPRESSED = "is_compressed";
|
||||
public static final String CDC_LSN = "_ab_cdc_lsn";
|
||||
public static final String CDC_EVENT_SERIAL_NO = "_ab_cdc_event_serial_no";
|
||||
public static final String HIERARCHYID = "hierarchyid";
|
||||
private static final int INTERMEDIATE_STATE_EMISSION_FREQUENCY = 10_000;
|
||||
public static final String CDC_DEFAULT_CURSOR = "_ab_cdc_cursor";
|
||||
public static final String TUNNEL_METHOD = "tunnel_method";
|
||||
public static final String NO_TUNNEL = "NO_TUNNEL";
|
||||
public static final String SSL_METHOD = "ssl_method";
|
||||
public static final String SSL_METHOD_UNENCRYPTED = "unencrypted";
|
||||
private MssqlInitialLoadStateManager initialLoadStateManager = null;
|
||||
public static final String JDBC_DELIMITER = ";";
|
||||
private List<String> schemas;
|
||||
private int stateEmissionFrequency;
|
||||
private final FeatureFlags featureFlags;
|
||||
public static final String REPLICATION_INCREMENTAL_EXCLUDE_TODAYS = "exclude_todays_data";
|
||||
|
||||
public static Source sshWrappedSource(final MssqlSource source) {
|
||||
return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY);
|
||||
}
|
||||
|
||||
public MssqlSource() {
|
||||
this(new EnvVariableFeatureFlags());
|
||||
}
|
||||
|
||||
public MssqlSource(final FeatureFlags featureFlags) {
|
||||
super(DRIVER_CLASS, AdaptiveStreamingQueryConfig::new, new MssqlSourceOperations());
|
||||
this.featureFlags = featureFlags;
|
||||
this.stateEmissionFrequency = INTERMEDIATE_STATE_EMISSION_FREQUENCY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FeatureFlags getFeatureFlags() {
|
||||
return featureFlags;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AirbyteStateType getSupportedStateType(final JsonNode config) {
|
||||
return MssqlCdcHelper.isCdc(config) ? AirbyteStateType.GLOBAL : AirbyteStateType.STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteConnectionStatus check(final JsonNode config) throws Exception {
|
||||
// #15808: Disallow connecting to the db with the unencrypted SSL method when connecting directly
// and not over an SSH tunnel
|
||||
if (cloudDeploymentMode()) {
|
||||
if (config.has(TUNNEL_METHOD)
|
||||
&& config.get(TUNNEL_METHOD).has(TUNNEL_METHOD)
|
||||
&& config.get(TUNNEL_METHOD).get(TUNNEL_METHOD).asText().equals(NO_TUNNEL)) {
|
||||
// If no SSH tunnel.
|
||||
if (config.has(SSL_METHOD) && config.get(SSL_METHOD).has(SSL_METHOD) &&
|
||||
SSL_METHOD_UNENCRYPTED.equalsIgnoreCase(config.get(SSL_METHOD).get(SSL_METHOD).asText())) {
|
||||
// Fail in case SSL method is unencrypted.
|
||||
return new AirbyteConnectionStatus()
|
||||
.withStatus(AirbyteConnectionStatus.Status.FAILED)
|
||||
.withMessage("Unsecured connection not allowed. " +
|
||||
"If no SSH Tunnel set up, please use one of the following SSL methods: " +
|
||||
"encrypted_trust_server_certificate, encrypted_verify_certificate.");
|
||||
}
|
||||
}
|
||||
}
|
||||
return super.check(config);
|
||||
}
|
||||
|
||||
/**
|
||||
* See {@link MssqlQueryUtils#getWrappedColumnNames}
|
||||
*/
|
||||
@Override
|
||||
protected String getWrappedColumnNames(final JdbcDatabase database,
|
||||
final Connection connection,
|
||||
final List<String> columnNames,
|
||||
final String schemaName,
|
||||
final String tableName) {
|
||||
return MssqlQueryUtils.getWrappedColumnNames(database, getQuoteString(), columnNames, schemaName, tableName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public JsonNode toDatabaseConfig(final JsonNode mssqlConfig) {
|
||||
final List<String> additionalParameters = new ArrayList<>();
|
||||
|
||||
final StringBuilder jdbcUrl = new StringBuilder(
|
||||
String.format("jdbc:sqlserver://%s:%s;databaseName=%s;",
|
||||
mssqlConfig.get(JdbcUtils.HOST_KEY).asText(),
|
||||
mssqlConfig.get(JdbcUtils.PORT_KEY).asText(),
|
||||
mssqlConfig.get(JdbcUtils.DATABASE_KEY).asText()));
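// Illustrative example (assumed config values): host "example.host", port 1433 and database "mydb"
// yield a base URL of "jdbc:sqlserver://example.host:1433;databaseName=mydb;" before any additional
// parameters are appended below.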
|
||||
|
||||
if (mssqlConfig.has("schemas") && mssqlConfig.get("schemas").isArray()) {
|
||||
schemas = new ArrayList<>();
|
||||
for (final JsonNode schema : mssqlConfig.get("schemas")) {
|
||||
schemas.add(schema.asText());
|
||||
}
|
||||
}
|
||||
|
||||
if (mssqlConfig.has("ssl_method")) {
|
||||
readSsl(mssqlConfig, additionalParameters);
|
||||
} else {
|
||||
additionalParameters.add("trustServerCertificate=true");
|
||||
}
|
||||
|
||||
if (!additionalParameters.isEmpty()) {
|
||||
jdbcUrl.append(String.join(";", additionalParameters));
|
||||
}
|
||||
|
||||
final ImmutableMap.Builder<Object, Object> configBuilder = ImmutableMap.builder()
|
||||
.put(JdbcUtils.USERNAME_KEY, mssqlConfig.get(JdbcUtils.USERNAME_KEY).asText())
|
||||
.put(JdbcUtils.PASSWORD_KEY, mssqlConfig.get(JdbcUtils.PASSWORD_KEY).asText())
|
||||
.put(JdbcUtils.JDBC_URL_KEY, jdbcUrl.toString());
|
||||
|
||||
if (mssqlConfig.has(JdbcUtils.JDBC_URL_PARAMS_KEY)) {
|
||||
configBuilder.put(JdbcUtils.CONNECTION_PROPERTIES_KEY, mssqlConfig.get(JdbcUtils.JDBC_URL_PARAMS_KEY));
|
||||
}
|
||||
|
||||
final Map<String, String> additionalParams = new HashMap<>();
|
||||
additionalParameters.forEach(param -> {
|
||||
final int i = param.indexOf('=');
|
||||
additionalParams.put(param.substring(0, i), param.substring(i + 1));
|
||||
});
|
||||
|
||||
configBuilder.putAll(additionalParams);
|
||||
|
||||
return Jsons.jsonNode(configBuilder.build());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getExcludedInternalNameSpaces() {
|
||||
return Set.of(
|
||||
"INFORMATION_SCHEMA",
|
||||
"sys",
|
||||
"spt_fallback_db",
|
||||
"spt_monitor",
|
||||
"spt_values",
|
||||
"spt_fallback_usg",
|
||||
"MSreplication_options",
|
||||
"spt_fallback_dev",
|
||||
"cdc"); // is this actually ok? what if the user wants cdc schema for some reason?
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteCatalog discover(final JsonNode config) {
|
||||
final AirbyteCatalog catalog = super.discover(config);
|
||||
|
||||
if (MssqlCdcHelper.isCdc(config)) {
|
||||
|
||||
final List<AirbyteStream> streams = catalog.getStreams().stream()
|
||||
.map(MssqlSource::overrideSyncModes)
|
||||
.map(MssqlSource::setIncrementalToSourceDefined)
|
||||
.map(MssqlSource::setDefaultCursorFieldForCdc)
|
||||
.map(MssqlSource::addCdcMetadataColumns)
|
||||
.collect(toList());
|
||||
|
||||
catalog.setStreams(streams);
|
||||
}
|
||||
|
||||
return catalog;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<TableInfo<CommonField<JDBCType>>> discoverInternal(final JdbcDatabase database) throws Exception {
|
||||
if (schemas != null && !schemas.isEmpty()) {
|
||||
return schemas.stream().flatMap(schema -> {
|
||||
LOGGER.info("Get columns for schema: {}", schema);
|
||||
try {
|
||||
return super.discoverInternal(database, schema).stream();
|
||||
} catch (final Exception e) {
|
||||
throw new ConfigErrorException(String.format("Error getting columns for schema: %s", schema), e);
|
||||
}
|
||||
}).collect(toList());
|
||||
} else {
|
||||
LOGGER.info("No schemas explicitly set on UI to process, so will process all of existing schemas in DB");
|
||||
return super.discoverInternal(database);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean verifyCursorColumnValues(final JdbcDatabase database, final String schema, final String tableName, final String columnName)
|
||||
throws SQLException {
|
||||
|
||||
boolean nullValExist = false;
|
||||
final String resultColName = "nullValue";
|
||||
final String descQuery = String.format(DESCRIBE_TABLE_QUERY, tableName);
|
||||
final Optional<JsonNode> field = database.bufferedResultSetQuery(conn -> conn.createStatement()
|
||||
.executeQuery(descQuery),
|
||||
resultSet -> JdbcUtils.getDefaultSourceOperations().rowToJson(resultSet))
|
||||
.stream()
|
||||
.peek(x -> LOGGER.info("MsSQL Table Structure {}, {}, {}", x.toString(), schema, tableName))
|
||||
.filter(x -> x.get("TABLE_OWNER") != null)
|
||||
.filter(x -> x.get("COLUMN_NAME") != null)
|
||||
.filter(x -> x.get("TABLE_OWNER").asText().equals(schema))
|
||||
.filter(x -> x.get("COLUMN_NAME").asText().equalsIgnoreCase(columnName))
|
||||
.findFirst();
|
||||
if (field.isPresent()) {
|
||||
final JsonNode jsonNode = field.get();
|
||||
final JsonNode isNullable = jsonNode.get("IS_NULLABLE");
|
||||
if (isNullable != null) {
|
||||
if (isNullable.asText().equalsIgnoreCase("YES")) {
|
||||
final String query = String.format(NULL_CURSOR_VALUE_WITH_SCHEMA_QUERY,
|
||||
schema, tableName, columnName, resultColName);
|
||||
|
||||
LOGGER.debug("null value query: {}", query);
|
||||
final List<JsonNode> jsonNodes = database.bufferedResultSetQuery(conn -> conn.createStatement().executeQuery(query),
|
||||
resultSet -> JdbcUtils.getDefaultSourceOperations().rowToJson(resultSet));
|
||||
Preconditions.checkState(jsonNodes.size() == 1);
|
||||
nullValExist = jsonNodes.get(0).get(resultColName).booleanValue();
|
||||
LOGGER.info("null cursor value for MsSQL source : {}, shema {} , tableName {}, columnName {} ", nullValExist, schema, tableName,
|
||||
columnName);
|
||||
}
|
||||
}
|
||||
}
|
||||
// return !nullValExist;
|
||||
// will enable after we have sent comms to users this affects
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<CheckedConsumer<JdbcDatabase, Exception>> getCheckOperations(final JsonNode config)
|
||||
throws Exception {
|
||||
final List<CheckedConsumer<JdbcDatabase, Exception>> checkOperations = new ArrayList<>(
|
||||
super.getCheckOperations(config));
|
||||
|
||||
if (MssqlCdcHelper.isCdc(config)) {
|
||||
checkOperations.add(database -> assertCdcEnabledInDb(config, database));
|
||||
checkOperations.add(database -> assertCdcSchemaQueryable(config, database));
|
||||
checkOperations.add(database -> assertSqlServerAgentRunning(database));
|
||||
}
|
||||
|
||||
return checkOperations;
|
||||
}
|
||||
|
||||
protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase database)
|
||||
throws SQLException {
|
||||
final List<JsonNode> queryResponse = database.queryJsons(connection -> {
|
||||
final String sql = "SELECT name, is_cdc_enabled FROM sys.databases WHERE name = ?";
|
||||
final PreparedStatement ps = connection.prepareStatement(sql);
|
||||
ps.setString(1, config.get(JdbcUtils.DATABASE_KEY).asText());
|
||||
LOGGER.info(String.format("Checking that cdc is enabled on database '%s' using the query: '%s'",
|
||||
config.get(JdbcUtils.DATABASE_KEY).asText(), sql));
|
||||
return ps;
|
||||
}, sourceOperations::rowToJson);
|
||||
|
||||
if (queryResponse.size() < 1) {
|
||||
throw new RuntimeException(String.format(
|
||||
"Couldn't find '%s' in sys.databases table. Please check the spelling and that the user has relevant permissions (see docs).",
|
||||
config.get(JdbcUtils.DATABASE_KEY).asText()));
|
||||
}
|
||||
if (!(queryResponse.get(0).get("is_cdc_enabled").asBoolean())) {
|
||||
throw new RuntimeException(String.format(
|
||||
"Detected that CDC is not enabled for database '%s'. Please check the documentation on how to enable CDC on MS SQL Server.",
|
||||
config.get(JdbcUtils.DATABASE_KEY).asText()));
|
||||
}
|
||||
}
|
||||
|
||||
protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabase database)
|
||||
throws SQLException {
|
||||
final List<JsonNode> queryResponse = database.queryJsons(connection -> {
|
||||
boolean isAzureSQL = false;
|
||||
|
||||
try (final Statement stmt = connection.createStatement();
|
||||
final ResultSet editionRS = stmt.executeQuery("SELECT ServerProperty('Edition')")) {
|
||||
isAzureSQL = editionRS.next() && "SQL Azure".equals(editionRS.getString(1));
|
||||
}
|
||||
|
||||
// Azure SQL does not support USE clause
|
||||
final String sql =
|
||||
isAzureSQL ? "SELECT * FROM cdc.change_tables"
|
||||
: "USE [" + config.get(JdbcUtils.DATABASE_KEY).asText() + "]; SELECT * FROM cdc.change_tables";
|
||||
final PreparedStatement ps = connection.prepareStatement(sql);
|
||||
LOGGER.info(String.format(
|
||||
"Checking user '%s' can query the cdc schema and that we have at least 1 cdc enabled table using the query: '%s'",
|
||||
config.get(JdbcUtils.USERNAME_KEY).asText(), sql));
|
||||
return ps;
|
||||
}, sourceOperations::rowToJson);
|
||||
|
||||
// Ensure at least one available CDC table
|
||||
if (queryResponse.size() < 1) {
|
||||
throw new RuntimeException(
|
||||
"No cdc-enabled tables found. Please check the documentation on how to enable CDC on MS SQL Server.");
|
||||
}
|
||||
}
|
||||
|
||||
// todo: ensure this works for Azure managed SQL (since it uses different sql server agent)
|
||||
protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws SQLException {
|
||||
try {
|
||||
// EngineEdition property values can be found at
|
||||
// https://learn.microsoft.com/en-us/sql/t-sql/functions/serverproperty-transact-sql?view=sql-server-ver16
|
||||
// SQL Server Agent is always running on SQL Managed Instance:
|
||||
// https://learn.microsoft.com/en-us/azure/azure-sql/managed-instance/transact-sql-tsql-differences-sql-server?view=azuresql#sql-server-agent
|
||||
final Integer engineEdition = database.queryInt("SELECT ServerProperty('EngineEdition')");
|
||||
if (engineEdition == 8) {
|
||||
LOGGER.info(String.format("SQL Server Agent is assumed to be running when EngineEdition == '%s'", engineEdition));
|
||||
} else {
|
||||
final List<JsonNode> queryResponse = database.queryJsons(connection -> {
|
||||
final String sql =
|
||||
"SELECT status_desc FROM sys.dm_server_services WHERE [servicename] LIKE 'SQL Server Agent%' OR [servicename] LIKE 'SQL Server 代理%' ";
|
||||
final PreparedStatement ps = connection.prepareStatement(sql);
|
||||
LOGGER.info(String.format("Checking that the SQL Server Agent is running using the query: '%s'", sql));
|
||||
return ps;
|
||||
}, sourceOperations::rowToJson);
|
||||
|
||||
if (!(queryResponse.get(0).get("status_desc").toString().contains("Running"))) {
|
||||
throw new RuntimeException(String.format(
|
||||
"The SQL Server Agent is not running. Current state: '%s'. Please check the documentation on ensuring SQL Server Agent is running.",
|
||||
queryResponse.get(0).get("status_desc").toString()));
|
||||
}
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
if (e.getCause() != null && e.getCause().getClass().equals(com.microsoft.sqlserver.jdbc.SQLServerException.class)) {
|
||||
LOGGER.warn(String.format(
|
||||
"Skipping check for whether the SQL Server Agent is running, SQLServerException thrown: '%s'",
|
||||
e.getMessage()));
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public @NotNull List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(final JdbcDatabase database,
|
||||
final @NotNull ConfiguredAirbyteCatalog catalog,
|
||||
final @NotNull Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final StateManager stateManager,
|
||||
final @NotNull Instant emittedAt) {
|
||||
final JsonNode sourceConfig = database.getSourceConfig();
|
||||
if (MssqlCdcHelper.isCdc(sourceConfig) && isAnyStreamIncrementalSyncMode(catalog)) {
|
||||
LOGGER.info("using OC + CDC");
|
||||
return MssqlInitialReadUtil.getCdcReadIterators(database, catalog, tableNameToTable, stateManager, initialLoadStateManager, emittedAt,
|
||||
getQuoteString());
|
||||
} else {
|
||||
if (isAnyStreamIncrementalSyncMode(catalog)) {
|
||||
LOGGER.info("Syncing via Primary Key");
|
||||
final MssqlCursorBasedStateManager cursorBasedStateManager = new MssqlCursorBasedStateManager(stateManager.getRawStateMessages(), catalog);
|
||||
|
||||
if (isExcludeTodayDateForCursorIncremental(sourceConfig)) {
|
||||
setCutoffCursorTime(tableNameToTable, cursorBasedStateManager.getPairToCursorInfoMap());
|
||||
}
|
||||
|
||||
final InitialLoadStreams initialLoadStreams =
|
||||
filterStreamInIncrementalMode(streamsForInitialOrderedColumnLoad(cursorBasedStateManager, catalog));
|
||||
final Map<AirbyteStreamNameNamespacePair, CursorBasedStatus> pairToCursorBasedStatus =
|
||||
getCursorBasedSyncStatusForStreams(database, initialLoadStreams.streamsForInitialLoad(), stateManager, getQuoteString());
|
||||
final CursorBasedStreams cursorBasedStreams =
|
||||
new CursorBasedStreams(identifyStreamsForCursorBased(catalog, initialLoadStreams.streamsForInitialLoad()), pairToCursorBasedStatus);
|
||||
|
||||
logStreamSyncStatus(initialLoadStreams.streamsForInitialLoad(), "Primary Key");
|
||||
logStreamSyncStatus(cursorBasedStreams.streamsForCursorBased(), "Cursor");
|
||||
|
||||
final MssqlInitialLoadHandler initialLoadHandler =
|
||||
new MssqlInitialLoadHandler(sourceConfig, database, new MssqlSourceOperations(), getQuoteString(), initialLoadStateManager,
|
||||
Optional.of(namespacePair -> Jsons.jsonNode(pairToCursorBasedStatus.get(namespacePair))),
|
||||
getTableSizeInfoForStreams(database, initialLoadStreams.streamsForInitialLoad(), getQuoteString()));
|
||||
// Cursor based incremental iterators are decorated with start and complete status traces
|
||||
final List<AutoCloseableIterator<AirbyteMessage>> initialLoadIterator = new ArrayList<>(initialLoadHandler.getIncrementalIterators(
|
||||
new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()),
|
||||
tableNameToTable,
|
||||
emittedAt, true, true, Optional.empty()));
|
||||
|
||||
// Build Cursor based iterator
|
||||
final List<AutoCloseableIterator<AirbyteMessage>> cursorBasedIterator =
|
||||
new ArrayList<>(super.getIncrementalIterators(database,
|
||||
new ConfiguredAirbyteCatalog().withStreams(
|
||||
cursorBasedStreams.streamsForCursorBased()),
|
||||
tableNameToTable,
|
||||
cursorBasedStateManager, emittedAt));
|
||||
|
||||
return Stream.of(initialLoadIterator, cursorBasedIterator).flatMap(Collection::stream).collect(Collectors.toList());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
LOGGER.info("using CDC: {}", false);
|
||||
return super.getIncrementalIterators(database, catalog, tableNameToTable, stateManager, emittedAt);
|
||||
}
|
||||
|
||||
private static void setCutoffCursorTime(@NotNull Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
@NotNull Map<AirbyteStreamNameNamespacePair, CursorInfo> pairToCursorInfoMap) {
|
||||
LOGGER.info("Excluding Today's Date for incremental streams with temporal cursors");
|
||||
pairToCursorInfoMap.forEach((pair, cursorInfo) -> {
|
||||
final TableInfo<CommonField<JDBCType>> tableInfo = tableNameToTable.get("%s.%s".formatted(pair.getNamespace(), pair.getName()));
|
||||
final Optional<CommonField<JDBCType>> maybeCursorField =
|
||||
tableInfo.getFields().stream().filter(f -> f.getName().equals(cursorInfo.getCursorField()))
|
||||
.findFirst();
|
||||
maybeCursorField.ifPresent(f -> {
|
||||
LOGGER.info("Setting cutoff time for stream {} with cursor field {} ({}) to exclude today's data", pair, f.getName(), f.getType());
|
||||
setCursorCutoffInfoForValue(cursorInfo, f, Instant.now());
|
||||
LOGGER.info("Set cutoff time for stream {} with cursor field {} to {}", pair, f.getName(), cursorInfo.getCutoffTime());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
static void setCursorCutoffInfoForValue(CursorInfo cursorInfo, @NotNull CommonField<JDBCType> f, Instant nowInstant) {
|
||||
switch (f.getType()) {
|
||||
case JDBCType.DATE -> {
|
||||
final var instant = nowInstant.atOffset(ZoneOffset.UTC);
|
||||
cursorInfo.setCutoffTime(ISO_LOCAL_DATE.format(instant));
|
||||
}
|
||||
case JDBCType.TIMESTAMP -> {
|
||||
final var instant = nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS);
|
||||
cursorInfo.setCutoffTime(DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(instant));
|
||||
}
|
||||
case JDBCType.TIMESTAMP_WITH_TIMEZONE -> {
|
||||
final var instant = nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS);
|
||||
cursorInfo.setCutoffTime(TIMESTAMPTZ_FORMATTER.format(instant));
|
||||
}
|
||||
default -> LOGGER.warn("Only temporal cursors can exclude today's data. Cursor {} of JDBC type {} cannot exclude today's data", f.getName(),
|
||||
f.getType());
|
||||
}
|
||||
}
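// Illustrative behaviour (not part of the original source): for a TIMESTAMP cursor and a current
// instant of 2024-06-02T15:30:00Z, the cutoff is set to midnight UTC of the same day (formatted with
// ISO_OFFSET_DATE_TIME), so records written today are excluded from the incremental read.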
|
||||
|
||||
@Override
|
||||
protected int getStateEmissionFrequency() {
|
||||
return this.stateEmissionFrequency;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
protected void setStateEmissionFrequencyForDebug(final int stateEmissionFrequency) {
|
||||
this.stateEmissionFrequency = stateEmissionFrequency;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void checkUserHasPrivileges(final JsonNode config, final JdbcDatabase database) {}
|
||||
|
||||
private static AirbyteStream overrideSyncModes(final AirbyteStream stream) {
|
||||
return stream.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL));
|
||||
}
|
||||
|
||||
// Note: in place mutation.
|
||||
private static AirbyteStream setIncrementalToSourceDefined(final AirbyteStream stream) {
|
||||
if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) {
|
||||
stream.setSourceDefinedCursor(true);
|
||||
}
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
/*
 * To prepare for Destination v2, CDC streams must have a default cursor field.
 * Cursor format: the airbyte [emittedAt] + [sync-wide record counter]
 */
|
||||
private static AirbyteStream setDefaultCursorFieldForCdc(final AirbyteStream stream) {
|
||||
if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) {
|
||||
stream.setDefaultCursorField(ImmutableList.of(CDC_DEFAULT_CURSOR));
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
// Note: in place mutation.
|
||||
private static AirbyteStream addCdcMetadataColumns(final AirbyteStream stream) {
|
||||
|
||||
final ObjectNode jsonSchema = (ObjectNode) stream.getJsonSchema();
|
||||
final ObjectNode properties = (ObjectNode) jsonSchema.get("properties");
|
||||
|
||||
final JsonNode airbyteIntegerType = Jsons.jsonNode(ImmutableMap.of("type", "number", "airbyte_type", "integer"));
|
||||
final JsonNode stringType = Jsons.jsonNode(ImmutableMap.of("type", "string"));
|
||||
properties.set(CDC_LSN, stringType);
|
||||
properties.set(CDC_UPDATED_AT, stringType);
|
||||
properties.set(CDC_DELETED_AT, stringType);
|
||||
properties.set(CDC_EVENT_SERIAL_NO, stringType);
|
||||
properties.set(CDC_DEFAULT_CURSOR, airbyteIntegerType);
|
||||
|
||||
return stream;
|
||||
}
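// Illustrative result (not part of the original source): after this mutation a CDC stream's JSON
// schema gains the string properties _ab_cdc_lsn, _ab_cdc_updated_at, _ab_cdc_deleted_at and
// _ab_cdc_event_serial_no, plus the integer-typed _ab_cdc_cursor default cursor.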
|
||||
|
||||
private void readSsl(final JsonNode sslMethod, final List<String> additionalParameters) {
|
||||
final JsonNode config = sslMethod.get("ssl_method");
|
||||
switch (config.get("ssl_method").asText()) {
|
||||
case "unencrypted" -> {
|
||||
additionalParameters.add("encrypt=false");
|
||||
additionalParameters.add("trustServerCertificate=true");
|
||||
}
|
||||
case "encrypted_trust_server_certificate" -> {
|
||||
additionalParameters.add("encrypt=true");
|
||||
additionalParameters.add("trustServerCertificate=true");
|
||||
}
|
||||
case "encrypted_verify_certificate" -> {
|
||||
additionalParameters.add("encrypt=true");
|
||||
additionalParameters.add("trustServerCertificate=false");
|
||||
|
||||
if (config.has("certificate")) {
|
||||
final String certificate = config.get("certificate").asText();
|
||||
final String password = RandomStringUtils.secure().nextAlphanumeric(100);
|
||||
final URI keyStoreUri;
|
||||
try {
|
||||
keyStoreUri = SSLCertificateUtils.keyStoreFromCertificate(certificate, password, null, null);
|
||||
} catch (final IOException | KeyStoreException | NoSuchAlgorithmException | CertificateException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
additionalParameters
|
||||
.add("trustStore=" + keyStoreUri.getPath());
|
||||
additionalParameters
|
||||
.add("trustStorePassword=" + password);
|
||||
}
|
||||
|
||||
if (config.has("hostNameInCertificate")) {
|
||||
additionalParameters
|
||||
.add("hostNameInCertificate=" + config.get("hostNameInCertificate").asText());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
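// Illustrative outcome (not part of the original source): for ssl_method "encrypted_verify_certificate"
// with a user-supplied certificate, the JDBC URL gains parameters such as
//   encrypt=true;trustServerCertificate=false;trustStore=<generated keystore path>;trustStorePassword=<random>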
|
||||
|
||||
@Override
|
||||
public Collection<AutoCloseableIterator<AirbyteMessage>> readStreams(final JsonNode config,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final JsonNode state)
|
||||
throws Exception {
|
||||
final AirbyteStateType supportedType = getSupportedStateType(config);
|
||||
final StateManager stateManager = StateManagerFactory.createStateManager(supportedType,
|
||||
StateGeneratorUtils.deserializeInitialState(state, supportedType), catalog);
|
||||
final Instant emittedAt = Instant.now();
|
||||
final JdbcDatabase database = createDatabase(config);
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> fullyQualifiedTableNameToInfo =
|
||||
discoverWithoutSystemTables(database)
|
||||
.stream()
|
||||
.collect(Collectors.toMap(t -> String.format("%s.%s", t.getNameSpace(), t.getName()),
|
||||
Function
|
||||
.identity()));
|
||||
initializeForStateManager(database, catalog, fullyQualifiedTableNameToInfo, stateManager);
|
||||
logPreSyncDebugData(database, catalog);
|
||||
return super.readStreams(config, catalog, state);
|
||||
}
|
||||
|
||||
private boolean cloudDeploymentMode() {
|
||||
return AdaptiveSourceRunner.CLOUD_MODE.equalsIgnoreCase(getFeatureFlags().deploymentMode());
|
||||
}
|
||||
|
||||
public Duration getConnectionTimeoutMssql(final Map<String, String> connectionProperties) {
|
||||
return getConnectionTimeout(connectionProperties);
|
||||
}
|
||||
|
||||
@Override
|
||||
public JdbcDatabase createDatabase(final JsonNode sourceConfig) throws SQLException {
|
||||
return createDatabase(sourceConfig, JDBC_DELIMITER);
|
||||
}
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final Source source = MssqlSource.sshWrappedSource(new MssqlSource());
|
||||
final MSSqlSourceExceptionHandler exceptionHandler = new MSSqlSourceExceptionHandler();
|
||||
LOGGER.info("starting source: {}", MssqlSource.class);
|
||||
new IntegrationRunner(source).run(args, exceptionHandler);
|
||||
LOGGER.info("completed source: {}", MssqlSource.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void logPreSyncDebugData(final JdbcDatabase database, final ConfiguredAirbyteCatalog catalog) throws SQLException {
|
||||
super.logPreSyncDebugData(database, catalog);
|
||||
MssqlQueryUtils.getIndexInfoForStreams(database, catalog, getQuoteString());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void initializeForStateManager(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final StateManager stateManager) {
|
||||
if (initialLoadStateManager != null) {
|
||||
return;
|
||||
}
|
||||
final var sourceConfig = database.getSourceConfig();
|
||||
if (isCdc(sourceConfig)) {
|
||||
initialLoadStateManager = getMssqlInitialLoadGlobalStateManager(database, catalog, stateManager, tableNameToTable, getQuoteString());
|
||||
} else {
|
||||
final MssqlCursorBasedStateManager cursorBasedStateManager = new MssqlCursorBasedStateManager(stateManager.getRawStateMessages(), catalog);
|
||||
final InitialLoadStreams initialLoadStreams = streamsForInitialOrderedColumnLoad(cursorBasedStateManager, catalog);
|
||||
initialLoadStateManager = new MssqlInitialLoadStreamStateManager(catalog, initialLoadStreams,
|
||||
initPairToOrderedColumnInfoMap(database, catalog, tableNameToTable, getQuoteString()));
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public InitialLoadHandler<JDBCType> getInitialLoadHandler(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteStream airbyteStream,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final StateManager stateManager) {
|
||||
final var sourceConfig = database.getSourceConfig();
|
||||
if (isCdc(sourceConfig)) {
|
||||
return getMssqlFullRefreshInitialLoadHandler(database, catalog, initialLoadStateManager, stateManager, airbyteStream, Instant.now(),
|
||||
getQuoteString())
|
||||
.get();
|
||||
} else {
|
||||
return new MssqlInitialLoadHandler(sourceConfig, database, new MssqlSourceOperations(), getQuoteString(), initialLoadStateManager,
|
||||
Optional.empty(),
|
||||
getTableSizeInfoForStreams(database, catalog.getStreams(), getQuoteString()));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportResumableFullRefresh(final JdbcDatabase database, final ConfiguredAirbyteStream airbyteStream) {
|
||||
return airbyteStream.getStream() != null && airbyteStream.getStream().getSourceDefinedPrimaryKey() != null
|
||||
&& !airbyteStream.getStream().getSourceDefinedPrimaryKey().isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SourceStateMessageProducer<AirbyteMessage> getSourceStateProducerForNonResumableFullRefreshStream(final JdbcDatabase database) {
|
||||
return new NonResumableStateMessageProducer<>(isCdc(database.getSourceConfig()), initialLoadStateManager);
|
||||
}
|
||||
|
||||
@NotNull
|
||||
@Override
|
||||
public AutoCloseableIterator<AirbyteMessage> augmentWithStreamStatus(@NotNull final ConfiguredAirbyteStream airbyteStream,
|
||||
@NotNull final AutoCloseableIterator<AirbyteMessage> streamItrator) {
|
||||
final var pair =
|
||||
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
|
||||
final var starterStatus =
|
||||
new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED));
|
||||
final var completeStatus =
|
||||
new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE));
|
||||
return AutoCloseableIterators.concatWithEagerClose(starterStatus, streamItrator, completeStatus);
|
||||
}
|
||||
|
||||
private boolean isExcludeTodayDateForCursorIncremental(@NotNull JsonNode config) {
|
||||
if (config.hasNonNull(LEGACY_REPLICATION_FIELD)) {
|
||||
final JsonNode replicationConfig = config.get(LEGACY_REPLICATION_FIELD);
|
||||
if (MssqlCdcHelper.ReplicationMethod.valueOf(replicationConfig.get(METHOD_FIELD).asText()) == ReplicationMethod.STANDARD) {
|
||||
if (replicationConfig.hasNonNull(REPLICATION_INCREMENTAL_EXCLUDE_TODAYS)) {
|
||||
return replicationConfig.get(REPLICATION_INCREMENTAL_EXCLUDE_TODAYS).asBoolean(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,199 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.cdk.db.DataTypeUtils.TIMESTAMPTZ_FORMATTER;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_NAME;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_COLUMN_TYPE_NAME;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_SCHEMA_NAME;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcConstants.INTERNAL_TABLE_NAME;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.microsoft.sqlserver.jdbc.Geography;
|
||||
import com.microsoft.sqlserver.jdbc.Geometry;
|
||||
import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData;
|
||||
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcSourceOperations;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.CdcMetadataInjector;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import java.sql.JDBCType;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Timestamp;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.Base64;
|
||||
import java.util.Optional;
|
||||
import microsoft.sql.DateTimeOffset;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlSourceOperations extends JdbcSourceOperations {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSourceOperations.class);
|
||||
|
||||
private final Optional<CdcMetadataInjector> metadataInjector;
|
||||
|
||||
public MssqlSourceOperations() {
|
||||
super();
|
||||
this.metadataInjector = Optional.empty();
|
||||
}
|
||||
|
||||
public MssqlSourceOperations(final Optional<CdcMetadataInjector> metadataInjector) {
|
||||
super();
|
||||
this.metadataInjector = metadataInjector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteRecordData convertDatabaseRowToAirbyteRecordData(final ResultSet queryContext) throws SQLException {
|
||||
final AirbyteRecordData recordData = super.convertDatabaseRowToAirbyteRecordData(queryContext);
|
||||
final ObjectNode jsonNode = (ObjectNode) recordData.rawRowData();
|
||||
if (!metadataInjector.isPresent()) {
|
||||
return recordData;
|
||||
}
|
||||
metadataInjector.get().inject(jsonNode);
|
||||
return new AirbyteRecordData(jsonNode, recordData.meta());
|
||||
}
|
||||
|
||||
/**
 * Sets the JSON value according to the column type. This must be overridden because MSSQL has its
 * own specific types (e.g. geometry, geography, hierarchyid).
 *
 * @throws SQLException if the column value cannot be read
 */
|
||||
@Override
|
||||
public void copyToJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json)
|
||||
throws SQLException {
|
||||
final SQLServerResultSetMetaData metadata = (SQLServerResultSetMetaData) resultSet
|
||||
.getMetaData();
|
||||
final String columnName = metadata.getColumnName(colIndex);
|
||||
final String columnTypeName = metadata.getColumnTypeName(colIndex);
|
||||
|
||||
// Access the column first so we know whether it is null before doing any type-specific parsing.
// If the column is null, write an explicit null and skip parsing the value.
resultSet.getObject(colIndex);
|
||||
if (resultSet.wasNull()) {
|
||||
json.putNull(columnName);
|
||||
} else if (columnTypeName.equalsIgnoreCase("time")) {
|
||||
putTime(json, columnName, resultSet, colIndex);
|
||||
} else if (columnTypeName.equalsIgnoreCase("geometry")) {
|
||||
putGeometry(json, columnName, resultSet, colIndex);
|
||||
} else if (columnTypeName.equalsIgnoreCase("geography")) {
|
||||
putGeography(json, columnName, resultSet, colIndex);
|
||||
} else if (columnTypeName.equalsIgnoreCase("datetimeoffset")) {
|
||||
// JDBC reports such columns as VARCHAR, so they need special handling here.
|
||||
putTimestampWithTimezone(json, columnName, resultSet, colIndex);
|
||||
} else {
|
||||
super.copyToJsonField(resultSet, colIndex, json);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public JDBCType getDatabaseFieldType(final JsonNode field) {
|
||||
try {
|
||||
final String typeName = field.get(INTERNAL_COLUMN_TYPE_NAME).asText();
|
||||
if (typeName.equalsIgnoreCase("geography")
|
||||
|| typeName.equalsIgnoreCase("geometry")
|
||||
|| typeName.equalsIgnoreCase("hierarchyid")) {
|
||||
return JDBCType.VARCHAR;
|
||||
}
|
||||
|
||||
if (typeName.equalsIgnoreCase("datetime")) {
|
||||
return JDBCType.TIMESTAMP;
|
||||
}
|
||||
|
||||
if (typeName.equalsIgnoreCase("datetimeoffset")) {
|
||||
return JDBCType.TIMESTAMP_WITH_TIMEZONE;
|
||||
}
|
||||
|
||||
if (typeName.equalsIgnoreCase("real")) {
|
||||
return JDBCType.REAL;
|
||||
}
|
||||
|
||||
return JDBCType.valueOf(field.get(INTERNAL_COLUMN_TYPE).asInt());
|
||||
} catch (final IllegalArgumentException ex) {
|
||||
LOGGER.warn(String.format("Could not convert column: %s from table: %s.%s with type: %s. Casting to VARCHAR.",
|
||||
field.get(INTERNAL_COLUMN_NAME),
|
||||
field.get(INTERNAL_SCHEMA_NAME),
|
||||
field.get(INTERNAL_TABLE_NAME),
|
||||
field.get(INTERNAL_COLUMN_TYPE)));
|
||||
return JDBCType.VARCHAR;
|
||||
}
|
||||
}
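A small sketch of how a discovered column flows through this override, assuming a hand-built metadata node with the statically imported JdbcConstants keys; the real node is produced by the CDK's discovery code.

```java
// Hand-built metadata node mimicking what discovery produces for a datetimeoffset column.
final ObjectNode field = new ObjectMapper().createObjectNode();
field.put(INTERNAL_COLUMN_TYPE_NAME, "datetimeoffset");
field.put(INTERNAL_COLUMN_TYPE, java.sql.Types.VARCHAR); // the driver reports datetimeoffset as VARCHAR
final JDBCType resolved = new MssqlSourceOperations().getDatabaseFieldType(field);
// resolved == JDBCType.TIMESTAMP_WITH_TIMEZONE, so the column is not typed as a plain string.
```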
|
||||
|
||||
@Override
|
||||
protected void putBinary(final ObjectNode node,
|
||||
final String columnName,
|
||||
final ResultSet resultSet,
|
||||
final int index)
|
||||
throws SQLException {
|
||||
final byte[] bytes = resultSet.getBytes(index);
|
||||
final String value = Base64.getEncoder().encodeToString(bytes);
|
||||
node.put(columnName, value);
|
||||
}
|
||||
|
||||
protected void putGeometry(final ObjectNode node,
|
||||
final String columnName,
|
||||
final ResultSet resultSet,
|
||||
final int index)
|
||||
throws SQLException {
|
||||
node.put(columnName, Geometry.deserialize(resultSet.getBytes(index)).toString());
|
||||
}
|
||||
|
||||
protected void putGeography(final ObjectNode node,
|
||||
final String columnName,
|
||||
final ResultSet resultSet,
|
||||
final int index)
|
||||
throws SQLException {
|
||||
node.put(columnName, Geography.deserialize(resultSet.getBytes(index)).toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void putTimestamp(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) throws SQLException {
|
||||
final DateTimeFormatter microsecondsFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss[.][SSSSSS]");
|
||||
node.put(columnName, getObject(resultSet, index, LocalDateTime.class).format(microsecondsFormatter));
|
||||
}
|
||||
|
||||
@Override
|
||||
public JsonSchemaType getAirbyteType(final JDBCType jdbcType) {
|
||||
return switch (jdbcType) {
|
||||
case TINYINT, SMALLINT, INTEGER, BIGINT -> JsonSchemaType.INTEGER;
|
||||
case DOUBLE, DECIMAL, FLOAT, NUMERIC, REAL -> JsonSchemaType.NUMBER;
|
||||
case BOOLEAN, BIT -> JsonSchemaType.BOOLEAN;
|
||||
case NULL -> JsonSchemaType.NULL;
|
||||
case BLOB, BINARY, VARBINARY, LONGVARBINARY -> JsonSchemaType.STRING_BASE_64;
|
||||
case TIME -> JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE;
|
||||
case TIMESTAMP_WITH_TIMEZONE -> JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE;
|
||||
case TIMESTAMP -> JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE;
|
||||
case DATE -> JsonSchemaType.STRING_DATE;
|
||||
default -> JsonSchemaType.STRING;
|
||||
};
|
||||
}
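A few concrete instances of the mapping above, as a quick reference rather than an exhaustive list.

```java
final MssqlSourceOperations ops = new MssqlSourceOperations();
final JsonSchemaType bigintType = ops.getAirbyteType(JDBCType.BIGINT);       // JsonSchemaType.INTEGER
final JsonSchemaType varbinaryType = ops.getAirbyteType(JDBCType.VARBINARY); // JsonSchemaType.STRING_BASE_64
final JsonSchemaType nvarcharType = ops.getAirbyteType(JDBCType.NVARCHAR);   // JsonSchemaType.STRING (default branch)
```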
|
||||
|
||||
@Override
|
||||
protected void setTimestampWithTimezone(final PreparedStatement preparedStatement, final int parameterIndex, final String value)
|
||||
throws SQLException {
|
||||
try {
|
||||
final OffsetDateTime offsetDateTime = OffsetDateTime.parse(value, TIMESTAMPTZ_FORMATTER);
|
||||
final Timestamp timestamp = Timestamp.valueOf(offsetDateTime.atZoneSameInstant(offsetDateTime.getOffset()).toLocalDateTime());
|
||||
// Final step of conversion from
|
||||
// OffsetDateTime (a Java construct) object -> Timestamp (a Java construct) ->
|
||||
// DateTimeOffset (a Microsoft.sql specific construct)
|
||||
// and provide the offset in minutes to the converter
|
||||
final DateTimeOffset datetimeoffset = DateTimeOffset.valueOf(timestamp, offsetDateTime.getOffset().getTotalSeconds() / 60);
|
||||
preparedStatement.setObject(parameterIndex, datetimeoffset);
|
||||
} catch (final DateTimeParseException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
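A minimal sketch of the conversion chain described in the comments above, for one sample value; the literal timestamp is made up for illustration.

```java
// OffsetDateTime -> Timestamp -> microsoft.sql.DateTimeOffset, with the offset passed in minutes.
final OffsetDateTime odt = OffsetDateTime.parse("2024-03-01T10:15:30.123456+02:00");
final Timestamp ts = Timestamp.valueOf(odt.atZoneSameInstant(odt.getOffset()).toLocalDateTime());
final DateTimeOffset dto = DateTimeOffset.valueOf(ts, odt.getOffset().getTotalSeconds() / 60); // +02:00 -> 120 minutes
// dto can then be bound with preparedStatement.setObject(parameterIndex, dto).
```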
|
||||
|
||||
}
@@ -1,13 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.cdc;
|
||||
|
||||
public class MssqlCdcStateConstants {
|
||||
|
||||
public static final String MSSQL_CDC_OFFSET = "mssql_cdc_offset";
|
||||
public static final String MSSQL_DB_HISTORY = "mssql_db_history";
|
||||
public static final String IS_COMPRESSED = "is_compressed";
|
||||
|
||||
}
@@ -1,306 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.cdc;
|
||||
|
||||
import static io.debezium.relational.RelationalDatabaseConnectorConfig.DATABASE_NAME;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.AirbyteFileOffsetBackingStore;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage.SchemaHistory;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.DebeziumRecordPublisher;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.DebeziumStateUtil;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumPropertiesManager;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.debezium.config.Configuration;
|
||||
import io.debezium.connector.common.OffsetReader;
|
||||
import io.debezium.connector.sqlserver.Lsn;
|
||||
import io.debezium.connector.sqlserver.SqlServerConnectorConfig;
|
||||
import io.debezium.connector.sqlserver.SqlServerOffsetContext;
|
||||
import io.debezium.connector.sqlserver.SqlServerOffsetContext.Loader;
|
||||
import io.debezium.connector.sqlserver.SqlServerPartition;
|
||||
import io.debezium.engine.ChangeEvent;
|
||||
import io.debezium.pipeline.spi.Offsets;
|
||||
import io.debezium.pipeline.spi.Partition;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.kafka.connect.storage.FileOffsetBackingStore;
|
||||
import org.apache.kafka.connect.storage.OffsetStorageReaderImpl;
|
||||
import org.codehaus.plexus.util.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlDebeziumStateUtil implements DebeziumStateUtil {
|
||||
|
||||
// Testing is done concurrently so initialState is cached in a thread local variable
|
||||
// in order to provide each test thread with its own correct initial state
|
||||
private static ThreadLocal<JsonNode> initialState = new ThreadLocal<>();
|
||||
|
||||
final static String LSN_OFFSET_INCLUDED_QUERY = """
|
||||
DECLARE @saved_lsn BINARY(10), @min_lsn BINARY(10), @max_lsn BINARY(10), @res BIT
|
||||
-- Set @saved_lsn = 0x0000DF7C000006A80006
|
||||
Set @saved_lsn = ?
|
||||
SELECT @min_lsn = MIN(start_lsn) FROM cdc.change_tables
|
||||
SELECT @max_lsn = sys.fn_cdc_get_max_lsn()
|
||||
IF (@saved_lsn >= @min_lsn)
|
||||
Set @res = 1
|
||||
ELSE
|
||||
Set @res = 0
|
||||
select @res as [included], @min_lsn as [min], @max_lsn as [max]
|
||||
""";
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlDebeziumStateUtil.class);
|
||||
|
||||
/**
|
||||
* Generate initial state for debezium state.
|
||||
*/
|
||||
public static synchronized JsonNode constructInitialDebeziumState(final Properties properties,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final JdbcDatabase database) {
|
||||
// There is no need to construct an initial state if one was already constructed during this run:
// starting and stopping the MSSQL Debezium engine too many times can cause it to hang during shutdown.
|
||||
if (initialState.get() == null) {
|
||||
LOGGER.info("No initial state was found. Running Debezium state initialization...");
|
||||
properties.setProperty("heartbeat.interval.ms", "0");
|
||||
final JsonNode highWaterMark = constructLsnSnapshotState(database, database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText());
|
||||
final AirbyteFileOffsetBackingStore emptyOffsetManager = AirbyteFileOffsetBackingStore.initializeState(null,
|
||||
Optional.empty());
|
||||
final AirbyteSchemaHistoryStorage schemaHistoryStorage =
|
||||
AirbyteSchemaHistoryStorage.initializeDBHistory(new SchemaHistory<>(Optional.empty(), false), false);
|
||||
final LinkedBlockingQueue<ChangeEvent<String, String>> queue = new LinkedBlockingQueue<>();
|
||||
final Instant engineStartTime = Instant.now();
|
||||
boolean schemaHistoryRead = false;
|
||||
SchemaHistory<String> schemaHistory = null;
|
||||
final var debeziumPropertiesManager =
|
||||
new RelationalDbDebeziumPropertiesManager(properties, database.getSourceConfig(), catalog, Collections.emptyList());
|
||||
try {
|
||||
final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(debeziumPropertiesManager);
|
||||
publisher.start(queue, emptyOffsetManager, Optional.of(schemaHistoryStorage));
|
||||
while (!publisher.hasClosed()) {
|
||||
final ChangeEvent<String, String> event = queue.poll(10, TimeUnit.SECONDS);
|
||||
|
||||
// If there is no event (e.g. the tables are empty), generating the schema history may take a few
// polling cycles, depending on the size of the history.
|
||||
schemaHistory = schemaHistoryStorage.read();
|
||||
schemaHistoryRead = Objects.nonNull(schemaHistory) && StringUtils.isNotBlank(schemaHistory.getSchema());
|
||||
|
||||
if (event != null || schemaHistoryRead) {
|
||||
publisher.close();
|
||||
break;
|
||||
}
|
||||
|
||||
Duration initialWaitingDuration = Duration.ofMinutes(5L);
|
||||
// If the initial waiting time is configured and is greater than 5 minutes, use that value
// instead of the default.
|
||||
final Duration configuredDuration = RecordWaitTimeUtil.getFirstRecordWaitTime(database.getSourceConfig());
|
||||
if (configuredDuration.compareTo(initialWaitingDuration) > 0) {
|
||||
initialWaitingDuration = configuredDuration;
|
||||
}
|
||||
if (Duration.between(engineStartTime, Instant.now()).compareTo(initialWaitingDuration) > 0) {
|
||||
LOGGER.error("Schema history not constructed after {} seconds of waiting, closing the engine", initialWaitingDuration.getSeconds());
|
||||
publisher.close();
|
||||
throw new RuntimeException(
|
||||
"Building schema history has timed out. Please consider increasing the debezium wait time in advanced options.");
|
||||
}
|
||||
}
|
||||
} catch (final InterruptedException ine) {
|
||||
LOGGER.debug("Interrupted during closing of publisher");
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState(highWaterMark,
|
||||
Optional.empty());
|
||||
|
||||
final Map<String, String> offset = offsetManager.read();
|
||||
if (!schemaHistoryRead) {
|
||||
schemaHistory = schemaHistoryStorage.read();
|
||||
}
|
||||
|
||||
assert !offset.isEmpty();
|
||||
assert Objects.nonNull(schemaHistory);
|
||||
assert Objects.nonNull(schemaHistory.getSchema());
|
||||
|
||||
final JsonNode asJson = serialize(offset, schemaHistory);
|
||||
LOGGER.info("Initial Debezium state constructed. offset={}", Jsons.jsonNode(offset));
|
||||
|
||||
if (asJson.get(MssqlCdcStateConstants.MSSQL_DB_HISTORY).asText().isBlank()) {
|
||||
throw new RuntimeException("Schema history snapshot returned empty history.");
|
||||
}
|
||||
initialState.set(asJson);
|
||||
}
|
||||
return initialState.get();
|
||||
|
||||
}
|
||||
|
||||
public static void disposeInitialState() {
|
||||
LOGGER.debug("Dispose initial state cached for {}", Thread.currentThread());
|
||||
initialState.remove();
|
||||
}
|
||||
|
||||
private static JsonNode serialize(final Map<String, String> offset, final SchemaHistory<String> dbHistory) {
|
||||
final Map<String, Object> state = new HashMap<>();
|
||||
state.put(MssqlCdcStateConstants.MSSQL_CDC_OFFSET, offset);
|
||||
state.put(MssqlCdcStateConstants.MSSQL_DB_HISTORY, dbHistory.getSchema());
|
||||
state.put(MssqlCdcStateConstants.IS_COMPRESSED, dbHistory.isCompressed());
|
||||
|
||||
return Jsons.jsonNode(state);
|
||||
}
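For orientation, a rough illustration of the JSON shape serialize(...) produces, keyed by the MssqlCdcStateConstants values; the values below are placeholders, not a real offset or schema history.

```java
// Placeholder values only; the real offset map and schema history are much larger.
final String exampleShape = """
    {
      "mssql_cdc_offset": { "<offset key>": "<offset value>" },
      "mssql_db_history": "<Debezium schema history, possibly compressed>",
      "is_compressed": false
    }
    """;
```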
|
||||
|
||||
public static MssqlDebeziumStateAttributes getStateAttributesFromDB(final JdbcDatabase database) {
|
||||
try (final Stream<MssqlDebeziumStateAttributes> stream = database.unsafeResultSetQuery(
|
||||
connection -> connection.createStatement().executeQuery("select sys.fn_cdc_get_max_lsn()"),
|
||||
resultSet -> {
|
||||
final byte[] lsnBinary = resultSet.getBytes(1);
|
||||
Lsn lsn = Lsn.valueOf(lsnBinary);
|
||||
return new MssqlDebeziumStateAttributes(lsn);
|
||||
})) {
|
||||
final List<MssqlDebeziumStateAttributes> stateAttributes = stream.toList();
|
||||
assert stateAttributes.size() == 1;
|
||||
return stateAttributes.get(0);
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public record MssqlDebeziumStateAttributes(Lsn lsn) {}
|
||||
|
||||
/**
 * Constructs the initial Debezium state that can be passed to the Debezium engine so that it
 * starts reading the transaction log from a specific LSN and skips the snapshot phase. Example:
 * ["test",{"server":"test","database":"test"}]" :
 * "{"transaction_id":null,"event_serial_no":1,"commit_lsn":"00000644:00002ff8:0099","change_lsn":"0000062d:00017ff0:016d"}"
 */
|
||||
static JsonNode constructLsnSnapshotState(final JdbcDatabase database, final String dbName) {
|
||||
return format(getStateAttributesFromDB(database), dbName);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public static JsonNode format(final MssqlDebeziumStateAttributes attributes, final String dbName) {
|
||||
final String key = "[\"" + dbName + "\",{\"server\":\"" + dbName + "\",\"database\":\"" + dbName + "\"}]";
|
||||
final String value =
|
||||
"{\"commit_lsn\":\"" + attributes.lsn.toString() + "\",\"snapshot\":true,\"snapshot_completed\":true"
|
||||
+ "}";
|
||||
|
||||
final Map<String, String> result = new HashMap<>();
|
||||
result.put(key, value);
|
||||
|
||||
final JsonNode jsonNode = Jsons.jsonNode(result);
|
||||
LOGGER.info("Initial Debezium state offset constructed: {}", jsonNode);
|
||||
|
||||
return jsonNode;
|
||||
}
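A hedged usage sketch of format(...), assuming Lsn.valueOf(String) parses the colon-separated hex form; the LSN value and database name below are purely hypothetical.

```java
final MssqlDebeziumStateAttributes attrs =
    new MssqlDebeziumStateAttributes(Lsn.valueOf("00000644:00002ff8:0099")); // hypothetical LSN
final JsonNode offset = MssqlDebeziumStateUtil.format(attrs, "mydb");
// The resulting node has a single entry:
//   key:   ["mydb",{"server":"mydb","database":"mydb"}]
//   value: {"commit_lsn":"00000644:00002ff8:0099","snapshot":true,"snapshot_completed":true}
```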
|
||||
|
||||
public Optional<MssqlDebeziumStateAttributes> savedOffset(final Properties baseProperties,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final JsonNode cdcOffset,
|
||||
final JsonNode config) {
|
||||
if (Objects.isNull(cdcOffset)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
final var offsetManager = AirbyteFileOffsetBackingStore.initializeState(cdcOffset, Optional.empty());
|
||||
final DebeziumPropertiesManager debeziumPropertiesManager =
|
||||
new RelationalDbDebeziumPropertiesManager(baseProperties, config, catalog, Collections.emptyList());
|
||||
final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager);
|
||||
return parseSavedOffset(debeziumProperties);
|
||||
}
|
||||
|
||||
private Optional<MssqlDebeziumStateAttributes> parseSavedOffset(final Properties properties) {
|
||||
FileOffsetBackingStore fileOffsetBackingStore = null;
|
||||
OffsetStorageReaderImpl offsetStorageReader = null;
|
||||
|
||||
try {
|
||||
fileOffsetBackingStore = getFileOffsetBackingStore(properties);
|
||||
offsetStorageReader = getOffsetStorageReader(fileOffsetBackingStore, properties);
|
||||
|
||||
final SqlServerConnectorConfig connectorConfig = new SqlServerConnectorConfig(Configuration.from(properties));
|
||||
final SqlServerOffsetContext.Loader loader = new Loader(connectorConfig);
|
||||
final Set<Partition> partitions =
|
||||
Collections.singleton(new SqlServerPartition(connectorConfig.getLogicalName(), properties.getProperty(DATABASE_NAME.name())));
|
||||
final OffsetReader<Partition, SqlServerOffsetContext, Loader> offsetReader = new OffsetReader<>(offsetStorageReader, loader);
|
||||
final Map<Partition, SqlServerOffsetContext> offsets = offsetReader.offsets(partitions);
|
||||
return extractStateAttributes(partitions, offsets);
|
||||
} finally {
|
||||
LOGGER.info("Closing offsetStorageReader and fileOffsetBackingStore");
|
||||
if (offsetStorageReader != null) {
|
||||
offsetStorageReader.close();
|
||||
}
|
||||
|
||||
if (fileOffsetBackingStore != null) {
|
||||
fileOffsetBackingStore.stop();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private Optional<MssqlDebeziumStateAttributes> extractStateAttributes(final Set<Partition> partitions,
|
||||
final Map<Partition, SqlServerOffsetContext> offsets) {
|
||||
boolean found = false;
|
||||
for (final Partition partition : partitions) {
|
||||
final SqlServerOffsetContext mssqlOffsetContext = offsets.get(partition);
|
||||
|
||||
if (mssqlOffsetContext != null) {
|
||||
found = true;
|
||||
LOGGER.info("Found previous partition offset {}: {}", partition, mssqlOffsetContext.getOffset());
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
LOGGER.info("No previous offsets found");
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
final Offsets<Partition, SqlServerOffsetContext> of = Offsets.of(offsets);
|
||||
final SqlServerOffsetContext previousOffset = of.getTheOnlyOffset();
|
||||
return Optional.of(new MssqlDebeziumStateAttributes(previousOffset.getChangePosition().getCommitLsn()));
|
||||
}
|
||||
|
||||
public boolean savedOffsetStillPresentOnServer(final JdbcDatabase database, final MssqlDebeziumStateAttributes savedState) {
|
||||
final Lsn savedLsn = savedState.lsn();
|
||||
try (final Stream<Boolean> stream = database.unsafeResultSetQuery(
|
||||
connection -> {
|
||||
PreparedStatement stmt = connection.prepareStatement(LSN_OFFSET_INCLUDED_QUERY);
|
||||
stmt.setBytes(1, savedLsn.getBinary());
|
||||
return stmt.executeQuery();
|
||||
},
|
||||
resultSet -> {
|
||||
final byte[] minLsnBinary = resultSet.getBytes(2);
|
||||
Lsn min_lsn = Lsn.valueOf(minLsnBinary);
|
||||
final byte[] maxLsnBinary = resultSet.getBytes(3);
|
||||
Lsn max_lsn = Lsn.valueOf(maxLsnBinary);
|
||||
final Boolean included = resultSet.getBoolean(1);
|
||||
LOGGER.info("{} lsn exists on server: [{}]. (min server lsn: {} max server lsn: {})", savedLsn.toString(), included, min_lsn.toString(),
|
||||
max_lsn.toString());
|
||||
return included;
|
||||
})) {
|
||||
final List<Boolean> results = stream.toList();
assert results.size() == 1;

return results.get(0);
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.cursor_based;
|
||||
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.MSSQL_STATE_VERSION;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StreamStateManager;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamState;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.StreamDescriptor;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlCursorBasedStateManager extends StreamStateManager {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlCursorBasedStateManager.class);
|
||||
|
||||
public MssqlCursorBasedStateManager(final List<AirbyteStateMessage> airbyteStateMessages, final ConfiguredAirbyteCatalog catalog) {
|
||||
super(airbyteStateMessages, catalog);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteStateMessage toState(final Optional<AirbyteStreamNameNamespacePair> pair) {
|
||||
if (pair.isPresent()) {
|
||||
final Map<AirbyteStreamNameNamespacePair, CursorInfo> pairToCursorInfoMap = getPairToCursorInfoMap();
|
||||
final Optional<CursorInfo> cursorInfo = Optional.ofNullable(pairToCursorInfoMap.get(pair.get()));
|
||||
|
||||
if (cursorInfo.isPresent()) {
|
||||
LOGGER.debug("Generating state message for {}...", pair);
|
||||
return new AirbyteStateMessage()
|
||||
.withType(AirbyteStateType.STREAM)
|
||||
// Temporarily include legacy state for backwards compatibility with the platform
|
||||
.withStream(generateStreamState(pair.get(), cursorInfo.get()));
|
||||
} else {
|
||||
LOGGER.warn("Cursor information could not be located in state for stream {}. Returning a new, empty state message...", pair);
|
||||
return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState());
|
||||
}
|
||||
} else {
|
||||
LOGGER.warn("Stream not provided. Returning a new, empty state message...");
|
||||
return new AirbyteStateMessage().withType(AirbyteStateType.STREAM).withStream(new AirbyteStreamState());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the stream state for the given stream and cursor information.
|
||||
*
|
||||
* @param airbyteStreamNameNamespacePair The stream.
|
||||
* @param cursorInfo The current cursor.
|
||||
* @return The {@link AirbyteStreamState} representing the current state of the stream.
|
||||
*/
|
||||
private AirbyteStreamState generateStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair,
|
||||
final CursorInfo cursorInfo) {
|
||||
return new AirbyteStreamState()
|
||||
.withStreamDescriptor(
|
||||
new StreamDescriptor().withName(airbyteStreamNameNamespacePair.getName()).withNamespace(airbyteStreamNameNamespacePair.getNamespace()))
|
||||
.withStreamState(Jsons.jsonNode(generateDbStreamState(airbyteStreamNameNamespacePair, cursorInfo)));
|
||||
}
|
||||
|
||||
private CursorBasedStatus generateDbStreamState(final AirbyteStreamNameNamespacePair airbyteStreamNameNamespacePair,
|
||||
final CursorInfo cursorInfo) {
|
||||
final CursorBasedStatus state = new CursorBasedStatus();
|
||||
state.setStateType(StateType.CURSOR_BASED);
|
||||
state.setVersion(MSSQL_STATE_VERSION);
|
||||
state.setStreamName(airbyteStreamNameNamespacePair.getName());
|
||||
state.setStreamNamespace(airbyteStreamNameNamespacePair.getNamespace());
|
||||
state.setCursorField(cursorInfo.getCursorField() == null ? Collections.emptyList() : Lists.newArrayList(cursorInfo.getCursorField()));
|
||||
state.setCursor(cursorInfo.getCursor());
|
||||
if (cursorInfo.getCursorRecordCount() > 0L) {
|
||||
state.setCursorRecordCount(cursorInfo.getCursorRecordCount());
|
||||
}
|
||||
return state;
|
||||
}
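A small sketch of the cursor-based status this builds for a hypothetical stream; the stream name, cursor field and values are made up.

```java
final CursorBasedStatus example = new CursorBasedStatus();
example.setStateType(StateType.CURSOR_BASED);
example.setVersion(MSSQL_STATE_VERSION);
example.setStreamName("users");                 // hypothetical stream
example.setStreamNamespace("dbo");
example.setCursorField(List.of("updated_at"));  // hypothetical cursor column
example.setCursor("2024-03-01T10:15:30");
example.setCursorRecordCount(3L);               // only set when the count is positive
```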
|
||||
|
||||
}
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import io.airbyte.integrations.source.mssql.MssqlCdcConnectorMetadataInjector;
|
||||
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
|
||||
|
||||
public class CdcMetadataInjector {
|
||||
|
||||
private final String transactionTimestamp;
|
||||
private final MssqlDebeziumStateAttributes stateAttributes;
|
||||
private final MssqlCdcConnectorMetadataInjector metadataInjector;
|
||||
|
||||
public CdcMetadataInjector(final String transactionTimestamp,
|
||||
final MssqlDebeziumStateAttributes stateAttributes,
|
||||
final MssqlCdcConnectorMetadataInjector metadataInjector) {
|
||||
this.transactionTimestamp = transactionTimestamp;
|
||||
this.stateAttributes = stateAttributes;
|
||||
this.metadataInjector = metadataInjector;
|
||||
}
|
||||
|
||||
public void inject(final ObjectNode record) {
|
||||
metadataInjector.addMetaDataToRowsFetchedOutsideDebezium(record, transactionTimestamp, stateAttributes);
|
||||
}
|
||||
|
||||
}
@@ -1,173 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
|
||||
import io.airbyte.protocol.models.v0.*;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
|
||||
import java.util.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlInitialLoadGlobalStateManager extends MssqlInitialLoadStateManager {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadGlobalStateManager.class);
|
||||
private StateManager stateManager;
|
||||
private final CdcState initialCdcState;
|
||||
// Only one global state is emitted, which is fanned out into many entries in the DB by the platform.
// As a result, we need to keep track of streams that have completed the snapshot.
|
||||
private Set<AirbyteStreamNameNamespacePair> streamsThatHaveCompletedSnapshot;
|
||||
|
||||
// No special handling for resumable full refresh streams. We will report the cursor as it is.
|
||||
private Set<AirbyteStreamNameNamespacePair> resumableFullRefreshStreams;
|
||||
private Set<AirbyteStreamNameNamespacePair> nonResumableFullRefreshStreams;
|
||||
private Set<AirbyteStreamNameNamespacePair> completedNonResumableFullRefreshStreams;
|
||||
|
||||
public MssqlInitialLoadGlobalStateManager(final InitialLoadStreams initialLoadStreams,
|
||||
final Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo,
|
||||
final StateManager stateManager,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final CdcState initialCdcState) {
|
||||
this.pairToOrderedColLoadStatus = MssqlInitialLoadStateManager.initPairToOrderedColumnLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus());
|
||||
this.pairToOrderedColInfo = pairToOrderedColInfo;
|
||||
this.stateManager = stateManager;
|
||||
this.initialCdcState = initialCdcState;
|
||||
this.streamStateForIncrementalRunSupplier = pair -> Jsons.emptyObject();
|
||||
initStreams(initialLoadStreams, catalog);
|
||||
}
|
||||
|
||||
private AirbyteGlobalState generateGlobalState(final List<AirbyteStreamState> streamStates) {
|
||||
CdcState cdcState = stateManager.getCdcStateManager().getCdcState();
|
||||
if (cdcState == null || cdcState.getState() == null) {
|
||||
cdcState = initialCdcState;
|
||||
}
|
||||
|
||||
final AirbyteGlobalState globalState = new AirbyteGlobalState();
|
||||
globalState.setSharedState(Jsons.jsonNode(cdcState));
|
||||
globalState.setStreamStates(streamStates);
|
||||
return globalState;
|
||||
}
|
||||
|
||||
private void initStreams(final InitialLoadStreams initialLoadStreams,
|
||||
final ConfiguredAirbyteCatalog catalog) {
|
||||
this.streamsThatHaveCompletedSnapshot = new HashSet<>();
|
||||
this.resumableFullRefreshStreams = new HashSet<>();
|
||||
this.nonResumableFullRefreshStreams = new HashSet<>();
|
||||
this.completedNonResumableFullRefreshStreams = new HashSet<>();
|
||||
|
||||
catalog.getStreams().forEach(configuredAirbyteStream -> {
|
||||
var pairInStream =
|
||||
new AirbyteStreamNameNamespacePair(configuredAirbyteStream.getStream().getName(), configuredAirbyteStream.getStream().getNamespace());
|
||||
if (!initialLoadStreams.streamsForInitialLoad().contains(configuredAirbyteStream)
|
||||
&& configuredAirbyteStream.getSyncMode() == SyncMode.INCREMENTAL) {
|
||||
this.streamsThatHaveCompletedSnapshot.add(pairInStream);
|
||||
}
|
||||
if (configuredAirbyteStream.getSyncMode() == SyncMode.FULL_REFRESH) {
|
||||
if (configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey() != null
|
||||
&& !configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey().isEmpty()) {
|
||||
this.resumableFullRefreshStreams.add(pairInStream);
|
||||
} else {
|
||||
this.nonResumableFullRefreshStreams.add(pairInStream);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream airbyteStream) {
|
||||
final List<AirbyteStreamState> streamStates = new ArrayList<>();
|
||||
streamsThatHaveCompletedSnapshot.forEach(stream -> {
|
||||
final DbStreamState state = getFinalState(stream);
|
||||
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(state)));
|
||||
});
|
||||
|
||||
resumableFullRefreshStreams.forEach(stream -> {
|
||||
var ocStatus = getOrderedColumnLoadStatus(stream);
|
||||
if (ocStatus != null) {
|
||||
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(ocStatus)));
|
||||
}
|
||||
});
|
||||
|
||||
completedNonResumableFullRefreshStreams.forEach(stream -> {
|
||||
streamStates.add(new AirbyteStreamState()
|
||||
.withStreamDescriptor(
|
||||
new StreamDescriptor().withName(stream.getName()).withNamespace(stream.getNamespace())));
|
||||
});
|
||||
|
||||
if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) {
|
||||
AirbyteStreamNameNamespacePair pair =
|
||||
new AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
|
||||
var ocStatus = getOrderedColumnLoadStatus(pair);
|
||||
streamStates.add(getAirbyteStreamState(pair, Jsons.jsonNode(ocStatus)));
|
||||
}
|
||||
|
||||
return new AirbyteStateMessage()
|
||||
.withType(AirbyteStateType.GLOBAL)
|
||||
.withGlobal(generateGlobalState(streamStates));
|
||||
}
|
||||
|
||||
private AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) {
|
||||
Preconditions.checkNotNull(pair);
|
||||
Preconditions.checkNotNull(pair.getName());
|
||||
Preconditions.checkNotNull(pair.getNamespace());
|
||||
|
||||
return new AirbyteStreamState()
|
||||
.withStreamDescriptor(
|
||||
new StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace()))
|
||||
.withStreamState(stateData);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream airbyteStream) {
|
||||
|
||||
final io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair pair = new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(
|
||||
airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
|
||||
if (airbyteStream.getSyncMode() == SyncMode.INCREMENTAL) {
|
||||
streamsThatHaveCompletedSnapshot.add(pair);
|
||||
} else if (nonResumableFullRefreshStreams.contains(pair)) {
|
||||
completedNonResumableFullRefreshStreams.add(pair);
|
||||
}
|
||||
final List<AirbyteStreamState> streamStates = new ArrayList<>();
|
||||
streamsThatHaveCompletedSnapshot.forEach(stream -> {
|
||||
final DbStreamState state = getFinalState(stream);
|
||||
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(state)));
|
||||
});
|
||||
|
||||
resumableFullRefreshStreams.forEach(stream -> {
|
||||
var ocStatus = getOrderedColumnLoadStatus(stream);
|
||||
streamStates.add(getAirbyteStreamState(stream, Jsons.jsonNode(ocStatus)));
|
||||
});
|
||||
|
||||
completedNonResumableFullRefreshStreams.forEach(stream -> {
|
||||
streamStates.add(new AirbyteStreamState()
|
||||
.withStreamDescriptor(
|
||||
new StreamDescriptor().withName(stream.getName()).withNamespace(stream.getNamespace())));
|
||||
});
|
||||
|
||||
return new AirbyteStateMessage()
|
||||
.withType(AirbyteStateType.GLOBAL)
|
||||
.withGlobal(generateGlobalState(streamStates));
|
||||
}
|
||||
|
||||
private DbStreamState getFinalState(final AirbyteStreamNameNamespacePair pair) {
|
||||
Preconditions.checkNotNull(pair);
|
||||
Preconditions.checkNotNull(pair.getName());
|
||||
Preconditions.checkNotNull(pair.getNamespace());
|
||||
|
||||
return new DbStreamState()
|
||||
.withStreamName(pair.getName())
|
||||
.withStreamNamespace(pair.getNamespace())
|
||||
.withCursorField(Collections.emptyList())
|
||||
.withCursor(null);
|
||||
}
|
||||
|
||||
}
@@ -1,261 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcConstants.*;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcUtils.getFullyQualifiedTableName;
|
||||
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION_PROPERTY;
|
||||
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import io.airbyte.cdk.db.SqlDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadHandler;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateIterator;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateEmitFrequency;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
|
||||
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
|
||||
import io.airbyte.commons.stream.AirbyteStreamUtils;
|
||||
import io.airbyte.commons.util.AutoCloseableIterator;
|
||||
import io.airbyte.commons.util.AutoCloseableIterators;
|
||||
import io.airbyte.integrations.source.mssql.MssqlQueryUtils.TableSizeInfo;
|
||||
import io.airbyte.integrations.source.mssql.MssqlSourceOperations;
|
||||
import io.airbyte.protocol.models.CommonField;
|
||||
import io.airbyte.protocol.models.v0.*;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
|
||||
import java.sql.*;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.function.Function;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlInitialLoadHandler implements InitialLoadHandler<JDBCType> {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadHandler.class);
|
||||
private static final long RECORD_LOGGING_SAMPLE_RATE = 1_000_000;
|
||||
private final JsonNode config;
|
||||
private final JdbcDatabase database;
|
||||
private final MssqlSourceOperations sourceOperations;
|
||||
private final String quoteString;
|
||||
private final MssqlInitialLoadStateManager initialLoadStateManager;
|
||||
private final Optional<Function<AirbyteStreamNameNamespacePair, JsonNode>> streamStateForIncrementalRunSupplier;
|
||||
private static final long QUERY_TARGET_SIZE_GB = 1_073_741_824;
|
||||
private static final long DEFAULT_CHUNK_SIZE = 1_000_000;
|
||||
final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap;
|
||||
|
||||
public MssqlInitialLoadHandler(
|
||||
final JsonNode config,
|
||||
final JdbcDatabase database,
|
||||
final MssqlSourceOperations sourceOperations,
|
||||
final String quoteString,
|
||||
final MssqlInitialLoadStateManager initialLoadStateManager,
|
||||
final Optional<Function<AirbyteStreamNameNamespacePair, JsonNode>> streamStateForIncrementalRunSupplier,
|
||||
final Map<AirbyteStreamNameNamespacePair, TableSizeInfo> tableSizeInfoMap) {
|
||||
this.config = config;
|
||||
this.database = database;
|
||||
this.sourceOperations = sourceOperations;
|
||||
this.quoteString = quoteString;
|
||||
this.initialLoadStateManager = initialLoadStateManager;
|
||||
this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier;
|
||||
this.tableSizeInfoMap = tableSizeInfoMap;
|
||||
}
|
||||
|
||||
private static String getCatalog(final SqlDatabase database) {
|
||||
return (database.getSourceConfig().has(JdbcUtils.DATABASE_KEY) ? database.getSourceConfig().get(JdbcUtils.DATABASE_KEY).asText() : null);
|
||||
}
|
||||
|
||||
public static Map<String, List<String>> discoverClusteredIndexForStream(final JdbcDatabase database,
|
||||
final AirbyteStream stream) {
|
||||
Map<String, List<String>> clusteredIndexes = new HashMap<>();
|
||||
try {
|
||||
// Retrieve the clustered index metadata for this stream's table from the JDBC metadata.
clusteredIndexes = aggregateClusteredIndexes(database.bufferedResultSetQuery(
|
||||
connection -> connection.getMetaData().getIndexInfo(getCatalog(database), stream.getNamespace(), stream.getName(), true, false),
|
||||
r -> {
|
||||
if (r.getShort(JDBC_COLUMN_TYPE) == DatabaseMetaData.tableIndexClustered) {
|
||||
final String schemaName =
|
||||
r.getObject(JDBC_COLUMN_SCHEMA_NAME) != null ? r.getString(JDBC_COLUMN_SCHEMA_NAME) : r.getString(JDBC_COLUMN_DATABASE_NAME);
|
||||
final String streamName = getFullyQualifiedTableName(schemaName, r.getString(JDBC_COLUMN_TABLE_NAME));
|
||||
final String columnName = r.getString(JDBC_COLUMN_COLUMN_NAME);
|
||||
return new ClusteredIndexAttributesFromDb(streamName, columnName);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}));
|
||||
} catch (final SQLException e) {
  LOGGER.debug(String.format("Could not retrieve clustered indexes (%s); not blocking, falling back to the primary key.", e));
}
|
||||
|
||||
LOGGER.debug("Clustered Indexes: {}", clusteredIndexes);
|
||||
|
||||
return clusteredIndexes.isEmpty() ? null : clusteredIndexes;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public record ClusteredIndexAttributesFromDb(String streamName,
|
||||
String columnName) {}
|
||||
|
||||
/**
 * Aggregates the given entries of stream name and clustered index column name.
 *
 * @param entries stream names paired with a clustered index column name
 * @return a map from stream name to the columns of its clustered index. If the clustered index
 *         has multiple columns, the first column is always used.
 */
|
||||
@VisibleForTesting
|
||||
static Map<String, List<String>> aggregateClusteredIndexes(final List<ClusteredIndexAttributesFromDb> entries) {
|
||||
final Map<String, List<String>> result = new HashMap<>();
|
||||
|
||||
entries.forEach(entry -> {
|
||||
if (entry == null) {
|
||||
return;
|
||||
}
|
||||
if (!result.containsKey(entry.streamName())) {
|
||||
result.put(entry.streamName(), new ArrayList<>());
|
||||
}
|
||||
// Store the column name in a list to support composite clustered indexes.
|
||||
result.get(entry.streamName()).add(entry.columnName());
|
||||
});
|
||||
return result;
|
||||
}
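A minimal usage sketch (e.g. from test code in the same package), with made-up stream and column names, including a composite clustered index.

```java
final List<ClusteredIndexAttributesFromDb> entries = List.of(
    new ClusteredIndexAttributesFromDb("dbo.users", "id"),
    new ClusteredIndexAttributesFromDb("dbo.users", "tenant_id"), // second column of a composite index
    new ClusteredIndexAttributesFromDb("dbo.orders", "order_id"));
final Map<String, List<String>> byStream = aggregateClusteredIndexes(entries);
// byStream == { "dbo.users" -> ["id", "tenant_id"], "dbo.orders" -> ["order_id"] }
```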
|
||||
|
||||
public List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final Instant emittedAt,
|
||||
final boolean decorateWithStartedStatus,
|
||||
final boolean decorateWithCompletedStatus,
|
||||
@NotNull final Optional<Duration> cdcInitialLoadTimeout) {
|
||||
final List<AutoCloseableIterator<AirbyteMessage>> iteratorList = new ArrayList<>();
|
||||
for (final ConfiguredAirbyteStream airbyteStream : catalog.getStreams()) {
|
||||
final AirbyteStream stream = airbyteStream.getStream();
|
||||
final String streamName = stream.getName();
|
||||
final String namespace = stream.getNamespace();
|
||||
// TODO: select the column according to the table's indexing status; it may not be the primary key.
|
||||
final var pair = new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(streamName, namespace);
|
||||
if (airbyteStream.getSyncMode().equals(SyncMode.INCREMENTAL)) {
|
||||
final String fullyQualifiedTableName = DbSourceDiscoverUtil.getFullyQualifiedTableName(namespace, streamName);
|
||||
|
||||
// Grab the selected fields to sync
|
||||
final TableInfo<CommonField<JDBCType>> table = tableNameToTable.get(fullyQualifiedTableName);
|
||||
if (decorateWithStartedStatus) {
|
||||
iteratorList.add(
|
||||
new StreamStatusTraceEmitterIterator(new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)));
|
||||
}
|
||||
iteratorList.add(getIteratorForStream(airbyteStream, table, emittedAt, cdcInitialLoadTimeout));
|
||||
if (decorateWithCompletedStatus) {
|
||||
iteratorList.add(new StreamStatusTraceEmitterIterator(
|
||||
new AirbyteStreamStatusHolder(pair, AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)));
|
||||
}
|
||||
}
|
||||
}
|
||||
return iteratorList;
|
||||
}
|
||||
|
||||
@NotNull
|
||||
@Override
|
||||
public AutoCloseableIterator<AirbyteMessage> getIteratorForStream(@NotNull final ConfiguredAirbyteStream airbyteStream,
|
||||
@NotNull final TableInfo<CommonField<JDBCType>> table,
|
||||
@NotNull final Instant emittedAt,
|
||||
@NotNull final Optional<Duration> cdcInitialLoadTimeout) {
|
||||
final AirbyteStream stream = airbyteStream.getStream();
|
||||
final String streamName = stream.getName();
|
||||
final String namespace = stream.getNamespace();
|
||||
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamName, namespace);
|
||||
final List<String> selectedDatabaseFields = table.getFields()
|
||||
.stream()
|
||||
.map(CommonField::getName)
|
||||
.filter(CatalogHelpers.getTopLevelFieldNames(airbyteStream)::contains)
|
||||
.toList();
|
||||
final AutoCloseableIterator<AirbyteRecordData> queryStream =
|
||||
new MssqlInitialLoadRecordIterator(database, sourceOperations, quoteString, initialLoadStateManager, selectedDatabaseFields, pair,
|
||||
calculateChunkSize(tableSizeInfoMap.get(pair), pair), isCompositePrimaryKey(airbyteStream), emittedAt, cdcInitialLoadTimeout);
|
||||
final AutoCloseableIterator<AirbyteMessage> recordIterator =
|
||||
getRecordIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli());
|
||||
final AutoCloseableIterator<AirbyteMessage> recordAndMessageIterator = augmentWithState(recordIterator, airbyteStream);
|
||||
return augmentWithLogs(recordAndMessageIterator, pair, streamName);
|
||||
}
|
||||
|
||||
// Transforms the given iterator to create an {@link AirbyteRecordMessage}
|
||||
private AutoCloseableIterator<AirbyteMessage> getRecordIterator(
|
||||
final AutoCloseableIterator<AirbyteRecordData> recordIterator,
|
||||
final String streamName,
|
||||
final String namespace,
|
||||
final long emittedAt) {
|
||||
return AutoCloseableIterators.transform(recordIterator, r -> new AirbyteMessage()
|
||||
.withType(Type.RECORD)
|
||||
.withRecord(new AirbyteRecordMessage()
|
||||
.withStream(streamName)
|
||||
.withNamespace(namespace)
|
||||
.withEmittedAt(emittedAt)
|
||||
.withData(r.rawRowData())
|
||||
.withMeta(isMetaChangesEmptyOrNull(r.meta()) ? null : r.meta())));
|
||||
}
|
||||
|
||||
private boolean isMetaChangesEmptyOrNull(AirbyteRecordMessageMeta meta) {
|
||||
return meta == null || meta.getChanges() == null || meta.getChanges().isEmpty();
|
||||
}
|
||||
|
||||
// Augments the given iterator with record count logs.
|
||||
private AutoCloseableIterator<AirbyteMessage> augmentWithLogs(final AutoCloseableIterator<AirbyteMessage> iterator,
|
||||
final AirbyteStreamNameNamespacePair pair,
|
||||
final String streamName) {
|
||||
final AtomicLong recordCount = new AtomicLong();
|
||||
return AutoCloseableIterators.transform(iterator,
|
||||
AirbyteStreamUtils.convertFromNameAndNamespace(pair.getName(), pair.getNamespace()),
|
||||
r -> {
|
||||
final long count = recordCount.incrementAndGet();
|
||||
if (count % RECORD_LOGGING_SAMPLE_RATE == 0) {
|
||||
LOGGER.info("Reading stream {}. Records read: {}", streamName, count);
|
||||
}
|
||||
return r;
|
||||
});
|
||||
}
|
||||
|
||||
private AutoCloseableIterator<AirbyteMessage> augmentWithState(final AutoCloseableIterator<AirbyteMessage> recordIterator,
|
||||
final ConfiguredAirbyteStream airbyteStream) {
|
||||
final AirbyteStreamNameNamespacePair pair =
|
||||
new AirbyteStreamNameNamespacePair(airbyteStream.getStream().getName(), airbyteStream.getStream().getNamespace());
|
||||
|
||||
final Duration syncCheckpointDuration =
|
||||
config.get(SYNC_CHECKPOINT_DURATION_PROPERTY) != null
|
||||
? Duration.ofSeconds(config.get(SYNC_CHECKPOINT_DURATION_PROPERTY).asLong())
|
||||
: DebeziumIteratorConstants.SYNC_CHECKPOINT_DURATION;
|
||||
final Long syncCheckpointRecords = config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY) != null ? config.get(SYNC_CHECKPOINT_RECORDS_PROPERTY).asLong()
|
||||
: DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS;
|
||||
|
||||
streamStateForIncrementalRunSupplier.ifPresent(initialLoadStateManager::setStreamStateForIncrementalRunSupplier);
|
||||
return AutoCloseableIterators.transformIterator(
|
||||
r -> new SourceStateIterator<>(r, airbyteStream, initialLoadStateManager,
|
||||
new StateEmitFrequency(syncCheckpointRecords, syncCheckpointDuration)),
|
||||
recordIterator, new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()));
|
||||
}
|
||||
|
||||
private static boolean isCompositePrimaryKey(final ConfiguredAirbyteStream stream) {
|
||||
return stream.getStream().getSourceDefinedPrimaryKey().size() > 1;
|
||||
}
|
||||
|
||||
public static long calculateChunkSize(final TableSizeInfo tableSizeInfo, final AirbyteStreamNameNamespacePair pair) {
|
||||
// If table size info could not be calculated, a default chunk size will be provided.
|
||||
if (tableSizeInfo == null || tableSizeInfo.tableSize() == 0 || tableSizeInfo.avgRowLength() == 0) {
|
||||
LOGGER.info("Chunk size could not be determined for pair: {}, defaulting to {} rows", pair, DEFAULT_CHUNK_SIZE);
|
||||
return DEFAULT_CHUNK_SIZE;
|
||||
}
|
||||
final long avgRowLength = tableSizeInfo.avgRowLength();
|
||||
final long chunkSize = QUERY_TARGET_SIZE_GB / avgRowLength;
|
||||
LOGGER.info("Chunk size determined for pair: {}, is {}", pair, chunkSize);
|
||||
return chunkSize;
|
||||
}
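A worked example of the arithmetic above; the average row length is hypothetical.

```java
final long queryTargetSizeBytes = 1_073_741_824L; // QUERY_TARGET_SIZE_GB: 1 GiB expressed in bytes
final long avgRowLengthBytes = 512L;              // hypothetical average row length
final long chunkSize = queryTargetSizeBytes / avgRowLengthBytes;
// chunkSize == 2_097_152, so each initial-load subquery targets roughly 1 GiB of data.
```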
|
||||
|
||||
}
@@ -1,200 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcSnapshotForceShutdownMessage;
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier;
|
||||
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting;
|
||||
|
||||
import com.google.common.collect.AbstractIterator;
|
||||
import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
|
||||
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
|
||||
import io.airbyte.commons.exceptions.TransientErrorException;
|
||||
import io.airbyte.commons.util.AutoCloseableIterator;
|
||||
import io.airbyte.commons.util.AutoCloseableIterators;
|
||||
import io.airbyte.integrations.source.mssql.MssqlQueryUtils;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
|
||||
import java.sql.Connection;
|
||||
import java.sql.JDBCType;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
import javax.annotation.CheckForNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@SuppressWarnings("try")
|
||||
public class MssqlInitialLoadRecordIterator extends AbstractIterator<AirbyteRecordData>
|
||||
implements AutoCloseableIterator<AirbyteRecordData> {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadRecordIterator.class);
|
||||
|
||||
private AutoCloseableIterator<AirbyteRecordData> currentIterator;
|
||||
private final JdbcDatabase database;
|
||||
private int numSubqueries = 0;
|
||||
private final String quoteString;
|
||||
private final JdbcCompatibleSourceOperations<JDBCType> sourceOperations;
|
||||
private final List<String> columnNames;
|
||||
private final AirbyteStreamNameNamespacePair pair;
|
||||
private final MssqlInitialLoadStateManager initialLoadStateManager;
|
||||
private final long chunkSize;
|
||||
private final OrderedColumnInfo ocInfo;
|
||||
private final boolean isCompositeKeyLoad;
|
||||
private final Instant startInstant;
|
||||
private Optional<Duration> cdcInitialLoadTimeout;
|
||||
private boolean isCdcSync;
|
||||
|
||||
MssqlInitialLoadRecordIterator(
|
||||
final JdbcDatabase database,
|
||||
final JdbcCompatibleSourceOperations<JDBCType> sourceOperations,
|
||||
final String quoteString,
|
||||
final MssqlInitialLoadStateManager initialLoadStateManager,
|
||||
final List<String> columnNames,
|
||||
final AirbyteStreamNameNamespacePair pair,
|
||||
final long chunkSize,
|
||||
final boolean isCompositeKeyLoad,
|
||||
final Instant startInstant,
|
||||
final Optional<Duration> cdcInitialLoadTimeout) {
|
||||
this.database = database;
|
||||
this.sourceOperations = sourceOperations;
|
||||
this.quoteString = quoteString;
|
||||
this.initialLoadStateManager = initialLoadStateManager;
|
||||
this.columnNames = columnNames;
|
||||
this.pair = pair;
|
||||
this.chunkSize = chunkSize;
|
||||
this.ocInfo = initialLoadStateManager.getOrderedColumnInfo(pair);
|
||||
this.isCompositeKeyLoad = isCompositeKeyLoad;
|
||||
this.startInstant = startInstant;
|
||||
this.cdcInitialLoadTimeout = cdcInitialLoadTimeout;
|
||||
this.isCdcSync = isCdcSync(initialLoadStateManager);
|
||||
}
|
||||
|
||||
@CheckForNull
|
||||
@Override
|
||||
protected AirbyteRecordData computeNext() {
|
||||
if (isCdcSync && cdcInitialLoadTimeout.isPresent()
|
||||
&& Duration.between(startInstant, Instant.now()).compareTo(cdcInitialLoadTimeout.get()) > 0) {
|
||||
final String cdcInitialLoadTimeoutMessage = String.format(
    "Initial load has taken longer than %s hours. Canceling the sync so that CDC replication can catch up on the next attempt; initial snapshotting will then resume.",
    cdcInitialLoadTimeout.get().toHours());
|
||||
LOGGER.info(cdcInitialLoadTimeoutMessage);
|
||||
AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcSnapshotForceShutdownMessage());
|
||||
throw new TransientErrorException(cdcInitialLoadTimeoutMessage);
|
||||
}
|
||||
if (shouldBuildNextSubquery()) {
|
||||
try {
|
||||
// We only issue one query for a composite key load. If we have already processed all the data
// associated with this query, indicate that we are done processing the given stream.
|
||||
if (isCompositeKeyLoad && numSubqueries >= 1) {
|
||||
return endOfData();
|
||||
}
|
||||
// Previous stream (and connection) must be manually closed in this iterator.
|
||||
if (currentIterator != null) {
|
||||
currentIterator.close();
|
||||
}
|
||||
|
||||
LOGGER.info("Subquery number : {}", numSubqueries);
|
||||
final Stream<AirbyteRecordData> stream = database.unsafeQuery(
|
||||
this::getOcPreparedStatement, sourceOperations::convertDatabaseRowToAirbyteRecordData);
|
||||
currentIterator = AutoCloseableIterators.fromStream(stream,
|
||||
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(pair.getName(), pair.getNamespace()));
|
||||
numSubqueries++;
|
||||
// If the current subquery has no records associated with it, the entire stream has been read.
|
||||
if (!currentIterator.hasNext()) {
|
||||
return endOfData();
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return currentIterator.next();
|
||||
}
|
||||
|
||||
private boolean shouldBuildNextSubquery() {
|
||||
// The next subquery should be built if (i) it is the first subquery in the sequence, or (ii) the
// previous subquery has finished.
|
||||
return (currentIterator == null || !currentIterator.hasNext());
|
||||
}
|
||||
|
||||
private PreparedStatement getOcPreparedStatement(final Connection connection) {
|
||||
try {
|
||||
final String tableName = pair.getName();
|
||||
final String schemaName = pair.getNamespace();
|
||||
final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, tableName,
|
||||
quoteString);
|
||||
LOGGER.info("Preparing query for table: {}", fullTableName);
|
||||
final String wrappedColumnNames = MssqlQueryUtils.getWrappedColumnNames(database, quoteString, columnNames, schemaName, tableName);
|
||||
final OrderedColumnLoadStatus ocLoadStatus = initialLoadStateManager.getOrderedColumnLoadStatus(pair);
|
||||
if (ocLoadStatus == null) {
|
||||
final String quotedCursorField = enquoteIdentifier(ocInfo.ocFieldName(), quoteString);
|
||||
final String sql;
|
||||
if (isCompositeKeyLoad) {
|
||||
sql = "SELECT %s FROM %s ORDER BY %s".formatted(wrappedColumnNames, fullTableName, quotedCursorField);
|
||||
} else {
|
||||
sql = "SELECT TOP %s %s FROM %s ORDER BY %s".formatted(chunkSize, wrappedColumnNames, fullTableName, quotedCursorField);
|
||||
}
|
||||
final PreparedStatement preparedStatement = connection.prepareStatement(sql);
|
||||
LOGGER.info("Executing query for table {}: {}", tableName, sql);
|
||||
return preparedStatement;
|
||||
} else {
|
||||
LOGGER.info("ocLoadStatus value is : {}", ocLoadStatus.getOrderedColVal());
|
||||
final String quotedCursorField = enquoteIdentifier(ocInfo.ocFieldName(), quoteString);
|
||||
final String sql;
|
||||
if (isCompositeKeyLoad) {
|
||||
sql = "SELECT %s FROM %s WHERE %s >= ? ORDER BY %s".formatted(wrappedColumnNames, fullTableName,
|
||||
quotedCursorField, quotedCursorField);
|
||||
} else {
|
||||
// The ordered column max value could be null - this can happen in the case of empty tables. In this
// case, we can just issue a query without any chunking.
|
||||
if (ocInfo.ocMaxValue() != null) {
|
||||
sql = "SELECT TOP %s %s FROM %s WHERE %s > ? AND %s <= ? ORDER BY %s".formatted(chunkSize, wrappedColumnNames, fullTableName,
|
||||
quotedCursorField, quotedCursorField, quotedCursorField);
|
||||
} else {
|
||||
sql = "SELECT %s FROM %s WHERE %s > ? ORDER BY %s".formatted(wrappedColumnNames, fullTableName,
|
||||
quotedCursorField, quotedCursorField);
|
||||
}
|
||||
}
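// Illustrative example (hypothetical table and column names): with chunkSize = 1000 and an ordered column
// "id", the non-composite resume query above renders roughly as
//   SELECT TOP 1000 "col1", "col2" FROM "dbo"."users" WHERE "id" > ? AND "id" <= ? ORDER BY "id"
// with the two parameters bound below to the last checkpointed ordered-column value and the pre-computed max value.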
|
||||
final PreparedStatement preparedStatement = connection.prepareStatement(sql);
|
||||
final JDBCType cursorFieldType = ocInfo.fieldType();
|
||||
sourceOperations.setCursorField(preparedStatement, 1, cursorFieldType, ocLoadStatus.getOrderedColVal());
|
||||
if (!isCompositeKeyLoad && ocInfo.ocMaxValue() != null) {
|
||||
sourceOperations.setCursorField(preparedStatement, 2, cursorFieldType, ocInfo.ocMaxValue());
|
||||
}
|
||||
LOGGER.info("Executing query for table {}: {}", tableName, sql);
|
||||
return preparedStatement;
|
||||
}
|
||||
} catch (final SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {
|
||||
if (currentIterator != null) {
|
||||
currentIterator.close();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isCdcSync(MssqlInitialLoadStateManager initialLoadStateManager) {
|
||||
if (initialLoadStateManager instanceof MssqlInitialLoadGlobalStateManager) {
|
||||
LOGGER.info("Running a cdc sync");
|
||||
return true;
|
||||
} else {
|
||||
LOGGER.info("Not running a cdc sync");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,112 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.SourceStateMessageProducer;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public abstract class MssqlInitialLoadStateManager implements SourceStateMessageProducer<AirbyteMessage> {
|
||||
|
||||
public static final long MSSQL_STATE_VERSION = 2;
|
||||
public static final String STATE_TYPE_KEY = "state_type";
|
||||
public static final String ORDERED_COL_STATE_TYPE = "ordered_column";
|
||||
protected Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToOrderedColLoadStatus;
|
||||
protected Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo;
|
||||
|
||||
private OrderedColumnLoadStatus ocStatus;
|
||||
|
||||
protected Function<AirbyteStreamNameNamespacePair, JsonNode> streamStateForIncrementalRunSupplier;
|
||||
|
||||
void setStreamStateForIncrementalRunSupplier(final Function<AirbyteStreamNameNamespacePair, JsonNode> streamStateForIncrementalRunSupplier) {
|
||||
this.streamStateForIncrementalRunSupplier = streamStateForIncrementalRunSupplier;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the {@link OrderedColumnLoadStatus} for the state associated with the given pair.
|
||||
*
|
||||
* @param pair pair
|
||||
* @param ocLoadStatus updated status
|
||||
*/
|
||||
public void updateOrderedColumnLoadState(final AirbyteStreamNameNamespacePair pair, final OrderedColumnLoadStatus ocLoadStatus) {
|
||||
pairToOrderedColLoadStatus.put(pair, ocLoadStatus);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the previous state emitted, represented as an {@link OrderedColumnLoadStatus} associated
* with the stream.
|
||||
*
|
||||
* @param pair pair
|
||||
* @return load status
|
||||
*/
|
||||
public OrderedColumnLoadStatus getOrderedColumnLoadStatus(final AirbyteStreamNameNamespacePair pair) {
|
||||
return pairToOrderedColLoadStatus.get(pair);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current {@link OrderedColumnInfo} associated with the stream. This includes the data type
* and the column name of the ordered column for the stream.
*
* @param pair pair
* @return ordered column info
|
||||
*/
|
||||
public OrderedColumnInfo getOrderedColumnInfo(final AirbyteStreamNameNamespacePair pair) {
|
||||
return pairToOrderedColInfo.get(pair);
|
||||
}
|
||||
|
||||
static Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> initPairToOrderedColumnLoadStatusMap(
|
||||
final Map<io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToOcStatus) {
|
||||
return pairToOcStatus.entrySet().stream()
|
||||
.collect(Collectors.toMap(
|
||||
e -> new AirbyteStreamNameNamespacePair(e.getKey().getName(), e.getKey().getNamespace()),
|
||||
Entry::getValue));
|
||||
}
|
||||
|
||||
protected JsonNode getIncrementalState(final AirbyteStreamNameNamespacePair pair) {
|
||||
final OrderedColumnLoadStatus currentOcLoadStatus = getOrderedColumnLoadStatus(pair);
|
||||
return (currentOcLoadStatus == null || currentOcLoadStatus.getIncrementalState() == null) ? streamStateForIncrementalRunSupplier.apply(pair)
|
||||
: currentOcLoadStatus.getIncrementalState();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteMessage processRecordMessage(final ConfiguredAirbyteStream stream, final AirbyteMessage message) {
|
||||
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
|
||||
final String ocFieldName = getOrderedColumnInfo(pair).ocFieldName();
|
||||
final String lastOcVal = message.getRecord().getData().get(ocFieldName).asText();
|
||||
ocStatus = new OrderedColumnLoadStatus()
|
||||
.withVersion(MSSQL_STATE_VERSION)
|
||||
.withStateType(StateType.ORDERED_COLUMN)
|
||||
.withOrderedCol(ocFieldName)
|
||||
.withOrderedColVal(lastOcVal)
|
||||
.withIncrementalState(getIncrementalState(stream));
|
||||
updateOrderedColumnLoadState(pair, ocStatus);
|
||||
return message;
|
||||
}
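// Illustrative example (hypothetical values, field names approximate): for a stream keyed on "id" whose last
// processed record had id = 4711, the status built above serializes roughly as
//   {"version": 2, "state_type": "ordered_column", "ordered_col": "id", "ordered_col_val": "4711", "incremental_state": {}}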
|
||||
|
||||
@Override
|
||||
public boolean shouldEmitStateMessage(final ConfiguredAirbyteStream stream) {
|
||||
return Objects.nonNull(getOrderedColumnInfo(new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace())));
|
||||
}
|
||||
|
||||
private JsonNode getIncrementalState(final ConfiguredAirbyteStream stream) {
|
||||
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
|
||||
final OrderedColumnLoadStatus currentOcLoadStatus = getOrderedColumnLoadStatus(pair);
|
||||
|
||||
return (currentOcLoadStatus == null || currentOcLoadStatus.getIncrementalState() == null)
|
||||
? streamStateForIncrementalRunSupplier.apply(pair)
|
||||
: currentOcLoadStatus.getIncrementalState();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.InitialLoadStreams;
|
||||
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil.OrderedColumnInfo;
|
||||
import io.airbyte.protocol.models.v0.*;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
|
||||
import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This state manager extends the StreamStateManager to enable writing the state_type and version
* keys to the stream state when they're going through the iterator. Once we have verified that
* expanding StreamStateManager itself to include this functionality is the correct approach, this class will be removed.
|
||||
*/
|
||||
public class MssqlInitialLoadStreamStateManager extends MssqlInitialLoadStateManager {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialLoadStateManager.class);
|
||||
|
||||
public MssqlInitialLoadStreamStateManager(final ConfiguredAirbyteCatalog catalog,
|
||||
final InitialLoadStreams initialLoadStreams,
|
||||
final Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOrderedColInfo) {
|
||||
this.pairToOrderedColInfo = pairToOrderedColInfo;
|
||||
this.pairToOrderedColLoadStatus = MssqlInitialLoadStateManager.initPairToOrderedColumnLoadStatusMap(initialLoadStreams.pairToInitialLoadStatus());
|
||||
this.streamStateForIncrementalRunSupplier = pair -> Jsons.emptyObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteStateMessage createFinalStateMessage(final ConfiguredAirbyteStream stream) {
|
||||
AirbyteStreamNameNamespacePair pair =
|
||||
new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
|
||||
final JsonNode incrementalState = getIncrementalState(pair);
|
||||
// If there is no incremental state, such as in the case of a full refresh, save the latest OC state.
|
||||
final JsonNode finalState;
|
||||
if (incrementalState == null || incrementalState.isEmpty()) {
|
||||
finalState = Jsons.jsonNode(getOrderedColumnLoadStatus(pair));
|
||||
} else {
|
||||
finalState = incrementalState;
|
||||
}
|
||||
return new AirbyteStateMessage()
|
||||
.withType(AirbyteStateType.STREAM)
|
||||
.withStream(getAirbyteStreamState(pair, finalState));
|
||||
}
|
||||
|
||||
@Override
|
||||
public AirbyteStateMessage generateStateMessageAtCheckpoint(final ConfiguredAirbyteStream stream) {
|
||||
AirbyteStreamNameNamespacePair pair =
|
||||
new io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
|
||||
var ocStatus = getOrderedColumnLoadStatus(pair);
|
||||
return new AirbyteStateMessage()
|
||||
.withType(AirbyteStateType.STREAM)
|
||||
.withStream(getAirbyteStreamState(pair, Jsons.jsonNode(ocStatus)));
|
||||
}
|
||||
|
||||
protected AirbyteStreamState getAirbyteStreamState(final AirbyteStreamNameNamespacePair pair, final JsonNode stateData) {
|
||||
Preconditions.checkNotNull(pair);
|
||||
Preconditions.checkNotNull(pair.getName());
|
||||
Preconditions.checkNotNull(pair.getNamespace());
|
||||
LOGGER.debug("State data for {}: {}", pair.getNamespace().concat("_").concat(pair.getName()), stateData);
|
||||
|
||||
return new AirbyteStreamState()
|
||||
.withStreamDescriptor(
|
||||
new StreamDescriptor().withName(pair.getName()).withNamespace(pair.getNamespace()))
|
||||
.withStreamState(stateData);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,585 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql.initialsync;
|
||||
|
||||
import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcCursorInvalidMessage;
|
||||
import static io.airbyte.cdk.db.DbAnalyticsUtils.cdcResyncMessage;
|
||||
import static io.airbyte.cdk.db.DbAnalyticsUtils.wassOccurrenceMessage;
|
||||
import static io.airbyte.cdk.db.jdbc.JdbcUtils.getFullyQualifiedTableName;
|
||||
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.FAIL_SYNC_OPTION;
|
||||
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.INVALID_CDC_CURSOR_POSITION_PROPERTY;
|
||||
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.RESYNC_DATA_OPTION;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlCdcHelper.getDebeziumProperties;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlQueryUtils.getTableSizeInfoForStreams;
|
||||
import static io.airbyte.integrations.source.mssql.cdc.MssqlCdcStateConstants.MSSQL_CDC_OFFSET;
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.ORDERED_COL_STATE_TYPE;
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.Sets;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
|
||||
import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumEventConverter;
|
||||
import io.airbyte.cdk.integrations.debezium.internals.RelationalDbDebeziumPropertiesManager;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.CdcStateManager;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.InitialLoadTimeoutUtil;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.TableInfo;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CdcState;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.OrderedColumnLoadStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.streamstatus.StreamStatusTraceEmitterIterator;
|
||||
import io.airbyte.commons.exceptions.ConfigErrorException;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.commons.stream.AirbyteStreamStatusHolder;
|
||||
import io.airbyte.commons.util.AutoCloseableIterator;
|
||||
import io.airbyte.commons.util.AutoCloseableIterators;
|
||||
import io.airbyte.integrations.source.mssql.*;
|
||||
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
|
||||
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
|
||||
import io.airbyte.protocol.models.CommonField;
|
||||
import io.airbyte.protocol.models.v0.*;
|
||||
import io.debezium.connector.sqlserver.Lsn;
|
||||
import java.sql.JDBCType;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MssqlInitialReadUtil {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlInitialReadUtil.class);
|
||||
private static final int MIN_QUEUE_SIZE = 1000;
|
||||
private static final int MAX_QUEUE_SIZE = 10000;
|
||||
|
||||
public record InitialLoadStreams(List<ConfiguredAirbyteStream> streamsForInitialLoad,
|
||||
Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToInitialLoadStatus) {
|
||||
|
||||
}
|
||||
|
||||
public record CursorBasedStreams(List<ConfiguredAirbyteStream> streamsForCursorBased,
|
||||
Map<AirbyteStreamNameNamespacePair, CursorBasedStatus> pairToCursorBasedStatus) {
|
||||
|
||||
}
|
||||
|
||||
public record OrderedColumnInfo(String ocFieldName, JDBCType fieldType, String ocMaxValue) {}
|
||||
|
||||
public static Optional<MssqlInitialLoadHandler> getMssqlFullRefreshInitialLoadHandler(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final MssqlInitialLoadStateManager initialLoadStateManager,
|
||||
final StateManager stateManager,
|
||||
final ConfiguredAirbyteStream fullRefreshStream,
|
||||
final Instant emittedAt,
|
||||
final String quoteString) {
|
||||
final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager);
|
||||
final InitialLoadStreams initialLoadStreams =
|
||||
cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer);
|
||||
|
||||
// The state manager needs to know about all streams in order to produce a state message,
// but for the initial load handler we only want to produce an iterator for the single full refresh stream.
|
||||
if (!initialLoadStreams.streamsForInitialLoad().isEmpty()) {
|
||||
// Filter on initialLoadStream
|
||||
var pair = new AirbyteStreamNameNamespacePair(fullRefreshStream.getStream().getName(), fullRefreshStream.getStream().getNamespace());
|
||||
var ocStatus = initialLoadStreams.pairToInitialLoadStatus.get(pair);
|
||||
Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> fullRefreshOcStatus;
|
||||
if (ocStatus == null) {
|
||||
fullRefreshOcStatus = Map.of();
|
||||
} else {
|
||||
fullRefreshOcStatus = Map.of(pair, ocStatus);
|
||||
}
|
||||
|
||||
var fullRefreshStreamInitialLoad = new InitialLoadStreams(List.of(fullRefreshStream), fullRefreshOcStatus);
|
||||
return Optional
|
||||
.of(getMssqlInitialLoadHandler(database, emittedAt, quoteString, fullRefreshStreamInitialLoad, initialLoadStateManager, Optional.empty()));
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
private static MssqlInitialLoadHandler getMssqlInitialLoadHandler(final JdbcDatabase database,
|
||||
final Instant emittedAt,
|
||||
final String quoteString,
|
||||
final InitialLoadStreams initialLoadStreams,
|
||||
final MssqlInitialLoadStateManager initialLoadStateManager,
|
||||
final Optional<CdcMetadataInjector> metadataInjector) {
|
||||
final JsonNode sourceConfig = database.getSourceConfig();
|
||||
|
||||
final MssqlSourceOperations sourceOperations = new MssqlSourceOperations(metadataInjector);
|
||||
|
||||
return new MssqlInitialLoadHandler(sourceConfig, database,
|
||||
sourceOperations, quoteString, initialLoadStateManager,
|
||||
Optional.empty(),
|
||||
getTableSizeInfoForStreams(database, initialLoadStreams.streamsForInitialLoad(), quoteString));
|
||||
}
|
||||
|
||||
private static CdcState getCdcState(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final StateManager stateManager,
|
||||
final boolean savedOffsetStillPresentOnServer) {
|
||||
if (!savedOffsetStillPresentOnServer || (stateManager.getCdcStateManager().getCdcState() == null
|
||||
|| stateManager.getCdcStateManager().getCdcState().getState() == null)) {
|
||||
// Construct the initial state for MSSQL. If state already exists, we use that instead,
// since it holds the Debezium state associated with the initial sync.
|
||||
final JsonNode initialDebeziumState = MssqlDebeziumStateUtil.constructInitialDebeziumState(
|
||||
getDebeziumProperties(database, catalog, false), catalog, database);
|
||||
return new CdcState().withState(initialDebeziumState);
|
||||
} else {
|
||||
return stateManager.getCdcStateManager().getCdcState();
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isSavedOffsetStillPresentOnServer(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final StateManager stateManager) {
|
||||
final MssqlDebeziumStateUtil mssqlDebeziumStateUtil = new MssqlDebeziumStateUtil();
|
||||
final JsonNode sourceConfig = database.getSourceConfig();
|
||||
|
||||
final JsonNode state =
|
||||
(stateManager.getCdcStateManager().getCdcState() == null || stateManager.getCdcStateManager().getCdcState().getState() == null)
|
||||
? MssqlDebeziumStateUtil.constructInitialDebeziumState(getDebeziumProperties(database, catalog, false), catalog, database)
|
||||
: Jsons.clone(stateManager.getCdcStateManager().getCdcState().getState());
|
||||
|
||||
final Optional<MssqlDebeziumStateAttributes> savedOffset = mssqlDebeziumStateUtil.savedOffset(
|
||||
getDebeziumProperties(database, catalog, true), catalog, state.get(MSSQL_CDC_OFFSET), sourceConfig);
|
||||
|
||||
final boolean savedOffsetStillPresentOnServer =
|
||||
savedOffset.isPresent() && mssqlDebeziumStateUtil.savedOffsetStillPresentOnServer(database, savedOffset.get());
|
||||
|
||||
if (!savedOffsetStillPresentOnServer) {
|
||||
AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcCursorInvalidMessage());
|
||||
if (!sourceConfig.get("replication_method").has(INVALID_CDC_CURSOR_POSITION_PROPERTY) || sourceConfig.get("replication_method").get(
|
||||
INVALID_CDC_CURSOR_POSITION_PROPERTY).asText().equals(FAIL_SYNC_OPTION)) {
|
||||
throw new ConfigErrorException(
|
||||
"Saved offset no longer present on the server. Please reset the connection, and then increase binlog retention and/or increase sync frequency.");
|
||||
} else if (sourceConfig.get("replication_method").get(INVALID_CDC_CURSOR_POSITION_PROPERTY).asText().equals(RESYNC_DATA_OPTION)) {
|
||||
AirbyteTraceMessageUtility.emitAnalyticsTrace(cdcResyncMessage());
|
||||
LOGGER.warn("Saved offset no longer present on the server, Airbyte is going to trigger a sync from scratch");
|
||||
}
|
||||
}
|
||||
return savedOffsetStillPresentOnServer;
|
||||
}
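// Illustrative flow: when the saved Debezium offset is no longer available on the server, the connector either
// fails the sync (FAIL_SYNC_OPTION, the default when the property is absent) or triggers a re-sync from scratch
// (RESYNC_DATA_OPTION), depending on the INVALID_CDC_CURSOR_POSITION_PROPERTY setting inside the
// "replication_method" config block.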
|
||||
|
||||
public static MssqlInitialLoadGlobalStateManager getMssqlInitialLoadGlobalStateManager(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final StateManager stateManager,
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final String quoteString) {
|
||||
final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager);
|
||||
final InitialLoadStreams initialLoadStreams =
|
||||
cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer);
|
||||
final CdcState initialStateToBeUsed = getCdcState(database, catalog, stateManager, savedOffsetStillPresentOnServer);
|
||||
|
||||
return new MssqlInitialLoadGlobalStateManager(initialLoadStreams,
|
||||
initPairToOrderedColumnInfoMap(database, catalog, tableNameToTable, quoteString),
|
||||
stateManager, catalog, initialStateToBeUsed);
|
||||
}
|
||||
|
||||
public static List<AutoCloseableIterator<AirbyteMessage>> getCdcReadIterators(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final StateManager stateManager,
|
||||
final MssqlInitialLoadStateManager initialLoadStateManager,
|
||||
final Instant emittedAt,
|
||||
final String quoteString) {
|
||||
final JsonNode sourceConfig = database.getSourceConfig();
|
||||
final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig);
|
||||
LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds());
|
||||
final int queueSize = getQueueSize(sourceConfig);
|
||||
LOGGER.info("Queue size: {}", queueSize);
|
||||
final Duration initialLoadTimeout = InitialLoadTimeoutUtil.getInitialLoadTimeout(sourceConfig);
|
||||
// Determine the streams that need to be loaded via primary key sync.
|
||||
final List<AutoCloseableIterator<AirbyteMessage>> initialLoadIterator = new ArrayList<>();
|
||||
final boolean savedOffsetStillPresentOnServer = isSavedOffsetStillPresentOnServer(database, catalog, stateManager);
|
||||
final InitialLoadStreams initialLoadStreams =
|
||||
cdcStreamsForInitialOrderedColumnLoad(stateManager.getCdcStateManager(), catalog, savedOffsetStillPresentOnServer);
|
||||
final MssqlCdcConnectorMetadataInjector metadataInjector = MssqlCdcConnectorMetadataInjector.getInstance(emittedAt);
|
||||
final CdcState stateToBeUsed = getCdcState(database, catalog, stateManager, savedOffsetStillPresentOnServer);
|
||||
|
||||
// Debezium is started for streams whose initial load has already been started - that is, streams that have
// been partially or fully completed.
|
||||
final var startedCdcStreamList = catalog.getStreams().stream()
|
||||
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
|
||||
.filter(stream -> isStreamPartiallyOrFullyCompleted(stream, initialLoadStreams))
|
||||
.map(stream -> stream.getStream().getNamespace() + "." + stream.getStream().getName()).toList();
|
||||
|
||||
final var allCdcStreamList = catalog.getStreams().stream()
|
||||
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
|
||||
.map(stream -> stream.getStream().getNamespace() + "." + stream.getStream().getName()).toList();
|
||||
|
||||
// If there are streams to sync via ordered column load, build the relevant iterators.
|
||||
if (!initialLoadStreams.streamsForInitialLoad().isEmpty()) {
|
||||
final MssqlDebeziumStateAttributes stateAttributes = MssqlDebeziumStateUtil.getStateAttributesFromDB(database);
|
||||
final MssqlInitialLoadHandler initialLoadHandler =
|
||||
getMssqlInitialLoadHandler(database, emittedAt, quoteString, initialLoadStreams, initialLoadStateManager,
|
||||
Optional.of(new CdcMetadataInjector(emittedAt.toString(), stateAttributes, metadataInjector)));
|
||||
// Because initial load streams will be followed by a CDC read of those streams, we only emit the
// complete status trace after the CDC read is done.
|
||||
initialLoadIterator.addAll(initialLoadHandler.getIncrementalIterators(
|
||||
new ConfiguredAirbyteCatalog().withStreams(initialLoadStreams.streamsForInitialLoad()),
|
||||
tableNameToTable,
|
||||
emittedAt, false, false, Optional.empty()));
|
||||
}
|
||||
|
||||
final List<AutoCloseableIterator<AirbyteMessage>> cdcStreamsStartStatusEmitters = catalog.getStreams().stream()
|
||||
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
|
||||
.map(stream -> (AutoCloseableIterator<AirbyteMessage>) new StreamStatusTraceEmitterIterator(
|
||||
new AirbyteStreamStatusHolder(
|
||||
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()),
|
||||
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.STARTED)))
|
||||
.toList();
|
||||
|
||||
final List<AutoCloseableIterator<AirbyteMessage>> cdcStreamsEndStatusEmitters = catalog.getStreams().stream()
|
||||
.filter(stream -> stream.getSyncMode() == SyncMode.INCREMENTAL)
|
||||
.map(stream -> (AutoCloseableIterator<AirbyteMessage>) new StreamStatusTraceEmitterIterator(
|
||||
new AirbyteStreamStatusHolder(
|
||||
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()),
|
||||
AirbyteStreamStatusTraceMessage.AirbyteStreamStatus.COMPLETE)))
|
||||
.toList();
|
||||
|
||||
// Build the incremental CDC iterators.
|
||||
final var targetPosition = MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText());
|
||||
final AirbyteDebeziumHandler<Lsn> handler = new AirbyteDebeziumHandler<>(
|
||||
sourceConfig,
|
||||
targetPosition,
|
||||
true,
|
||||
firstRecordWaitTime,
|
||||
queueSize,
|
||||
false);
|
||||
|
||||
final var eventConverter = new RelationalDbDebeziumEventConverter(metadataInjector, emittedAt);
|
||||
|
||||
if (startedCdcStreamList.isEmpty()) {
|
||||
LOGGER.info("First sync - no cdc streams have been completed or started");
|
||||
/*
|
||||
* This is the first run case - no initial loads have been started. In this case, we want to run the
|
||||
* iterators in the following order: 1. Run the initial load iterators. This step will time out and
|
||||
* throw a transient error if run for too long (> 8hrs by default). 2. Run the debezium iterators
|
||||
* with ALL of the incremental streams configured. This is because if step 1 completes, the initial
|
||||
* load can be considered finished.
|
||||
*/
|
||||
final var propertiesManager =
|
||||
new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, allCdcStreamList);
|
||||
final Supplier<AutoCloseableIterator<AirbyteMessage>> incrementalIteratorsSupplier = getCdcIncrementalIteratorsSupplier(handler,
|
||||
propertiesManager, eventConverter, stateToBeUsed, stateManager);
|
||||
return Collections.singletonList(
|
||||
AutoCloseableIterators.concatWithEagerClose(
|
||||
Stream
|
||||
.of(
|
||||
cdcStreamsStartStatusEmitters,
|
||||
initialLoadIterator,
|
||||
Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorsSupplier, null)),
|
||||
cdcStreamsEndStatusEmitters)
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList()),
|
||||
AirbyteTraceMessageUtility::emitStreamStatusTrace));
|
||||
} else if (initialLoadIterator.isEmpty()) {
|
||||
LOGGER.info("Initial load has finished completely - only reading the binlog");
|
||||
/*
|
||||
* In this case, the initial load has completed and only debezium should be run. The iterators
|
||||
* should be run in the following order: 1. Run the debezium iterators with ALL of the incremental
|
||||
* streams configured.
|
||||
*/
|
||||
final var propertiesManager =
|
||||
new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, allCdcStreamList);
|
||||
final Supplier<AutoCloseableIterator<AirbyteMessage>> incrementalIteratorSupplier = getCdcIncrementalIteratorsSupplier(handler,
|
||||
propertiesManager, eventConverter, stateToBeUsed, stateManager);
|
||||
return Collections.singletonList(
|
||||
AutoCloseableIterators.concatWithEagerClose(
|
||||
Stream
|
||||
.of(
|
||||
cdcStreamsStartStatusEmitters,
|
||||
Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorSupplier, null)),
|
||||
cdcStreamsEndStatusEmitters)
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList()),
|
||||
AirbyteTraceMessageUtility::emitStreamStatusTrace));
|
||||
} else {
|
||||
LOGGER.info("Initial load is in progress - reading binlog first and then resuming with initial load.");
|
||||
/*
|
||||
* In this case, the initial load has partially completed (WASS case). The iterators should be run
|
||||
* in the following order: 1. Run the debezium iterators with only the incremental streams which
|
||||
* have been fully or partially completed configured. 2. Resume initial load for partially completed
|
||||
* and not started streams. This step will time out and throw a transient error if run for too long
|
||||
* (> 8hrs by default).
|
||||
*/
|
||||
AirbyteTraceMessageUtility.emitAnalyticsTrace(wassOccurrenceMessage());
|
||||
final var propertiesManager =
|
||||
new RelationalDbDebeziumPropertiesManager(getDebeziumProperties(database, catalog, false), sourceConfig, catalog, startedCdcStreamList);
|
||||
final Supplier<AutoCloseableIterator<AirbyteMessage>> incrementalIteratorSupplier = getCdcIncrementalIteratorsSupplier(handler,
|
||||
propertiesManager, eventConverter, stateToBeUsed, stateManager);
|
||||
return Collections.singletonList(
|
||||
AutoCloseableIterators.concatWithEagerClose(
|
||||
Stream
|
||||
.of(
|
||||
cdcStreamsStartStatusEmitters,
|
||||
Collections.singletonList(AutoCloseableIterators.lazyIterator(incrementalIteratorSupplier, null)),
|
||||
initialLoadIterator,
|
||||
cdcStreamsEndStatusEmitters)
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList()),
|
||||
AirbyteTraceMessageUtility::emitStreamStatusTrace));
|
||||
}
|
||||
}
|
||||
|
||||
public static InitialLoadStreams cdcStreamsForInitialOrderedColumnLoad(final CdcStateManager stateManager,
|
||||
final ConfiguredAirbyteCatalog fullCatalog,
|
||||
final boolean savedOffsetStillPresentOnServer) {
|
||||
if (!savedOffsetStillPresentOnServer) {
|
||||
// Add a filter here to identify resumable full refresh streams.
|
||||
return new InitialLoadStreams(
|
||||
fullCatalog.getStreams()
|
||||
.stream()
|
||||
.collect(Collectors.toList()),
|
||||
new HashMap<>());
|
||||
}
|
||||
final AirbyteStateMessage airbyteStateMessage = stateManager.getRawStateMessage();
|
||||
final Set<AirbyteStreamNameNamespacePair> streamsStillInOcSync = new HashSet<>();
|
||||
|
||||
// Build a map of stream <-> initial load status for streams that currently have an initial primary
|
||||
// key load in progress.
|
||||
final Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToInitialLoadStatus = new HashMap<>();
|
||||
if (airbyteStateMessage != null && airbyteStateMessage.getGlobal() != null && airbyteStateMessage.getGlobal().getStreamStates() != null) {
|
||||
LOGGER.info("Trying to extract streams need initial oc sync. State message: {}", airbyteStateMessage);
|
||||
airbyteStateMessage.getGlobal().getStreamStates().forEach(stateMessage -> {
|
||||
LOGGER.info("State message in this stream: {}", stateMessage);
|
||||
final JsonNode streamState = stateMessage.getStreamState();
|
||||
final StreamDescriptor streamDescriptor = stateMessage.getStreamDescriptor();
|
||||
if (streamState == null || streamDescriptor == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (streamState.has(STATE_TYPE_KEY)) {
|
||||
if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(ORDERED_COL_STATE_TYPE)) {
|
||||
final OrderedColumnLoadStatus orderedColumnLoadStatus = Jsons.object(streamState, OrderedColumnLoadStatus.class);
|
||||
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(),
|
||||
streamDescriptor.getNamespace());
|
||||
pairToInitialLoadStatus.put(pair, orderedColumnLoadStatus);
|
||||
streamsStillInOcSync.add(pair);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
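// Illustrative global state entry (hypothetical values, field names approximate): a stream whose streamState is
//   {"state_type": "ordered_column", "version": 2, "ordered_col": "id", "ordered_col_val": "4711"}
// is picked up above as still being in its initial ordered-column sync.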
|
||||
|
||||
final List<ConfiguredAirbyteStream> streamForOcSync = new ArrayList<>();
|
||||
fullCatalog.getStreams().stream()
|
||||
.filter(stream -> streamsStillInOcSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())))
|
||||
.map(Jsons::clone)
|
||||
.forEach(streamForOcSync::add);
|
||||
final List<ConfiguredAirbyteStream> newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog, stateManager.getInitialStreamsSynced());
|
||||
streamForOcSync.addAll(newlyAddedStreams);
|
||||
|
||||
return new InitialLoadStreams(streamForOcSync, pairToInitialLoadStatus);
|
||||
}
|
||||
|
||||
public static Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> initPairToOrderedColumnInfoMap(
|
||||
final JdbcDatabase database,
|
||||
final ConfiguredAirbyteCatalog catalog,
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final String quoteString) {
|
||||
final Map<AirbyteStreamNameNamespacePair, OrderedColumnInfo> pairToOcInfoMap = new HashMap<>();
|
||||
// For every stream that is in initial ordered column sync, we want to maintain information about
|
||||
// the current ordered column info associated with the stream
|
||||
catalog.getStreams().forEach(stream -> {
|
||||
final AirbyteStreamNameNamespacePair pair =
|
||||
new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace());
|
||||
final Optional<OrderedColumnInfo> ocInfo = getOrderedColumnInfo(database, stream, tableNameToTable, quoteString);
|
||||
if (ocInfo.isPresent()) {
|
||||
pairToOcInfoMap.put(pair, ocInfo.get());
|
||||
}
|
||||
});
|
||||
return pairToOcInfoMap;
|
||||
}
|
||||
|
||||
static Optional<OrderedColumnInfo> getOrderedColumnInfo(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteStream stream,
|
||||
final Map<String, TableInfo<CommonField<JDBCType>>> tableNameToTable,
|
||||
final String quoteString) {
|
||||
final String fullyQualifiedTableName =
|
||||
DbSourceDiscoverUtil.getFullyQualifiedTableName(stream.getStream().getNamespace(), stream.getStream().getName());
|
||||
final TableInfo<CommonField<JDBCType>> table = tableNameToTable
|
||||
.get(fullyQualifiedTableName);
|
||||
return getOrderedColumnInfo(database, stream, table, quoteString);
|
||||
}
|
||||
|
||||
static Optional<OrderedColumnInfo> getOrderedColumnInfo(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteStream stream,
|
||||
final TableInfo<CommonField<JDBCType>> table,
|
||||
final String quoteString) {
|
||||
// For cursor-based syncs, we cannot always assume an ordered column field exists. We need to handle
// the case where it does not exist when we support cursor-based syncs.
|
||||
// if (stream.getStream().getSourceDefinedPrimaryKey().size() > 1) {
|
||||
// LOGGER.info("Composite primary key detected for {namespace, stream} : {}, {}",
|
||||
// stream.getStream().getNamespace(), stream.getStream().getName());
|
||||
// }
|
||||
Optional<String> ocFieldNameOpt = selectOcFieldName(database, stream);
|
||||
if (ocFieldNameOpt.isEmpty()) {
|
||||
LOGGER.info("No primary key or clustered index found for stream: " + stream.getStream().getName());
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
String ocFieldName = ocFieldNameOpt.get();
|
||||
LOGGER.info("selected ordered column field name: " + ocFieldName);
|
||||
final JDBCType ocFieldType = table.getFields().stream()
|
||||
.filter(field -> field.getName().equals(ocFieldName))
|
||||
.findFirst().get().getType();
|
||||
final String ocMaxValue = MssqlQueryUtils.getMaxOcValueForStream(database, stream, ocFieldName, quoteString);
|
||||
return Optional.of(new OrderedColumnInfo(ocFieldName, ocFieldType, ocMaxValue));
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public static Optional<String> selectOcFieldName(final JdbcDatabase database,
|
||||
final ConfiguredAirbyteStream stream) {
|
||||
|
||||
final Map<String, List<String>> clusteredIndexField = MssqlInitialLoadHandler.discoverClusteredIndexForStream(database, stream.getStream());
|
||||
final String streamName = getFullyQualifiedTableName(stream.getStream().getNamespace(), stream.getStream().getName());
|
||||
List<List<String>> primaryKey = stream.getStream().getSourceDefinedPrimaryKey();
|
||||
if (primaryKey.isEmpty()) {
|
||||
LOGGER.info("Stream does not have source defined primary key: " + stream.getStream().getName());
|
||||
LOGGER.info("Trying to use logical primary key.");
|
||||
primaryKey = stream.getPrimaryKey();
|
||||
}
|
||||
final String ocFieldName;
|
||||
|
||||
final List<String> clusterColumns = Optional.ofNullable(clusteredIndexField)
|
||||
.map(map -> map.get(streamName))
|
||||
.orElse(new ArrayList<>());
|
||||
|
||||
// Use the clustered index unless it is composite. Otherwise, default to the primary key.
|
||||
if (clusterColumns.size() == 1) {
|
||||
ocFieldName = clusterColumns.getFirst();
|
||||
} else if (!primaryKey.isEmpty()) {
|
||||
LOGGER.info("Clustered index is empty or composite. Defaulting to primary key.");
|
||||
ocFieldName = primaryKey.getFirst().getFirst();
|
||||
} else {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(ocFieldName);
|
||||
}
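// Illustrative example (hypothetical schema): for a table with a single-column clustered index on "created_at"
// and a primary key on "id", "created_at" is chosen as the ordered column; if the clustered index were composite
// (e.g. on (tenant_id, id)), the first primary key column "id" would be used instead.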
|
||||
|
||||
public static List<ConfiguredAirbyteStream> identifyStreamsToSnapshot(final ConfiguredAirbyteCatalog catalog,
|
||||
final Set<AirbyteStreamNameNamespacePair> alreadySyncedStreams) {
|
||||
final Set<AirbyteStreamNameNamespacePair> allStreams = AirbyteStreamNameNamespacePair.fromConfiguredCatalog(catalog);
|
||||
final Set<AirbyteStreamNameNamespacePair> newlyAddedStreams = new HashSet<>(Sets.difference(allStreams, alreadySyncedStreams));
|
||||
// Add a filter here to identify resumable full refresh streams.
|
||||
return catalog.getStreams().stream()
|
||||
.filter(stream -> newlyAddedStreams.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())))
|
||||
.map(Jsons::clone)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static InitialLoadStreams streamsForInitialOrderedColumnLoad(final StateManager stateManager,
|
||||
final ConfiguredAirbyteCatalog fullCatalog) {
|
||||
|
||||
final List<AirbyteStateMessage> rawStateMessages = stateManager.getRawStateMessages();
|
||||
final Set<AirbyteStreamNameNamespacePair> streamsStillInOrderedColumnSync = new HashSet<>();
|
||||
final Set<AirbyteStreamNameNamespacePair> alreadySeenStreamPairs = new HashSet<>();
|
||||
|
||||
// Build a map of stream <-> initial load status for streams that currently have an initial primary
|
||||
// key load in progress.
|
||||
final Map<AirbyteStreamNameNamespacePair, OrderedColumnLoadStatus> pairToInitialLoadStatus = new HashMap<>();
|
||||
LOGGER.info("raw state message: " + rawStateMessages);
|
||||
if (rawStateMessages != null) {
|
||||
rawStateMessages.forEach(stateMessage -> {
|
||||
final AirbyteStreamState stream = stateMessage.getStream();
|
||||
final JsonNode streamState = stream.getStreamState();
|
||||
final StreamDescriptor streamDescriptor = stateMessage.getStream().getStreamDescriptor();
|
||||
if (streamState == null || streamDescriptor == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair(streamDescriptor.getName(),
|
||||
streamDescriptor.getNamespace());
|
||||
|
||||
// Build a map of stream <-> initial load status for streams that currently have an initial primary
|
||||
// key load in progress.
|
||||
|
||||
if (streamState.has(STATE_TYPE_KEY)) {
|
||||
if (streamState.get(STATE_TYPE_KEY).asText().equalsIgnoreCase(ORDERED_COL_STATE_TYPE)) {
|
||||
final OrderedColumnLoadStatus orderedColumnLoadStatus = Jsons.object(streamState, OrderedColumnLoadStatus.class);
|
||||
pairToInitialLoadStatus.put(pair, orderedColumnLoadStatus);
|
||||
streamsStillInOrderedColumnSync.add(pair);
|
||||
}
|
||||
}
|
||||
alreadySeenStreamPairs.add(new AirbyteStreamNameNamespacePair(streamDescriptor.getName(), streamDescriptor.getNamespace()));
|
||||
});
|
||||
}
|
||||
final List<ConfiguredAirbyteStream> streamsForOcSync = new ArrayList<>();
|
||||
LOGGER.info("alreadySeenStreamPairs: {}", alreadySeenStreamPairs);
|
||||
fullCatalog.getStreams().stream()
|
||||
.filter(stream -> streamsStillInOrderedColumnSync.contains(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())))
|
||||
.map(Jsons::clone)
|
||||
.forEach(streamsForOcSync::add);
|
||||
|
||||
final List<ConfiguredAirbyteStream> newlyAddedStreams = identifyStreamsToSnapshot(fullCatalog,
|
||||
Collections.unmodifiableSet(alreadySeenStreamPairs));
|
||||
streamsForOcSync.addAll(newlyAddedStreams);
|
||||
LOGGER.info("streamsForOcSync: {}", streamsForOcSync);
|
||||
return new InitialLoadStreams(streamsForOcSync.stream().filter((stream) -> !stream.getStream().getSourceDefinedPrimaryKey()
|
||||
.isEmpty()).collect(Collectors.toList()),
|
||||
pairToInitialLoadStatus);
|
||||
}
|
||||
|
||||
private static OptionalInt extractQueueSizeFromConfig(final JsonNode config) {
|
||||
final JsonNode replicationMethod = config.get("replication_method");
|
||||
if (replicationMethod != null && replicationMethod.has("queue_size")) {
|
||||
final int queueSize = config.get("replication_method").get("queue_size").asInt();
|
||||
return OptionalInt.of(queueSize);
|
||||
}
|
||||
return OptionalInt.empty();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static Supplier<AutoCloseableIterator<AirbyteMessage>> getCdcIncrementalIteratorsSupplier(AirbyteDebeziumHandler handler,
|
||||
RelationalDbDebeziumPropertiesManager propertiesManager,
|
||||
DebeziumEventConverter eventConverter,
|
||||
CdcState stateToBeUsed,
|
||||
StateManager stateManager) {
|
||||
return () -> handler.getIncrementalIterators(
|
||||
propertiesManager, eventConverter, new MssqlCdcSavedInfoFetcher(stateToBeUsed), new MssqlCdcStateHandler(stateManager));
|
||||
}
|
||||
|
||||
private static boolean isStreamPartiallyOrFullyCompleted(ConfiguredAirbyteStream stream, InitialLoadStreams initialLoadStreams) {
|
||||
boolean isStreamCompleted = !initialLoadStreams.streamsForInitialLoad.contains(stream);
|
||||
// A stream has been partially completed if an initial load status exists.
|
||||
boolean isStreamPartiallyCompleted = (initialLoadStreams.pairToInitialLoadStatus
|
||||
.get(new AirbyteStreamNameNamespacePair(stream.getStream().getName(), stream.getStream().getNamespace()))) != null;
|
||||
return isStreamCompleted || isStreamPartiallyCompleted;
|
||||
}
|
||||
|
||||
public static int getQueueSize(final JsonNode config) {
|
||||
final OptionalInt sizeFromConfig = extractQueueSizeFromConfig(config);
|
||||
if (sizeFromConfig.isPresent()) {
|
||||
final int size = sizeFromConfig.getAsInt();
|
||||
if (size < MIN_QUEUE_SIZE) {
|
||||
LOGGER.warn("Queue size is overridden to {} , which is the min allowed for safety.",
|
||||
MIN_QUEUE_SIZE);
|
||||
return MIN_QUEUE_SIZE;
|
||||
} else if (size > MAX_QUEUE_SIZE) {
|
||||
LOGGER.warn("Queue size is overridden to {} , which is the max allowed for safety.",
|
||||
MAX_QUEUE_SIZE);
|
||||
return MAX_QUEUE_SIZE;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
return MAX_QUEUE_SIZE;
|
||||
}
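// Illustrative behavior: a configured "queue_size" of 500 is clamped up to MIN_QUEUE_SIZE (1000), 50000 is
// clamped down to MAX_QUEUE_SIZE (10000), and an absent value defaults to MAX_QUEUE_SIZE.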
|
||||
|
||||
public static InitialLoadStreams filterStreamInIncrementalMode(final InitialLoadStreams stream) {
|
||||
return new InitialLoadStreams(
|
||||
stream.streamsForInitialLoad.stream().filter(airbyteStream -> airbyteStream.getSyncMode() == SyncMode.INCREMENTAL)
|
||||
.collect(Collectors.toList()),
|
||||
stream.pairToInitialLoadStatus);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.integrations.util.ConnectorErrorProfile
|
||||
import io.airbyte.cdk.integrations.util.ConnectorExceptionHandler
|
||||
import io.airbyte.cdk.integrations.util.FailureType
|
||||
|
||||
class MSSqlSourceExceptionHandler : ConnectorExceptionHandler() {
|
||||
override fun initializeErrorDictionary() {
|
||||
|
||||
// include common error profiles
|
||||
super.initializeErrorDictionary()
|
||||
|
||||
// adding connector specific error profiles
|
||||
add(
|
||||
ConnectorErrorProfile(
|
||||
errorClass = "MSSQL Exception",
|
||||
regexMatchingPattern =
|
||||
"(?i).*returned an incomplete response. The connection has been closed.*",
|
||||
failureType = FailureType.TRANSIENT,
|
||||
externalMessage =
|
||||
"(?i).*returned an incomplete response. The connection has been closed.*",
|
||||
sampleInternalMessage =
|
||||
"com.microsoft.sqlserver.jdbc.SQLServerException: SQL Server returned an incomplete response. The connection has been closed.",
|
||||
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6623")
|
||||
),
|
||||
)
|
||||
add(
|
||||
ConnectorErrorProfile(
|
||||
errorClass = "MSSQL Exception",
|
||||
regexMatchingPattern =
|
||||
"(?i).*SQL Server did not return a response. The connection has been closed.*",
|
||||
failureType = FailureType.TRANSIENT,
|
||||
externalMessage =
|
||||
"Encountered an error while reading from the database, will retry",
|
||||
sampleInternalMessage =
|
||||
"com.microsoft.sqlserver.jdbc.SQLServerException: SQL Server did not return a response. The connection has been closed.",
|
||||
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/7757")
|
||||
),
|
||||
)
|
||||
add(
|
||||
ConnectorErrorProfile(
|
||||
errorClass = "MSSQL Exception",
|
||||
regexMatchingPattern = "(?i).*The connection is closed.*",
|
||||
failureType = FailureType.TRANSIENT,
|
||||
externalMessage = "The SQL Server connection was unexpectedly closed, will retry.",
|
||||
sampleInternalMessage =
|
||||
"com.microsoft.sqlserver.jdbc.SQLServerException: The connection is closed.",
|
||||
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6438")
|
||||
),
|
||||
)
|
||||
add(
|
||||
// Error 1205
|
||||
// https://learn.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-1205-database-engine-error
|
||||
ConnectorErrorProfile(
|
||||
errorClass = "MSSQL Exception",
|
||||
regexMatchingPattern =
|
||||
"(?i).*was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction.*",
|
||||
failureType = FailureType.TRANSIENT,
|
||||
externalMessage =
|
||||
"Transaction conflicted with another process and was terminated, will retry.",
|
||||
sampleInternalMessage =
|
||||
"com.microsoft.sqlserver.jdbc.SQLServerException: " +
|
||||
"Transaction (Process ID 63) was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction.",
|
||||
referenceLinks = listOf("https://github.com/airbytehq/oncall/issues/6287")
|
||||
),
|
||||
)
|
||||
// This error occurs when Debezium encounters an exception.
|
||||
// We classify it as TRANSIENT since it may be resolved through automatic retries but can
|
||||
// also require investigation and manual intervention.
|
||||
add(
|
||||
ConnectorErrorProfile(
|
||||
errorClass = "Connect Exception",
|
||||
regexMatchingPattern = "(?i).*exception occurred in the change event producer.*",
|
||||
failureType = FailureType.TRANSIENT,
|
||||
externalMessage =
|
||||
"The sync encountered an unexpected error in the change event producer and has stopped. Please check the logs for details and troubleshoot accordingly.",
|
||||
sampleInternalMessage =
|
||||
"java.lang.RuntimeException: org.apache.kafka.connect.errors.ConnectException: " +
|
||||
"An exception occurred in the change event producer. This connector will be stopped.",
|
||||
referenceLinks =
|
||||
listOf(
|
||||
"https://docs.oracle.com/javase/9/docs/api/java/lang/RuntimeException.html"
|
||||
)
|
||||
),
|
||||
)
|
||||
}
|
||||
}
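// A usage sketch (assuming the CDK's ConnectorExceptionHandler contract): when a sync fails, the exception
// message is matched against each regexMatchingPattern above, and a match causes the failure to be surfaced
// with the profile's externalMessage and failure type instead of being reported as an unknown system error.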
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.read.Stream
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
|
||||
data class MsSqlServerCdcInitialSnapshotStateValue(
|
||||
@JsonProperty("pk_val") val pkVal: String? = null,
|
||||
@JsonProperty("pk_name") val pkName: String? = null,
|
||||
@JsonProperty("version") val version: Int? = null,
|
||||
@JsonProperty("state_type") val stateType: String? = null,
|
||||
@JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
|
||||
@JsonProperty("stream_name") val streamName: String? = null,
|
||||
@JsonProperty("cursor_field") val cursorField: List<String>? = null,
|
||||
@JsonProperty("stream_namespace") val streamNamespace: String? = null,
|
||||
) {
|
||||
companion object {
|
||||
/** Value representing the completion of a FULL_REFRESH snapshot. */
|
||||
fun getSnapshotCompletedState(stream: Stream): OpaqueStateValue =
|
||||
Jsons.valueToTree(
|
||||
MsSqlServerCdcInitialSnapshotStateValue(
|
||||
streamName = stream.name,
|
||||
cursorField = listOf(),
|
||||
streamNamespace = stream.namespace
|
||||
)
|
||||
)
|
||||
|
||||
/** Value representing the progress of an ongoing snapshot. */
|
||||
fun snapshotCheckpoint(
|
||||
primaryKey: List<Field>,
|
||||
primaryKeyCheckpoint: List<JsonNode>,
|
||||
): OpaqueStateValue {
|
||||
val primaryKeyField = primaryKey.first()
|
||||
return when (primaryKeyCheckpoint.first().isNull) {
|
||||
true -> Jsons.nullNode()
|
||||
false ->
|
||||
Jsons.valueToTree(
|
||||
MsSqlServerCdcInitialSnapshotStateValue(
|
||||
pkName = primaryKeyField.id,
|
||||
pkVal = primaryKeyCheckpoint.first().asText(),
|
||||
stateType = "primary_key",
|
||||
)
|
||||
)
|
||||
}
|
||||
}
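// Illustrative example (hypothetical column): a checkpoint on primary key "id" with last value "4711"
// serializes roughly as {"pk_val": "4711", "pk_name": "id", "state_type": "primary_key"}.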
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
/*
 * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql

import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.Instant
import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoUnit

private val log = KotlinLogging.logger {}

/**
 * Utility class to calculate cutoff time for "Exclude Today's Data" feature. This ensures that
 * incremental syncs using temporal cursor fields only include data up until midnight of the
 * current day.
 */
object MsSqlServerCursorCutoffTimeProvider {

    private val ISO_LOCAL_DATE: DateTimeFormatter = DateTimeFormatter.ISO_LOCAL_DATE
    private val ISO_OFFSET_DATE_TIME: DateTimeFormatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME
    private val TIMESTAMPTZ_FORMATTER: DateTimeFormatter =
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS xxx")
    private val SQL_SERVER_DATETIME_FORMATTER: DateTimeFormatter =
        MsSqlServerJdbcPartitionFactory.outputDateFormatter

    /**
     * Calculates the cutoff time for a cursor field based on its type.
     *
     * @param cursorField The cursor field
     * @param nowInstant The current instant (for testing)
     * @return The cutoff time as JsonNode, or null if not applicable
     */
    fun getCutoffTime(cursorField: Field, nowInstant: Instant = Instant.now()): JsonNode? {
        return when (cursorField.type.airbyteSchemaType) {
            is LeafAirbyteSchemaType -> {
                when (cursorField.type.airbyteSchemaType as LeafAirbyteSchemaType) {
                    LeafAirbyteSchemaType.DATE -> {
                        // For DATE fields, exclude today by setting cutoff to today's date
                        // This means we include records < today's date (i.e., yesterday and before)
                        val today = nowInstant.atOffset(ZoneOffset.UTC).toLocalDate()
                        val cutoffValue = Jsons.valueToTree<JsonNode>(ISO_LOCAL_DATE.format(today))
                        log.info {
                            "DATE cutoff for field '${cursorField.id}': ${cutoffValue.asText()}"
                        }
                        cutoffValue
                    }
                    LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE -> {
                        // For TIMESTAMP fields, set cutoff to start of today (00:00:00)
                        // Use local datetime without timezone for SQL Server DATETIME
                        val startOfToday =
                            nowInstant
                                .atOffset(ZoneOffset.UTC)
                                .truncatedTo(ChronoUnit.DAYS)
                                .toLocalDateTime()
                        val cutoffValue =
                            Jsons.valueToTree<JsonNode>(
                                SQL_SERVER_DATETIME_FORMATTER.format(startOfToday)
                            )
                        log.info {
                            "TIMESTAMP_WITHOUT_TIMEZONE cutoff for field '${cursorField.id}': ${cutoffValue.asText()}"
                        }
                        cutoffValue
                    }
                    LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE -> {
                        // For TIME fields, set cutoff to start of today (00:00:00)
                        val startOfToday =
                            nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS)
                        Jsons.valueToTree(ISO_OFFSET_DATE_TIME.format(startOfToday))
                    }
                    LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> {
                        // For TIMESTAMP WITH TIMEZONE fields, set cutoff to start of today
                        // (00:00:00)
                        val startOfToday =
                            nowInstant.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS)
                        Jsons.valueToTree(TIMESTAMPTZ_FORMATTER.format(startOfToday))
                    }
                    else -> {
                        log.warn {
                            "Only temporal cursors can exclude today's data. " +
                                "Field '${cursorField.id}' has type '${cursorField.type}' which is not supported."
                        }
                        null
                    }
                }
            }
            else -> {
                log.warn {
                    "Only temporal cursors can exclude today's data. " +
                        "Field '${cursorField.id}' has non-leaf type '${cursorField.type}' which is not supported."
                }
                null
            }
        }
    }

    /** Checks if a cursor field type supports the "Exclude Today's Data" feature. */
    fun isTemporalType(cursorField: Field): Boolean {
        val schemaType = cursorField.type.airbyteSchemaType
        return schemaType is LeafAirbyteSchemaType &&
            schemaType in
                listOf(
                    LeafAirbyteSchemaType.DATE,
                    LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
                    LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE,
                    LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
                )
    }
}
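A standalone sketch of the cutoff arithmetic above with a pinned "now", using only java.time and no connector classes:

```kotlin
import java.time.Instant
import java.time.ZoneOffset
import java.time.temporal.ChronoUnit

fun main() {
    val now = Instant.parse("2024-06-15T17:45:00Z") // pinned instant for illustration
    // DATE cursor: cutoff is today's date; only records with value < "2024-06-15" are read.
    val dateCutoff = now.atOffset(ZoneOffset.UTC).toLocalDate()
    // Timestamp cursors: cutoff is midnight UTC of the current day.
    val timestampCutoff =
        now.atOffset(ZoneOffset.UTC).truncatedTo(ChronoUnit.DAYS).toLocalDateTime()
    println(dateCutoff)      // 2024-06-15
    println(timestampCutoff) // 2024-06-15T00:00
}
```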
@@ -0,0 +1,346 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.debezium.spi.converter.CustomConverter
|
||||
import io.debezium.spi.converter.RelationalColumn
|
||||
import java.math.BigDecimal
|
||||
import java.time.Instant
|
||||
import java.time.LocalDate
|
||||
import java.time.LocalDateTime
|
||||
import java.time.LocalTime
|
||||
import java.time.OffsetDateTime
|
||||
import java.time.ZoneOffset
|
||||
import java.time.format.DateTimeFormatter
|
||||
import java.time.format.DateTimeParseException
|
||||
import java.util.*
|
||||
import microsoft.sql.DateTimeOffset
|
||||
import org.apache.kafka.connect.data.SchemaBuilder
|
||||
import org.slf4j.LoggerFactory
|
||||
|
||||
class MsSqlServerDebeziumConverter : CustomConverter<SchemaBuilder, RelationalColumn> {
|
||||
|
||||
companion object {
|
||||
private val logger = LoggerFactory.getLogger(MsSqlServerDebeziumConverter::class.java)
|
||||
private const val MSSQL_DATE_TYPE = "DATE"
|
||||
private const val MSSQL_DATETIME_TYPE = "DATETIME"
|
||||
private const val MSSQL_DATETIME2_TYPE = "DATETIME2"
|
||||
private const val MSSQL_SMALLDATETIME_TYPE = "SMALLDATETIME"
|
||||
private const val MSSQL_DATETIMEOFFSET_TYPE = "DATETIMEOFFSET"
|
||||
private const val MSSQL_TIME_TYPE = "TIME"
|
||||
private const val MSSQL_SMALLMONEY_TYPE = "SMALLMONEY"
|
||||
private const val MSSQL_MONEY_TYPE = "MONEY"
|
||||
private const val MSSQL_BINARY_TYPE = "BINARY"
|
||||
private const val MSSQL_VARBINARY_TYPE = "VARBINARY"
|
||||
private const val MSSQL_IMAGE_TYPE = "IMAGE"
|
||||
private const val MSSQL_GEOMETRY_TYPE = "GEOMETRY"
|
||||
private const val MSSQL_GEOGRAPHY_TYPE = "GEOGRAPHY"
|
||||
private const val MSSQL_UNIQUEIDENTIFIER_TYPE = "UNIQUEIDENTIFIER"
|
||||
private const val MSSQL_XML_TYPE = "XML"
|
||||
private const val MSSQL_HIERARCHYID_TYPE = "HIERARCHYID"
|
||||
private const val MSSQL_SQL_VARIANT_TYPE = "SQL_VARIANT"
|
||||
}
|
||||
|
||||
override fun configure(properties: Properties) {
|
||||
// No configuration needed
|
||||
}
|
||||
|
||||
override fun converterFor(
|
||||
field: RelationalColumn,
|
||||
registration: CustomConverter.ConverterRegistration<SchemaBuilder>
|
||||
) {
|
||||
val typeName = field.typeName().uppercase()
|
||||
|
||||
when (typeName) {
|
||||
MSSQL_DATE_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertDate)
|
||||
}
|
||||
MSSQL_DATETIME_TYPE,
|
||||
MSSQL_DATETIME2_TYPE,
|
||||
MSSQL_SMALLDATETIME_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertDateTime)
|
||||
}
|
||||
MSSQL_DATETIMEOFFSET_TYPE -> {
|
||||
registration.register(
|
||||
SchemaBuilder.string().optional(),
|
||||
this::convertDateTimeOffset
|
||||
)
|
||||
}
|
||||
MSSQL_TIME_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertTime)
|
||||
}
|
||||
MSSQL_SMALLMONEY_TYPE,
|
||||
MSSQL_MONEY_TYPE -> {
|
||||
registration.register(SchemaBuilder.float64().optional(), this::convertMoney)
|
||||
}
|
||||
MSSQL_BINARY_TYPE,
|
||||
MSSQL_VARBINARY_TYPE,
|
||||
MSSQL_IMAGE_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertBinary)
|
||||
}
|
||||
MSSQL_GEOMETRY_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional()) { value ->
|
||||
convertSpatial(value, isGeography = false)
|
||||
}
|
||||
}
|
||||
MSSQL_GEOGRAPHY_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional()) { value ->
|
||||
convertSpatial(value, isGeography = true)
|
||||
}
|
||||
}
|
||||
MSSQL_UNIQUEIDENTIFIER_TYPE -> {
|
||||
registration.register(
|
||||
SchemaBuilder.string().optional(),
|
||||
this::convertUniqueIdentifier
|
||||
)
|
||||
}
|
||||
MSSQL_XML_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertXml)
|
||||
}
|
||||
MSSQL_HIERARCHYID_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertHierarchyId)
|
||||
}
|
||||
MSSQL_SQL_VARIANT_TYPE -> {
|
||||
registration.register(SchemaBuilder.string().optional(), this::convertSqlVariant)
|
||||
}
|
||||
else -> {
|
||||
// For unhandled types, no custom converter is registered; Debezium's default handling applies
|
||||
logger.debug("Unhandled SQL Server type: {}", typeName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertDate(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
when (value) {
|
||||
is LocalDate -> value.toString()
|
||||
is String -> {
|
||||
// Try to parse and reformat to ensure consistent format
|
||||
val date = LocalDate.parse(value, DateTimeFormatter.ISO_LOCAL_DATE)
|
||||
date.toString()
|
||||
}
|
||||
is java.sql.Date -> value.toLocalDate().toString()
|
||||
else -> value.toString()
|
||||
}
|
||||
} catch (e: DateTimeParseException) {
|
||||
logger.warn("Failed to parse date value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertDateTime(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
val formatter = MsSqlServerJdbcPartitionFactory.outputDateFormatter
|
||||
when (value) {
|
||||
is LocalDateTime -> value.format(formatter)
|
||||
is String -> {
|
||||
// Try to parse as LocalDateTime first
|
||||
val dateTime = LocalDateTime.parse(value.replace(" ", "T"))
|
||||
dateTime.format(formatter)
|
||||
}
|
||||
is java.sql.Timestamp -> value.toLocalDateTime().format(formatter)
|
||||
is Instant -> LocalDateTime.ofInstant(value, ZoneOffset.UTC).format(formatter)
|
||||
else -> value.toString()
|
||||
}
|
||||
} catch (e: DateTimeParseException) {
|
||||
logger.warn("Failed to parse datetime value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertDateTimeOffset(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX")
|
||||
when (value) {
|
||||
is DateTimeOffset -> value.offsetDateTime.format(formatter)
|
||||
is OffsetDateTime -> value.format(formatter)
|
||||
is String -> {
|
||||
// Try to parse as OffsetDateTime
|
||||
val offsetDateTime = OffsetDateTime.parse(value)
|
||||
offsetDateTime.format(formatter)
|
||||
}
|
||||
else -> value.toString()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to parse datetimeoffset value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertTime(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
val formatter = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSS")
|
||||
when (value) {
|
||||
is LocalTime -> value.format(formatter)
|
||||
is String -> {
|
||||
// Handle SQL Server TIME values that come as datetime strings
|
||||
if (value.contains(" ")) {
|
||||
// Extract time part from "1900-01-01 13:00:01.0" format
|
||||
val timePart = value.substringAfter(" ")
|
||||
val time = LocalTime.parse(timePart)
|
||||
time.format(formatter)
|
||||
} else {
|
||||
val time = LocalTime.parse(value)
|
||||
time.format(formatter)
|
||||
}
|
||||
}
|
||||
is java.sql.Time -> value.toLocalTime().format(formatter)
|
||||
else -> {
|
||||
// Handle other cases where TIME might come as datetime string
|
||||
val stringValue = value.toString()
|
||||
if (stringValue.contains(" ")) {
|
||||
val timePart = stringValue.substringAfter(" ")
|
||||
val time = LocalTime.parse(timePart)
|
||||
time.format(formatter)
|
||||
} else {
|
||||
stringValue
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to parse time value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertMoney(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
when (value) {
|
||||
is BigDecimal -> value.toDouble()
|
||||
is Double -> value
|
||||
is String -> value.toBigDecimal().toDouble()
|
||||
is Number -> value.toDouble()
|
||||
else -> value.toString().toBigDecimal().toDouble()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to parse money value: {}", value, e)
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertBinary(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return when (value) {
|
||||
is ByteArray -> Base64.getEncoder().encodeToString(value)
|
||||
is String -> value // Already base64 encoded
|
||||
else -> {
|
||||
logger.warn("Unexpected binary type: {}", value.javaClass.name)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertSpatial(value: Any?, isGeography: Boolean): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
when (value) {
|
||||
is String -> {
|
||||
// If already a string (WKT format), check if it's base64
|
||||
if (value.matches(Regex("^[A-Za-z0-9+/]+=*$"))) {
|
||||
// It's base64, decode and convert
|
||||
try {
|
||||
val bytes = Base64.getDecoder().decode(value)
|
||||
convertSpatialBytes(bytes, isGeography)
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to decode base64 spatial value: {}", e.message)
|
||||
value
|
||||
}
|
||||
} else {
|
||||
// Already WKT format
|
||||
value
|
||||
}
|
||||
}
|
||||
is ByteArray -> convertSpatialBytes(value, isGeography)
|
||||
else -> value.toString()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to convert spatial value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertSpatialBytes(bytes: ByteArray, isGeography: Boolean): String {
|
||||
return try {
|
||||
if (isGeography) {
|
||||
// Deserialize as Geography
|
||||
com.microsoft.sqlserver.jdbc.Geography.deserialize(bytes).toString()
|
||||
} else {
|
||||
// Deserialize as Geometry
|
||||
com.microsoft.sqlserver.jdbc.Geometry.deserialize(bytes).toString()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.warn(
|
||||
"Failed to deserialize spatial binary as ${if (isGeography) "Geography" else "Geometry"}: {}",
|
||||
e.message
|
||||
)
|
||||
// Fallback to base64 if deserialization fails
|
||||
Base64.getEncoder().encodeToString(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertUniqueIdentifier(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
when (value) {
|
||||
is String -> {
|
||||
UUID.fromString(value).toString().uppercase()
|
||||
}
|
||||
else -> value.toString().uppercase()
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to convert UUID value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertXml(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
// XML is stored as string in Airbyte
|
||||
value.toString()
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to convert XML value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertHierarchyId(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
// HierarchyID is stored as string representation
|
||||
value.toString()
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to convert HierarchyID value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
|
||||
private fun convertSqlVariant(value: Any?): Any? {
|
||||
if (value == null) return null
|
||||
|
||||
return try {
|
||||
// SQL_VARIANT can hold various types - store as string
|
||||
value.toString()
|
||||
} catch (e: Exception) {
|
||||
logger.warn("Failed to convert SQL_VARIANT value: {}", value, e)
|
||||
value.toString()
|
||||
}
|
||||
}
|
||||
}
|
||||
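A standalone sketch of the TIME normalization performed by `convertTime` above: SQL Server TIME values can surface with a dummy date prefix (for example "1900-01-01 13:00:01.0"), and only the time portion is kept, re-emitted with a fixed six-digit fraction.

```kotlin
import java.time.LocalTime
import java.time.format.DateTimeFormatter

fun normalizeTime(raw: String): String {
    val formatter = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSS")
    // Drop the dummy date prefix if the value arrived as a datetime-like string.
    val timePart = if (raw.contains(" ")) raw.substringAfter(" ") else raw
    return LocalTime.parse(timePart).format(formatter)
}

fun main() {
    println(normalizeTime("1900-01-01 13:00:01.0")) // 13:00:01.000000
    println(normalizeTime("23:59:59.123456"))       // 23:59:59.123456
}
```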
@@ -0,0 +1,698 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode
|
||||
import io.airbyte.cdk.data.BigDecimalCodec
|
||||
import io.airbyte.cdk.data.BinaryCodec
|
||||
import io.airbyte.cdk.data.DoubleCodec
|
||||
import io.airbyte.cdk.data.FloatCodec
|
||||
import io.airbyte.cdk.data.IntCodec
|
||||
import io.airbyte.cdk.data.JsonCodec
|
||||
import io.airbyte.cdk.data.JsonEncoder
|
||||
import io.airbyte.cdk.data.LongCodec
|
||||
import io.airbyte.cdk.data.NullCodec
|
||||
import io.airbyte.cdk.data.TextCodec
|
||||
import io.airbyte.cdk.discover.CommonMetaField
|
||||
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
|
||||
import io.airbyte.cdk.output.sockets.FieldValueEncoder
|
||||
import io.airbyte.cdk.output.sockets.NativeRecordPayload
|
||||
import io.airbyte.cdk.read.Stream
|
||||
import io.airbyte.cdk.read.cdc.AbortDebeziumWarmStartState
|
||||
import io.airbyte.cdk.read.cdc.CdcPartitionReaderDebeziumOperations
|
||||
import io.airbyte.cdk.read.cdc.CdcPartitionsCreatorDebeziumOperations
|
||||
import io.airbyte.cdk.read.cdc.DebeziumOffset
|
||||
import io.airbyte.cdk.read.cdc.DebeziumPropertiesBuilder
|
||||
import io.airbyte.cdk.read.cdc.DebeziumPropertiesBuilder.Companion.AIRBYTE_HEARTBEAT_TIMEOUT_SECONDS
|
||||
import io.airbyte.cdk.read.cdc.DebeziumRecordKey
|
||||
import io.airbyte.cdk.read.cdc.DebeziumRecordValue
|
||||
import io.airbyte.cdk.read.cdc.DebeziumSchemaHistory
|
||||
import io.airbyte.cdk.read.cdc.DebeziumWarmStartState
|
||||
import io.airbyte.cdk.read.cdc.DeserializedRecord
|
||||
import io.airbyte.cdk.read.cdc.InvalidDebeziumWarmStartState
|
||||
import io.airbyte.cdk.read.cdc.ResetDebeziumWarmStartState
|
||||
import io.airbyte.cdk.read.cdc.ValidDebeziumWarmStartState
|
||||
import io.airbyte.cdk.ssh.TunnelSession
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.debezium.connector.sqlserver.Lsn
|
||||
import io.debezium.connector.sqlserver.SqlServerConnector
|
||||
import io.debezium.document.DocumentReader
|
||||
import io.debezium.document.DocumentWriter
|
||||
import io.debezium.relational.history.HistoryRecord
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import jakarta.inject.Singleton
|
||||
import java.io.ByteArrayInputStream
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.sql.Connection
|
||||
import java.time.Instant
|
||||
import java.time.OffsetDateTime
|
||||
import java.time.ZoneOffset
|
||||
import java.util.concurrent.atomic.AtomicLong
|
||||
import java.util.zip.GZIPInputStream
|
||||
import java.util.zip.GZIPOutputStream
|
||||
import kotlin.collections.plus
|
||||
import org.apache.kafka.connect.source.SourceRecord
|
||||
import org.apache.mina.util.Base64
|
||||
|
||||
data class MsSqlServerCdcPosition(val lsn: String) : Comparable<MsSqlServerCdcPosition> {
|
||||
override fun compareTo(other: MsSqlServerCdcPosition): Int {
|
||||
return lsn.compareTo(other.lsn)
|
||||
}
|
||||
}
|
||||
|
||||
@Singleton
|
||||
class MsSqlServerDebeziumOperations(
|
||||
private val jdbcConnectionFactory: JdbcConnectionFactory,
|
||||
private val configuration: MsSqlServerSourceConfiguration
|
||||
) :
|
||||
CdcPartitionsCreatorDebeziumOperations<MsSqlServerCdcPosition>,
|
||||
CdcPartitionReaderDebeziumOperations<MsSqlServerCdcPosition> {
|
||||
|
||||
// Generates globally unique cursor values for CDC records by combining
|
||||
// current timestamp with an incrementing counter. This ensures monotonically
|
||||
// increasing values across sync restarts and avoids collisions.
|
||||
val cdcCursorGenerator = AtomicLong(Instant.now().toEpochMilli() * 10_000_000 + 1)
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
@Suppress("UNCHECKED_CAST")
|
||||
override fun deserializeRecord(
|
||||
key: DebeziumRecordKey,
|
||||
value: DebeziumRecordValue,
|
||||
stream: Stream,
|
||||
): DeserializedRecord {
|
||||
val before: JsonNode = value.before
|
||||
val after: JsonNode = value.after
|
||||
val source: JsonNode = value.source
|
||||
val isDelete: Boolean = after.isNull
|
||||
// Use either `before` or `after` as the record data, depending on the nature of the change.
|
||||
val recordData: JsonNode = if (isDelete) before else after
|
||||
|
||||
// Convert JsonNode to NativeRecordPayload based on stream schema
|
||||
val resultRow: NativeRecordPayload = mutableMapOf()
|
||||
|
||||
// Process fields based on stream schema (following MySQL pattern)
|
||||
for (field in stream.schema) {
|
||||
val fieldValue = recordData[field.id] ?: continue
|
||||
when {
|
||||
fieldValue.isNull -> {
|
||||
resultRow[field.id] = FieldValueEncoder(null, NullCodec)
|
||||
}
|
||||
else -> {
|
||||
// Use the field's jsonEncoder if available, otherwise fall back to TextCodec
|
||||
val codec: JsonCodec<*> = field.type.jsonEncoder as? JsonCodec<*> ?: TextCodec
|
||||
|
||||
// Handle numeric and binary values from Debezium (can come as JSON strings or
|
||||
// numbers)
|
||||
val decodedValue =
|
||||
when {
|
||||
// BigDecimal: handle both string and number
|
||||
fieldValue.isTextual && codec is BigDecimalCodec ->
|
||||
java.math.BigDecimal(fieldValue.asText())
|
||||
fieldValue.isNumber && codec is BigDecimalCodec ->
|
||||
fieldValue.decimalValue()
|
||||
|
||||
// Int: handle both string and number
|
||||
fieldValue.isTextual && codec is IntCodec -> fieldValue.asText().toInt()
|
||||
fieldValue.isNumber && codec is IntCodec -> fieldValue.intValue()
|
||||
|
||||
// Long: handle both string and number
|
||||
fieldValue.isTextual && codec is LongCodec ->
|
||||
fieldValue.asText().toLong()
|
||||
fieldValue.isNumber && codec is LongCodec -> fieldValue.longValue()
|
||||
|
||||
// Float: handle both string and number
|
||||
fieldValue.isTextual && codec is FloatCodec ->
|
||||
fieldValue.asText().toFloat()
|
||||
fieldValue.isNumber && codec is FloatCodec -> fieldValue.floatValue()
|
||||
|
||||
// Double: handle both string and number
|
||||
fieldValue.isTextual && codec is DoubleCodec ->
|
||||
fieldValue.asText().toDouble()
|
||||
fieldValue.isNumber && codec is DoubleCodec -> fieldValue.doubleValue()
|
||||
|
||||
// Binary: handle base64 string
|
||||
fieldValue.isTextual && codec is BinaryCodec ->
|
||||
java.nio.ByteBuffer.wrap(
|
||||
java.util.Base64.getDecoder().decode(fieldValue.asText())
|
||||
)
|
||||
else -> codec.decode(fieldValue)
|
||||
}
|
||||
|
||||
resultRow[field.id] = FieldValueEncoder(decodedValue, codec as JsonCodec<Any>)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set CDC meta-field values
|
||||
val transactionMillis: Long = source["ts_ms"].asLong()
|
||||
val transactionOffsetDateTime: OffsetDateTime =
|
||||
OffsetDateTime.ofInstant(Instant.ofEpochMilli(transactionMillis), ZoneOffset.UTC)
|
||||
|
||||
resultRow[CommonMetaField.CDC_UPDATED_AT.id] =
|
||||
FieldValueEncoder(
|
||||
transactionOffsetDateTime,
|
||||
CommonMetaField.CDC_UPDATED_AT.type.jsonEncoder as JsonEncoder<Any>
|
||||
)
|
||||
|
||||
resultRow[CommonMetaField.CDC_DELETED_AT.id] =
|
||||
FieldValueEncoder(
|
||||
if (isDelete) transactionOffsetDateTime else null,
|
||||
(if (isDelete) CommonMetaField.CDC_DELETED_AT.type.jsonEncoder else NullCodec)
|
||||
as JsonEncoder<Any>
|
||||
)
|
||||
|
||||
// Set MSSQL-specific CDC meta-fields
|
||||
val commitLsn = source["commit_lsn"].asText()
|
||||
resultRow[MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_LSN.id] =
|
||||
FieldValueEncoder(
|
||||
commitLsn,
|
||||
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_LSN.type.jsonEncoder
|
||||
as JsonEncoder<Any>
|
||||
)
|
||||
resultRow[MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_CURSOR.id] =
|
||||
FieldValueEncoder(
|
||||
cdcCursorGenerator.getAndIncrement(),
|
||||
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_CURSOR.type.jsonEncoder
|
||||
as JsonEncoder<Any>
|
||||
)
|
||||
|
||||
val eventSerialNo = source["event_serial_no"]?.asInt()?.let { "$it" } ?: "0"
|
||||
resultRow[MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO.id] =
|
||||
FieldValueEncoder(
|
||||
eventSerialNo,
|
||||
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO.type.jsonEncoder
|
||||
as JsonEncoder<Any>
|
||||
)
|
||||
|
||||
// Return a DeserializedRecord instance.
|
||||
return DeserializedRecord(resultRow, changes = emptyMap())
|
||||
}
|
||||
|
||||
override fun position(recordValue: DebeziumRecordValue): MsSqlServerCdcPosition? {
|
||||
val commitLsn = recordValue.source["commit_lsn"]?.asText()
|
||||
return commitLsn?.let { MsSqlServerCdcPosition(it) }
|
||||
}
|
||||
|
||||
override fun position(sourceRecord: SourceRecord): MsSqlServerCdcPosition? {
|
||||
val commitLsn: String =
|
||||
sourceRecord.sourceOffset()[("commit_lsn")]?.toString() ?: return null
|
||||
return MsSqlServerCdcPosition(commitLsn)
|
||||
}
|
||||
|
||||
override fun position(offset: DebeziumOffset): MsSqlServerCdcPosition {
|
||||
if (offset.wrapped.size != 1) {
|
||||
throw IllegalArgumentException("Expected exactly 1 key in $offset")
|
||||
}
|
||||
val offsetValue = offset.wrapped.values.first() as ObjectNode
|
||||
val commitLsn = offsetValue["commit_lsn"].asText()
|
||||
return MsSqlServerCdcPosition(commitLsn)
|
||||
}
|
||||
|
||||
override fun serializeState(
|
||||
offset: DebeziumOffset,
|
||||
schemaHistory: DebeziumSchemaHistory?
|
||||
): JsonNode {
|
||||
// Sanitize offset before saving to state to fix heartbeat corruption
|
||||
val sanitizedOffset = sanitizeOffset(offset)
|
||||
|
||||
val stateNode: ObjectNode = Jsons.objectNode()
|
||||
// Serialize offset.
|
||||
val offsetNode: JsonNode =
|
||||
Jsons.objectNode().apply {
|
||||
for ((k, v) in sanitizedOffset.wrapped) {
|
||||
put(Jsons.writeValueAsString(k), Jsons.writeValueAsString(v))
|
||||
}
|
||||
}
|
||||
stateNode.set<JsonNode>(MSSQL_CDC_OFFSET, offsetNode)
|
||||
|
||||
val realSchemaHistory: List<HistoryRecord>? = schemaHistory?.wrapped
|
||||
if (realSchemaHistory != null) {
|
||||
val uncompressedString: String =
|
||||
realSchemaHistory.joinToString(separator = "\n") {
|
||||
DocumentWriter.defaultWriter().write(it.document())
|
||||
}
|
||||
if (uncompressedString.length <= MSSQL_MAX_UNCOMPRESSED_LENGTH) {
|
||||
stateNode.put(MSSQL_DB_HISTORY, uncompressedString)
|
||||
stateNode.put(MSSQL_IS_COMPRESSED, false)
|
||||
} else {
|
||||
stateNode.put(MSSQL_IS_COMPRESSED, true)
|
||||
val baos = ByteArrayOutputStream()
|
||||
val builder = StringBuilder()
|
||||
GZIPOutputStream(baos).writer(Charsets.UTF_8).use { it.write(uncompressedString) }
|
||||
|
||||
builder.append("\"")
|
||||
builder.append(Base64.encodeBase64(baos.toByteArray()).toString(Charsets.UTF_8))
|
||||
builder.append("\"")
|
||||
|
||||
stateNode.put(MSSQL_DB_HISTORY, builder.toString())
|
||||
}
|
||||
}
|
||||
return Jsons.objectNode().apply { set<JsonNode>(MSSQL_STATE, stateNode) }
|
||||
}
|
||||
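For context, a standalone round-trip sketch of the schema-history compression used in `serializeState` above. It substitutes `java.util.Base64` for the Apache MINA `Base64` the connector uses, purely for illustration; the history string is a hypothetical example.

```kotlin
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.util.Base64
import java.util.zip.GZIPInputStream
import java.util.zip.GZIPOutputStream

// Large histories are gzip-compressed, Base64-encoded, and wrapped in quotes before
// being stored in the connector state.
fun compressHistory(uncompressed: String): String {
    val baos = ByteArrayOutputStream()
    GZIPOutputStream(baos).writer(Charsets.UTF_8).use { it.write(uncompressed) }
    return "\"" + Base64.getEncoder().encodeToString(baos.toByteArray()) + "\""
}

fun decompressHistory(stored: String): String {
    val decoded = Base64.getDecoder().decode(stored.substring(1, stored.length - 1))
    return GZIPInputStream(ByteArrayInputStream(decoded)).reader(Charsets.UTF_8).readText()
}

fun main() {
    val original = """{"source":{"server":"db"},"ddl":"CREATE TABLE t (id int)"}"""
    check(decompressHistory(compressHistory(original)) == original)
}
```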
|
||||
override fun deserializeState(opaqueStateValue: JsonNode): DebeziumWarmStartState {
|
||||
val stateNode = opaqueStateValue[MSSQL_STATE]
|
||||
val offsetNode = stateNode[MSSQL_CDC_OFFSET] as JsonNode
|
||||
val offsetMap: Map<JsonNode, JsonNode> =
|
||||
offsetNode
|
||||
.fieldNames()
|
||||
.asSequence()
|
||||
.map { k -> Jsons.readTree(k) to Jsons.readTree(offsetNode[k].textValue()) }
|
||||
.toMap()
|
||||
|
||||
// Handle legacy state with multiple offset keys (e.g., different database name casings)
|
||||
val finalOffsetMap =
|
||||
when {
|
||||
offsetMap.size == 1 -> offsetMap
|
||||
offsetMap.size > 1 -> {
|
||||
log.warn {
|
||||
"Found ${offsetMap.size} offset keys in saved state. This may be from a legacy connector version. " +
|
||||
"Selecting the offset with the highest LSN (most recent position)."
|
||||
}
|
||||
|
||||
// Select the offset with the highest LSN
|
||||
val selectedEntry =
|
||||
offsetMap.entries.maxByOrNull { (_, value) ->
|
||||
val offsetValue = value as ObjectNode
|
||||
val commitLsn = offsetValue["commit_lsn"]?.asText()
|
||||
try {
|
||||
commitLsn?.let { Lsn.valueOf(it) } ?: Lsn.NULL
|
||||
} catch (e: Exception) {
|
||||
log.warn(e) { "Failed to parse LSN from offset value: $value" }
|
||||
Lsn.NULL
|
||||
}
|
||||
}
|
||||
|
||||
if (selectedEntry == null) {
|
||||
throw RuntimeException(
|
||||
"Unable to select valid offset from multiple keys in $opaqueStateValue"
|
||||
)
|
||||
}
|
||||
|
||||
log.info {
|
||||
"Selected offset key with commit_lsn='${(selectedEntry.value as ObjectNode)["commit_lsn"]?.asText()}' " +
|
||||
"from ${offsetMap.size} available offset keys."
|
||||
}
|
||||
|
||||
mapOf(selectedEntry.key to selectedEntry.value)
|
||||
}
|
||||
else ->
|
||||
throw RuntimeException(
|
||||
"Offset object must have at least 1 key in $opaqueStateValue"
|
||||
)
|
||||
}
|
||||
|
||||
val offset = DebeziumOffset(finalOffsetMap)
|
||||
|
||||
// Check if the saved LSN is valid
|
||||
val savedLsn =
|
||||
try {
|
||||
val offsetValue = offset.wrapped.values.first() as ObjectNode
|
||||
val commitLsn = offsetValue["commit_lsn"].asText()
|
||||
Lsn.valueOf(commitLsn)
|
||||
} catch (e: Exception) {
|
||||
log.error(e) { "Failed to parse saved LSN from offset: $offset" }
|
||||
return abortCdcSync("Invalid LSN format in saved offset")
|
||||
}
|
||||
|
||||
// Validate the saved LSN is still available in SQL Server
|
||||
val isLsnValid =
|
||||
try {
|
||||
validateLsnStillAvailable(savedLsn)
|
||||
} catch (e: Exception) {
|
||||
log.error(e) { "Failed to validate LSN availability: ${savedLsn}" }
|
||||
false
|
||||
}
|
||||
|
||||
if (!isLsnValid) {
|
||||
return abortCdcSync(
|
||||
"Saved LSN '${savedLsn}' is no longer available in SQL Server transaction logs"
|
||||
)
|
||||
}
|
||||
|
||||
val historyNode = stateNode[MSSQL_DB_HISTORY]
|
||||
val schemaHistory: DebeziumSchemaHistory? =
|
||||
historyNode?.let {
|
||||
val isCompressed: Boolean = stateNode[MSSQL_IS_COMPRESSED]?.asBoolean() ?: false
|
||||
val uncompressedString: String =
|
||||
if (isCompressed) {
|
||||
val textValue: String = it.textValue()
|
||||
val compressedBytes: ByteArray =
|
||||
textValue.substring(1, textValue.length - 1).toByteArray(Charsets.UTF_8)
|
||||
val decoded = Base64.decodeBase64(compressedBytes)
|
||||
|
||||
GZIPInputStream(ByteArrayInputStream(decoded))
|
||||
.reader(Charsets.UTF_8)
|
||||
.readText()
|
||||
} else {
|
||||
it.textValue()
|
||||
}
|
||||
val schemaHistoryList: List<HistoryRecord> =
|
||||
uncompressedString
|
||||
.lines()
|
||||
.filter { it.isNotBlank() }
|
||||
.map { HistoryRecord(DocumentReader.defaultReader().read(it)) }
|
||||
DebeziumSchemaHistory(schemaHistoryList)
|
||||
}
|
||||
|
||||
// Store the loaded offset for heartbeat sanitization comparison
|
||||
lastLoadedOffset = offset
|
||||
|
||||
return ValidDebeziumWarmStartState(offset, schemaHistory)
|
||||
}
|
||||
|
||||
// Track the last loaded offset to detect heartbeat corruption
|
||||
@Volatile private var lastLoadedOffset: DebeziumOffset? = null
|
||||
|
||||
/**
|
||||
* Sanitizes the offset before saving to state to fix heartbeat-induced corruption. SQL Server
|
||||
* heartbeats reset event_serial_no to 0 and change_lsn to NULL, causing duplicate record
|
||||
* emission on subsequent syncs.
|
||||
*
|
||||
* Compares the current offset (read from Debezium) against the offset that was loaded at the
|
||||
* start of the sync.
|
||||
*/
|
||||
private fun sanitizeOffset(currentOffset: DebeziumOffset): DebeziumOffset {
|
||||
val startingOffset = lastLoadedOffset ?: return currentOffset
|
||||
|
||||
if (startingOffset.wrapped.size != 1 || currentOffset.wrapped.size != 1) {
|
||||
return currentOffset
|
||||
}
|
||||
|
||||
val offsetKey = currentOffset.wrapped.keys.first()
|
||||
val startValue =
|
||||
startingOffset.wrapped.values.first() as? ObjectNode ?: return currentOffset
|
||||
val currentValue =
|
||||
currentOffset.wrapped.values.first() as? ObjectNode ?: return currentOffset
|
||||
|
||||
val startLsn = startValue["commit_lsn"]?.asText()
|
||||
val currentLsn = currentValue["commit_lsn"]?.asText()
|
||||
|
||||
// If LSN has progressed, the current offset is valid
|
||||
if (startLsn == null || currentLsn == null || startLsn != currentLsn) {
|
||||
return currentOffset
|
||||
}
|
||||
|
||||
// LSN hasn't progressed - check for heartbeat regression
|
||||
val startEventSerialNo = startValue["event_serial_no"]?.asInt()
|
||||
val currentEventSerialNo = currentValue["event_serial_no"]?.asInt()
|
||||
val startChangeLsn = startValue["change_lsn"]
|
||||
val currentChangeLsn = currentValue["change_lsn"]
|
||||
|
||||
val eventSerialNoRegressed =
|
||||
startEventSerialNo != null &&
|
||||
startEventSerialNo > 0 &&
|
||||
(currentEventSerialNo == null || currentEventSerialNo == 0)
|
||||
|
||||
// Check if change_lsn has regressed to NULL (either JSON null or string "NULL")
|
||||
val changeLsnRegressed =
|
||||
startChangeLsn != null &&
|
||||
!startChangeLsn.isNull &&
|
||||
(currentChangeLsn == null ||
|
||||
currentChangeLsn.isNull ||
|
||||
(currentChangeLsn.isTextual && currentChangeLsn.asText() == "NULL"))
|
||||
|
||||
if (!eventSerialNoRegressed && !changeLsnRegressed) {
|
||||
return currentOffset
|
||||
}
|
||||
|
||||
// Heartbeat has corrupted the offset - restore starting values
|
||||
log.info {
|
||||
"Detected heartbeat offset regression at LSN $currentLsn. " +
|
||||
"Preserving event_serial_no=$startEventSerialNo and change_lsn=${startChangeLsn?.asText()} " +
|
||||
"from starting offset (current had event_serial_no=$currentEventSerialNo, change_lsn=${currentChangeLsn?.asText()})"
|
||||
}
|
||||
|
||||
val sanitizedValue = currentValue.deepCopy()
|
||||
if (eventSerialNoRegressed && startEventSerialNo != null) {
|
||||
sanitizedValue.put("event_serial_no", startEventSerialNo)
|
||||
}
|
||||
if (changeLsnRegressed && !startChangeLsn.isNull) {
|
||||
sanitizedValue.set<JsonNode>("change_lsn", startChangeLsn)
|
||||
}
|
||||
|
||||
return DebeziumOffset(mapOf(offsetKey to sanitizedValue))
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates if the given LSN is still available in SQL Server transaction logs. Returns true if
|
||||
* the LSN is available, false otherwise.
|
||||
*/
|
||||
private fun validateLsnStillAvailable(lsn: Lsn): Boolean {
|
||||
// Use jdbcConnectionFactory which handles SSH tunneling
|
||||
jdbcConnectionFactory.get().use { connection: Connection ->
|
||||
connection.createStatement().use { statement ->
|
||||
// Check if the LSN is within the available range
|
||||
// sys.fn_cdc_get_min_lsn returns the minimum available LSN for a capture instance
|
||||
// sys.fn_cdc_get_max_lsn returns the current maximum LSN
|
||||
val query =
|
||||
"""
|
||||
SELECT
|
||||
sys.fn_cdc_get_min_lsn('') AS min_lsn,
|
||||
sys.fn_cdc_get_max_lsn() AS max_lsn
|
||||
""".trimIndent()
|
||||
|
||||
statement.executeQuery(query).use { resultSet ->
|
||||
if (resultSet.next()) {
|
||||
val minLsnBytes = resultSet.getBytes("min_lsn")
|
||||
val maxLsnBytes = resultSet.getBytes("max_lsn")
|
||||
|
||||
if (minLsnBytes == null || maxLsnBytes == null) {
|
||||
log.warn { "CDC is not enabled or no LSN range available" }
|
||||
return false
|
||||
}
|
||||
|
||||
val minLsn = Lsn.valueOf(minLsnBytes)
|
||||
val maxLsn = Lsn.valueOf(maxLsnBytes)
|
||||
|
||||
// Check if saved LSN is within the valid range
|
||||
val isValid = lsn.compareTo(minLsn) >= 0 && lsn.compareTo(maxLsn) <= 0
|
||||
|
||||
if (!isValid) {
|
||||
log.warn {
|
||||
"Saved LSN '$lsn' is outside the available range [min: $minLsn, max: $maxLsn]. " +
|
||||
"Transaction logs may have been truncated."
|
||||
}
|
||||
}
|
||||
|
||||
return isValid
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles invalid CDC cursor position based on configured behavior. Either fails the sync or
|
||||
* resets to start fresh from current position.
|
||||
*/
|
||||
private fun abortCdcSync(reason: String): InvalidDebeziumWarmStartState {
|
||||
val cdcConfig =
|
||||
configuration.incrementalReplicationConfiguration as CdcIncrementalConfiguration
|
||||
return when (cdcConfig.invalidCdcCursorPositionBehavior) {
|
||||
InvalidCdcCursorPositionBehavior.FAIL_SYNC ->
|
||||
AbortDebeziumWarmStartState(
|
||||
"Saved offset no longer present on the server, please reset the connection. " +
|
||||
"To prevent this, increase transaction log retention and/or increase sync frequency. " +
|
||||
"$reason."
|
||||
)
|
||||
InvalidCdcCursorPositionBehavior.RESET_SYNC ->
|
||||
ResetDebeziumWarmStartState(
|
||||
"Saved offset no longer present on the server. " +
|
||||
"Automatically resetting to current position. " +
|
||||
"WARNING: Any changes between the saved position and current position will be lost. " +
|
||||
"$reason."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current maximum LSN from SQL Server for CDC cold start. This follows the pattern
|
||||
* from the old MSSQL connector and returns the Debezium Lsn type for type safety.
|
||||
*
|
||||
* @return Lsn object representing the current maximum LSN
|
||||
* @throws IllegalStateException if CDC is not enabled or LSN cannot be retrieved
|
||||
*/
|
||||
private fun getCurrentMaxLsn(): Lsn {
|
||||
// Use jdbcConnectionFactory which handles SSH tunneling
|
||||
jdbcConnectionFactory.get().use { connection: Connection ->
|
||||
connection.createStatement().use { statement ->
|
||||
// Query sys.fn_cdc_get_max_lsn(); no USE statement is needed because the connection
// already targets the correct database.
|
||||
val query = "SELECT sys.fn_cdc_get_max_lsn() AS max_lsn"
|
||||
statement.executeQuery(query).use { resultSet ->
|
||||
if (resultSet.next()) {
|
||||
val lsnBytes = resultSet.getBytes("max_lsn")
|
||||
if (lsnBytes != null && lsnBytes.isNotEmpty()) {
|
||||
// Use Debezium's Lsn class for proper validation and formatting
|
||||
return Lsn.valueOf(lsnBytes)
|
||||
} else {
|
||||
throw IllegalStateException(
|
||||
"CDC is not enabled or no max LSN available for database '${configuration.databaseName}'. " +
|
||||
"Please ensure: 1) CDC is enabled on the database, 2) At least one table has CDC enabled, " +
|
||||
"3) The user has necessary permissions to query CDC functions."
|
||||
)
|
||||
}
|
||||
} else {
|
||||
throw IllegalStateException(
|
||||
"Failed to query max LSN from database '${configuration.databaseName}'. " +
|
||||
"The query returned no results."
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
override fun generateColdStartOffset(): DebeziumOffset {
|
||||
val currentLsn = getCurrentMaxLsn()
|
||||
val databaseName = configuration.databaseName
|
||||
|
||||
// Create offset structure that matches SQL Server Debezium connector format
|
||||
val key =
|
||||
Jsons.arrayNode().apply {
|
||||
add(databaseName)
|
||||
add(
|
||||
Jsons.objectNode().apply {
|
||||
put("server", databaseName)
|
||||
put("database", databaseName)
|
||||
}
|
||||
)
|
||||
}
|
||||
val value =
|
||||
Jsons.objectNode().apply {
|
||||
put("commit_lsn", currentLsn.toString())
|
||||
put("snapshot", true)
|
||||
put("snapshot_completed", true)
|
||||
}
|
||||
|
||||
val offset = DebeziumOffset(mapOf(key to value))
|
||||
log.info { "Constructed SQL Server CDC cold start offset with LSN: $currentLsn" }
|
||||
return offset
|
||||
}
|
||||
|
||||
override fun generateColdStartProperties(streams: List<Stream>): Map<String, String> {
|
||||
return generateCommonDebeziumProperties(streams) + ("snapshot.mode" to "recovery")
|
||||
}
|
||||
|
||||
override fun generateWarmStartProperties(streams: List<Stream>): Map<String, String> {
|
||||
return generateCommonDebeziumProperties(streams) + ("snapshot.mode" to "when_needed")
|
||||
}
|
||||
|
||||
private fun generateCommonDebeziumProperties(streams: List<Stream>): Map<String, String> {
|
||||
val databaseName = configuration.databaseName
|
||||
val schemaList = streams.map { it.namespace }.distinct().joinToString(",")
|
||||
val messageKeyColumns = buildMessageKeyColumns(streams)
|
||||
val tunnelSession: TunnelSession = jdbcConnectionFactory.ensureTunnelSession()
|
||||
|
||||
return DebeziumPropertiesBuilder()
|
||||
.withDefault()
|
||||
.withConnector(SqlServerConnector::class.java)
|
||||
.withDebeziumName(databaseName)
|
||||
.withHeartbeats(configuration.debeziumHeartbeatInterval)
|
||||
.withOffset()
|
||||
.withSchemaHistory()
|
||||
.withStreams(streams)
|
||||
.with("include.schema.changes", "false")
|
||||
.with("provide.transaction.metadata", "false")
|
||||
.with("snapshot.isolation.mode", "read_committed")
|
||||
.with("schema.include.list", schemaList)
|
||||
.let { builder ->
|
||||
if (messageKeyColumns.isNotEmpty()) {
|
||||
builder.with("message.key.columns", messageKeyColumns)
|
||||
} else {
|
||||
builder
|
||||
}
|
||||
}
|
||||
.withDatabase("hostname", tunnelSession.address.hostName)
|
||||
.withDatabase("port", tunnelSession.address.port.toString())
|
||||
.withDatabase("user", configuration.jdbcProperties["user"].toString())
|
||||
.withDatabase("password", configuration.jdbcProperties["password"].toString())
|
||||
.withDatabase("dbname", databaseName)
|
||||
.withDatabase("names", databaseName)
|
||||
.with("database.encrypt", configuration.jdbcProperties["encrypt"] ?: "false")
|
||||
.with(
|
||||
"driver.trustServerCertificate",
|
||||
configuration.jdbcProperties["trustServerCertificate"] ?: "true"
|
||||
)
|
||||
// Register the MSSQL custom converter
|
||||
.with("converters", "mssql_converter")
|
||||
.with("mssql_converter.type", MsSqlServerDebeziumConverter::class.java.name)
|
||||
.with("binary.handling.mode", "base64")
|
||||
.with("snapshot.locking.mode", "none")
|
||||
// Set poll.interval.ms to control how often Debezium queries for new data
|
||||
// This value is now configurable and validated to be smaller than heartbeat.interval.ms
|
||||
.with(
|
||||
"poll.interval.ms",
|
||||
(configuration.incrementalReplicationConfiguration as CdcIncrementalConfiguration)
|
||||
.pollIntervalMs
|
||||
.toString()
|
||||
)
|
||||
// Enable heartbeat timeout for MSSQL to detect idle database states
|
||||
.with(
|
||||
AIRBYTE_HEARTBEAT_TIMEOUT_SECONDS,
|
||||
configuration.incrementalReplicationConfiguration.initialWaitingSeconds
|
||||
.toSeconds()
|
||||
.toString()
|
||||
)
|
||||
.buildMap()
|
||||
}
|
||||
|
||||
override fun findStreamName(key: DebeziumRecordKey, value: DebeziumRecordValue): String? {
|
||||
return value.source["table"]?.asText()
|
||||
}
|
||||
|
||||
override fun findStreamNamespace(key: DebeziumRecordKey, value: DebeziumRecordValue): String? {
|
||||
return value.source["schema"]?.asText()
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the message.key.columns property value for Debezium. Format:
|
||||
* "schema1.table1:keyCol1,keyCol2;schema2.table2:keyCol1,keyCol2" This replicates the logic
|
||||
* from the old MSSQL connector's getMessageKeyColumnValue method.
|
||||
*/
|
||||
private fun buildMessageKeyColumns(streams: List<Stream>): String {
|
||||
return streams
|
||||
.filter { it.configuredPrimaryKey?.isNotEmpty() == true }
|
||||
.joinToString(";") { stream ->
|
||||
val tableId =
|
||||
"${escapeSpecialChars(stream.namespace)}.${escapeSpecialChars(stream.name)}"
|
||||
val keyCols =
|
||||
stream.configuredPrimaryKey!!.joinToString(",") { escapeSpecialChars(it.id) }
|
||||
"$tableId:$keyCols"
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes special characters for Debezium message key columns. Escapes: comma (,), period (.),
|
||||
* semicolon (;), and colon (:) This replicates the logic from the old MSSQL connector's
|
||||
* escapeSpecialChars method.
|
||||
*/
|
||||
private fun escapeSpecialChars(input: String?): String {
|
||||
if (input == null) return ""
|
||||
return input
|
||||
.map { char ->
|
||||
when (char) {
|
||||
',',
|
||||
'.',
|
||||
';',
|
||||
':' -> "\\${char}"
|
||||
else -> char.toString()
|
||||
}
|
||||
}
|
||||
.joinToString("")
|
||||
}
|
||||
|
||||
companion object {
|
||||
const val MSSQL_MAX_UNCOMPRESSED_LENGTH = 1024 * 1024
|
||||
const val MSSQL_STATE = "state"
|
||||
const val MSSQL_CDC_OFFSET = "mssql_cdc_offset"
|
||||
const val MSSQL_DB_HISTORY = "mssql_db_history"
|
||||
const val MSSQL_IS_COMPRESSED = "is_compressed"
|
||||
}
|
||||
}
|
||||
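For reference, a standalone sketch of the `message.key.columns` value assembled by `buildMessageKeyColumns` and `escapeSpecialChars` above. The schema, table, and key column names are hypothetical; commas, periods, semicolons, and colons inside identifiers are backslash-escaped before assembly.

```kotlin
fun escape(input: String): String =
    input.map { c -> if (c in listOf(',', '.', ';', ':')) "\\$c" else c.toString() }
        .joinToString("")

fun main() {
    // Hypothetical streams with their configured primary keys.
    val tables =
        mapOf(
            "dbo.users" to listOf("id"),
            "sales.order.items" to listOf("order_id", "line_no"),
        )
    val value =
        tables.entries.joinToString(";") { (table, keys) ->
            val (schema, name) = table.split(".", limit = 2)
            "${escape(schema)}.${escape(name)}:${keys.joinToString(",") { escape(it) }}"
        }
    // dbo.users:id;sales.order\.items:order_id,line_no
    println(value)
}
```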
@@ -0,0 +1,689 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import com.fasterxml.jackson.databind.node.BinaryNode
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.data.LeafAirbyteSchemaType
|
||||
import io.airbyte.cdk.data.OffsetDateTimeCodec
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.read.And
|
||||
import io.airbyte.cdk.read.DefaultJdbcStreamState
|
||||
import io.airbyte.cdk.read.Equal
|
||||
import io.airbyte.cdk.read.From
|
||||
import io.airbyte.cdk.read.FromSample
|
||||
import io.airbyte.cdk.read.Greater
|
||||
import io.airbyte.cdk.read.GreaterOrEqual
|
||||
import io.airbyte.cdk.read.JdbcCursorPartition
|
||||
import io.airbyte.cdk.read.JdbcPartition
|
||||
import io.airbyte.cdk.read.JdbcSplittablePartition
|
||||
import io.airbyte.cdk.read.Lesser
|
||||
import io.airbyte.cdk.read.LesserOrEqual
|
||||
import io.airbyte.cdk.read.Limit
|
||||
import io.airbyte.cdk.read.NoWhere
|
||||
import io.airbyte.cdk.read.Or
|
||||
import io.airbyte.cdk.read.OrderBy
|
||||
import io.airbyte.cdk.read.SelectColumnMaxValue
|
||||
import io.airbyte.cdk.read.SelectColumns
|
||||
import io.airbyte.cdk.read.SelectQuery
|
||||
import io.airbyte.cdk.read.SelectQueryGenerator
|
||||
import io.airbyte.cdk.read.SelectQuerySpec
|
||||
import io.airbyte.cdk.read.Stream
|
||||
import io.airbyte.cdk.read.Where
|
||||
import io.airbyte.cdk.read.WhereClauseLeafNode
|
||||
import io.airbyte.cdk.read.WhereClauseNode
|
||||
import io.airbyte.cdk.read.optimize
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import java.time.LocalDateTime
|
||||
import java.time.OffsetDateTime
|
||||
import java.time.ZoneOffset
|
||||
import java.time.format.DateTimeParseException
|
||||
import java.util.Base64
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
/**
|
||||
* Converts a state value string to a JsonNode based on the field type. This function handles type
|
||||
* conversions and date formatting for state checkpoints.
|
||||
*/
|
||||
fun stateValueToJsonNode(field: Field, stateValue: String?): JsonNode {
|
||||
when (field.type.airbyteSchemaType) {
|
||||
is LeafAirbyteSchemaType ->
|
||||
return when (field.type.airbyteSchemaType as LeafAirbyteSchemaType) {
|
||||
LeafAirbyteSchemaType.INTEGER -> {
|
||||
Jsons.valueToTree(stateValue?.toBigInteger())
|
||||
}
|
||||
LeafAirbyteSchemaType.NUMBER -> {
|
||||
Jsons.valueToTree(stateValue?.toDouble())
|
||||
}
|
||||
LeafAirbyteSchemaType.BINARY -> {
|
||||
val ba = Base64.getDecoder().decode(stateValue!!)
|
||||
Jsons.valueToTree<BinaryNode>(ba)
|
||||
}
|
||||
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE -> {
|
||||
try {
|
||||
val parsedDate =
|
||||
LocalDateTime.parse(
|
||||
stateValue,
|
||||
MsSqlServerJdbcPartitionFactory.inputDateFormatter
|
||||
)
|
||||
val dateAsString =
|
||||
parsedDate.format(MsSqlServerJdbcPartitionFactory.outputDateFormatter)
|
||||
Jsons.textNode(dateAsString)
|
||||
} catch (e: DateTimeParseException) {
|
||||
// Parsing with the legacy format failed; assume the value is already in the new format.
|
||||
Jsons.valueToTree(stateValue)
|
||||
}
|
||||
}
|
||||
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> {
|
||||
try {
|
||||
if (stateValue == null || stateValue.isEmpty()) {
|
||||
return Jsons.nullNode()
|
||||
}
|
||||
|
||||
// Normalize: remove spaces before timezone indicators
|
||||
val normalizedValue =
|
||||
stateValue.trim().replace(Regex("\\s+(?=[+\\-]|Z)"), "")
|
||||
|
||||
// Try parsing with timezone first, then fall back to assuming UTC
|
||||
val offsetDateTime =
|
||||
try {
|
||||
OffsetDateTime.parse(
|
||||
normalizedValue,
|
||||
MsSqlServerJdbcPartitionFactory.timestampWithTimezoneParser
|
||||
)
|
||||
} catch (e: DateTimeParseException) {
|
||||
// No timezone info - parse as LocalDateTime and assume UTC
|
||||
LocalDateTime.parse(
|
||||
normalizedValue,
|
||||
MsSqlServerJdbcPartitionFactory
|
||||
.timestampWithoutTimezoneParser
|
||||
)
|
||||
.atOffset(ZoneOffset.UTC)
|
||||
}
|
||||
|
||||
// Format using standard codec formatter (6 decimal places, Z or offset)
|
||||
Jsons.valueToTree(offsetDateTime.format(OffsetDateTimeCodec.formatter))
|
||||
} catch (e: DateTimeParseException) {
|
||||
// If all parsing fails, return as-is (already in new format)
|
||||
Jsons.valueToTree(stateValue)
|
||||
}
|
||||
}
|
||||
else -> Jsons.valueToTree(stateValue)
|
||||
}
|
||||
else ->
|
||||
throw IllegalStateException(
|
||||
"PK field must be leaf type but is ${field.type.airbyteSchemaType}."
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
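The timestamp-with-timezone branch of `stateValueToJsonNode` above normalizes legacy cursor values by stripping the whitespace before the offset and re-emitting a canonical ISO form. A standalone sketch of that idea using plain java.time; the connector itself relies on `MsSqlServerJdbcPartitionFactory`'s parsers and `OffsetDateTimeCodec.formatter`, so this is illustrative only.

```kotlin
import java.time.OffsetDateTime
import java.time.format.DateTimeFormatter

fun normalizeTimestampTz(stateValue: String): String {
    // Remove the space some legacy values carry before the timezone indicator.
    val normalized = stateValue.trim().replace(Regex("\\s+(?=[+\\-]|Z)"), "")
    return OffsetDateTime.parse(normalized).format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)
}

fun main() {
    println(normalizeTimestampTz("2024-06-15T10:30:00.123456 +02:00"))
    // 2024-06-15T10:30:00.123456+02:00
}
```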
sealed class MsSqlServerJdbcPartition(
|
||||
val selectQueryGenerator: SelectQueryGenerator,
|
||||
streamState: DefaultJdbcStreamState,
|
||||
) : JdbcPartition<DefaultJdbcStreamState> {
|
||||
val stream: Stream = streamState.stream
|
||||
val from = From(stream.name, stream.namespace)
|
||||
|
||||
override val nonResumableQuery: SelectQuery
|
||||
get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize())
|
||||
|
||||
open val nonResumableQuerySpec = SelectQuerySpec(SelectColumns(stream.fields), from)
|
||||
|
||||
override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery {
|
||||
val sampleSize: Int = streamState.sharedState.maxSampleSize
|
||||
val querySpec =
|
||||
SelectQuerySpec(
|
||||
SelectColumns(stream.fields),
|
||||
From(stream.name, stream.namespace),
|
||||
limit = Limit(sampleSize.toLong()),
|
||||
)
|
||||
return selectQueryGenerator.generate(querySpec.optimize())
|
||||
}
|
||||
}
|
||||
|
||||
class MsSqlServerJdbcNonResumableSnapshotPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
) : MsSqlServerJdbcPartition(selectQueryGenerator, streamState) {
|
||||
|
||||
override val completeState: OpaqueStateValue = MsSqlServerJdbcStreamStateValue.snapshotCompleted
|
||||
}
|
||||
|
||||
class MsSqlServerJdbcNonResumableSnapshotWithCursorPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
val cursor: Field,
|
||||
val cursorCutoffTime: JsonNode? = null,
|
||||
) :
|
||||
MsSqlServerJdbcPartition(selectQueryGenerator, streamState),
|
||||
JdbcCursorPartition<DefaultJdbcStreamState> {
|
||||
|
||||
override val completeState: OpaqueStateValue
|
||||
get() =
|
||||
MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
|
||||
cursor,
|
||||
cursorCheckpoint = streamState.cursorUpperBound!!,
|
||||
)
|
||||
|
||||
override val cursorUpperBoundQuery: SelectQuery
|
||||
get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize())
|
||||
|
||||
val cursorUpperBoundQuerySpec: SelectQuerySpec
|
||||
get() =
|
||||
if (cursorCutoffTime != null) {
|
||||
// When excluding today's data, apply cutoff constraint to upper bound query too
|
||||
SelectQuerySpec(
|
||||
SelectColumnMaxValue(cursor),
|
||||
from,
|
||||
Where(Lesser(cursor, cursorCutoffTime))
|
||||
)
|
||||
} else {
|
||||
SelectQuerySpec(SelectColumnMaxValue(cursor), from)
|
||||
}
|
||||
|
||||
override val nonResumableQuerySpec: SelectQuerySpec
|
||||
get() {
|
||||
// Add cutoff time constraint if present
|
||||
return if (cursorCutoffTime != null) {
|
||||
SelectQuerySpec(
|
||||
SelectColumns(stream.fields),
|
||||
from,
|
||||
Where(Lesser(cursor, cursorCutoffTime))
|
||||
)
|
||||
} else {
|
||||
SelectQuerySpec(SelectColumns(stream.fields), from)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sealed class MsSqlServerJdbcResumablePartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
streamState: DefaultJdbcStreamState,
|
||||
val checkpointColumns: List<Field>,
|
||||
) :
|
||||
MsSqlServerJdbcPartition(selectQueryGenerator, streamState),
|
||||
JdbcSplittablePartition<DefaultJdbcStreamState> {
|
||||
abstract val lowerBound: List<JsonNode>?
|
||||
abstract val upperBound: List<JsonNode>?
|
||||
|
||||
override val nonResumableQuery: SelectQuery
|
||||
get() = selectQueryGenerator.generate(nonResumableQuerySpec.optimize())
|
||||
|
||||
override val nonResumableQuerySpec: SelectQuerySpec
|
||||
get() = SelectQuerySpec(SelectColumns(stream.fields), from, where)
|
||||
|
||||
override fun resumableQuery(limit: Long): SelectQuery {
|
||||
val querySpec =
|
||||
SelectQuerySpec(
|
||||
SelectColumns((stream.fields + checkpointColumns).distinct()),
|
||||
from,
|
||||
where,
|
||||
OrderBy(checkpointColumns),
|
||||
Limit(limit),
|
||||
)
|
||||
return selectQueryGenerator.generate(querySpec.optimize())
|
||||
}
|
||||
|
||||
override fun samplingQuery(sampleRateInvPow2: Int): SelectQuery {
|
||||
val sampleSize: Int = streamState.sharedState.maxSampleSize
|
||||
val querySpec =
|
||||
SelectQuerySpec(
|
||||
SelectColumns(stream.fields + checkpointColumns),
|
||||
FromSample(stream.name, stream.namespace, sampleRateInvPow2, sampleSize),
|
||||
NoWhere,
|
||||
OrderBy(checkpointColumns),
|
||||
Limit(sampleSize.toLong())
|
||||
)
|
||||
return selectQueryGenerator.generate(querySpec.optimize())
|
||||
}
|
||||
|
||||
val where: Where
|
||||
get() {
|
||||
val zippedLowerBound: List<Pair<Field, JsonNode>> =
|
||||
lowerBound?.let { checkpointColumns.zip(it) } ?: listOf()
|
||||
val lowerBoundDisj: List<WhereClauseNode> =
|
||||
zippedLowerBound.mapIndexed { idx: Int, (gtCol: Field, gtValue: JsonNode) ->
|
||||
val lastLeaf: WhereClauseLeafNode =
|
||||
if (isLowerBoundIncluded && idx == checkpointColumns.size - 1) {
|
||||
GreaterOrEqual(gtCol, gtValue)
|
||||
} else {
|
||||
Greater(gtCol, gtValue)
|
||||
}
|
||||
And(
|
||||
zippedLowerBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) ->
|
||||
Equal(eqCol, eqValue)
|
||||
} + listOf(lastLeaf),
|
||||
)
|
||||
}
|
||||
val zippedUpperBound: List<Pair<Field, JsonNode>> =
|
||||
upperBound?.let { checkpointColumns.zip(it) } ?: listOf()
|
||||
val upperBoundDisj: List<WhereClauseNode> =
|
||||
zippedUpperBound.mapIndexed { idx: Int, (leqCol: Field, leqValue: JsonNode) ->
|
||||
val lastLeaf: WhereClauseLeafNode =
|
||||
if (idx < zippedUpperBound.size - 1) {
|
||||
Lesser(leqCol, leqValue)
|
||||
} else {
|
||||
LesserOrEqual(leqCol, leqValue)
|
||||
}
|
||||
And(
|
||||
zippedUpperBound.take(idx).map { (eqCol: Field, eqValue: JsonNode) ->
|
||||
Equal(eqCol, eqValue)
|
||||
} + listOf(lastLeaf),
|
||||
)
|
||||
}
|
||||
val baseClause = And(Or(lowerBoundDisj), Or(upperBoundDisj))
|
||||
// Add additional where clause if present
|
||||
val additional = additionalWhereClause
|
||||
return if (additional != null) {
|
||||
Where(And(baseClause, additional))
|
||||
} else {
|
||||
Where(baseClause)
|
||||
}
|
||||
}
|
||||
|
||||
open val isLowerBoundIncluded: Boolean = false
|
||||
|
||||
open val additionalWhereClause: WhereClauseNode? = null
|
||||
}
|
||||
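The `where` builder above assembles a lexicographic range predicate over the checkpoint columns. The following standalone sketch is not the CDK's query AST and omits the `isLowerBoundIncluded` and cutoff cases; it only spells out the SQL shape produced for a composite key, with hypothetical column names and bound values.

```kotlin
fun lexicographicWhere(cols: List<String>, lower: List<String>, upper: List<String>): String {
    // Lower bound: strictly greater than (a0, b0, ...) in lexicographic order.
    val lowerDisjuncts =
        cols.indices.map { i ->
            val equalities = (0 until i).map { j -> "${cols[j]} = ${lower[j]}" }
            (equalities + "${cols[i]} > ${lower[i]}").joinToString(" AND ", "(", ")")
        }
    // Upper bound: less than or equal to (a1, b1, ...) in lexicographic order.
    val upperDisjuncts =
        cols.indices.map { i ->
            val op = if (i < cols.size - 1) "<" else "<="
            val equalities = (0 until i).map { j -> "${cols[j]} = ${upper[j]}" }
            (equalities + "${cols[i]} $op ${upper[i]}").joinToString(" AND ", "(", ")")
        }
    return "(${lowerDisjuncts.joinToString(" OR ")}) AND (${upperDisjuncts.joinToString(" OR ")})"
}

fun main() {
    println(lexicographicWhere(listOf("a", "b"), listOf("10", "5"), listOf("20", "7")))
    // ((a > 10) OR (a = 10 AND b > 5)) AND ((a < 20) OR (a = 20 AND b <= 7))
}
```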
|
||||
/** RFR for cursor based read. */
|
||||
class MsSqlServerJdbcRfrSnapshotPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
primaryKey: List<Field>,
|
||||
override val lowerBound: List<JsonNode>?,
|
||||
override val upperBound: List<JsonNode>?,
|
||||
) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
|
||||
|
||||
// TODO: this needs to reflect lastRecord. Complete state needs to have last primary key value
|
||||
// in RFR case.
|
||||
override val completeState: OpaqueStateValue
|
||||
get() =
|
||||
when (upperBound) {
|
||||
null -> MsSqlServerJdbcStreamStateValue.snapshotCompleted
|
||||
else ->
|
||||
MsSqlServerJdbcStreamStateValue.snapshotCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = upperBound,
|
||||
)
|
||||
}
|
||||
|
||||
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
|
||||
MsSqlServerJdbcStreamStateValue.snapshotCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
|
||||
)
|
||||
}
|
||||
|
||||
/** RFR for CDC. */
|
||||
class MsSqlServerJdbcCdcRfrSnapshotPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
primaryKey: List<Field>,
|
||||
override val lowerBound: List<JsonNode>?,
|
||||
override val upperBound: List<JsonNode>?,
|
||||
) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
|
||||
override val completeState: OpaqueStateValue
|
||||
get() =
|
||||
when (upperBound) {
|
||||
null -> MsSqlServerCdcInitialSnapshotStateValue.getSnapshotCompletedState(stream)
|
||||
else ->
|
||||
MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = upperBound,
|
||||
)
|
||||
}
|
||||
|
||||
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
|
||||
MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of a [JdbcPartition] for a CDC snapshot partition. Used for incremental CDC
|
||||
* initial sync.
|
||||
*/
|
||||
class MsSqlServerJdbcCdcSnapshotPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
primaryKey: List<Field>,
|
||||
override val lowerBound: List<JsonNode>?
|
||||
) : MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, primaryKey) {
|
||||
override val upperBound: List<JsonNode>? = null
|
||||
override val completeState: OpaqueStateValue
|
||||
get() = MsSqlServerCdcInitialSnapshotStateValue.getSnapshotCompletedState(stream)
|
||||
|
||||
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
|
||||
MsSqlServerCdcInitialSnapshotStateValue.snapshotCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
|
||||
)
|
||||
}
|
||||
|
||||
sealed class MsSqlServerJdbcCursorPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
streamState: DefaultJdbcStreamState,
|
||||
checkpointColumns: List<Field>,
|
||||
val cursor: Field,
|
||||
private val explicitCursorUpperBound: JsonNode?,
|
||||
val cursorCutoffTime: JsonNode? = null,
|
||||
) :
|
||||
MsSqlServerJdbcResumablePartition(selectQueryGenerator, streamState, checkpointColumns),
|
||||
JdbcCursorPartition<DefaultJdbcStreamState> {
|
||||
|
||||
val cursorUpperBound: JsonNode
|
||||
get() = explicitCursorUpperBound ?: streamState.cursorUpperBound!!
|
||||
|
||||
override val cursorUpperBoundQuery: SelectQuery
|
||||
get() = selectQueryGenerator.generate(cursorUpperBoundQuerySpec.optimize())
|
||||
|
||||
val cursorUpperBoundQuerySpec: SelectQuerySpec
|
||||
get() =
|
||||
if (cursorCutoffTime != null && checkpointColumns.contains(cursor)) {
|
||||
// When excluding today's data, apply cutoff constraint to upper bound query too
|
||||
SelectQuerySpec(
|
||||
SelectColumnMaxValue(cursor),
|
||||
from,
|
||||
Where(Lesser(cursor, cursorCutoffTime))
|
||||
)
|
||||
} else {
|
||||
SelectQuerySpec(SelectColumnMaxValue(cursor), from)
|
||||
}
|
||||
|
||||
override val additionalWhereClause: WhereClauseNode?
|
||||
get() =
|
||||
if (cursorCutoffTime != null && checkpointColumns.contains(cursor)) {
|
||||
// Add an additional constraint for the cutoff time
|
||||
Lesser(cursor, cursorCutoffTime)
|
||||
} else {
|
||||
null
|
||||
}
|
||||
}
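
The cursor upper-bound probe defined by `cursorUpperBoundQuerySpec` is an ordinary `MAX` query, optionally capped by the cutoff so that today's rows never become the checkpoint. A hedged sketch of the two SQL shapes this corresponds to; the table and column names are made up, and the actual identifier quoting comes from `MsSqlSourceOperations`:

```kotlin
// Illustrative rendering of the two cursorUpperBoundQuerySpec variants.
fun cursorUpperBoundSql(table: String, cursor: String, cutoffLiteral: String?): String =
    if (cutoffLiteral != null) {
        // "Exclude today's data": cap the upper bound below the cutoff.
        "SELECT MAX($cursor) FROM $table WHERE $cursor < $cutoffLiteral"
    } else {
        "SELECT MAX($cursor) FROM $table"
    }

fun main() {
    println(cursorUpperBoundSql("dbo.orders", "updated_at", null))
    println(cursorUpperBoundSql("dbo.orders", "updated_at", "'2024-03-01T00:00:00'"))
}
```
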
|
||||
|
||||
class MsSqlServerJdbcSnapshotWithCursorPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
primaryKey: List<Field>,
|
||||
override val lowerBound: List<JsonNode>?,
|
||||
cursor: Field,
|
||||
cursorUpperBound: JsonNode?,
|
||||
cursorCutoffTime: JsonNode? = null,
|
||||
) :
|
||||
MsSqlServerJdbcCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
primaryKey,
|
||||
cursor,
|
||||
cursorUpperBound,
|
||||
cursorCutoffTime
|
||||
) {
|
||||
// UpperBound is always null for the initial partition that gets split
|
||||
override val upperBound: List<JsonNode>? = null
|
||||
|
||||
override val completeState: OpaqueStateValue
|
||||
get() {
|
||||
// Handle cursor cutoff time first
|
||||
val effectiveCursorCheckpoint =
|
||||
if (
|
||||
cursorCutoffTime != null &&
|
||||
!cursorCutoffTime.isNull &&
|
||||
!cursorUpperBound.isNull &&
|
||||
cursorCutoffTime.asText() < cursorUpperBound.asText()
|
||||
) {
|
||||
cursorCutoffTime
|
||||
} else {
|
||||
cursorUpperBound
|
||||
}
|
||||
|
||||
// Since this is the initial partition (that can be split),
|
||||
// completion means moving to cursor incremental mode
|
||||
return MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
|
||||
cursor,
|
||||
effectiveCursorCheckpoint,
|
||||
)
|
||||
}
|
||||
|
||||
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
|
||||
MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
|
||||
cursor,
|
||||
)
|
||||
}
|
||||
|
||||
class MsSqlServerJdbcSplittableSnapshotWithCursorPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
primaryKey: List<Field>,
|
||||
override val lowerBound: List<JsonNode>?,
|
||||
override val upperBound: List<JsonNode>?,
|
||||
cursor: Field,
|
||||
cursorUpperBound: JsonNode?,
|
||||
cursorCutoffTime: JsonNode? = null,
|
||||
) :
|
||||
MsSqlServerJdbcCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
primaryKey,
|
||||
cursor,
|
||||
cursorUpperBound,
|
||||
cursorCutoffTime
|
||||
) {
|
||||
override val completeState: OpaqueStateValue
|
||||
get() {
|
||||
// Handle cursor cutoff time first
|
||||
val effectiveCursorCheckpoint =
|
||||
if (
|
||||
cursorCutoffTime != null &&
|
||||
!cursorCutoffTime.isNull &&
|
||||
!cursorUpperBound.isNull &&
|
||||
cursorCutoffTime.asText() < cursorUpperBound.asText()
|
||||
) {
|
||||
cursorCutoffTime
|
||||
} else {
|
||||
cursorUpperBound
|
||||
}
|
||||
|
||||
return when (upperBound) {
|
||||
null ->
|
||||
MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
|
||||
cursor,
|
||||
effectiveCursorCheckpoint,
|
||||
)
|
||||
else ->
|
||||
MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = upperBound,
|
||||
cursor,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
|
||||
MsSqlServerJdbcStreamStateValue.snapshotWithCursorCheckpoint(
|
||||
primaryKey = checkpointColumns,
|
||||
primaryKeyCheckpoint = checkpointColumns.map { lastRecord[it.id] ?: Jsons.nullNode() },
|
||||
cursor,
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Default implementation of a [JdbcPartition] for a cursor incremental partition. These are always
|
||||
* splittable.
|
||||
*/
|
||||
class MsSqlServerJdbcCursorIncrementalPartition(
|
||||
selectQueryGenerator: SelectQueryGenerator,
|
||||
override val streamState: DefaultJdbcStreamState,
|
||||
cursor: Field,
|
||||
val cursorLowerBound: JsonNode,
|
||||
override val isLowerBoundIncluded: Boolean,
|
||||
cursorUpperBound: JsonNode?,
|
||||
cursorCutoffTime: JsonNode? = null,
|
||||
) :
|
||||
MsSqlServerJdbcCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
listOf(cursor),
|
||||
cursor,
|
||||
cursorUpperBound,
|
||||
cursorCutoffTime
|
||||
) {
|
||||
override val lowerBound: List<JsonNode> = listOf(cursorLowerBound)
|
||||
override val upperBound: List<JsonNode>
|
||||
get() = listOf(cursorUpperBound)
|
||||
|
||||
override val completeState: OpaqueStateValue
|
||||
get() {
|
||||
// When we have a cutoff time that's less than the upper bound,
|
||||
// use the cutoff as the checkpoint since that's where we actually stopped reading
|
||||
val effectiveCheckpoint =
|
||||
if (
|
||||
cursorCutoffTime != null &&
|
||||
!cursorCutoffTime.isNull &&
|
||||
!cursorUpperBound.isNull &&
|
||||
cursorCutoffTime.asText() < cursorUpperBound.asText()
|
||||
) {
|
||||
cursorCutoffTime
|
||||
} else {
|
||||
cursorUpperBound
|
||||
}
|
||||
return MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
|
||||
cursor,
|
||||
cursorCheckpoint = effectiveCheckpoint,
|
||||
)
|
||||
}
|
||||
|
||||
override fun incompleteState(lastRecord: ObjectNode): OpaqueStateValue =
|
||||
MsSqlServerJdbcStreamStateValue.cursorIncrementalCheckpoint(
|
||||
cursor,
|
||||
cursorCheckpoint = lastRecord[cursor.id] ?: Jsons.nullNode(),
|
||||
)
|
||||
}
|
||||
|
||||
// Extension methods for splitting MSSQL partitions
|
||||
fun MsSqlServerJdbcRfrSnapshotPartition.split(
|
||||
opaqueStateValues: List<OpaqueStateValue>
|
||||
): List<MsSqlServerJdbcRfrSnapshotPartition> {
|
||||
val splitPointValues: List<MsSqlServerJdbcStreamStateValue> =
|
||||
opaqueStateValues.map { MsSqlServerStateMigration.parseStateValue(it) }
|
||||
|
||||
val inners: List<List<JsonNode>> =
|
||||
splitPointValues.mapNotNull { sv ->
|
||||
val pkField = checkpointColumns.firstOrNull()
|
||||
if (pkField != null && sv.pkValue != null) {
|
||||
listOf(stateValueToJsonNode(pkField, sv.pkValue))
|
||||
} else null
|
||||
}
|
||||
|
||||
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
|
||||
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
|
||||
|
||||
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
|
||||
MsSqlServerJdbcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
checkpointColumns,
|
||||
lowerBound,
|
||||
upperBound,
|
||||
)
|
||||
}
|
||||
}
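
The `split()` extensions in this file all follow the same bound-chaining pattern: N checkpoint state values become N split points, which turn one partition into N + 1 adjacent sub-partitions whose bounds run lowerBound → split points → upperBound. A self-contained illustration of just that chaining:

```kotlin
// Illustrative only: the bound-chaining pattern shared by the split() extensions.
fun <T> chainBounds(lower: T?, splitPoints: List<T>, upper: T?): List<Pair<T?, T?>> {
    val lowerBounds = listOf(lower) + splitPoints
    val upperBounds = splitPoints + listOf(upper)
    return lowerBounds.zip(upperBounds)
}

fun main() {
    // Prints: [(null, 100), (100, 250), (250, null)]
    println(chainBounds(null, listOf(100, 250), null))
}
```
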
|
||||
|
||||
fun MsSqlServerJdbcCdcRfrSnapshotPartition.split(
|
||||
opaqueStateValues: List<OpaqueStateValue>
|
||||
): List<MsSqlServerJdbcCdcRfrSnapshotPartition> {
|
||||
val splitPointValues: List<MsSqlServerCdcInitialSnapshotStateValue> =
|
||||
opaqueStateValues.map {
|
||||
Jsons.treeToValue(it, MsSqlServerCdcInitialSnapshotStateValue::class.java)
|
||||
}
|
||||
|
||||
val inners: List<List<JsonNode>> =
|
||||
splitPointValues.mapNotNull { sv ->
|
||||
val pkField = checkpointColumns.firstOrNull()
|
||||
if (pkField != null && sv.pkVal != null) {
|
||||
listOf(stateValueToJsonNode(pkField, sv.pkVal))
|
||||
} else null
|
||||
}
|
||||
|
||||
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
|
||||
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
|
||||
|
||||
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
|
||||
MsSqlServerJdbcCdcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
checkpointColumns,
|
||||
lowerBound,
|
||||
upperBound,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fun MsSqlServerJdbcCdcSnapshotPartition.split(
|
||||
opaqueStateValues: List<OpaqueStateValue>
|
||||
): List<MsSqlServerJdbcCdcRfrSnapshotPartition> {
|
||||
val splitPointValues: List<MsSqlServerCdcInitialSnapshotStateValue> =
|
||||
opaqueStateValues.map {
|
||||
Jsons.treeToValue(it, MsSqlServerCdcInitialSnapshotStateValue::class.java)
|
||||
}
|
||||
|
||||
val inners: List<List<JsonNode>> =
|
||||
splitPointValues.mapNotNull { sv ->
|
||||
val pkField = checkpointColumns.firstOrNull()
|
||||
if (pkField != null && sv.pkVal != null) {
|
||||
listOf(stateValueToJsonNode(pkField, sv.pkVal))
|
||||
} else null
|
||||
}
|
||||
|
||||
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
|
||||
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
|
||||
|
||||
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
|
||||
MsSqlServerJdbcCdcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
checkpointColumns,
|
||||
lowerBound,
|
||||
upperBound,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fun MsSqlServerJdbcSnapshotWithCursorPartition.split(
|
||||
opaqueStateValues: List<OpaqueStateValue>
|
||||
): List<MsSqlServerJdbcSplittableSnapshotWithCursorPartition> {
|
||||
val splitPointValues: List<MsSqlServerJdbcStreamStateValue> =
|
||||
opaqueStateValues.map { MsSqlServerStateMigration.parseStateValue(it) }
|
||||
|
||||
val inners: List<List<JsonNode>> =
|
||||
splitPointValues.mapNotNull { sv ->
|
||||
val pkField = checkpointColumns.firstOrNull()
|
||||
if (pkField != null && sv.pkValue != null) {
|
||||
listOf(stateValueToJsonNode(pkField, sv.pkValue))
|
||||
} else null
|
||||
}
|
||||
|
||||
val lbs: List<List<JsonNode>?> = listOf(lowerBound) + inners
|
||||
val ubs: List<List<JsonNode>?> = inners + listOf(upperBound)
|
||||
|
||||
return lbs.zip(ubs).map { (lowerBound, upperBound) ->
|
||||
MsSqlServerJdbcSplittableSnapshotWithCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
checkpointColumns,
|
||||
lowerBound,
|
||||
upperBound,
|
||||
cursor,
|
||||
cursorUpperBound,
|
||||
cursorCutoffTime,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,354 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import io.airbyte.cdk.ConfigErrorException
|
||||
import io.airbyte.cdk.StreamIdentifier
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
|
||||
import io.airbyte.cdk.jdbc.JdbcFieldType
|
||||
import io.airbyte.cdk.read.ConfiguredSyncMode
|
||||
import io.airbyte.cdk.read.DefaultJdbcSharedState
|
||||
import io.airbyte.cdk.read.DefaultJdbcStreamState
|
||||
import io.airbyte.cdk.read.From
|
||||
import io.airbyte.cdk.read.JdbcPartitionFactory
|
||||
import io.airbyte.cdk.read.SelectColumnMaxValue
|
||||
import io.airbyte.cdk.read.SelectQuerySpec
|
||||
import io.airbyte.cdk.read.Stream
|
||||
import io.airbyte.cdk.read.StreamFeedBootstrap
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.micronaut.context.annotation.Primary
|
||||
import java.time.format.DateTimeFormatter
|
||||
import java.time.format.DateTimeFormatterBuilder
|
||||
import java.time.temporal.ChronoField
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Primary
|
||||
@Singleton
|
||||
class MsSqlServerJdbcPartitionFactory(
|
||||
override val sharedState: DefaultJdbcSharedState,
|
||||
val selectQueryGenerator: MsSqlSourceOperations,
|
||||
val config: MsSqlServerSourceConfiguration,
|
||||
) :
|
||||
JdbcPartitionFactory<
|
||||
DefaultJdbcSharedState,
|
||||
DefaultJdbcStreamState,
|
||||
MsSqlServerJdbcPartition,
|
||||
> {
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
private val streamStates = ConcurrentHashMap<StreamIdentifier, DefaultJdbcStreamState>()
|
||||
|
||||
override fun streamState(streamFeedBootstrap: StreamFeedBootstrap): DefaultJdbcStreamState =
|
||||
streamStates.getOrPut(streamFeedBootstrap.feed.id) {
|
||||
DefaultJdbcStreamState(sharedState, streamFeedBootstrap)
|
||||
}
|
||||
|
||||
private fun findPkUpperBound(stream: Stream, pkChosenFromCatalog: List<Field>): JsonNode {
|
||||
// find upper bound using maxPk query
|
||||
val jdbcConnectionFactory = JdbcConnectionFactory(config)
|
||||
val from = From(stream.name, stream.namespace)
|
||||
val maxPkQuery = SelectQuerySpec(SelectColumnMaxValue(pkChosenFromCatalog[0]), from)
|
||||
|
||||
jdbcConnectionFactory.get().use { connection ->
|
||||
val stmt = connection.prepareStatement(selectQueryGenerator.generate(maxPkQuery).sql)
|
||||
val rs = stmt.executeQuery()
|
||||
|
||||
if (rs.next()) {
|
||||
val jdbcFieldType = pkChosenFromCatalog[0].type as JdbcFieldType<*>
|
||||
val pkUpperBound: JsonNode = jdbcFieldType.get(rs, 1)
|
||||
return pkUpperBound
|
||||
} else {
|
||||
// The table might be empty, in which case there is no max PK value.
|
||||
return Jsons.nullNode()
|
||||
}
|
||||
}
|
||||
}
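
Concretely, for a hypothetical stream `dbo.users` whose chosen primary key column is `id`, the probe above boils down to a query of this shape (the exact rendering and quoting is up to `MsSqlSourceOperations`):

```kotlin
fun main() {
    // Hypothetical rendering of the max-PK probe; an empty table yields a NULL max,
    // which the code above maps to Jsons.nullNode().
    val exampleMaxPkQuery = "SELECT MAX(id) FROM dbo.users"
    println(exampleMaxPkQuery)
}
```
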
|
||||
|
||||
private fun coldStart(streamState: DefaultJdbcStreamState): MsSqlServerJdbcPartition {
|
||||
val stream: Stream = streamState.stream
|
||||
val pkChosenFromCatalog: List<Field> = stream.configuredPrimaryKey ?: listOf()
|
||||
|
||||
if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
|
||||
if (pkChosenFromCatalog.isEmpty()) {
|
||||
return MsSqlServerJdbcNonResumableSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
)
|
||||
}
|
||||
|
||||
val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
|
||||
return if (sharedState.configuration.global) {
|
||||
MsSqlServerJdbcCdcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = null,
|
||||
upperBound = listOf(upperBound),
|
||||
)
|
||||
} else {
|
||||
MsSqlServerJdbcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = null,
|
||||
upperBound = listOf(upperBound),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if (sharedState.configuration.global) {
|
||||
return MsSqlServerJdbcCdcSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = null,
|
||||
)
|
||||
}
|
||||
|
||||
val cursorChosenFromCatalog: Field =
|
||||
stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor")
|
||||
|
||||
// Calculate cutoff time for cursor if exclude today's data is enabled
|
||||
val cursorCutoffTime = getCursorCutoffTime(cursorChosenFromCatalog)
|
||||
|
||||
if (pkChosenFromCatalog.isEmpty()) {
|
||||
return MsSqlServerJdbcNonResumableSnapshotWithCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
cursorChosenFromCatalog,
|
||||
cursorCutoffTime = cursorCutoffTime,
|
||||
)
|
||||
}
|
||||
return MsSqlServerJdbcSnapshotWithCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = null,
|
||||
cursorChosenFromCatalog,
|
||||
cursorUpperBound = null,
|
||||
cursorCutoffTime = cursorCutoffTime,
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Flowchart:
|
||||
* 1. If the input state is null, use cold start.
|
||||
* ```
|
||||
* a. If it's global but without PK, use non-resumable snapshot.
|
||||
* b. If it's global with PK, use snapshot.
|
||||
* c. If it's not global, use snapshot with cursor.
|
||||
* ```
|
||||
* 2. If the input state is not null:
|
||||
* ```
|
||||
* a. If it's in global mode, JdbcPartitionFactory will not handle this. (TODO)
|
||||
* b. If it's cursor based, it could be either in PK read phase (initial read) or
|
||||
* cursor read phase (incremental read). This is differentiated by the stateType.
|
||||
* i. In PK read phase, use snapshot with cursor. If no PKs were found,
|
||||
* use non-resumable snapshot with cursor.
|
||||
* ii. In cursor read phase, use cursor incremental.
|
||||
* ```
|
||||
*/
|
||||
override fun create(streamFeedBootstrap: StreamFeedBootstrap): MsSqlServerJdbcPartition? {
|
||||
val stream: Stream = streamFeedBootstrap.feed
|
||||
val streamState: DefaultJdbcStreamState = streamState(streamFeedBootstrap)
|
||||
val opaqueStateValue: OpaqueStateValue =
|
||||
streamFeedBootstrap.currentState ?: return coldStart(streamState)
|
||||
|
||||
val isCursorBased: Boolean = !sharedState.configuration.global
|
||||
|
||||
val pkChosenFromCatalog: List<Field> = stream.configuredPrimaryKey ?: listOf()
|
||||
|
||||
if (
|
||||
pkChosenFromCatalog.isEmpty() &&
|
||||
stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH
|
||||
) {
|
||||
if (
|
||||
streamState.streamFeedBootstrap.currentState ==
|
||||
MsSqlServerJdbcStreamStateValue.snapshotCompleted
|
||||
) {
|
||||
return null
|
||||
}
|
||||
return MsSqlServerJdbcNonResumableSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
)
|
||||
}
|
||||
|
||||
if (!isCursorBased) {
|
||||
val sv: MsSqlServerCdcInitialSnapshotStateValue =
|
||||
Jsons.treeToValue(
|
||||
opaqueStateValue,
|
||||
MsSqlServerCdcInitialSnapshotStateValue::class.java
|
||||
)
|
||||
|
||||
if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
|
||||
val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
|
||||
if (sv.pkVal == upperBound.asText()) {
|
||||
return null
|
||||
}
|
||||
val pkLowerBound: JsonNode = stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkVal)
|
||||
|
||||
return MsSqlServerJdbcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
|
||||
upperBound = listOf(upperBound)
|
||||
)
|
||||
}
|
||||
|
||||
if (sv.pkName == null) {
|
||||
// This indicates the initial snapshot has been completed. CDC snapshots will be handled
|
||||
// by CDCPartitionFactory.
|
||||
// Nothing to do here.
|
||||
return null
|
||||
} else {
|
||||
// This branch indicates snapshot is incomplete. We need to resume based on previous
|
||||
// snapshot state.
|
||||
val pkField = pkChosenFromCatalog.first()
|
||||
val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkVal)
|
||||
|
||||
return MsSqlServerJdbcCdcSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
listOf(pkLowerBound),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
val sv: MsSqlServerJdbcStreamStateValue =
|
||||
MsSqlServerStateMigration.parseStateValue(opaqueStateValue)
|
||||
|
||||
if (stream.configuredSyncMode == ConfiguredSyncMode.FULL_REFRESH) {
|
||||
val upperBound = findPkUpperBound(stream, pkChosenFromCatalog)
|
||||
if (sv.pkValue == upperBound.asText()) {
|
||||
return null
|
||||
}
|
||||
val pkLowerBound: JsonNode =
|
||||
stateValueToJsonNode(pkChosenFromCatalog[0], sv.pkValue)
|
||||
|
||||
return MsSqlServerJdbcRfrSnapshotPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = if (pkLowerBound.isNull) null else listOf(pkLowerBound),
|
||||
upperBound = listOf(upperBound)
|
||||
)
|
||||
}
|
||||
|
||||
if (sv.stateType != StateType.CURSOR_BASED.stateType) {
|
||||
// Loading the value from the catalog. Note that there could be unexpected behavior if the
// user updated their schema but did not reset their state.
|
||||
val pkField = pkChosenFromCatalog.first()
|
||||
val pkLowerBound: JsonNode = stateValueToJsonNode(pkField, sv.pkValue)
|
||||
|
||||
val cursorChosenFromCatalog: Field =
|
||||
stream.configuredCursor as? Field ?: throw ConfigErrorException("no cursor")
|
||||
|
||||
// in a state where it's still in primary_key read part.
|
||||
return MsSqlServerJdbcSnapshotWithCursorPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
pkChosenFromCatalog,
|
||||
lowerBound = listOf(pkLowerBound),
|
||||
cursorChosenFromCatalog,
|
||||
cursorUpperBound = null,
|
||||
cursorCutoffTime = getCursorCutoffTime(cursorChosenFromCatalog),
|
||||
)
|
||||
}
|
||||
// resume back to cursor based increment.
|
||||
val cursor: Field = stream.fields.find { it.id == sv.cursorField.first() } as Field
|
||||
val cursorCheckpoint: JsonNode = stateValueToJsonNode(cursor, sv.cursor)
|
||||
|
||||
// Compose a JsonNode mapping the cursor label to the cursor value to fit into
// MsSqlServerJdbcCursorIncrementalPartition.
|
||||
if (cursorCheckpoint.toString() == streamState.cursorUpperBound?.toString()) {
|
||||
// Incremental complete.
|
||||
return null
|
||||
}
|
||||
return MsSqlServerJdbcCursorIncrementalPartition(
|
||||
selectQueryGenerator,
|
||||
streamState,
|
||||
cursor,
|
||||
cursorLowerBound = cursorCheckpoint,
|
||||
isLowerBoundIncluded = false,
|
||||
cursorUpperBound = streamState.cursorUpperBound,
|
||||
cursorCutoffTime = getCursorCutoffTime(cursor),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private fun getCursorCutoffTime(cursorField: Field): JsonNode? {
|
||||
val incrementalConfig = config.incrementalReplicationConfiguration
|
||||
return if (
|
||||
incrementalConfig is UserDefinedCursorIncrementalConfiguration &&
|
||||
incrementalConfig.excludeTodaysData &&
|
||||
MsSqlServerCursorCutoffTimeProvider.isTemporalType(
|
||||
cursorField,
|
||||
)
|
||||
) {
|
||||
val cutoffTime = MsSqlServerCursorCutoffTimeProvider.getCutoffTime(cursorField)
|
||||
log.info { "Using cursor cutoff time: $cutoffTime for field '${cursorField.id}'" }
|
||||
cutoffTime
|
||||
} else {
|
||||
null
|
||||
}
|
||||
}
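
`MsSqlServerCursorCutoffTimeProvider` is not part of this hunk, so the exact cutoff computation is not shown here; going by the "Exclude Today's Data" option described in the spec, the cutoff is presumably the most recent midnight UTC. A sketch under that assumption only:

```kotlin
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.ZoneOffset

// Assumption: "exclude today's data" means everything from today's midnight UTC onward is excluded.
fun assumedCursorCutoffTime(): LocalDateTime = LocalDate.now(ZoneOffset.UTC).atStartOfDay()

fun main() {
    println(assumedCursorCutoffTime()) // e.g. 2024-03-01T00:00
}
```
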
|
||||
|
||||
override fun split(
|
||||
unsplitPartition: MsSqlServerJdbcPartition,
|
||||
opaqueStateValues: List<OpaqueStateValue>
|
||||
): List<MsSqlServerJdbcPartition> {
|
||||
return when (unsplitPartition) {
|
||||
is MsSqlServerJdbcRfrSnapshotPartition -> unsplitPartition.split(opaqueStateValues)
|
||||
is MsSqlServerJdbcCdcRfrSnapshotPartition -> unsplitPartition.split(opaqueStateValues)
|
||||
is MsSqlServerJdbcCdcSnapshotPartition -> unsplitPartition.split(opaqueStateValues)
|
||||
is MsSqlServerJdbcSnapshotWithCursorPartition ->
|
||||
unsplitPartition.split(opaqueStateValues)
|
||||
is MsSqlServerJdbcSplittableSnapshotWithCursorPartition -> listOf(unsplitPartition)
|
||||
is MsSqlServerJdbcCursorIncrementalPartition -> listOf(unsplitPartition)
|
||||
is MsSqlServerJdbcNonResumableSnapshotPartition -> listOf(unsplitPartition)
|
||||
is MsSqlServerJdbcNonResumableSnapshotWithCursorPartition -> listOf(unsplitPartition)
|
||||
}
|
||||
}
|
||||
|
||||
companion object {
|
||||
const val DATETIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
|
||||
val outputDateFormatter: DateTimeFormatter = DateTimeFormatter.ofPattern(DATETIME_PATTERN)
|
||||
|
||||
const val TIMESTAMP_WITHOUT_FRACT_SECOND_PATTERN = "yyyy-MM-dd'T'HH:mm:ss"
|
||||
val inputDateFormatter: DateTimeFormatter =
|
||||
DateTimeFormatterBuilder()
|
||||
.appendPattern(TIMESTAMP_WITHOUT_FRACT_SECOND_PATTERN)
|
||||
.optionalStart()
|
||||
.appendFraction(ChronoField.NANO_OF_SECOND, 1, 6, true)
|
||||
.optionalEnd()
|
||||
.toFormatter()
|
||||
|
||||
// Parser for timestamps without timezone info
|
||||
val timestampWithoutTimezoneParser: DateTimeFormatter =
|
||||
DateTimeFormatterBuilder()
|
||||
.appendPattern("yyyy-MM-dd'T'HH:mm:ss")
|
||||
.optionalStart()
|
||||
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
|
||||
.optionalEnd()
|
||||
.toFormatter()
|
||||
|
||||
// Parser for timestamps with timezone info
|
||||
val timestampWithTimezoneParser: DateTimeFormatter =
|
||||
DateTimeFormatterBuilder()
|
||||
.appendPattern("yyyy-MM-dd'T'HH:mm:ss")
|
||||
.optionalStart()
|
||||
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
|
||||
.optionalEnd()
|
||||
.appendOffset("+HH:MM", "Z")
|
||||
.toFormatter()
|
||||
}
|
||||
}
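
The `inputDateFormatter` built in the companion object accepts ISO-style timestamps with an optional 1-to-6-digit fraction. A small self-contained check of the same builder (the sample values are arbitrary):

```kotlin
import java.time.LocalDateTime
import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.ChronoField

fun main() {
    val parser = DateTimeFormatterBuilder()
        .appendPattern("yyyy-MM-dd'T'HH:mm:ss")
        .optionalStart()
        .appendFraction(ChronoField.NANO_OF_SECOND, 1, 6, true)
        .optionalEnd()
        .toFormatter()
    println(LocalDateTime.parse("2024-03-01T12:34:56.123456", parser)) // 2024-03-01T12:34:56.123456
    println(LocalDateTime.parse("2024-03-01T12:34:56", parser))        // 2024-03-01T12:34:56
}
```
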
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
data class MsSqlServerJdbcStreamStateValue(
|
||||
@JsonProperty("cursor") val cursor: String = "",
|
||||
@JsonProperty("version") val version: Int = CURRENT_VERSION,
|
||||
@JsonProperty("state_type") val stateType: String = StateType.CURSOR_BASED.stateType,
|
||||
@JsonProperty("cursor_field") val cursorField: List<String> = listOf(),
|
||||
@JsonProperty("cursor_record_count") val cursorRecordCount: Int = 0,
|
||||
@JsonProperty("pk_name") val pkName: String? = null,
|
||||
@JsonProperty("pk_val") val pkValue: String? = null,
|
||||
@JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
|
||||
) {
|
||||
companion object {
|
||||
/** Current state version used by the new CDK MSSQL connector */
|
||||
const val CURRENT_VERSION = 3
|
||||
|
||||
/** Legacy state version used by the old CDK MSSQL connector */
|
||||
const val LEGACY_VERSION = 2
|
||||
|
||||
/**
|
||||
* Determines if a given version number represents a legacy state format
|
||||
* @param version The version number to check (null is considered legacy)
|
||||
* @return true if the version is legacy and needs migration
|
||||
*/
|
||||
fun isLegacy(version: Int?): Boolean = version == null || version <= LEGACY_VERSION
|
||||
|
||||
/** Value representing the completion of a FULL_REFRESH snapshot. */
|
||||
val snapshotCompleted: OpaqueStateValue
|
||||
get() = Jsons.valueToTree(MsSqlServerJdbcStreamStateValue(stateType = "primary_key"))
|
||||
|
||||
/** Value representing the progress of an ongoing incremental cursor read. */
|
||||
fun cursorIncrementalCheckpoint(
|
||||
cursor: Field,
|
||||
cursorCheckpoint: JsonNode,
|
||||
): OpaqueStateValue {
|
||||
return Jsons.valueToTree(
|
||||
MsSqlServerJdbcStreamStateValue(
|
||||
cursorField = listOf(cursor.id),
|
||||
cursor = cursorCheckpoint.asText(),
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** Value representing the progress of an ongoing snapshot not involving cursor columns. */
|
||||
fun snapshotCheckpoint(
|
||||
primaryKey: List<Field>,
|
||||
primaryKeyCheckpoint: List<JsonNode>,
|
||||
): OpaqueStateValue {
|
||||
val primaryKeyField = primaryKey.first()
|
||||
return Jsons.valueToTree(
|
||||
MsSqlServerJdbcStreamStateValue(
|
||||
pkName = primaryKeyField.id,
|
||||
pkValue = primaryKeyCheckpoint.first().asText(),
|
||||
stateType = StateType.PRIMARY_KEY.stateType,
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** Value representing the progress of an ongoing snapshot involving cursor columns. */
|
||||
fun snapshotWithCursorCheckpoint(
|
||||
primaryKey: List<Field>,
|
||||
primaryKeyCheckpoint: List<JsonNode>,
|
||||
cursor: Field,
|
||||
): OpaqueStateValue {
|
||||
val primaryKeyField = primaryKey.first()
|
||||
return Jsons.valueToTree(
|
||||
MsSqlServerJdbcStreamStateValue(
|
||||
pkName = primaryKeyField.id,
|
||||
pkValue = primaryKeyCheckpoint.first().asText(),
|
||||
stateType = StateType.PRIMARY_KEY.stateType,
|
||||
incrementalState =
|
||||
Jsons.valueToTree(
|
||||
MsSqlServerJdbcStreamStateValue(
|
||||
cursorField = listOf(cursor.id),
|
||||
)
|
||||
),
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum class StateType(val stateType: String) {
|
||||
PRIMARY_KEY("primary_key"),
|
||||
CURSOR_BASED("cursor_based"),
|
||||
}
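
To make the two checkpoint shapes concrete, here is roughly what the `state_type` variants serialize to. This is approximate: the cursor and PK values are made up, and whether the remaining default or null fields appear depends on how the shared `Jsons` mapper is configured:

```kotlin
fun main() {
    // Approximate serialized forms of the two checkpoint variants (illustrative values).
    val exampleCursorCheckpoint =
        """{"state_type":"cursor_based","version":3,"cursor_field":["updated_at"],"cursor":"2024-03-01T00:00:00"}"""
    val examplePrimaryKeyCheckpoint =
        """{"state_type":"primary_key","version":3,"pk_name":"id","pk_val":"12345"}"""
    println(exampleCursorCheckpoint)
    println(examplePrimaryKeyCheckpoint)
}
```
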
|
||||
@@ -0,0 +1,14 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.AirbyteSourceRunner
|
||||
|
||||
object MsSqlServerSource {
|
||||
@JvmStatic
|
||||
fun main(args: Array<String>) {
|
||||
AirbyteSourceRunner.run(*args)
|
||||
}
|
||||
}
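
`AirbyteSourceRunner` dispatches on the standard Airbyte protocol verbs, so the entrypoint above is typically exercised with invocations along these lines (the paths are placeholders):

```kotlin
fun main() {
    // Typical protocol invocations handled by AirbyteSourceRunner.run(*args).
    val examples = listOf(
        arrayOf("spec"),
        arrayOf("check", "--config", "/secrets/config.json"),
        arrayOf("discover", "--config", "/secrets/config.json"),
        arrayOf("read", "--config", "/secrets/config.json", "--catalog", "/secrets/catalog.json"),
    )
    examples.forEach { println(it.joinToString(" ")) }
}
```
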
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.ConfigErrorException
|
||||
import io.airbyte.cdk.command.*
|
||||
import io.airbyte.cdk.jdbc.SSLCertificateUtils
|
||||
import io.airbyte.cdk.output.DataChannelMedium
|
||||
import io.airbyte.cdk.output.sockets.DATA_CHANNEL_PROPERTY_PREFIX
|
||||
import io.airbyte.cdk.ssh.SshConnectionOptions
|
||||
import io.airbyte.cdk.ssh.SshNoTunnelMethod
|
||||
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.micronaut.context.annotation.Factory
|
||||
import io.micronaut.context.annotation.Value
|
||||
import jakarta.inject.Inject
|
||||
import jakarta.inject.Singleton
|
||||
import java.net.URLDecoder
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.time.Duration
|
||||
import org.apache.commons.lang3.RandomStringUtils
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
class MsSqlServerSourceConfiguration(
|
||||
override val realHost: String,
|
||||
override val realPort: Int,
|
||||
override val sshTunnel: SshTunnelMethodConfiguration?,
|
||||
override val sshConnectionOptions: SshConnectionOptions,
|
||||
override val jdbcUrlFmt: String,
|
||||
override val jdbcProperties: Map<String, String>,
|
||||
override val namespaces: Set<String>,
|
||||
override val maxConcurrency: Int,
|
||||
override val resourceAcquisitionHeartbeat: Duration = Duration.ofMillis(100L),
|
||||
override val checkpointTargetInterval: Duration,
|
||||
override val checkPrivileges: Boolean,
|
||||
override val debeziumHeartbeatInterval: Duration = Duration.ofSeconds(10),
|
||||
val incrementalReplicationConfiguration: IncrementalConfiguration,
|
||||
val databaseName: String,
|
||||
) : JdbcSourceConfiguration, CdcSourceConfiguration {
|
||||
override val global = incrementalReplicationConfiguration is CdcIncrementalConfiguration
|
||||
override val maxSnapshotReadDuration: Duration? =
|
||||
(incrementalReplicationConfiguration as? CdcIncrementalConfiguration)?.initialLoadTimeout
|
||||
|
||||
/** Required to inject [MsSqlServerSourceConfiguration] directly. */
|
||||
@Factory
|
||||
private class MicronautFactory {
|
||||
@Singleton
|
||||
fun mssqlServerSourceConfig(
|
||||
factory:
|
||||
SourceConfigurationFactory<
|
||||
MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration>,
|
||||
supplier:
|
||||
ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>,
|
||||
): MsSqlServerSourceConfiguration = factory.make(supplier.get())
|
||||
}
|
||||
}
|
||||
|
||||
sealed interface IncrementalConfiguration
|
||||
|
||||
data class UserDefinedCursorIncrementalConfiguration(val excludeTodaysData: Boolean = false) :
|
||||
IncrementalConfiguration
|
||||
|
||||
data class CdcIncrementalConfiguration(
|
||||
val initialWaitingSeconds: Duration,
|
||||
val invalidCdcCursorPositionBehavior: InvalidCdcCursorPositionBehavior,
|
||||
val initialLoadTimeout: Duration,
|
||||
val pollIntervalMs: Int
|
||||
) : IncrementalConfiguration
|
||||
|
||||
enum class InvalidCdcCursorPositionBehavior {
|
||||
FAIL_SYNC,
|
||||
RESET_SYNC,
|
||||
}
|
||||
|
||||
@Singleton
|
||||
class MsSqlServerSourceConfigurationFactory
|
||||
@Inject
|
||||
constructor(
|
||||
val featureFlags: Set<FeatureFlag>,
|
||||
@Value("\${${DATA_CHANNEL_PROPERTY_PREFIX}.medium}")
|
||||
val dataChannelMedium: String = DataChannelMedium.STDIO.name,
|
||||
@Value("\${${DATA_CHANNEL_PROPERTY_PREFIX}.socket-paths}")
|
||||
val socketPaths: List<String> = emptyList(),
|
||||
) :
|
||||
SourceConfigurationFactory<
|
||||
MsSqlServerSourceConfigurationSpecification, MsSqlServerSourceConfiguration> {
|
||||
|
||||
constructor() : this(emptySet(), DataChannelMedium.STDIO.name, emptyList())
|
||||
|
||||
override fun makeWithoutExceptionHandling(
|
||||
pojo: MsSqlServerSourceConfigurationSpecification,
|
||||
): MsSqlServerSourceConfiguration {
|
||||
val incrementalSpec = pojo.getIncrementalValue()
|
||||
val incrementalReplicationConfiguration =
|
||||
when (incrementalSpec) {
|
||||
is UserDefinedCursor -> {
|
||||
UserDefinedCursorIncrementalConfiguration(
|
||||
excludeTodaysData = incrementalSpec.excludeTodaysData ?: false
|
||||
)
|
||||
}
|
||||
is Cdc -> {
|
||||
val initialWaitingSeconds: Duration =
|
||||
Duration.ofSeconds(incrementalSpec.initialWaitingSeconds?.toLong() ?: 300L)
|
||||
val initialLoadTimeout: Duration =
|
||||
Duration.ofHours(incrementalSpec.initialLoadTimeoutHours?.toLong() ?: 8L)
|
||||
val invalidCdcCursorPositionBehavior: InvalidCdcCursorPositionBehavior =
|
||||
if (incrementalSpec.invalidCdcCursorPositionBehavior == "Fail sync") {
|
||||
InvalidCdcCursorPositionBehavior.FAIL_SYNC
|
||||
} else {
|
||||
InvalidCdcCursorPositionBehavior.RESET_SYNC
|
||||
}
|
||||
|
||||
// Validate poll interval vs heartbeat interval
|
||||
val pollIntervalMs = incrementalSpec.pollIntervalMs ?: 500
|
||||
val heartbeatIntervalMs =
|
||||
MsSqlServerSourceConfigurationSpecification.DEFAULT_HEARTBEAT_INTERVAL_MS
|
||||
if (pollIntervalMs >= heartbeatIntervalMs) {
|
||||
throw ConfigErrorException(
|
||||
"Poll interval ($pollIntervalMs ms) must be smaller than heartbeat interval ($heartbeatIntervalMs ms). " +
|
||||
"Please reduce the poll interval to a value less than $heartbeatIntervalMs ms."
|
||||
)
|
||||
}
|
||||
|
||||
CdcIncrementalConfiguration(
|
||||
initialWaitingSeconds,
|
||||
invalidCdcCursorPositionBehavior,
|
||||
initialLoadTimeout,
|
||||
pollIntervalMs,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
val sshTunnel: SshTunnelMethodConfiguration? = pojo.getTunnelMethodValue()
|
||||
|
||||
// Check if encryption was explicitly set in JSON (encryptionJson != null)
|
||||
// vs using the default value (encryptionJson == null).
|
||||
// Old connector used "ssl_method" field which was optional, so legacy configs
|
||||
// won't have ssl_mode at all, resulting in encryptionJson being null.
|
||||
val isLegacyConfig = pojo.encryptionJson == null
|
||||
val jdbcEncryption =
|
||||
when (val encryptionSpec: EncryptionSpecification? = pojo.getEncryptionValue()) {
|
||||
is MsSqlServerEncryptionDisabledConfigurationSpecification -> {
|
||||
// For legacy configs without an ssl_mode field, allow unencrypted connections for
// backward compatibility, even in cloud deployments. This handles migration from
// older connector versions.
|
||||
if (isLegacyConfig) {
|
||||
log.warn {
|
||||
"No encryption configuration found in JSON. " +
|
||||
"This appears to be a legacy configuration migrated from an older connector version. " +
|
||||
"Consider adding SSL encryption for better security."
|
||||
}
|
||||
mapOf("encrypt" to "false", "trustServerCertificate" to "true")
|
||||
} else {
|
||||
// Explicitly disabled encryption (user set ssl_mode.mode = "unencrypted")
|
||||
// should fail in cloud without SSH tunnel
|
||||
if (
|
||||
featureFlags.contains(FeatureFlag.AIRBYTE_CLOUD_DEPLOYMENT) &&
|
||||
sshTunnel is SshNoTunnelMethod
|
||||
) {
|
||||
throw ConfigErrorException(
|
||||
"Connection from Airbyte Cloud requires " +
|
||||
"SSL encryption or an SSH tunnel."
|
||||
)
|
||||
} else {
|
||||
mapOf("encrypt" to "false", "trustServerCertificate" to "true")
|
||||
}
|
||||
}
|
||||
}
|
||||
null -> {
|
||||
// This should never happen since getEncryptionValue() has a default
|
||||
mapOf("encrypt" to "false", "trustServerCertificate" to "true")
|
||||
}
|
||||
is MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification ->
|
||||
mapOf("encrypt" to "true", "trustServerCertificate" to "true")
|
||||
is SslVerifyCertificate -> {
|
||||
val certificate = encryptionSpec.certificate
|
||||
val trustStoreProperties =
|
||||
if (certificate == null) {
|
||||
emptyMap()
|
||||
} else {
|
||||
val password = RandomStringUtils.secure().next(100)
|
||||
val keyStoreUri =
|
||||
SSLCertificateUtils.keyStoreFromCertificate(certificate, password)
|
||||
mapOf(
|
||||
"trustStore" to keyStoreUri.path,
|
||||
"trustStorePassword" to password
|
||||
)
|
||||
}
|
||||
val hostNameInCertificate = encryptionSpec.hostNameInCertificate
|
||||
val hostNameProperties =
|
||||
if (hostNameInCertificate == null) {
|
||||
emptyMap()
|
||||
} else {
|
||||
mapOf("hostNameInCertificate" to hostNameInCertificate)
|
||||
}
|
||||
trustStoreProperties +
|
||||
hostNameProperties +
|
||||
mapOf("encrypt" to "true", "trustServerCertificate" to "false")
|
||||
}
|
||||
}
|
||||
|
||||
// Parse JDBC URL parameters
|
||||
val jdbcProperties = mutableMapOf<String, String>()
|
||||
jdbcProperties["user"] = pojo.username
|
||||
jdbcProperties["password"] = pojo.password
|
||||
|
||||
// Parse URL parameters from jdbcUrlParams
|
||||
val pattern = "^([^=]+)=(.*)$".toRegex()
|
||||
for (pair in (pojo.jdbcUrlParams ?: "").trim().split("&".toRegex())) {
|
||||
if (pair.isBlank()) {
|
||||
continue
|
||||
}
|
||||
val result: MatchResult? = pattern.matchEntire(pair)
|
||||
if (result == null) {
|
||||
log.warn { "ignoring invalid JDBC URL param '$pair'" }
|
||||
} else {
|
||||
val key: String = result.groupValues[1].trim()
|
||||
val urlEncodedValue: String = result.groupValues[2].trim()
|
||||
jdbcProperties[key] = URLDecoder.decode(urlEncodedValue, StandardCharsets.UTF_8)
|
||||
}
|
||||
}
|
||||
jdbcProperties.putAll(jdbcEncryption)
|
||||
|
||||
// Validate and process configuration values
|
||||
val checkpointTargetInterval: Duration =
|
||||
Duration.ofSeconds(pojo.checkpointTargetIntervalSeconds?.toLong() ?: 300L)
|
||||
if (!checkpointTargetInterval.isPositive) {
|
||||
throw ConfigErrorException("Checkpoint Target Interval should be positive")
|
||||
}
|
||||
|
||||
var maxConcurrency: Int? = pojo.concurrency
|
||||
|
||||
log.info { "maxConcurrency: $maxConcurrency. socket paths: ${socketPaths.size}" }
|
||||
|
||||
// If maxConcurrency is set, we use it.
|
||||
// Otherwise, we use the number of socket paths provided (SOCKET mode),
// or 1 (STDIO mode).
|
||||
maxConcurrency =
|
||||
when (DataChannelMedium.valueOf(dataChannelMedium)) {
|
||||
DataChannelMedium.STDIO -> maxConcurrency ?: 1
|
||||
DataChannelMedium.SOCKET -> maxConcurrency ?: socketPaths.size
|
||||
}
|
||||
log.info { "Effective concurrency: $maxConcurrency" }
|
||||
|
||||
if (maxConcurrency <= 0) {
|
||||
throw ConfigErrorException("Concurrency setting should be positive")
|
||||
}
|
||||
|
||||
return MsSqlServerSourceConfiguration(
|
||||
realHost = pojo.host,
|
||||
realPort = pojo.port,
|
||||
sshTunnel = sshTunnel,
|
||||
sshConnectionOptions = SshConnectionOptions.fromAdditionalProperties(emptyMap()),
|
||||
checkpointTargetInterval = checkpointTargetInterval,
|
||||
jdbcUrlFmt = "jdbc:sqlserver://%s:%d;databaseName=${pojo.database}",
|
||||
namespaces = pojo.schemas?.toSet() ?: setOf("dbo"),
|
||||
jdbcProperties = jdbcProperties,
|
||||
maxConcurrency = maxConcurrency,
|
||||
checkPrivileges = pojo.checkPrivileges ?: true,
|
||||
debeziumHeartbeatInterval =
|
||||
Duration.ofMillis(
|
||||
MsSqlServerSourceConfigurationSpecification.DEFAULT_HEARTBEAT_INTERVAL_MS
|
||||
),
|
||||
resourceAcquisitionHeartbeat = Duration.ofSeconds(15),
|
||||
incrementalReplicationConfiguration = incrementalReplicationConfiguration,
|
||||
databaseName = pojo.database
|
||||
)
|
||||
}
|
||||
}
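
The `jdbc_url_params` handling above is easy to reason about in isolation; this standalone sketch reproduces the same regex-and-decode loop (the parameter names in the example are hypothetical):

```kotlin
import java.net.URLDecoder
import java.nio.charset.StandardCharsets

// Mirrors the parsing loop in makeWithoutExceptionHandling; invalid pairs are skipped
// (the connector logs a warning instead).
fun parseJdbcUrlParams(raw: String?): Map<String, String> {
    val pattern = "^([^=]+)=(.*)$".toRegex()
    val out = mutableMapOf<String, String>()
    for (pair in (raw ?: "").trim().split("&")) {
        if (pair.isBlank()) continue
        val result = pattern.matchEntire(pair) ?: continue
        out[result.groupValues[1].trim()] =
            URLDecoder.decode(result.groupValues[2].trim(), StandardCharsets.UTF_8)
    }
    return out
}

fun main() {
    // Prints: {loginTimeout=30, applicationName=My App}
    println(parseJdbcUrlParams("loginTimeout=30&applicationName=My%20App"))
}
```
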
|
||||
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAnyGetter
|
||||
import com.fasterxml.jackson.annotation.JsonAnySetter
|
||||
import com.fasterxml.jackson.annotation.JsonGetter
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore
|
||||
import com.fasterxml.jackson.annotation.JsonProperty
|
||||
import com.fasterxml.jackson.annotation.JsonPropertyDescription
|
||||
import com.fasterxml.jackson.annotation.JsonPropertyOrder
|
||||
import com.fasterxml.jackson.annotation.JsonSetter
|
||||
import com.fasterxml.jackson.annotation.JsonSubTypes
|
||||
import com.fasterxml.jackson.annotation.JsonTypeInfo
|
||||
import com.fasterxml.jackson.annotation.JsonValue
|
||||
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDefault
|
||||
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaDescription
|
||||
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject
|
||||
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
|
||||
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
|
||||
import io.airbyte.cdk.ConfigErrorException
|
||||
import io.airbyte.cdk.command.CONNECTOR_CONFIG_PREFIX
|
||||
import io.airbyte.cdk.command.ConfigurationSpecification
|
||||
import io.airbyte.cdk.ssh.MicronautPropertiesFriendlySshTunnelMethodConfigurationSpecification
|
||||
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
|
||||
import io.micronaut.context.annotation.ConfigurationBuilder
|
||||
import io.micronaut.context.annotation.ConfigurationProperties
|
||||
import jakarta.inject.Singleton
|
||||
|
||||
/**
|
||||
* The object which is mapped to the MS SQL Server source configuration JSON.
|
||||
*
|
||||
* Use [MsSqlServerSourceConfiguration] instead wherever possible. This object also allows injecting
* values through Micronaut properties; this is made possible by the classes named
* `MicronautPropertiesFriendly.*`.
|
||||
*/
|
||||
@JsonSchemaTitle("MSSQL Source Spec")
|
||||
@JsonPropertyOrder(
|
||||
value = ["host", "port", "database", "username", "replication_method"],
|
||||
)
|
||||
@Singleton
|
||||
@ConfigurationProperties(CONNECTOR_CONFIG_PREFIX)
|
||||
@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
|
||||
class MsSqlServerSourceConfigurationSpecification : ConfigurationSpecification() {
|
||||
@JsonProperty("host")
|
||||
@JsonSchemaTitle("Host")
|
||||
@JsonSchemaInject(json = """{"order":0}""")
|
||||
@JsonPropertyDescription("The hostname of the database.")
|
||||
lateinit var host: String
|
||||
|
||||
@JsonProperty("port")
|
||||
@JsonSchemaTitle("Port")
|
||||
@JsonSchemaInject(json = """{"order":1,"minimum": 0,"maximum": 65536, "examples":["1433"]}""")
|
||||
@JsonSchemaDefault("1433")
|
||||
@JsonPropertyDescription(
|
||||
"The port of the database.",
|
||||
)
|
||||
var port: Int = 1433
|
||||
|
||||
@JsonProperty("database")
|
||||
@JsonSchemaTitle("Database")
|
||||
@JsonPropertyDescription("The name of the database.")
|
||||
@JsonSchemaInject(json = """{"order":2, "examples":["master"]}""")
|
||||
lateinit var database: String
|
||||
|
||||
@JsonProperty("schemas")
|
||||
@JsonSchemaTitle("Schemas")
|
||||
@JsonPropertyDescription("The list of schemas to sync from. Defaults to user. Case sensitive.")
|
||||
@JsonSchemaInject(json = """{"order":3, "default":["dbo"], "minItems":0, "uniqueItems":true}""")
|
||||
var schemas: Array<String>? = arrayOf("dbo")
|
||||
|
||||
@JsonProperty("username")
|
||||
@JsonSchemaTitle("Username")
|
||||
@JsonPropertyDescription("The username which is used to access the database.")
|
||||
@JsonSchemaInject(json = """{"order":4}""")
|
||||
lateinit var username: String
|
||||
|
||||
@JsonProperty("password")
|
||||
@JsonSchemaTitle("Password")
|
||||
@JsonPropertyDescription("The password associated with the username.")
|
||||
@JsonSchemaInject(json = """{"order":5,"airbyte_secret":true}""")
|
||||
lateinit var password: String
|
||||
|
||||
@JsonProperty("jdbc_url_params")
|
||||
@JsonSchemaTitle("JDBC URL Params")
|
||||
@JsonPropertyDescription(
|
||||
"Additional properties to pass to the JDBC URL string when connecting to the database " +
|
||||
"formatted as 'key=value' pairs separated by the symbol '&'. " +
|
||||
"(example: key1=value1&key2=value2&key3=value3).",
|
||||
)
|
||||
@JsonSchemaInject(json = """{"order":6}""")
|
||||
var jdbcUrlParams: String? = null
|
||||
|
||||
@JsonIgnore
|
||||
@ConfigurationBuilder(configurationPrefix = "ssl_mode")
|
||||
var encryption = MicronautPropertiesFriendlyEncryptionSpecification()
|
||||
|
||||
@JsonIgnore var encryptionJson: EncryptionSpecification? = null
|
||||
|
||||
@JsonSetter("ssl_mode")
|
||||
fun setEncryptionValue(value: EncryptionSpecification) {
|
||||
encryptionJson = value
|
||||
}
|
||||
|
||||
@JsonGetter("ssl_mode")
|
||||
@JsonSchemaTitle("Encryption")
|
||||
@JsonPropertyDescription(
|
||||
"The encryption method which is used when communicating with the database.",
|
||||
)
|
||||
@JsonSchemaInject(json = """{"order":8,"default":"required"}""")
|
||||
fun getEncryptionValue(): EncryptionSpecification? = encryptionJson ?: encryption.asEncryption()
|
||||
|
||||
@JsonIgnore
|
||||
@ConfigurationBuilder(configurationPrefix = "tunnel_method")
|
||||
val tunnelMethod = MicronautPropertiesFriendlySshTunnelMethodConfigurationSpecification()
|
||||
|
||||
@JsonIgnore var tunnelMethodJson: SshTunnelMethodConfiguration? = null
|
||||
|
||||
@JsonSetter("tunnel_method")
|
||||
fun setTunnelMethodValue(value: SshTunnelMethodConfiguration) {
|
||||
tunnelMethodJson = value
|
||||
}
|
||||
|
||||
@JsonGetter("tunnel_method")
|
||||
@JsonSchemaTitle("SSH Tunnel Method")
|
||||
@JsonPropertyDescription(
|
||||
"Whether to initiate an SSH tunnel before connecting to the database, " +
|
||||
"and if so, which kind of authentication to use.",
|
||||
)
|
||||
@JsonSchemaInject(json = """{"order":9}""")
|
||||
fun getTunnelMethodValue(): SshTunnelMethodConfiguration? =
|
||||
tunnelMethodJson ?: tunnelMethod.asSshTunnelMethod()
|
||||
|
||||
@JsonIgnore
|
||||
@ConfigurationBuilder(configurationPrefix = "replication_method")
|
||||
var replicationMethod = MicronautPropertiesFriendlyIncrementalConfigurationSpecification()
|
||||
|
||||
@JsonIgnore var replicationMethodJson: IncrementalConfigurationSpecification? = null
|
||||
|
||||
@JsonSetter("replication_method")
|
||||
fun setIncrementalValue(value: IncrementalConfigurationSpecification) {
|
||||
replicationMethodJson = value
|
||||
}
|
||||
|
||||
@JsonGetter("replication_method")
|
||||
@JsonSchemaTitle("Update Method")
|
||||
@JsonPropertyDescription("Configures how data is extracted from the database.")
|
||||
@JsonSchemaInject(json = """{"order":10,"display_type":"radio"}""")
|
||||
fun getIncrementalValue(): IncrementalConfigurationSpecification =
|
||||
replicationMethodJson ?: replicationMethod.asCursorMethodConfiguration()
|
||||
|
||||
@JsonProperty("checkpoint_target_interval_seconds")
|
||||
@JsonSchemaTitle("Checkpoint Target Time Interval")
|
||||
@JsonSchemaInject(json = """{"order":11}""")
|
||||
@JsonSchemaDefault("300")
|
||||
@JsonPropertyDescription("How often (in seconds) a stream should checkpoint, when possible.")
|
||||
var checkpointTargetIntervalSeconds: Int? = 300
|
||||
|
||||
@JsonProperty("concurrency")
|
||||
@JsonSchemaTitle("Concurrency")
|
||||
@JsonSchemaInject(json = """{"order":12}""")
|
||||
@JsonPropertyDescription("Maximum number of concurrent queries to the database.")
|
||||
var concurrency: Int? = 1
|
||||
|
||||
@JsonProperty("check_privileges")
|
||||
@JsonSchemaTitle("Check Table and Column Access Privileges")
|
||||
@JsonSchemaInject(json = """{"order":13}""")
|
||||
@JsonSchemaDefault("true")
|
||||
@JsonPropertyDescription(
|
||||
"When this feature is enabled, during schema discovery the connector " +
|
||||
"will query each table or view individually to check access privileges " +
|
||||
"and inaccessible tables, views, or columns therein will be removed. " +
|
||||
"In large schemas, this might cause schema discovery to take too long, " +
|
||||
"in which case it might be advisable to disable this feature.",
|
||||
)
|
||||
var checkPrivileges: Boolean? = true
|
||||
|
||||
@JsonIgnore var additionalPropertiesMap = mutableMapOf<String, Any>()
|
||||
|
||||
@JsonAnyGetter fun getAdditionalProperties(): Map<String, Any> = additionalPropertiesMap
|
||||
|
||||
@JsonAnySetter
|
||||
fun setAdditionalProperty(
|
||||
name: String,
|
||||
value: Any,
|
||||
) {
|
||||
additionalPropertiesMap[name] = value
|
||||
}
|
||||
|
||||
companion object {
|
||||
const val DEFAULT_HEARTBEAT_INTERVAL_MS = 15000L
|
||||
}
|
||||
}
|
||||
|
||||
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "mode")
|
||||
@JsonSubTypes(
|
||||
JsonSubTypes.Type(
|
||||
value = MsSqlServerEncryptionDisabledConfigurationSpecification::class,
|
||||
name = "unencrypted"
|
||||
),
|
||||
JsonSubTypes.Type(
|
||||
value =
|
||||
MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification::class,
|
||||
name = "encrypted_trust_server_certificate"
|
||||
),
|
||||
JsonSubTypes.Type(value = SslVerifyCertificate::class, name = "encrypted_verify_certificate"),
|
||||
)
|
||||
@JsonSchemaTitle("Encryption")
|
||||
@JsonSchemaDescription("The encryption method which is used when communicating with the database.")
|
||||
sealed interface EncryptionSpecification
|
||||
|
||||
@JsonSchemaTitle("Unencrypted")
|
||||
@JsonSchemaDescription(
|
||||
"Data transfer will not be encrypted.",
|
||||
)
|
||||
data object MsSqlServerEncryptionDisabledConfigurationSpecification : EncryptionSpecification
|
||||
|
||||
@JsonSchemaTitle("Encrypted (trust server certificate)")
|
||||
@JsonSchemaDescription(
|
||||
"Use the certificate provided by the server without verification. (For testing purposes only!)"
|
||||
)
|
||||
data object MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification :
|
||||
EncryptionSpecification
|
||||
|
||||
@JsonSchemaTitle("Encrypted (verify certificate)")
|
||||
@JsonSchemaDescription("Verify and use the certificate provided by the server.")
|
||||
@SuppressFBWarnings(value = ["NP_NONNULL_RETURN_VIOLATION"], justification = "Micronaut DI")
|
||||
class SslVerifyCertificate : EncryptionSpecification {
|
||||
@JsonProperty("hostNameInCertificate")
|
||||
@JsonSchemaTitle("Host Name In Certificate")
|
||||
@JsonPropertyDescription(
|
||||
"Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
|
||||
)
|
||||
@JsonSchemaInject(json = """{"order":0}""")
|
||||
var hostNameInCertificate: String? = null
|
||||
|
||||
@JsonProperty("certificate", required = false)
|
||||
@JsonSchemaTitle("Certificate")
|
||||
@JsonPropertyDescription(
|
||||
"certificate of the server, or of the CA that signed the server certificate",
|
||||
)
|
||||
@JsonSchemaInject(json = """{"order":1,"airbyte_secret":true,"multiline":true}""")
|
||||
var certificate: String? = null
|
||||
}
|
||||
|
||||
@ConfigurationProperties("$CONNECTOR_CONFIG_PREFIX.ssl_mode")
|
||||
class MicronautPropertiesFriendlyEncryptionSpecification {
|
||||
var mode: String = "unencrypted"
|
||||
var sslCertificate: String? = null
|
||||
|
||||
@JsonValue
|
||||
fun asEncryption(): EncryptionSpecification =
|
||||
when (mode) {
|
||||
"unencrypted" -> MsSqlServerEncryptionDisabledConfigurationSpecification
|
||||
"Encrypted (trust server certificate)" ->
|
||||
MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification
|
||||
"Encrypted (verify certificate)" ->
|
||||
SslVerifyCertificate().also { it.certificate = sslCertificate!! }
|
||||
else -> throw ConfigErrorException("invalid value $mode")
|
||||
}
|
||||
}
|
||||
|
||||
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "method")
|
||||
@JsonSubTypes(
|
||||
JsonSubTypes.Type(value = UserDefinedCursor::class, name = "STANDARD"),
|
||||
JsonSubTypes.Type(value = Cdc::class, name = "CDC")
|
||||
)
|
||||
@JsonSchemaTitle("Update Method")
|
||||
@JsonSchemaDescription("Configures how data is extracted from the database.")
|
||||
sealed interface IncrementalConfigurationSpecification
|
||||
|
||||
@JsonSchemaTitle("Scan Changes with User Defined Cursor")
|
||||
@JsonSchemaDescription(
|
||||
"Incrementally detects new inserts and updates using the " +
|
||||
"<a href=\"https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/" +
|
||||
"#user-defined-cursor\">cursor column</a> chosen when configuring a connection " +
|
||||
"(e.g. created_at, updated_at).",
|
||||
)
|
||||
class UserDefinedCursor : IncrementalConfigurationSpecification {
|
||||
@JsonProperty("exclude_todays_data")
|
||||
@JsonSchemaTitle("Exclude Today's Data")
|
||||
@JsonPropertyDescription(
|
||||
"When enabled incremental syncs using a cursor of a temporal type (date or datetime) will include cursor values only up until the previous midnight UTC"
|
||||
)
|
||||
@JsonSchemaDefault("false")
|
||||
@JsonSchemaInject(json = """{"order":1,"always_show":true}""")
|
||||
var excludeTodaysData: Boolean? = false
|
||||
}
|
||||
|
||||
@JsonSchemaTitle("Read Changes using Change Data Capture (CDC)")
|
||||
@JsonSchemaDescription(
|
||||
"<i>Recommended</i> - " +
|
||||
"Incrementally reads new inserts, updates, and deletes using MSSQL's <a href=" +
|
||||
"\"https://docs.airbyte.com/integrations/sources/mssql/#change-data-capture-cdc\"" +
|
||||
"> change data capture feature</a>. This must be enabled on your database.",
|
||||
)
|
||||
class Cdc : IncrementalConfigurationSpecification {
|
||||
@JsonProperty("initial_waiting_seconds")
|
||||
@JsonSchemaTitle("Initial Waiting Time in Seconds (Advanced)")
|
||||
@JsonPropertyDescription(
|
||||
"The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about <a href=\"https://docs.airbyte.com/integrations/sources/mssql#setting-up-cdc-for-mssql\">initial waiting time</a>"
|
||||
)
|
||||
@JsonSchemaInject(json = """{"order":1,"always_show":true}""")
|
||||
var initialWaitingSeconds: Int? = null
|
||||
|
||||
@JsonProperty("invalid_cdc_cursor_position_behavior")
|
||||
@JsonSchemaTitle("Invalid CDC Position Behavior (Advanced)")
|
||||
@JsonPropertyDescription(
|
||||
"Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value in the mined logs. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
|
||||
)
|
||||
@JsonSchemaDefault("Fail sync")
|
||||
@JsonSchemaInject(
|
||||
json = """{"order":2,"always_show":true, "enum": ["Fail sync","Re-sync data"]}"""
|
||||
)
|
||||
var invalidCdcCursorPositionBehavior: String? = "Fail sync"
|
||||
|
||||
@JsonProperty("initial_load_timeout_hours")
|
||||
@JsonSchemaTitle("Initial Load Timeout in Hours (Advanced)")
|
||||
@JsonPropertyDescription(
|
||||
"The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
|
||||
)
|
||||
@JsonSchemaDefault("8")
|
||||
@JsonSchemaInject(json = """{"order":3, "max": 24, "min": 4,"always_show": true}""")
|
||||
var initialLoadTimeoutHours: Int? = 8
|
||||
|
||||
@JsonProperty("poll_interval_ms")
|
||||
@JsonSchemaTitle("Poll Interval in Milliseconds (Advanced)")
|
||||
@JsonPropertyDescription(
|
||||
"How often (in milliseconds) Debezium should poll for new data. Must be smaller than heartbeat interval (15000ms). Lower values provide more responsive data capture but may increase database load.",
|
||||
)
|
||||
@JsonSchemaDefault("500")
|
||||
@JsonSchemaInject(json = """{"order":4, "max": 14999, "min": 100,"always_show": true}""")
|
||||
var pollIntervalMs: Int? = 500
|
||||
}
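
Putting the annotations together, a `replication_method` object in the source config would look something like the following; the values are illustrative, only the field names are taken from the `@JsonProperty` annotations above:

```kotlin
fun main() {
    // Illustrative config fragments matching the two IncrementalConfigurationSpecification subtypes.
    val exampleCdc =
        """{"method":"CDC","initial_waiting_seconds":300,"invalid_cdc_cursor_position_behavior":"Fail sync","initial_load_timeout_hours":8,"poll_interval_ms":500}"""
    val exampleStandard =
        """{"method":"STANDARD","exclude_todays_data":false}"""
    println(exampleCdc)
    println(exampleStandard)
}
```
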
|
||||
|
||||
@ConfigurationProperties("$CONNECTOR_CONFIG_PREFIX.replication_method")
|
||||
class MicronautPropertiesFriendlyIncrementalConfigurationSpecification {
|
||||
var method: String = "STANDARD"
|
||||
|
||||
fun asCursorMethodConfiguration(): IncrementalConfigurationSpecification =
|
||||
when (method) {
|
||||
"STANDARD" -> UserDefinedCursor()
|
||||
"CDC" -> Cdc()
|
||||
else -> throw ConfigErrorException("invalid value $method")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
/** Represents the old OrderedColumnLoadStatus format used by the legacy MSSQL connector */
|
||||
data class LegacyOrderedColumnLoadStatus(
|
||||
@JsonProperty("version") val version: Long? = null,
|
||||
@JsonProperty("state_type") val stateType: String? = null,
|
||||
@JsonProperty("ordered_col") val orderedCol: String? = null,
|
||||
@JsonProperty("ordered_col_val") val orderedColVal: String? = null,
|
||||
@JsonProperty("incremental_state") val incrementalState: JsonNode? = null,
|
||||
)
|
||||
|
||||
/** Represents the old CursorBasedStatus format used by the legacy MSSQL connector */
|
||||
data class LegacyCursorBasedStatus(
|
||||
@JsonProperty("version") val version: Long? = null,
|
||||
@JsonProperty("state_type") val stateType: String? = null,
|
||||
@JsonProperty("stream_name") val streamName: String? = null,
|
||||
@JsonProperty("stream_namespace") val streamNamespace: String? = null,
|
||||
@JsonProperty("cursor_field") val cursorField: List<String>? = null,
|
||||
@JsonProperty("cursor") val cursor: String? = null,
|
||||
@JsonProperty("cursor_record_count") val cursorRecordCount: Long? = null,
|
||||
)
|
||||
|
||||
/** Helper class to migrate legacy MSSQL connector states to the new v2 format */
|
||||
object MsSqlServerStateMigration {
|
||||
|
||||
/** Parses state value and handles backward compatibility with legacy formats */
|
||||
fun parseStateValue(opaqueStateValue: OpaqueStateValue): MsSqlServerJdbcStreamStateValue {
|
||||
// Check version to detect legacy state using centralized version constants
|
||||
val version = opaqueStateValue.get("version")?.asInt()
|
||||
val isLegacy = MsSqlServerJdbcStreamStateValue.isLegacy(version)
|
||||
|
||||
return if (isLegacy) {
|
||||
log.info {
|
||||
"Detected legacy state (version=$version), migrating to version ${MsSqlServerJdbcStreamStateValue.CURRENT_VERSION}"
|
||||
}
|
||||
migrateLegacyState(opaqueStateValue)
|
||||
} else {
|
||||
try {
|
||||
// Version 3+ states should parse directly
|
||||
Jsons.treeToValue(opaqueStateValue, MsSqlServerJdbcStreamStateValue::class.java)
|
||||
} catch (e: Exception) {
|
||||
throw IllegalStateException(
|
||||
"Failed to parse state with version $version as MsSqlServerJdbcStreamStateValue.",
|
||||
e
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Migrates legacy state formats to new MsSqlServerJdbcStreamStateValue format */
|
||||
private fun migrateLegacyState(
|
||||
opaqueStateValue: OpaqueStateValue
|
||||
): MsSqlServerJdbcStreamStateValue {
|
||||
val stateType = opaqueStateValue.get("state_type")?.asText()
|
||||
|
||||
return when (stateType) {
|
||||
"ordered_column" -> migrateOrderedColumnLoadStatus(opaqueStateValue)
|
||||
"cursor_based" -> migrateCursorBasedStatus(opaqueStateValue)
|
||||
else -> {
|
||||
// Try to detect format based on field presence
|
||||
when {
|
||||
opaqueStateValue.has("ordered_col") ->
|
||||
migrateOrderedColumnLoadStatus(opaqueStateValue)
|
||||
opaqueStateValue.has("cursor_field") ->
|
||||
migrateCursorBasedStatus(opaqueStateValue)
|
||||
else -> {
|
||||
log.warn {
|
||||
"Unknown legacy state format, falling back to default: $opaqueStateValue"
|
||||
}
|
||||
MsSqlServerJdbcStreamStateValue()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Migrates OrderedColumnLoadStatus (primary key based initial sync) to new format */
|
||||
private fun migrateOrderedColumnLoadStatus(
|
||||
opaqueStateValue: OpaqueStateValue
|
||||
): MsSqlServerJdbcStreamStateValue {
|
||||
val legacy = Jsons.treeToValue(opaqueStateValue, LegacyOrderedColumnLoadStatus::class.java)
|
||||
|
||||
log.info {
|
||||
"Migrating OrderedColumnLoadStatus state: ordered_col=${legacy.orderedCol}, ordered_col_val=${legacy.orderedColVal}"
|
||||
}
|
||||
|
||||
// Extract incremental state if present
|
||||
val incrementalState = legacy.incrementalState?.let { migrateCursorBasedStatusFromJson(it) }
|
||||
|
||||
return MsSqlServerJdbcStreamStateValue(
|
||||
version = MsSqlServerJdbcStreamStateValue.CURRENT_VERSION,
|
||||
stateType =
|
||||
StateType.PRIMARY_KEY.stateType, // Convert "ordered_column" to "primary_key"
|
||||
pkName = legacy.orderedCol,
|
||||
pkValue = legacy.orderedColVal,
|
||||
// If there's incremental state, embed it for transition after initial sync completes
|
||||
incrementalState = incrementalState?.let { Jsons.valueToTree(it) }
|
||||
)
|
||||
}
|
||||
|
||||
/** Migrates CursorBasedStatus (cursor-based incremental) to new format */
|
||||
private fun migrateCursorBasedStatusFromJson(
|
||||
stateValue: JsonNode
|
||||
): MsSqlServerJdbcStreamStateValue {
|
||||
val legacy = Jsons.treeToValue(stateValue, LegacyCursorBasedStatus::class.java)
|
||||
|
||||
log.info {
|
||||
"Migrating CursorBasedStatus state: stream=${legacy.streamName}, cursor_field=${legacy.cursorField}, cursor=${legacy.cursor}"
|
||||
}
|
||||
|
||||
return MsSqlServerJdbcStreamStateValue(
|
||||
version = MsSqlServerJdbcStreamStateValue.CURRENT_VERSION,
|
||||
stateType = StateType.CURSOR_BASED.stateType,
|
||||
cursorField = legacy.cursorField ?: emptyList(),
|
||||
cursor = legacy.cursor ?: "",
|
||||
cursorRecordCount = legacy.cursorRecordCount?.toInt() ?: 0
|
||||
)
|
||||
}
|
||||
|
||||
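/** Convenience overload that migrates a top-level legacy cursor-based state. */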
private fun migrateCursorBasedStatus(
|
||||
opaqueStateValue: OpaqueStateValue
|
||||
): MsSqlServerJdbcStreamStateValue {
|
||||
return migrateCursorBasedStatusFromJson(opaqueStateValue)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,430 @@
|
||||
/* Copyright (c) 2024 Airbyte, Inc., all rights reserved. */
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.ConfigErrorException
|
||||
import io.airbyte.cdk.StreamIdentifier
|
||||
import io.airbyte.cdk.check.JdbcCheckQueries
|
||||
import io.airbyte.cdk.command.SourceConfiguration
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.discover.JdbcMetadataQuerier
|
||||
import io.airbyte.cdk.discover.MetadataQuerier
|
||||
import io.airbyte.cdk.discover.TableName
|
||||
import io.airbyte.cdk.jdbc.DefaultJdbcConstants
|
||||
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
|
||||
import io.airbyte.cdk.read.SelectQueryGenerator
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
|
||||
import io.airbyte.protocol.models.v0.StreamDescriptor
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.micronaut.context.annotation.Primary
|
||||
import jakarta.inject.Singleton
|
||||
import java.sql.ResultSet
|
||||
import java.sql.SQLException
|
||||
import java.sql.Statement
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
/** Delegates to [JdbcMetadataQuerier] except for [fields]. */
|
||||
class MsSqlSourceMetadataQuerier(
|
||||
val base: JdbcMetadataQuerier,
|
||||
val configuredCatalog: ConfiguredAirbyteCatalog? = null,
|
||||
) : MetadataQuerier by base {
|
||||
|
||||
override fun extraChecks() {
|
||||
base.extraChecks()
|
||||
if (base.config.global) {
|
||||
// Extra checks for CDC
|
||||
checkSqlServerAgentRunning()
|
||||
checkDatabaseCdcEnabled()
|
||||
}
|
||||
}
|
||||
|
||||
private fun checkSqlServerAgentRunning() {
|
||||
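// Queries sys.dm_server_services, which typically requires the VIEW SERVER STATE permission;
// any SQL error is surfaced as a ConfigErrorException below.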
try {
|
||||
base.conn.createStatement().use { stmt: Statement ->
|
||||
stmt
|
||||
.executeQuery(
|
||||
"SELECT servicename, status_desc FROM sys.dm_server_services WHERE servicename LIKE '%SQL Server Agent%'"
|
||||
)
|
||||
.use { rs: ResultSet ->
|
||||
if (!rs.next()) {
|
||||
throw ConfigErrorException(
|
||||
"SQL Server Agent service is not found. CDC requires SQL Server Agent to be running."
|
||||
)
|
||||
}
|
||||
val status = rs.getString("status_desc")
|
||||
if (status != "Running") {
|
||||
throw ConfigErrorException(
|
||||
"SQL Server Agent is not running (status: $status). CDC requires SQL Server Agent to be running."
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e: SQLException) {
|
||||
throw ConfigErrorException("Failed to check SQL Server Agent status: ${e.message}")
|
||||
}
|
||||
}
|
||||
|
||||
private fun checkDatabaseCdcEnabled() {
|
||||
try {
|
||||
base.conn.createStatement().use { stmt: Statement ->
|
||||
stmt
|
||||
.executeQuery("SELECT is_cdc_enabled FROM sys.databases WHERE name = DB_NAME()")
|
||||
.use { rs: ResultSet ->
|
||||
if (!rs.next()) {
|
||||
throw ConfigErrorException(
|
||||
"Could not determine CDC status for current database"
|
||||
)
|
||||
}
|
||||
val cdcEnabled = rs.getBoolean("is_cdc_enabled")
|
||||
if (!cdcEnabled) {
|
||||
throw ConfigErrorException(
|
||||
"CDC is not enabled for the database. Please enable CDC with: EXEC sys.sp_cdc_enable_db"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e: SQLException) {
|
||||
throw ConfigErrorException("Failed to check database CDC status: ${e.message}")
|
||||
}
|
||||
}
|
||||
|
||||
override fun fields(streamID: StreamIdentifier): List<Field> {
|
||||
val table: TableName = findTableName(streamID) ?: return listOf()
|
||||
if (table !in base.memoizedColumnMetadata) return listOf()
|
||||
return base.memoizedColumnMetadata[table]!!.map {
|
||||
Field(it.label, base.fieldTypeMapper.toFieldType(it))
|
||||
}
|
||||
}
|
||||
|
||||
override fun streamNamespaces(): List<String> = base.config.namespaces.toList()
|
||||
|
||||
val memoizedTableNames: List<TableName> by lazy {
|
||||
log.info { "Querying SQL Server table names for catalog discovery." }
|
||||
try {
|
||||
val allTables = mutableSetOf<TableName>()
|
||||
val dbmd = base.conn.metaData
|
||||
val currentDatabase = base.conn.catalog
|
||||
|
||||
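// Also probe the uppercased variants of the configured namespaces to tolerate case
// differences in how schema names were entered.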
for (namespace in
|
||||
base.config.namespaces + base.config.namespaces.map { it.uppercase() }) {
|
||||
// For SQL Server with SCHEMA namespace kind, use current database as catalog
|
||||
dbmd.getTables(currentDatabase, namespace, null, null).use { rs ->
|
||||
while (rs.next()) {
|
||||
allTables.add(
|
||||
TableName(
|
||||
catalog = rs.getString("TABLE_CAT"),
|
||||
schema = rs.getString("TABLE_SCHEM"),
|
||||
name = rs.getString("TABLE_NAME"),
|
||||
type = rs.getString("TABLE_TYPE") ?: "",
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info {
|
||||
"Discovered ${allTables.size} table(s) in SQL Server database '$currentDatabase'."
|
||||
}
|
||||
return@lazy allTables.toList()
|
||||
} catch (e: Exception) {
|
||||
throw RuntimeException("SQL Server table discovery query failed: ${e.message}", e)
|
||||
}
|
||||
}
|
||||
|
||||
override fun streamNames(streamNamespace: String?): List<StreamIdentifier> =
|
||||
memoizedTableNames
|
||||
.filter { it.schema == streamNamespace }
|
||||
.map { StreamDescriptor().withName(it.name).withNamespace(streamNamespace) }
|
||||
.map(StreamIdentifier::from)
|
||||
|
||||
fun findTableName(
|
||||
streamID: StreamIdentifier,
|
||||
): TableName? =
|
||||
memoizedTableNames.find { it.name == streamID.name && it.schema == streamID.namespace }
|
||||
|
||||
val memoizedClusteredIndexKeys: Map<TableName, List<List<String>>> by lazy {
|
||||
val results = mutableListOf<AllClusteredIndexKeysRow>()
|
||||
val schemas: List<String> = streamNamespaces()
|
||||
val sql: String = CLUSTERED_INDEX_QUERY_FMTSTR.format(schemas.joinToString { "'$it'" })
|
||||
log.info {
|
||||
"Querying SQL Server system tables for all clustered index keys for catalog discovery."
|
||||
}
|
||||
try {
|
||||
base.conn.createStatement().use { stmt: Statement ->
|
||||
stmt.executeQuery(sql).use { rs: ResultSet ->
|
||||
while (rs.next()) {
|
||||
results.add(
|
||||
AllClusteredIndexKeysRow(
|
||||
rs.getString("table_schema"),
|
||||
rs.getString("table_name"),
|
||||
rs.getString("index_name"),
|
||||
rs.getInt("key_ordinal").takeUnless { rs.wasNull() },
|
||||
rs.getString("column_name").takeUnless { rs.wasNull() },
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info {
|
||||
"Discovered all clustered index keys in ${schemas.size} SQL Server schema(s)."
|
||||
}
|
||||
return@lazy results
|
||||
.groupBy {
|
||||
findTableName(
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withName(it.tableName).withNamespace(it.tableSchema),
|
||||
),
|
||||
)
|
||||
}
|
||||
.mapNotNull { (table, rowsByTable) ->
|
||||
if (table == null) return@mapNotNull null
|
||||
val clusteredIndexRows: List<AllClusteredIndexKeysRow> =
|
||||
rowsByTable
|
||||
.groupBy { it.indexName }
|
||||
.filterValues { rowsByIndex: List<AllClusteredIndexKeysRow> ->
|
||||
rowsByIndex.all { it.keyOrdinal != null && it.columnName != null }
|
||||
}
|
||||
.values
|
||||
.firstOrNull()
|
||||
?: return@mapNotNull null
|
||||
val clusteredIndexColumnNames: List<List<String>> =
|
||||
clusteredIndexRows
|
||||
.sortedBy { it.keyOrdinal }
|
||||
.mapNotNull { it.columnName }
|
||||
.map { listOf(it) }
|
||||
table to clusteredIndexColumnNames
|
||||
}
|
||||
.toMap()
|
||||
} catch (e: Exception) {
|
||||
throw RuntimeException(
|
||||
"SQL Server clustered index discovery query failed: ${e.message}",
|
||||
e
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The logic flow:
|
||||
* 1. Check for clustered index
|
||||
* 2. If single-column clustered index exists → Use it
|
||||
* 3. If composite clustered index exists → Use primary key
|
||||
* 4. If no clustered index exists → Use primary key
|
||||
* 5. If no primary key exists → Check configured catalog for user-defined logical PK
|
||||
* 6. If no logical PK exists → Return empty list
|
||||
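*
* For example, a table whose clustered index is (id) yields [["id"]]; a table with a composite
* clustered index on (tenant_id, id) and a primary key on (id) falls back to the PK and also yields [["id"]].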
*/
|
||||
override fun primaryKey(
|
||||
streamID: StreamIdentifier,
|
||||
): List<List<String>> {
|
||||
val table: TableName = findTableName(streamID) ?: return listOf()
|
||||
|
||||
// First try to get clustered index keys
|
||||
val clusteredIndexKeys = memoizedClusteredIndexKeys[table]
|
||||
|
||||
// Use clustered index if it exists and is a single column
|
||||
// For composite clustered indexes, fall back to primary key
|
||||
val databasePK =
|
||||
when {
|
||||
clusteredIndexKeys != null && clusteredIndexKeys.size == 1 -> {
|
||||
log.info {
|
||||
"Using single-column clustered index for table ${table.schema}.${table.name}"
|
||||
}
|
||||
clusteredIndexKeys
|
||||
}
|
||||
clusteredIndexKeys != null && clusteredIndexKeys.size > 1 -> {
|
||||
log.info {
|
||||
"Clustered index is composite for table ${table.schema}.${table.name}. Falling back to primary key."
|
||||
}
|
||||
memoizedPrimaryKeys[table]
|
||||
}
|
||||
else -> {
|
||||
log.info {
|
||||
"No clustered index found for table ${table.schema}.${table.name}. Using primary key."
|
||||
}
|
||||
memoizedPrimaryKeys[table]
|
||||
}
|
||||
}
|
||||
|
||||
// If we found a database PK, use it
|
||||
if (!databasePK.isNullOrEmpty()) {
|
||||
return databasePK
|
||||
}
|
||||
|
||||
// Fall back to user-defined logical PK from configured catalog
|
||||
// This handles migration from old connector where tables without physical PKs
|
||||
// could have logical PKs configured in the UI
|
||||
val logicalPK = getUserDefinedPrimaryKey(streamID)
|
||||
if (logicalPK.isNotEmpty()) {
|
||||
log.info {
|
||||
"No physical primary key found for table ${table.schema}.${table.name}. " +
|
||||
"Using user-defined logical primary key from configured catalog: $logicalPK"
|
||||
}
|
||||
return logicalPK
|
||||
}
|
||||
|
||||
return listOf()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the user-defined logical primary key from the configured catalog. This is used for
|
||||
* backward compatibility with the old connector where users could configure logical PKs for
|
||||
* tables without physical PKs.
|
||||
*/
|
||||
private fun getUserDefinedPrimaryKey(streamID: StreamIdentifier): List<List<String>> {
|
||||
if (configuredCatalog == null) {
|
||||
return listOf()
|
||||
}
|
||||
|
||||
val configuredStream: ConfiguredAirbyteStream? =
|
||||
configuredCatalog.streams.find {
|
||||
it.stream.name == streamID.name && it.stream.namespace == streamID.namespace
|
||||
}
|
||||
|
||||
return configuredStream?.primaryKey ?: listOf()
|
||||
}
|
||||
|
||||
val memoizedPrimaryKeys: Map<TableName, List<List<String>>> by lazy {
|
||||
val results = mutableListOf<AllPrimaryKeysRow>()
|
||||
val schemas: List<String> = streamNamespaces()
|
||||
val sql: String = PK_QUERY_FMTSTR.format(schemas.joinToString { "'$it'" })
|
||||
log.info { "Querying SQL Server system tables for all primary keys for catalog discovery." }
|
||||
try {
|
||||
// Get primary keys for the specified table
|
||||
base.conn.createStatement().use { stmt: Statement ->
|
||||
stmt.executeQuery(sql).use { rs: ResultSet ->
|
||||
while (rs.next()) {
|
||||
results.add(
|
||||
AllPrimaryKeysRow(
|
||||
rs.getString("table_schema"),
|
||||
rs.getString("table_name"),
|
||||
rs.getString("constraint_name"),
|
||||
rs.getInt("ordinal_position").takeUnless { rs.wasNull() },
|
||||
rs.getString("column_name").takeUnless { rs.wasNull() },
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info { "Discovered all primary keys in ${schemas.size} SQL Server schema(s)." }
|
||||
return@lazy results
|
||||
.groupBy {
|
||||
findTableName(
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withName(it.tableName).withNamespace(it.tableSchema),
|
||||
),
|
||||
)
|
||||
}
|
||||
.mapNotNull { (table, rowsByTable) ->
|
||||
if (table == null) return@mapNotNull null
|
||||
val pkRows: List<AllPrimaryKeysRow> =
|
||||
rowsByTable
|
||||
.groupBy { it.constraintName }
|
||||
.filterValues { rowsByPK: List<AllPrimaryKeysRow> ->
|
||||
rowsByPK.all { it.position != null && it.columnName != null }
|
||||
}
|
||||
.values
|
||||
.firstOrNull()
|
||||
?: return@mapNotNull null
|
||||
val pkColumnNames: List<List<String>> =
|
||||
pkRows
|
||||
.sortedBy { it.position }
|
||||
.mapNotNull { it.columnName }
|
||||
.map { listOf(it) }
|
||||
table to pkColumnNames
|
||||
}
|
||||
.toMap()
|
||||
} catch (e: Exception) {
|
||||
throw RuntimeException("SQL Server primary key discovery query failed: ${e.message}", e)
|
||||
}
|
||||
}
|
||||
|
||||
private data class AllClusteredIndexKeysRow(
|
||||
val tableSchema: String,
|
||||
val tableName: String,
|
||||
val indexName: String,
|
||||
val keyOrdinal: Int?,
|
||||
val columnName: String?,
|
||||
)
|
||||
|
||||
private data class AllPrimaryKeysRow(
|
||||
val tableSchema: String,
|
||||
val tableName: String,
|
||||
val constraintName: String,
|
||||
val position: Int?,
|
||||
val columnName: String?,
|
||||
)
|
||||
|
||||
companion object {
|
||||
|
||||
const val CLUSTERED_INDEX_QUERY_FMTSTR =
|
||||
"""
|
||||
SELECT
|
||||
s.name as table_schema,
|
||||
t.name as table_name,
|
||||
i.name as index_name,
|
||||
ic.key_ordinal,
|
||||
c.name as column_name
|
||||
FROM
|
||||
sys.tables t
|
||||
INNER JOIN
|
||||
sys.schemas s ON t.schema_id = s.schema_id
|
||||
INNER JOIN
|
||||
sys.indexes i ON t.object_id = i.object_id
|
||||
INNER JOIN
|
||||
sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
|
||||
INNER JOIN
|
||||
sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
|
||||
WHERE
|
||||
s.name IN (%s)
|
||||
AND i.type = 1 -- Clustered index
|
||||
AND ic.is_included_column = 0 -- Only key columns, not included columns
|
||||
ORDER BY
|
||||
s.name, t.name, ic.key_ordinal;
|
||||
"""
|
||||
|
||||
const val PK_QUERY_FMTSTR =
|
||||
"""
|
||||
SELECT
|
||||
kcu.TABLE_SCHEMA as table_schema,
|
||||
kcu.TABLE_NAME as table_name,
|
||||
kcu.COLUMN_NAME as column_name,
|
||||
kcu.ORDINAL_POSITION as ordinal_position,
|
||||
kcu.CONSTRAINT_NAME as constraint_name
|
||||
FROM
|
||||
INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
|
||||
INNER JOIN
|
||||
INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
|
||||
ON
|
||||
kcu.CONSTRAINT_NAME = tc.CONSTRAINT_NAME
|
||||
AND kcu.TABLE_SCHEMA = tc.TABLE_SCHEMA
|
||||
WHERE
|
||||
kcu.TABLE_SCHEMA IN (%s)
|
||||
AND tc.CONSTRAINT_TYPE = 'PRIMARY KEY';
|
||||
"""
|
||||
}
|
||||
|
||||
/** SQL Server implementation of [MetadataQuerier.Factory]. */
|
||||
@Singleton
|
||||
@Primary
|
||||
class Factory(
|
||||
val constants: DefaultJdbcConstants,
|
||||
val selectQueryGenerator: SelectQueryGenerator,
|
||||
val fieldTypeMapper: JdbcMetadataQuerier.FieldTypeMapper,
|
||||
val checkQueries: JdbcCheckQueries,
|
||||
val configuredCatalog: ConfiguredAirbyteCatalog? = null,
|
||||
) : MetadataQuerier.Factory<MsSqlServerSourceConfiguration> {
|
||||
/** The [SourceConfiguration] is deliberately not injected in order to support tests. */
|
||||
override fun session(config: MsSqlServerSourceConfiguration): MetadataQuerier {
|
||||
val jdbcConnectionFactory = JdbcConnectionFactory(config)
|
||||
val base =
|
||||
JdbcMetadataQuerier(
|
||||
constants,
|
||||
config,
|
||||
selectQueryGenerator,
|
||||
fieldTypeMapper,
|
||||
checkQueries,
|
||||
jdbcConnectionFactory,
|
||||
)
|
||||
return MsSqlSourceMetadataQuerier(base, configuredCatalog)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,437 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode
|
||||
import com.microsoft.sqlserver.jdbc.Geography
|
||||
import com.microsoft.sqlserver.jdbc.Geometry
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.data.FloatCodec
|
||||
import io.airbyte.cdk.data.JsonEncoder
|
||||
import io.airbyte.cdk.data.LeafAirbyteSchemaType
|
||||
import io.airbyte.cdk.data.TextCodec
|
||||
import io.airbyte.cdk.discover.CdcIntegerMetaFieldType
|
||||
import io.airbyte.cdk.discover.CdcOffsetDateTimeMetaFieldType
|
||||
import io.airbyte.cdk.discover.CdcStringMetaFieldType
|
||||
import io.airbyte.cdk.discover.CommonMetaField
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.discover.FieldType
|
||||
import io.airbyte.cdk.discover.JdbcAirbyteStreamFactory
|
||||
import io.airbyte.cdk.discover.JdbcMetadataQuerier
|
||||
import io.airbyte.cdk.discover.MetaField
|
||||
import io.airbyte.cdk.discover.SystemType
|
||||
import io.airbyte.cdk.jdbc.*
|
||||
import io.airbyte.cdk.jdbc.LosslessJdbcFieldType
|
||||
import io.airbyte.cdk.output.sockets.FieldValueEncoder
|
||||
import io.airbyte.cdk.output.sockets.NativeRecordPayload
|
||||
import io.airbyte.cdk.read.*
|
||||
import io.airbyte.cdk.read.SelectQueryGenerator
|
||||
import io.airbyte.cdk.read.Stream
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.micronaut.context.annotation.Primary
|
||||
import jakarta.inject.Singleton
|
||||
import java.sql.JDBCType
|
||||
import java.sql.PreparedStatement
|
||||
import java.sql.ResultSet
|
||||
import java.time.OffsetDateTime
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
@Singleton
|
||||
@Primary
|
||||
class MsSqlSourceOperations :
|
||||
JdbcMetadataQuerier.FieldTypeMapper, SelectQueryGenerator, JdbcAirbyteStreamFactory {
|
||||
override fun toFieldType(c: JdbcMetadataQuerier.ColumnMetadata): FieldType {
|
||||
when (val type = c.type) {
|
||||
is SystemType -> {
|
||||
val retVal = leafType(type)
|
||||
return retVal
|
||||
}
|
||||
else -> {
|
||||
return PokemonFieldType
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun leafType(type: SystemType): JdbcFieldType<*> {
|
||||
val retVal =
|
||||
MsSqlServerSqlType.fromName(type.typeName)?.jdbcType
|
||||
?: when (type.jdbcType) {
|
||||
JDBCType.BIT,
|
||||
JDBCType.BOOLEAN -> BooleanFieldType
|
||||
JDBCType.TINYINT,
|
||||
JDBCType.SMALLINT -> ShortFieldType
|
||||
JDBCType.INTEGER -> IntFieldType
|
||||
JDBCType.BIGINT -> BigIntegerFieldType
|
||||
JDBCType.FLOAT -> FloatFieldType
|
||||
JDBCType.REAL ->
|
||||
// according to
|
||||
// https://learn.microsoft.com/en-us/sql/t-sql/data-types/float-and-real-transact-sql?view=sql-server-ver16,
|
||||
// when precision is less than 25, the value is stored in a 4-byte
|
||||
// structure, which corresponds to a float in Java.
|
||||
// Between 25 and 53, it's stored in an 8-byte structure, which corresponds
|
||||
// to a double in Java.
|
||||
// Correspondence between SQL Server and Java was determined mostly by experience, and
|
||||
// the sizes match.
|
||||
if (type.precision!! < 25) FloatFieldType else DoubleFieldType
|
||||
JDBCType.DOUBLE -> DoubleFieldType
|
||||
JDBCType.NUMERIC,
|
||||
JDBCType.DECIMAL -> BigDecimalFieldType
|
||||
JDBCType.CHAR,
|
||||
JDBCType.VARCHAR,
|
||||
JDBCType.LONGVARCHAR,
|
||||
JDBCType.NCHAR,
|
||||
JDBCType.NVARCHAR,
|
||||
JDBCType.LONGNVARCHAR -> StringFieldType
|
||||
JDBCType.DATE -> LocalDateFieldType
|
||||
JDBCType.TIME -> LocalTimeFieldType
|
||||
JDBCType.TIMESTAMP -> LocalDateTimeFieldType
|
||||
JDBCType.BINARY,
|
||||
JDBCType.VARBINARY,
|
||||
JDBCType.LONGVARBINARY -> BytesFieldType
|
||||
JDBCType.BLOB -> BinaryStreamFieldType
|
||||
JDBCType.CLOB,
|
||||
JDBCType.NCLOB -> CharacterStreamFieldType
|
||||
JDBCType.TIME_WITH_TIMEZONE -> OffsetTimeFieldType
|
||||
JDBCType.TIMESTAMP_WITH_TIMEZONE -> OffsetDateTimeFieldType
|
||||
JDBCType.NULL -> NullFieldType
|
||||
JDBCType.SQLXML -> XmlFieldType
|
||||
JDBCType.OTHER,
|
||||
JDBCType.JAVA_OBJECT,
|
||||
JDBCType.DISTINCT,
|
||||
JDBCType.STRUCT,
|
||||
JDBCType.ARRAY,
|
||||
JDBCType.REF,
|
||||
JDBCType.DATALINK,
|
||||
JDBCType.ROWID,
|
||||
JDBCType.REF_CURSOR,
|
||||
null -> PokemonFieldType
|
||||
}
|
||||
return retVal
|
||||
}
|
||||
|
||||
data object MsSqlServerFloatAccessor : JdbcAccessor<Float> {
|
||||
override fun get(
|
||||
rs: ResultSet,
|
||||
colIdx: Int,
|
||||
): Float? {
|
||||
val retVal = rs.getFloat(colIdx).takeUnless { rs.wasNull() }
|
||||
return retVal
|
||||
}
|
||||
|
||||
override fun set(
|
||||
stmt: PreparedStatement,
|
||||
paramIdx: Int,
|
||||
value: Float,
|
||||
) {
|
||||
stmt.setFloat(paramIdx, value)
|
||||
}
|
||||
}
|
||||
|
||||
data object MsSqlServerFloatFieldType :
|
||||
SymmetricJdbcFieldType<Float>(
|
||||
LeafAirbyteSchemaType.NUMBER,
|
||||
MsSqlServerFloatAccessor,
|
||||
FloatCodec,
|
||||
)
|
||||
|
||||
data object MsSqlServerGeographyFieldType :
|
||||
SymmetricJdbcFieldType<String>(
|
||||
LeafAirbyteSchemaType.STRING,
|
||||
MsSqlServerGeographyAccessor,
|
||||
TextCodec,
|
||||
)
|
||||
|
||||
data object MsSqlServerGeographyAccessor : JdbcAccessor<String> {
|
||||
override fun get(
|
||||
rs: ResultSet,
|
||||
colIdx: Int,
|
||||
): String? {
|
||||
val bytes = rs.getBytes(colIdx)
|
||||
if (rs.wasNull() || bytes == null) return null
|
||||
return Geography.deserialize(bytes).toString()
|
||||
}
|
||||
|
||||
override fun set(
|
||||
stmt: PreparedStatement,
|
||||
paramIdx: Int,
|
||||
value: String,
|
||||
) {
|
||||
stmt.setBytes(paramIdx, Geography.parse(value).serialize())
|
||||
}
|
||||
}
|
||||
|
||||
data object MsSqlServerGeometryFieldType :
|
||||
SymmetricJdbcFieldType<String>(
|
||||
LeafAirbyteSchemaType.STRING,
|
||||
MsSqlServerGeometryAccessor,
|
||||
TextCodec,
|
||||
)
|
||||
|
||||
data object MsSqlServerGeometryAccessor : JdbcAccessor<String> {
|
||||
override fun get(
|
||||
rs: ResultSet,
|
||||
colIdx: Int,
|
||||
): String? {
|
||||
val bytes = rs.getBytes(colIdx)
|
||||
if (rs.wasNull() || bytes == null) return null
|
||||
return Geometry.deserialize(bytes).toString()
|
||||
}
|
||||
|
||||
override fun set(
|
||||
stmt: PreparedStatement,
|
||||
paramIdx: Int,
|
||||
value: String,
|
||||
) {
|
||||
stmt.setBytes(paramIdx, Geometry.parse(value).serialize())
|
||||
}
|
||||
}
|
||||
|
||||
data object MsSqlServerHierarchyFieldType :
|
||||
SymmetricJdbcFieldType<String>(
|
||||
LeafAirbyteSchemaType.STRING,
|
||||
StringAccessor,
|
||||
TextCodec,
|
||||
)
|
||||
|
||||
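/**
 * SQL Server type names that need a non-default mapping (e.g. DATETIMEOFFSET, GEOMETRY,
 * GEOGRAPHY, MONEY, HIERARCHYID); [fromName] is consulted before the generic
 * [JDBCType]-based mapping in [leafType].
 */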
enum class MsSqlServerSqlType(
|
||||
val names: List<String>,
|
||||
val jdbcType: JdbcFieldType<*>,
|
||||
) {
|
||||
BINARY_FIELD(BinaryStreamFieldType, "VARBINARY", "BINARY"),
|
||||
DATETIME_TYPES(LocalDateTimeFieldType, "DATETIME", "DATETIME2", "SMALLDATETIME"),
|
||||
DATE(LocalDateFieldType, "DATE"),
|
||||
DATETIMEOFFSET(OffsetDateTimeFieldType, "DATETIMEOFFSET"),
|
||||
TIME_TYPE(LocalTimeFieldType, "TIME"),
|
||||
GEOMETRY(MsSqlServerGeometryFieldType, "GEOMETRY"),
|
||||
GEOGRAPHY(MsSqlServerGeographyFieldType, "GEOGRAPHY"),
|
||||
DOUBLE(DoubleFieldType, "MONEY", "SMALLMONEY"),
|
||||
HIERARCHY(MsSqlServerHierarchyFieldType, "HIERARCHYID"),
|
||||
;
|
||||
|
||||
constructor(
|
||||
jdbcType: JdbcFieldType<*>,
|
||||
vararg names: String,
|
||||
) : this(names.toList(), jdbcType)
|
||||
|
||||
companion object {
|
||||
private val nameToValue =
|
||||
MsSqlServerSqlType.entries
|
||||
.flatMap { msSqlServerSqlType ->
|
||||
msSqlServerSqlType.names.map { name ->
|
||||
name.uppercase() to msSqlServerSqlType
|
||||
}
|
||||
}
|
||||
.toMap()
|
||||
|
||||
fun fromName(name: String?): MsSqlServerSqlType? {
|
||||
val retVal = nameToValue[name?.uppercase()]
|
||||
return retVal
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
override fun generate(ast: SelectQuerySpec): SelectQuery =
|
||||
SelectQuery(ast.sql(), ast.select.columns, ast.bindings())
|
||||
|
||||
fun SelectQuerySpec.sql(): String {
|
||||
val components: List<String> =
|
||||
listOf(sql(select, limit), from.sql(), where.sql(), orderBy.sql())
|
||||
val sql: String = components.filter { it.isNotBlank() }.joinToString(" ")
|
||||
return sql
|
||||
}
|
||||
|
||||
fun sql(
|
||||
selectNode: SelectNode,
|
||||
limit: LimitNode,
|
||||
): String {
|
||||
val topClause: String =
|
||||
when (limit) {
|
||||
NoLimit -> ""
|
||||
Limit(0) -> "TOP 0 "
|
||||
is Limit -> "TOP ${limit.n} "
|
||||
}
|
||||
return "SELECT $topClause" +
|
||||
when (selectNode) {
|
||||
is SelectColumns -> selectNode.columns.joinToString(", ") { it.sql() }
|
||||
is SelectColumnMaxValue -> "MAX(${selectNode.column.sql()})"
|
||||
}
|
||||
}
|
||||
|
||||
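// HIERARCHYID columns are selected via ToString() so they are emitted as readable hierarchy
// paths rather than raw binary.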
fun Field.sql(): String = if (type is MsSqlServerHierarchyFieldType) "$id.ToString()" else "$id"
|
||||
|
||||
fun FromNode.sql(): String =
|
||||
when (this) {
|
||||
NoFrom -> ""
|
||||
is From -> if (this.namespace == null) "FROM $name" else "FROM $namespace.$name"
|
||||
is FromSample -> {
|
||||
if (sampleRateInv == 1L) {
|
||||
if (namespace == null) "FROM $name" else "FROM $namespace.$name"
|
||||
} else {
|
||||
val tableName = if (namespace == null) name else "$namespace.$name"
|
||||
val samplePercent = sampleRatePercentage.toPlainString()
|
||||
|
||||
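// TABLESAMPLE is page-based and only approximate, so cap the result with TOP and shuffle
// with ORDER BY NEWID() to end up with roughly sampleSize randomly chosen rows.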
"FROM (SELECT TOP $sampleSize * FROM $tableName TABLESAMPLE ($samplePercent PERCENT) ORDER BY NEWID()) AS randomly_sampled"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun WhereNode.sql(): String =
|
||||
when (this) {
|
||||
NoWhere -> ""
|
||||
is Where -> "WHERE ${clause.sql()}"
|
||||
}
|
||||
|
||||
fun WhereClauseNode.sql(): String =
|
||||
when (this) {
|
||||
is And -> conj.joinToString(") AND (", "(", ")") { it.sql() }
|
||||
is Or -> disj.joinToString(") OR (", "(", ")") { it.sql() }
|
||||
is Equal -> "${column.sql()} = ?"
|
||||
is Greater -> "${column.sql()} > ?"
|
||||
is GreaterOrEqual -> "${column.sql()} >= ?"
|
||||
is LesserOrEqual -> "${column.sql()} <= ?"
|
||||
is Lesser -> "${column.sql()} < ?"
|
||||
}
|
||||
|
||||
fun OrderByNode.sql(): String =
|
||||
when (this) {
|
||||
NoOrderBy -> ""
|
||||
is OrderBy -> "ORDER BY " + columns.joinToString(", ") { it.sql() }
|
||||
}
|
||||
|
||||
fun SelectQuerySpec.bindings(): List<SelectQuery.Binding> = where.bindings() + limit.bindings()
|
||||
|
||||
fun WhereNode.bindings(): List<SelectQuery.Binding> =
|
||||
when (this) {
|
||||
is NoWhere -> listOf()
|
||||
is Where -> clause.bindings()
|
||||
}
|
||||
|
||||
fun WhereClauseNode.bindings(): List<SelectQuery.Binding> =
|
||||
when (this) {
|
||||
is And -> conj.flatMap { it.bindings() }
|
||||
is Or -> disj.flatMap { it.bindings() }
|
||||
is WhereClauseLeafNode -> {
|
||||
val type = column.type as LosslessJdbcFieldType<*, *>
|
||||
listOf(SelectQuery.Binding(bindingValue, type))
|
||||
}
|
||||
}
|
||||
|
||||
fun LimitNode.bindings(): List<SelectQuery.Binding> =
|
||||
when (this) {
|
||||
NoLimit,
|
||||
Limit(0),
|
||||
is Limit, -> emptyList()
|
||||
}
|
||||
|
||||
override val globalCursor: MetaField = MsSqlServerCdcMetaFields.CDC_CURSOR
|
||||
override val globalMetaFields: Set<MetaField> =
|
||||
setOf(
|
||||
CommonMetaField.CDC_UPDATED_AT,
|
||||
CommonMetaField.CDC_DELETED_AT,
|
||||
MsSqlServerCdcMetaFields.CDC_CURSOR,
|
||||
MsSqlServerCdcMetaFields.CDC_EVENT_SERIAL_NO,
|
||||
MsSqlServerCdcMetaFields.CDC_LSN,
|
||||
)
|
||||
|
||||
override fun decorateRecordData(
|
||||
timestamp: OffsetDateTime,
|
||||
globalStateValue: OpaqueStateValue?,
|
||||
stream: Stream,
|
||||
recordData: ObjectNode,
|
||||
) {
|
||||
recordData.set<JsonNode>(
|
||||
CommonMetaField.CDC_UPDATED_AT.id,
|
||||
CdcOffsetDateTimeMetaFieldType.jsonEncoder.encode(timestamp),
|
||||
)
|
||||
recordData.set<JsonNode>(
|
||||
MsSqlServerCdcMetaFields.CDC_LSN.id,
|
||||
CdcStringMetaFieldType.jsonEncoder.encode(""),
|
||||
)
|
||||
if (globalStateValue == null) {
|
||||
return
|
||||
}
|
||||
// For MSSQL, deserialize the state to extract the commit LSN when one is available.
|
||||
// Best-effort extraction: if the LSN cannot be parsed, the empty default set above is kept.
|
||||
try {
|
||||
val stateNode = globalStateValue["state"] as? ObjectNode
|
||||
if (stateNode != null) {
|
||||
val offsetNode = stateNode["mssql_cdc_offset"] as? ObjectNode
|
||||
if (offsetNode != null && offsetNode.size() > 0) {
|
||||
// Extract LSN from the offset if available
|
||||
val offsetValue = offsetNode.values().asSequence().first()
|
||||
val lsn = Jsons.readTree(offsetValue.textValue())["commit_lsn"]?.asText()
|
||||
if (lsn != null) {
|
||||
recordData.set<JsonNode>(
|
||||
MsSqlServerCdcMetaFields.CDC_LSN.id,
|
||||
CdcStringMetaFieldType.jsonEncoder.encode(lsn),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
log.warn(e) {
|
||||
"Failed to extract LSN from CDC state for stream ${stream.name}. Using empty LSN value."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Suppress("UNCHECKED_CAST")
|
||||
override fun decorateRecordData(
|
||||
timestamp: OffsetDateTime,
|
||||
globalStateValue: OpaqueStateValue?,
|
||||
stream: Stream,
|
||||
recordData: NativeRecordPayload
|
||||
) {
|
||||
// Add CDC_UPDATED_AT field
|
||||
recordData[CommonMetaField.CDC_UPDATED_AT.id] =
|
||||
FieldValueEncoder(
|
||||
timestamp,
|
||||
CommonMetaField.CDC_UPDATED_AT.type.jsonEncoder as JsonEncoder<Any>
|
||||
)
|
||||
|
||||
// Add CDC_LSN field with empty string as default
|
||||
var lsnValue = ""
|
||||
|
||||
if (globalStateValue != null) {
|
||||
// For MSSQL, extract the LSN from the state if available
|
||||
try {
|
||||
val stateNode = globalStateValue["state"] as? ObjectNode
|
||||
if (stateNode != null) {
|
||||
val offsetNode = stateNode["mssql_cdc_offset"] as? ObjectNode
|
||||
if (offsetNode != null && offsetNode.size() > 0) {
|
||||
// Extract LSN from the offset if available
|
||||
val offsetValue = offsetNode.values().asSequence().first()
|
||||
val lsn = Jsons.readTree(offsetValue.textValue())["commit_lsn"]?.asText()
|
||||
if (lsn != null) {
|
||||
lsnValue = lsn
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
log.warn(e) {
|
||||
"Failed to extract LSN from CDC state for stream ${stream.name}. Using empty LSN value."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordData[MsSqlServerCdcMetaFields.CDC_LSN.id] =
|
||||
FieldValueEncoder(
|
||||
lsnValue,
|
||||
MsSqlServerCdcMetaFields.CDC_LSN.type.jsonEncoder as JsonEncoder<Any>
|
||||
)
|
||||
}
|
||||
|
||||
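/**
 * CDC metadata fields added to every record in CDC mode: an integer cursor, the commit LSN,
 * and the change event serial number.
 */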
enum class MsSqlServerCdcMetaFields(override val type: FieldType) : MetaField {
|
||||
CDC_CURSOR(CdcIntegerMetaFieldType),
|
||||
CDC_LSN(CdcStringMetaFieldType),
|
||||
CDC_EVENT_SERIAL_NO(CdcStringMetaFieldType);
|
||||
|
||||
override val id: String
|
||||
get() = MetaField.META_PREFIX + name.lowercase()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,165 @@
|
||||
---
|
||||
airbyte:
|
||||
connector:
|
||||
data-channel:
|
||||
medium: ${DATA_CHANNEL_MEDIUM:STDIO}
|
||||
format: ${DATA_CHANNEL_FORMAT:JSONL}
|
||||
socket-paths: ${DATA_CHANNEL_SOCKET_PATHS}
|
||||
output:
|
||||
buffer-byte-size-threshold-for-flush: 8192
|
||||
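# JDBC extraction tuning for the bulk CDK: concurrency mode, sampling, fetch-size bounds
# and memory heuristics used when reading tables.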
extract:
|
||||
jdbc:
|
||||
mode: concurrent
|
||||
with-sampling: true
|
||||
table-sample-size: 1024
|
||||
throughput-bytes-per-second: 10000000
|
||||
min-fetch-size: 10
|
||||
default-fetch-size: 1024
|
||||
max-fetch-size: 1000000000
|
||||
memory-capacity-ratio: 0.6
|
||||
estimated-record-overhead-bytes: 16
|
||||
estimated-field-overhead-bytes: 16
|
||||
check:
|
||||
jdbc:
|
||||
queries:
|
||||
- >-
|
||||
SELECT 1 WHERE 1 = 0;
|
||||
|
||||
exception-classifiers:
|
||||
regex:
|
||||
# The following rules are for the RegexExceptionClassifier [0] which are applied
|
||||
# sequentially on a Throwable's message [1] and its nested messages by cause [2].
|
||||
#
|
||||
# This classifier's rules are applied ahead of the JdbcExceptionClassifier's further down.
|
||||
#
|
||||
# [0] https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/bulk/core/base/src/main/kotlin/io/airbyte/cdk/output/ExceptionClassifier.kt
|
||||
# [1] https://docs.oracle.com/javase/8/docs/api/java/lang/Throwable.html#getMessage--
|
||||
# [2] https://docs.oracle.com/javase/8/docs/api/java/lang/Throwable.html#getCause--
|
||||
|
||||
rules:
|
||||
## REGEX RULE TEMPLATE:
|
||||
# pattern: Required; regex pattern, cf. https://www.freeformatter.com/java-regex-tester.html.
|
||||
# Note that regex patterns are not case-sensitive and are multiline.
|
||||
# input-example: Required, string matching regex pattern.
|
||||
# error: Required, one of (transient|config|system).
|
||||
# group: Optional, string prefixing user-facing error message.
|
||||
# output: Optional, user-facing error message; when not set, the exception message is used instead.
|
||||
# reference-links: Optional, list of URLs appended to user-facing message after a newline.
|
||||
|
||||
- pattern: (?i).*connection is not available, request timed out after.*
|
||||
input-example: >-
|
||||
java.sql.SQLTransientConnectionException: HikariPool-x -
|
||||
Connection is not available, request timed out after 10 ms
|
||||
error: transient
|
||||
group: Hikari Connection Pool Timeout
|
||||
output: The sync encountered a database read failure due to a connection timeout, will retry.
|
||||
reference-links: https://docs.oracle.com/javase/9/docs/api/java/sql/SQLTransientConnectionException.html
|
||||
|
||||
- pattern: (?i).*the tcp\/ip connection to the host.*has failed.*
|
||||
input-example: >-
|
||||
com.microsoft.sqlserver.jdbc.SQLServerException: The TCP/IP connection to the host localhost, port 1433 has failed.
|
||||
error: transient
|
||||
group: SQL Server Connection Error
|
||||
output: The sync encountered a network connection issue while connecting to the SQL Server, will retry.
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/connect/jdbc/troubleshooting-connectivity
|
||||
|
||||
- pattern: (?i).*login failed for user.*
|
||||
input-example: >-
|
||||
com.microsoft.sqlserver.jdbc.SQLServerException: Login failed for user 'sa'.
|
||||
error: config
|
||||
group: SQL Server Authentication Error
|
||||
output: >-
|
||||
The sync failed because the provided credentials are invalid.
|
||||
Please verify your username and password configuration.
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/security/authentication-access/troubleshoot-connecting-to-the-sql-server-database-engine
|
||||
|
||||
- pattern: (?i).*cannot open database.*requested by the login.*
|
||||
input-example: >-
|
||||
com.microsoft.sqlserver.jdbc.SQLServerException: Cannot open database "testdb" requested by the login. The login failed.
|
||||
error: config
|
||||
group: SQL Server Database Access Error
|
||||
output: >-
|
||||
The sync failed because the specified database cannot be accessed with the provided credentials.
|
||||
Please verify your database name and user permissions.
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/security/authentication-access/database-level-roles
|
||||
|
||||
- pattern: (?i).*invalid object name.*
|
||||
input-example: >-
|
||||
com.microsoft.sqlserver.jdbc.SQLServerException: Invalid object name 'dbo.test_table'.
|
||||
error: config
|
||||
group: SQL Server Object Error
|
||||
output: >-
|
||||
The sync failed because a required table or view does not exist in the database.
|
||||
Please verify your table/view names and schema configuration.
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/t-sql/language-elements/database-identifiers
|
||||
|
||||
- pattern: (?i).*timeout expired.*
|
||||
input-example: >-
|
||||
com.microsoft.sqlserver.jdbc.SQLServerException: The query has timed out.
|
||||
error: transient
|
||||
group: SQL Server Query Timeout
|
||||
output: The sync was aborted because the query took too long to return results, will retry.
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/connect/jdbc/setting-the-connection-properties
|
||||
|
||||
- pattern: (?i).*an exception occurred in the change event producer.*
|
||||
input-example: >-
|
||||
java.lang.RuntimeException: org.apache.kafka.connect.errors.ConnectException:
|
||||
An exception occurred in the change event producer. This connector will be stopped.
|
||||
error: config
|
||||
group: SQL Server CDC Error
|
||||
output: >-
|
||||
The sync encountered an unexpected error in the change event producer and has stopped.
|
||||
Please ensure CDC is properly configured and the SQL Server Agent is running.
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/track-changes/enable-and-disable-change-data-capture-sql-server
|
||||
|
||||
jdbc:
|
||||
# The following rules are for the JdbcExceptionClassifier [0] which are applied to a
|
||||
# SQL Server error code [1]. The vendor error code is printed in the exception
|
||||
# message, and is not to be confused with the SQLState [2] which is also in the message.
|
||||
#
|
||||
# [0] https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/bulk/toolkits/extract-jdbc/src/main/kotlin/io/airbyte/cdk/output/JdbcExceptionClassifier.kt
|
||||
# [1] https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/database-engine-events-and-errors
|
||||
# [2] https://en.wikipedia.org/wiki/SQLSTATE
|
||||
#
|
||||
rules:
|
||||
## JDBC RULE TEMPLATE
|
||||
# code: Required, SQL Server vendor error code.
|
||||
# error: Required, one of (transient|config|system).
|
||||
# output: Optional, user-facing error message; the exception message is used instead when this is not defined.
|
||||
# reference-links: Optional, list of URLs appended to user-facing message after newline.
|
||||
|
||||
- code: 18456
|
||||
error: config
|
||||
output: >-
|
||||
The sync failed because the provided credentials are invalid.
|
||||
Please verify your username and password configuration.
|
||||
group: SQL Server Authentication Error
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/security/authentication-access/troubleshoot-connecting-to-the-sql-server-database-engine
|
||||
|
||||
- code: 4060
|
||||
error: config
|
||||
output: >-
|
||||
The sync failed because the specified database cannot be accessed.
|
||||
Please verify your database name and connection configuration.
|
||||
group: SQL Server Database Access Error
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-4060-database-engine-error
|
||||
|
||||
- code: 208
|
||||
error: config
|
||||
output: >-
|
||||
The sync failed because a required table or view does not exist in the database.
|
||||
Please verify your table/view names and schema configuration.
|
||||
group: SQL Server Object Error
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-208-database-engine-error
|
||||
|
||||
- code: 2
|
||||
error: transient
|
||||
output: The sync encountered a network connection issue while connecting to the SQL Server, will retry.
|
||||
group: SQL Server Network Error
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-2-database-engine-error
|
||||
|
||||
- code: 1205
|
||||
error: transient
|
||||
output: The sync was aborted due to a deadlock, will retry.
|
||||
group: SQL Server Deadlock
|
||||
reference-links: https://docs.microsoft.com/en-us/sql/relational-databases/errors-events/mssqlserver-1205-database-engine-error
|
||||
@@ -1,196 +0,0 @@
|
||||
{
|
||||
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/mssql",
|
||||
"connectionSpecification": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "MSSQL Source Spec",
|
||||
"type": "object",
|
||||
"required": ["host", "port", "database", "username", "password"],
|
||||
"properties": {
|
||||
"host": {
|
||||
"description": "The hostname of the database.",
|
||||
"title": "Host",
|
||||
"type": "string",
|
||||
"order": 0
|
||||
},
|
||||
"port": {
|
||||
"description": "The port of the database.",
|
||||
"title": "Port",
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 65536,
|
||||
"examples": ["1433"],
|
||||
"order": 1
|
||||
},
|
||||
"database": {
|
||||
"description": "The name of the database.",
|
||||
"title": "Database",
|
||||
"type": "string",
|
||||
"examples": ["master"],
|
||||
"order": 2
|
||||
},
|
||||
"schemas": {
|
||||
"title": "Schemas",
|
||||
"description": "The list of schemas to sync from. Defaults to user. Case sensitive.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 0,
|
||||
"uniqueItems": true,
|
||||
"default": ["dbo"],
|
||||
"order": 3
|
||||
},
|
||||
"username": {
|
||||
"description": "The username which is used to access the database.",
|
||||
"title": "Username",
|
||||
"type": "string",
|
||||
"order": 4
|
||||
},
|
||||
"password": {
|
||||
"description": "The password associated with the username.",
|
||||
"title": "Password",
|
||||
"type": "string",
|
||||
"airbyte_secret": true,
|
||||
"order": 5
|
||||
},
|
||||
"jdbc_url_params": {
|
||||
"title": "JDBC URL Params",
|
||||
"description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).",
|
||||
"type": "string",
|
||||
"order": 6
|
||||
},
|
||||
"ssl_method": {
|
||||
"title": "SSL Method",
|
||||
"type": "object",
|
||||
"description": "The encryption method which is used when communicating with the database.",
|
||||
"order": 7,
|
||||
"oneOf": [
|
||||
{
|
||||
"title": "Unencrypted",
|
||||
"description": "Data transfer will not be encrypted.",
|
||||
"required": ["ssl_method"],
|
||||
"properties": {
|
||||
"ssl_method": {
|
||||
"type": "string",
|
||||
"const": "unencrypted"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Encrypted (trust server certificate)",
|
||||
"description": "Use the certificate provided by the server without verification. (For testing purposes only!)",
|
||||
"required": ["ssl_method"],
|
||||
"properties": {
|
||||
"ssl_method": {
|
||||
"type": "string",
|
||||
"const": "encrypted_trust_server_certificate"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Encrypted (verify certificate)",
|
||||
"description": "Verify and use the certificate provided by the server.",
|
||||
"required": ["ssl_method"],
|
||||
"properties": {
|
||||
"ssl_method": {
|
||||
"type": "string",
|
||||
"const": "encrypted_verify_certificate"
|
||||
},
|
||||
"hostNameInCertificate": {
|
||||
"title": "Host Name In Certificate",
|
||||
"type": "string",
|
||||
"description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
|
||||
"order": 0
|
||||
},
|
||||
"certificate": {
|
||||
"title": "Certificate",
|
||||
"type": "string",
|
||||
"description": "certificate of the server, or of the CA that signed the server certificate",
|
||||
"order": 1,
|
||||
"airbyte_secret": true,
|
||||
"multiline": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"replication_method": {
|
||||
"type": "object",
|
||||
"title": "Update Method",
|
||||
"description": "Configures how data is extracted from the database.",
|
||||
"default": "CDC",
|
||||
"display_type": "radio",
|
||||
"order": 8,
|
||||
"oneOf": [
|
||||
{
|
||||
"title": "Read Changes using Change Data Capture (CDC)",
|
||||
"description": "<i>Recommended</i> - Incrementally reads new inserts, updates, and deletes using the SQL Server's <a href=\"https://docs.airbyte.com/integrations/sources/mssql/#change-data-capture-cdc\">change data capture feature</a>. This must be enabled on your database.",
|
||||
"required": ["method"],
|
||||
"properties": {
|
||||
"method": {
|
||||
"type": "string",
|
||||
"const": "CDC",
|
||||
"order": 0
|
||||
},
|
||||
"initial_waiting_seconds": {
|
||||
"type": "integer",
|
||||
"title": "Initial Waiting Time in Seconds (Advanced)",
|
||||
"description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about <a href=\"https://docs.airbyte.com/integrations/sources/mysql/#change-data-capture-cdc\">initial waiting time</a>.",
|
||||
"default": 300,
|
||||
"min": 120,
|
||||
"max": 3600,
|
||||
"order": 3
|
||||
},
|
||||
"invalid_cdc_cursor_position_behavior": {
|
||||
"type": "string",
|
||||
"title": "Invalid CDC position behavior (Advanced)",
|
||||
"description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
|
||||
"enum": ["Fail sync", "Re-sync data"],
|
||||
"default": "Fail sync",
|
||||
"order": 4
|
||||
},
|
||||
"queue_size": {
|
||||
"type": "integer",
|
||||
"title": "Size of the queue (Advanced)",
|
||||
"description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.",
|
||||
"default": 10000,
|
||||
"order": 5,
|
||||
"min": 1000,
|
||||
"max": 10000
|
||||
},
|
||||
"initial_load_timeout_hours": {
|
||||
"type": "integer",
|
||||
"title": "Initial Load Timeout in Hours (Advanced)",
|
||||
"description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
|
||||
"default": 8,
|
||||
"min": 4,
|
||||
"max": 24,
|
||||
"order": 6
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Scan Changes with User Defined Cursor",
|
||||
"description": "Incrementally detects new inserts and updates using the <a href=\"https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor\">cursor column</a> chosen when configuring a connection (e.g. created_at, updated_at).",
|
||||
"required": ["method"],
|
||||
"properties": {
|
||||
"method": {
|
||||
"type": "string",
|
||||
"const": "STANDARD",
|
||||
"order": 0
|
||||
},
|
||||
"exclude_todays_data": {
|
||||
"title": "Exclude Today's Data",
|
||||
"description": "When enabled incremental syncs using a cursor of a temporal types (date or datetime) will include cursor values only up until last midnight (Advanced)",
|
||||
"default": false,
|
||||
"type": "boolean",
|
||||
"always_show": true,
|
||||
"order": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,360 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
|
||||
public abstract class AbstractMssqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest {
|
||||
|
||||
protected MsSQLTestDatabase testdb;
|
||||
|
||||
@Override
|
||||
protected String getNameSpace() {
|
||||
return "dbo";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getImageName() {
|
||||
return "airbyte/source-mssql:dev";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown(final TestDestinationEnv testEnv) {
|
||||
testdb.close();
|
||||
}
|
||||
|
||||
protected static final String CREATE_TABLE_SQL = "CREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)";
|
||||
|
||||
@Override
|
||||
protected void initTests() {
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("bigint")
|
||||
.airbyteType(JsonSchemaType.INTEGER)
|
||||
.addInsertValues("-9223372036854775808", "9223372036854775807", "0", "null")
|
||||
.addExpectedValues("-9223372036854775808", "9223372036854775807", "0", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("int")
|
||||
.airbyteType(JsonSchemaType.INTEGER)
|
||||
.addInsertValues("null", "-2147483648", "2147483647")
|
||||
.addExpectedValues(null, "-2147483648", "2147483647")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("smallint")
|
||||
.airbyteType(JsonSchemaType.INTEGER)
|
||||
.addInsertValues("null", "-32768", "32767")
|
||||
.addExpectedValues(null, "-32768", "32767")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("tinyint")
|
||||
.airbyteType(JsonSchemaType.INTEGER)
|
||||
.addInsertValues("null", "0", "255")
|
||||
.addExpectedValues(null, "0", "255")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("bit")
|
||||
.airbyteType(JsonSchemaType.BOOLEAN)
|
||||
.addInsertValues("null", "0", "1", "'true'", "'false'")
|
||||
.addExpectedValues(null, "false", "true", "true", "false")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("decimal")
|
||||
.fullSourceDataType("DECIMAL(5,2)")
|
||||
.airbyteType(JsonSchemaType.NUMBER)
|
||||
.addInsertValues("999.33", "null")
|
||||
.addExpectedValues("999.33", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("numeric")
|
||||
.airbyteType(JsonSchemaType.NUMBER)
|
||||
.addInsertValues("'99999'", "null")
|
||||
.addExpectedValues("99999", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("money")
|
||||
.airbyteType(JsonSchemaType.NUMBER)
|
||||
.addInsertValues("null", "'9990000.3647'")
|
||||
.addExpectedValues(null, "9990000.3647")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("smallmoney")
|
||||
.airbyteType(JsonSchemaType.NUMBER)
|
||||
.addInsertValues("null", "'-214748.3648'", "214748.3647")
|
||||
.addExpectedValues(null, "-214748.3648", "214748.3647")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("float")
|
||||
.airbyteType(JsonSchemaType.NUMBER)
|
||||
.addInsertValues("'123'", "'1234567890.1234567'", "null")
|
||||
.addExpectedValues("123.0", "1.2345678901234567E9", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(TestDataHolder.builder()
|
||||
.sourceType("real")
|
||||
.airbyteType(JsonSchemaType.NUMBER)
|
||||
.addInsertValues("'123'", "'1234567890.1234567'", "null")
|
||||
.addExpectedValues("123.0", "1.234568E9", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("date")
|
||||
.airbyteType(JsonSchemaType.STRING_DATE)
|
||||
.addInsertValues("'0001-01-01'", "'9999-12-31'", "'1999-01-08'", "null")
|
||||
.addExpectedValues("0001-01-01", "9999-12-31", "1999-01-08", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("smalldatetime")
|
||||
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE)
|
||||
.addInsertValues("'1900-01-01'", "'2079-06-06'", "null")
|
||||
.addExpectedValues("1900-01-01T00:00:00.000000", "2079-06-06T00:00:00.000000", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("datetime")
|
||||
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE)
|
||||
.addInsertValues("'1753-01-01'", "'9999-12-31'", "'9999-12-31T13:00:04'",
|
||||
"'9999-12-31T13:00:04.123'", "null")
|
||||
.addExpectedValues("1753-01-01T00:00:00.000000", "9999-12-31T00:00:00.000000", "9999-12-31T13:00:04.000000",
|
||||
"9999-12-31T13:00:04.123000", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("datetime2")
|
||||
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE)
|
||||
.addInsertValues("'0001-01-01'", "'9999-12-31'", "'9999-12-31T13:00:04.123456'", "null", "'2023-11-08T01:20:11.3733338'")
|
||||
.addExpectedValues("0001-01-01T00:00:00.000000", "9999-12-31T00:00:00.000000", "9999-12-31T13:00:04.123456", null,
|
||||
"2023-11-08T01:20:11.373333")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("time")
|
||||
.airbyteType(JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE)
|
||||
.addInsertValues("null", "'13:00:01'", "'13:00:04Z'", "'13:00:04.123456Z'")
|
||||
.addExpectedValues(null, "13:00:01", "13:00:04", "13:00:04.123456")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("datetimeoffset")
|
||||
.airbyteType(JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)
|
||||
.addInsertValues("'2001-01-10 00:00:00 +01:00'", "'9999-01-10 00:00:00 +01:00'", "null", "'2024-05-10 19:00:01.604805 +03:00'",
|
||||
"'2024-03-02 19:08:07.1234567 +09:00'", "'2024-03-02 19:08:07.12345678 +09:00'",
|
||||
"'0001-01-01 00:00:00.0000000 +00:00'")
|
||||
.addExpectedValues("2001-01-10T00:00:00.000000+01:00",
|
||||
"9999-01-10T00:00:00.000000+01:00", null, "2024-05-10T19:00:01.604805+03:00", "2024-03-02T19:08:07.123456+09:00",
|
||||
"2024-03-02T19:08:07.123456+09:00", "0001-01-01T00:00:00.000000Z")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("char")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'*'", "null")
|
||||
.addExpectedValues("a", "*", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("varchar")
|
||||
.fullSourceDataType("varchar(max) COLLATE Latin1_General_100_CI_AI_SC_UTF8")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
|
||||
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
|
||||
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
|
||||
null, "\\xF0\\x9F\\x9A\\x80")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("text")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'abc'", "'Some test text 123$%^&*()_'", "''", "null")
|
||||
.addExpectedValues("a", "abc", "Some test text 123$%^&*()_", "", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("nchar")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'*'", "N'ї'", "null")
|
||||
.addExpectedValues("a", "*", "ї", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("nvarchar")
|
||||
.fullSourceDataType("nvarchar(max)")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
|
||||
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
|
||||
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
|
||||
null, "\\xF0\\x9F\\x9A\\x80")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("ntext")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
|
||||
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
|
||||
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
|
||||
null, "\\xF0\\x9F\\x9A\\x80")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("binary")
|
||||
.airbyteType(JsonSchemaType.STRING_BASE_64)
|
||||
.addInsertValues("CAST( 'A' AS BINARY(1))", "null")
|
||||
.addExpectedValues("QQ==", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("varbinary")
|
||||
.fullSourceDataType("varbinary(3)")
|
||||
.airbyteType(JsonSchemaType.STRING_BASE_64)
|
||||
.addInsertValues("CAST( 'ABC' AS VARBINARY)", "null")
|
||||
.addExpectedValues("QUJD", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
// Proper select query example: SELECT test_column.STAsText() from dbo_1_geometry;
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("geometry")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)",
|
||||
"null")
|
||||
.addExpectedValues("LINESTRING(100 100, 20 180, 180 180)", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("uniqueidentifier")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'375CFC44-CAE3-4E43-8083-821D2DF0E626'", "null")
|
||||
.addExpectedValues("375CFC44-CAE3-4E43-8083-821D2DF0E626", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("xml")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues(
|
||||
"'<user><user_id>1</user_id></user>'", "null", "''")
|
||||
.addExpectedValues("<user><user_id>1</user_id></user>", null, "")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
// Proper select query example: SELECT test_column.STAsText() from dbo_1_geography;
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("geography")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues(
|
||||
"geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326)",
|
||||
"null")
|
||||
.addExpectedValues("LINESTRING(-122.36 47.656, -122.343 47.656)", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
// sql_variant is not supported by Debezium and always comes back as null, so it only works in regular (non-CDC) syncs.
// hierarchyid is returned in its binary form, and MSSQL does not provide any parsers for it.
// On a regular sync we do a pre-flight request and additionally wrap the SQL query when a hierarchyid column is present,
// but that option is not available in CDC mode, where we rely on the third-party tool "Debezium" as the CDC client.
if (this instanceof MssqlSourceDatatypeTest) {
|
||||
// create table dbo_1_hierarchyid1 (test_column hierarchyid);
|
||||
// insert dbo_1_hierarchyid1 values ('/1/1/');
|
||||
// select test_column ,test_column.ToString() AS [Node Text],test_column.GetLevel() [Node Level]
|
||||
// from dbo_1_hierarchyid1;
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("hierarchyid")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'/1/1/'", "null")
|
||||
.addExpectedValues("/1/1/", null)
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("sql_variant")
|
||||
.airbyteType(JsonSchemaType.STRING)
|
||||
.addInsertValues("'a'", "'abc'", "N'Миші йдуть на південь, не питай чому;'", "N'櫻花分店'",
|
||||
"''", "null", "N'\\xF0\\x9F\\x9A\\x80'")
|
||||
.addExpectedValues("a", "abc", "Миші йдуть на південь, не питай чому;", "櫻花分店", "",
|
||||
null, "\\xF0\\x9F\\x9A\\x80")
|
||||
.createTablePatternSql(CREATE_TABLE_SQL)
|
||||
.build());
|
||||
|
||||
}
|
||||
|
||||
addDataTypeTestData(
|
||||
TestDataHolder.builder()
|
||||
.sourceType("int")
|
||||
.airbyteType(JsonSchemaType.INTEGER)
|
||||
.addInsertValues("null", "1234", "7878")
|
||||
.addExpectedValues(null, "1234", "7878")
|
||||
.createTablePatternSql("CREATE TABLE %1$s(%2$s INTEGER NULL DEFAULT ((7878)), %3$s %4$s)")
|
||||
.build());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,152 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.db.Database;
|
||||
import io.airbyte.cdk.db.factory.DSLContextFactory;
|
||||
import io.airbyte.cdk.db.factory.DatabaseDriver;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
|
||||
import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
|
||||
import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
|
||||
import io.airbyte.commons.functional.CheckedFunction;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
import io.airbyte.protocol.models.Field;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import io.airbyte.protocol.models.v0.CatalogHelpers;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.ConnectorSpecification;
|
||||
import io.airbyte.protocol.models.v0.DestinationSyncMode;
|
||||
import io.airbyte.protocol.models.v0.SyncMode;
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import org.jooq.SQLDialect;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public abstract class AbstractSshMssqlSourceAcceptanceTest extends SourceAcceptanceTest {
|
||||
|
||||
static private final Logger LOGGER = LoggerFactory.getLogger(AbstractSshMssqlSourceAcceptanceTest.class);
|
||||
|
||||
private static final String SCHEMA_NAME = "dbo";
|
||||
private static final String STREAM_NAME = "id_and_name";
|
||||
private static final String STREAM_NAME2 = "starships";
|
||||
|
||||
public abstract SshTunnel.TunnelMethod getTunnelMethod();
|
||||
|
||||
private final SshBastionContainer bastion = new SshBastionContainer();
|
||||
private MsSQLTestDatabase testdb;
|
||||
|
||||
@Override
|
||||
protected JsonNode getConfig() {
|
||||
try {
|
||||
return testdb.integrationTestConfigBuilder()
|
||||
.withoutSsl()
|
||||
.with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), true))
|
||||
.build();
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void populateDatabaseTestData() throws Exception {
|
||||
final var outerConfig = testdb.integrationTestConfigBuilder()
|
||||
.withSchemas("public")
|
||||
.withoutSsl()
|
||||
.with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false))
|
||||
.build();
|
||||
SshTunnel.sshWrap(
|
||||
outerConfig,
|
||||
JdbcUtils.HOST_LIST_KEY,
|
||||
JdbcUtils.PORT_LIST_KEY,
|
||||
(CheckedFunction<JsonNode, List<JsonNode>, Exception>) mangledConfig -> getDatabaseFromConfig(mangledConfig)
|
||||
.query(ctx -> {
|
||||
ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));");
|
||||
ctx.fetch("INSERT INTO id_and_name (id, name, born) VALUES " +
|
||||
"(1, 'picard', '2124-03-04T01:01:01Z'), " +
|
||||
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
|
||||
"(3, 'vash', '2124-03-04T01:01:01Z');");
|
||||
return null;
|
||||
}));
|
||||
}
|
||||
|
||||
private static Database getDatabaseFromConfig(final JsonNode config) {
|
||||
return new Database(
|
||||
DSLContextFactory.create(
|
||||
config.get(JdbcUtils.USERNAME_KEY).asText(),
|
||||
config.get(JdbcUtils.PASSWORD_KEY).asText(),
|
||||
DatabaseDriver.MSSQLSERVER.getDriverClassName(),
|
||||
String.format(DatabaseDriver.MSSQLSERVER.getUrlFormatString(),
|
||||
config.get(JdbcUtils.HOST_KEY).asText(),
|
||||
config.get(JdbcUtils.PORT_KEY).asInt(),
|
||||
config.get(JdbcUtils.DATABASE_KEY).asText()) + ";encrypt=false;trustServerCertificate=true",
|
||||
SQLDialect.DEFAULT));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setupEnvironment(final TestDestinationEnv environment) throws Exception {
|
||||
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
|
||||
LOGGER.info("starting bastion");
|
||||
bastion.initAndStartBastion(testdb.getContainer().getNetwork());
|
||||
LOGGER.info("bastion started");
|
||||
populateDatabaseTestData();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown(final TestDestinationEnv testEnv) {
|
||||
bastion.stopAndClose();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getImageName() {
|
||||
return "airbyte/source-mssql:dev";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ConnectorSpecification getSpec() throws Exception {
|
||||
return SshHelpers.getSpecAndInjectSsh();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ConfiguredAirbyteCatalog getConfiguredCatalog() {
|
||||
return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withCursorField(Lists.newArrayList("id"))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
STREAM_NAME, SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withSupportedSyncModes(
|
||||
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withCursorField(Lists.newArrayList("id"))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
STREAM_NAME2, SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withSupportedSyncModes(
|
||||
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)))));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode getState() {
|
||||
return Jsons.jsonNode(new HashMap<>());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,247 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.protocol.models.v0.SyncMode.FULL_REFRESH;
|
||||
import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
|
||||
import io.airbyte.protocol.models.Field;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamState;
|
||||
import io.airbyte.protocol.models.v0.CatalogHelpers;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.ConnectorSpecification;
|
||||
import io.airbyte.protocol.models.v0.DestinationSyncMode;
|
||||
import io.airbyte.protocol.models.v0.SyncMode;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.junit.Assert;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.TestInstance;
|
||||
import org.junit.jupiter.api.TestInstance.Lifecycle;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.junit.jupiter.api.parallel.ExecutionMode;
|
||||
|
||||
@TestInstance(Lifecycle.PER_METHOD)
|
||||
@Execution(ExecutionMode.CONCURRENT)
|
||||
public class CdcMssqlSourceAcceptanceTest extends SourceAcceptanceTest {
|
||||
|
||||
private static final String SCHEMA_NAME = "dbo";
|
||||
private static final String STREAM_NAME = "id_and_name";
|
||||
private static final String STREAM_NAME2 = "starships";
|
||||
private static final String CDC_ROLE_NAME = "cdc_selector";
|
||||
private static final String STREAM_NAME3 = "stream3";
|
||||
|
||||
private MsSQLTestDatabase testdb;
|
||||
|
||||
@Override
|
||||
protected String getImageName() {
|
||||
return "airbyte/source-mssql:dev";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ConnectorSpecification getSpec() throws Exception {
|
||||
return SshHelpers.getSpecAndInjectSsh();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode getConfig() {
|
||||
return testdb.integrationTestConfigBuilder()
|
||||
.withCdcReplication()
|
||||
.withoutSsl()
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ConfiguredAirbyteCatalog getConfiguredCatalog() {
|
||||
return new ConfiguredAirbyteCatalog().withStreams(getConfiguredAirbyteStreams());
|
||||
}
|
||||
|
||||
protected List<ConfiguredAirbyteStream> getConfiguredAirbyteStreams() {
|
||||
return Lists.newArrayList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
STREAM_NAME, SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withSourceDefinedCursor(true)
|
||||
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
|
||||
.withSupportedSyncModes(
|
||||
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
STREAM_NAME2, SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withSourceDefinedCursor(true)
|
||||
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
|
||||
.withSupportedSyncModes(
|
||||
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode getState() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setupEnvironment(final TestDestinationEnv environment) {
|
||||
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT);
|
||||
testdb
|
||||
.withWaitUntilAgentRunning()
|
||||
.withCdc()
|
||||
// create tables
|
||||
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME)
|
||||
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
|
||||
// populate tables
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", SCHEMA_NAME, STREAM_NAME)
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3)
|
||||
// enable cdc on tables for designated role
|
||||
.withCdcForTable(SCHEMA_NAME, STREAM_NAME, CDC_ROLE_NAME)
|
||||
.withCdcForTable(SCHEMA_NAME, STREAM_NAME2, CDC_ROLE_NAME)
|
||||
.withCdcForTable(SCHEMA_NAME, STREAM_NAME3, CDC_ROLE_NAME)
|
||||
// revoke user permissions
|
||||
.with("REVOKE ALL FROM %s CASCADE;", testdb.getUserName())
|
||||
.with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testdb.getUserName())
|
||||
// grant user permissions
|
||||
.with("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testdb.getUserName())
|
||||
.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName())
|
||||
.with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testdb.getUserName())
|
||||
.withWaitUntilMaxLsnAvailable();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown(final TestDestinationEnv testEnv) {
|
||||
testdb.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testAddNewStreamToExistingSync() throws Exception {
|
||||
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
|
||||
new ConfiguredAirbyteCatalog().withStreams(List.of(getConfiguredAirbyteStreams().get(0)));
|
||||
|
||||
// Start a sync with one stream
|
||||
final List<AirbyteMessage> messages = runRead(configuredCatalogWithOneStream);
|
||||
final List<AirbyteRecordMessage> recordMessages = filterRecords(messages);
|
||||
final List<AirbyteStateMessage> stateMessages = filterStateMessages(messages);
|
||||
final List<AirbyteStreamState> streamStates = stateMessages.get(0).getGlobal().getStreamStates();
|
||||
|
||||
assertEquals(3, recordMessages.size());
|
||||
assertEquals(2, stateMessages.size());
|
||||
assertEquals(1, streamStates.size());
|
||||
assertEquals(STREAM_NAME, streamStates.get(0).getStreamDescriptor().getName());
|
||||
assertEquals(SCHEMA_NAME, streamStates.get(0).getStreamDescriptor().getNamespace());
|
||||
|
||||
final AirbyteStateMessage lastStateMessage = Iterables.getLast(stateMessages);
|
||||
|
||||
final ConfiguredAirbyteCatalog configuredCatalogWithTwoStreams = configuredCatalogWithOneStream.withStreams(getConfiguredAirbyteStreams());
|
||||
|
||||
// Start another sync with a newly added stream
|
||||
final List<AirbyteMessage> messages2 = runRead(configuredCatalogWithTwoStreams, Jsons.jsonNode(List.of(lastStateMessage)));
|
||||
final List<AirbyteRecordMessage> recordMessages2 = filterRecords(messages2);
|
||||
final List<AirbyteStateMessage> stateMessages2 = filterStateMessages(messages2);
|
||||
|
||||
assertEquals(3, recordMessages2.size());
|
||||
assertEquals(2, stateMessages2.size());
|
||||
|
||||
final AirbyteStateMessage lastStateMessage2 = Iterables.getLast(stateMessages2);
|
||||
final List<AirbyteStreamState> streamStates2 = lastStateMessage2.getGlobal().getStreamStates();
|
||||
|
||||
assertEquals(2, streamStates2.size());
|
||||
|
||||
assertEquals(STREAM_NAME, streamStates2.get(0).getStreamDescriptor().getName());
|
||||
assertEquals(SCHEMA_NAME, streamStates2.get(0).getStreamDescriptor().getNamespace());
|
||||
assertEquals(STREAM_NAME2, streamStates2.get(1).getStreamDescriptor().getName());
|
||||
assertEquals(SCHEMA_NAME, streamStates2.get(1).getStreamDescriptor().getNamespace());
|
||||
}
|
||||
|
||||
private List<AirbyteStateMessage> filterStateMessages(final List<AirbyteMessage> messages) {
|
||||
return messages.stream().filter(r -> r.getType() == AirbyteMessage.Type.STATE).map(AirbyteMessage::getState)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Test
|
||||
protected void testNullValueConversion() throws Exception {
|
||||
final List<ConfiguredAirbyteStream> configuredAirbyteStreams =
|
||||
Lists.newArrayList(new ConfiguredAirbyteStream()
|
||||
.withSyncMode(INCREMENTAL)
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(STREAM_NAME3,
|
||||
SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING),
|
||||
Field.of("userid", JsonSchemaType.NUMBER))
|
||||
.withSourceDefinedCursor(true)
|
||||
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
|
||||
.withSupportedSyncModes(Lists.newArrayList(FULL_REFRESH, INCREMENTAL))));
|
||||
|
||||
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
|
||||
new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0)));
|
||||
|
||||
final List<AirbyteMessage> airbyteMessages = runRead(configuredCatalogWithOneStream, getState());
|
||||
final List<AirbyteRecordMessage> recordMessages = filterRecords(airbyteMessages);
|
||||
final List<AirbyteStateMessage> stateMessages = airbyteMessages
|
||||
.stream()
|
||||
.filter(m -> m.getType() == AirbyteMessage.Type.STATE)
|
||||
.map(AirbyteMessage::getState)
|
||||
.collect(Collectors.toList());
|
||||
Assert.assertEquals(1, recordMessages.size());
assertFalse(stateMessages.isEmpty(), "Expected at least one state message from the sync.");
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
assertTrue(cdcFieldsOmitted(recordMessages.get(0).getData()).equals(
|
||||
mapper.readTree("{\"id\":4, \"name\":\"voyager\", \"userid\":null}")));
|
||||
|
||||
// when we run incremental sync again there should be no new records. Run a sync with the latest
|
||||
// state message and assert no records were emitted.
|
||||
JsonNode latestState = extractLatestState(stateMessages);
|
||||
|
||||
testdb.getDatabase().query(c -> c.query("INSERT INTO %s.%s (id, name) VALUES (5,'deep space nine')".formatted(SCHEMA_NAME, STREAM_NAME3)))
|
||||
.execute();
|
||||
|
||||
assert Objects.nonNull(latestState);
|
||||
final List<AirbyteRecordMessage> secondSyncRecords = filterRecords(runRead(configuredCatalogWithOneStream, latestState));
|
||||
assertFalse(
|
||||
secondSyncRecords.isEmpty(),
|
||||
"Expected the second incremental sync to produce records.");
|
||||
assertEquals(cdcFieldsOmitted(secondSyncRecords.get(0).getData()),
|
||||
mapper.readTree("{\"id\":5, \"name\":\"deep space nine\", \"userid\":null}"));
|
||||
}
|
||||
|
||||
private JsonNode cdcFieldsOmitted(final JsonNode node) {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
ObjectNode object = mapper.createObjectNode();
|
||||
node.fieldNames().forEachRemaining(name -> {
|
||||
if (!name.toLowerCase().startsWith("_ab_cdc_")) {
|
||||
object.put(name, node.get(name));
|
||||
}
|
||||
});
|
||||
return object;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,79 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.Database;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.TestInstance.Lifecycle;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;

@TestInstance(Lifecycle.PER_METHOD)
@Execution(ExecutionMode.CONCURRENT)
public class CdcMssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest {

  private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());

  @Override
  protected JsonNode getConfig() {
    return testdb.integrationTestConfigBuilder()
        .withCdcReplication()
        .withoutSsl()
        .build();
  }

  @Override
  protected Database setupDatabase() {
    testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT)
        .withCdc();
    return testdb.getDatabase();
  }

  protected void createTables() throws Exception {
    List<Callable<MsSQLTestDatabase>> createTableTasks = new ArrayList<>();
    List<Callable<MsSQLTestDatabase>> enableCdcForTableTasks = new ArrayList<>();
    for (var test : testDataHolders) {
      createTableTasks.add(() -> testdb.with(test.getCreateSqlQuery()));
      enableCdcForTableTasks.add(() -> testdb.withCdcForTable(test.getNameSpace(), test.getNameWithTestPrefix(), null));
    }
    executor.invokeAll(createTableTasks);
    executor.invokeAll(enableCdcForTableTasks);
  }

  protected void populateTables() throws Exception {
    List<Callable<MsSQLTestDatabase>> insertTasks = new ArrayList<>();
    List<Callable<MsSQLTestDatabase>> waitForCdcRecordsTasks = new ArrayList<>();
    for (var test : testDataHolders) {
      insertTasks.add(() -> {
        this.database.query((ctx) -> {
          List<String> sql = test.getInsertSqlQueries();
          Objects.requireNonNull(ctx);
          sql.forEach(ctx::fetch);
          return null;
        });
        return null;
      });
      waitForCdcRecordsTasks.add(() -> testdb.waitForCdcRecords(test.getNameSpace(), test.getNameWithTestPrefix(), test.getExpectedValues().size()));
    }
    executor.invokeAll(insertTasks);
    executor.invokeAll(waitForCdcRecordsTasks);
  }

  @Override
  public boolean testCatalog() {
    return true;
  }

}
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
|
||||
import io.airbyte.commons.features.FeatureFlags;
|
||||
import io.airbyte.commons.features.FeatureFlagsWrapper;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
|
||||
public class CloudDeploymentSslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest {
|
||||
|
||||
@Override
|
||||
protected void setupEnvironment(final TestDestinationEnv environment) {
|
||||
final var container = new MsSQLContainerFactory().shared(BaseImage.MSSQL_2022.reference);
|
||||
testdb = new MsSQLTestDatabase(container);
|
||||
testdb = testdb
|
||||
.withConnectionProperty("encrypt", "true")
|
||||
.withConnectionProperty("trustServerCertificate", "true")
|
||||
.withConnectionProperty("databaseName", testdb.getDatabaseName())
|
||||
.initialized()
|
||||
.with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));")
|
||||
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("INSERT INTO id_and_name (id, name, born) VALUES " +
|
||||
"(1,'picard', '2124-03-04T01:01:01Z'), " +
|
||||
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
|
||||
"(3, 'vash', '2124-03-04T01:01:01Z');")
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected FeatureFlags featureFlags() {
|
||||
return FeatureFlagsWrapper.overridingDeploymentMode(super.featureFlags(), "CLOUD");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode getConfig() {
|
||||
return testdb.integrationTestConfigBuilder()
|
||||
.withEncrytedTrustServerCertificate()
|
||||
.build();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,215 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
import io.airbyte.protocol.models.Field;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamState;
|
||||
import io.airbyte.protocol.models.v0.CatalogHelpers;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.ConnectorSpecification;
|
||||
import io.airbyte.protocol.models.v0.DestinationSyncMode;
|
||||
import io.airbyte.protocol.models.v0.SyncMode;
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class MssqlSourceAcceptanceTest extends SourceAcceptanceTest {
|
||||
|
||||
protected static final String SCHEMA_NAME = "dbo";
|
||||
protected static final String STREAM_NAME = "id_and_name";
|
||||
protected static final String STREAM_NAME2 = "starships";
|
||||
protected static final String STREAM_NAME3 = "stream3";
|
||||
|
||||
protected MsSQLTestDatabase testdb;
|
||||
|
||||
@Override
|
||||
protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException {
|
||||
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022)
|
||||
.with("CREATE TABLE %s.%s (id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));", SCHEMA_NAME, STREAM_NAME)
|
||||
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("INSERT INTO id_and_name (id, name, born) VALUES " +
|
||||
"(1, 'picard', '2124-03-04T01:01:01Z'), " +
|
||||
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
|
||||
"(3, 'vash', '2124-03-04T01:01:01Z');")
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown(final TestDestinationEnv testEnv) {
|
||||
testdb.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getImageName() {
|
||||
return "airbyte/source-mssql:dev";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ConnectorSpecification getSpec() throws Exception {
|
||||
return SshHelpers.getSpecAndInjectSsh();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode getConfig() {
|
||||
return testdb.integrationTestConfigBuilder()
|
||||
.withoutSsl()
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ConfiguredAirbyteCatalog getConfiguredCatalog() {
|
||||
return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(INCREMENTAL)
|
||||
.withCursorField(Lists.newArrayList("id"))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
STREAM_NAME, SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, INCREMENTAL))),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(INCREMENTAL)
|
||||
.withCursorField(Lists.newArrayList("id"))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
STREAM_NAME2, SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, INCREMENTAL)))));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode getState() {
|
||||
return Jsons.jsonNode(new HashMap<>());
|
||||
}
|
||||
|
||||
@Test
|
||||
protected void testAddNewStreamToExistingSync() throws Exception {
|
||||
final List<ConfiguredAirbyteStream> configuredAirbyteStreams =
|
||||
Lists.newArrayList(CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME,
|
||||
SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withSyncMode(INCREMENTAL)
|
||||
.withCursorField(List.of("id")),
|
||||
CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME2,
|
||||
SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withSyncMode(INCREMENTAL)
|
||||
.withCursorField(List.of("id")));
|
||||
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
|
||||
new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0)));
|
||||
|
||||
// Start a sync with one stream
|
||||
final List<AirbyteMessage> messages = runRead(withSourceDefinedCursors(configuredCatalogWithOneStream));
|
||||
final List<AirbyteRecordMessage> recordMessages = filterRecords(messages);
|
||||
final List<AirbyteStateMessage> stateMessages = filterStateMessages(messages);
|
||||
final AirbyteStateMessage lastStateMessage = Iterables.getLast(stateMessages);
|
||||
final AirbyteStreamState streamState = lastStateMessage.getStream();
|
||||
|
||||
assertEquals(3, recordMessages.size());
|
||||
assertEquals(1, stateMessages.size());
|
||||
assertEquals(STREAM_NAME, streamState.getStreamDescriptor().getName());
|
||||
assertEquals(SCHEMA_NAME, streamState.getStreamDescriptor().getNamespace());
|
||||
|
||||
final ConfiguredAirbyteCatalog configuredCatalogWithTwoStreams =
|
||||
new ConfiguredAirbyteCatalog().withStreams(configuredAirbyteStreams);
|
||||
|
||||
// Start another sync with a newly added stream
|
||||
final List<AirbyteMessage> messages2 = runRead(configuredCatalogWithTwoStreams, Jsons.jsonNode(List.of(lastStateMessage)));
|
||||
final List<AirbyteRecordMessage> recordMessages2 = filterRecords(messages2);
|
||||
final List<AirbyteStateMessage> stateMessages2 = filterStateMessages(messages2);
|
||||
|
||||
assertEquals(4, recordMessages2.size());
|
||||
assertEquals(2, stateMessages2.size());

assertEquals(STREAM_NAME, stateMessages2.get(0).getStream().getStreamDescriptor().getName());
|
||||
assertEquals(SCHEMA_NAME, stateMessages2.get(0).getStream().getStreamDescriptor().getNamespace());
|
||||
assertEquals(STREAM_NAME2, stateMessages2.get(1).getStream().getStreamDescriptor().getName());
|
||||
assertEquals(SCHEMA_NAME, stateMessages2.get(1).getStream().getStreamDescriptor().getNamespace());
|
||||
}
|
||||
|
||||
@Test
|
||||
protected void testNullValueConversion() throws Exception {
|
||||
final List<ConfiguredAirbyteStream> configuredAirbyteStreams =
|
||||
Lists.newArrayList(CatalogHelpers.createConfiguredAirbyteStream(STREAM_NAME3,
|
||||
SCHEMA_NAME,
|
||||
Field.of("id", JsonSchemaType.NUMBER),
|
||||
Field.of("name", JsonSchemaType.STRING),
|
||||
Field.of("userid", JsonSchemaType.NUMBER))
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withSyncMode(INCREMENTAL)
|
||||
.withCursorField(List.of("id")));
|
||||
final ConfiguredAirbyteCatalog configuredCatalogWithOneStream =
|
||||
new ConfiguredAirbyteCatalog().withStreams(List.of(configuredAirbyteStreams.get(0)));
|
||||
|
||||
final List<AirbyteMessage> airbyteMessages = runRead(configuredCatalogWithOneStream, getState());
|
||||
final List<AirbyteRecordMessage> recordMessages = filterRecords(airbyteMessages);
|
||||
final List<AirbyteStateMessage> stateMessages = airbyteMessages
|
||||
.stream()
|
||||
.filter(m -> m.getType() == AirbyteMessage.Type.STATE)
|
||||
.map(AirbyteMessage::getState)
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(1, recordMessages.size());
assertFalse(stateMessages.isEmpty(), "Expected at least one state message from the sync.");
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
assertTrue(recordMessages.get(0).getData().equals(
mapper.readTree("{\"id\":4, \"name\":\"voyager\", \"userid\":null}")));
|
||||
|
||||
// when we run incremental sync again there should be no new records. Run a sync with the latest
|
||||
// state message and assert no records were emitted.
|
||||
JsonNode latestState = extractLatestState(stateMessages);
|
||||
|
||||
testdb.getDatabase().query(c -> {
|
||||
return c.query("INSERT INTO %s.%s (id, name) VALUES (5,'deep space nine');".formatted(SCHEMA_NAME, STREAM_NAME3));
|
||||
}).execute();
|
||||
|
||||
assert Objects.nonNull(latestState);
|
||||
final List<AirbyteRecordMessage> secondSyncRecords = filterRecords(runRead(configuredCatalogWithOneStream, latestState));
|
||||
assertFalse(
|
||||
secondSyncRecords.isEmpty(),
|
||||
"Expected the second incremental sync to produce records.");
|
||||
assertTrue(secondSyncRecords.get(0).getData().equals(
mapper.readTree("{\"id\":5, \"name\":\"deep space nine\", \"userid\":null}")));
|
||||
|
||||
}
|
||||
|
||||
private List<AirbyteStateMessage> filterStateMessages(final List<AirbyteMessage> messages) {
|
||||
return messages.stream().filter(r -> r.getType() == AirbyteMessage.Type.STATE).map(AirbyteMessage::getState)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,31 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.Database;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;

public class MssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest {

  @Override
  protected Database setupDatabase() {
    testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
    return testdb.getDatabase();
  }

  @Override
  protected JsonNode getConfig() {
    return testdb.integrationTestConfigBuilder()
        .withoutSsl()
        .build();
  }

  @Override
  public boolean testCatalog() {
    return true;
  }

}
@@ -1,94 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.containsInAnyOrder;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
import java.sql.Connection;
|
||||
import java.sql.JDBCType;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class MssqlSourceOperationsTest {
|
||||
|
||||
private final MssqlSourceOperations mssqlSourceOperations = new MssqlSourceOperations();
|
||||
|
||||
private MsSQLTestDatabase testdb;
|
||||
|
||||
private final String cursorColumn = "cursor_column";
|
||||
|
||||
@BeforeEach
|
||||
public void init() {
|
||||
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
testdb.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void setDateTimeOffsetColumnAsCursor() throws SQLException {
|
||||
final String tableName = "datetimeoffset_table";
|
||||
final String createTableQuery = String.format("CREATE TABLE %s(id INTEGER PRIMARY KEY IDENTITY(1,1), %s DATETIMEOFFSET(7));",
|
||||
tableName,
|
||||
cursorColumn);
|
||||
executeQuery(createTableQuery);
|
||||
final List<JsonNode> expectedRecords = new ArrayList<>();
|
||||
for (int i = 1; i <= 4; i++) {
|
||||
final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap());
|
||||
// Manually generate DATETIMEOFFSET data
|
||||
final String cursorValue = String.format("'2023-0%s-10T10:00:00.100000Z'", i);
|
||||
jsonNode.put("id", i);
|
||||
// Remove single quotes from string since the date being retrieved will not have quotes
|
||||
jsonNode.put(cursorColumn, cursorValue.replaceAll("\'", ""));
|
||||
final String insertQuery = String.format("INSERT INTO %s (%s) VALUES (CAST(%s as DATETIMEOFFSET))", tableName, cursorColumn, cursorValue);
|
||||
|
||||
executeQuery(insertQuery);
|
||||
expectedRecords.add(jsonNode);
|
||||
}
|
||||
final String cursorAnchorValue = "2023-01-01T00:00:00.000000+00:00";
|
||||
final List<JsonNode> actualRecords = new ArrayList<>();
|
||||
try (final Connection connection = testdb.getContainer().createConnection("")) {
|
||||
final PreparedStatement preparedStatement = connection.prepareStatement(
|
||||
"SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?");
|
||||
mssqlSourceOperations.setCursorField(preparedStatement,
|
||||
1,
|
||||
JDBCType.TIMESTAMP_WITH_TIMEZONE,
|
||||
cursorAnchorValue);
|
||||
|
||||
try (final ResultSet resultSet = preparedStatement.executeQuery()) {
|
||||
final int columnCount = resultSet.getMetaData().getColumnCount();
|
||||
while (resultSet.next()) {
|
||||
final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap());
|
||||
for (int i = 1; i <= columnCount; i++) {
|
||||
mssqlSourceOperations.copyToJsonField(resultSet, i, jsonNode);
|
||||
}
|
||||
actualRecords.add(jsonNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray()));
|
||||
}
|
||||
|
||||
protected void executeQuery(final String query) throws SQLException {
|
||||
try (final Connection connection = testdb.getContainer().createConnection("")) {
|
||||
connection.createStatement().execute(query);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,16 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import io.airbyte.cdk.integrations.base.ssh.SshTunnel.TunnelMethod;

public class SshKeyMssqlSourceAcceptanceTest extends AbstractSshMssqlSourceAcceptanceTest {

  @Override
  public TunnelMethod getTunnelMethod() {
    return TunnelMethod.SSH_KEY_AUTH;
  }

}
@@ -1,16 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import io.airbyte.cdk.integrations.base.ssh.SshTunnel.TunnelMethod;

public class SshPasswordMssqlSourceAcceptanceTest extends AbstractSshMssqlSourceAcceptanceTest {

  @Override
  public TunnelMethod getTunnelMethod() {
    return TunnelMethod.SSH_PASSWORD_AUTH;
  }

}
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
|
||||
public class SslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest {
|
||||
|
||||
@Override
|
||||
protected JsonNode getConfig() {
|
||||
return testdb.integrationTestConfigBuilder()
|
||||
.withEncrytedTrustServerCertificate()
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setupEnvironment(final TestDestinationEnv environment) {
|
||||
final var container = new MsSQLContainerFactory().shared(BaseImage.MSSQL_2022.reference);
|
||||
testdb = new MsSQLTestDatabase(container);
|
||||
testdb = testdb
|
||||
.withConnectionProperty("encrypt", "true")
|
||||
.withConnectionProperty("trustServerCertificate", "true")
|
||||
.withConnectionProperty("databaseName", testdb.getDatabaseName())
|
||||
.initialized()
|
||||
.with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));")
|
||||
.with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("INSERT INTO id_and_name (id, name, born) VALUES " +
|
||||
"(1, 'picard', '2124-03-04T01:01:01Z'), " +
|
||||
"(2, 'crusher', '2124-03-04T01:01:01Z'), " +
|
||||
"(3, 'vash', '2124-03-04T01:01:01Z');")
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato'), (4, 'Argo');", SCHEMA_NAME, STREAM_NAME2)
|
||||
.with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY, name VARCHAR(200), userid INTEGER DEFAULT NULL);", SCHEMA_NAME, STREAM_NAME3)
|
||||
.with("INSERT INTO %s.%s (id, name) VALUES (4,'voyager');", SCHEMA_NAME, STREAM_NAME3);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,7 +0,0 @@
{
  "host": "default",
  "port": 5555,
  "database": "default",
  "username": "default",
  "password": "default"
}
@@ -1,313 +0,0 @@
|
||||
{
|
||||
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/mssql",
|
||||
"connectionSpecification": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "MSSQL Source Spec",
|
||||
"type": "object",
|
||||
"required": ["host", "port", "database", "username", "password"],
|
||||
"properties": {
|
||||
"host": {
|
||||
"description": "The hostname of the database.",
|
||||
"title": "Host",
|
||||
"type": "string",
|
||||
"order": 0
|
||||
},
|
||||
"port": {
|
||||
"description": "The port of the database.",
|
||||
"title": "Port",
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 65536,
|
||||
"examples": ["1433"],
|
||||
"order": 1
|
||||
},
|
||||
"database": {
|
||||
"description": "The name of the database.",
|
||||
"title": "Database",
|
||||
"type": "string",
|
||||
"examples": ["master"],
|
||||
"order": 2
|
||||
},
|
||||
"schemas": {
|
||||
"title": "Schemas",
|
||||
"description": "The list of schemas to sync from. Defaults to user. Case sensitive.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 0,
|
||||
"uniqueItems": true,
|
||||
"default": ["dbo"],
|
||||
"order": 3
|
||||
},
|
||||
"username": {
|
||||
"description": "The username which is used to access the database.",
|
||||
"title": "Username",
|
||||
"type": "string",
|
||||
"order": 4
|
||||
},
|
||||
"password": {
|
||||
"description": "The password associated with the username.",
|
||||
"title": "Password",
|
||||
"type": "string",
|
||||
"airbyte_secret": true,
|
||||
"order": 5
|
||||
},
|
||||
"jdbc_url_params": {
|
||||
"title": "JDBC URL Params",
|
||||
"description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).",
|
||||
"type": "string",
|
||||
"order": 6
|
||||
},
|
||||
"ssl_method": {
|
||||
"title": "SSL Method",
|
||||
"type": "object",
|
||||
"description": "The encryption method which is used when communicating with the database.",
|
||||
"order": 7,
|
||||
"oneOf": [
|
||||
{
|
||||
"title": "Unencrypted",
|
||||
"description": "Data transfer will not be encrypted.",
|
||||
"required": ["ssl_method"],
|
||||
"properties": {
|
||||
"ssl_method": {
|
||||
"type": "string",
|
||||
"const": "unencrypted"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Encrypted (trust server certificate)",
|
||||
"description": "Use the certificate provided by the server without verification. (For testing purposes only!)",
|
||||
"required": ["ssl_method"],
|
||||
"properties": {
|
||||
"ssl_method": {
|
||||
"type": "string",
|
||||
"const": "encrypted_trust_server_certificate"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Encrypted (verify certificate)",
|
||||
"description": "Verify and use the certificate provided by the server.",
|
||||
"required": ["ssl_method"],
|
||||
"properties": {
|
||||
"ssl_method": {
|
||||
"type": "string",
|
||||
"const": "encrypted_verify_certificate"
|
||||
},
|
||||
"hostNameInCertificate": {
|
||||
"title": "Host Name In Certificate",
|
||||
"type": "string",
|
||||
"description": "Specifies the host name of the server. The value of this property must match the subject property of the certificate.",
|
||||
"order": 0
|
||||
},
|
||||
"certificate": {
|
||||
"title": "Certificate",
|
||||
"type": "string",
|
||||
"description": "certificate of the server, or of the CA that signed the server certificate",
|
||||
"order": 1,
|
||||
"airbyte_secret": true,
|
||||
"multiline": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"replication_method": {
|
||||
"type": "object",
|
||||
"title": "Update Method",
|
||||
"description": "Configures how data is extracted from the database.",
|
||||
"default": "CDC",
|
||||
"display_type": "radio",
|
||||
"order": 8,
|
||||
"oneOf": [
|
||||
{
|
||||
"title": "Read Changes using Change Data Capture (CDC)",
|
||||
"description": "<i>Recommended</i> - Incrementally reads new inserts, updates, and deletes using the SQL Server's <a href=\"https://docs.airbyte.com/integrations/sources/mssql/#change-data-capture-cdc\">change data capture feature</a>. This must be enabled on your database.",
|
||||
"required": ["method"],
|
||||
"properties": {
|
||||
"method": {
|
||||
"type": "string",
|
||||
"const": "CDC",
|
||||
"order": 0
|
||||
},
|
||||
"initial_waiting_seconds": {
|
||||
"type": "integer",
|
||||
"title": "Initial Waiting Time in Seconds (Advanced)",
|
||||
"description": "The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 3600 seconds. Read about <a href=\"https://docs.airbyte.com/integrations/sources/mysql/#change-data-capture-cdc\">initial waiting time</a>.",
|
||||
"default": 300,
|
||||
"min": 120,
|
||||
"max": 3600,
|
||||
"order": 3
|
||||
},
|
||||
"invalid_cdc_cursor_position_behavior": {
|
||||
"type": "string",
|
||||
"title": "Invalid CDC position behavior (Advanced)",
|
||||
"description": "Determines whether Airbyte should fail or re-sync data in case of an stale/invalid cursor value into the WAL. If 'Fail sync' is chosen, a user will have to manually reset the connection before being able to continue syncing data. If 'Re-sync data' is chosen, Airbyte will automatically trigger a refresh but could lead to higher cloud costs and data loss.",
|
||||
"enum": ["Fail sync", "Re-sync data"],
|
||||
"default": "Fail sync",
|
||||
"order": 4
|
||||
},
|
||||
"queue_size": {
|
||||
"type": "integer",
|
||||
"title": "Size of the queue (Advanced)",
|
||||
"description": "The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful.",
|
||||
"default": 10000,
|
||||
"order": 5,
|
||||
"min": 1000,
|
||||
"max": 10000
|
||||
},
|
||||
"initial_load_timeout_hours": {
|
||||
"type": "integer",
|
||||
"title": "Initial Load Timeout in Hours (Advanced)",
|
||||
"description": "The amount of time an initial load is allowed to continue for before catching up on CDC logs.",
|
||||
"default": 8,
|
||||
"min": 4,
|
||||
"max": 24,
|
||||
"order": 6
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Scan Changes with User Defined Cursor",
|
||||
"description": "Incrementally detects new inserts and updates using the <a href=\"https://docs.airbyte.com/understanding-airbyte/connections/incremental-append/#user-defined-cursor\">cursor column</a> chosen when configuring a connection (e.g. created_at, updated_at).",
|
||||
"required": ["method"],
|
||||
"properties": {
|
||||
"method": {
|
||||
"type": "string",
|
||||
"const": "STANDARD",
|
||||
"order": 0
|
||||
},
|
||||
"exclude_todays_data": {
|
||||
"title": "Exclude Today's Data",
|
||||
"description": "When enabled incremental syncs using a cursor of a temporal types (date or datetime) will include cursor values only up until last midnight (Advanced)",
|
||||
"default": false,
|
||||
"type": "boolean",
|
||||
"always_show": true,
|
||||
"order": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"tunnel_method": {
|
||||
"type": "object",
|
||||
"title": "SSH Tunnel Method",
|
||||
"description": "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use.",
|
||||
"oneOf": [
|
||||
{
|
||||
"title": "No Tunnel",
|
||||
"required": ["tunnel_method"],
|
||||
"properties": {
|
||||
"tunnel_method": {
|
||||
"description": "No ssh tunnel needed to connect to database",
|
||||
"type": "string",
|
||||
"const": "NO_TUNNEL",
|
||||
"order": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "SSH Key Authentication",
|
||||
"required": [
|
||||
"tunnel_method",
|
||||
"tunnel_host",
|
||||
"tunnel_port",
|
||||
"tunnel_user",
|
||||
"ssh_key"
|
||||
],
|
||||
"properties": {
|
||||
"tunnel_method": {
|
||||
"description": "Connect through a jump server tunnel host using username and ssh key",
|
||||
"type": "string",
|
||||
"const": "SSH_KEY_AUTH",
|
||||
"order": 0
|
||||
},
|
||||
"tunnel_host": {
|
||||
"title": "SSH Tunnel Jump Server Host",
|
||||
"description": "Hostname of the jump server host that allows inbound ssh tunnel.",
|
||||
"type": "string",
|
||||
"order": 1
|
||||
},
|
||||
"tunnel_port": {
|
||||
"title": "SSH Connection Port",
|
||||
"description": "Port on the proxy/jump server that accepts inbound ssh connections.",
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 65536,
|
||||
"default": 22,
|
||||
"examples": ["22"],
|
||||
"order": 2
|
||||
},
|
||||
"tunnel_user": {
|
||||
"title": "SSH Login Username",
|
||||
"description": "OS-level username for logging into the jump server host.",
|
||||
"type": "string",
|
||||
"order": 3
|
||||
},
|
||||
"ssh_key": {
|
||||
"title": "SSH Private Key",
|
||||
"description": "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )",
|
||||
"type": "string",
|
||||
"airbyte_secret": true,
|
||||
"multiline": true,
|
||||
"order": 4
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Password Authentication",
|
||||
"required": [
|
||||
"tunnel_method",
|
||||
"tunnel_host",
|
||||
"tunnel_port",
|
||||
"tunnel_user",
|
||||
"tunnel_user_password"
|
||||
],
|
||||
"properties": {
|
||||
"tunnel_method": {
|
||||
"description": "Connect through a jump server tunnel host using username and password authentication",
|
||||
"type": "string",
|
||||
"const": "SSH_PASSWORD_AUTH",
|
||||
"order": 0
|
||||
},
|
||||
"tunnel_host": {
|
||||
"title": "SSH Tunnel Jump Server Host",
|
||||
"description": "Hostname of the jump server host that allows inbound ssh tunnel.",
|
||||
"type": "string",
|
||||
"order": 1
|
||||
},
|
||||
"tunnel_port": {
|
||||
"title": "SSH Connection Port",
|
||||
"description": "Port on the proxy/jump server that accepts inbound ssh connections.",
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 65536,
|
||||
"default": 22,
|
||||
"examples": ["22"],
|
||||
"order": 2
|
||||
},
|
||||
"tunnel_user": {
|
||||
"title": "SSH Login Username",
|
||||
"description": "OS-level username for logging into the jump server host",
|
||||
"type": "string",
|
||||
"order": 3
|
||||
},
|
||||
"tunnel_user_password": {
|
||||
"title": "Password",
|
||||
"description": "OS-level password for logging into the jump server host",
|
||||
"type": "string",
|
||||
"airbyte_secret": true,
|
||||
"order": 4
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"supportsNormalization": false,
|
||||
"supportsDBT": false,
|
||||
"supported_destination_sync_modes": []
|
||||
}
|
||||
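For reference, a source configuration that satisfies the spec above might look like the sketch below. The host, port, credentials, and database values are placeholders (the `host`/`port` fields come from the earlier part of the spec, not shown here), and only one option from each `oneOf` group is selected:

```json
{
  "host": "mssql.internal.example.com",
  "port": 1433,
  "database": "master",
  "schemas": ["dbo"],
  "username": "airbyte_reader",
  "password": "********",
  "ssl_method": { "ssl_method": "encrypted_trust_server_certificate" },
  "replication_method": { "method": "CDC", "initial_waiting_seconds": 300 },
  "tunnel_method": { "tunnel_method": "NO_TUNNEL" }
}
```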
@@ -1,82 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.cdk.db.Database;
import io.airbyte.cdk.db.factory.DSLContextFactory;
import io.airbyte.cdk.db.factory.DatabaseDriver;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv;
import io.airbyte.cdk.integrations.standardtest.source.performancetest.AbstractSourceFillDbWithTestData;
import io.airbyte.commons.json.Jsons;
import java.util.stream.Stream;
import org.jooq.DSLContext;
import org.junit.jupiter.params.provider.Arguments;

public class FillMsSqlTestDbScriptTest extends AbstractSourceFillDbWithTestData {

  private JsonNode config;
  private DSLContext dslContext;

  @Override
  protected JsonNode getConfig() {
    return config;
  }

  @Override
  protected void tearDown(final TestDestinationEnv testEnv) {}

  @Override
  protected String getImageName() {
    return "airbyte/source-mssql:dev";
  }

  @Override
  protected Database setupDatabase(final String dbName) {
    final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder()
        .put("method", "Standard")
        .build());

    config = Jsons.jsonNode(ImmutableMap.builder()
        .put(JdbcUtils.HOST_KEY, "your_host")
        .put(JdbcUtils.PORT_KEY, 1433)
        .put(JdbcUtils.DATABASE_KEY, dbName) // set your db name
        .put(JdbcUtils.USERNAME_KEY, "your_username")
        .put(JdbcUtils.PASSWORD_KEY, "your_pass")
        .put("replication_method", replicationMethod)
        .build());

    dslContext = DSLContextFactory.create(
        config.get(JdbcUtils.USERNAME_KEY).asText(),
        config.get(JdbcUtils.PASSWORD_KEY).asText(),
        DatabaseDriver.MSSQLSERVER.getDriverClassName(),
        String.format("jdbc:sqlserver://%s:%s;databaseName=%s;",
            config.get(JdbcUtils.HOST_KEY).asText(),
            config.get(JdbcUtils.PORT_KEY).asInt(),
            dbName),
        null);

    return new Database(dslContext);
  }

  /**
   * This is a data provider for the fill-DB script. Each argument group is run as a separate test.
   * 1st arg - the name of the DB used in the JDBC connection string. 2nd arg - the schema name used
   * as the namespace in the configured Airbyte catalog. 3rd arg - the number of expected records
   * retrieved in each stream. 4th arg - the number of message batches
   * (numberOfMessages*numberOfBatches, e.g. 100*2=200 messages in total in each stream). 5th arg -
   * the number of columns in each stream/table used for the Airbyte catalog configuration. 6th arg -
   * the number of streams to read in the configured Airbyte catalog. Each stream/table in the DB
   * should be named "test_0", "test_1", ..., "test_n".
   */
  @Override
  protected Stream<Arguments> provideParameters() {
    return Stream.of(Arguments.of("your_db_name", "dbo", 100, 2, 240, 1000) // "dbo" is the default schema name in MSSQL
    );
  }

}
@@ -1,55 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.standardtest.source.performancetest.AbstractSourcePerformanceTest;
import io.airbyte.commons.io.IOs;
import io.airbyte.commons.json.Jsons;
import java.nio.file.Path;
import java.util.stream.Stream;
import org.junit.jupiter.params.provider.Arguments;

public class MssqlSourcePerformanceTest extends AbstractSourcePerformanceTest {

  private static final String PERFORMANCE_SECRET_CREDS = "secrets/performance-config.json";

  @Override
  protected String getImageName() {
    return "airbyte/source-mssql:dev";
  }

  @Override
  protected void setupDatabase(final String dbName) {
    final JsonNode plainConfig = Jsons.deserialize(IOs.readFile(Path.of(PERFORMANCE_SECRET_CREDS)));

    setConfig(Jsons.jsonNode(ImmutableMap.builder()
        .put(JdbcUtils.HOST_KEY, plainConfig.get(JdbcUtils.HOST_KEY))
        .put(JdbcUtils.PORT_KEY, plainConfig.get(JdbcUtils.PORT_KEY))
        .put(JdbcUtils.DATABASE_KEY, dbName)
        .put(JdbcUtils.USERNAME_KEY, plainConfig.get(JdbcUtils.USERNAME_KEY))
        .put(JdbcUtils.PASSWORD_KEY, plainConfig.get(JdbcUtils.PASSWORD_KEY))
        .build()));
  }

  /**
   * This is a data provider for performance tests. Each argument group is run as a separate test.
   * 1st arg - the name of the DB used in the JDBC connection string. 2nd arg - the schema name used
   * as the namespace in the configured Airbyte catalog. 3rd arg - the number of expected records
   * retrieved in each stream. 4th arg - the number of columns in each stream/table used for the
   * Airbyte catalog configuration. 5th arg - the number of streams to read in the configured Airbyte
   * catalog. Each stream/table in the DB should be named "test_0", "test_1", ..., "test_n".
   */
  @Override
  protected Stream<Arguments> provideParameters() {
    return Stream.of(
        Arguments.of("t1000_c240_r200", "dbo", 200, 240, 1000),
        Arguments.of("t25_c8_r50k_s10kb", "dbo", 50000, 8, 25),
        Arguments.of("t1000_c8_r10k_s500b", "dbo", 10000, 8, 1000));
  }

}
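The performance test above reads its connection details from `secrets/performance-config.json`. A minimal sketch of that file, assuming it only needs the four keys the test actually reads (host, port, username, password) and using placeholder values:

```json
{
  "host": "your-sqlserver-host.example.com",
  "port": 1433,
  "username": "perf_test_user",
  "password": "replace-with-a-real-password"
}
```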
@@ -1,305 +0,0 @@
CREATE PROCEDURE table_copy(@tablecount INT) AS
BEGIN
  SET NOCOUNT ON;

  DECLARE @v_max_table INT;
  DECLARE @v_counter_table INT;
  DECLARE @tnamee VARCHAR(255);
  SET @v_max_table = @tablecount;
  SET @v_counter_table = 1;

  WHILE @v_counter_table < @v_max_table
  BEGIN
    SET @tnamee = concat('SELECT * INTO test_', @v_counter_table, ' FROM test;');
    EXEC(@tnamee);
    SET @v_counter_table = @v_counter_table + 1;
  END;
END;

GO --
CREATE PROCEDURE insert_rows(
  @allrows INT,
  @insertcount INT,
  @value NVARCHAR(MAX)
) AS
BEGIN
  SET NOCOUNT ON;

  DECLARE @dummyIpsum VARCHAR(255);
  DECLARE @fieldText NVARCHAR(MAX);
  SET @fieldText = @value;
  DECLARE @vmax INT;
  DECLARE @vmaxx INT;
  DECLARE @vmaxoneinsert INT;
  DECLARE @counter INT;
  DECLARE @lastinsertcounter INT;
  DECLARE @lastinsert INT;
  DECLARE @fullloop INT;
  DECLARE @fullloopcounter INT;
  SET @vmax = @allrows;
  SET @vmaxx = @allrows;
  SET @vmaxoneinsert = @insertcount;
  SET @counter = 1;
  SET @lastinsertcounter = 1;
  SET @lastinsert = 0;
  SET @fullloop = 0;
  SET @fullloopcounter = 0;
  SET @dummyIpsum = '''dummy_ipsum''';

  WHILE @vmaxx <= @vmaxoneinsert
  BEGIN
    SET @vmaxoneinsert = @vmaxx;
    SET @fullloop = @fullloop + 1;
    SET @vmaxx = @vmaxx + 1;
  END;

  WHILE @vmax > @vmaxoneinsert
  BEGIN
    SET @fullloop = @fullloop + 1;
    SET @vmax = @vmax - @vmaxoneinsert;
    SET @lastinsert = @vmax;
  END;

  DECLARE @insertTable NVARCHAR(MAX);
  SET @insertTable = CONVERT(NVARCHAR(MAX),
    'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values (');

  WHILE @counter < @vmaxoneinsert
  BEGIN
    SET @insertTable = CONVERT(NVARCHAR(MAX), concat(
      @insertTable,
      @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ',
      @fieldText, ', CURRENT_TIMESTAMP), ('));
    SET @counter = @counter + 1;
  END;
  SET @insertTable = CONVERT(NVARCHAR(MAX), concat(
    @insertTable,
    @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ',
    @fieldText, ', CURRENT_TIMESTAMP);'));

  WHILE @vmax < 1
  BEGIN
    SET @fullloop = 0;
    SET @vmax = 1;
  END;

  WHILE @fullloopcounter < @fullloop
  BEGIN
    EXEC(@insertTable);
    SET @fullloopcounter = @fullloopcounter + 1;
  END;

  DECLARE @insertTableLasted NVARCHAR(MAX);
  SET @insertTableLasted = CONVERT(NVARCHAR(MAX),
    'insert into test (varchar1, varchar2, varchar3, varchar4, varchar5, longtextfield, timestampfield) values (');

  WHILE @lastinsertcounter < @lastinsert
  BEGIN
    SET @insertTableLasted = CONVERT(NVARCHAR(MAX), concat(
      @insertTableLasted,
      @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ',
      @fieldText, ', CURRENT_TIMESTAMP), ('));
    SET @lastinsertcounter = @lastinsertcounter + 1;
  END;
  SET @insertTableLasted = CONVERT(NVARCHAR(MAX), concat(
    @insertTableLasted,
    @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ', @dummyIpsum, ', ',
    @fieldText, ', CURRENT_TIMESTAMP);'));

  WHILE @lastinsert > 0
  BEGIN
    EXEC(@insertTableLasted);
    SET @lastinsert = 0;
  END;
END;

GO --
CREATE PROCEDURE table_create(@val INT) AS
BEGIN
  SET NOCOUNT ON;

  -- SQLINES LICENSE FOR EVALUATION USE ONLY
  CREATE TABLE test(
    id INT CHECK(id > 0) NOT NULL IDENTITY PRIMARY KEY,
    varchar1 VARCHAR(255),
    varchar2 VARCHAR(255),
    varchar3 VARCHAR(255),
    varchar4 VARCHAR(255),
    varchar5 VARCHAR(255),
    longtextfield nvarchar(MAX),
    timestampfield datetime2(0)
  );

  DECLARE @extraSmallText NVARCHAR(MAX);
  DECLARE @smallText NVARCHAR(MAX);
  DECLARE @regularText NVARCHAR(MAX);
  DECLARE @largeText NVARCHAR(MAX);
  DECLARE @someText nvarchar(MAX);

  SELECT @someText = N'some text, some text, ';
  SET @extraSmallText = N'''test weight 50b - some text, some text, some text''';
  SET @smallText = N'''test weight 500b - ';
  SET @regularText = N'''test weight 10kb - ';
  SET @largeText = N'''test weight 100kb - ';

  SELECT @smallText = @smallText + REPLICATE(@someText, 20) + N'''';
  SELECT @regularText = @regularText + REPLICATE(@someText, 590) + N'some text''';
  SELECT @largeText = @largeText + REPLICATE(@someText, 4450) + N'some text''';

  -- TODO: change the following @allrows to control the number of records with different sizes
  -- number of 50B records
  EXEC insert_rows @allrows = 0, @insertcount = 998, @value = @extraSmallText
  -- number of 500B records
  EXEC insert_rows @allrows = 0, @insertcount = 998, @value = @smallText
  -- number of 10Kb records
  EXEC insert_rows @allrows = 0, @insertcount = 998, @value = @regularText
  -- number of 100Kb records
  EXEC insert_rows @allrows = 0, @insertcount = 98, @value = @largeText
END;

GO --
EXEC table_create @val = 0
DROP PROCEDURE IF EXISTS insert_rows;

DROP PROCEDURE IF EXISTS table_create;

-- TODO: change the value to control the number of tables
EXEC table_copy @tablecount = 1;

DROP PROCEDURE IF EXISTS table_copy;

EXEC sp_rename 'test', 'test_0';
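The two TODOs in the script above control the dataset size. As a sketch (the exact numbers are up to the benchmark you want to build, and these values are hypothetical), requesting 5,000 rows for one of the size buckets and 100 copies of the table would look like this, with `@insertcount` kept at the batch size the script already uses:

```sql
-- Inside table_create: request 5,000 rows for the 10Kb size bucket (hypothetical value).
EXEC insert_rows @allrows = 5000, @insertcount = 998, @value = @regularText
-- At the end of the script: copy the populated table into 100 tables named test_0 .. test_99.
EXEC table_copy @tablecount = 100;
```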
@@ -1,749 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
|
||||
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_DELETED_AT;
|
||||
import static io.airbyte.cdk.integrations.debezium.internals.DebeziumEventConverter.CDC_UPDATED_AT;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.ORDERED_COL_STATE_TYPE;
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY;
|
||||
import static org.awaitility.Awaitility.await;
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Streams;
|
||||
import io.airbyte.cdk.db.factory.DataSourceFactory;
|
||||
import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig;
|
||||
import io.airbyte.cdk.integrations.JdbcConnector;
|
||||
import io.airbyte.cdk.integrations.debezium.CdcSourceTest;
|
||||
import io.airbyte.cdk.integrations.debezium.CdcTargetPosition;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.commons.util.AutoCloseableIterator;
|
||||
import io.airbyte.commons.util.AutoCloseableIterators;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
|
||||
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
|
||||
import io.airbyte.protocol.models.Field;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import io.airbyte.protocol.models.v0.*;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
|
||||
import io.debezium.connector.sqlserver.Lsn;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
import javax.sql.DataSource;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.junit.jupiter.api.TestInstance.Lifecycle;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.junit.jupiter.api.parallel.ExecutionMode;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@TestInstance(Lifecycle.PER_METHOD)
|
||||
@Execution(ExecutionMode.CONCURRENT)
|
||||
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH")
|
||||
public class CdcMssqlSourceTest extends CdcSourceTest<MssqlSource, MsSQLTestDatabase> {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(CdcSourceTest.class);
|
||||
|
||||
static private final String CDC_ROLE_NAME = "cdc_selector";
|
||||
|
||||
static private final String TEST_USER_NAME_PREFIX = "cdc_test_user";
|
||||
|
||||
private DataSource testDataSource;
|
||||
|
||||
protected final String testUserName() {
|
||||
return testdb.withNamespace(TEST_USER_NAME_PREFIX);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AirbyteCatalog expectedCatalogForDiscover() {
|
||||
final String COL_ID = "id";
|
||||
final String COL_MAKE_ID = "make_id";
|
||||
final String COL_MODEL = "model";
|
||||
final String MODELS_STREAM_NAME_2 = "models_stream_2";
|
||||
final String MODELS_STREAM_NAME = "models";
|
||||
|
||||
AirbyteCatalog expectedCatalog = new AirbyteCatalog()
|
||||
.withStreams(
|
||||
java.util.List.of(
|
||||
CatalogHelpers.createAirbyteStream(
|
||||
MODELS_STREAM_NAME,
|
||||
modelsSchema(),
|
||||
Field.of(COL_ID, JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MODEL, JsonSchemaType.STRING))
|
||||
.withSupportedSyncModes(
|
||||
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
|
||||
.withSourceDefinedPrimaryKey(
|
||||
java.util.List.of(java.util.List.of(COL_ID)))
|
||||
.withIsResumable(true)));
|
||||
|
||||
Map<String, String> columns = ImmutableMap.of(
|
||||
COL_ID, "INTEGER",
|
||||
COL_MAKE_ID, "INTEGER",
|
||||
COL_MODEL, "VARCHAR(200)");
|
||||
testdb.with(
|
||||
createTableSqlFmt(),
|
||||
modelsSchema(),
|
||||
MODELS_STREAM_NAME_2,
|
||||
columnClause(columns, Optional.empty()));
|
||||
|
||||
List<AirbyteStream> streams = new ArrayList<>(expectedCatalog.getStreams());
|
||||
// stream with PK
|
||||
streams.get(0).setSourceDefinedCursor(true);
|
||||
streams.get(0).setIsResumable(true);
|
||||
addCdcMetadataColumns(streams.get(0));
|
||||
addCdcDefaultCursorField(streams.get(0));
|
||||
|
||||
AirbyteStream streamWithoutPK = CatalogHelpers.createAirbyteStream(
|
||||
MODELS_STREAM_NAME_2,
|
||||
modelsSchema(),
|
||||
Field.of(COL_ID, JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MODEL, JsonSchemaType.STRING));
|
||||
streamWithoutPK.setSourceDefinedPrimaryKey(Collections.emptyList());
|
||||
streamWithoutPK.setSupportedSyncModes(java.util.List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL));
|
||||
streamWithoutPK.setSourceDefinedCursor(true);
|
||||
addCdcDefaultCursorField(streamWithoutPK);
|
||||
addCdcMetadataColumns(streamWithoutPK);
|
||||
addIsResumableFlagForNonPkTable(streamWithoutPK);
|
||||
|
||||
AirbyteStream randomStream = CatalogHelpers.createAirbyteStream(
|
||||
RANDOM_TABLE_NAME,
|
||||
randomSchema(),
|
||||
Field.of(COL_ID + "_random", JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MAKE_ID + "_random", JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MODEL + "_random", JsonSchemaType.STRING))
|
||||
.withSourceDefinedCursor(true)
|
||||
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
|
||||
.withSourceDefinedPrimaryKey(
|
||||
java.util.List.of(java.util.List.of(COL_ID + "_random")))
|
||||
.withIsResumable(true);
|
||||
|
||||
addCdcDefaultCursorField(randomStream);
|
||||
addCdcMetadataColumns(randomStream);
|
||||
|
||||
streams.add(streamWithoutPK);
|
||||
streams.add(randomStream);
|
||||
expectedCatalog.withStreams(streams);
|
||||
return expectedCatalog;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MsSQLTestDatabase createTestDatabase() {
|
||||
return MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT)
|
||||
.withWaitUntilAgentRunning()
|
||||
.withCdc();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MssqlSource source() {
|
||||
return new MssqlSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode config() {
|
||||
return testdb.configBuilder()
|
||||
.withHostAndPort()
|
||||
.withDatabase()
|
||||
.with(JdbcUtils.USERNAME_KEY, testUserName())
|
||||
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
|
||||
.withSchemas(modelsSchema(), randomSchema())
|
||||
.withCdcReplication()
|
||||
.withoutSsl()
|
||||
.with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertExpectedStateMessageCountMatches(final List<? extends AirbyteStateMessage> stateMessages, long totalCount) {
|
||||
AtomicLong count = new AtomicLong(0L);
|
||||
stateMessages.stream().forEach(
|
||||
stateMessage -> count.addAndGet(stateMessage.getSourceStats() != null ? stateMessage.getSourceStats().getRecordCount().longValue() : 0L));
|
||||
assertEquals(totalCount, count.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
@BeforeEach
|
||||
protected void setup() {
|
||||
testdb = createTestDatabase();
|
||||
createTables();
|
||||
// Enables cdc on MODELS_SCHEMA.MODELS_STREAM_NAME, giving CDC_ROLE_NAME select access.
|
||||
testdb
|
||||
.withCdcForTable(modelsSchema(), MODELS_STREAM_NAME, CDC_ROLE_NAME)
|
||||
.withCdcForTable(randomSchema(), RANDOM_TABLE_NAME, CDC_ROLE_NAME);
|
||||
|
||||
// Create a test user to be used by the source, with proper permissions.
|
||||
testdb
|
||||
.with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName())
|
||||
.with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName())
|
||||
.with("REVOKE ALL FROM %s CASCADE;", testUserName())
|
||||
.with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName())
|
||||
.with("GRANT SELECT ON SCHEMA :: [%s] TO %s", modelsSchema(), testUserName())
|
||||
.with("GRANT SELECT ON SCHEMA :: [%s] TO %s", randomSchema(), testUserName())
|
||||
.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName())
|
||||
.with("USE [master]")
|
||||
.with("GRANT VIEW SERVER STATE TO %s", testUserName())
|
||||
.with("USE [%s]", testdb.getDatabaseName())
|
||||
.with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName());
|
||||
|
||||
populateTables();
|
||||
waitForCdcRecords();
|
||||
testDataSource = createTestDataSource();
|
||||
}
|
||||
|
||||
public void waitForCdcRecords() {
|
||||
testdb.waitForCdcRecords(modelsSchema(), MODELS_STREAM_NAME, MODEL_RECORDS.size());
|
||||
testdb.waitForCdcRecords(randomSchema(), RANDOM_TABLE_NAME, MODEL_RECORDS_RANDOM.size());
|
||||
|
||||
}
|
||||
|
||||
protected DataSource createTestDataSource() {
|
||||
return DataSourceFactory.create(
|
||||
testUserName(),
|
||||
testdb.getPassword(),
|
||||
testdb.getDatabaseDriver().getDriverClassName(),
|
||||
testdb.getJdbcUrl(),
|
||||
Map.of("encrypt", "false", "trustServerCertificate", "true"),
|
||||
JdbcConnector.CONNECT_TIMEOUT_DEFAULT);
|
||||
}
|
||||
|
||||
@Override
|
||||
@AfterEach
|
||||
protected void tearDown() {
|
||||
try {
|
||||
DataSourceFactory.close(testDataSource);
|
||||
} catch (final Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
private JdbcDatabase testDatabase() {
|
||||
return new DefaultJdbcDatabase(testDataSource);
|
||||
}
|
||||
|
||||
// TODO : Delete this Override when MSSQL supports individual table snapshot
|
||||
@Override
|
||||
public void newTableSnapshotTest() {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addIsResumableFlagForNonPkTable(final AirbyteStream stream) {
|
||||
stream.setIsResumable(false);
|
||||
}
|
||||
|
||||
// Utilize the setup to do test on MssqlDebeziumStateUtil.
|
||||
@Test
|
||||
public void testCdcSnapshot() {
|
||||
|
||||
JdbcDatabase testDatabase = testDatabase();
|
||||
testDatabase.setSourceConfig(config());
|
||||
testDatabase.setDatabaseConfig(source().toDatabaseConfig(config()));
|
||||
|
||||
JsonNode debeziumState =
|
||||
MssqlDebeziumStateUtil.constructInitialDebeziumState(MssqlCdcHelper.getDebeziumProperties(testDatabase, getConfiguredCatalog(), true),
|
||||
getConfiguredCatalog(), testDatabase);
|
||||
|
||||
Assertions.assertEquals(3, Jsons.object(debeziumState, Map.class).size());
|
||||
Assertions.assertTrue(debeziumState.has("is_compressed"));
|
||||
Assertions.assertFalse(debeziumState.get("is_compressed").asBoolean());
|
||||
Assertions.assertTrue(debeziumState.has("mssql_db_history"));
|
||||
Assertions.assertNotNull(debeziumState.get("mssql_db_history"));
|
||||
Assertions.assertTrue(debeziumState.has("mssql_cdc_offset"));
|
||||
}
|
||||
|
||||
// Tests that even with continuous insert operations, a CDC snapshot followed by an incremental
// load will not lose data.
@Test
|
||||
@Timeout(value = 5,
|
||||
unit = TimeUnit.MINUTES)
|
||||
public void testCdcNotLoseDataWithConsistentWriting() throws Exception {
|
||||
ExecutorService executor = Executors.newFixedThreadPool(10);
|
||||
|
||||
// Insert 50 records over roughly 10 seconds.
// The intention is to insert records while the first snapshot read is running, and then to check
// that the first snapshot read plus a following incremental read together capture all of the data.
int numberOfRecordsToInsert = 50;
|
||||
var insertingProcess = executor.submit(() -> {
|
||||
for (int i = 0; i < numberOfRecordsToInsert; i++) {
|
||||
testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');",
|
||||
modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019 + i, i, "car description");
|
||||
try {
|
||||
Thread.sleep(200);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
final AutoCloseableIterator<AirbyteMessage> read1 = source()
|
||||
.read(config(), getConfiguredCatalog(), null);
|
||||
final List<AirbyteMessage> actualRecords1 = AutoCloseableIterators.toListAndClose(read1);
|
||||
final Set<AirbyteRecordMessage> recordMessages = extractRecordMessages(actualRecords1);
|
||||
final List<AirbyteStateMessage> stateMessagesFromFirstSync = extractStateMessages(actualRecords1);
|
||||
final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessagesFromFirstSync.get(stateMessagesFromFirstSync.size() - 1)));
|
||||
// Make sure the inserting process has finished before reading from the previous state.
insertingProcess.get();
|
||||
|
||||
final AutoCloseableIterator<AirbyteMessage> read2 = source()
|
||||
.read(config(), getConfiguredCatalog(), state);
|
||||
final List<AirbyteMessage> actualRecords2 = AutoCloseableIterators.toListAndClose(read2);
|
||||
|
||||
recordMessages.addAll(extractRecordMessages(actualRecords2));
|
||||
|
||||
final Set<Integer> ids = recordMessages.stream().map(message -> message.getData().get("id").intValue()).collect(Collectors.toSet());
|
||||
// Originally in setup we have inserted 6 records in the table.
|
||||
assertEquals(ids.size(), numberOfRecordsToInsert + 6);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String columnClause(final Map<String, String> columnsWithDataType, final Optional<String> primaryKey) {
|
||||
final StringBuilder columnClause = new StringBuilder();
|
||||
int i = 0;
|
||||
for (final Map.Entry<String, String> column : columnsWithDataType.entrySet()) {
|
||||
columnClause.append(column.getKey());
|
||||
columnClause.append(" ");
|
||||
columnClause.append(column.getValue());
|
||||
if (primaryKey.isPresent() && primaryKey.get().equals(column.getKey())) {
|
||||
columnClause.append(" PRIMARY KEY");
|
||||
}
|
||||
if (i < (columnsWithDataType.size() - 1)) {
|
||||
columnClause.append(",");
|
||||
columnClause.append(" ");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return columnClause.toString();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testAssertCdcEnabledInDb() {
|
||||
// since we enable cdc in setup, assert that we successfully pass this first
|
||||
assertDoesNotThrow(() -> source().assertCdcEnabledInDb(config(), testDatabase()));
|
||||
// then disable cdc and assert the check fails
|
||||
testdb.withoutCdc();
|
||||
assertThrows(RuntimeException.class, () -> source().assertCdcEnabledInDb(config(), testDatabase()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testAssertCdcSchemaQueryable() {
|
||||
// correct access granted by setup so assert check passes
|
||||
assertDoesNotThrow(() -> source().assertCdcSchemaQueryable(config(), testDatabase()));
|
||||
// now revoke perms and assert that check fails
|
||||
testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName());
|
||||
assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class,
|
||||
() -> source().assertCdcSchemaQueryable(config(), testDatabase()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCdcCheckOperationsWithDot() throws Exception {
|
||||
final String dbNameWithDot = testdb.getDatabaseName().replace("_", ".");
|
||||
testdb.with("CREATE DATABASE [%s];", dbNameWithDot)
|
||||
.with("USE [%s]", dbNameWithDot)
|
||||
.with("EXEC sys.sp_cdc_enable_db;");
|
||||
final AirbyteConnectionStatus status = source().check(config());
|
||||
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED);
|
||||
}
|
||||
|
||||
// todo: check LSN returned is actually the max LSN
|
||||
// todo: check we fail as expected under certain conditions
|
||||
@Test
|
||||
void testGetTargetPosition() throws Exception {
|
||||
// check that getTargetPosition returns higher Lsn after inserting new row
|
||||
testdb.withWaitUntilMaxLsnAvailable();
|
||||
final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn;
|
||||
testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');",
|
||||
modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car");
|
||||
// Wait for Agent capture job to log CDC change.
|
||||
await().atMost(Duration.ofSeconds(45)).until(() -> {
|
||||
final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn;
|
||||
return secondLsn.compareTo(firstLsn) > 0;
|
||||
});
|
||||
}
|
||||
|
||||
// Remove all timestamp related fields in shared state. We want to make sure other information will
|
||||
// not change.
|
||||
private void pruneSharedStateTimestamp(final JsonNode rootNode) throws Exception {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
// Navigate to the specific node
|
||||
JsonNode historyNode = rootNode.path("state").path("mssql_db_history");
|
||||
if (historyNode.isMissingNode()) {
|
||||
return; // Node not found, nothing to do
|
||||
}
|
||||
String historyJson = historyNode.asText();
|
||||
JsonNode historyJsonNode = mapper.readTree(historyJson);
|
||||
|
||||
ObjectNode objectNode = (ObjectNode) historyJsonNode;
|
||||
objectNode.remove("ts_ms");
|
||||
|
||||
if (objectNode.has("position") && objectNode.get("position").has("ts_sec")) {
|
||||
((ObjectNode) objectNode.get("position")).remove("ts_sec");
|
||||
}
|
||||
|
||||
JsonNode offsetNode = rootNode.path("state").path("mssql_cdc_offset");
|
||||
JsonNode offsetJsonNode = mapper.readTree(offsetNode.asText());
|
||||
if (offsetJsonNode.has("ts_sec")) {
|
||||
((ObjectNode) offsetJsonNode).remove("ts_sec");
|
||||
}
|
||||
|
||||
// Replace the original string with the modified one
|
||||
((ObjectNode) rootNode.path("state")).put("mssql_db_history", mapper.writeValueAsString(historyJsonNode));
|
||||
((ObjectNode) rootNode.path("state")).put("mssql_cdc_offset", mapper.writeValueAsString(offsetJsonNode));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTwoStreamSync() throws Exception {
|
||||
// Add another stream models_2 and read that one as well.
|
||||
final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog());
|
||||
|
||||
final List<JsonNode> MODEL_RECORDS_2 = ImmutableList.of(
|
||||
Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")),
|
||||
Jsons.jsonNode(ImmutableMap.of(COL_ID, 120, COL_MAKE_ID, 1, COL_MODEL, "Focus-2")),
|
||||
Jsons.jsonNode(ImmutableMap.of(COL_ID, 130, COL_MAKE_ID, 1, COL_MODEL, "Ranger-2")),
|
||||
Jsons.jsonNode(ImmutableMap.of(COL_ID, 140, COL_MAKE_ID, 2, COL_MODEL, "GLA-2")),
|
||||
Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")),
|
||||
Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2")));
|
||||
|
||||
testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2",
|
||||
columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID)));
|
||||
|
||||
for (final JsonNode recordJson : MODEL_RECORDS_2) {
|
||||
writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID,
|
||||
COL_MAKE_ID, COL_MODEL);
|
||||
}
|
||||
|
||||
final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream()
|
||||
.withStream(CatalogHelpers.createAirbyteStream(
|
||||
MODELS_STREAM_NAME + "_2",
|
||||
modelsSchema(),
|
||||
Field.of(COL_ID, JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
|
||||
Field.of(COL_MODEL, JsonSchemaType.STRING))
|
||||
.withSupportedSyncModes(
|
||||
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
|
||||
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))));
|
||||
airbyteStream.setSyncMode(SyncMode.INCREMENTAL);
|
||||
|
||||
final List<ConfiguredAirbyteStream> streams = configuredCatalog.getStreams();
|
||||
streams.add(airbyteStream);
|
||||
configuredCatalog.withStreams(streams);
|
||||
|
||||
final AutoCloseableIterator<AirbyteMessage> read1 = source()
|
||||
.read(config(), configuredCatalog, null);
|
||||
final List<AirbyteMessage> actualRecords1 = AutoCloseableIterators.toListAndClose(read1);
|
||||
|
||||
final Set<AirbyteRecordMessage> recordMessages1 = extractRecordMessages(actualRecords1);
|
||||
final List<AirbyteStateMessage> stateMessages1 = extractStateMessages(actualRecords1);
|
||||
assertEquals(13, stateMessages1.size());
|
||||
assertExpectedStateMessagesWithTotalCount(stateMessages1, 12);
|
||||
|
||||
JsonNode sharedState = null;
|
||||
StreamDescriptor firstStreamInState = null;
|
||||
for (int i = 0; i < stateMessages1.size(); i++) {
|
||||
final AirbyteStateMessage stateMessage = stateMessages1.get(i);
|
||||
assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType());
|
||||
final AirbyteGlobalState global = stateMessage.getGlobal();
|
||||
assertNotNull(global.getSharedState());
|
||||
if (Objects.isNull(sharedState)) {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
sharedState = mapper.valueToTree(global.getSharedState());
|
||||
pruneSharedStateTimestamp(sharedState);
|
||||
} else {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
var newSharedState = mapper.valueToTree(global.getSharedState());
|
||||
pruneSharedStateTimestamp(newSharedState);
|
||||
assertEquals(sharedState, newSharedState);
|
||||
}
|
||||
|
||||
if (Objects.isNull(firstStreamInState)) {
|
||||
assertEquals(1, global.getStreamStates().size());
|
||||
firstStreamInState = global.getStreamStates().get(0).getStreamDescriptor();
|
||||
}
|
||||
|
||||
if (i <= 4) {
|
||||
// First 4 state messages are pk state
|
||||
assertEquals(1, global.getStreamStates().size());
|
||||
final AirbyteStreamState streamState = global.getStreamStates().get(0);
|
||||
assertTrue(streamState.getStreamState().has(STATE_TYPE_KEY));
|
||||
assertEquals(ORDERED_COL_STATE_TYPE, streamState.getStreamState().get(STATE_TYPE_KEY).asText());
|
||||
} else if (i == 5) {
|
||||
// 5th state message is the final state message emitted for the stream
|
||||
assertEquals(1, global.getStreamStates().size());
|
||||
final AirbyteStreamState streamState = global.getStreamStates().get(0);
|
||||
assertFalse(streamState.getStreamState().has(STATE_TYPE_KEY));
|
||||
} else if (i <= 10) {
|
||||
// 6th to 10th is the primary_key state message for the 2nd stream but final state message for 1st
|
||||
// stream
|
||||
assertEquals(2, global.getStreamStates().size());
|
||||
final StreamDescriptor finalFirstStreamInState = firstStreamInState;
|
||||
global.getStreamStates().forEach(c -> {
|
||||
if (c.getStreamDescriptor().equals(finalFirstStreamInState)) {
|
||||
assertFalse(c.getStreamState().has(STATE_TYPE_KEY));
|
||||
} else {
|
||||
assertTrue(c.getStreamState().has(STATE_TYPE_KEY));
|
||||
assertEquals(ORDERED_COL_STATE_TYPE, c.getStreamState().get(STATE_TYPE_KEY).asText());
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// last 2 state messages don't contain primary_key info cause primary_key sync should be complete
|
||||
assertEquals(2, global.getStreamStates().size());
|
||||
global.getStreamStates().forEach(c -> assertFalse(c.getStreamState().has(STATE_TYPE_KEY)));
|
||||
}
|
||||
}
|
||||
|
||||
final Set<String> names = new HashSet<>(STREAM_NAMES);
|
||||
names.add(MODELS_STREAM_NAME + "_2");
|
||||
assertExpectedRecords(Streams.concat(MODEL_RECORDS_2.stream(), MODEL_RECORDS.stream())
|
||||
.collect(Collectors.toSet()),
|
||||
recordMessages1,
|
||||
names,
|
||||
names,
|
||||
modelsSchema());
|
||||
|
||||
assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()), firstStreamInState);
|
||||
|
||||
// Triggering a sync with a primary_key state for 1 stream and complete state for other stream
|
||||
final AutoCloseableIterator<AirbyteMessage> read2 = source()
|
||||
.read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6))));
|
||||
final List<AirbyteMessage> actualRecords2 = AutoCloseableIterators.toListAndClose(read2);
|
||||
|
||||
final List<AirbyteStateMessage> stateMessages2 = extractStateMessages(actualRecords2);
|
||||
|
||||
assertEquals(6, stateMessages2.size());
|
||||
// State was reset to the 7th; thus 5 remaining records were expected to be reloaded.
|
||||
assertExpectedStateMessagesWithTotalCount(stateMessages2, 5);
|
||||
for (int i = 0; i < stateMessages2.size(); i++) {
|
||||
final AirbyteStateMessage stateMessage = stateMessages2.get(i);
|
||||
assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType());
|
||||
final AirbyteGlobalState global = stateMessage.getGlobal();
|
||||
assertNotNull(global.getSharedState());
|
||||
assertEquals(2, global.getStreamStates().size());
|
||||
|
||||
if (i <= 4) {
|
||||
final StreamDescriptor finalFirstStreamInState = firstStreamInState;
|
||||
global.getStreamStates().forEach(c -> {
|
||||
// First 5 state messages are primary_key state for the stream that didn't complete primary_key sync
|
||||
// the first time
|
||||
if (c.getStreamDescriptor().equals(finalFirstStreamInState)) {
|
||||
assertFalse(c.getStreamState().has(STATE_TYPE_KEY));
|
||||
} else {
|
||||
assertTrue(c.getStreamState().has(STATE_TYPE_KEY));
|
||||
assertEquals(ORDERED_COL_STATE_TYPE, c.getStreamState().get(STATE_TYPE_KEY).asText());
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// last state messages doesn't contain primary_key info cause primary_key sync should be complete
|
||||
global.getStreamStates().forEach(c -> assertFalse(c.getStreamState().has(STATE_TYPE_KEY)));
|
||||
}
|
||||
}
|
||||
|
||||
final Set<AirbyteRecordMessage> recordMessages2 = extractRecordMessages(actualRecords2);
|
||||
assertEquals(5, recordMessages2.size());
|
||||
assertExpectedRecords(new HashSet<>(MODEL_RECORDS_2.subList(1, MODEL_RECORDS_2.size())),
|
||||
recordMessages2,
|
||||
names,
|
||||
names,
|
||||
modelsSchema());
|
||||
}
|
||||
|
||||
protected void assertExpectedStateMessagesWithTotalCount(final List<AirbyteStateMessage> stateMessages, final long totalRecordCount) {
|
||||
long actualRecordCount = 0L;
|
||||
for (final AirbyteStateMessage message : stateMessages) {
|
||||
actualRecordCount += message.getSourceStats().getRecordCount();
|
||||
}
|
||||
assertEquals(actualRecordCount, totalRecordCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void removeCDCColumns(final ObjectNode data) {
|
||||
data.remove(CDC_LSN);
|
||||
data.remove(CDC_UPDATED_AT);
|
||||
data.remove(CDC_DELETED_AT);
|
||||
data.remove(CDC_EVENT_SERIAL_NO);
|
||||
data.remove(CDC_DEFAULT_CURSOR);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MssqlCdcTargetPosition cdcLatestTargetPosition() {
|
||||
testdb.withWaitUntilMaxLsnAvailable();
|
||||
final JdbcDatabase jdbcDatabase = new StreamingJdbcDatabase(
|
||||
testDataSource,
|
||||
new MssqlSourceOperations(),
|
||||
AdaptiveStreamingQueryConfig::new);
|
||||
return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, testdb.getDatabaseName());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MssqlCdcTargetPosition extractPosition(final JsonNode record) {
|
||||
return new MssqlCdcTargetPosition(Lsn.valueOf(record.get(CDC_LSN).asText()));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertNullCdcMetaData(final JsonNode data) {
|
||||
assertNull(data.get(CDC_LSN));
|
||||
assertNull(data.get(CDC_UPDATED_AT));
|
||||
assertNull(data.get(CDC_DELETED_AT));
|
||||
assertNull(data.get(CDC_EVENT_SERIAL_NO));
|
||||
assertNull(data.get(CDC_DEFAULT_CURSOR));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull) {
|
||||
assertNotNull(data.get(CDC_LSN));
|
||||
assertNotNull(data.get(CDC_EVENT_SERIAL_NO));
|
||||
assertNotNull(data.get(CDC_UPDATED_AT));
|
||||
assertNotNull(data.get(CDC_DEFAULT_CURSOR));
|
||||
if (deletedAtNull) {
|
||||
assertTrue(data.get(CDC_DELETED_AT).isNull());
|
||||
} else {
|
||||
assertFalse(data.get(CDC_DELETED_AT).isNull());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addCdcMetadataColumns(final AirbyteStream stream) {
|
||||
final ObjectNode jsonSchema = (ObjectNode) stream.getJsonSchema();
|
||||
final ObjectNode properties = (ObjectNode) jsonSchema.get("properties");
|
||||
|
||||
final JsonNode airbyteIntegerType = Jsons.jsonNode(ImmutableMap.of("type", "number", "airbyte_type", "integer"));
|
||||
final JsonNode stringType = Jsons.jsonNode(ImmutableMap.of("type", "string"));
|
||||
properties.set(CDC_LSN, stringType);
|
||||
properties.set(CDC_UPDATED_AT, stringType);
|
||||
properties.set(CDC_DELETED_AT, stringType);
|
||||
properties.set(CDC_EVENT_SERIAL_NO, stringType);
|
||||
properties.set(CDC_DEFAULT_CURSOR, airbyteIntegerType);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addCdcDefaultCursorField(final AirbyteStream stream) {
|
||||
if (stream.getSupportedSyncModes().contains(SyncMode.INCREMENTAL)) {
|
||||
stream.setDefaultCursorField(ImmutableList.of(CDC_DEFAULT_CURSOR));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertExpectedStateMessages(final List<? extends AirbyteStateMessage> stateMessages) {
|
||||
assertEquals(7, stateMessages.size());
|
||||
assertStateTypes(stateMessages, 4);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertExpectedStateMessagesFromIncrementalSync(final List<? extends AirbyteStateMessage> stateMessages) {
|
||||
assertEquals(1, stateMessages.size());
|
||||
assertNotNull(stateMessages.get(0).getData());
|
||||
for (final AirbyteStateMessage stateMessage : stateMessages) {
|
||||
assertNotNull(stateMessage.getData().get("cdc_state").get("state").get(MSSQL_CDC_OFFSET));
|
||||
assertNotNull(stateMessage.getData().get("cdc_state").get("state").get(MSSQL_DB_HISTORY));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertExpectedStateMessagesForNoData(final List<? extends AirbyteStateMessage> stateMessages) {
|
||||
assertEquals(2, stateMessages.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertExpectedStateMessagesForRecordsProducedDuringAndAfterSync(final List<? extends AirbyteStateMessage> stateAfterFirstBatch) {
|
||||
assertEquals(27, stateAfterFirstBatch.size());
|
||||
assertStateTypes(stateAfterFirstBatch, 24);
|
||||
}
|
||||
|
||||
private void assertStateTypes(final List<? extends AirbyteStateMessage> stateMessages, final int indexTillWhichExpectOcState) {
|
||||
JsonNode sharedState = null;
|
||||
LOGGER.info("*** states to assert: {}", Arrays.deepToString(stateMessages.toArray()));
|
||||
for (int i = 0; i < stateMessages.size(); i++) {
|
||||
final AirbyteStateMessage stateMessage = stateMessages.get(i);
|
||||
assertEquals(AirbyteStateType.GLOBAL, stateMessage.getType());
|
||||
final AirbyteGlobalState global = stateMessage.getGlobal();
|
||||
assertNotNull(global.getSharedState());
|
||||
if (Objects.isNull(sharedState)) {
|
||||
sharedState = global.getSharedState();
|
||||
} else {
|
||||
assertEquals(sharedState, global.getSharedState(), "states were " + Arrays.deepToString(stateMessages.toArray()));
|
||||
// assertEquals(sharedState.toString().replaceAll("ts_ms\\\\\":\\d+", ""),
|
||||
// global.getSharedState().toString().replaceAll("ts_ms\\\\\":\\d+", ""));
|
||||
}
|
||||
assertEquals(1, global.getStreamStates().size());
|
||||
final AirbyteStreamState streamState = global.getStreamStates().get(0);
|
||||
if (i <= indexTillWhichExpectOcState) {
|
||||
assertTrue(streamState.getStreamState().has(STATE_TYPE_KEY));
|
||||
assertEquals(ORDERED_COL_STATE_TYPE, streamState.getStreamState().get(STATE_TYPE_KEY).asText());
|
||||
} else {
|
||||
assertFalse(streamState.getStreamState().has(STATE_TYPE_KEY));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void compareTargetPositionFromTheRecordsWithTargetPostionGeneratedBeforeSync(final CdcTargetPosition targetPosition,
|
||||
final AirbyteRecordMessage record) {
|
||||
// The LSN from the records should be equal to or greater than the position value captured before
// the sync started. Since we're using shared containers, the current LSN can move forward without
// any data modifications (INSERT, UPDATE, DELETE) in the current DB.
assert targetPosition instanceof MssqlCdcTargetPosition;
|
||||
assertTrue(extractPosition(record.getData()).targetLsn.compareTo(((MssqlCdcTargetPosition) targetPosition).targetLsn) >= 0);
|
||||
}
|
||||
|
||||
protected void waitForCdcRecords(String schemaName, String tableName, int recordCount)
|
||||
throws Exception {
|
||||
testdb.waitForCdcRecords(schemaName, tableName, recordCount);
|
||||
}
|
||||
|
||||
protected void deleteCommand(final String streamName) {
|
||||
String selectCountSql = "SELECT COUNT(*) FROM %s.%s".formatted(modelsSchema(), streamName);
|
||||
try {
|
||||
int rowCount = testdb.query(ctx -> ctx.fetch(selectCountSql)).get(0).get(0, Integer.class);
|
||||
LOGGER.info("deleting all {} rows from table {}.{}", rowCount, modelsSchema(), streamName);
|
||||
super.deleteCommand(streamName);
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean supportResumableFullRefresh() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void assertExpectedStateMessagesForFullRefresh(final List<? extends AirbyteStateMessage> stateMessages) {
|
||||
// Full refresh will only send 6 state messages - one for each record (including the final one).
|
||||
assertEquals(6, stateMessages.size());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,71 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.factory.DataSourceFactory;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.JdbcConnector;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.CertificateKey;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Map;
import javax.sql.DataSource;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.TestInstance.Lifecycle;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;

@TestInstance(Lifecycle.PER_METHOD)
@Execution(ExecutionMode.CONCURRENT)
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH")
public class CdcMssqlSslSourceTest extends CdcMssqlSourceTest {

  @Override
  final protected MsSQLTestDatabase createTestDatabase() {
    final var testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT, ContainerModifier.WITH_SSL_CERTIFICATES);
    return testdb.withWaitUntilAgentRunning()
        .withCdc();
  }

  @Override
  protected DataSource createTestDataSource() {
    return DataSourceFactory.create(
        testUserName(),
        testdb.getPassword(),
        testdb.getDatabaseDriver().getDriverClassName(),
        testdb.getJdbcUrl(),
        Map.of("encrypt", "true", "databaseName", testdb.getDatabaseName(), "trustServerCertificate", "true"),
        JdbcConnector.CONNECT_TIMEOUT_DEFAULT);
  }

  @Override
  protected JsonNode config() {
    final String containerIp;
    try {
      containerIp = InetAddress.getByName(testdb.getContainer().getHost())
          .getHostAddress();
    } catch (final UnknownHostException e) {
      throw new RuntimeException(e);
    }
    final String certificate = testdb.getCertificate(CertificateKey.SERVER);
    return testdb.configBuilder()
        .withEncrytedVerifyServerCertificate(certificate, testdb.getContainer().getHost())
        .with(JdbcUtils.HOST_KEY, containerIp)
        .with(JdbcUtils.PORT_KEY, testdb.getContainer().getFirstMappedPort())
        .withDatabase()
        .with(JdbcUtils.USERNAME_KEY, testUserName())
        .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
        .withSchemas(modelsSchema(), randomSchema())
        .withCdcReplication()
        .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
        .build();
  }

}
@@ -1,266 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.IS_COMPRESSED;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET;
|
||||
import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.state.StateGeneratorUtils;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.commons.util.AutoCloseableIterators;
|
||||
import io.airbyte.protocol.models.Field;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import io.airbyte.protocol.models.v0.AirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
|
||||
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.CatalogHelpers;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.SyncMode;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.stream.Collectors;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class CdcStateCompressionTest {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(CdcStateCompressionTest.class);
|
||||
|
||||
static private final String CDC_ROLE_NAME = "cdc_selector";
|
||||
|
||||
static private final String TEST_USER_NAME_PREFIX = "cdc_test_user";
|
||||
|
||||
static private final String TEST_SCHEMA = "test_schema";
|
||||
|
||||
static private final int TEST_TABLES = 4;
|
||||
|
||||
// SQLServer tables can't have more than 1024 columns.
|
||||
static private final int ADDED_COLUMNS = 1000;
|
||||
|
||||
private MsSQLTestDatabase testdb;
|
||||
private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
||||
private static final String ALTER_TABLE_ADD_COLUMN_SQL;
|
||||
static {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("ALTER TABLE ").append(TEST_SCHEMA).append(".%s ADD");
|
||||
for (int j = 0; j < ADDED_COLUMNS; j++) {
|
||||
sb.append((j > 0) ? ", " : " ")
|
||||
// Sqlserver column names can't be longer than 128 characters
|
||||
.append("rather_long_column_name_________________________________________________________________________________________").append(j)
|
||||
.append(" INT NULL");
|
||||
}
|
||||
ALTER_TABLE_ADD_COLUMN_SQL = sb.toString();
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws Exception {
|
||||
testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022, MsSQLTestDatabase.ContainerModifier.AGENT)
|
||||
.withWaitUntilAgentRunning()
|
||||
.withCdc();
|
||||
|
||||
// Create a test schema and a bunch of test tables with CDC enabled.
|
||||
// Insert one row in each table so that they're not empty.
|
||||
testdb.with("CREATE SCHEMA %s;", TEST_SCHEMA);
|
||||
List<Callable<MsSQLTestDatabase>> createAndPopulateTableTasks = new ArrayList<>();
|
||||
List<Callable<MsSQLTestDatabase>> waitForCdcRecordTasks = new ArrayList<>();
|
||||
List<Callable<MsSQLTestDatabase>> alterTabletasks = new ArrayList<>();
|
||||
List<Callable<MsSQLTestDatabase>> enableTableCdctasks = new ArrayList<>();
|
||||
List<Callable<MsSQLTestDatabase>> disableTableCdctasks = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < TEST_TABLES; i++) {
|
||||
String tableName = "test_table_%d".formatted(i);
|
||||
String initialCdcInstanceName = "capture_instance_%d_%d".formatted(i, 1);
|
||||
String finalCdcInstanceName = "capture_instance_%d_%d".formatted(i, 2);
|
||||
createAndPopulateTableTasks.add(() -> testdb
|
||||
.with("CREATE TABLE %s.%s (id INT IDENTITY(1,1) PRIMARY KEY);", TEST_SCHEMA, tableName)
|
||||
.withCdcForTable(TEST_SCHEMA, tableName, CDC_ROLE_NAME, initialCdcInstanceName)
|
||||
.with("INSERT INTO %s.%s DEFAULT VALUES", TEST_SCHEMA, tableName));
|
||||
waitForCdcRecordTasks.add(() -> testdb.waitForCdcRecords(TEST_SCHEMA, tableName, initialCdcInstanceName, 1));
|
||||
|
||||
// Increase schema history size to trigger state compression.
|
||||
// We do this by adding lots of columns with long names,
|
||||
// then migrating to a new CDC capture instance for each table.
|
||||
// This is admittedly somewhat awkward and perhaps could be improved.
|
||||
alterTabletasks.add(() -> testdb.with(ALTER_TABLE_ADD_COLUMN_SQL.formatted(tableName)));
|
||||
enableTableCdctasks.add(() -> testdb.withCdcForTable(TEST_SCHEMA, tableName, CDC_ROLE_NAME, finalCdcInstanceName));
|
||||
disableTableCdctasks.add(() -> testdb.withCdcDisabledForTable(TEST_SCHEMA, tableName, initialCdcInstanceName));
|
||||
}
|
||||
executor.invokeAll(createAndPopulateTableTasks);
|
||||
executor.invokeAll(waitForCdcRecordTasks);
|
||||
|
||||
// Create a test user to be used by the source, with proper permissions.
|
||||
testdb
|
||||
.with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName())
|
||||
.with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName())
|
||||
.with("REVOKE ALL FROM %s CASCADE;", testUserName())
|
||||
.with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName())
|
||||
.with("GRANT SELECT ON SCHEMA :: [%s] TO %s", TEST_SCHEMA, testUserName())
|
||||
.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName())
|
||||
.with("USE [master]")
|
||||
.with("GRANT VIEW SERVER STATE TO %s", testUserName())
|
||||
.with("USE [%s]", testdb.getDatabaseName())
|
||||
.with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName());
|
||||
executor.invokeAll(alterTabletasks);
|
||||
executor.invokeAll(enableTableCdctasks);
|
||||
executor.invokeAll(disableTableCdctasks);
|
||||
}
|
||||
|
||||
private AirbyteCatalog getCatalog() {
|
||||
final var streams = new ArrayList<AirbyteStream>();
|
||||
for (int i = 0; i < TEST_TABLES; i++) {
|
||||
streams.add(CatalogHelpers.createAirbyteStream(
|
||||
"test_table_%d".formatted(i),
|
||||
TEST_SCHEMA,
|
||||
Field.of("id", JsonSchemaType.INTEGER))
|
||||
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
|
||||
.withSourceDefinedPrimaryKey(List.of(List.of("id"))));
|
||||
}
|
||||
return new AirbyteCatalog().withStreams(streams);
|
||||
}
|
||||
|
||||
private ConfiguredAirbyteCatalog getConfiguredCatalog() {
|
||||
final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog());
|
||||
configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL));
|
||||
return configuredCatalog;
|
||||
}
|
||||
|
||||
private MssqlSource source() {
|
||||
return new MssqlSource();
|
||||
}
|
||||
|
||||
private JsonNode config() {
|
||||
return testdb.configBuilder()
|
||||
.withHostAndPort()
|
||||
.withDatabase()
|
||||
.with(JdbcUtils.USERNAME_KEY, testUserName())
|
||||
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
|
||||
.withSchemas(TEST_SCHEMA)
|
||||
.withoutSsl()
|
||||
// Configure for CDC replication but with a higher timeout than usual.
|
||||
// This is because Debezium requires more time than usual to build the initial snapshot.
|
||||
.with("is_test", true)
|
||||
.with("replication_method", Map.of(
|
||||
"method", "CDC",
|
||||
"initial_waiting_seconds", 20))
|
||||
.build();
|
||||
}
|
||||
|
||||
private String testUserName() {
|
||||
return testdb.withNamespace(TEST_USER_NAME_PREFIX);
|
||||
}
|
||||
|
||||
/**
|
||||
* This test is similar in principle to CdcMysqlSourceTest.testCompressedSchemaHistory.
|
||||
*/
|
||||
@Test
|
||||
public void testCompressedSchemaHistory() throws Exception {
|
||||
// First sync.
|
||||
final var firstBatchIterator = source().read(config(), getConfiguredCatalog(), null);
|
||||
final var dataFromFirstBatch = AutoCloseableIterators.toListAndClose(firstBatchIterator);
|
||||
final AirbyteStateMessage lastStateMessageFromFirstBatch =
|
||||
StateGeneratorUtils.convertLegacyStateToGlobalState(Iterables.getLast(extractStateMessages(dataFromFirstBatch)));
|
||||
assertNotNull(lastStateMessageFromFirstBatch.getGlobal().getSharedState());
|
||||
final var lastSharedStateFromFirstBatch = lastStateMessageFromFirstBatch.getGlobal().getSharedState().get("state");
|
||||
assertNotNull(lastSharedStateFromFirstBatch);
|
||||
assertNotNull(lastSharedStateFromFirstBatch.get(MSSQL_DB_HISTORY));
|
||||
assertNotNull(lastSharedStateFromFirstBatch.get(MSSQL_CDC_OFFSET));
|
||||
assertNotNull(lastSharedStateFromFirstBatch.get(IS_COMPRESSED));
|
||||
assertTrue(lastSharedStateFromFirstBatch.get(IS_COMPRESSED).asBoolean());
|
||||
final var recordsFromFirstBatch = extractRecordMessages(dataFromFirstBatch);
|
||||
assertEquals(TEST_TABLES, recordsFromFirstBatch.size());
|
||||
for (final var record : recordsFromFirstBatch) {
|
||||
assertEquals("1", record.getData().get("id").toString());
|
||||
}
|
||||
|
||||
LOGGER.info("inserting new data into test tables");
|
||||
List<Callable<MsSQLTestDatabase>> waitForCdcTasks = new ArrayList<>();
|
||||
// Insert a bunch of records (1 per table, again).
|
||||
for (int i = 0; i < TEST_TABLES; i++) {
|
||||
String tableName = "test_table_%d".formatted(i);
|
||||
String cdcInstanceName = "capture_instance_%d_%d".formatted(i, 2);
|
||||
testdb.with("INSERT %s.%s DEFAULT VALUES;", TEST_SCHEMA, tableName);
|
||||
waitForCdcTasks.add(() -> testdb.waitForCdcRecords(TEST_SCHEMA, tableName, cdcInstanceName, 1));
|
||||
}
|
||||
LOGGER.info("waiting for CDC records");
|
||||
executor.invokeAll(waitForCdcTasks);
|
||||
|
||||
LOGGER.info("starting second sync");
|
||||
// Second sync.
|
||||
final var secondBatchStateForRead = Jsons.jsonNode(Collections.singletonList(Iterables.getLast(extractStateMessages(dataFromFirstBatch))));
|
||||
final var secondBatchIterator = source().read(config(), getConfiguredCatalog(), secondBatchStateForRead);
|
||||
final var dataFromSecondBatch = AutoCloseableIterators.toListAndClose(secondBatchIterator);
|
||||
final AirbyteStateMessage lastStateMessageFromSecondBatch =
|
||||
StateGeneratorUtils.convertLegacyStateToGlobalState(Iterables.getLast(extractStateMessages(dataFromSecondBatch)));
|
||||
assertNotNull(lastStateMessageFromSecondBatch.getGlobal().getSharedState());
|
||||
final var lastSharedStateFromSecondBatch = lastStateMessageFromSecondBatch.getGlobal().getSharedState().get("state");
|
||||
assertNotNull(lastSharedStateFromSecondBatch);
|
||||
assertNotNull(lastSharedStateFromSecondBatch.get(MSSQL_DB_HISTORY));
|
||||
assertEquals(lastSharedStateFromFirstBatch.get(MSSQL_DB_HISTORY), lastSharedStateFromSecondBatch.get(MSSQL_DB_HISTORY));
|
||||
assertNotNull(lastSharedStateFromSecondBatch.get(MSSQL_CDC_OFFSET));
|
||||
assertNotNull(lastSharedStateFromSecondBatch.get(IS_COMPRESSED));
|
||||
assertTrue(lastSharedStateFromSecondBatch.get(IS_COMPRESSED).asBoolean());
|
||||
final var recordsFromSecondBatch = extractRecordMessages(dataFromSecondBatch);
|
||||
assertEquals(TEST_TABLES, recordsFromSecondBatch.size());
|
||||
for (final var record : recordsFromSecondBatch) {
|
||||
assertEquals("2", record.getData().get("id").toString());
|
||||
}
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
testdb.close();
|
||||
}
|
||||
|
||||
private Set<AirbyteRecordMessage> extractRecordMessages(final List<AirbyteMessage> messages) {
|
||||
final var recordsPerStream = extractRecordMessagesStreamWise(messages);
|
||||
return recordsPerStream.values().stream().flatMap(Set::stream).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
private Map<String, Set<AirbyteRecordMessage>> extractRecordMessagesStreamWise(final List<AirbyteMessage> messages) {
|
||||
final var recordsPerStream = messages.stream()
|
||||
.filter(m -> m.getType() == Type.RECORD)
|
||||
.map(AirbyteMessage::getRecord)
|
||||
.collect(Collectors.groupingBy(AirbyteRecordMessage::getStream));
|
||||
|
||||
final Map<String, Set<AirbyteRecordMessage>> recordsPerStreamWithNoDuplicates = new HashMap<>();
|
||||
for (final var entry : recordsPerStream.entrySet()) {
|
||||
final var set = new HashSet<>(entry.getValue());
|
||||
recordsPerStreamWithNoDuplicates.put(entry.getKey(), set);
|
||||
assertEquals(entry.getValue().size(), set.size(), "duplicate records in sync for " + entry.getKey());
|
||||
}
|
||||
|
||||
return recordsPerStreamWithNoDuplicates;
|
||||
}
|
||||
|
||||
private List<AirbyteStateMessage> extractStateMessages(final List<AirbyteMessage> messages) {
|
||||
return messages.stream()
|
||||
.filter(r -> r.getType() == Type.STATE)
|
||||
.map(AirbyteMessage::getState)
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,122 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.google.common.collect.ImmutableMap;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.integrations.base.Source;
import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner;
import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
import io.airbyte.commons.features.EnvVariableFeatureFlags;
import io.airbyte.commons.features.FeatureFlagsWrapper;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;

@Execution(ExecutionMode.CONCURRENT)
public class CloudDeploymentMssqlTest {

  private MsSQLTestDatabase createTestDatabase(String... containerFactoryMethods) {
    final var container = new MsSQLContainerFactory().shared(
        BaseImage.MSSQL_2022.reference, containerFactoryMethods);
    final var testdb = new MsSQLTestDatabase(container);
    return testdb
        .withConnectionProperty("encrypt", "true")
        .withConnectionProperty("trustServerCertificate", "true")
        .withConnectionProperty("databaseName", testdb.getDatabaseName())
        .initialized();
  }

  private Source source() {
    final var source = new MssqlSource(FeatureFlagsWrapper.overridingDeploymentMode(
        new EnvVariableFeatureFlags(), AdaptiveSourceRunner.CLOUD_MODE));
    return MssqlSource.sshWrappedSource(source);
  }

  @Test
  void testStrictSSLUnsecuredNoTunnel() throws Exception {
    try (final var testdb = createTestDatabase()) {
      final var config = testdb.configBuilder()
          .withHostAndPort()
          .withDatabase()
          .with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
          .with(JdbcUtils.PASSWORD_KEY, "fake")
          .withoutSsl()
          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build())
          .build();
      final AirbyteConnectionStatus actual = source().check(config);
      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
      assertTrue(actual.getMessage().contains("Unsecured connection not allowed"), actual.getMessage());
    }
  }

  @Test
  void testStrictSSLSecuredNoTunnel() throws Exception {
    try (final var testdb = createTestDatabase()) {
      final var config = testdb.testConfigBuilder()
          .withEncrytedTrustServerCertificate()
          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build())
          .build();
      final AirbyteConnectionStatus actual = source().check(config);
      assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus());
    }
  }

  @Test
  void testStrictSSLSecuredWithTunnel() throws Exception {
    try (final var testdb = createTestDatabase()) {
      final var config = testdb.configBuilder()
          .withHostAndPort()
          .withDatabase()
          .with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
          .with(JdbcUtils.PASSWORD_KEY, "fake")
          .withEncrytedTrustServerCertificate()
          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build())
          .build();
      final AirbyteConnectionStatus actual = source().check(config);
      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
      assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage());
    }
  }

  @Test
  void testStrictSSLUnsecuredWithTunnel() throws Exception {
    try (final var testdb = createTestDatabase()) {
      final var config = testdb.configBuilder()
          .withHostAndPort()
          .withDatabase()
          .with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
          .with(JdbcUtils.PASSWORD_KEY, "fake")
          .withEncrytedTrustServerCertificate()
          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build())
          .build();
      final AirbyteConnectionStatus actual = source().check(config);
      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
      assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage());
    }
  }

  @Test
  void testCheckWithSslModeDisabled() throws Exception {
    try (final var testdb = createTestDatabase("withNetwork")) {
      try (final SshBastionContainer bastion = new SshBastionContainer()) {
        bastion.initAndStartBastion(testdb.getContainer().getNetwork());
        final var config = testdb.integrationTestConfigBuilder()
            .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false))
            .withoutSsl()
            .build();
        final AirbyteConnectionStatus actual = source().check(config);
        assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus());
      }
    }
  }

}
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import io.airbyte.cdk.db.factory.DataSourceFactory;
|
||||
import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.JdbcConnector;
|
||||
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
|
||||
import java.util.Map;
|
||||
import javax.sql.DataSource;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.testcontainers.containers.MSSQLServerContainer;
|
||||
|
||||
public class MssqlAgentStateTest {
|
||||
|
||||
private static MsSQLTestDatabase testdb;
|
||||
private static DataSource testDataSource;
|
||||
private static MSSQLServerContainer privateContainer;
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
privateContainer = new MsSQLContainerFactory().exclusive(
|
||||
MsSQLTestDatabase.BaseImage.MSSQL_2022.reference,
|
||||
MsSQLTestDatabase.ContainerModifier.AGENT);
|
||||
testdb = new MsSQLTestDatabase(privateContainer);
|
||||
testdb
|
||||
.withConnectionProperty("encrypt", "false")
|
||||
.withConnectionProperty("trustServerCertificate", "true")
|
||||
.withConnectionProperty("databaseName", testdb.getDatabaseName())
|
||||
.initialized()
|
||||
.withWaitUntilAgentRunning()
|
||||
.withCdc();
|
||||
testDataSource = DataSourceFactory.create(
|
||||
testdb.getUserName(),
|
||||
testdb.getPassword(),
|
||||
testdb.getDatabaseDriver().getDriverClassName(),
|
||||
testdb.getJdbcUrl(),
|
||||
Map.of("encrypt", "false", "trustServerCertificate", "true"),
|
||||
JdbcConnector.CONNECT_TIMEOUT_DEFAULT);
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
static void tearDown() {
|
||||
try {
|
||||
DataSourceFactory.close(testDataSource);
|
||||
testdb.close();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
privateContainer.close();
|
||||
}
|
||||
|
||||
protected MssqlSource source() {
|
||||
return new MssqlSource();
|
||||
}
|
||||
|
||||
private JdbcDatabase testDatabase() {
|
||||
return new DefaultJdbcDatabase(testDataSource);
|
||||
}
|
||||
|
||||
protected JsonNode config() {
|
||||
return testdb.configBuilder()
|
||||
.withHostAndPort()
|
||||
.withDatabase()
|
||||
.with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
|
||||
.with(JdbcUtils.PASSWORD_KEY, testdb.getPassword())
|
||||
.withCdcReplication()
|
||||
.withoutSsl()
|
||||
.with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testAssertSqlServerAgentRunning() throws Exception {
|
||||
testdb.withAgentStopped().withWaitUntilAgentStopped();
|
||||
// assert expected failure if sql server agent stopped
|
||||
assertThrows(RuntimeException.class,
|
||||
() -> source().assertSqlServerAgentRunning(testDatabase()));
|
||||
// assert success if sql server agent running
|
||||
testdb.withAgentStarted().withWaitUntilAgentRunning();
|
||||
assertDoesNotThrow(() -> source().assertSqlServerAgentRunning(testDatabase()));
|
||||
}
|
||||
|
||||
// Ensure the CDC check operations are included when CDC is enabled
|
||||
// todo: make this better by checking the returned checkOperations from source.getCheckOperations
|
||||
@Test
|
||||
void testCdcCheckOperations() throws Exception {
|
||||
// assertCdcEnabledInDb
|
||||
testdb.withoutCdc();
|
||||
AirbyteConnectionStatus status = source().check(config());
|
||||
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
|
||||
testdb.withCdc();
|
||||
// assertCdcSchemaQueryable
|
||||
testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName());
|
||||
status = source().check(config());
|
||||
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
|
||||
testdb.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName());
|
||||
|
||||
// assertSqlServerAgentRunning
|
||||
|
||||
testdb.withAgentStopped().withWaitUntilAgentStopped();
|
||||
status = source().check(config());
|
||||
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
|
||||
testdb.withAgentStarted().withWaitUntilAgentRunning();
|
||||
status = source().check(config());
|
||||
assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,165 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.SyncMode;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class MssqlCdcHelperTest {
|
||||
|
||||
private static final JsonNode LEGACY_NON_CDC_CONFIG = Jsons.jsonNode(Map.of("replication_method", "STANDARD"));
|
||||
private static final JsonNode LEGACY_CDC_CONFIG = Jsons.jsonNode(Map.of("replication_method", "CDC"));
|
||||
|
||||
@Test
|
||||
public void testIsCdc() {
|
||||
// legacy replication method config before version 0.4.0
|
||||
assertFalse(MssqlCdcHelper.isCdc(LEGACY_NON_CDC_CONFIG));
|
||||
assertTrue(MssqlCdcHelper.isCdc(LEGACY_CDC_CONFIG));
|
||||
|
||||
// new replication method config since version 0.4.0
|
||||
final JsonNode newNonCdc = Jsons.jsonNode(Map.of("replication_method",
|
||||
Jsons.jsonNode(Map.of("method", "STANDARD"))));
|
||||
assertFalse(MssqlCdcHelper.isCdc(newNonCdc));
|
||||
|
||||
final JsonNode newCdc = Jsons.jsonNode(Map.of("replication_method",
|
||||
Jsons.jsonNode(Map.of(
|
||||
"method", "CDC"))));
|
||||
assertTrue(MssqlCdcHelper.isCdc(newCdc));
|
||||
|
||||
// migration from legacy to new config
|
||||
final JsonNode mixNonCdc = Jsons.jsonNode(Map.of(
|
||||
"replication_method", Jsons.jsonNode(Map.of("method", "STANDARD")),
|
||||
"replication", Jsons.jsonNode(Map.of("replication_type", "CDC"))));
|
||||
assertFalse(MssqlCdcHelper.isCdc(mixNonCdc));
|
||||
|
||||
final JsonNode mixCdc = Jsons.jsonNode(Map.of(
|
||||
"replication", Jsons.jsonNode(Map.of(
|
||||
"replication_type", "Standard")),
|
||||
"replication_method", Jsons.jsonNode(Map.of(
|
||||
"method", "CDC"))));
|
||||
assertTrue(MssqlCdcHelper.isCdc(mixCdc));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTableIncludeListSingleTable() {
|
||||
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog()
|
||||
.withStreams(Collections.singletonList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("users"))));
|
||||
|
||||
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
|
||||
// Pattern.quote escapes the period in "dbo.users" to "\Qdbo.users\E"
|
||||
assertEquals("\\Qdbo.users\\E", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTableIncludeListMultipleTables() {
|
||||
final List<ConfiguredAirbyteStream> streams = Arrays.asList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("users")),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("orders")),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("sales")
|
||||
.withName("products")));
|
||||
|
||||
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(streams);
|
||||
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
|
||||
|
||||
// Should generate a comma-separated list of escaped table identifiers
|
||||
assertEquals("\\Qdbo.users\\E,\\Qdbo.orders\\E,\\Qsales.products\\E", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTableIncludeListFiltersNonIncrementalStreams() {
|
||||
final List<ConfiguredAirbyteStream> streams = Arrays.asList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("users")),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.FULL_REFRESH)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("logs")));
|
||||
|
||||
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(streams);
|
||||
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
|
||||
|
||||
// Should only include INCREMENTAL streams
|
||||
assertEquals("\\Qdbo.users\\E", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTableIncludeListWithSpecialCharactersInTableName() {
|
||||
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog()
|
||||
.withStreams(Collections.singletonList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("table$with_special-chars"))));
|
||||
|
||||
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
|
||||
// Pattern.quote should escape special characters
|
||||
assertEquals("\\Qdbo.table$with_special-chars\\E", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTableIncludeListWithCommaInTableName() {
|
||||
final List<ConfiguredAirbyteStream> streams = Arrays.asList(
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("table,with,commas")),
|
||||
new ConfiguredAirbyteStream()
|
||||
.withSyncMode(SyncMode.INCREMENTAL)
|
||||
.withStream(new AirbyteStream()
|
||||
.withNamespace("dbo")
|
||||
.withName("normal_table")));
|
||||
|
||||
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(streams);
|
||||
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
|
||||
|
||||
// Commas in table names should be escaped with backslash
|
||||
assertEquals("\\Qdbo.table\\,with\\,commas\\E,\\Qdbo.normal_table\\E", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTableIncludeListEmptyCatalog() {
|
||||
final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog()
|
||||
.withStreams(Collections.emptyList());
|
||||
|
||||
final String result = MssqlCdcHelper.getTableIncludeList(catalog);
|
||||
assertEquals("", result);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,35 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import com.zaxxer.hikari.HikariDataSource;
import io.airbyte.cdk.db.factory.DataSourceFactory;
import java.util.Map;
import javax.sql.DataSource;
import org.junit.jupiter.api.Test;

public class MssqlDataSourceFactoryTest {

  @Test
  protected void testCreatingDataSourceWithConnectionTimeoutSetBelowDefault() {
    try (var testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022)) {
      final Map<String, String> connectionProperties = Map.of("loginTimeout", String.valueOf(5));
      final DataSource dataSource = DataSourceFactory.create(
          testdb.getUserName(),
          testdb.getPassword(),
          testdb.getDatabaseDriver().getDriverClassName(),
          testdb.getJdbcUrl(),
          connectionProperties,
          new MssqlSource().getConnectionTimeoutMssql(connectionProperties));
      assertNotNull(dataSource);
      assertEquals(HikariDataSource.class, dataSource.getClass());
      assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout());
    }
  }

}
@@ -1,29 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil.MssqlDebeziumStateAttributes;
import io.debezium.connector.sqlserver.Lsn;
import org.junit.jupiter.api.Test;

public class MssqlDebeziumStateUtilTest {

  private static String DB_NAME = "db_name";
  private static String LSN_STRING = "0000062d:00017ff0:016d";
  private static Lsn LSN = Lsn.valueOf(LSN_STRING);

  @Test
  void generateCorrectFormat() {
    MssqlDebeziumStateAttributes attributes = new MssqlDebeziumStateAttributes(LSN);
    JsonNode formatResult = MssqlDebeziumStateUtil.format(attributes, DB_NAME);
    assertEquals("{\"commit_lsn\":\"0000062d:00017ff0:016d\",\"snapshot\":true,\"snapshot_completed\":true}",
        formatResult.get("[\"db_name\",{\"server\":\"db_name\",\"database\":\"db_name\"}]").asText());
  }

}
@@ -1,36 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static org.junit.jupiter.api.Assertions.assertEquals;

import io.airbyte.integrations.source.mssql.MssqlQueryUtils.TableSizeInfo;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import org.junit.jupiter.api.Test;

public class MssqlInitialLoadHandlerTest {

  private static final long ONE_GB = 1_073_741_824;
  private static final long ONE_MB = 1_048_576;

  @Test
  void testInvalidOrNullTableSizeInfo() {
    final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair("table_name", "schema_name");
    assertEquals(MssqlInitialLoadHandler.calculateChunkSize(null, pair), 1_000_000L);
    final TableSizeInfo invalidRowLengthInfo = new TableSizeInfo(ONE_GB, 0L);
    assertEquals(MssqlInitialLoadHandler.calculateChunkSize(invalidRowLengthInfo, pair), 1_000_000L);
    final TableSizeInfo invalidTableSizeInfo = new TableSizeInfo(0L, 0L);
    assertEquals(MssqlInitialLoadHandler.calculateChunkSize(invalidTableSizeInfo, pair), 1_000_000L);
  }

  @Test
  void testTableSizeInfo() {
    final AirbyteStreamNameNamespacePair pair = new AirbyteStreamNameNamespacePair("table_name", "schema_name");
    assertEquals(MssqlInitialLoadHandler.calculateChunkSize(new TableSizeInfo(ONE_GB, 2 * ONE_MB), pair), 512L);
    assertEquals(MssqlInitialLoadHandler.calculateChunkSize(new TableSizeInfo(ONE_GB, 200L), pair), 5368709L);
  }

}
@@ -1,482 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql;
|
||||
|
||||
import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY;
|
||||
import static io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadStateManager.STATE_TYPE_KEY;
|
||||
import static java.util.stream.Collectors.toList;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.airbyte.cdk.db.jdbc.JdbcUtils;
|
||||
import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.CursorBasedStatus;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState;
|
||||
import io.airbyte.cdk.integrations.source.relationaldb.models.InternalModels.StateType;
|
||||
import io.airbyte.commons.json.Jsons;
|
||||
import io.airbyte.commons.util.MoreIterators;
|
||||
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
|
||||
import io.airbyte.protocol.models.Field;
|
||||
import io.airbyte.protocol.models.JsonSchemaType;
|
||||
import io.airbyte.protocol.models.v0.AirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStateStats;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.AirbyteStreamState;
|
||||
import io.airbyte.protocol.models.v0.CatalogHelpers;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
|
||||
import io.airbyte.protocol.models.v0.DestinationSyncMode;
|
||||
import io.airbyte.protocol.models.v0.StreamDescriptor;
|
||||
import io.airbyte.protocol.models.v0.SyncMode;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH")
|
||||
public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest<MssqlSource, MsSQLTestDatabase> {
|
||||
|
||||
protected static final String USERNAME_WITHOUT_PERMISSION = "new_user";
|
||||
protected static final String PASSWORD_WITHOUT_PERMISSION = "password_3435!";
|
||||
|
||||
static {
|
||||
// In mssql, timestamp is generated automatically, so we need to use
|
||||
// the datetime type instead so that we can set the value manually.
|
||||
COL_TIMESTAMP_TYPE = "DATETIME2";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected JsonNode config() {
|
||||
return testdb.testConfigBuilder()
|
||||
.withoutSsl()
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MssqlSource source() {
|
||||
return new MssqlSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MsSQLTestDatabase createTestDatabase() {
|
||||
return MsSQLTestDatabase.in(BaseImage.MSSQL_2022);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportsSchemas() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void maybeSetShorterConnectionTimeout(final JsonNode config) {
|
||||
((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCheckIncorrectPasswordFailure() throws Exception {
|
||||
final var config = config();
|
||||
maybeSetShorterConnectionTimeout(config);
|
||||
((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake");
|
||||
final AirbyteConnectionStatus status = source().check(config);
|
||||
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
|
||||
assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckIncorrectUsernameFailure() throws Exception {
|
||||
final var config = config();
|
||||
maybeSetShorterConnectionTimeout(config);
|
||||
((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake");
|
||||
final AirbyteConnectionStatus status = source().check(config);
|
||||
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
|
||||
assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckIncorrectHostFailure() throws Exception {
|
||||
final var config = config();
|
||||
maybeSetShorterConnectionTimeout(config);
|
||||
((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2");
|
||||
final AirbyteConnectionStatus status = source().check(config);
|
||||
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
|
||||
assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckIncorrectPortFailure() throws Exception {
|
||||
final var config = config();
|
||||
maybeSetShorterConnectionTimeout(config);
|
||||
((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000");
|
||||
final AirbyteConnectionStatus status = source().check(config);
|
||||
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
|
||||
assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCheckIncorrectDataBaseFailure() throws Exception {
|
||||
final var config = config();
|
||||
maybeSetShorterConnectionTimeout(config);
|
||||
((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase");
|
||||
final AirbyteConnectionStatus status = source().check(config);
|
||||
Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
|
||||
assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUserHasNoPermissionToDataBase() throws Exception {
|
||||
final var config = config();
|
||||
maybeSetShorterConnectionTimeout(config);
|
||||
testdb.with("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION);
|
||||
((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, USERNAME_WITHOUT_PERMISSION);
|
||||
((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION);
|
||||
final AirbyteConnectionStatus status = source().check(config);
|
||||
assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus());
|
||||
assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
protected void testReadMultipleTablesIncrementally() throws Exception {
|
||||
final var config = config();
|
||||
((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1);
|
||||
final String streamOneName = TABLE_NAME + "one";
|
||||
// Create a fresh first table
|
||||
testdb.with("CREATE TABLE %s (\n"
|
||||
+ " id INT NOT NULL PRIMARY KEY,\n"
|
||||
+ " name VARCHAR(50) NOT NULL,\n"
|
||||
+ " updated_at DATE NOT NULL\n"
|
||||
+ ");", getFullyQualifiedTableName(streamOneName))
|
||||
.with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')",
|
||||
getFullyQualifiedTableName(streamOneName))
|
||||
.with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')",
|
||||
getFullyQualifiedTableName(streamOneName))
|
||||
.with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')",
|
||||
getFullyQualifiedTableName(streamOneName));
|
||||
|
||||
// Create a fresh second table
|
||||
final String streamTwoName = TABLE_NAME + "two";
|
||||
final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName);
|
||||
// Insert records into second table
|
||||
testdb.with("CREATE TABLE %s (\n"
|
||||
+ " id INT NOT NULL PRIMARY KEY,\n"
|
||||
+ " name VARCHAR(50) NOT NULL,\n"
|
||||
+ " updated_at DATE NOT NULL\n"
|
||||
+ ");", streamTwoFullyQualifiedName)
|
||||
.with("INSERT INTO %s (id, name, updated_at) VALUES (40, 'Jean Luc','2006-10-19')",
|
||||
streamTwoFullyQualifiedName)
|
||||
.with("INSERT INTO %s (id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')",
|
||||
streamTwoFullyQualifiedName)
|
||||
.with("INSERT INTO %s (id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')",
|
||||
streamTwoFullyQualifiedName);
|
||||
|
||||
final List<AirbyteMessage> streamOneExpectedRecords = Arrays.asList(
|
||||
createRecord(streamOneName, getDefaultNamespace(), Map
|
||||
.of(COL_ID, ID_VALUE_1,
|
||||
COL_NAME, "picard",
|
||||
COL_UPDATED_AT, "2004-10-19")),
|
||||
createRecord(streamOneName, getDefaultNamespace(), Map
|
||||
.of(COL_ID, ID_VALUE_2,
|
||||
COL_NAME, "crusher",
|
||||
COL_UPDATED_AT,
|
||||
"2005-10-19")),
|
||||
createRecord(streamOneName, getDefaultNamespace(), Map
|
||||
.of(COL_ID, ID_VALUE_3,
|
||||
COL_NAME, "vash",
|
||||
COL_UPDATED_AT, "2006-10-19")));
|
||||
|
||||
// Create records list that we expect to see in the state message
|
||||
final List<AirbyteMessage> streamTwoExpectedRecords = Arrays.asList(
|
||||
createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(
|
||||
COL_ID, 40,
|
||||
COL_NAME, "Jean Luc",
|
||||
COL_UPDATED_AT, "2006-10-19")),
|
||||
createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(
|
||||
COL_ID, 41,
|
||||
COL_NAME, "Groot",
|
||||
COL_UPDATED_AT, "2006-10-19")),
|
||||
createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of(
|
||||
COL_ID, 42,
|
||||
COL_NAME, "Thanos",
|
||||
COL_UPDATED_AT, "2006-10-19")));
|
||||
|
||||
// Prep and create a configured catalog to perform sync
|
||||
final AirbyteStream streamOne = getAirbyteStream(streamOneName, getDefaultNamespace());
|
||||
final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, getDefaultNamespace());
|
||||
|
||||
final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(
|
||||
new AirbyteCatalog().withStreams(List.of(streamOne, streamTwo)));
|
||||
configuredCatalog.getStreams().forEach(airbyteStream -> {
|
||||
airbyteStream.setSyncMode(SyncMode.INCREMENTAL);
|
||||
airbyteStream.setCursorField(List.of(COL_ID));
|
||||
airbyteStream.setDestinationSyncMode(DestinationSyncMode.APPEND);
|
||||
airbyteStream.withPrimaryKey(List.of(List.of(COL_ID)));
|
||||
});
|
||||
|
||||
// Perform initial sync
|
||||
final List<AirbyteMessage> messagesFromFirstSync = MoreIterators
|
||||
.toList(source().read(config, configuredCatalog, null));
|
||||
|
||||
final List<AirbyteMessage> recordsFromFirstSync = filterRecords(messagesFromFirstSync);
|
||||
|
||||
setEmittedAtToNull(messagesFromFirstSync);
|
||||
// All records in the 2 configured streams should be present
|
||||
assertThat(filterRecords(recordsFromFirstSync)).containsExactlyElementsOf(
|
||||
Stream.concat(streamOneExpectedRecords.stream().parallel(),
|
||||
streamTwoExpectedRecords.stream().parallel()).collect(toList()));
|
||||
|
||||
final List<AirbyteStateMessage> actualFirstSyncState = extractStateMessage(messagesFromFirstSync);
|
||||
// Since we are emitting a state message after each record, we should have 1 state for each record -
|
||||
// 3 from stream1 and 3 from stream2
|
||||
assertEquals(6, actualFirstSyncState.size());
|
||||
|
||||
// The expected state type should be 2 ordered_column's and the last one being cursor_based
|
||||
final List<String> expectedStateTypesFromFirstSync = List.of("ordered_column", "ordered_column", "cursor_based");
|
||||
final List<String> stateTypeOfStreamOneStatesFromFirstSync =
|
||||
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, STATE_TYPE_KEY);
|
||||
final List<String> stateTypeOfStreamTwoStatesFromFirstSync =
|
||||
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamTwoName, STATE_TYPE_KEY);
|
||||
// It should be the same for stream1 and stream2
|
||||
assertEquals(stateTypeOfStreamOneStatesFromFirstSync, expectedStateTypesFromFirstSync);
|
||||
assertEquals(stateTypeOfStreamTwoStatesFromFirstSync, expectedStateTypesFromFirstSync);
|
||||
|
||||
// Create the expected ordered_column values that we should see
|
||||
final List<String> expectedOrderedColumnValueFromFirstSync = List.of("1", "2");
|
||||
final List<String> orderedColumnValuesOfStreamOneFromFirstSync =
|
||||
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, "ordered_col_val");
|
||||
final List<String> orderedColumnValuesOfStreamTwoFromFirstSync =
|
||||
extractSpecificFieldFromCombinedMessages(messagesFromFirstSync, streamOneName, "ordered_col_val");
|
||||
|
||||
// Verifying each element and its index to match.
|
||||
// Only checking the first 2 elements since we have verified that the last state_type is
|
||||
// "cursor_based"
|
||||
assertEquals(expectedOrderedColumnValueFromFirstSync.get(0), orderedColumnValuesOfStreamOneFromFirstSync.get(0));
|
||||
assertEquals(expectedOrderedColumnValueFromFirstSync.get(1), orderedColumnValuesOfStreamOneFromFirstSync.get(1));
|
||||
assertEquals(expectedOrderedColumnValueFromFirstSync.get(0), orderedColumnValuesOfStreamTwoFromFirstSync.get(0));
|
||||
assertEquals(expectedOrderedColumnValueFromFirstSync.get(1), orderedColumnValuesOfStreamTwoFromFirstSync.get(1));
|
||||
|
||||
// Extract only state messages for each stream
|
||||
final List<AirbyteStateMessage> streamOneStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamOneName);
|
||||
final List<AirbyteStateMessage> streamTwoStateMessagesFromFirstSync = extractStateMessage(messagesFromFirstSync, streamTwoName);
|
||||
// Extract the incremental states of each stream's first and second state message
|
||||
final List<JsonNode> streamOneIncrementalStatesFromFirstSync =
|
||||
List.of(streamOneStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"),
|
||||
streamOneStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state"));
|
||||
final JsonNode streamOneFinalStreamStateFromFirstSync = streamOneStateMessagesFromFirstSync.get(2).getStream().getStreamState();
|
||||
|
||||
final List<JsonNode> streamTwoIncrementalStatesFromFirstSync =
|
||||
List.of(streamTwoStateMessagesFromFirstSync.get(0).getStream().getStreamState().get("incremental_state"),
|
||||
streamTwoStateMessagesFromFirstSync.get(1).getStream().getStreamState().get("incremental_state"));
|
||||
final JsonNode streamTwoFinalStreamStateFromFirstSync = streamTwoStateMessagesFromFirstSync.get(2).getStream().getStreamState();
|
||||
|
||||
// The incremental_state of each stream's first and second incremental states is expected
|
||||
// to be identical to the stream_state of the final state message for each stream
|
||||
assertEquals(streamOneIncrementalStatesFromFirstSync.get(0), streamOneFinalStreamStateFromFirstSync);
|
||||
assertEquals(streamOneIncrementalStatesFromFirstSync.get(1), streamOneFinalStreamStateFromFirstSync);
|
||||
assertEquals(streamTwoIncrementalStatesFromFirstSync.get(0), streamTwoFinalStreamStateFromFirstSync);
|
||||
assertEquals(streamTwoIncrementalStatesFromFirstSync.get(1), streamTwoFinalStreamStateFromFirstSync);
|
||||
|
||||
// Sync should work with a ordered_column state AND a cursor-based state from each stream
|
||||
// Forcing a sync with
|
||||
// - stream one state still being the first record read via Ordered column.
|
||||
// - stream two state being the Ordered Column state before the final emitted state before the
|
||||
// cursor
|
||||
// switch
|
||||
final List<AirbyteMessage> messagesFromSecondSyncWithMixedStates = MoreIterators
|
||||
.toList(source().read(config, configuredCatalog,
|
||||
Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0),
|
||||
streamTwoStateMessagesFromFirstSync.get(1)))));
|
||||
|
||||
// Extract only state messages for each stream after second sync
|
||||
final List<AirbyteStateMessage> streamOneStateMessagesFromSecondSync =
extractStateMessage(messagesFromSecondSyncWithMixedStates, streamOneName);
final List<String> stateTypeOfStreamOneStatesFromSecondSync =
extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, streamOneName, STATE_TYPE_KEY);

final List<AirbyteStateMessage> streamTwoStateMessagesFromSecondSync =
extractStateMessage(messagesFromSecondSyncWithMixedStates, streamTwoName);
final List<String> stateTypeOfStreamTwoStatesFromSecondSync =
extractSpecificFieldFromCombinedMessages(messagesFromSecondSyncWithMixedStates, streamTwoName, STATE_TYPE_KEY);

// Stream One states after the second sync are expected to have 2 stream states
// - 1 with PrimaryKey state_type and 1 state that is of cursorBased state type
assertEquals(2, streamOneStateMessagesFromSecondSync.size());
assertEquals(List.of("ordered_column", "cursor_based"), stateTypeOfStreamOneStatesFromSecondSync);

// Stream Two states after the second sync are expected to have 1 stream state
// - The state that is of cursorBased state type
assertEquals(1, streamTwoStateMessagesFromSecondSync.size());
assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromSecondSync);

// Add some data to each table and perform a third read.
// Expect to see all records synced via the cursorBased method and not ordered_column
testdb.with("INSERT INTO %s (id, name, updated_at) VALUES (4,'Hooper','2006-10-19')",
getFullyQualifiedTableName(streamOneName))
.with("INSERT INTO %s (id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')",
streamTwoFullyQualifiedName);

final List<AirbyteMessage> messagesFromThirdSync = MoreIterators
.toList(source().read(config, configuredCatalog,
Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1),
streamTwoStateMessagesFromSecondSync.get(0)))));

// Extract only state messages, state type, and cursor for each stream after the third sync
final List<AirbyteStateMessage> streamOneStateMessagesFromThirdSync =
extractStateMessage(messagesFromThirdSync, streamOneName);
final List<String> stateTypeOfStreamOneStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, STATE_TYPE_KEY);
final List<String> cursorOfStreamOneStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamOneName, "cursor");

final List<AirbyteStateMessage> streamTwoStateMessagesFromThirdSync =
extractStateMessage(messagesFromThirdSync, streamTwoName);
final List<String> stateTypeOfStreamTwoStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, STATE_TYPE_KEY);
final List<String> cursorOfStreamTwoStatesFromThirdSync =
extractSpecificFieldFromCombinedMessages(messagesFromThirdSync, streamTwoName, "cursor");

// Both streams should now be synced via standard cursor and have updated max cursor values
// cursor: 4 for stream one
// cursor: 43 for stream two
assertEquals(1, streamOneStateMessagesFromThirdSync.size());
assertEquals(List.of("cursor_based"), stateTypeOfStreamOneStatesFromThirdSync);
assertEquals(List.of("4"), cursorOfStreamOneStatesFromThirdSync);

assertEquals(1, streamTwoStateMessagesFromThirdSync.size());
assertEquals(List.of("cursor_based"), stateTypeOfStreamTwoStatesFromThirdSync);
assertEquals(List.of("43"), cursorOfStreamTwoStatesFromThirdSync);
}

private AirbyteStream getAirbyteStream(final String tableName, final String namespace) {
return CatalogHelpers.createAirbyteStream(
tableName,
namespace,
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID)));
}

@Override
protected AirbyteCatalog getCatalog(final String defaultNamespace) {
return new AirbyteCatalog().withStreams(List.of(
CatalogHelpers.createAirbyteStream(
TABLE_NAME,
defaultNamespace,
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID)))
.withIsResumable(true),
CatalogHelpers.createAirbyteStream(
TABLE_NAME_WITHOUT_PK,
defaultNamespace,
Field.of(COL_ID, JsonSchemaType.INTEGER),
Field.of(COL_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(Collections.emptyList())
.withIsResumable(false),
CatalogHelpers.createAirbyteStream(
TABLE_NAME_COMPOSITE_PK,
defaultNamespace,
Field.of(COL_FIRST_NAME, JsonSchemaType.STRING),
Field.of(COL_LAST_NAME, JsonSchemaType.STRING),
Field.of(COL_UPDATED_AT, JsonSchemaType.STRING_DATE))
.withSupportedSyncModes(List.of(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(
List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME)))
.withIsResumable(true)));
}

@Override
protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configuredAirbyteStream,
final String cursorField,
final String cursorValue) {
return new CursorBasedStatus().withStateType(StateType.CURSOR_BASED).withVersion(2L)
.withStreamName(configuredAirbyteStream.getStream().getName())
.withStreamNamespace(configuredAirbyteStream.getStream().getNamespace())
.withCursorField(List.of(cursorField))
.withCursor(cursorValue)
.withCursorRecordCount(1L);
}

// Override from parent class as we're no longer including the legacy Data field.
@Override
protected List<AirbyteMessage> createExpectedTestMessages(final List<? extends DbStreamState> states, final long numRecords) {
return states.stream()
.map(s -> new AirbyteMessage().withType(Type.STATE)
.withState(
new AirbyteStateMessage().withType(AirbyteStateType.STREAM)
.withStream(new AirbyteStreamState()
.withStreamDescriptor(new StreamDescriptor().withNamespace(s.getStreamNamespace()).withName(s.getStreamName()))
.withStreamState(Jsons.jsonNode(s)))
.withSourceStats(new AirbyteStateStats().withRecordCount((double) numRecords))))
.collect(
Collectors.toList());
}

@Override
protected JsonNode getStateData(final AirbyteMessage airbyteMessage, final String streamName) {
final JsonNode streamState = airbyteMessage.getState().getStream().getStreamState();
if (streamState.get("stream_name").asText().equals(streamName)) {
return streamState;
}

throw new IllegalArgumentException("Stream not found in state message: " + streamName);
}

@Override
protected List<AirbyteMessage> getExpectedAirbyteMessagesSecondSync(final String namespace) {
final List<AirbyteMessage> expectedMessages = new ArrayList<>();
expectedMessages.addAll(List.of(createRecord(streamName(), namespace, ImmutableMap
.of(COL_ID, ID_VALUE_4,
COL_NAME, "riker",
COL_UPDATED_AT, "2006-10-19")),
createRecord(streamName(), namespace, ImmutableMap
.of(COL_ID, ID_VALUE_5,
COL_NAME, "data",
COL_UPDATED_AT, "2006-10-19"))));
final DbStreamState state = new CursorBasedStatus()
.withStateType(StateType.CURSOR_BASED)
.withVersion(2L)
.withStreamName(streamName())
.withStreamNamespace(namespace)
.withCursorField(ImmutableList.of(COL_ID))
.withCursor("5")
.withCursorRecordCount(1L);

expectedMessages.addAll(createExpectedTestMessages(List.of(state), 2L));
return expectedMessages;
}

@Override
protected void validateFullRefreshStateMessageReadSuccess(final List<? extends AirbyteStateMessage> stateMessages) {
var finalStateMessage = stateMessages.get(stateMessages.size() - 1);
assertEquals(
finalStateMessage.getStream().getStreamState().get("state_type").textValue(),
"ordered_column");
assertEquals(finalStateMessage.getStream().getStreamState().get("ordered_col").textValue(), "id");
assertEquals(finalStateMessage.getStream().getStreamState().get("ordered_col_val").textValue(), "3");
}

}
@@ -1,305 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.source.mssql;

import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
import static org.assertj.core.api.AssertionsForClassTypes.catchThrowable;
import static org.junit.jupiter.api.Assertions.*;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.Lists;
import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo;
import io.airbyte.commons.exceptions.ConfigErrorException;
import io.airbyte.commons.util.MoreIterators;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialLoadHandler;
import io.airbyte.integrations.source.mssql.initialsync.MssqlInitialReadUtil;
import io.airbyte.protocol.models.CommonField;
import io.airbyte.protocol.models.Field;
import io.airbyte.protocol.models.JsonSchemaType;
import io.airbyte.protocol.models.v0.*;
import java.sql.JDBCType;
import java.sql.SQLException;
import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.junit.jupiter.api.*;

class MssqlSourceTest {

private static final String STREAM_NAME = "id_and_name";
private static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(Lists.newArrayList(CatalogHelpers.createAirbyteStream(
STREAM_NAME,
"dbo",
Field.of("id", JsonSchemaType.INTEGER),
Field.of("name", JsonSchemaType.STRING),
Field.of("born", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE))
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of("id")))
.withIsResumable(true)));

private MsSQLTestDatabase testdb;

private MssqlSource source() {
return new MssqlSource();
}

// how to interact with the mssql test container manually:
// 1. exec into the mssql container (not the test-runner container)
// 2. /opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "A_Str0ng_Required_Password"
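// For example, once connected one might inspect the seeded test data (a sketch; the
// id_and_name table and its rows are the ones created in setup() below):
//   1> SELECT id, name, born FROM id_and_name;
//   2> GO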
@BeforeEach
void setup() {
testdb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022)
.with("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));")
.with("INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', " +
"'2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');");
}

@AfterEach
void cleanUp() {
testdb.close();
}

private JsonNode getConfig() {
return testdb.testConfigBuilder()
.withoutSsl()
.build();
}

// if a column in mssql is used as a primary key and in a separate index the discover query returns
// the column twice. we now de-duplicate it (pr: https://github.com/airbytehq/airbyte/pull/983).
// this tests that this de-duplication is successful.
@Test
void testDiscoverWithPk() {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
}

@Test
void testDiscoverWithoutPk() {
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(STREAM_NAME, actual.getStreams().get(0).getName());
assertEquals(false, actual.getStreams().get(0).getIsResumable());
}

@Test
@Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out")
public void testTableWithNullCursorValueShouldThrowException() throws Exception {
testdb
.with("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL")
.with("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)");

ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(CatalogHelpers.createAirbyteStream(
STREAM_NAME,
testdb.getDatabaseName(),
Field.of("id", JsonSchemaType.INTEGER),
Field.of("name", JsonSchemaType.STRING),
Field.of("born", JsonSchemaType.STRING))
.withSupportedSyncModes(
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
.withSourceDefinedPrimaryKey(List.of(List.of("id"))));

final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(
Collections.singletonList(configuredAirbyteStream));

final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(
source().read(getConfig(), catalog, null)));
assertThat(throwable).isInstanceOf(ConfigErrorException.class)
.hasMessageContaining(
"The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}");
}

@Test
void testDiscoverWithNonClusteredPk() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE UNIQUE CLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
final Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db,
new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace()));

String firstOcKey = oc.entrySet().iterator().next().getKey();
List<String> ocValues = oc.get(firstOcKey);
assertEquals(1, ocValues.size());
assertEquals("name", ocValues.get(0));
}

@Test
void testDiscoverWithNonUniqueClusteredIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE CLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
final Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db,
new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace()));
assertNull(oc);
}

@Test
void testDiscoverWithNonClusteredIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE NONCLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());
final Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db,
new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace()));

assertNull(oc);
}

@Test
void testDiscoverWithClusteredCompositeIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE UNIQUE CLUSTERED INDEX n1 ON id_and_name (id, name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());

AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().get(0).getName()).withNamespace(actual.getStreams().get(0).getNamespace())
.withSourceDefinedPrimaryKey(actual.getStreams().get(0).getSourceDefinedPrimaryKey());

Map<String, List<String>> oc = MssqlInitialLoadHandler.discoverClusteredIndexForStream(db, stream);

String firstOcKey = oc.entrySet().iterator().next().getKey();
List<String> ocValues = oc.get(firstOcKey);
assertEquals(2, ocValues.size());

}

@Test
void testUsingPkWhenClusteredCompositeIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE CLUSTERED INDEX n1 ON id_and_name (id, name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());

AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().getFirst().getName()).withNamespace(actual.getStreams().getFirst().getNamespace())
.withSourceDefinedPrimaryKey(actual.getStreams().getFirst().getSourceDefinedPrimaryKey());

ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(stream);

final List<List<String>> primaryKey = configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey();
Optional<String> oc = MssqlInitialReadUtil.selectOcFieldName(db, configuredAirbyteStream);

assertEquals(primaryKey.getFirst().getFirst(), oc.orElse("No oc"));

}

@Test
void testNonClusteredIndex() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());

AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().getFirst().getName()).withNamespace(actual.getStreams().getFirst().getNamespace())
.withSourceDefinedPrimaryKey(actual.getStreams().getFirst().getSourceDefinedPrimaryKey());

ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(stream);

Optional<String> oc = MssqlInitialReadUtil.selectOcFieldName(db, configuredAirbyteStream);
final List<List<String>> primaryKey = configuredAirbyteStream.getStream().getSourceDefinedPrimaryKey();

assertEquals(primaryKey.getFirst().getFirst(), oc.orElse("No oc"));

}

@Test
void testNonClusteredIndexNoPK() throws SQLException {
testdb
.with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY NONCLUSTERED (id);")
.with("CREATE INDEX i1 ON id_and_name (id);")
.with("CREATE NONCLUSTERED INDEX n1 ON id_and_name (name)");
final AirbyteCatalog actual = source().discover(getConfig());
assertEquals(CATALOG, actual);
final var db = source().createDatabase(getConfig());

AirbyteStream stream = new AirbyteStream().withName(
actual.getStreams().getFirst().getName()).withNamespace(actual.getStreams().getFirst().getNamespace());

ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
SyncMode.INCREMENTAL)
.withCursorField(Lists.newArrayList("id"))
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withSyncMode(SyncMode.INCREMENTAL)
.withStream(stream);

Optional<String> oc = MssqlInitialReadUtil.selectOcFieldName(db, configuredAirbyteStream);

assert (oc.isEmpty());

}

@Test
void testSetCursorCutoffInfoForValue() {
CursorInfo cursorInfo = new CursorInfo(null, null, null, null);
Instant now = Instant.parse("2024-06-01T12:34:56Z");

// DATE
CommonField<JDBCType> dateField = new CommonField<>("date_col", JDBCType.DATE);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, dateField, now);
assertEquals("2024-06-01", cursorInfo.getCutoffTime());

// TIMESTAMP
cursorInfo = new CursorInfo(null, null, null, null);
CommonField<JDBCType> tsField = new CommonField<>("ts_col", JDBCType.TIMESTAMP);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, tsField, now);
assertEquals("2024-06-01T00:00:00Z", cursorInfo.getCutoffTime()); // ISO_OFFSET_DATE_TIME

// TIMESTAMP_WITH_TIMEZONE
cursorInfo = new CursorInfo(null, null, null, null);
CommonField<JDBCType> tsTzField = new CommonField<>("ts_tz_col", JDBCType.TIMESTAMP_WITH_TIMEZONE);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, tsTzField, now);
assertEquals("2024-06-01T00:00:00.000000Z", cursorInfo.getCutoffTime());

// Non-temporal type
cursorInfo = new CursorInfo(null, null, null, null);
CommonField<JDBCType> intField = new CommonField<>("int_col", JDBCType.INTEGER);
MssqlSource.setCursorCutoffInfoForValue(cursorInfo, intField, now);
assertNull(cursorInfo.getCutoffTime());
}

}
@@ -1,122 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.source.mssql;

import static org.junit.jupiter.api.Assertions.fail;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.commons.exceptions.ConnectionErrorException;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.BaseImage;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.CertificateKey;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase.ContainerModifier;
import io.airbyte.protocol.models.v0.AirbyteCatalog;
import java.net.InetAddress;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MssqlSslSourceTest {

private MsSQLTestDatabase testDb;
private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSslSourceTest.class);

@BeforeEach
void setup() {
testDb = MsSQLTestDatabase.in(BaseImage.MSSQL_2022, ContainerModifier.AGENT, ContainerModifier.WITH_SSL_CERTIFICATES);
}

@AfterEach
public void tearDown() {
testDb.close();
}

@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateTrustHostnameWithValidCertificates(CertificateKey certificateKey) throws Exception {
if (!certificateKey.isValid) {
return;
}
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.testConfigBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate))
.build();
AirbyteCatalog catalog = new MssqlSource().discover(config);
}

@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateTrustHostnameWithInvalidCertificates(CertificateKey certificateKey) throws Exception {
if (certificateKey.isValid) {
return;
}
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.testConfigBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate))
.build();
try {
AirbyteCatalog catalog = new MssqlSource().discover(config);
} catch (ConnectionErrorException e) {
if (!e.getCause().getCause().getMessage().contains("PKIX path validation") &&
!e.getCause().getCause().getMessage().contains("PKIX path building failed")) {
throw e;
}
}
}

@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateNoTrustHostnameWrongHostname(CertificateKey certificateKey) throws Throwable {
if (!certificateKey.isValid) {
return;
}
String containerIp = InetAddress.getByName(testDb.getContainer().getHost()).getHostAddress();
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.configBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate))
.with(JdbcUtils.HOST_KEY, containerIp)
.with(JdbcUtils.PORT_KEY, testDb.getContainer().getFirstMappedPort())
.withCredentials()
.withDatabase()
.build();
try {
AirbyteCatalog catalog = new MssqlSource().discover(config);
fail("discover should have failed!");
} catch (ConnectionErrorException e) {
String expectedMessage =
"Failed to validate the server name \"" + containerIp + "\"in a certificate during Secure Sockets Layer (SSL) initialization.";
if (!e.getExceptionMessage().contains(expectedMessage)) {
fail("exception message was " + e.getExceptionMessage() + "\n expected: " + expectedMessage);
}
}
}

@ParameterizedTest
@EnumSource(CertificateKey.class)
public void testDiscoverWithCertificateNoTrustHostnameAlternateHostname(CertificateKey certificateKey) throws Exception {
final String containerIp = InetAddress.getByName(testDb.getContainer().getHost()).getHostAddress();
if (certificateKey.isValid) {
String certificate = testDb.getCertificate(certificateKey);
JsonNode config = testDb.configBuilder()
.withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
"certificate", certificate,
"hostNameInCertificate", testDb.getContainer().getHost()))
.with(JdbcUtils.HOST_KEY, containerIp)
.with(JdbcUtils.PORT_KEY, testDb.getContainer().getFirstMappedPort())
.withCredentials()
.withDatabase()
.build();
AirbyteCatalog catalog = new MssqlSource().discover(config);
}
}

}
@@ -1,46 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.source.mssql;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource;
import io.airbyte.cdk.integrations.source.jdbc.test.JdbcStressTest;
import java.sql.JDBCType;
import java.util.Optional;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;

@Disabled
public class MssqlStressTest extends JdbcStressTest {

private MsSQLTestDatabase testdb;

@BeforeEach
public void setup() throws Exception {
testdb = MsSQLTestDatabase.in(MsSQLTestDatabase.BaseImage.MSSQL_2022);
super.setup();
}

@Override
public Optional<String> getDefaultSchemaName() {
return Optional.of("dbo");
}

@Override
public JsonNode getConfig() {
return testdb.testConfigBuilder().with("is_test", true).build();
}

@Override
public AbstractJdbcSource<JDBCType> getSource() {
return new MssqlSource();
}

@Override
public String getDriverClass() {
return MssqlSource.DRIVER_CLASS;
}

}
@@ -0,0 +1,80 @@
/* Copyright (c) 2025 Airbyte, Inc., all rights reserved. */
package io.airbyte.integrations.source.mssql

import io.airbyte.cdk.testcontainers.TestContainerFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import org.testcontainers.containers.MSSQLServerContainer
import org.testcontainers.containers.Network
import org.testcontainers.utility.DockerImageName

object MsSqlServerContainerFactory {
const val COMPATIBLE_NAME = "mcr.microsoft.com/mssql/server:2022-latest"
private val log = KotlinLogging.logger {}

init {
TestContainerFactory.register(COMPATIBLE_NAME) { imageName: DockerImageName ->
MSSQLServerContainer(imageName).acceptLicense()
}
}

sealed interface MsSqlServerContainerModifier :
TestContainerFactory.ContainerModifier<MSSQLServerContainer<*>>

data object WithNetwork : MsSqlServerContainerModifier {
override fun modify(container: MSSQLServerContainer<*>) {
container.withNetwork(Network.newNetwork())
}
}

data object WithTestDatabase : MsSqlServerContainerModifier {
override fun modify(container: MSSQLServerContainer<*>) {
container.start()
container.execInContainer(
"/opt/mssql-tools18/bin/sqlcmd",
"-S",
"localhost",
"-U",
container.username,
"-P",
container.password,
"-Q",
"CREATE DATABASE test",
"-C"
)
}
}

fun exclusive(
imageName: String,
vararg modifiers: MsSqlServerContainerModifier,
): MSSQLServerContainer<*> {
val dockerImageName =
DockerImageName.parse(imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME)
return TestContainerFactory.exclusive(dockerImageName, *modifiers)
}

fun shared(
imageName: String,
vararg modifiers: MsSqlServerContainerModifier,
): MSSQLServerContainer<*> {
val dockerImageName =
DockerImageName.parse(imageName).asCompatibleSubstituteFor(COMPATIBLE_NAME)
return TestContainerFactory.shared(dockerImageName, *modifiers)
}
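
// Example usage (a sketch only, based on how the datatype test below calls this factory):
//   val container = MsSqlServerContainerFactory.shared(COMPATIBLE_NAME, WithNetwork, WithTestDatabase)
//   val spec = MsSqlServerContainerFactory.config(container)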

@JvmStatic
fun config(
msSQLContainer: MSSQLServerContainer<*>
): MsSqlServerSourceConfigurationSpecification =
MsSqlServerSourceConfigurationSpecification().apply {
host = msSQLContainer.host
port = msSQLContainer.getMappedPort(MSSQLServerContainer.MS_SQL_SERVER_PORT)
username = msSQLContainer.username
password = msSQLContainer.password
jdbcUrlParams = ""
database = "test" // Connect to test database
checkpointTargetIntervalSeconds = 60
concurrency = 1
setIncrementalValue(UserDefinedCursor())
}
}
@@ -0,0 +1,630 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.source.mssql

import io.airbyte.cdk.data.AirbyteSchemaType
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.discover.MetaField
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.cdk.read.DatatypeTestCase
import io.airbyte.cdk.read.DatatypeTestOperations
import io.airbyte.cdk.read.DynamicDatatypeTestFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import java.sql.Connection
import org.junit.jupiter.api.BeforeAll
import org.junit.jupiter.api.DynamicNode
import org.junit.jupiter.api.TestFactory
import org.junit.jupiter.api.Timeout
import org.testcontainers.containers.MSSQLServerContainer

class MsSqlServerDatatypeIntegrationTest {

@TestFactory
@Timeout(300)
fun syncTests(): Iterable<DynamicNode> =
DynamicDatatypeTestFactory(MsSqlServerDatatypeTestOperations).build(dbContainer)

companion object {

lateinit var dbContainer: MSSQLServerContainer<*>

@JvmStatic
@BeforeAll
@Timeout(value = 300)
fun startAndProvisionTestContainer() {
dbContainer =
MsSqlServerContainerFactory.shared(
"mcr.microsoft.com/mssql/server:2022-latest",
MsSqlServerContainerFactory.WithNetwork,
MsSqlServerContainerFactory.WithTestDatabase
)
}
}
}

object MsSqlServerDatatypeTestOperations :
DatatypeTestOperations<
MSSQLServerContainer<*>,
MsSqlServerSourceConfigurationSpecification,
MsSqlServerSourceConfiguration,
MsSqlServerSourceConfigurationFactory,
MsSqlServerDatatypeTestCase
> {

private val log = KotlinLogging.logger {}

override val withGlobal: Boolean = true
override val globalCursorMetaField: MetaField =
MsSqlSourceOperations.MsSqlServerCdcMetaFields.CDC_CURSOR

override fun streamConfigSpec(
container: MSSQLServerContainer<*>
): MsSqlServerSourceConfigurationSpecification =
MsSqlServerContainerFactory.config(container).also {
it.setIncrementalValue(UserDefinedCursor())
}

override fun globalConfigSpec(
container: MSSQLServerContainer<*>
): MsSqlServerSourceConfigurationSpecification =
MsSqlServerContainerFactory.config(container).also { it.setIncrementalValue(Cdc()) }

override val configFactory: MsSqlServerSourceConfigurationFactory =
MsSqlServerSourceConfigurationFactory()

override fun createStreams(config: MsSqlServerSourceConfiguration) {
JdbcConnectionFactory(config).get().use { connection: Connection ->
connection.isReadOnly = false

// Enable CDC on the database (required before enabling CDC on tables)
try {
val enableDbCdcSql = "EXEC sys.sp_cdc_enable_db"
log.info { "Enabling CDC on database: $enableDbCdcSql" }
connection.createStatement().use { stmt -> stmt.execute(enableDbCdcSql) }
log.info { "Successfully enabled CDC on database" }
} catch (e: Exception) {
log.warn {
"Failed to enable CDC on database (may already be enabled): ${e.message}"
}
}

// Activate CDC to ensure initial LSN is available for testing
activateCdcWithInitialLsn(connection)
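
// Rationale (an assumption based on documented SQL Server CDC behavior, not something this
// test asserts): until the capture job has recorded at least one change, functions such as
// sys.fn_cdc_get_min_lsn / sys.fn_cdc_get_max_lsn can return NULL, leaving CDC-based test
// syncs without a valid starting LSN.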

for ((_, case) in testCases) {
for (ddl in case.ddl) {
log.info { "test case ${case.id}: executing $ddl" }
connection.createStatement().use { stmt -> stmt.execute(ddl) }
}

// Enable CDC for tables that support it (CDC-compatible data types)
if (case.isGlobal) {
try {
val enableCdcSql =
"EXEC sys.sp_cdc_enable_table @source_schema = 'dbo', @source_name = '${case.id}', @role_name = 'CDC'"
log.info { "test case ${case.id}: enabling CDC with $enableCdcSql" }
connection.createStatement().use { stmt -> stmt.execute(enableCdcSql) }
log.info { "test case ${case.id}: successfully enabled CDC on table" }
} catch (e: Exception) {
log.warn { "test case ${case.id}: failed to enable CDC: ${e.message}" }
}
}
}
}
}

override fun populateStreams(config: MsSqlServerSourceConfiguration) {
JdbcConnectionFactory(config).get().use { connection: Connection ->
connection.isReadOnly = false
for ((_, case) in testCases) {
for (dml in case.dml) {
log.info { "test case ${case.id}: executing $dml" }
connection.createStatement().use { stmt -> stmt.execute(dml) }
}
}
}

// Open a NEW connection to force CDC scan after commit
JdbcConnectionFactory(config).get().use { connection: Connection ->
try {
connection.createStatement().use { stmt ->
// Manually run the CDC scan to capture all pending changes
stmt.execute("EXEC sys.sp_cdc_scan")
log.info {
"Executed sp_cdc_scan in new connection to capture committed changes"
}
}
} catch (e: Exception) {
log.error { "Failed to force CDC scan after data population: ${e.message}" }
}
}
}

/**
* Activates CDC and generates initial LSN required for testing. Creates a dummy table, enables
* CDC on it, inserts data, and ensures LSN is available.
*/
private fun activateCdcWithInitialLsn(connection: Connection) {
try {
connection.createStatement().use { stmt ->
// Drop and recreate dummy table to ensure clean state
stmt.execute("DROP TABLE IF EXISTS dbo.cdc_dummy")
stmt.execute("CREATE TABLE dbo.cdc_dummy (id INT PRIMARY KEY)")
stmt.execute(
"EXEC sys.sp_cdc_enable_table @source_schema = 'dbo', @source_name = 'cdc_dummy', @role_name = NULL"
)

// Insert data to generate LSN
stmt.execute(
"INSERT INTO dbo.cdc_dummy (id) SELECT COALESCE(MAX(id), 0) + 1 FROM dbo.cdc_dummy"
)

// Start CDC capture job and trigger scan
try {
stmt.execute("EXEC sys.sp_cdc_start_job @job_type = 'capture'")
Thread.sleep(2000)
} catch (e: Exception) {
log.debug { "CDC capture job start failed: ${e.message}" }
}

try {
stmt.execute("EXEC sys.sp_cdc_scan")
Thread.sleep(1000)
} catch (e: Exception) {
log.debug { "Manual CDC scan failed: ${e.message}" }
}

log.info { "CDC activated with dummy data for testing" }
}
} catch (e: Exception) {
log.warn { "CDC activation failed: ${e.message}" }
}
}

// Data type test values
val booleanValues =
mapOf(
"0" to "false",
"1" to "true",
"'true'" to "true",
"'false'" to "false",
"NULL" to "null",
)

val integerValues =
mapOf(
"10" to "10",
"100000000" to "100000000",
"200000000" to "200000000",
"-2147483648" to "-2147483648",
"2147483647" to "2147483647",
"NULL" to "null",
)

val bigintValues =
mapOf(
"-9223372036854775808" to "-9223372036854775808",
"9223372036854775807" to "9223372036854775807",
"0" to "0",
"NULL" to "null",
)

val smallintValues =
mapOf(
"-32768" to "-32768",
"32767" to "32767",
"NULL" to "null",
)

val tinyintValues =
mapOf(
"0" to "0",
"255" to "255",
"NULL" to "null",
)

val decimalValues =
mapOf(
"999.33" to "999.33",
"NULL" to "null",
)

val numericValues =
mapOf(
"'99999'" to "99999",
"NULL" to "null",
)

val moneyValues =
mapOf(
"'9990000.3647'" to "9990000.3647",
"NULL" to "null",
)

val smallmoneyValues =
mapOf(
"'-214748.3648'" to "-214748.3648",
"214748.3647" to "214748.3647",
"NULL" to "null",
)

val floatValues =
mapOf(
"'123'" to "123.0",
"'1234567890.1234567'" to "1234567890.1234567",
"NULL" to "null",
)

val realValues =
mapOf(
"'123'" to "123.0",
"'1234567890.1234567'" to "1234568000",
"NULL" to "null",
)

val dateValues =
mapOf(
"'0001-01-01'" to """"0001-01-01"""",
"'9999-12-31'" to """"9999-12-31"""",
"'1999-01-08'" to """"1999-01-08"""",
"NULL" to "null",
)

val smalldatetimeValues =
mapOf(
"'1900-01-01'" to """"1900-01-01T00:00:00.000000"""",
"'2079-06-06'" to """"2079-06-06T00:00:00.000000"""",
"NULL" to "null",
)

val datetimeValues =
mapOf(
"'1753-01-01'" to """"1753-01-01T00:00:00.000000"""",
"'9999-12-31'" to """"9999-12-31T00:00:00.000000"""",
"'9999-12-31T13:00:04'" to """"9999-12-31T13:00:04.000000"""",
"'9999-12-31T13:00:04.123'" to """"9999-12-31T13:00:04.123000"""",
"NULL" to "null",
)

val datetime2Values =
mapOf(
"'0001-01-01'" to """"0001-01-01T00:00:00.000000"""",
"'9999-12-31'" to """"9999-12-31T00:00:00.000000"""",
"'9999-12-31T13:00:04.123456'" to """"9999-12-31T13:00:04.123456"""",
"'2023-11-08T01:20:11.3733338'" to """"2023-11-08T01:20:11.373333"""",
"NULL" to "null",
)

val timeValues =
mapOf(
"'13:00:01'" to """"13:00:01.000000"""",
"'13:00:04Z'" to """"13:00:04.000000"""",
"'13:00:04.123456Z'" to """"13:00:04.123456"""",
"NULL" to "null",
)

val datetimeoffsetValues =
mapOf(
"'2001-01-10 00:00:00 +01:00'" to """"2001-01-10T00:00:00.000000+01:00"""",
"'9999-01-10 00:00:00 +01:00'" to """"9999-01-10T00:00:00.000000+01:00"""",
"'2024-05-10 19:00:01.604805 +03:00'" to """"2024-05-10T19:00:01.604805+03:00"""",
"'2024-03-02 19:08:07.1234567 +09:00'" to """"2024-03-02T19:08:07.123456+09:00"""",
"'0001-01-01 00:00:00.0000000 +00:00'" to """"0001-01-01T00:00:00.000000Z"""",
"NULL" to "null",
)

val charValues =
mapOf(
"'a'" to """"a """",
"'*'" to """"* """",
"'abc'" to """"abc """",
"'Hello World!'" to """"Hello World! """",
"'Test123'" to """"Test123 """",
"''" to """" """",
"NULL" to "null",
)

val varcharValues =
mapOf(
"''" to """""""",
"'*'" to """"*"""",
"'a'" to """"a"""",
"'abc'" to """"abc"""",
"N'Миші йдуть на південь, не питай чому;'" to
""""Миші йдуть на південь, не питай чому;"""",
"N'櫻花分店'" to """"櫻花分店"""",
"NULL" to "null",
)

val textValues =
mapOf(
"''" to """""""",
"'Some test text 123\$%^&*()_'" to """"Some test text 123$%^&*()_"""",
"'a'" to """"a"""",
"'abc'" to """"abc"""",
"NULL" to "null",
)

val ncharValues =
mapOf(
"'a'" to """"a """",
"'*'" to """"* """",
"'abc'" to """"abc """",
"N'Миші йдуть на південь, не питай чому;'" to
""""Миші йдуть на південь, не питай чому; """",
"N'櫻花分店'" to """"櫻花分店 """",
"''" to """" """",
"NULL" to "null",
)

val nvarcharValues =
mapOf(
"''" to """""""",
"'*'" to """"*"""",
"'a'" to """"a"""",
"'abc'" to """"abc"""",
"N'Миші йдуть на південь, не питай чому;'" to
""""Миші йдуть на південь, не питай чому;"""",
"N'櫻花分店'" to """"櫻花分店"""",
"NULL" to "null",
)

val binaryValues =
mapOf(
"CAST( 'A' AS BINARY(1))" to """"QQ=="""",
"NULL" to "null",
)

val varbinaryValues =
mapOf(
"CAST( 'ABC' AS VARBINARY)" to """"QUJD"""",
"NULL" to "null",
)

val uniqueidentifierValues =
mapOf(
"'375CFC44-CAE3-4E43-8083-821D2DF0E626'" to
""""375CFC44-CAE3-4E43-8083-821D2DF0E626"""",
"NULL" to "null",
)

val xmlValues =
mapOf(
"''" to """""""",
"'<user><user_id>1</user_id></user>'" to """"<user><user_id>1</user_id></user>"""",
"NULL" to "null",
)

val geometryValues =
mapOf(
"geometry::STGeomFromText('LINESTRING (100 100, 20 180, 180 180)', 0)" to
""""LINESTRING(100 100, 20 180, 180 180)"""",
"NULL" to "null",
)

val geographyValues =
mapOf(
"geography::STGeomFromText('LINESTRING(-122.360 47.656, -122.343 47.656 )', 4326)" to
""""LINESTRING(-122.36 47.656, -122.343 47.656)"""",
"NULL" to "null",
)

val hierarchyidValues =
mapOf(
"'/1/1/'" to """"/1/1/"""",
"NULL" to "null",
)

override val testCases: Map<String, MsSqlServerDatatypeTestCase> =
listOf(
// Integer types
MsSqlServerDatatypeTestCase(
"BIGINT",
bigintValues,
LeafAirbyteSchemaType.INTEGER,
),
MsSqlServerDatatypeTestCase(
"INT",
integerValues,
LeafAirbyteSchemaType.INTEGER,
),
MsSqlServerDatatypeTestCase(
"SMALLINT",
smallintValues,
LeafAirbyteSchemaType.INTEGER,
),
MsSqlServerDatatypeTestCase(
"TINYINT",
tinyintValues,
LeafAirbyteSchemaType.INTEGER,
),
// Boolean type
MsSqlServerDatatypeTestCase(
"BIT",
booleanValues,
LeafAirbyteSchemaType.BOOLEAN,
),
// Decimal types
MsSqlServerDatatypeTestCase(
"DECIMAL(5,2)",
decimalValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"NUMERIC",
numericValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"MONEY",
moneyValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"SMALLMONEY",
smallmoneyValues,
LeafAirbyteSchemaType.NUMBER,
),
// Float types
MsSqlServerDatatypeTestCase(
"FLOAT",
floatValues,
LeafAirbyteSchemaType.NUMBER,
),
MsSqlServerDatatypeTestCase(
"REAL",
realValues,
LeafAirbyteSchemaType.NUMBER,
),
// Date/Time types
MsSqlServerDatatypeTestCase(
"DATE",
dateValues,
LeafAirbyteSchemaType.DATE,
),
MsSqlServerDatatypeTestCase(
"SMALLDATETIME",
smalldatetimeValues,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"DATETIME",
datetimeValues,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"DATETIME2",
datetime2Values,
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"TIME",
timeValues,
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE,
),
MsSqlServerDatatypeTestCase(
"DATETIMEOFFSET",
datetimeoffsetValues,
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE,
),
// String types
MsSqlServerDatatypeTestCase(
"CHAR(50)",
charValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NVARCHAR(MAX)",
varcharValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"TEXT",
textValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NCHAR(50)",
ncharValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NVARCHAR(MAX)",
nvarcharValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"NTEXT",
nvarcharValues,
LeafAirbyteSchemaType.STRING,
),
// Binary types
MsSqlServerDatatypeTestCase(
"BINARY(1)",
binaryValues,
LeafAirbyteSchemaType.BINARY,
),
MsSqlServerDatatypeTestCase(
"VARBINARY(3)",
varbinaryValues,
LeafAirbyteSchemaType.BINARY,
),
// Special types
MsSqlServerDatatypeTestCase(
"UNIQUEIDENTIFIER",
uniqueidentifierValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"XML",
xmlValues,
LeafAirbyteSchemaType.STRING,
),
// Spatial types
MsSqlServerDatatypeTestCase(
"GEOMETRY",
geometryValues,
LeafAirbyteSchemaType.STRING,
),
MsSqlServerDatatypeTestCase(
"GEOGRAPHY",
geographyValues,
LeafAirbyteSchemaType.STRING,
),
// Hierarchy type - only for non-CDC tests
MsSqlServerDatatypeTestCase(
"HIERARCHYID",
hierarchyidValues,
LeafAirbyteSchemaType.STRING,
isGlobal = false, // CDC doesn't support hierarchyid properly
),
)
.associateBy { it.id }
}

data class MsSqlServerDatatypeTestCase(
val sqlType: String,
val sqlToAirbyte: Map<String, String>,
override val expectedAirbyteSchemaType: AirbyteSchemaType,
override val isGlobal: Boolean = true,
) : DatatypeTestCase {

override val isStream: Boolean
get() = true

private val typeName: String
get() =
sqlType
.replace("[^a-zA-Z0-9]".toRegex(), " ")
.trim()
.replace(" +".toRegex(), "_")
.lowercase()

override val id: String
get() = "tbl_$typeName"

override val fieldName: String
get() = "col_$typeName"

override val expectedData: List<String>
get() = sqlToAirbyte.values.map { """{"${fieldName}":$it}""" }

val ddl: List<String>
get() =
listOf(
"DROP TABLE IF EXISTS $id",
"CREATE TABLE $id " + "(pk INT IDENTITY(1,1) PRIMARY KEY, $fieldName $sqlType)",
)

val dml: List<String>
get() =
sqlToAirbyte.keys.map {
if (it == "NULL") {
"INSERT INTO $id DEFAULT VALUES"
} else {
"INSERT INTO $id ($fieldName) VALUES ($it)"
}
}
}
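
// For illustration, the "BIT" case above maps to id = "tbl_bit" and fieldName = "col_bit",
// so its DDL is "CREATE TABLE tbl_bit (pk INT IDENTITY(1,1) PRIMARY KEY, col_bit BIT)" and a
// non-NULL entry such as "0" yields the DML "INSERT INTO tbl_bit (col_bit) VALUES (0)".
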
@@ -0,0 +1,604 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/

package io.airbyte.integrations.source.mssql

import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.command.CliRunner
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
import io.airbyte.protocol.models.v0.CatalogHelpers
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.SyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import java.sql.Connection
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import org.junit.jupiter.api.AfterAll
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.BeforeAll
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.Timeout
import org.testcontainers.containers.MSSQLServerContainer

private val log = KotlinLogging.logger {}

class MsSqlServerExcludeTodaysDataIntegrationTest {

private val createdTables = mutableListOf<String>()

@AfterEach
fun cleanupTables() {
// Clean up all tables created during the test
if (createdTables.isNotEmpty()) {
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false
createdTables.forEach { tableName ->
try {
connection.createStatement().use { stmt ->
stmt.execute("DROP TABLE IF EXISTS $tableName")
}
log.info { "Dropped test table: $tableName" }
} catch (e: Exception) {
log.warn(e) { "Failed to drop table $tableName" }
}
}
}
createdTables.clear()
}
}

@Test
@Timeout(60)
fun testExcludeTodaysDataWithDateColumn() {
// Setup: Create a table with records from different dates
val tableName = "test_exclude_today_date"
val today = LocalDate.now()
val yesterday = today.minusDays(1)
val twoDaysAgo = today.minusDays(2)

setupDateTable(tableName, today, yesterday, twoDaysAgo)

// Test with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithExclude = performSync(configWithExclude, tableName, "order_date")

// Verify: Today's records should be excluded
val recordDates = extractDates(recordsWithExclude, "order_date")
Assertions.assertFalse(
recordDates.contains(today.toString()),
"Today's records should be excluded when exclude_todays_data is true"
)
Assertions.assertTrue(
recordDates.contains(yesterday.toString()),
"Yesterday's records should be included"
)
Assertions.assertTrue(
recordDates.contains(twoDaysAgo.toString()),
"Records from two days ago should be included"
)

// Test with exclude_todays_data = false
val configWithoutExclude = createConfig(excludeTodaysData = false)
val recordsWithoutExclude = performSync(configWithoutExclude, tableName, "order_date")

// Verify: Today's records should be included
val allRecordDates = extractDates(recordsWithoutExclude, "order_date")
Assertions.assertTrue(
allRecordDates.contains(today.toString()),
"Today's records should be included when exclude_todays_data is false"
)
}

@Test
@Timeout(60)
fun testExcludeTodaysDataWithDateTimeColumn() {
// Setup: Create a table with datetime records
val tableName = "test_exclude_today_datetime"
val now = LocalDateTime.now()
val todayMorning = now.withHour(9).withMinute(0).withSecond(0)
val todayEvening = now.withHour(18).withMinute(30).withSecond(0)
val yesterdayNoon = now.minusDays(1).withHour(12).withMinute(0).withSecond(0)
val lastMidnight = now.toLocalDate().atStartOfDay()
val beforeMidnight = lastMidnight.minusMinutes(1)

setupDateTimeTable(tableName, todayMorning, todayEvening, yesterdayNoon, beforeMidnight)

// Test with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithExclude = performSync(configWithExclude, tableName, "created_at")

// Verify: Records from today (after midnight) should be excluded
val timestamps = extractTimestamps(recordsWithExclude, "created_at")

Assertions.assertFalse(
timestamps.any {
it.contains(todayMorning.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME))
},
"Today morning's records should be excluded"
)
Assertions.assertFalse(
timestamps.any {
it.contains(todayEvening.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME))
},
"Today evening's records should be excluded"
)
Assertions.assertTrue(
timestamps.any { ts -> ts.startsWith(yesterdayNoon.toLocalDate().toString()) },
"Yesterday's records should be included"
)
Assertions.assertTrue(
timestamps.any { ts -> ts.startsWith(beforeMidnight.toLocalDate().toString()) },
"Records from just before midnight should be included"
)
}

@Test
@Timeout(120)
fun testExcludeTodaysDataWithCursorBasedIncremental() {
// Setup: Create a table with date column for cursor-based incremental sync
val tableName = "test_exclude_today_incremental"
val today = LocalDate.now()
val yesterday = today.minusDays(1)
val twoDaysAgo = today.minusDays(2)
val threeDaysAgo = today.minusDays(3)

// Initial data setup with records from 3 days ago and 2 days ago
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false

// Create table
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
order_date DATE,
amount DECIMAL(10,2),
status VARCHAR(50)
)
""".trimIndent()

connection.createStatement().use { stmt -> stmt.execute(createTable) }

// Insert initial data (only old records)
val insertInitialData =
"""
INSERT INTO $tableName (order_date, amount, status) VALUES
('$threeDaysAgo', 100.00, 'initial'),
('$twoDaysAgo', 200.00, 'initial');
""".trimIndent()

connection.createStatement().use { stmt -> stmt.execute(insertInitialData) }

log.info { "Created table $tableName with initial data" }
createdTables.add(tableName)
}

// First sync: Initial snapshot with exclude_todays_data = true
val configWithExclude = createConfig(excludeTodaysData = true)
val initialRecords = performSync(configWithExclude, tableName, "order_date")

// Verify initial sync contains only old records
val initialDates = extractDates(initialRecords, "order_date")
Assertions.assertEquals(2, initialRecords.size, "Initial sync should have 2 records")
Assertions.assertTrue(
initialDates.contains(threeDaysAgo.toString()),
"Initial sync should include records from 3 days ago"
)
Assertions.assertTrue(
initialDates.contains(twoDaysAgo.toString()),
"Initial sync should include records from 2 days ago"
)

// Add new records including yesterday and today
connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false

val insertNewData =
"""
INSERT INTO $tableName (order_date, amount, status) VALUES
('$yesterday', 300.00, 'incremental'),
('$yesterday', 350.00, 'incremental'),
('$today', 400.00, 'incremental_today'),
('$today', 450.00, 'incremental_today');
""".trimIndent()

connection.createStatement().use { stmt -> stmt.execute(insertNewData) }
log.info { "Added new records including yesterday and today" }
}

// Second sync: Incremental sync should exclude today's data
val incrementalRecords = performSync(configWithExclude, tableName, "order_date")

// Extract only the new records from incremental sync
val allDates = extractDates(incrementalRecords, "order_date")

// Count records by date
val todayCount = allDates.count { it == today.toString() }
val yesterdayCount = allDates.count { it == yesterday.toString() }

// Verify: Today's records should be excluded in incremental sync
Assertions.assertEquals(
0,
todayCount,
"Today's records should be excluded during incremental sync when exclude_todays_data is true"
)

// Verify: Yesterday's records should be included
Assertions.assertTrue(
yesterdayCount >= 2,
"Yesterday's records should be included in incremental sync"
)

// Test without exclude_todays_data to confirm today's records exist
val configWithoutExclude = createConfig(excludeTodaysData = false)
val allRecordsIncludingToday = performSync(configWithoutExclude, tableName, "order_date")

val allDatesWithToday = extractDates(allRecordsIncludingToday, "order_date")
val todayCountWithoutExclude = allDatesWithToday.count { it == today.toString() }

Assertions.assertEquals(
2,
todayCountWithoutExclude,
"Today's records should be included when exclude_todays_data is false"
)

log.info { "Incremental sync test completed successfully" }
}

@Test
@Timeout(120)
fun testExcludeTodaysDataNotTriggeredForNonTemporalCursor() {
// Setup: Create a table with non-temporal cursor field (INTEGER and VARCHAR)
val tableName = "test_exclude_today_non_temporal"
val today = LocalDate.now()
val yesterday = today.minusDays(1)

connectionFactory.get().use { connection: Connection ->
connection.isReadOnly = false

// Create table with INTEGER primary key as cursor and date column for verification
val createTable =
"""
DROP TABLE IF EXISTS $tableName;
CREATE TABLE $tableName (
id INT IDENTITY(1,1) PRIMARY KEY,
order_date DATE,
status VARCHAR(50),
amount DECIMAL(10,2)
)
""".trimIndent()

connection.createStatement().use { stmt -> stmt.execute(createTable) }

// Insert test data with today's and yesterday's dates
// IDs will be 1, 2, 3, 4, 5
val insertData =
"""
INSERT INTO $tableName (order_date, status, amount) VALUES
('$yesterday', 'old', 100.00),
('$yesterday', 'old', 150.00),
('$today', 'new', 200.00),
('$today', 'new', 250.00),
('$today', 'new', 300.00);
""".trimIndent()

connection.createStatement().use { stmt -> stmt.execute(insertData) }

log.info { "Created table $tableName with non-temporal cursor test data" }
createdTables.add(tableName)
}

// Test 1: Using INTEGER cursor field with exclude_todays_data = true
// The feature should NOT be triggered, all records should be returned
val configWithExclude = createConfig(excludeTodaysData = true)
val recordsWithIntCursor = performSync(configWithExclude, tableName, "id")

// Verify: All records should be included (feature not triggered for INTEGER cursor)
Assertions.assertEquals(
5,
recordsWithIntCursor.size,
"All 5 records should be included when cursor is INTEGER, even with exclude_todays_data = true"
)
|
||||
|
||||
// Verify today's records are included
|
||||
val dates = extractDates(recordsWithIntCursor, "order_date")
|
||||
val todayCount = dates.count { it == today.toString() }
|
||||
Assertions.assertEquals(
|
||||
3,
|
||||
todayCount,
|
||||
"Today's 3 records should be included when cursor is INTEGER type"
|
||||
)
|
||||
|
||||
// Test 2: Using VARCHAR cursor field with exclude_todays_data = true
|
||||
val recordsWithStringCursor = performSync(configWithExclude, tableName, "status")
|
||||
|
||||
// Verify: All records should be included (feature not triggered for VARCHAR cursor)
|
||||
Assertions.assertEquals(
|
||||
5,
|
||||
recordsWithStringCursor.size,
|
||||
"All 5 records should be included when cursor is VARCHAR, even with exclude_todays_data = true"
|
||||
)
|
||||
|
||||
// Test 3: Incremental sync with non-temporal cursor should also include all new records
|
||||
connectionFactory.get().use { connection: Connection ->
|
||||
connection.isReadOnly = false
|
||||
|
||||
// Add more records with today's date
|
||||
val insertNewData =
|
||||
"""
|
||||
INSERT INTO $tableName (order_date, status, amount) VALUES
|
||||
('$today', 'newer', 350.00),
|
||||
('$today', 'newer', 400.00);
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(insertNewData) }
|
||||
log.info { "Added new records with today's date" }
|
||||
}
|
||||
|
||||
// Perform incremental sync with INTEGER cursor
|
||||
val incrementalRecords = performSync(configWithExclude, tableName, "id")
|
||||
|
||||
// Verify: All 7 records should be present (5 original + 2 new)
|
||||
Assertions.assertEquals(
|
||||
7,
|
||||
incrementalRecords.size,
|
||||
"All records including new today's records should be included in incremental sync with non-temporal cursor"
|
||||
)
|
||||
|
||||
// Verify new today's records are included
|
||||
val allDates = extractDates(incrementalRecords, "order_date")
|
||||
val finalTodayCount = allDates.count { it == today.toString() }
|
||||
Assertions.assertEquals(
|
||||
5,
|
||||
finalTodayCount,
|
||||
"All 5 of today's records should be included after incremental sync with non-temporal cursor"
|
||||
)
|
||||
|
||||
log.info {
|
||||
"Non-temporal cursor test completed successfully - exclude_todays_data feature was correctly NOT triggered"
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Timeout(60)
|
||||
fun testExcludeTodaysDataWithDatetime2Column() {
|
||||
// Setup: Create a table with datetime2 column for higher precision
|
||||
val tableName = "test_exclude_today_datetime2"
|
||||
val now = LocalDateTime.now()
|
||||
val todayWithMicros = now.withNano(0) // Remove nanoseconds
|
||||
val yesterdayWithMicros = now.minusDays(1).withNano(0) // Remove nanoseconds
|
||||
|
||||
setupDateTime2Table(tableName, todayWithMicros, yesterdayWithMicros)
|
||||
|
||||
// Test with exclude_todays_data = true
|
||||
val configWithExclude = createConfig(excludeTodaysData = true)
|
||||
val recordsWithExclude = performSync(configWithExclude, tableName, "updated_at")
|
||||
|
||||
// Verify: Today's high-precision records should be excluded
|
||||
val timestamps = extractTimestamps(recordsWithExclude, "updated_at")
|
||||
|
||||
Assertions.assertEquals(1, timestamps.size, "Only yesterday's record should be included")
|
||||
Assertions.assertTrue(
|
||||
timestamps.any { ts -> ts.startsWith(yesterdayWithMicros.toLocalDate().toString()) },
|
||||
"Yesterday's high-precision record should be included"
|
||||
)
|
||||
}
|
||||
|
||||
private fun setupDateTable(
|
||||
tableName: String,
|
||||
today: LocalDate,
|
||||
yesterday: LocalDate,
|
||||
twoDaysAgo: LocalDate
|
||||
) {
|
||||
connectionFactory.get().use { connection: Connection ->
|
||||
connection.isReadOnly = false
|
||||
|
||||
// Create table
|
||||
val createTable =
|
||||
"""
|
||||
DROP TABLE IF EXISTS $tableName;
|
||||
CREATE TABLE $tableName (
|
||||
id INT IDENTITY(1,1) PRIMARY KEY,
|
||||
order_date DATE,
|
||||
amount DECIMAL(10,2)
|
||||
)
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(createTable) }
|
||||
|
||||
// Insert test data
|
||||
val insertData =
|
||||
"""
|
||||
INSERT INTO $tableName (order_date, amount) VALUES
|
||||
('$today', 100.00),
|
||||
('$today', 150.00),
|
||||
('$yesterday', 200.00),
|
||||
('$yesterday', 250.00),
|
||||
('$twoDaysAgo', 300.00);
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(insertData) }
|
||||
|
||||
log.info { "Created table $tableName with test data" }
|
||||
createdTables.add(tableName)
|
||||
}
|
||||
}
|
||||
|
||||
private fun setupDateTimeTable(
|
||||
tableName: String,
|
||||
todayMorning: LocalDateTime,
|
||||
todayEvening: LocalDateTime,
|
||||
yesterdayNoon: LocalDateTime,
|
||||
beforeMidnight: LocalDateTime
|
||||
) {
|
||||
connectionFactory.get().use { connection: Connection ->
|
||||
connection.isReadOnly = false
|
||||
|
||||
// Create table using regular DATETIME (this test is for DATETIME columns)
|
||||
val createTable =
|
||||
"""
|
||||
DROP TABLE IF EXISTS $tableName;
|
||||
CREATE TABLE $tableName (
|
||||
id INT IDENTITY(1,1) PRIMARY KEY,
|
||||
created_at DATETIME,
|
||||
description VARCHAR(100)
|
||||
)
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(createTable) }
|
||||
|
||||
// Insert test data
|
||||
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
val insertData =
|
||||
"""
|
||||
INSERT INTO $tableName (created_at, description) VALUES
|
||||
('${todayMorning.format(formatter)}', 'Today morning'),
|
||||
('${todayEvening.format(formatter)}', 'Today evening'),
|
||||
('${yesterdayNoon.format(formatter)}', 'Yesterday noon'),
|
||||
('${beforeMidnight.format(formatter)}', 'Just before midnight');
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(insertData) }
|
||||
|
||||
log.info { "Created table $tableName with datetime test data (precision workaround)" }
|
||||
createdTables.add(tableName)
|
||||
}
|
||||
}
|
||||
|
||||
private fun setupDateTime2Table(
|
||||
tableName: String,
|
||||
todayWithMicros: LocalDateTime,
|
||||
yesterdayWithMicros: LocalDateTime
|
||||
) {
|
||||
connectionFactory.get().use { connection: Connection ->
|
||||
connection.isReadOnly = false
|
||||
|
||||
// Create table using DATETIME2(6) to handle precision mismatch
|
||||
val createTable =
|
||||
"""
|
||||
DROP TABLE IF EXISTS $tableName;
|
||||
CREATE TABLE $tableName (
|
||||
id INT IDENTITY(1,1) PRIMARY KEY,
|
||||
updated_at DATETIME2(6),
|
||||
status VARCHAR(50)
|
||||
)
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(createTable) }
|
||||
|
||||
// Insert test data with simpler format
|
||||
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
val todayFormatted = todayWithMicros.format(formatter)
|
||||
val yesterdayFormatted = yesterdayWithMicros.format(formatter)
|
||||
|
||||
log.info { "Inserting today: $todayFormatted, yesterday: $yesterdayFormatted" }
|
||||
|
||||
val insertData =
|
||||
"""
|
||||
INSERT INTO $tableName (updated_at, status) VALUES
|
||||
('$todayFormatted', 'Today with microseconds'),
|
||||
('$yesterdayFormatted', 'Yesterday with microseconds');
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().use { stmt -> stmt.execute(insertData) }
|
||||
|
||||
log.info { "Created table $tableName with datetime2 test data (precision workaround)" }
|
||||
createdTables.add(tableName)
|
||||
}
|
||||
}
|
||||
|
||||
private fun createConfig(
|
||||
excludeTodaysData: Boolean
|
||||
): MsSqlServerSourceConfigurationSpecification {
|
||||
val config = MsSqlServerContainerFactory.config(dbContainer)
|
||||
config.setIncrementalValue(
|
||||
UserDefinedCursor().apply { this.excludeTodaysData = excludeTodaysData }
|
||||
)
|
||||
return config
|
||||
}
|
||||
|
||||
private fun performSync(
|
||||
config: MsSqlServerSourceConfigurationSpecification,
|
||||
tableName: String,
|
||||
cursorField: String
|
||||
): List<JsonNode> {
|
||||
// Discover catalog
|
||||
val discoverOutput = CliRunner.source("discover", config).run()
|
||||
val catalog =
|
||||
discoverOutput.catalogs().firstOrNull()
|
||||
?: throw IllegalStateException("No catalog discovered")
|
||||
|
||||
val stream =
|
||||
catalog.streams.find { it.name == tableName }
|
||||
?: throw IllegalStateException("Table $tableName not found in catalog")
|
||||
|
||||
// Configure stream for incremental sync with cursor
|
||||
val configuredStream =
|
||||
CatalogHelpers.toDefaultConfiguredStream(stream).apply {
|
||||
syncMode = SyncMode.INCREMENTAL
|
||||
this.cursorField = listOf(cursorField)
|
||||
}
|
||||
|
||||
val configuredCatalog = ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream))
|
||||
|
||||
// Perform sync
|
||||
val syncOutput = CliRunner.source("read", config, configuredCatalog).run()
|
||||
val records = syncOutput.records().mapNotNull { it.data }
|
||||
|
||||
log.info { "Synced ${records.size} records from $tableName" }
|
||||
return records
|
||||
}
|
||||
|
||||
private fun extractDates(records: List<JsonNode>, fieldName: String): List<String> {
|
||||
return records.mapNotNull { record -> record.get(fieldName)?.asText() }
|
||||
}
|
||||
|
||||
private fun extractTimestamps(records: List<JsonNode>, fieldName: String): List<String> {
|
||||
return records.mapNotNull { record -> record.get(fieldName)?.asText() }
|
||||
}
|
||||
|
||||
companion object {
|
||||
lateinit var dbContainer: MSSQLServerContainer<*>
|
||||
|
||||
val connectionFactory: JdbcConnectionFactory by lazy {
|
||||
JdbcConnectionFactory(
|
||||
MsSqlServerSourceConfigurationFactory()
|
||||
.make(MsSqlServerContainerFactory.config(dbContainer))
|
||||
)
|
||||
}
|
||||
|
||||
@JvmStatic
|
||||
@BeforeAll
|
||||
@Timeout(value = 300)
|
||||
fun startContainer() {
|
||||
dbContainer =
|
||||
MsSqlServerContainerFactory.exclusive(
|
||||
"mcr.microsoft.com/mssql/server:2022-latest",
|
||||
MsSqlServerContainerFactory.WithNetwork,
|
||||
MsSqlServerContainerFactory.WithTestDatabase
|
||||
)
|
||||
|
||||
// Ensure test schema exists
|
||||
connectionFactory.get().use { connection: Connection ->
|
||||
connection.isReadOnly = false
|
||||
connection.createStatement().use { stmt ->
|
||||
stmt.execute(
|
||||
"IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = 'dbo') BEGIN EXEC('CREATE SCHEMA dbo') END"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@JvmStatic
|
||||
@AfterAll
|
||||
fun stopContainer() {
|
||||
if (::dbContainer.isInitialized) {
|
||||
dbContainer.stop()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,439 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.databind.node.BinaryNode
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode
|
||||
import io.airbyte.cdk.ClockFactory
|
||||
import io.airbyte.cdk.StreamIdentifier
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.discover.MetaField
|
||||
import io.airbyte.cdk.discover.MetaFieldDecorator
|
||||
import io.airbyte.cdk.jdbc.BinaryStreamFieldType
|
||||
import io.airbyte.cdk.jdbc.DefaultJdbcConstants
|
||||
import io.airbyte.cdk.jdbc.IntFieldType
|
||||
import io.airbyte.cdk.jdbc.LocalDateTimeFieldType
|
||||
import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType
|
||||
import io.airbyte.cdk.output.BufferingOutputConsumer
|
||||
import io.airbyte.cdk.output.DataChannelFormat
|
||||
import io.airbyte.cdk.output.DataChannelMedium
|
||||
import io.airbyte.cdk.output.sockets.NativeRecordPayload
|
||||
import io.airbyte.cdk.read.ConcurrencyResource
|
||||
import io.airbyte.cdk.read.ConfiguredSyncMode
|
||||
import io.airbyte.cdk.read.DefaultJdbcSharedState
|
||||
import io.airbyte.cdk.read.ResourceAcquirer
|
||||
import io.airbyte.cdk.read.SelectQuerier
|
||||
import io.airbyte.cdk.read.StateManager
|
||||
import io.airbyte.cdk.read.Stream
|
||||
import io.airbyte.cdk.read.StreamFeedBootstrap
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.airbyte.protocol.models.v0.StreamDescriptor
|
||||
import io.mockk.mockk
|
||||
import java.time.OffsetDateTime
|
||||
import java.util.Base64
|
||||
import kotlin.test.assertNull
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Assertions.assertTrue
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.CsvSource
|
||||
|
||||
class MsSqlServerJdbcPartitionFactoryTest {
|
||||
companion object {
|
||||
private val selectQueryGenerator = MsSqlSourceOperations()
|
||||
private val sharedState = sharedState()
|
||||
private val cdcSharedState = sharedState(global = true)
|
||||
private val config = mockk<MsSqlServerSourceConfiguration>(relaxed = true)
|
||||
|
||||
val msSqlServerJdbcPartitionFactory =
|
||||
MsSqlServerJdbcPartitionFactory(sharedState, selectQueryGenerator, config)
|
||||
val msSqlServerCdcJdbcPartitionFactory =
|
||||
MsSqlServerJdbcPartitionFactory(cdcSharedState, selectQueryGenerator, config)
|
||||
|
||||
val fieldId = Field("id", IntFieldType)
|
||||
val stream =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("test_table")
|
||||
),
|
||||
schema = setOf(fieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
|
||||
configuredPrimaryKey = listOf(fieldId),
|
||||
configuredCursor = fieldId,
|
||||
)
|
||||
val timestampFieldId = Field("created_at", OffsetDateTimeFieldType)
|
||||
|
||||
val timestampStream =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("timestamp_table")
|
||||
),
|
||||
schema = setOf(timestampFieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
|
||||
configuredPrimaryKey = listOf(timestampFieldId),
|
||||
configuredCursor = timestampFieldId,
|
||||
)
|
||||
|
||||
val binaryFieldId = Field("binary_col", BinaryStreamFieldType)
|
||||
|
||||
val binaryStream =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("binary_table")
|
||||
),
|
||||
schema = setOf(binaryFieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
|
||||
configuredPrimaryKey = listOf(binaryFieldId),
|
||||
configuredCursor = binaryFieldId,
|
||||
)
|
||||
|
||||
val datetimeFieldId = Field("datetime_col", LocalDateTimeFieldType)
|
||||
|
||||
val datetimeStream =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("datetime_table")
|
||||
),
|
||||
schema = setOf(datetimeFieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
|
||||
configuredPrimaryKey = listOf(datetimeFieldId),
|
||||
configuredCursor = datetimeFieldId,
|
||||
)
|
||||
|
||||
private fun sharedState(
|
||||
global: Boolean = false,
|
||||
): DefaultJdbcSharedState {
|
||||
|
||||
val configSpec =
|
||||
MsSqlServerSourceConfigurationSpecification().apply {
|
||||
host = "localhost"
|
||||
port = 1433
|
||||
username = "sa"
|
||||
password = "Password123!"
|
||||
database = "master"
|
||||
}
|
||||
if (global) {
|
||||
configSpec.setIncrementalValue(Cdc())
|
||||
} else {
|
||||
configSpec.setIncrementalValue(UserDefinedCursor())
|
||||
}
|
||||
val configFactory = MsSqlServerSourceConfigurationFactory()
|
||||
val configuration = configFactory.make(configSpec)
|
||||
|
||||
val mockSelectQuerier = mockk<SelectQuerier>()
|
||||
|
||||
return DefaultJdbcSharedState(
|
||||
configuration,
|
||||
mockSelectQuerier,
|
||||
DefaultJdbcConstants(),
|
||||
ConcurrencyResource(configuration),
|
||||
ResourceAcquirer(emptyList())
|
||||
)
|
||||
}
|
||||
|
||||
private fun streamFeedBootstrap(
|
||||
stream: Stream,
|
||||
incumbentStateValue: OpaqueStateValue? = null
|
||||
) =
|
||||
StreamFeedBootstrap(
|
||||
outputConsumer = BufferingOutputConsumer(ClockFactory().fixed()),
|
||||
metaFieldDecorator =
|
||||
object : MetaFieldDecorator {
|
||||
override val globalCursor: MetaField? = null
|
||||
override val globalMetaFields: Set<MetaField> = emptySet()
|
||||
|
||||
override fun decorateRecordData(
|
||||
timestamp: OffsetDateTime,
|
||||
globalStateValue: OpaqueStateValue?,
|
||||
stream: Stream,
|
||||
recordData: ObjectNode
|
||||
) {}
|
||||
|
||||
override fun decorateRecordData(
|
||||
timestamp: OffsetDateTime,
|
||||
globalStateValue: OpaqueStateValue?,
|
||||
stream: Stream,
|
||||
recordData: NativeRecordPayload
|
||||
) {
|
||||
// no-op
|
||||
}
|
||||
},
|
||||
stateManager =
|
||||
StateManager(initialStreamStates = mapOf(stream to incumbentStateValue)),
|
||||
stream,
|
||||
DataChannelFormat.JSONL,
|
||||
DataChannelMedium.STDIO,
|
||||
8192,
|
||||
ClockFactory().fixed(),
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testColdStartWithPkCursorBased() {
|
||||
val jdbcPartition = msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(stream))
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcSnapshotWithCursorPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testColdStartWithPkCdc() {
|
||||
val jdbcPartition = msSqlServerCdcJdbcPartitionFactory.create(streamFeedBootstrap(stream))
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcCdcSnapshotPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testColdStartWithoutPk() {
|
||||
val streamWithoutPk =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("no_pk_table")
|
||||
),
|
||||
schema = setOf(fieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.INCREMENTAL,
|
||||
configuredPrimaryKey = listOf(),
|
||||
configuredCursor = fieldId,
|
||||
)
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(streamWithoutPk))
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcNonResumableSnapshotWithCursorPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testResumeFromCompletedCursorBasedRead() {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"cursor": "12345",
|
||||
"version": 3,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "test_table",
|
||||
"cursor_field": [
|
||||
"id"
|
||||
],
|
||||
"stream_namespace": "dbo",
|
||||
"cursor_record_count": 1
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue))
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@CsvSource(
|
||||
"'2025-01-20T10:30:45', '2025-01-20T10:30:45.000000Z'",
|
||||
"'2025-01-20T10:30:45.0', '2025-01-20T10:30:45.000000Z'",
|
||||
"'2025-01-20T10:30:45.1', '2025-01-20T10:30:45.100000Z'",
|
||||
"'2025-01-20T10:30:45.123', '2025-01-20T10:30:45.123000Z'",
|
||||
"'2025-01-20T10:30:45.123456789', '2025-01-20T10:30:45.123456Z'",
|
||||
"'2025-01-20T10:30:45.123+00:00', '2025-01-20T10:30:45.123000Z'",
|
||||
"'2025-01-20T10:30:45Z', '2025-01-20T10:30:45.000000Z'",
|
||||
"'2025-01-20T10:30:45 Z', '2025-01-20T10:30:45.000000Z'",
|
||||
"'2025-01-20T10:30:45.12345 -05:00', '2025-01-20T10:30:45.123450-05:00'",
|
||||
)
|
||||
fun testResumeFromCompletedCursorBasedReadTimestamp(
|
||||
cursorVal: String,
|
||||
expectedLowerBound: String
|
||||
) {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"cursor": "$cursorVal",
|
||||
"version": 3,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "timestamp_table",
|
||||
"cursor_field": [
|
||||
"created_at"
|
||||
],
|
||||
"stream_namespace": "dbo",
|
||||
"cursor_record_count": 1
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(
|
||||
streamFeedBootstrap(timestampStream, incomingStateValue)
|
||||
)
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
|
||||
|
||||
assertEquals(
|
||||
Jsons.valueToTree(expectedLowerBound),
|
||||
(jdbcPartition as MsSqlServerJdbcCursorIncrementalPartition).cursorLowerBound
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testResumeFromCompletedCursorBasedReadTimestampWithoutTimezone() {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"cursor": "2025-01-20T10:30:45.123",
|
||||
"version": 3,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "datetime_table",
|
||||
"cursor_field": [
|
||||
"datetime_col"
|
||||
],
|
||||
"stream_namespace": "dbo",
|
||||
"cursor_record_count": 1
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(
|
||||
streamFeedBootstrap(datetimeStream, incomingStateValue)
|
||||
)
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
|
||||
|
||||
assertEquals(
|
||||
Jsons.valueToTree("2025-01-20T10:30:45.123000"),
|
||||
(jdbcPartition as MsSqlServerJdbcCursorIncrementalPartition).cursorLowerBound
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testResumeFromCursorBasedReadInitialRead() {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"pk_val": "100000",
|
||||
"pk_name": "id",
|
||||
"version": 3,
|
||||
"state_type": "primary_key",
|
||||
"incremental_state": {}
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(stream, incomingStateValue))
|
||||
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcSnapshotWithCursorPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testResumeFromCdcInitialRead() {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"pk_val": "50000",
|
||||
"pk_name": "id",
|
||||
"version": 3,
|
||||
"state_type": "primary_key",
|
||||
"incremental_state": {}
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerCdcJdbcPartitionFactory.create(
|
||||
streamFeedBootstrap(stream, incomingStateValue)
|
||||
)
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcCdcSnapshotPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testResumeFromCdcInitialReadComplete() {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"stream_name": "test_table",
|
||||
"cursor_field": [],
|
||||
"stream_namespace": "dbo"
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerCdcJdbcPartitionFactory.create(
|
||||
streamFeedBootstrap(stream, incomingStateValue)
|
||||
)
|
||||
assertNull(jdbcPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testResumeFromCompletedCursorBasedReadBinary() {
|
||||
val incomingStateValue: OpaqueStateValue =
|
||||
Jsons.readTree(
|
||||
"""
|
||||
{
|
||||
"cursor": "QUJDREVGRw==",
|
||||
"version": 3,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "binary_table",
|
||||
"cursor_field": [
|
||||
"binary_col"
|
||||
],
|
||||
"stream_namespace": "dbo",
|
||||
"cursor_record_count": 1
|
||||
}
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(
|
||||
streamFeedBootstrap(binaryStream, incomingStateValue)
|
||||
)
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcCursorIncrementalPartition)
|
||||
|
||||
assertEquals(
|
||||
Jsons.valueToTree<BinaryNode>(Base64.getDecoder().decode("QUJDREVGRw==")),
|
||||
(jdbcPartition as MsSqlServerJdbcCursorIncrementalPartition).cursorLowerBound
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testFullRefreshMode() {
|
||||
val fullRefreshStream =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("full_refresh_table")
|
||||
),
|
||||
schema = setOf(fieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.FULL_REFRESH,
|
||||
configuredPrimaryKey = listOf(), // No PK to avoid findPkUpperBound call
|
||||
configuredCursor = null,
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerJdbcPartitionFactory.create(streamFeedBootstrap(fullRefreshStream))
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcNonResumableSnapshotPartition)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testCdcFullRefreshMode() {
|
||||
val fullRefreshStream =
|
||||
Stream(
|
||||
id =
|
||||
StreamIdentifier.from(
|
||||
StreamDescriptor().withNamespace("dbo").withName("cdc_full_refresh_table")
|
||||
),
|
||||
schema = setOf(fieldId),
|
||||
configuredSyncMode = ConfiguredSyncMode.FULL_REFRESH,
|
||||
configuredPrimaryKey = listOf(), // No PK to avoid findPkUpperBound call
|
||||
configuredCursor = null,
|
||||
)
|
||||
|
||||
val jdbcPartition =
|
||||
msSqlServerCdcJdbcPartitionFactory.create(streamFeedBootstrap(fullRefreshStream))
|
||||
assertTrue(jdbcPartition is MsSqlServerJdbcNonResumableSnapshotPartition)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,191 @@
|
||||
/* Copyright (c) 2025 Airbyte, Inc., all rights reserved. */
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.ConfigErrorException
|
||||
import io.airbyte.cdk.command.ConfigurationSpecificationSupplier
|
||||
import io.airbyte.cdk.ssh.SshPasswordAuthTunnelMethod
|
||||
import io.airbyte.cdk.ssh.SshTunnelMethodConfiguration
|
||||
import io.micronaut.context.annotation.Property
|
||||
import io.micronaut.context.env.Environment
|
||||
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||
import jakarta.inject.Inject
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
@MicronautTest(environments = [Environment.TEST], rebuildContext = true)
|
||||
class MsSqlServerSourceConfigurationSpecificationTest {
|
||||
|
||||
@Inject
|
||||
lateinit var supplier:
|
||||
ConfigurationSpecificationSupplier<MsSqlServerSourceConfigurationSpecification>
|
||||
|
||||
@Test
|
||||
fun testSchemaViolation() {
|
||||
Assertions.assertThrows(ConfigErrorException::class.java, supplier::get)
|
||||
}
|
||||
|
||||
@Test
|
||||
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON)
|
||||
fun testJson() {
|
||||
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
|
||||
Assertions.assertEquals("localhost", pojo.host)
|
||||
Assertions.assertEquals(1433, pojo.port)
|
||||
Assertions.assertEquals("sa", pojo.username)
|
||||
Assertions.assertEquals("Password123!", pojo.password)
|
||||
Assertions.assertEquals("master", pojo.database)
|
||||
Assertions.assertArrayEquals(arrayOf("dbo", "custom_schema"), pojo.schemas)
|
||||
|
||||
val encryption: EncryptionSpecification = pojo.getEncryptionValue()!!
|
||||
Assertions.assertTrue(
|
||||
encryption
|
||||
is MsSqlServerEncryptionRequiredTrustServerCertificateConfigurationSpecification,
|
||||
encryption::class.toString()
|
||||
)
|
||||
|
||||
val tunnelMethod: SshTunnelMethodConfiguration? = pojo.getTunnelMethodValue()
|
||||
Assertions.assertTrue(
|
||||
tunnelMethod is SshPasswordAuthTunnelMethod,
|
||||
tunnelMethod!!::class.toString(),
|
||||
)
|
||||
|
||||
val replicationMethod: IncrementalConfigurationSpecification = pojo.getIncrementalValue()
|
||||
Assertions.assertTrue(replicationMethod is Cdc, replicationMethod::class.toString())
|
||||
|
||||
Assertions.assertEquals(300, pojo.checkpointTargetIntervalSeconds)
|
||||
Assertions.assertEquals(2, pojo.concurrency)
|
||||
Assertions.assertEquals(true, pojo.checkPrivileges)
|
||||
Assertions.assertEquals(
|
||||
"integratedSecurity=false&trustServerCertificate=true",
|
||||
pojo.jdbcUrlParams
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that the encryption mode is correctly set to "required" as the default value in the
|
||||
* MsSqlServerSourceConfigurationSpecification class.
|
||||
*/
|
||||
@Test
|
||||
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON_ENCRYPTION_CHECK)
|
||||
fun testDefaultEncryption() {
|
||||
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
|
||||
val encryption: EncryptionSpecification = pojo.getEncryptionValue()!!
|
||||
Assertions.assertTrue(
|
||||
encryption is MsSqlServerEncryptionDisabledConfigurationSpecification,
|
||||
encryption::class.toString()
|
||||
)
|
||||
}
|
||||
|
||||
/** Verifies that the default replication method is UserDefinedCursor when not specified. */
|
||||
@Test
|
||||
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON_DEFAULT_REPLICATION)
|
||||
fun testDefaultReplicationMethod() {
|
||||
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
|
||||
val replicationMethod: IncrementalConfigurationSpecification = pojo.getIncrementalValue()
|
||||
Assertions.assertTrue(
|
||||
replicationMethod is UserDefinedCursor,
|
||||
replicationMethod::class.toString()
|
||||
)
|
||||
}
|
||||
|
||||
/** Verifies that CDC replication method is correctly parsed. */
|
||||
@Test
|
||||
@Property(name = "airbyte.connector.config.json", value = CONFIG_JSON_CDC)
|
||||
fun testCdcReplicationMethod() {
|
||||
val pojo: MsSqlServerSourceConfigurationSpecification = supplier.get()
|
||||
val replicationMethod: IncrementalConfigurationSpecification = pojo.getIncrementalValue()
|
||||
Assertions.assertTrue(replicationMethod is Cdc, replicationMethod::class.toString())
|
||||
}
|
||||
|
||||
companion object {
|
||||
|
||||
const val CONFIG_JSON: String =
|
||||
"""
|
||||
{
|
||||
"host": "localhost",
|
||||
"port": 1433,
|
||||
"username": "sa",
|
||||
"password": "Password123!",
|
||||
"database": "master",
|
||||
"schemas": ["dbo", "custom_schema"],
|
||||
"ssl_mode": {
|
||||
"mode": "encrypted_trust_server_certificate"
|
||||
},
|
||||
"tunnel_method": {
|
||||
"tunnel_method": "SSH_PASSWORD_AUTH",
|
||||
"tunnel_host": "localhost",
|
||||
"tunnel_port": 2222,
|
||||
"tunnel_user": "sshuser",
|
||||
"tunnel_user_password": "sshpass"
|
||||
},
|
||||
"replication_method": {
|
||||
"method": "CDC"
|
||||
},
|
||||
"checkpoint_target_interval_seconds": 300,
|
||||
"jdbc_url_params": "integratedSecurity=false&trustServerCertificate=true",
|
||||
"concurrency": 2,
|
||||
"check_privileges": true
|
||||
}
|
||||
"""
|
||||
|
||||
const val CONFIG_JSON_ENCRYPTION_CHECK: String =
|
||||
"""
|
||||
{
|
||||
"host": "localhost",
|
||||
"port": 1433,
|
||||
"username": "sa",
|
||||
"password": "Password123!",
|
||||
"database": "master",
|
||||
"schemas": ["dbo"],
|
||||
"tunnel_method": {
|
||||
"tunnel_method": "SSH_PASSWORD_AUTH",
|
||||
"tunnel_host": "localhost",
|
||||
"tunnel_port": 2222,
|
||||
"tunnel_user": "sshuser",
|
||||
"tunnel_user_password": "sshpass"
|
||||
},
|
||||
"replication_method": {
|
||||
"method": "STANDARD"
|
||||
},
|
||||
"checkpoint_target_interval_seconds": 300,
|
||||
"jdbc_url_params": "integratedSecurity=false&trustServerCertificate=true",
|
||||
"concurrency": 1
|
||||
}
|
||||
"""
|
||||
|
||||
const val CONFIG_JSON_DEFAULT_REPLICATION: String =
|
||||
"""
|
||||
{
|
||||
"host": "localhost",
|
||||
"port": 1433,
|
||||
"username": "sa",
|
||||
"password": "Password123!",
|
||||
"database": "master",
|
||||
"schemas": ["dbo"],
|
||||
"ssl_mode": {
|
||||
"mode": "encrypted_trust_server_certificate"
|
||||
},
|
||||
"replication_method": {
|
||||
"method": "STANDARD"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
const val CONFIG_JSON_CDC: String =
|
||||
"""
|
||||
{
|
||||
"host": "localhost",
|
||||
"port": 1433,
|
||||
"username": "sa",
|
||||
"password": "Password123!",
|
||||
"database": "master",
|
||||
"schemas": ["dbo"],
|
||||
"ssl_mode": {
|
||||
"mode": "encrypted_trust_server_certificate"
|
||||
},
|
||||
"replication_method": {
|
||||
"method": "CDC"
|
||||
}
|
||||
}
|
||||
"""
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
/* Copyright (c) 2025 Airbyte, Inc., all rights reserved. */
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode
|
||||
import io.airbyte.cdk.discover.Field
|
||||
import io.airbyte.cdk.jdbc.DoubleFieldType
|
||||
import io.airbyte.cdk.jdbc.IntFieldType
|
||||
import io.airbyte.cdk.jdbc.LongFieldType
|
||||
import io.airbyte.cdk.jdbc.LosslessJdbcFieldType
|
||||
import io.airbyte.cdk.jdbc.OffsetDateTimeFieldType
|
||||
import io.airbyte.cdk.jdbc.StringFieldType
|
||||
import io.airbyte.cdk.read.And
|
||||
import io.airbyte.cdk.read.Equal
|
||||
import io.airbyte.cdk.read.From
|
||||
import io.airbyte.cdk.read.Greater
|
||||
import io.airbyte.cdk.read.LesserOrEqual
|
||||
import io.airbyte.cdk.read.Limit
|
||||
import io.airbyte.cdk.read.Or
|
||||
import io.airbyte.cdk.read.OrderBy
|
||||
import io.airbyte.cdk.read.SelectColumnMaxValue
|
||||
import io.airbyte.cdk.read.SelectColumns
|
||||
import io.airbyte.cdk.read.SelectQuery
|
||||
import io.airbyte.cdk.read.SelectQuerySpec
|
||||
import io.airbyte.cdk.read.Where
|
||||
import io.airbyte.cdk.read.optimize
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
class MsSqlServerSourceSelectQueryGeneratorTest {
|
||||
@Test
|
||||
fun testSelectLimit0() {
|
||||
SelectQuerySpec(
|
||||
SelectColumns(
|
||||
listOf(
|
||||
Field("id", IntFieldType),
|
||||
Field("name", StringFieldType),
|
||||
),
|
||||
),
|
||||
From("users", "dbo"),
|
||||
limit = Limit(0),
|
||||
)
|
||||
.assertSqlEquals("""SELECT TOP 0 id, name FROM dbo.users""")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectMaxCursor() {
|
||||
SelectQuerySpec(
|
||||
SelectColumnMaxValue(Field("updated_at", OffsetDateTimeFieldType)),
|
||||
From("orders", "dbo"),
|
||||
)
|
||||
.assertSqlEquals("""SELECT MAX(updated_at) FROM dbo.orders""")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectForNonResumableInitialSync() {
|
||||
SelectQuerySpec(
|
||||
SelectColumns(
|
||||
listOf(
|
||||
Field("id", IntFieldType),
|
||||
Field("description", StringFieldType),
|
||||
),
|
||||
),
|
||||
From("products", "dbo"),
|
||||
)
|
||||
.assertSqlEquals("""SELECT id, description FROM dbo.products""")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectForResumableInitialSync() {
|
||||
val k1 = Field("pk1", IntFieldType)
|
||||
val v1 = Jsons.numberNode(100)
|
||||
val k2 = Field("pk2", IntFieldType)
|
||||
val v2 = Jsons.numberNode(200)
|
||||
val k3 = Field("pk3", IntFieldType)
|
||||
val v3 = Jsons.numberNode(300)
|
||||
SelectQuerySpec(
|
||||
SelectColumns(listOf(k1, k2, k3, Field("data", StringFieldType))),
|
||||
From("composite_table", "dbo"),
|
||||
Where(
|
||||
Or(
|
||||
listOf(
|
||||
And(listOf(Greater(k1, v1))),
|
||||
And(listOf(Equal(k1, v1), Greater(k2, v2))),
|
||||
And(listOf(Equal(k1, v1), Equal(k2, v2), Greater(k3, v3))),
|
||||
),
|
||||
),
|
||||
),
|
||||
OrderBy(listOf(k1, k2, k3)),
|
||||
Limit(1000),
|
||||
)
|
||||
.assertSqlEquals(
|
||||
"""SELECT TOP 1000 pk1, pk2, pk3, data FROM """ +
|
||||
"""dbo.composite_table WHERE (pk1 > ?) OR """ +
|
||||
"""((pk1 = ?) AND (pk2 > ?)) OR """ +
|
||||
"""((pk1 = ?) AND (pk2 = ?) AND (pk3 > ?)) """ +
|
||||
"""ORDER BY pk1, pk2, pk3""",
|
||||
v1 to IntFieldType,
|
||||
v1 to IntFieldType,
|
||||
v2 to IntFieldType,
|
||||
v1 to IntFieldType,
|
||||
v2 to IntFieldType,
|
||||
v3 to IntFieldType,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectForCursorBasedIncrementalSync() {
|
||||
val c = Field("last_modified", DoubleFieldType)
|
||||
val lb = Jsons.numberNode(1.5)
|
||||
val ub = Jsons.numberNode(3.5)
|
||||
SelectQuerySpec(
|
||||
SelectColumns(listOf(Field("content", StringFieldType), c)),
|
||||
From("documents", "dbo"),
|
||||
Where(And(listOf(Greater(c, lb), LesserOrEqual(c, ub)))),
|
||||
OrderBy(listOf(c)),
|
||||
Limit(500),
|
||||
)
|
||||
.assertSqlEquals(
|
||||
"""SELECT TOP 500 content, last_modified FROM """ +
|
||||
"""dbo.documents """ +
|
||||
"""WHERE (last_modified > ?) AND (last_modified <= ?) ORDER BY last_modified""",
|
||||
lb to DoubleFieldType,
|
||||
ub to DoubleFieldType,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectWithHierarchyId() {
|
||||
// Test special handling for hierarchyid field type in SQL Server
|
||||
val hierarchyField = Field("org_node", MsSqlSourceOperations.MsSqlServerHierarchyFieldType)
|
||||
SelectQuerySpec(
|
||||
SelectColumns(
|
||||
listOf(
|
||||
Field("employee_id", IntFieldType),
|
||||
hierarchyField,
|
||||
Field("employee_name", StringFieldType),
|
||||
),
|
||||
),
|
||||
From("employees", "hr"),
|
||||
)
|
||||
.assertSqlEquals(
|
||||
"""SELECT employee_id, org_node.ToString(), employee_name FROM hr.employees"""
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectWithoutNamespace() {
|
||||
// Test query generation without namespace (schema)
|
||||
SelectQuerySpec(
|
||||
SelectColumns(
|
||||
listOf(
|
||||
Field("col1", IntFieldType),
|
||||
Field("col2", StringFieldType),
|
||||
),
|
||||
),
|
||||
From("simple_table", null),
|
||||
limit = Limit(10),
|
||||
)
|
||||
.assertSqlEquals("""SELECT TOP 10 col1, col2 FROM simple_table""")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectWithLargeLimit() {
|
||||
// Test with a large limit value
|
||||
val cursor = Field("sequence_id", LongFieldType)
|
||||
val startValue = Jsons.numberNode(1000000L)
|
||||
SelectQuerySpec(
|
||||
SelectColumns(listOf(cursor, Field("payload", StringFieldType))),
|
||||
From("events", "dbo"),
|
||||
Where(Greater(cursor, startValue)),
|
||||
OrderBy(listOf(cursor)),
|
||||
Limit(10000),
|
||||
)
|
||||
.assertSqlEquals(
|
||||
"""SELECT TOP 10000 sequence_id, payload FROM dbo.events WHERE sequence_id > ? ORDER BY sequence_id""",
|
||||
startValue to LongFieldType,
|
||||
)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun testSelectWithMultipleDateTimeFields() {
|
||||
// Test with multiple datetime fields for time-based filtering
|
||||
val created = Field("created_at", OffsetDateTimeFieldType)
|
||||
val updated = Field("updated_at", OffsetDateTimeFieldType)
|
||||
val createdAfter = Jsons.textNode("2025-01-01T00:00:00Z")
|
||||
val updatedBefore = Jsons.textNode("2025-12-31T23:59:59Z")
|
||||
|
||||
SelectQuerySpec(
|
||||
SelectColumns(listOf(Field("id", IntFieldType), created, updated)),
|
||||
From("records", "dbo"),
|
||||
Where(
|
||||
And(
|
||||
listOf(
|
||||
Greater(created, createdAfter),
|
||||
LesserOrEqual(updated, updatedBefore)
|
||||
)
|
||||
)
|
||||
),
|
||||
OrderBy(listOf(created, updated)),
|
||||
Limit(100),
|
||||
)
|
||||
.assertSqlEquals(
|
||||
"""SELECT TOP 100 id, created_at, updated_at FROM dbo.records """ +
|
||||
"""WHERE (created_at > ?) AND (updated_at <= ?) ORDER BY created_at, updated_at""",
|
||||
createdAfter to OffsetDateTimeFieldType,
|
||||
updatedBefore to OffsetDateTimeFieldType,
|
||||
)
|
||||
}
|
||||
|
||||
private fun SelectQuerySpec.assertSqlEquals(
|
||||
sql: String,
|
||||
vararg bindings: Pair<JsonNode, LosslessJdbcFieldType<*, *>>,
|
||||
) {
|
||||
val expected =
|
||||
SelectQuery(
|
||||
sql,
|
||||
select.columns,
|
||||
bindings.map { SelectQuery.Binding(it.first, it.second) },
|
||||
)
|
||||
val actual: SelectQuery = MsSqlSourceOperations().generate(this.optimize())
|
||||
Assertions.assertEquals(expected, actual)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,205 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
class MsSqlServerStateMigrationTest {
|
||||
|
||||
@Test
|
||||
fun `should parse new format state correctly`() {
|
||||
val newState =
|
||||
"""
|
||||
{
|
||||
"cursor": "2024-01-01T00:00:00",
|
||||
"version": 3,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "users",
|
||||
"cursor_field": ["created_at"],
|
||||
"stream_namespace": "dataset_1tb",
|
||||
"cursor_record_count": 0
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(newState))
|
||||
|
||||
assertEquals("2024-01-01T00:00:00", parsed.cursor)
|
||||
assertEquals("cursor_based", parsed.stateType)
|
||||
assertEquals(listOf("created_at"), parsed.cursorField)
|
||||
assertEquals(0, parsed.cursorRecordCount)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should migrate legacy OrderedColumnLoadStatus state correctly`() {
|
||||
val legacyOrderedColumnState =
|
||||
"""
|
||||
{
|
||||
"version": 2,
|
||||
"state_type": "ordered_column",
|
||||
"ordered_col": "id",
|
||||
"ordered_col_val": "12345",
|
||||
"incremental_state": {
|
||||
"version": 2,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "users",
|
||||
"stream_namespace": "dataset_1tb",
|
||||
"cursor_field": ["created_at"],
|
||||
"cursor": "2024-01-01T00:00:00",
|
||||
"cursor_record_count": 0
|
||||
}
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed =
|
||||
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyOrderedColumnState))
|
||||
|
||||
// Should be converted to primary_key state
|
||||
assertEquals("primary_key", parsed.stateType)
|
||||
assertEquals("id", parsed.pkName)
|
||||
assertEquals("12345", parsed.pkValue)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
|
||||
// Should preserve incremental state
|
||||
assertNotNull(parsed.incrementalState)
|
||||
val incrementalState =
|
||||
Jsons.treeToValue(parsed.incrementalState, MsSqlServerJdbcStreamStateValue::class.java)
|
||||
assertEquals("cursor_based", incrementalState.stateType)
|
||||
// Stream name and namespace are not tracked in the state value
|
||||
assertEquals(listOf("created_at"), incrementalState.cursorField)
|
||||
assertEquals("2024-01-01T00:00:00", incrementalState.cursor)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should migrate legacy CursorBasedStatus state correctly`() {
|
||||
val legacyCursorState =
|
||||
"""
|
||||
{
|
||||
"version": 2,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": "users",
|
||||
"stream_namespace": "dataset_1tb",
|
||||
"cursor_field": ["created_at"],
|
||||
"cursor": "2024-01-01T00:00:00",
|
||||
"cursor_record_count": 1
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyCursorState))
|
||||
|
||||
assertEquals("cursor_based", parsed.stateType)
|
||||
// Stream name and namespace are not tracked in the state value
|
||||
assertEquals(listOf("created_at"), parsed.cursorField)
|
||||
assertEquals("2024-01-01T00:00:00", parsed.cursor)
|
||||
assertEquals(1, parsed.cursorRecordCount)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should detect OrderedColumnLoadStatus by field presence`() {
|
||||
val legacyStateWithoutStateType =
|
||||
"""
|
||||
{
|
||||
"version": 2,
|
||||
"ordered_col": "id",
|
||||
"ordered_col_val": "12345"
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed =
|
||||
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyStateWithoutStateType))
|
||||
|
||||
assertEquals("primary_key", parsed.stateType)
|
||||
assertEquals("id", parsed.pkName)
|
||||
assertEquals("12345", parsed.pkValue)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should detect CursorBasedStatus by field presence`() {
|
||||
val legacyStateWithoutStateType =
|
||||
"""
|
||||
{
|
||||
"version": 2,
|
||||
"stream_name": "users",
|
||||
"cursor_field": ["created_at"],
|
||||
"cursor": "2024-01-01T00:00:00"
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed =
|
||||
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyStateWithoutStateType))
|
||||
|
||||
assertEquals("cursor_based", parsed.stateType)
|
||||
// Stream name is not tracked in the state value
|
||||
assertEquals(listOf("created_at"), parsed.cursorField)
|
||||
assertEquals("2024-01-01T00:00:00", parsed.cursor)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should handle unknown state format gracefully`() {
|
||||
val unknownState =
|
||||
"""
|
||||
{
|
||||
"unknown_field": "unknown_value"
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(unknownState))
|
||||
|
||||
// Should return default state
|
||||
assertEquals("cursor_based", parsed.stateType)
|
||||
assertEquals("", parsed.cursor)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should migrate ordered column state without incremental_state`() {
|
||||
val legacyOrderedColumnState =
|
||||
"""
|
||||
{
|
||||
"version": 2,
|
||||
"state_type": "ordered_column",
|
||||
"ordered_col": "id",
|
||||
"ordered_col_val": "12345"
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed =
|
||||
MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyOrderedColumnState))
|
||||
|
||||
assertEquals("primary_key", parsed.stateType)
|
||||
assertEquals("id", parsed.pkName)
|
||||
assertEquals("12345", parsed.pkValue)
|
||||
assertNull(parsed.incrementalState)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `should handle null values in legacy state`() {
|
||||
val legacyStateWithNulls =
|
||||
"""
|
||||
{
|
||||
"version": 2,
|
||||
"state_type": "cursor_based",
|
||||
"stream_name": null,
|
||||
"cursor_field": null,
|
||||
"cursor": null
|
||||
}
|
||||
""".trimIndent()
|
||||
|
||||
val parsed = MsSqlServerStateMigration.parseStateValue(Jsons.readTree(legacyStateWithNulls))
|
||||
|
||||
assertEquals("cursor_based", parsed.stateType)
|
||||
// Stream name is not tracked in the state value
|
||||
assertEquals(emptyList<String>(), parsed.cursorField)
|
||||
assertEquals("", parsed.cursor)
|
||||
assertEquals(0, parsed.cursorRecordCount)
|
||||
assertEquals(MsSqlServerJdbcStreamStateValue.CURRENT_VERSION, parsed.version)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,463 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.integrations.source.mssql
|
||||
|
||||
import io.airbyte.cdk.StreamIdentifier
|
||||
import io.airbyte.cdk.check.JdbcCheckQueries
|
||||
import io.airbyte.cdk.discover.JdbcMetadataQuerier
|
||||
import io.airbyte.cdk.jdbc.DefaultJdbcConstants
|
||||
import io.airbyte.cdk.jdbc.JdbcConnectionFactory
|
||||
import io.airbyte.protocol.models.v0.AirbyteStream
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
|
||||
import io.airbyte.protocol.models.v0.DestinationSyncMode
|
||||
import io.airbyte.protocol.models.v0.StreamDescriptor
|
||||
import io.airbyte.protocol.models.v0.SyncMode
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import java.sql.Connection
|
||||
import org.junit.jupiter.api.*
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
import org.testcontainers.containers.MSSQLServerContainer
|
||||
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
|
||||
class MsSqlSourceMetadataQuerierTest {
|
||||
|
||||
private lateinit var dbContainer: MSSQLServerContainer<*>
|
||||
private lateinit var config: MsSqlServerSourceConfiguration
|
||||
private lateinit var metadataQuerier: MsSqlSourceMetadataQuerier
|
||||
|
||||
@BeforeAll
|
||||
@Timeout(value = 300)
|
||||
fun setUp() {
|
||||
dbContainer =
|
||||
MsSqlServerContainerFactory.shared(
|
||||
"mcr.microsoft.com/mssql/server:2022-latest",
|
||||
MsSqlServerContainerFactory.WithNetwork,
|
||||
MsSqlServerContainerFactory.WithTestDatabase
|
||||
)
|
||||
|
||||
val spec = MsSqlServerContainerFactory.config(dbContainer)
|
||||
spec.setIncrementalValue(UserDefinedCursor())
|
||||
config = MsSqlServerSourceConfigurationFactory().make(spec)
|
||||
|
||||
// Set up tables for testing
|
||||
createTestTables()
|
||||
|
||||
// Create metadata querier
|
||||
val jdbcConnectionFactory = JdbcConnectionFactory(config)
|
||||
val sourceOperations = MsSqlSourceOperations()
|
||||
val base =
|
||||
JdbcMetadataQuerier(
|
||||
DefaultJdbcConstants(),
|
||||
config,
|
||||
sourceOperations,
|
||||
sourceOperations,
|
||||
JdbcCheckQueries(),
|
||||
jdbcConnectionFactory
|
||||
)
|
||||
metadataQuerier = MsSqlSourceMetadataQuerier(base)
|
||||
}
|
||||
|
||||
private fun createTestTables() {
|
||||
JdbcConnectionFactory(config).get().use { connection: Connection ->
|
||||
connection.isReadOnly = false
|
||||
|
||||
// Clean up existing test tables
|
||||
val dropStatements =
|
||||
listOf(
|
||||
"DROP TABLE IF EXISTS dbo.table_with_clustered_no_pk",
|
||||
"DROP TABLE IF EXISTS dbo.table_with_pk_no_clustered",
|
||||
"DROP TABLE IF EXISTS dbo.table_with_pk_and_single_clustered",
|
||||
"DROP TABLE IF EXISTS dbo.table_with_pk_and_composite_clustered",
|
||||
"DROP TABLE IF EXISTS dbo.table_no_pk_no_clustered"
|
||||
)
|
||||
|
||||
for (ddl in dropStatements) {
|
||||
connection.createStatement().use { stmt ->
|
||||
try {
|
||||
stmt.execute(ddl)
|
||||
} catch (e: Exception) {
|
||||
log.debug { "Table might not exist: ${e.message}" }
|
||||
}
|
||||
}
|
            }

            // Test Case 1: Table with clustered index but no primary key
            // Expected: Should use the clustered index column as primary key
            connection.createStatement().use { stmt ->
                stmt.execute(
                    """
                    CREATE TABLE dbo.table_with_clustered_no_pk (
                        id INT NOT NULL,
                        name NVARCHAR(100),
                        created_at DATETIME2
                    )
                    """
                )
                stmt.execute(
                    """
                    CREATE CLUSTERED INDEX idx_clustered_id
                    ON dbo.table_with_clustered_no_pk (id)
                    """
                )
            }

            // Test Case 2: Table with primary key but no clustered index
            // Expected: Should use the primary key
            connection.createStatement().use { stmt ->
                stmt.execute(
                    """
                    CREATE TABLE dbo.table_with_pk_no_clustered (
                        id INT NOT NULL,
                        name NVARCHAR(100),
                        created_at DATETIME2,
                        CONSTRAINT pk_table2 PRIMARY KEY NONCLUSTERED (id)
                    )
                    """
                )
            }

            // Test Case 3: Table with both primary key and single-column clustered index on
            // different columns
            // Expected: Should use the single-column clustered index
            connection.createStatement().use { stmt ->
                stmt.execute(
                    """
                    CREATE TABLE dbo.table_with_pk_and_single_clustered (
                        id INT NOT NULL,
                        code NVARCHAR(50) NOT NULL,
                        name NVARCHAR(100),
                        created_at DATETIME2,
                        CONSTRAINT pk_table3 PRIMARY KEY NONCLUSTERED (id)
                    )
                    """
                )
                stmt.execute(
                    """
                    CREATE CLUSTERED INDEX idx_clustered_code
                    ON dbo.table_with_pk_and_single_clustered (code)
                    """
                )
            }

            // Test Case 4: Table with primary key and composite clustered index
            // Expected: Should use the primary key (not the composite clustered index)
            connection.createStatement().use { stmt ->
                stmt.execute(
                    """
                    CREATE TABLE dbo.table_with_pk_and_composite_clustered (
                        id INT NOT NULL,
                        code NVARCHAR(50) NOT NULL,
                        category NVARCHAR(50) NOT NULL,
                        name NVARCHAR(100),
                        created_at DATETIME2,
                        CONSTRAINT pk_table4 PRIMARY KEY NONCLUSTERED (id)
                    )
                    """
                )
                stmt.execute(
                    """
                    CREATE CLUSTERED INDEX idx_clustered_composite
                    ON dbo.table_with_pk_and_composite_clustered (code, category)
                    """
                )
            }

            // Test Case 5: Table with no primary key and no clustered index
            // Expected: Should return empty list
            connection.createStatement().use { stmt ->
                stmt.execute(
                    """
                    CREATE TABLE dbo.table_no_pk_no_clustered (
                        id INT,
                        name NVARCHAR(100),
                        created_at DATETIME2
                    )
                    """
                )
            }
        }
    }

    @Test
    @DisplayName("Should use single-column clustered index when no primary key exists")
    fun testClusteredIndexNoPrimaryKey() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor().withName("table_with_clustered_no_pk").withNamespace("dbo")
            )

        val primaryKey = metadataQuerier.primaryKey(streamId)

        assertEquals(1, primaryKey.size, "Should have one primary key column")
        assertEquals(listOf("id"), primaryKey[0], "Should use clustered index column 'id'")
    }

    @Test
    @DisplayName("Should use primary key when no clustered index exists")
    fun testPrimaryKeyNoClusteredIndex() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor().withName("table_with_pk_no_clustered").withNamespace("dbo")
            )

        val primaryKey = metadataQuerier.primaryKey(streamId)

        assertEquals(1, primaryKey.size, "Should have one primary key column")
        assertEquals(listOf("id"), primaryKey[0], "Should use primary key column 'id'")
    }

    @Test
    @DisplayName("Should prefer single-column clustered index over primary key")
    fun testSingleClusteredIndexOverPrimaryKey() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor()
                    .withName("table_with_pk_and_single_clustered")
                    .withNamespace("dbo")
            )

        val primaryKey = metadataQuerier.primaryKey(streamId)

        assertEquals(1, primaryKey.size, "Should have one primary key column")
        assertEquals(
            listOf("code"),
            primaryKey[0],
            "Should use single-column clustered index 'code' instead of primary key 'id'"
        )
    }

    @Test
    @DisplayName("Should use primary key when clustered index is composite")
    fun testPrimaryKeyWhenCompositeClusteredIndex() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor()
                    .withName("table_with_pk_and_composite_clustered")
                    .withNamespace("dbo")
            )

        val primaryKey = metadataQuerier.primaryKey(streamId)

        assertEquals(1, primaryKey.size, "Should have one primary key column")
        assertEquals(
            listOf("id"),
            primaryKey[0],
            "Should use primary key 'id' instead of composite clustered index"
        )
    }

    @Test
    @DisplayName("Should return empty list when no primary key and no clustered index")
    fun testNoPrimaryKeyNoClusteredIndex() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor().withName("table_no_pk_no_clustered").withNamespace("dbo")
            )

        val primaryKey = metadataQuerier.primaryKey(streamId)

        assertTrue(
            primaryKey.isEmpty(),
            "Should return empty list when no PK and no clustered index"
        )
    }

    @Test
    @DisplayName("Verify clustered index discovery query")
    fun testClusteredIndexDiscovery() {
        // This test verifies that the clustered index discovery is working correctly
        val memoizedClusteredIndexKeys = metadataQuerier.memoizedClusteredIndexKeys

        // Find our test tables
        val tables = metadataQuerier.memoizedTableNames
        val testTables = tables.filter { it.name.startsWith("table_") && it.schema == "dbo" }

        assertTrue(testTables.size >= 5, "Should have at least 5 test tables")

        // Verify specific clustered indexes are discovered
        val tableWithClusteredNoPk = testTables.find { it.name == "table_with_clustered_no_pk" }
        assertNotNull(tableWithClusteredNoPk, "Should find table_with_clustered_no_pk")
        val clusteredKeys = memoizedClusteredIndexKeys[tableWithClusteredNoPk]
        assertNotNull(clusteredKeys, "Should have clustered index for table_with_clustered_no_pk")
        assertEquals(1, clusteredKeys?.size, "Should have single column clustered index")
        assertEquals(
            listOf("id"),
            clusteredKeys?.get(0),
            "Clustered index should be on 'id' column"
        )

        // Verify composite clustered index
        val tableWithComposite =
            testTables.find { it.name == "table_with_pk_and_composite_clustered" }
        assertNotNull(tableWithComposite, "Should find table_with_pk_and_composite_clustered")
        val compositeKeys = memoizedClusteredIndexKeys[tableWithComposite]
        assertNotNull(compositeKeys, "Should have clustered index for composite table")
        assertEquals(2, compositeKeys?.size, "Should have two columns in composite clustered index")
        assertEquals(listOf("code"), compositeKeys?.get(0), "First column should be 'code'")
        assertEquals(
            listOf("category"),
            compositeKeys?.get(1),
            "Second column should be 'category'"
        )
    }

    @Test
    @DisplayName("Verify primary key discovery query")
    fun testPrimaryKeyDiscovery() {
        // This test verifies that the primary key discovery is working correctly
        val memoizedPrimaryKeys = metadataQuerier.memoizedPrimaryKeys

        // Find our test tables
        val tables = metadataQuerier.memoizedTableNames
        val testTables = tables.filter { it.name.startsWith("table_") && it.schema == "dbo" }

        // Verify primary keys are discovered correctly
        val tableWithPkNoCluster = testTables.find { it.name == "table_with_pk_no_clustered" }
        assertNotNull(tableWithPkNoCluster, "Should find table_with_pk_no_clustered")
        val pkKeys = memoizedPrimaryKeys[tableWithPkNoCluster]
        assertNotNull(pkKeys, "Should have primary key for table_with_pk_no_clustered")
        assertEquals(1, pkKeys?.size, "Should have single column primary key")
        assertEquals(listOf("id"), pkKeys?.get(0), "Primary key should be on 'id' column")

        // Verify table without primary key
        val tableNoPk = testTables.find { it.name == "table_with_clustered_no_pk" }
        assertNotNull(tableNoPk, "Should find table_with_clustered_no_pk")
        val noPkKeys = memoizedPrimaryKeys[tableNoPk]
        assertNull(noPkKeys, "Should not have primary key for table_with_clustered_no_pk")
    }

    @Test
    @DisplayName("Should use user-defined logical PK from catalog when no physical PK exists")
    fun testUserDefinedLogicalPrimaryKey() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor().withName("table_no_pk_no_clustered").withNamespace("dbo")
            )

        // Create a ConfiguredAirbyteCatalog with a user-defined logical PK
        val configuredCatalog =
            ConfiguredAirbyteCatalog()
                .withStreams(
                    listOf(
                        ConfiguredAirbyteStream()
                            .withStream(
                                AirbyteStream()
                                    .withName("table_no_pk_no_clustered")
                                    .withNamespace("dbo")
                            )
                            .withSyncMode(SyncMode.INCREMENTAL)
                            .withDestinationSyncMode(DestinationSyncMode.APPEND)
                            .withPrimaryKey(listOf(listOf("name")))
                    )
                )

        // Create a new querier with the configured catalog
        val jdbcConnectionFactory = JdbcConnectionFactory(config)
        val sourceOperations = MsSqlSourceOperations()
        val base =
            JdbcMetadataQuerier(
                DefaultJdbcConstants(),
                config,
                sourceOperations,
                sourceOperations,
                JdbcCheckQueries(),
                jdbcConnectionFactory
            )
        val querierWithCatalog = MsSqlSourceMetadataQuerier(base, configuredCatalog)

        // Test that it uses the user-defined logical PK
        val primaryKey = querierWithCatalog.primaryKey(streamId)

        assertEquals(1, primaryKey.size, "Should have one logical primary key column")
        assertEquals(
            listOf("name"),
            primaryKey[0],
            "Should use user-defined logical primary key 'name' from catalog"
        )
    }

    @Test
    @DisplayName("Should prefer physical PK over user-defined logical PK")
    fun testPhysicalPrimaryKeyPreferredOverLogical() {
        val streamId =
            StreamIdentifier.from(
                StreamDescriptor().withName("table_with_pk_no_clustered").withNamespace("dbo")
            )

        // Create a ConfiguredAirbyteCatalog with a different logical PK
        val configuredCatalog =
            ConfiguredAirbyteCatalog()
                .withStreams(
                    listOf(
                        ConfiguredAirbyteStream()
                            .withStream(
                                AirbyteStream()
                                    .withName("table_with_pk_no_clustered")
                                    .withNamespace("dbo")
                            )
                            .withSyncMode(SyncMode.INCREMENTAL)
                            .withDestinationSyncMode(DestinationSyncMode.APPEND)
                            .withPrimaryKey(listOf(listOf("name")))
                    )
                )

        // Create a new querier with the configured catalog
        val jdbcConnectionFactory = JdbcConnectionFactory(config)
        val sourceOperations = MsSqlSourceOperations()
        val base =
            JdbcMetadataQuerier(
                DefaultJdbcConstants(),
                config,
                sourceOperations,
                sourceOperations,
                JdbcCheckQueries(),
                jdbcConnectionFactory
            )
        val querierWithCatalog = MsSqlSourceMetadataQuerier(base, configuredCatalog)

        // Test that it prefers the physical PK over the logical one
        val primaryKey = querierWithCatalog.primaryKey(streamId)

        assertEquals(1, primaryKey.size, "Should have one primary key column")
        assertEquals(
            listOf("id"),
            primaryKey[0],
            "Should use physical primary key 'id' even when logical PK 'name' is defined"
        )
    }

    @AfterAll
    fun tearDown() {
        // Clean up test tables
        try {
            JdbcConnectionFactory(config).get().use { connection: Connection ->
                connection.isReadOnly = false
                val dropStatements =
                    listOf(
                        "DROP TABLE IF EXISTS dbo.table_with_clustered_no_pk",
                        "DROP TABLE IF EXISTS dbo.table_with_pk_no_clustered",
                        "DROP TABLE IF EXISTS dbo.table_with_pk_and_single_clustered",
                        "DROP TABLE IF EXISTS dbo.table_with_pk_and_composite_clustered",
                        "DROP TABLE IF EXISTS dbo.table_no_pk_no_clustered"
                    )

                for (ddl in dropStatements) {
                    connection.createStatement().use { stmt ->
                        try {
                            stmt.execute(ddl)
                        } catch (e: Exception) {
                            log.debug { "Error dropping table: ${e.message}" }
                        }
                    }
                }
            }
        } catch (e: Exception) {
            log.error { "Error during teardown: ${e.message}" }
        }
    }
}
@@ -1,66 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import io.airbyte.cdk.testutils.ContainerFactory;
import org.apache.commons.lang3.StringUtils;
import org.testcontainers.containers.MSSQLServerContainer;
import org.testcontainers.containers.Network;
import org.testcontainers.utility.DockerImageName;

public class MsSQLContainerFactory extends ContainerFactory<MSSQLServerContainer<?>> {

  @Override
  protected MSSQLServerContainer<?> createNewContainer(DockerImageName imageName) {
    imageName = imageName.asCompatibleSubstituteFor("mcr.microsoft.com/mssql/server");
    var container = new MSSQLServerContainer<>(imageName).acceptLicense();
    container.addEnv("MSSQL_MEMORY_LIMIT_MB", "384");
    withNetwork(container);
    return container;
  }

  /**
   * Create a new network and bind it to the container.
   */
  public static void withNetwork(MSSQLServerContainer<?> container) {
    container.withNetwork(Network.newNetwork());
  }

  public static void withAgent(MSSQLServerContainer<?> container) {
    container.addEnv("MSSQL_AGENT_ENABLED", "True");
  }

  public static void withSslCertificates(MSSQLServerContainer<?> container) {
    // yes, this is uglier than sin. The reason why I'm doing this is because there's no command to
    // reload a SqlServer config. So I need to create all the necessary files before I start the
    // SQL server. Hence this horror
    String command = StringUtils.replace(
        """
        mkdir /tmp/certs/ &&
        openssl req -nodes -new -x509 -sha256 -keyout /tmp/certs/ca.key -out /tmp/certs/ca.crt -subj "/CN=ca" &&
        openssl req -nodes -new -x509 -sha256 -keyout /tmp/certs/dummy_ca.key -out /tmp/certs/dummy_ca.crt -subj "/CN=ca" &&
        openssl req -nodes -new -sha256 -keyout /tmp/certs/server.key -out /tmp/certs/server.csr -subj "/CN={hostName}" &&
        openssl req -nodes -new -sha256 -keyout /tmp/certs/dummy_server.key -out /tmp/certs/dummy_server.csr -subj "/CN={hostName}" &&

        openssl x509 -req -in /tmp/certs/server.csr -CA /tmp/certs/ca.crt -CAkey /tmp/certs/ca.key -out /tmp/certs/server.crt -days 365 -sha256 &&
        openssl x509 -req -in /tmp/certs/dummy_server.csr -CA /tmp/certs/ca.crt -CAkey /tmp/certs/ca.key -out /tmp/certs/dummy_server.crt -days 365 -sha256 &&
        openssl x509 -req -in /tmp/certs/server.csr -CA /tmp/certs/dummy_ca.crt -CAkey /tmp/certs/dummy_ca.key -out /tmp/certs/server_dummy_ca.crt -days 365 -sha256 &&
        chmod 440 /tmp/certs/* &&
        {
        cat > /var/opt/mssql/mssql.conf <<- EOF
        [network]
        tlscert = /tmp/certs/server.crt
        tlskey = /tmp/certs/server.key
        tlsprotocols = 1.2
        forceencryption = 1
        EOF
        } && /opt/mssql/bin/sqlservr
        """,
        "{hostName}", container.getHost());
    container.withCommand("bash", "-c", command)
        .withUrlParam("trustServerCertificate", "true");
  }

}
@@ -1,429 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.INVALID_CDC_CURSOR_POSITION_PROPERTY;
import static io.airbyte.integrations.source.mssql.MsSqlSpecConstants.RESYNC_DATA_OPTION;

import com.google.common.collect.Sets;
import io.airbyte.cdk.db.factory.DatabaseDriver;
import io.airbyte.cdk.db.jdbc.JdbcUtils;
import io.airbyte.cdk.testutils.ContainerFactory.NamedContainerModifier;
import io.airbyte.cdk.testutils.TestDatabase;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import io.debezium.connector.sqlserver.Lsn;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.sql.SQLException;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.jooq.SQLDialect;
import org.jooq.exception.DataAccessException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.MSSQLServerContainer;

public class MsSQLTestDatabase extends TestDatabase<MSSQLServerContainer<?>, MsSQLTestDatabase, MsSQLTestDatabase.MsSQLConfigBuilder> {

  static private final Logger LOGGER = LoggerFactory.getLogger(MsSQLTestDatabase.class);

  // Turning this to true will create a bunch of background threads that will regularly check the
  // state of the database and log every time it changes. A bit verbose, but useful for debugging
  private static final boolean ENABLE_BACKGROUND_THREADS = false;

  // empirically, 240 is enough. If you feel like you need to increase it, you're probably missing a
  // check somewhere
  static public final int MAX_RETRIES = 240;

  public enum BaseImage {

    MSSQL_2022("mcr.microsoft.com/mssql/server:2022-latest"),
    ;

    public final String reference;

    BaseImage(final String reference) {
      this.reference = reference;
    }

  }

  public enum ContainerModifier implements NamedContainerModifier<MSSQLServerContainer<?>> {

    AGENT(MsSQLContainerFactory::withAgent),
    WITH_SSL_CERTIFICATES(MsSQLContainerFactory::withSslCertificates),
    ;

    public final Consumer<MSSQLServerContainer<?>> modifier;

    ContainerModifier(final Consumer<MSSQLServerContainer<?>> modifier) {
      this.modifier = modifier;
    }

    @Override
    public Consumer<MSSQLServerContainer<?>> modifier() {
      return modifier;
    }

  }

  static public MsSQLTestDatabase in(final BaseImage imageName, final ContainerModifier... modifiers) {
    final var container = new MsSQLContainerFactory().shared(imageName.reference, modifiers);
    final MsSQLTestDatabase testdb;
    if (ENABLE_BACKGROUND_THREADS) {
      testdb = new MsSqlTestDatabaseWithBackgroundThreads(container);
    } else {
      testdb = new MsSQLTestDatabase(container);
    }
    return testdb
        .withConnectionProperty("encrypt", "false")
        .withConnectionProperty("trustServerCertificate", "true")
        .withConnectionProperty("databaseName", testdb.getDatabaseName())
        .initialized();
  }

  public MsSQLTestDatabase(final MSSQLServerContainer<?> container) {
    super(container);
    LOGGER.info("creating new database. databaseId=" + this.databaseId + ", databaseName=" + getDatabaseName());
  }

  public MsSQLTestDatabase withCdc() {
    LOGGER.info("enabling CDC on database {} with id {}", getDatabaseName(), databaseId);
    with("EXEC sys.sp_cdc_enable_db;");
    LOGGER.info("CDC enabled on database {} with id {}", getDatabaseName(), databaseId);
    return this;
  }

  private static final String RETRYABLE_CDC_TABLE_ENABLEMENT_ERROR_CONTENT =
      "The error returned was 14258: 'Cannot perform this operation while SQLServerAgent is starting. Try again later.'";
  private static final String ENABLE_CDC_SQL_FMT = """
      EXEC sys.sp_cdc_enable_table
      \t@source_schema = N'%s',
      \t@source_name = N'%s',
      \t@role_name = %s,
      \t@supports_net_changes = 0,
      \t@capture_instance = N'%s'""";
  private final Set<String> CDC_INSTANCE_NAMES = Sets.newConcurrentHashSet();

  public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName) {
    return withCdcForTable(schemaName, tableName, roleName, "%s_%s".formatted(schemaName, tableName));
  }

  public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName, String instanceName) {
    LOGGER.info(formatLogLine("enabling CDC for table {}.{} and role {}, instance {}"), schemaName, tableName, roleName, instanceName);
    String sqlRoleName = roleName == null ? "NULL" : "N'%s'".formatted(roleName);
    for (int tryCount = 0; tryCount < MAX_RETRIES; tryCount++) {
      try {
        Thread.sleep(1_000);
        synchronized (getContainer()) {
          LOGGER.info(formatLogLine("Trying to enable CDC for table {}.{} and role {}, instance {}, try {}/{}"), schemaName, tableName, roleName,
              instanceName, tryCount, MAX_RETRIES);
          with(ENABLE_CDC_SQL_FMT.formatted(schemaName, tableName, sqlRoleName, instanceName));
        }
        CDC_INSTANCE_NAMES.add(instanceName);
        return withShortenedCapturePollingInterval();
      } catch (DataAccessException e) {
        if (!e.getMessage().contains(RETRYABLE_CDC_TABLE_ENABLEMENT_ERROR_CONTENT)) {
          throw e;
        }
        tryCount++;
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
    throw new RuntimeException(formatLogLine("failed to enable CDC for table %s.%s within %d seconds").formatted(schemaName, tableName, MAX_RETRIES));
  }

  private static final String DISABLE_CDC_SQL_FMT = """
      EXEC sys.sp_cdc_disable_table
      \t@source_schema = N'%s',
      \t@source_name = N'%s',
      \t@capture_instance = N'%s'
      """;

  public MsSQLTestDatabase withCdcDisabledForTable(String schemaName, String tableName, String instanceName) {
    LOGGER.info(formatLogLine("disabling CDC for table {}.{}, instance {}"), schemaName, tableName, instanceName);
    if (!CDC_INSTANCE_NAMES.remove(instanceName)) {
      throw new RuntimeException(formatLogLine("CDC was disabled for instance ") + instanceName);
    }
    synchronized (getContainer()) {
      return with(DISABLE_CDC_SQL_FMT.formatted(schemaName, tableName, instanceName));
    }
  }

  private static final String DISABLE_CDC_SQL = "EXEC sys.sp_cdc_disable_db;";

  public MsSQLTestDatabase withoutCdc() {
    CDC_INSTANCE_NAMES.clear();
    synchronized (getContainer()) {
      return with(DISABLE_CDC_SQL);
    }
  }

  public MsSQLTestDatabase withAgentStarted() {
    return with("EXEC master.dbo.xp_servicecontrol N'START', N'SQLServerAGENT';");
  }

  public MsSQLTestDatabase withAgentStopped() {
    return with("EXEC master.dbo.xp_servicecontrol N'STOP', N'SQLServerAGENT';");
  }

  public MsSQLTestDatabase withWaitUntilAgentRunning() {
    waitForAgentState(true);
    return self();
  }

  public MsSQLTestDatabase withWaitUntilAgentStopped() {
    waitForAgentState(false);
    return self();
  }

  public MsSQLTestDatabase waitForCdcRecords(String schemaName, String tableName, int recordCount) {
    return waitForCdcRecords(schemaName, tableName, "%s_%s".formatted(schemaName, tableName), recordCount);
  }

  public MsSQLTestDatabase waitForCdcRecords(String schemaName, String tableName, String cdcInstanceName, int recordCount) {
    if (!CDC_INSTANCE_NAMES.contains(cdcInstanceName)) {
      throw new RuntimeException("CDC is not enabled on instance %s".formatted(cdcInstanceName));
    }
    String sql = "SELECT count(*) FROM cdc.%s_ct".formatted(cdcInstanceName);
    int actualRecordCount = 0;
    for (int tryCount = 0; tryCount < MAX_RETRIES; tryCount++) {
      LOGGER.info(formatLogLine("fetching the number of CDC records for {}.{}, instance {}"), schemaName, tableName, cdcInstanceName);
      try {
        Thread.sleep(1_000);
        actualRecordCount = query(ctx -> ctx.fetch(sql)).get(0).get(0, Integer.class);
      } catch (SQLException | DataAccessException e) {
        actualRecordCount = 0;
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
      LOGGER.info(formatLogLine("Found {} CDC records for {}.{} in instance {}. Expecting {}. Trying again ({}/{}"), actualRecordCount, schemaName,
          tableName, cdcInstanceName,
          recordCount, tryCount, MAX_RETRIES);
      if (actualRecordCount >= recordCount) {
        LOGGER.info(formatLogLine("found {} records after {} tries!"), actualRecordCount, tryCount);
        return self();
      }
    }
    throw new RuntimeException(formatLogLine(
        "failed to find %d records after %s seconds. Only found %d!").formatted(recordCount, MAX_RETRIES, actualRecordCount));
  }

  private boolean shortenedPollingIntervalEnabled = false;

  public MsSQLTestDatabase withShortenedCapturePollingInterval() {
    if (!shortenedPollingIntervalEnabled) {
      synchronized (getContainer()) {
        shortenedPollingIntervalEnabled = true;
        with("EXEC sys.sp_cdc_change_job @job_type = 'capture', @pollinginterval = 1;");
      }
    }
    return this;
  }

  private void waitForAgentState(final boolean running) {
    final String expectedValue = running ? "Running." : "Stopped.";
    LOGGER.info(formatLogLine("Waiting for SQLServerAgent state to change to '{}'."), expectedValue);
    for (int i = 0; i < MAX_RETRIES; i++) {
      try {
        Thread.sleep(1_000);
        final var r = query(ctx -> ctx.fetch("EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';").get(0));
        if (expectedValue.equalsIgnoreCase(r.getValue(0).toString())) {
          LOGGER.info(formatLogLine("SQLServerAgent state is '{}', as expected."), expectedValue);
          return;
        }
        LOGGER.info(formatLogLine("Retrying, SQLServerAgent state {} does not match expected '{}'."), r, expectedValue);
      } catch (final SQLException e) {
        LOGGER.info(formatLogLine("Retrying agent state query after catching exception {}."), e.getMessage());
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
    throw new RuntimeException(formatLogLine("Exhausted retry attempts while polling for agent state"));
  }

  public static final String MAX_LSN_QUERY = "SELECT sys.fn_cdc_get_max_lsn();";

  public MsSQLTestDatabase withWaitUntilMaxLsnAvailable() {
    LOGGER.info(formatLogLine("Waiting for max LSN to become available for database {}."), getDatabaseName());
    for (int i = 0; i < MAX_RETRIES; i++) {
      try {
        Thread.sleep(1_000);
        final var maxLSN = query(ctx -> ctx.fetch(MAX_LSN_QUERY).get(0).get(0, byte[].class));
        if (maxLSN != null) {
          LOGGER.info(formatLogLine("Max LSN available for database {}: {}"), getDatabaseName(), Lsn.valueOf(maxLSN));
          return self();
        }
        LOGGER.info(formatLogLine("Retrying, max LSN still not available for database {}."), getDatabaseName());
      } catch (final SQLException e) {
        LOGGER.info(formatLogLine("Retrying max LSN query after catching exception {}"), e.getMessage());
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
    }
    throw new RuntimeException("Exhausted retry attempts while polling for max LSN availability");
  }

  @Override
  public String getPassword() {
    return "S00p3rS33kr3tP4ssw0rd!";
  }

  @Override
  public String getJdbcUrl() {
    return String.format("jdbc:sqlserver://%s:%d", getContainer().getHost(), getContainer().getFirstMappedPort());
  }

  @Override
  protected Stream<Stream<String>> inContainerBootstrapCmd() {
    return Stream.of(
        mssqlCmd(Stream.of(String.format("CREATE DATABASE %s", getDatabaseName()))),
        mssqlCmd(Stream.of(
            String.format("USE %s", getDatabaseName()),
            String.format("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", getUserName(), getPassword(), getDatabaseName()),
            String.format("ALTER SERVER ROLE [sysadmin] ADD MEMBER %s", getUserName()),
            String.format("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", getUserName(), getUserName()),
            String.format("ALTER ROLE [db_owner] ADD MEMBER %s", getUserName()))));
  }

  /**
   * Don't drop anything when closing the test database. Instead, if cleanup is required, call
   * {@link #dropDatabaseAndUser()} explicitly. Implicit cleanups may result in deadlocks and so
   * aren't really worth it.
   */
  @Override
  protected Stream<String> inContainerUndoBootstrapCmd() {
    return Stream.empty();
  }

  public void dropDatabaseAndUser() {
    execInContainer(mssqlCmd(Stream.of(
        String.format("USE master"),
        String.format("ALTER DATABASE %s SET single_user WITH ROLLBACK IMMEDIATE", getDatabaseName()),
        String.format("DROP DATABASE %s", getDatabaseName()))));
  }

  public Stream<String> mssqlCmd(final Stream<String> sql) {
    return Stream.of("/opt/mssql-tools18/bin/sqlcmd",
        "-U", getContainer().getUsername(),
        "-P", getContainer().getPassword(),
        "-Q", sql.collect(Collectors.joining("; ")),
        "-b", "-e", "-C");
  }

  @Override
  public DatabaseDriver getDatabaseDriver() {
    return DatabaseDriver.MSSQLSERVER;
  }

  @Override
  public SQLDialect getSqlDialect() {
    return SQLDialect.DEFAULT;
  }

  public static enum CertificateKey {

    CA(true),
    DUMMY_CA(false),
    SERVER(true),
    DUMMY_SERVER(false),
    SERVER_DUMMY_CA(false),
    ;

    public final boolean isValid;

    CertificateKey(final boolean isValid) {
      this.isValid = isValid;
    }

  }

  private volatile Map<CertificateKey, String> cachedCerts = new ConcurrentHashMap<>();

  public String getCertificate(final CertificateKey certificateKey) {
    if (!cachedCerts.containsKey(certificateKey)) {
      final String certificate;
      try {
        final String command = "cat /tmp/certs/" + certificateKey.name().toLowerCase() + ".crt";
        certificate = getContainer().execInContainer("bash", "-c", command).getStdout().trim();
      } catch (final IOException e) {
        throw new UncheckedIOException(e);
      } catch (final InterruptedException e) {
        throw new RuntimeException(e);
      }
      synchronized (cachedCerts) {
        this.cachedCerts.put(certificateKey, certificate);
      }
    }
    return cachedCerts.get(certificateKey);
  }

  @Override
  public MsSQLConfigBuilder configBuilder() {
    return new MsSQLConfigBuilder(this);
  }

  static public class MsSQLConfigBuilder extends ConfigBuilder<MsSQLTestDatabase, MsSQLConfigBuilder> {

    protected MsSQLConfigBuilder(final MsSQLTestDatabase testDatabase) {
      super(testDatabase);
      with(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=2");
    }

    public MsSQLConfigBuilder withCdcReplication() {
      return with("is_test", true)
          .with("replication_method", Map.of(
              "method", "CDC",
              "initial_waiting_seconds", Duration.ofSeconds(20).getSeconds(),
              INVALID_CDC_CURSOR_POSITION_PROPERTY, RESYNC_DATA_OPTION));
    }

    public MsSQLConfigBuilder withSchemas(final String... schemas) {
      return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas));
    }

    @Override
    public MsSQLConfigBuilder withoutSsl() {
      return withSsl(Map.of("ssl_method", "unencrypted"));
    }

    @Deprecated
    public MsSQLConfigBuilder withSsl(final Map<Object, Object> sslMode) {
      return with("ssl_method", sslMode);
    }

    public MsSQLConfigBuilder withEncrytedTrustServerCertificate() {
      return withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate"));
    }

    public MsSQLConfigBuilder withEncrytedVerifyServerCertificate(final String certificate, final String hostnameInCertificate) {
      if (hostnameInCertificate != null) {
        return withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
            "certificate", certificate,
            "hostNameInCertificate", hostnameInCertificate));
      } else {
        return withSsl(Map.of("ssl_method", "encrypted_verify_certificate",
            "certificate", certificate));
      }
    }

  }

  @Override
  public void close() {
    MssqlDebeziumStateUtil.disposeInitialState();
    super.close();
  }

}
@@ -1,306 +0,0 @@
/*
 * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.integrations.source.mssql;

import io.airbyte.commons.logging.LoggingHelper.Color;
import io.airbyte.commons.logging.MdcScope;
import io.airbyte.integrations.source.mssql.cdc.MssqlDebeziumStateUtil;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.jooq.Record;
import org.jooq.exception.DataAccessException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.MSSQLServerContainer;

public class MsSqlTestDatabaseWithBackgroundThreads extends MsSQLTestDatabase {

  private abstract class AbstractMssqlTestDatabaseBackgroundThread extends Thread {

    protected Logger LOGGER = LoggerFactory.getLogger(this.getClass());
    protected final boolean PRINT_EVERY_CALL = false;

    AbstractMssqlTestDatabaseBackgroundThread() {
      this.start();
    }

    protected volatile boolean stop = false;

    protected String formatLogLine(String logLine) {
      String retVal = this.getClass().getSimpleName() + " databaseId=" + databaseId + ", containerId=" + containerId + " - " + logLine;
      return retVal;
    }

    @SuppressWarnings("try")
    public void run() {
      try (MdcScope mdcScope = new MdcScope.Builder().setPrefixColor(Color.PURPLE_BACKGROUND).setLogPrefix(this.getClass().getSimpleName())
          .build()) {
        while (!stop) {
          try {
            Thread.sleep(100);
            innerRun();
          } catch (final Throwable t) {
            LOGGER.info(formatLogLine(
                "got exception of type " + t.getClass() + ":" + StringUtils.replace(t.getMessage() + "\n" + formatStackTrace(t), "\n", "\\n")));
          }
        }
      }
    }

    private String formatStackTrace(Throwable t) {
      boolean belowCurrentCall = false;
      List<String> stackToDisplay = new LinkedList<String>();
      for (String stackString : ExceptionUtils.getStackFrames(t)) {
        if (stackString.startsWith("\tat ")) {
          if (!belowCurrentCall && stackString.contains(AbstractMssqlTestDatabaseBackgroundThread.class.getSimpleName())) {
            belowCurrentCall = true;
          }
        } else {
          belowCurrentCall = false;
        }
        if (!belowCurrentCall) {
          stackToDisplay.add(stackString);
        }
      }
      return StringUtils.join(stackToDisplay, "\n ");
    }

    public abstract void innerRun() throws Exception;

  }

  private class MssqlTestDatabaseBackgroundThreadAgentState extends AbstractMssqlTestDatabaseBackgroundThread {

    private String previousValue = null;

    @Override
    public void innerRun() throws Exception {
      String agentStateSql = "EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';";
      final var r = query(ctx -> ctx.fetch(agentStateSql).get(0));
      String agentState = r.getValue(0).toString();
      if (PRINT_EVERY_CALL || !Objects.equals(agentState, previousValue)) {
        LOGGER.info(formatLogLine("agentState changed from {} to {}"), previousValue, agentState);
        previousValue = agentState;
      }
    }

  }

  private class MssqlTestDatabaseBackgroundThreadFnCdcGetMaxLsn extends AbstractMssqlTestDatabaseBackgroundThread {

    private String previousValue = null;

    @Override
    public void innerRun() throws Exception {
      String max_lsn;
      try {
        Object retVal = query(ctx -> ctx.fetch(MAX_LSN_QUERY)).get(0).getValue(0);
        if (retVal instanceof byte[] bytes) {
          max_lsn = new String(Base64.getEncoder().encode(bytes), StandardCharsets.UTF_8);
        } else {
          max_lsn = String.valueOf(retVal);
        }
      } catch (DataAccessException e) {
        if (e.getMessage().contains("Invalid object name 'cdc.lsn_time_mapping'")) {
          max_lsn = "DataAccessException " + e.getMessage();
        } else {
          throw e;
        }
      }
      if (PRINT_EVERY_CALL || !Objects.equals(max_lsn, previousValue)) {
        LOGGER.info(formatLogLine("sys.fn_cdc_get_max_lsn changed from {} to {}"), previousValue, max_lsn);
        previousValue = max_lsn;
      }
    }

  }

  private class MssqlTestDatabaseBackgroundThreadLsnTimeMapping extends AbstractMssqlTestDatabaseBackgroundThread {

    private String previousValue = null;
    private static final String LSN_TIME_MAPPING_QUERY = "SELECT start_lsn, tran_begin_time, tran_end_time, tran_id FROM cdc.lsn_time_mapping;";

    @Override
    public void innerRun() throws Exception {
      String results;
      try {
        results = query(ctx -> ctx.fetch(LSN_TIME_MAPPING_QUERY)).toString();
      } catch (DataAccessException e) {
        if (e.getMessage().contains("Invalid object name 'cdc.lsn_time_mapping'")) {
          results = "DataAccessException " + e.getMessage();
        } else {
          throw e;
        }
      }
      if (PRINT_EVERY_CALL || !Objects.equals(results, previousValue)) {
        LOGGER.info(formatLogLine("sys.lsn_time_mapping changed from {} to {}"), previousValue, results);
        previousValue = results;
      }
    }

  }

  private class MssqlTestDatabaseBackgroundThreadQueryJobsTable extends AbstractMssqlTestDatabaseBackgroundThread {

    private String previousValue = null;
    private int previousRowCount = -1;
    private static final String JOBS_TABLE_QUERY = "SELECT * FROM msdb.dbo.cdc_jobs";

    @Override
    public void innerRun() throws Exception {
      int resultSize = 0;
      String resultsAsString;
      try {
        List<Record> results = query(ctx -> ctx.fetch(JOBS_TABLE_QUERY));
        resultsAsString = results.toString();
        resultSize = results.size();
      } catch (DataAccessException e) {
        if (e.getMessage().contains("Invalid object name 'msdb.dbo.cdc_jobs'")) {
          resultsAsString = "DataAccessException " + e.getMessage();
        } else {
          throw e;
        }
      }
      if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) {
        LOGGER.info(formatLogLine("cdc.change_tables changed from {} rows\n{} to {} rows\n{}"), previousRowCount, previousValue, resultSize,
            resultsAsString);
        previousValue = resultsAsString;
        previousRowCount = resultSize;
      }
    }

  }

  private class MssqlTestDatabaseBackgroundThreadQueryChangeTables extends AbstractMssqlTestDatabaseBackgroundThread {

    private String previousValue = null;
    private int previousRowCount = -1;
    private static final String CHANGE_TABLES_QUERY = """
        SELECT OBJECT_SCHEMA_NAME(source_object_id, DB_ID('%s')),
        OBJECT_NAME(source_object_id, DB_ID('%s')),
        capture_instance,
        object_id,
        start_lsn FROM cdc.change_tables""";

    @Override
    public void innerRun() throws Exception {
      int resultSize = 0;
      String resultsAsString;
      try {
        List<Record> results = query(ctx -> ctx.fetch(CHANGE_TABLES_QUERY.formatted(getDatabaseName(), getDatabaseName())));
        resultsAsString = results.toString();
        resultSize = results.size();
      } catch (DataAccessException e) {
        if (e.getMessage().contains("Invalid object name 'cdc.change_tables'")) {
          resultsAsString = "DataAccessException " + e.getMessage();
        } else {
          throw e;
        }
      }
      if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) {
        LOGGER.info(formatLogLine("cdc.change_tables changed from {} rows\n{} to {} rows\n{}"), previousRowCount, previousValue, resultSize,
            resultsAsString);
        previousValue = resultsAsString;
        previousRowCount = resultSize;
      }
    }

  }

  private class MssqlTestDatabaseBackgroundThreadQueryCdcTable extends AbstractMssqlTestDatabaseBackgroundThread {

    private final String schemaName;
    private final String tableName;
    private final String instanceName;
    private String previousValue = null;
    private int previousRowCount = -1;

    MssqlTestDatabaseBackgroundThreadQueryCdcTable(String schemaName, String tableName, String instanceName) {
      this.schemaName = schemaName;
      this.tableName = tableName;
      this.instanceName = instanceName;
    }

    private static final String CDC_TABLE_SELECT_QUERY_STRING = "SELECT * FROM cdc.%s_ct";

    @Override
    public void innerRun() throws Exception {
      int resultSize = 0;
      String resultsAsString;
      try {
        List<Record> results = query(ctx -> ctx.fetch(CDC_TABLE_SELECT_QUERY_STRING.formatted(instanceName)));
        resultsAsString = results.toString();
        resultSize = results.size();
      } catch (DataAccessException e) {
        if (e.getMessage().contains("Invalid object name 'cdc.%s_ct'".formatted(instanceName))) {
          resultsAsString = "DataAccessException " + e.getMessage();
        } else {
          throw e;
        }
      }
      if (PRINT_EVERY_CALL || !Objects.equals(resultsAsString, previousValue)) {
        LOGGER.info(formatLogLine("cdc table {} for {}.{} changed from {} rows\n{} to {} rows\n{}"), instanceName, schemaName, tableName,
            previousRowCount, previousValue, resultSize,
            resultsAsString);
        previousValue = resultsAsString;
        previousRowCount = resultSize;
      }
    }

  }

  private final List<AbstractMssqlTestDatabaseBackgroundThread> bgThreads = new ArrayList<>();

  MsSqlTestDatabaseWithBackgroundThreads(MSSQLServerContainer<?> container) {
    super(container);
  }

  public MsSQLTestDatabase initialized() {
    super.initialized();
    bgThreads.add(new MssqlTestDatabaseBackgroundThreadAgentState());
    bgThreads.add(new MssqlTestDatabaseBackgroundThreadFnCdcGetMaxLsn());
    bgThreads.add(new MssqlTestDatabaseBackgroundThreadLsnTimeMapping());
    bgThreads.add(new MssqlTestDatabaseBackgroundThreadQueryChangeTables());
    bgThreads.add(new MssqlTestDatabaseBackgroundThreadQueryJobsTable());
    return self();
  }

  public void close() {
    for (var bgThread : bgThreads) {
      bgThread.stop = true;
    }
    super.close();
    MssqlDebeziumStateUtil.disposeInitialState();
  }

  private final Map<String, MssqlTestDatabaseBackgroundThreadQueryCdcTable> bgThreadByInstance = new ConcurrentHashMap<>();

  @Override
  public MsSQLTestDatabase withCdcForTable(String schemaName, String tableName, String roleName, String instanceName) {
    super.withCdcForTable(schemaName, tableName, roleName, instanceName);
    MssqlTestDatabaseBackgroundThreadQueryCdcTable bgThread = new MssqlTestDatabaseBackgroundThreadQueryCdcTable(schemaName, tableName, instanceName);
    bgThreadByInstance.put(instanceName, bgThread);
    bgThreads.add(bgThread);
    return this;
  }

  @Override
  public MsSQLTestDatabase withCdcDisabledForTable(String schemaName, String tableName, String instanceName) {
    bgThreadByInstance.get(instanceName).stop = true;
    super.withCdcDisabledForTable(schemaName, tableName, instanceName);
    return this;
  }

}