1
0
mirror of synced 2025-12-23 21:03:15 -05:00

feat: improve temporal representation in proto + shared encoder/decoder for source and dest (#67016)

Co-authored-by: Rodi Reich Zilberman <867491+rodireich@users.noreply.github.com>
This commit is contained in:
Subodh Kant Chaturvedi
2025-10-21 22:31:47 +05:30
committed by GitHub
parent c5b1afc9e7
commit 1b48ee9adf
27 changed files with 2729 additions and 1410 deletions

View File

@@ -1,3 +1,7 @@
## Version 0.1.54
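Update the temporal type representation in the protobuf format: dates, times, and timestamps are now encoded as numeric/structured fields instead of strings.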
## Version 0.1.53
**Extract CDK**

View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.protocol
import com.fasterxml.jackson.core.io.BigDecimalParser
import com.fasterxml.jackson.core.io.BigIntegerParser
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.nio.charset.StandardCharsets
import java.time.Instant
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
class AirbyteValueProtobufDecoder {

    /**
     * Converts [value] into the JVM object matching whichever member of its `oneof` is populated.
     *
     * Result type per case:
     * - BOOLEAN -> Boolean, STRING -> String
     * - INTEGER and BIG_INTEGER -> BigInteger
     * - NUMBER and BIG_DECIMAL -> BigDecimal
     * - DATE -> [LocalDate], TIME_WITHOUT_TIMEZONE -> [LocalTime]
     * - TIME_WITH_TIMEZONE -> [OffsetTime]
     * - TIMESTAMP_WITHOUT_TIMEZONE -> [LocalDateTime]
     * - TIMESTAMP_WITH_TIMEZONE -> [OffsetDateTime]
     * - JSON -> the raw bytes read as a UTF-8 String (no JSON parsing happens here)
     * - NULL, VALUE_NOT_SET, or a null case -> `null`
     *
     * NOTE(review): a NUMBER holding NaN or an infinity presumably cannot be turned into a
     * BigDecimal and would throw here -- confirm whether producers can emit such values.
     */
    fun decode(value: AirbyteValueProtobuf): Any? =
        when (value.valueCase) {
            // Nothing to decode for these.
            null,
            AirbyteValueProtobuf.ValueCase.VALUE_NOT_SET,
            AirbyteValueProtobuf.ValueCase.NULL -> null

            // Scalars.
            AirbyteValueProtobuf.ValueCase.BOOLEAN -> value.boolean
            AirbyteValueProtobuf.ValueCase.STRING -> value.string
            AirbyteValueProtobuf.ValueCase.JSON -> value.json.toString(StandardCharsets.UTF_8)

            // Numerics: the fixed-width and the string-encoded variants decode to the same type.
            AirbyteValueProtobuf.ValueCase.INTEGER -> value.integer.toBigInteger()
            AirbyteValueProtobuf.ValueCase.BIG_INTEGER ->
                BigIntegerParser.parseWithFastParser(value.bigInteger)
            AirbyteValueProtobuf.ValueCase.NUMBER -> value.number.toBigDecimal()
            AirbyteValueProtobuf.ValueCase.BIG_DECIMAL ->
                BigDecimalParser.parseWithFastParser(value.bigDecimal)

            // Temporal values without an offset.
            AirbyteValueProtobuf.ValueCase.DATE -> LocalDate.ofEpochDay(value.date)
            AirbyteValueProtobuf.ValueCase.TIME_WITHOUT_TIMEZONE ->
                LocalTime.ofNanoOfDay(value.timeWithoutTimezone)
            AirbyteValueProtobuf.ValueCase.TIMESTAMP_WITHOUT_TIMEZONE ->
                value.timestampWithoutTimezone.let { msg ->
                    LocalDateTime.of(
                        LocalDate.ofEpochDay(msg.dateDaysSinceEpoch),
                        LocalTime.ofNanoOfDay(msg.nanosOfDay)
                    )
                }

            // Temporal values carrying an explicit UTC offset (in seconds).
            AirbyteValueProtobuf.ValueCase.TIME_WITH_TIMEZONE ->
                value.timeWithTimezone.let { msg ->
                    OffsetTime.of(
                        LocalTime.ofNanoOfDay(msg.nanosOfDay),
                        ZoneOffset.ofTotalSeconds(msg.offsetSeconds)
                    )
                }
            AirbyteValueProtobuf.ValueCase.TIMESTAMP_WITH_TIMEZONE ->
                value.timestampWithTimezone.let { msg ->
                    OffsetDateTime.ofInstant(
                        Instant.ofEpochSecond(msg.epochSecond, msg.nano.toLong()),
                        ZoneOffset.ofTotalSeconds(msg.offsetSeconds)
                    )
                }
        }
}

View File

@@ -0,0 +1,233 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.protocol
import com.google.protobuf.ByteString
import com.google.protobuf.NullValue
import io.airbyte.cdk.data.AirbyteSchemaType
import io.airbyte.cdk.data.ArrayAirbyteSchemaType
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.math.BigDecimal
import java.math.BigInteger
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.Base64
/**
 * Type-based encoder for protobuf values. Sources use this to encode values based on the
 * AirbyteSchemaType from field discovery.
 *
 * You can provide an existing [AirbyteValueProtobuf.Builder] to avoid repeated allocations. When
 * provided, the builder is cleared before use.
 */
class AirbyteValueProtobufEncoder {
    /**
     * Encodes a value into protobuf format based on its AirbyteSchemaType. Returns a protobuf
     * representing a null value if [value] is null, regardless of [airbyteSchemaType].
     *
     * @param value The value to encode; its runtime type must be one accepted for
     * [airbyteSchemaType] (see the per-type helpers below).
     * @param airbyteSchemaType Selects which protobuf field is populated. Array types and JSONB
     * are written to the `json` bytes field; BINARY is written as a Base64 `string`.
     * @param builder Optional builder to reuse. If provided, it will be cleared at the start.
     * @return The (possibly reused) builder with exactly one value case set.
     * @throws IllegalArgumentException if a BOOLEAN, STRING or TIME_WITH_TIMEZONE value has the
     * wrong runtime type.
     * @throws IllegalStateException if a value of any other schema type has an unsupported
     * runtime type.
     */
    fun encode(
        value: Any?,
        airbyteSchemaType: AirbyteSchemaType,
        builder: AirbyteValueProtobuf.Builder? = null
    ): AirbyteValueProtobuf.Builder {
        val b = (builder ?: AirbyteValueProtobuf.newBuilder()).clear()
        if (value == null) {
            return buildNull(b)
        }
        return when (airbyteSchemaType) {
            LeafAirbyteSchemaType.BOOLEAN -> encodeBoolean(value, b)
            LeafAirbyteSchemaType.STRING -> encodeString(value, b)
            LeafAirbyteSchemaType.INTEGER -> encodeInteger(value, b)
            LeafAirbyteSchemaType.NUMBER -> encodeNumber(value, b)
            LeafAirbyteSchemaType.DATE -> encodeDate(value, b)
            LeafAirbyteSchemaType.TIME_WITH_TIMEZONE -> encodeTimeWithTimezone(value, b)
            LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE -> encodeTimeWithoutTimezone(value, b)
            LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE -> encodeTimestampWithTimezone(value, b)
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE ->
                encodeTimestampWithoutTimezone(value, b)
            LeafAirbyteSchemaType.NULL -> buildNull(b)
            LeafAirbyteSchemaType.JSONB -> encodeJson(value, b)
            LeafAirbyteSchemaType.BINARY -> encodeBinary(value, b)
            is ArrayAirbyteSchemaType -> encodeJson(value, b)
        }
    }

    /** Marks [b] as holding an explicit null. */
    private fun buildNull(b: AirbyteValueProtobuf.Builder): AirbyteValueProtobuf.Builder {
        return b.setNull(NullValue.NULL_VALUE)
    }

    /** Accepts only [Boolean]. */
    private fun encodeBoolean(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        require(value is Boolean) { "Expected Boolean, got ${value::class.simpleName}" }
        return b.setBoolean(value)
    }

    /** Accepts only [String]. */
    private fun encodeString(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        require(value is String) { "Expected String, got ${value::class.simpleName}" }
        return b.setString(value)
    }

    /**
     * Accepts [BigInteger], [Long], [Int] and [Short]. A [BigInteger] is written to the
     * fixed-width `integer` field when it is small enough and to the decimal-string
     * `big_integer` field otherwise.
     */
    private fun encodeInteger(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        return when (value) {
            is BigInteger -> {
                // bitLength() excludes the sign bit, so anything below 63 bits always fits in a
                // Long. Values of exactly 63 bits (e.g. Long.MAX_VALUE) would fit as well but are
                // sent as strings; the threshold is kept to preserve the existing wire output.
                if (value.bitLength() < 63) {
                    b.setInteger(value.longValueExact())
                } else {
                    b.setBigInteger(value.toString())
                }
            }
            is Long -> b.setInteger(value)
            is Int -> b.setInteger(value.toLong())
            is Short -> b.setInteger(value.toLong())
            else ->
                error("Expected BigInteger, Long, Int, or Short, got ${value::class.simpleName}")
        }
    }

    /**
     * Accepts [BigDecimal] (written as a plain, non-scientific decimal string), [Double] and
     * [Float] (both written to the `number` field).
     */
    private fun encodeNumber(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        return when (value) {
            is BigDecimal -> b.setBigDecimal(value.toPlainString())
            is Double -> b.setNumber(value)
            is Float -> b.setNumber(value.toDouble())
            else -> error("Expected BigDecimal, Double, or Float, got ${value::class.simpleName}")
        }
    }

    /** Accepts [LocalDate] or [java.sql.Date]; written as days since the epoch. */
    private fun encodeDate(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        val localDate =
            when (value) {
                is LocalDate -> value
                is java.sql.Date -> value.toLocalDate()
                else -> error("Expected LocalDate or java.sql.Date, got ${value::class.simpleName}")
            }
        return b.setDate(localDate.toEpochDay())
    }

    /** Accepts only [OffsetTime]; written as nanos-of-day plus the offset in seconds. */
    private fun encodeTimeWithTimezone(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        require(value is OffsetTime) { "Expected OffsetTime, got ${value::class.simpleName}" }
        val offsetTimeMsg =
            io.airbyte.protocol.protobuf.AirbyteRecordMessage.OffsetTime.newBuilder()
                .setNanosOfDay(value.toLocalTime().toNanoOfDay())
                .setOffsetSeconds(value.offset.totalSeconds)
                .build()
        return b.setTimeWithTimezone(offsetTimeMsg)
    }

    /** Accepts [LocalTime] or [java.sql.Time]; written as nanos-of-day. */
    private fun encodeTimeWithoutTimezone(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        val localTime =
            when (value) {
                is LocalTime -> value
                is java.sql.Time -> value.toLocalTime()
                else -> error("Expected LocalTime or java.sql.Time, got ${value::class.simpleName}")
            }
        return b.setTimeWithoutTimezone(localTime.toNanoOfDay())
    }

    /**
     * Accepts [OffsetDateTime] or [java.sql.Timestamp]. A Timestamp carries no offset, so it is
     * encoded at UTC. The wire form is the instant (epoch second + nano) plus the offset in
     * seconds.
     */
    private fun encodeTimestampWithTimezone(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        val offsetDateTime =
            when (value) {
                is OffsetDateTime -> value
                is java.sql.Timestamp -> OffsetDateTime.ofInstant(value.toInstant(), ZoneOffset.UTC)
                else ->
                    error(
                        "Expected OffsetDateTime or java.sql.Timestamp, got ${value::class.simpleName}"
                    )
            }
        val instant = offsetDateTime.toInstant()
        val offsetDateTimeMsg =
            io.airbyte.protocol.protobuf.AirbyteRecordMessage.OffsetDateTime.newBuilder()
                .setEpochSecond(instant.epochSecond)
                .setNano(instant.nano)
                .setOffsetSeconds(offsetDateTime.offset.totalSeconds)
                .build()
        return b.setTimestampWithTimezone(offsetDateTimeMsg)
    }

    /**
     * Accepts [LocalDateTime] or [java.sql.Timestamp]; written as days since the epoch plus
     * nanos-of-day.
     */
    private fun encodeTimestampWithoutTimezone(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        val localDateTime =
            when (value) {
                is LocalDateTime -> value
                is java.sql.Timestamp -> value.toLocalDateTime()
                else ->
                    error(
                        "Expected LocalDateTime or java.sql.Timestamp, got ${value::class.simpleName}"
                    )
            }
        val localDateTimeMsg =
            io.airbyte.protocol.protobuf.AirbyteRecordMessage.LocalDateTime.newBuilder()
                .setDateDaysSinceEpoch(localDateTime.toLocalDate().toEpochDay())
                .setNanosOfDay(localDateTime.toLocalTime().toNanoOfDay())
                .build()
        return b.setTimestampWithoutTimezone(localDateTimeMsg)
    }

    /**
     * Accepts already-serialized JSON as a [String] (encoded as UTF-8), a [ByteArray], or a
     * [ByteBuffer]. The content is copied verbatim; it is not parsed or validated.
     */
    private fun encodeJson(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        val jsonBytes: ByteString =
            when (value) {
                is String -> ByteString.copyFrom(value, StandardCharsets.UTF_8)
                is ByteArray -> ByteString.copyFrom(value)
                // Copy only the readable window (position..limit). Reading through a duplicate
                // leaves the caller's position untouched and, unlike array(), also works for
                // direct, read-only, and sliced buffers.
                is ByteBuffer -> ByteString.copyFrom(value.duplicate())
                else ->
                    error(
                        "Expected String, ByteArray, or ByteBuffer for JSON, got ${value::class.simpleName}"
                    )
            }
        return b.setJson(jsonBytes)
    }

    /** Accepts [ByteArray] or [ByteBuffer]; written to the `string` field as standard Base64. */
    private fun encodeBinary(
        value: Any,
        b: AirbyteValueProtobuf.Builder
    ): AirbyteValueProtobuf.Builder {
        val base64String =
            when (value) {
                is ByteArray -> Base64.getEncoder().encodeToString(value)
                is ByteBuffer -> Base64.getEncoder().encodeToString(remainingBytes(value))
                else ->
                    error(
                        "Expected ByteArray or ByteBuffer for Binary, got ${value::class.simpleName}"
                    )
            }
        return b.setString(base64String)
    }

    /**
     * Returns a copy of the bytes between [buffer]'s position and limit. The read goes through a
     * duplicate so the caller's position is not advanced, and it does not rely on an accessible
     * backing array.
     */
    private fun remainingBytes(buffer: ByteBuffer): ByteArray {
        val view = buffer.duplicate()
        val bytes = ByteArray(view.remaining())
        view.get(bytes)
        return bytes
    }
}

View File

@@ -4,6 +4,7 @@
syntax = "proto3";
import "airbyte_record_message_meta.proto";
import "google/protobuf/struct.proto";
package io.airbyte.protocol.protobuf;
@@ -20,16 +21,33 @@ message AirbyteValueProtobuf {
oneof value {
bool boolean = 1;
string string = 2;
int64 integer = 3;
sint64 integer = 3;
string big_integer = 4; // Prefer over integer iff it exists
double number = 5;
string big_decimal = 6; // Prefer over number iff it exists
string date = 7;
string time_with_timezone = 8;
string time_without_timezone = 9;
string timestamp_with_timezone = 10;
string timestamp_without_timezone = 11;
sint64 date = 7;
OffsetTime time_with_timezone = 8;
uint64 time_without_timezone = 9;
OffsetDateTime timestamp_with_timezone = 10;
LocalDateTime timestamp_without_timezone = 11;
bytes json = 12; // JSON-encoded arrays, objects, unions, or otherwise unknown.
google.protobuf.NullValue null = 13;
}
bool is_null = 13;
};
}
// A timestamp without a time zone, split into a calendar date and a time of day.
message LocalDateTime {
// Days relative to 1970-01-01 (java.time.LocalDate#toEpochDay); negative before the epoch.
sint64 date_days_since_epoch = 1;
// Nanoseconds since midnight (java.time.LocalTime#toNanoOfDay).
uint64 nanos_of_day = 2;
}
// A time of day together with its offset from UTC.
message OffsetTime {
// Nanoseconds since midnight of the local time (java.time.LocalTime#toNanoOfDay).
uint64 nanos_of_day = 1;
// Offset from UTC in seconds (java.time.ZoneOffset#getTotalSeconds); negative west of UTC.
sint32 offset_seconds = 2;
}
// A timestamp with an offset: the instant on the UTC timeline plus the offset to present it at.
message OffsetDateTime {
// Seconds of the instant relative to 1970-01-01T00:00:00Z (java.time.Instant#getEpochSecond).
sint64 epoch_second = 1;
// Nanosecond part of the instant within that second (java.time.Instant#getNano).
uint32 nano = 2;
// Offset from UTC in seconds (java.time.ZoneOffset#getTotalSeconds); negative west of UTC.
sint32 offset_seconds = 3;
}

View File

@@ -0,0 +1,609 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.protocol
import com.google.protobuf.ByteString
import com.google.protobuf.NullValue
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.LocalDateTime as ProtoLocalDateTime
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.OffsetDateTime as ProtoOffsetDateTime
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.OffsetTime as ProtoOffsetTime
import java.math.BigDecimal
import java.math.BigInteger
import java.nio.charset.StandardCharsets
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test
/**
 * Unit tests for [AirbyteValueProtobufDecoder]. Every test assembles an [AirbyteValueProtobuf] by
 * hand and checks the JVM value the decoder returns for it.
 */
class AirbyteValueProtobufDecoderTest {
    private val decoder = AirbyteValueProtobufDecoder()

    // ---------------------------------------------------------------------------------------
    // Builders for the protobuf inputs used by the tests below.
    // ---------------------------------------------------------------------------------------

    private fun integerValue(raw: Long): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder().setInteger(raw).build()

    private fun bigIntegerValue(text: String): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder().setBigInteger(text).build()

    private fun numberValue(raw: Double): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder().setNumber(raw).build()

    private fun bigDecimalValue(text: String): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder().setBigDecimal(text).build()

    private fun dateValue(epochDay: Long): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder().setDate(epochDay).build()

    private fun timeValue(nanoOfDay: Long): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder().setTimeWithoutTimezone(nanoOfDay).build()

    /** The offset is passed explicitly so tests can pin the expected number of seconds. */
    private fun offsetTimeValue(time: OffsetTime, offsetSeconds: Int): AirbyteValueProtobuf {
        val wire =
            ProtoOffsetTime.newBuilder()
                .setNanosOfDay(time.toLocalTime().toNanoOfDay())
                .setOffsetSeconds(offsetSeconds)
                .build()
        return AirbyteValueProtobuf.newBuilder().setTimeWithTimezone(wire).build()
    }

    private fun localDateTimeValue(epochDay: Long, nanoOfDay: Long): AirbyteValueProtobuf {
        val wire =
            ProtoLocalDateTime.newBuilder()
                .setDateDaysSinceEpoch(epochDay)
                .setNanosOfDay(nanoOfDay)
                .build()
        return AirbyteValueProtobuf.newBuilder().setTimestampWithoutTimezone(wire).build()
    }

    /** The offset is passed explicitly so tests can pin the expected number of seconds. */
    private fun offsetDateTimeValue(
        timestamp: OffsetDateTime,
        offsetSeconds: Int
    ): AirbyteValueProtobuf {
        val instant = timestamp.toInstant()
        val wire =
            ProtoOffsetDateTime.newBuilder()
                .setEpochSecond(instant.epochSecond)
                .setNano(instant.nano)
                .setOffsetSeconds(offsetSeconds)
                .build()
        return AirbyteValueProtobuf.newBuilder().setTimestampWithTimezone(wire).build()
    }

    private fun jsonValue(json: String): AirbyteValueProtobuf =
        AirbyteValueProtobuf.newBuilder()
            .setJson(ByteString.copyFrom(json, StandardCharsets.UTF_8))
            .build()

    // ---------------------------------------------------------------------------------------
    // Null handling
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeNull() {
        val input = AirbyteValueProtobuf.newBuilder().setNull(NullValue.NULL_VALUE).build()
        assertNull(decoder.decode(input))
    }

    @Test
    fun testDecodeValueNotSet() {
        val input = AirbyteValueProtobuf.newBuilder().build()
        assertNull(decoder.decode(input))
    }

    // ---------------------------------------------------------------------------------------
    // Booleans and strings
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeBoolean() {
        for (flag in listOf(true, false)) {
            val input = AirbyteValueProtobuf.newBuilder().setBoolean(flag).build()
            assertEquals(flag, decoder.decode(input))
        }
    }

    @Test
    fun testDecodeString() {
        for (text in listOf("hello world", "")) {
            val input = AirbyteValueProtobuf.newBuilder().setString(text).build()
            assertEquals(text, decoder.decode(input))
        }
    }

    // ---------------------------------------------------------------------------------------
    // Integers
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeIntegerSmall() {
        assertEquals(BigInteger.valueOf(42L), decoder.decode(integerValue(42L)))
    }

    @Test
    fun testDecodeIntegerLarge() {
        assertEquals(
            BigInteger.valueOf(Long.MAX_VALUE),
            decoder.decode(integerValue(Long.MAX_VALUE))
        )
    }

    @Test
    fun testDecodeIntegerNegative() {
        assertEquals(BigInteger.valueOf(-12345L), decoder.decode(integerValue(-12345L)))
    }

    @Test
    fun testDecodeBigInteger() {
        val expected = BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2))
        assertEquals(expected, decoder.decode(bigIntegerValue(expected.toString())))
    }

    @Test
    fun testDecodeBigIntegerVeryLarge() {
        val expected = BigInteger.TEN.pow(100)
        assertEquals(expected, decoder.decode(bigIntegerValue(expected.toString())))
    }

    // ---------------------------------------------------------------------------------------
    // Decimals
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeNumberDouble() {
        assertEquals(BigDecimal.valueOf(123.456), decoder.decode(numberValue(123.456)))
    }

    @Test
    fun testDecodeNumberNegative() {
        assertEquals(BigDecimal.valueOf(-999.999), decoder.decode(numberValue(-999.999)))
    }

    @Test
    fun testDecodeBigDecimal() {
        val expected = BigDecimal("123456789.987654321")
        assertEquals(expected, decoder.decode(bigDecimalValue(expected.toString())))
    }

    @Test
    fun testDecodeBigDecimalVeryPrecise() {
        val expected = BigDecimal("0.123456789012345678901234567890")
        assertEquals(expected, decoder.decode(bigDecimalValue(expected.toString())))
    }

    // ---------------------------------------------------------------------------------------
    // Dates
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeDate() {
        val expected = LocalDate.of(2025, 10, 6)
        assertEquals(expected, decoder.decode(dateValue(expected.toEpochDay())))
    }

    @Test
    fun testDecodeDateEpochStart() {
        assertEquals(LocalDate.ofEpochDay(0), decoder.decode(dateValue(0L)))
    }

    @Test
    fun testDecodeDateFarFuture() {
        val expected = LocalDate.of(9999, 12, 31)
        assertEquals(expected, decoder.decode(dateValue(expected.toEpochDay())))
    }

    // ---------------------------------------------------------------------------------------
    // Times without an offset
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeTimeWithoutTimezone() {
        val expected = LocalTime.of(14, 30, 45, 123456789)
        assertEquals(expected, decoder.decode(timeValue(expected.toNanoOfDay())))
    }

    @Test
    fun testDecodeTimeWithoutTimezoneMidnight() {
        assertEquals(LocalTime.MIDNIGHT, decoder.decode(timeValue(0L)))
    }

    @Test
    fun testDecodeTimeWithoutTimezoneMaxTime() {
        val expected = LocalTime.MAX
        assertEquals(expected, decoder.decode(timeValue(expected.toNanoOfDay())))
    }

    // ---------------------------------------------------------------------------------------
    // Times with an offset
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeTimeWithTimezoneUTC() {
        val expected = OffsetTime.of(14, 30, 45, 123456789, ZoneOffset.UTC)
        val actual = decoder.decode(offsetTimeValue(expected, 0)) as OffsetTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimeWithTimezonePositiveOffset() {
        val expected = OffsetTime.of(14, 30, 45, 0, ZoneOffset.ofHours(5))
        val actual = decoder.decode(offsetTimeValue(expected, 18000)) as OffsetTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimeWithTimezoneNegativeOffset() {
        val expected = OffsetTime.of(14, 30, 45, 0, ZoneOffset.ofHours(-8))
        val actual = decoder.decode(offsetTimeValue(expected, -28800)) as OffsetTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimeWithTimezoneFractionalOffset() {
        val expected = OffsetTime.of(14, 30, 45, 0, ZoneOffset.ofHoursMinutes(5, 30))
        val actual = decoder.decode(offsetTimeValue(expected, 19800)) as OffsetTime
        assertEquals(expected, actual)
    }

    // ---------------------------------------------------------------------------------------
    // Timestamps without an offset
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeTimestampWithoutTimezone() {
        val expected = LocalDateTime.of(2025, 10, 6, 14, 30, 45, 123456789)
        val input =
            localDateTimeValue(
                expected.toLocalDate().toEpochDay(),
                expected.toLocalTime().toNanoOfDay()
            )
        assertEquals(expected, decoder.decode(input))
    }

    @Test
    fun testDecodeTimestampWithoutTimezoneEpochStart() {
        val expected = LocalDateTime.of(1970, 1, 1, 0, 0, 0, 0)
        assertEquals(expected, decoder.decode(localDateTimeValue(0L, 0L)))
    }

    @Test
    fun testDecodeTimestampWithoutTimezoneMidnight() {
        val expected = LocalDateTime.of(2025, 10, 6, 0, 0, 0, 0)
        val input = localDateTimeValue(expected.toLocalDate().toEpochDay(), 0L)
        assertEquals(expected, decoder.decode(input))
    }

    // ---------------------------------------------------------------------------------------
    // Timestamps with an offset
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeTimestampWithTimezoneUTC() {
        val expected = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 123456789, ZoneOffset.UTC)
        val actual = decoder.decode(offsetDateTimeValue(expected, 0)) as OffsetDateTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimestampWithTimezonePositiveOffset() {
        val expected = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.ofHours(5))
        val actual = decoder.decode(offsetDateTimeValue(expected, 18000)) as OffsetDateTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimestampWithTimezoneNegativeOffset() {
        val expected = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.ofHours(-8))
        val actual = decoder.decode(offsetDateTimeValue(expected, -28800)) as OffsetDateTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimestampWithTimezoneFractionalOffset() {
        val expected =
            OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.ofHoursMinutes(-4, -30))
        val actual = decoder.decode(offsetDateTimeValue(expected, -16200)) as OffsetDateTime
        assertEquals(expected, actual)
    }

    @Test
    fun testDecodeTimestampWithTimezoneEpochStart() {
        val expected = OffsetDateTime.of(1970, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC)
        val actual = decoder.decode(offsetDateTimeValue(expected, 0)) as OffsetDateTime
        assertEquals(expected, actual)
    }

    // ---------------------------------------------------------------------------------------
    // JSON
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeJson() {
        val json = """{"key": "value"}"""
        assertEquals(json, decoder.decode(jsonValue(json)))
    }

    @Test
    fun testDecodeJsonArray() {
        val json = """[1, 2, 3, "test"]"""
        assertEquals(json, decoder.decode(jsonValue(json)))
    }

    @Test
    fun testDecodeJsonEmpty() {
        val json = "{}"
        assertEquals(json, decoder.decode(jsonValue(json)))
    }

    // ---------------------------------------------------------------------------------------
    // Sweeps over several offsets
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeMultipleTimezones() {
        val offsets =
            listOf(
                ZoneOffset.UTC,
                ZoneOffset.ofHours(1),
                ZoneOffset.ofHours(-1),
                ZoneOffset.ofHours(12),
                ZoneOffset.ofHours(-12),
                ZoneOffset.ofHoursMinutes(5, 30),
                ZoneOffset.ofHoursMinutes(-5, -45),
                ZoneOffset.ofHoursMinutesSeconds(1, 30, 30)
            )
        for (offset in offsets) {
            val expected = OffsetTime.of(12, 0, 0, 0, offset)
            val actual =
                decoder.decode(offsetTimeValue(expected, offset.totalSeconds)) as OffsetTime
            assertEquals(expected, actual)
        }
    }

    @Test
    fun testDecodeTimestampWithMultipleTimezones() {
        val offsets =
            listOf(
                ZoneOffset.UTC,
                ZoneOffset.ofHours(5),
                ZoneOffset.ofHours(-8),
                ZoneOffset.ofHoursMinutes(9, 30),
                ZoneOffset.ofHoursMinutes(-3, -30)
            )
        for (offset in offsets) {
            val expected = OffsetDateTime.of(2025, 10, 6, 12, 0, 0, 0, offset)
            val actual =
                decoder.decode(offsetDateTimeValue(expected, offset.totalSeconds))
                    as OffsetDateTime
            assertEquals(expected, actual)
        }
    }

    // ---------------------------------------------------------------------------------------
    // Extremes and precision
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeExtremeValues() {
        // Both ends of the Long range.
        assertEquals(
            BigInteger.valueOf(Long.MAX_VALUE),
            decoder.decode(integerValue(Long.MAX_VALUE))
        )
        assertEquals(
            BigInteger.valueOf(Long.MIN_VALUE),
            decoder.decode(integerValue(Long.MIN_VALUE))
        )
        // Largest-magnitude finite doubles, positive and negative.
        assertEquals(
            BigDecimal.valueOf(Double.MAX_VALUE),
            decoder.decode(numberValue(Double.MAX_VALUE))
        )
        assertEquals(
            BigDecimal.valueOf(-Double.MAX_VALUE),
            decoder.decode(numberValue(-Double.MAX_VALUE))
        )
    }

    @Test
    fun testDecodeNanosecondPrecision() {
        // Time of day keeps all nine fractional digits.
        val expectedTime = LocalTime.of(12, 30, 45, 123456789)
        assertEquals(expectedTime, decoder.decode(timeValue(expectedTime.toNanoOfDay())))

        // So does a timestamp without an offset.
        val expectedTimestamp = LocalDateTime.of(2025, 10, 6, 12, 30, 45, 987654321)
        val timestampInput =
            localDateTimeValue(
                expectedTimestamp.toLocalDate().toEpochDay(),
                expectedTimestamp.toLocalTime().toNanoOfDay()
            )
        assertEquals(expectedTimestamp, decoder.decode(timestampInput))
    }

    @Test
    fun testDecodeSpecialDoubleValues() {
        // Positive and negative zero are expected to decode to the same BigDecimal.
        assertEquals(0.0.toBigDecimal(), decoder.decode(numberValue(0.0)))
        assertEquals(0.0.toBigDecimal(), decoder.decode(numberValue(-0.0)))
    }

    @Test
    fun testDecodeUtf8Json() {
        val json = """{"emoji": "😀", "chinese": "你好", "arabic": "مرحبا"}"""
        assertEquals(json, decoder.decode(jsonValue(json)))
    }

    @Test
    fun testDecodeAllTimezoneOffsets() {
        // A spread of offsets, including the smallest and largest that ZoneOffset allows.
        val offsets =
            listOf(
                ZoneOffset.MIN, // -18:00
                ZoneOffset.MAX, // +18:00
                ZoneOffset.UTC, // +00:00
                ZoneOffset.ofHours(0),
                ZoneOffset.ofHoursMinutes(14, 0), // Kiribati
                ZoneOffset.ofHoursMinutes(-11, 0), // American Samoa
                ZoneOffset.ofHoursMinutes(5, 45), // Nepal
                ZoneOffset.ofHoursMinutes(12, 45), // Chatham Islands
                ZoneOffset.ofHoursMinutes(-3, -30), // Newfoundland
            )
        for (offset in offsets) {
            val expected = OffsetDateTime.of(2025, 6, 15, 12, 0, 0, 0, offset)
            val actual =
                decoder.decode(offsetDateTimeValue(expected, offset.totalSeconds))
                    as OffsetDateTime
            assertEquals(expected, actual)
            assertEquals(offset, actual.offset)
        }
    }

    @Test
    fun testDecodeTimeAcrossDaylightSavingTransitions() {
        // Local date-times around DST changes, each paired with the fixed offset to decode at.
        val cases =
            listOf(
                // Spring forward
                Triple(
                    LocalDateTime.of(2025, 3, 9, 2, 30),
                    ZoneOffset.ofHours(-8),
                    "PST before spring forward"
                ),
                Triple(
                    LocalDateTime.of(2025, 3, 9, 3, 30),
                    ZoneOffset.ofHours(-7),
                    "PDT after spring forward"
                ),
                // Fall back
                Triple(
                    LocalDateTime.of(2025, 11, 2, 1, 30),
                    ZoneOffset.ofHours(-7),
                    "PDT before fall back"
                ),
                Triple(
                    LocalDateTime.of(2025, 11, 2, 1, 30),
                    ZoneOffset.ofHours(-8),
                    "PST after fall back"
                )
            )
        for ((localDateTime, offset, description) in cases) {
            val expected = OffsetDateTime.of(localDateTime, offset)
            val actual =
                decoder.decode(offsetDateTimeValue(expected, offset.totalSeconds))
                    as OffsetDateTime
            assertEquals(expected, actual, "Failed for: $description")
        }
    }

    // ---------------------------------------------------------------------------------------
    // Calendar edge cases
    // ---------------------------------------------------------------------------------------

    @Test
    fun testDecodeLeapYearDates() {
        val leapDays =
            listOf(
                LocalDate.of(2024, 2, 29), // Leap day 2024
                LocalDate.of(2000, 2, 29), // Leap day 2000 (divisible by 400)
                LocalDate.of(2020, 2, 29) // Leap day 2020
            )
        for (expected in leapDays) {
            assertEquals(expected, decoder.decode(dateValue(expected.toEpochDay())))
        }
    }

    @Test
    fun testDecodeBoundaryDates() {
        val boundaries =
            listOf(
                LocalDate.of(1970, 1, 1), // Unix epoch
                LocalDate.of(1, 1, 1), // Year 1
                LocalDate.of(9999, 12, 31), // Max date
                LocalDate.of(2000, 1, 1), // Y2K
                LocalDate.of(1900, 2, 28), // Not a leap year (divisible by 100 but not 400)
                LocalDate.of(2100, 2, 28) // Not a leap year
            )
        for (expected in boundaries) {
            assertEquals(expected, decoder.decode(dateValue(expected.toEpochDay())))
        }
    }
}

View File

@@ -0,0 +1,501 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.protocol
import com.google.protobuf.NullValue
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.lang.IllegalStateException
import java.math.BigDecimal
import java.math.BigInteger
import java.sql.Date as SqlDate
import java.sql.Time as SqlTime
import java.sql.Timestamp as SqlTimestamp
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.Base64
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertThrows
import org.junit.jupiter.api.Test
class AirbyteValueProtobufEncoderTest {
private val encoder = AirbyteValueProtobufEncoder()
@Test
fun testEncodeNull() {
    // A null input is emitted as the NULL case even though the schema type is STRING.
    val encoded = encoder.encode(null, LeafAirbyteSchemaType.STRING)
    assertEquals(NullValue.NULL_VALUE, encoded.`null`)
    assertEquals(AirbyteValueProtobuf.ValueCase.NULL, encoded.valueCase)
}
@Test
fun testEncodeBoolean() {
    // Both boolean values must land in the BOOLEAN case unchanged.
    for (flag in listOf(true, false)) {
        val encoded = encoder.encode(flag, LeafAirbyteSchemaType.BOOLEAN)
        assertEquals(flag, encoded.boolean)
        assertEquals(AirbyteValueProtobuf.ValueCase.BOOLEAN, encoded.valueCase)
    }
}
@Test
fun testEncodeBooleanInvalidType() {
    // A String is not accepted for a BOOLEAN field.
    val notABoolean = "true"
    assertThrows(IllegalArgumentException::class.java) {
        encoder.encode(notABoolean, LeafAirbyteSchemaType.BOOLEAN)
    }
}
@Test
fun testEncodeString() {
    // Non-empty text: both the payload and the populated case are checked.
    val text = "hello world"
    val encoded = encoder.encode(text, LeafAirbyteSchemaType.STRING)
    assertEquals(text, encoded.string)
    assertEquals(AirbyteValueProtobuf.ValueCase.STRING, encoded.valueCase)

    // The empty string is a legitimate value.
    val encodedEmpty = encoder.encode("", LeafAirbyteSchemaType.STRING)
    assertEquals("", encodedEmpty.string)
}
@Test
fun testEncodeStringInvalidType() {
    // An Int is not accepted for a STRING field.
    val notAString = 123
    assertThrows(IllegalArgumentException::class.java) {
        encoder.encode(notAString, LeafAirbyteSchemaType.STRING)
    }
}
@Test
fun testEncodeIntegerLong() {
    // A Long goes straight into the fixed-width INTEGER case.
    val input = 123456789L
    val encoded = encoder.encode(input, LeafAirbyteSchemaType.INTEGER)
    assertEquals(input, encoded.integer)
    assertEquals(AirbyteValueProtobuf.ValueCase.INTEGER, encoded.valueCase)
}
@Test
fun testEncodeIntegerInt() {
    // An Int is widened to Long on the wire.
    val encoded = encoder.encode(42, LeafAirbyteSchemaType.INTEGER)
    assertEquals(42L, encoded.integer)
    assertEquals(AirbyteValueProtobuf.ValueCase.INTEGER, encoded.valueCase)
}
@Test
fun testEncodeIntegerSmallBigInteger() {
    // A BigInteger that comfortably fits in a Long uses the INTEGER case, not the string form.
    val encoded = encoder.encode(BigInteger.valueOf(9999L), LeafAirbyteSchemaType.INTEGER)
    assertEquals(9999L, encoded.integer)
    assertEquals(AirbyteValueProtobuf.ValueCase.INTEGER, encoded.valueCase)
}
@Test
fun testEncodeIntegerLargeBigInteger() {
// BigInteger with more than 63 bits
val largeBigInt = BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2))
val result = encoder.encode(largeBigInt, LeafAirbyteSchemaType.INTEGER)
assertEquals(largeBigInt.toString(), result.bigInteger)
assertEquals(AirbyteValueProtobuf.ValueCase.BIG_INTEGER, result.valueCase)
}
@Test
fun testEncodeIntegerNegative() {
val result = encoder.encode(-12345L, LeafAirbyteSchemaType.INTEGER)
assertEquals(-12345L, result.integer)
}
@Test
fun testEncodeIntegerInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode("123", LeafAirbyteSchemaType.INTEGER)
}
}
@Test
fun testEncodeNumberDouble() {
val result = encoder.encode(123.456, LeafAirbyteSchemaType.NUMBER)
assertEquals(123.456, result.number)
assertEquals(AirbyteValueProtobuf.ValueCase.NUMBER, result.valueCase)
}
@Test
fun testEncodeNumberFloat() {
val float = 123.456f
val result = encoder.encode(123.456f, LeafAirbyteSchemaType.NUMBER)
assertEquals(AirbyteValueProtobuf.ValueCase.NUMBER, result.valueCase)
assertEquals(float.toDouble(), result.number)
}
@Test
fun testEncodeNumberBigDecimal() {
val bigDec = BigDecimal("123456789.987654321")
val result = encoder.encode(bigDec, LeafAirbyteSchemaType.NUMBER)
assertEquals(bigDec.toString(), result.bigDecimal)
assertEquals(AirbyteValueProtobuf.ValueCase.BIG_DECIMAL, result.valueCase)
}
@Test
fun testEncodeNumberNegative() {
val result = encoder.encode(-999.999, LeafAirbyteSchemaType.NUMBER)
assertEquals(-999.999, result.number)
}
@Test
fun testEncodeNumberInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode("123.456", LeafAirbyteSchemaType.NUMBER)
}
}
@Test
fun testEncodeDateLocalDate() {
val date = LocalDate.of(2025, 10, 6)
val result = encoder.encode(date, LeafAirbyteSchemaType.DATE)
assertEquals(date.toEpochDay(), result.date)
assertEquals(AirbyteValueProtobuf.ValueCase.DATE, result.valueCase)
}
@Test
fun testEncodeDateSqlDate() {
val date = SqlDate.valueOf("2025-10-06")
val result = encoder.encode(date, LeafAirbyteSchemaType.DATE)
assertEquals(date.toLocalDate().toEpochDay(), result.date)
assertEquals(AirbyteValueProtobuf.ValueCase.DATE, result.valueCase)
}
@Test
fun testEncodeDateEpochStart() {
val date = LocalDate.ofEpochDay(0)
val result = encoder.encode(date, LeafAirbyteSchemaType.DATE)
assertEquals(0, result.date)
}
@Test
fun testEncodeDateInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode("2025-10-06", LeafAirbyteSchemaType.DATE)
}
}
@Test
fun testEncodeTimeWithoutTimezoneLocalTime() {
val time = LocalTime.of(14, 30, 45, 123456789)
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
assertEquals(time.toNanoOfDay(), result.timeWithoutTimezone)
assertEquals(AirbyteValueProtobuf.ValueCase.TIME_WITHOUT_TIMEZONE, result.valueCase)
}
@Test
fun testEncodeTimeWithoutTimezoneSqlTime() {
val time = SqlTime.valueOf("14:30:45")
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
assertEquals(time.toLocalTime().toNanoOfDay(), result.timeWithoutTimezone)
}
@Test
fun testEncodeTimeWithoutTimezoneMidnight() {
val time = LocalTime.MIDNIGHT
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
assertEquals(0L, result.timeWithoutTimezone)
}
@Test
fun testEncodeTimeWithoutTimezoneMaxTime() {
val time = LocalTime.MAX
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
assertEquals(time.toNanoOfDay(), result.timeWithoutTimezone)
}
@Test
fun testEncodeTimeWithoutTimezoneInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode("14:30:45", LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
}
}
@Test
fun testEncodeTimeWithTimezoneUTC() {
val time = OffsetTime.of(14, 30, 45, 123456789, ZoneOffset.UTC)
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
assertEquals(time.toLocalTime().toNanoOfDay(), result.timeWithTimezone.nanosOfDay)
assertEquals(0, result.timeWithTimezone.offsetSeconds)
assertEquals(AirbyteValueProtobuf.ValueCase.TIME_WITH_TIMEZONE, result.valueCase)
}
@Test
fun testEncodeTimeWithTimezonePositiveOffset() {
val time = OffsetTime.of(14, 30, 45, 0, ZoneOffset.ofHours(5))
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
assertEquals(time.toLocalTime().toNanoOfDay(), result.timeWithTimezone.nanosOfDay)
assertEquals(18000, result.timeWithTimezone.offsetSeconds) // 5 hours = 18000 seconds
}
@Test
fun testEncodeTimeWithTimezoneNegativeOffset() {
val time = OffsetTime.of(14, 30, 45, 0, ZoneOffset.ofHours(-8))
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
assertEquals(time.toLocalTime().toNanoOfDay(), result.timeWithTimezone.nanosOfDay)
assertEquals(-28800, result.timeWithTimezone.offsetSeconds) // -8 hours = -28800 seconds
}
@Test
fun testEncodeTimeWithTimezoneFractionalOffset() {
val time = OffsetTime.of(14, 30, 45, 0, ZoneOffset.ofHoursMinutes(5, 30))
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
assertEquals(19800, result.timeWithTimezone.offsetSeconds) // 5.5 hours = 19800 seconds
}
@Test
fun testEncodeTimeWithTimezoneInvalidType() {
assertThrows(IllegalArgumentException::class.java) {
encoder.encode(LocalTime.now(), LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
}
}
@Test
fun testEncodeTimestampWithoutTimezoneLocalDateTime() {
val timestamp = LocalDateTime.of(2025, 10, 6, 14, 30, 45, 123456789)
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
assertEquals(
timestamp.toLocalDate().toEpochDay(),
result.timestampWithoutTimezone.dateDaysSinceEpoch
)
assertEquals(
timestamp.toLocalTime().toNanoOfDay(),
result.timestampWithoutTimezone.nanosOfDay
)
assertEquals(AirbyteValueProtobuf.ValueCase.TIMESTAMP_WITHOUT_TIMEZONE, result.valueCase)
}
@Test
fun testEncodeTimestampWithoutTimezoneSqlTimestamp() {
val timestamp = SqlTimestamp.valueOf("2025-10-06 14:30:45.123456789")
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
val localDateTime = timestamp.toLocalDateTime()
assertEquals(
localDateTime.toLocalDate().toEpochDay(),
result.timestampWithoutTimezone.dateDaysSinceEpoch
)
assertEquals(
localDateTime.toLocalTime().toNanoOfDay(),
result.timestampWithoutTimezone.nanosOfDay
)
}
@Test
fun testEncodeTimestampWithoutTimezoneEpochStart() {
val timestamp = LocalDateTime.of(1970, 1, 1, 0, 0, 0, 0)
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
assertEquals(0, result.timestampWithoutTimezone.dateDaysSinceEpoch)
assertEquals(0L, result.timestampWithoutTimezone.nanosOfDay)
}
@Test
fun testEncodeTimestampWithoutTimezoneInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode("2025-10-06T14:30:45", LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
}
}
@Test
fun testEncodeTimestampWithTimezoneUTC() {
val timestamp = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 123456789, ZoneOffset.UTC)
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
val instant = timestamp.toInstant()
assertEquals(instant.epochSecond, result.timestampWithTimezone.epochSecond)
assertEquals(instant.nano, result.timestampWithTimezone.nano)
assertEquals(0, result.timestampWithTimezone.offsetSeconds)
assertEquals(AirbyteValueProtobuf.ValueCase.TIMESTAMP_WITH_TIMEZONE, result.valueCase)
}
@Test
fun testEncodeTimestampWithTimezonePositiveOffset() {
val timestamp = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.ofHours(5))
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
val instant = timestamp.toInstant()
assertEquals(instant.epochSecond, result.timestampWithTimezone.epochSecond)
assertEquals(instant.nano, result.timestampWithTimezone.nano)
assertEquals(18000, result.timestampWithTimezone.offsetSeconds)
}
@Test
fun testEncodeTimestampWithTimezoneNegativeOffset() {
val timestamp = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.ofHours(-8))
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
val instant = timestamp.toInstant()
assertEquals(instant.epochSecond, result.timestampWithTimezone.epochSecond)
assertEquals(-28800, result.timestampWithTimezone.offsetSeconds)
}
@Test
fun testEncodeTimestampWithTimezoneFractionalOffset() {
val timestamp =
OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.ofHoursMinutes(-4, -30))
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
assertEquals(-16200, result.timestampWithTimezone.offsetSeconds)
}
@Test
fun testEncodeTimestampWithTimezoneSqlTimestamp() {
val timestamp = SqlTimestamp.valueOf("2025-10-06 14:30:45.123456789")
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
// SqlTimestamp converts to UTC
val instant = timestamp.toInstant()
assertEquals(instant.epochSecond, result.timestampWithTimezone.epochSecond)
assertEquals(instant.nano, result.timestampWithTimezone.nano)
assertEquals(0, result.timestampWithTimezone.offsetSeconds) // UTC
}
@Test
fun testEncodeTimestampWithTimezoneEpochStart() {
val timestamp = OffsetDateTime.of(1970, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC)
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
assertEquals(0L, result.timestampWithTimezone.epochSecond)
assertEquals(0, result.timestampWithTimezone.nano)
assertEquals(0, result.timestampWithTimezone.offsetSeconds)
}
@Test
fun testEncodeTimestampWithTimezoneInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode("2025-10-06T14:30:45Z", LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
}
}
@Test
fun testEncodeJsonString() {
val json = """{"key": "value"}"""
val result = encoder.encode(json, LeafAirbyteSchemaType.JSONB)
assertEquals(json, result.json.toStringUtf8())
assertEquals(AirbyteValueProtobuf.ValueCase.JSON, result.valueCase)
}
@Test
fun testEncodeJsonByteArray() {
val jsonBytes = """{"key": "value"}""".toByteArray()
val result = encoder.encode(jsonBytes, LeafAirbyteSchemaType.JSONB)
assertEquals("""{"key": "value"}""", result.json.toStringUtf8())
}
@Test
fun testEncodeJsonInvalidType() {
assertThrows(IllegalStateException::class.java) {
encoder.encode(123, LeafAirbyteSchemaType.JSONB)
}
}
@Test
fun testEncodeBinaryString() {
val binary = "binary data".toByteArray()
val result = encoder.encode(binary, LeafAirbyteSchemaType.BINARY)
assertEquals(Base64.getEncoder().encodeToString(binary), result.string)
}
@Test
fun testEncodeBinaryByteArray() {
val binaryBytes = byteArrayOf(0x01, 0x02, 0x03)
val result = encoder.encode(binaryBytes, LeafAirbyteSchemaType.BINARY)
assertEquals(Base64.getEncoder().encodeToString(binaryBytes), result.string)
}
@Test
fun testEncodeNullType() {
val result = encoder.encode("anything", LeafAirbyteSchemaType.NULL)
assertEquals(NullValue.NULL_VALUE, result.`null`)
}
@Test
fun testEncodeWithBuilderReuse() {
val builder = AirbyteValueProtobuf.newBuilder()
val result1 = encoder.encode(42, LeafAirbyteSchemaType.INTEGER, builder)
assertEquals(42L, result1.integer)
val result2 = encoder.encode("hello", LeafAirbyteSchemaType.STRING, builder)
assertEquals("hello", result2.string)
// Verify builder was cleared between uses
assertEquals(AirbyteValueProtobuf.ValueCase.STRING, result2.valueCase)
}
@Test
fun testEncodeMultipleTimezones() {
val timezones =
listOf(
ZoneOffset.UTC,
ZoneOffset.ofHours(1),
ZoneOffset.ofHours(-1),
ZoneOffset.ofHours(12),
ZoneOffset.ofHours(-12),
ZoneOffset.ofHoursMinutes(5, 30),
ZoneOffset.ofHoursMinutes(-5, -45),
ZoneOffset.ofHoursMinutesSeconds(1, 30, 30)
)
timezones.forEach { offset ->
val time = OffsetTime.of(12, 0, 0, 0, offset)
val result = encoder.encode(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
assertEquals(offset.totalSeconds, result.timeWithTimezone.offsetSeconds)
}
}
@Test
fun testEncodeTimestampWithMultipleTimezones() {
val timezones =
listOf(
ZoneOffset.UTC,
ZoneOffset.ofHours(5),
ZoneOffset.ofHours(-8),
ZoneOffset.ofHoursMinutes(9, 30),
ZoneOffset.ofHoursMinutes(-3, -30)
)
timezones.forEach { offset ->
val timestamp = OffsetDateTime.of(2025, 10, 6, 12, 0, 0, 0, offset)
val result = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
assertEquals(offset.totalSeconds, result.timestampWithTimezone.offsetSeconds)
}
}
@Test
fun testEncodeExtremeValues() {
// Test extreme integer values
val maxLong = encoder.encode(Long.MAX_VALUE, LeafAirbyteSchemaType.INTEGER)
assertEquals(Long.MAX_VALUE, maxLong.integer)
val minLong = encoder.encode(Long.MIN_VALUE, LeafAirbyteSchemaType.INTEGER)
assertEquals(Long.MIN_VALUE, minLong.integer)
// Test extreme double values
val maxDouble = encoder.encode(Double.MAX_VALUE, LeafAirbyteSchemaType.NUMBER)
assertEquals(Double.MAX_VALUE, maxDouble.number, 0.0)
val minDouble = encoder.encode(-Double.MAX_VALUE, LeafAirbyteSchemaType.NUMBER)
assertEquals(-Double.MAX_VALUE, minDouble.number, 0.0)
// Test extreme dates
val farFutureDate = LocalDate.of(9999, 12, 31)
val futureDateResult = encoder.encode(farFutureDate, LeafAirbyteSchemaType.DATE)
assertEquals(farFutureDate.toEpochDay(), futureDateResult.date)
}
@Test
fun testEncodeNanosecondPrecision() {
// Test nanosecond precision for time
val preciseTime = LocalTime.of(12, 30, 45, 123456789)
val timeResult = encoder.encode(preciseTime, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
assertEquals(preciseTime.toNanoOfDay(), timeResult.timeWithoutTimezone)
// Test nanosecond precision for timestamp
val preciseTimestamp = LocalDateTime.of(2025, 10, 6, 12, 30, 45, 987654321)
val timestampResult =
encoder.encode(preciseTimestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
assertEquals(
preciseTimestamp.toLocalTime().toNanoOfDay(),
timestampResult.timestampWithoutTimezone.nanosOfDay
)
}
}

View File

@@ -0,0 +1,459 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.protocol
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import java.math.BigDecimal
import java.math.BigInteger
import java.sql.Date as SqlDate
import java.sql.Time as SqlTime
import java.sql.Timestamp as SqlTimestamp
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.Base64
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
/**
 * Round-trip tests for [AirbyteValueProtobufEncoder] and [AirbyteValueProtobufDecoder].
 *
 * Each case encodes a value, decodes the built protobuf message and compares the result with the
 * expected value. Temporal values, strings and BigDecimal inputs are expected back unchanged;
 * other inputs are expected in the decoder's output type, as asserted below: Int/Long come back
 * as BigInteger, Double/Float as BigDecimal, java.sql temporal types as their java.time
 * counterparts, JSON byte arrays as String and binary content as a Base64 String.
 */
class ProtobufEncoderDecoderRoundTripTest {
    private val encoder = AirbyteValueProtobufEncoder()
    private val decoder = AirbyteValueProtobufDecoder()

    /** Round-trips [value] and expects to get the very same value back. */
    private fun <T> testRoundTrip(value: T, schemaType: io.airbyte.cdk.data.AirbyteSchemaType) {
        testRoundTrip(value, value, schemaType)
    }

    /**
     * Encodes [value] as [schemaType], decodes the built message and asserts that the decoded
     * value equals [expectedValue].
     */
    private fun <T> testRoundTrip(
        value: T,
        expectedValue: T,
        schemaType: io.airbyte.cdk.data.AirbyteSchemaType
    ) {
        val encoded = encoder.encode(value, schemaType)
        val decoded = decoder.decode(encoded.build())
        assertEquals(expectedValue, decoded, "Round trip failed for value: $value")
    }

    @Test
    fun testNullRoundTrip() {
        val encoded = encoder.encode(null, LeafAirbyteSchemaType.STRING)
        val decoded = decoder.decode(encoded.build())
        assertEquals(null, decoded)
    }

    @Test
    fun testBooleanRoundTrip() {
        testRoundTrip(true, LeafAirbyteSchemaType.BOOLEAN)
        testRoundTrip(false, LeafAirbyteSchemaType.BOOLEAN)
    }

    @Test
    fun testStringRoundTrip() {
        testRoundTrip("", LeafAirbyteSchemaType.STRING)
        testRoundTrip("hello world", LeafAirbyteSchemaType.STRING)
        testRoundTrip("special chars: !@#$%^&*()", LeafAirbyteSchemaType.STRING)
        testRoundTrip("unicode: 你好 مرحبا 😀", LeafAirbyteSchemaType.STRING)
        testRoundTrip("line\nbreaks\tand\ttabs", LeafAirbyteSchemaType.STRING)
    }

    /** Int, Long and BigInteger inputs are all expected back as BigInteger. */
    @Test
    fun testIntegerRoundTrip() {
        // Int values
        testRoundTrip(0, 0.toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(42, 42.toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(-42, (-42).toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(Int.MAX_VALUE, Int.MAX_VALUE.toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(Int.MIN_VALUE, Int.MIN_VALUE.toBigInteger(), LeafAirbyteSchemaType.INTEGER)

        // Long values
        testRoundTrip(0L, 0L.toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(123456789L, 123456789L.toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(-123456789L, (-123456789L).toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(Long.MAX_VALUE, Long.MAX_VALUE.toBigInteger(), LeafAirbyteSchemaType.INTEGER)
        testRoundTrip(Long.MIN_VALUE, Long.MIN_VALUE.toBigInteger(), LeafAirbyteSchemaType.INTEGER)

        // BigInteger values (note: decoded as BigInteger)
        val smallBigInt = BigInteger.valueOf(999L)
        val encoded1 = encoder.encode(smallBigInt, LeafAirbyteSchemaType.INTEGER)
        val decoded1 = decoder.decode(encoded1.build())
        assertEquals(smallBigInt, decoded1)

        val largeBigInt = BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2))
        val encoded2 = encoder.encode(largeBigInt, LeafAirbyteSchemaType.INTEGER)
        val decoded2 = decoder.decode(encoded2.build())
        assertEquals(largeBigInt, decoded2)

        val veryLargeBigInt = BigInteger.valueOf(10).pow(100)
        val encoded3 = encoder.encode(veryLargeBigInt, LeafAirbyteSchemaType.INTEGER)
        val decoded3 = decoder.decode(encoded3.build())
        assertEquals(veryLargeBigInt, decoded3)
    }

    /** Double, Float and BigDecimal inputs are all expected back as BigDecimal. */
    @Test
    fun testNumberRoundTrip() {
        // Double values (note: decoded as BigDecimal)
        val doubleValues =
            listOf(0.0, 1.5, -1.5, 123.456, -999.999, Double.MAX_VALUE, -Double.MAX_VALUE)
        doubleValues.forEach { value ->
            val encoded = encoder.encode(value, LeafAirbyteSchemaType.NUMBER)
            val decoded = decoder.decode(encoded.build()) as BigDecimal
            assertEquals(BigDecimal.valueOf(value), decoded)
        }

        // Float values (note: decoded as BigDecimal via Double)
        val floatValues = listOf(0.0f, 1.5f, -1.5f, 123.456f)
        floatValues.forEach { value ->
            val encoded = encoder.encode(value, LeafAirbyteSchemaType.NUMBER)
            val decoded = decoder.decode(encoded.build()) as BigDecimal
            assertEquals(BigDecimal.valueOf(value.toDouble()), decoded)
        }

        // BigDecimal values
        val bigDecValues =
            listOf(
                BigDecimal.ZERO,
                BigDecimal.ONE,
                BigDecimal("-123.456"),
                BigDecimal("123456789.987654321"),
                BigDecimal("0.123456789012345678901234567890")
            )
        bigDecValues.forEach { value -> testRoundTrip(value, LeafAirbyteSchemaType.NUMBER) }
    }

    @Test
    fun testDateRoundTrip() {
        testRoundTrip(LocalDate.of(2025, 10, 6), LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.ofEpochDay(0), LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.of(1970, 1, 1), LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.of(2000, 1, 1), LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.of(2024, 2, 29), LeafAirbyteSchemaType.DATE) // Leap day
        testRoundTrip(LocalDate.of(9999, 12, 31), LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.of(1, 1, 1), LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.MIN, LeafAirbyteSchemaType.DATE)
        testRoundTrip(LocalDate.MAX, LeafAirbyteSchemaType.DATE)

        // Test SqlDate conversion
        val sqlDate = SqlDate.valueOf("2025-10-06")
        val encoded = encoder.encode(sqlDate, LeafAirbyteSchemaType.DATE)
        val decoded = decoder.decode(encoded.build())
        assertEquals(sqlDate.toLocalDate(), decoded)
    }

    @Test
    fun testTimeWithoutTimezoneRoundTrip() {
        testRoundTrip(LocalTime.MIDNIGHT, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        testRoundTrip(LocalTime.NOON, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        testRoundTrip(LocalTime.MAX, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        testRoundTrip(LocalTime.MIN, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        testRoundTrip(LocalTime.of(14, 30, 45), LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        testRoundTrip(
            LocalTime.of(14, 30, 45, 123456789),
            LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
        )
        testRoundTrip(LocalTime.of(0, 0, 0, 1), LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)

        // Test SqlTime conversion
        val sqlTime = SqlTime.valueOf("14:30:45")
        val encoded = encoder.encode(sqlTime, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        val decoded = decoder.decode(encoded.build())
        assertEquals(sqlTime.toLocalTime(), decoded)
    }

    @Test
    fun testTimeWithTimezoneRoundTrip() {
        // These constants do not depend on the offset list, so they are checked once.
        testRoundTrip(OffsetTime.MIN, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
        testRoundTrip(OffsetTime.MAX, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)

        // Various timezone offsets
        val offsets =
            listOf(
                ZoneOffset.UTC,
                ZoneOffset.ofHours(1),
                ZoneOffset.ofHours(-1),
                ZoneOffset.ofHours(5),
                ZoneOffset.ofHours(-8),
                ZoneOffset.ofHours(12),
                ZoneOffset.ofHours(-12),
                ZoneOffset.ofHoursMinutes(5, 30),
                ZoneOffset.ofHoursMinutes(-5, -45),
                ZoneOffset.ofHoursMinutesSeconds(1, 30, 30),
                ZoneOffset.MIN,
                ZoneOffset.MAX
            )
        offsets.forEach { offset ->
            testRoundTrip(
                OffsetTime.of(14, 30, 45, 0, offset),
                LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
            )
            testRoundTrip(
                OffsetTime.of(14, 30, 45, 123456789, offset),
                LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
            )
            testRoundTrip(
                OffsetTime.of(0, 0, 0, 0, offset),
                LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
            )
            testRoundTrip(
                OffsetTime.of(23, 59, 59, 999999999, offset),
                LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
            )
        }
    }

    @Test
    fun testTimestampWithoutTimezoneRoundTrip() {
        testRoundTrip(
            LocalDateTime.of(2025, 10, 6, 14, 30, 45),
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
        )
        testRoundTrip(
            LocalDateTime.of(2025, 10, 6, 14, 30, 45, 123456789),
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
        )
        testRoundTrip(
            LocalDateTime.of(1970, 1, 1, 0, 0, 0, 0),
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
        )
        testRoundTrip(
            LocalDateTime.of(2025, 10, 6, 0, 0, 0, 0),
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
        )
        testRoundTrip(
            LocalDateTime.of(2024, 2, 29, 12, 0, 0),
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
        )
        testRoundTrip(
            LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999999),
            LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
        )
        testRoundTrip(LocalDateTime.MIN, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
        testRoundTrip(LocalDateTime.MAX, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)

        // Test SqlTimestamp conversion
        val sqlTimestamp = SqlTimestamp.valueOf("2025-10-06 14:30:45.123456789")
        val encoded = encoder.encode(sqlTimestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
        val decoded = decoder.decode(encoded.build())
        assertEquals(sqlTimestamp.toLocalDateTime(), decoded)
    }

    @Test
    fun testTimestampWithTimezoneRoundTrip() {
        // These constants do not depend on the offset list, so they are checked once.
        testRoundTrip(OffsetDateTime.MIN, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
        testRoundTrip(OffsetDateTime.MAX, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)

        // Various timezone offsets
        val offsets =
            listOf(
                ZoneOffset.UTC,
                ZoneOffset.ofHours(1),
                ZoneOffset.ofHours(-1),
                ZoneOffset.ofHours(5),
                ZoneOffset.ofHours(-8),
                ZoneOffset.ofHours(12),
                ZoneOffset.ofHours(-12),
                ZoneOffset.ofHoursMinutes(5, 30),
                ZoneOffset.ofHoursMinutes(-5, -45),
                ZoneOffset.ofHoursMinutes(-3, -30),
                ZoneOffset.ofHoursMinutes(-4, -30),
                ZoneOffset.ofHoursMinutes(9, 30),
                ZoneOffset.MIN,
                ZoneOffset.MAX
            )
        offsets.forEach { offset ->
            testRoundTrip(
                OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, offset),
                LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
            )
            testRoundTrip(
                OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 123456789, offset),
                LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
            )
            testRoundTrip(
                OffsetDateTime.of(1970, 1, 1, 0, 0, 0, 0, offset),
                LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
            )
        }

        // Test SqlTimestamp conversion (converts to UTC)
        val sqlTimestamp = SqlTimestamp.valueOf("2025-10-06 14:30:45.123456789")
        val encoded = encoder.encode(sqlTimestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
        val decoded = decoder.decode(encoded.build()) as OffsetDateTime
        assertEquals(OffsetDateTime.ofInstant(sqlTimestamp.toInstant(), ZoneOffset.UTC), decoded)
    }

    /** JSON given as String or as ByteArray is expected back as the same JSON String. */
    @Test
    fun testJsonRoundTrip() {
        val jsonStrings =
            listOf(
                """{}""",
                """[]""",
                """{"key": "value"}""",
                """[1, 2, 3, "test"]""",
                """{"nested": {"object": true}}""",
                """{"unicode": "你好 مرحبا 😀"}"""
            )
        jsonStrings.forEach { json -> testRoundTrip(json, LeafAirbyteSchemaType.JSONB) }

        // Test with byte arrays
        jsonStrings.forEach { json ->
            val bytes = json.toByteArray()
            val encoded = encoder.encode(bytes, LeafAirbyteSchemaType.JSONB)
            val decoded = decoder.decode(encoded.build())
            assertEquals(json, decoded)
        }
    }

    /** Binary content is expected back as the Base64 encoding of the original bytes. */
    @Test
    fun testBinaryRoundTrip() {
        val binaryStrings = listOf("simple text", "binary data with special chars: \n\t\r", "")
        binaryStrings.forEach { binary ->
            testRoundTrip(
                binary.toByteArray(),
                Base64.getEncoder().encodeToString(binary.toByteArray()),
                LeafAirbyteSchemaType.BINARY
            )
        }

        // Test with byte arrays
        val byteArrays =
            listOf(
                byteArrayOf(0x01, 0x02, 0x03),
                byteArrayOf(0xFF.toByte(), 0xFE.toByte()),
                byteArrayOf()
            )
        byteArrays.forEach { bytes ->
            val encoded = encoder.encode(bytes, LeafAirbyteSchemaType.BINARY)
            val decoded = decoder.decode(encoded.build()) as String
            assertEquals(Base64.getEncoder().encodeToString(bytes), decoded)
        }
    }

    /** The same instant expressed in different offsets keeps both its instant and its offset. */
    @Test
    fun testComplexTimezoneScenarios() {
        // Test same instant in different timezones
        val baseTimestamp = OffsetDateTime.of(2025, 10, 6, 14, 30, 45, 0, ZoneOffset.UTC)
        val timezones =
            listOf(
                ZoneOffset.ofHours(-8), // PST
                ZoneOffset.ofHours(5), // IST (approximately)
                ZoneOffset.ofHours(9), // JST
                ZoneOffset.ofHoursMinutes(-4, -30) // VET
            )
        timezones.forEach { offset ->
            val timestamp = baseTimestamp.withOffsetSameInstant(offset)
            val encoded = encoder.encode(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
            val decoded = decoder.decode(encoded.build()) as OffsetDateTime
            // Should preserve the instant and the offset
            assertEquals(timestamp.toInstant(), decoded.toInstant())
            assertEquals(timestamp.offset, decoded.offset)
            assertEquals(timestamp, decoded)
        }
    }

    @Test
    fun testDstTransitions() {
        // Test timestamps around DST transitions
        val dstTestCases =
            listOf(
                // Spring forward
                OffsetDateTime.of(2025, 3, 9, 1, 59, 0, 0, ZoneOffset.ofHours(-8)),
                OffsetDateTime.of(2025, 3, 9, 3, 0, 0, 0, ZoneOffset.ofHours(-7)),
                // Fall back
                OffsetDateTime.of(2025, 11, 2, 1, 0, 0, 0, ZoneOffset.ofHours(-7)),
                OffsetDateTime.of(2025, 11, 2, 1, 0, 0, 0, ZoneOffset.ofHours(-8))
            )
        dstTestCases.forEach { timestamp ->
            testRoundTrip(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
        }
    }

    @Test
    fun testBuilderReuse() {
        // Test that builder reuse works correctly
        val builder =
            io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()

        // Encode multiple different types with the same builder
        val encoded1 = encoder.encode(42, LeafAirbyteSchemaType.INTEGER, builder)
        val decoded1 = decoder.decode(encoded1.build())
        assertEquals(BigInteger.valueOf(42), decoded1)

        val encoded2 = encoder.encode("hello", LeafAirbyteSchemaType.STRING, builder)
        val decoded2 = decoder.decode(encoded2.build())
        assertEquals("hello", decoded2)

        val encoded3 =
            encoder.encode(LocalDate.of(2025, 10, 6), LeafAirbyteSchemaType.DATE, builder)
        val decoded3 = decoder.decode(encoded3.build())
        assertEquals(LocalDate.of(2025, 10, 6), decoded3)
    }

    @Test
    fun testEdgeCaseValues() {
        // Test zero values
        testRoundTrip(0.0.toBigDecimal(), LeafAirbyteSchemaType.NUMBER)
        testRoundTrip(LocalTime.MIDNIGHT, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)

        // Test maximum precision
        val maxPrecisionTime = LocalTime.of(23, 59, 59, 999999999)
        testRoundTrip(maxPrecisionTime, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
        val maxPrecisionTimestamp = LocalDateTime.of(2025, 10, 6, 23, 59, 59, 999999999)
        testRoundTrip(maxPrecisionTimestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)

        // Test empty strings and collections
        testRoundTrip("", LeafAirbyteSchemaType.STRING)
        testRoundTrip("{}", LeafAirbyteSchemaType.JSONB)
        testRoundTrip("[]", LeafAirbyteSchemaType.JSONB)
    }

    /**
     * Round-trips times and timestamps for every quarter-hour offset from -12:00 to +14:00.
     *
     * All offsets generated here are valid for [ZoneOffset], so nothing is caught: any exception
     * thrown by the encoder or decoder must fail the test instead of being skipped silently.
     */
    @Test
    fun testAllTimezonesRoundTrip() {
        for (hours in -12..14) {
            for (minutes in listOf(0, 15, 30, 45)) {
                // Keep the sweep within -12:00..+14:00.
                if (hours == 14 && minutes > 0) continue
                if (hours == -12 && minutes > 0) continue

                // ZoneOffset requires the minutes to carry the same sign as the hours.
                val offset =
                    ZoneOffset.ofHoursMinutes(hours, if (hours >= 0) minutes else -minutes)

                val time = OffsetTime.of(12, 0, 0, 0, offset)
                testRoundTrip(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)

                val timestamp = OffsetDateTime.of(2025, 10, 6, 12, 0, 0, 0, offset)
                testRoundTrip(timestamp, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
            }
        }
    }

    @Test
    fun testLeapYearRoundTrip() {
        val leapYears = listOf(2000, 2004, 2020, 2024)
        leapYears.forEach { year ->
            val leapDay = LocalDate.of(year, 2, 29)
            testRoundTrip(leapDay, LeafAirbyteSchemaType.DATE)

            val leapDayTimestamp = LocalDateTime.of(year, 2, 29, 12, 0, 0)
            testRoundTrip(leapDayTimestamp, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
        }
    }
}

View File

@@ -7,40 +7,21 @@ package io.airbyte.cdk.output.sockets
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.cdk.data.ArrayEncoder
import io.airbyte.cdk.data.BigDecimalCodec
import io.airbyte.cdk.data.BigDecimalIntegerCodec
import io.airbyte.cdk.data.BinaryCodec
import io.airbyte.cdk.data.BooleanCodec
import io.airbyte.cdk.data.ByteCodec
import io.airbyte.cdk.data.CdcOffsetDateTimeCodec
import io.airbyte.cdk.data.DoubleCodec
import io.airbyte.cdk.data.FloatCodec
import io.airbyte.cdk.data.IntCodec
import io.airbyte.cdk.data.JsonBytesCodec
import io.airbyte.cdk.data.JsonEncoder
import io.airbyte.cdk.data.JsonStringCodec
import io.airbyte.cdk.data.LocalDateCodec
import io.airbyte.cdk.data.LocalDateTimeCodec
import io.airbyte.cdk.data.LocalTimeCodec
import io.airbyte.cdk.data.LongCodec
import io.airbyte.cdk.data.NullCodec
import io.airbyte.cdk.data.OffsetDateTimeCodec
import io.airbyte.cdk.data.OffsetTimeCodec
import io.airbyte.cdk.data.ShortCodec
import io.airbyte.cdk.data.TextCodec
import io.airbyte.cdk.data.UrlCodec
import io.airbyte.cdk.discover.FieldOrMetaField
import io.airbyte.cdk.protocol.AirbyteValueProtobufEncoder
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.protobuf.AirbyteRecordMessage
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteRecordMessageProtobuf
import java.math.BigDecimal
import java.net.URL
import java.nio.ByteBuffer
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
// A value of a field along with its encoder
class FieldValueEncoder<R>(val fieldValue: R?, val jsonEncoder: JsonEncoder<in R>) {
@@ -53,6 +34,8 @@ class FieldValueEncoder<R>(val fieldValue: R?, val jsonEncoder: JsonEncoder<in R
// (json or protobuf)
typealias NativeRecordPayload = MutableMap<String, FieldValueEncoder<*>>
// File-level AirbyteValueProtobufEncoder instance, created once when this file is initialized.
// NOTE(review): presumably shared by the protobuf encoding helpers in this file — confirm.
val encoder = AirbyteValueProtobufEncoder()
fun NativeRecordPayload.toJson(parentNode: ObjectNode = Jsons.objectNode()): ObjectNode {
for ((columnId, value) in this) {
parentNode.set<JsonNode>(columnId, value.encode())
@@ -60,116 +43,24 @@ fun NativeRecordPayload.toJson(parentNode: ObjectNode = Jsons.objectNode()): Obj
return parentNode
}
/**
 * Selects the protobuf encoder that matches this JSON codec.
 *
 * Branches are checked from top to bottom; a codec without a dedicated branch falls back to
 * [anyProtoEncoder].
 */
fun <T> JsonEncoder<T>.toProtobufEncoder(): ProtoEncoder<*> =
    when (this) {
        is LongCodec -> longProtoEncoder
        is IntCodec -> intProtoEncoder
        is TextCodec -> textProtoEncoder
        is BooleanCodec -> booleanProtoEncoder
        is OffsetDateTimeCodec -> offsetDateTimeProtoEncoder
        is FloatCodec -> floatProtoEncoder
        is NullCodec -> nullProtoEncoder
        is BinaryCodec -> binaryProtoEncoder
        // Both BigDecimal-backed codecs share one encoder.
        is BigDecimalCodec,
        is BigDecimalIntegerCodec -> bigDecimalProtoEncoder
        is ShortCodec -> shortProtoEncoder
        is ByteCodec -> byteProtoEncoder
        is DoubleCodec -> doubleProtoEncoder
        // JSON payloads reuse the binary / text encoders depending on their representation.
        is JsonBytesCodec -> binaryProtoEncoder
        is JsonStringCodec -> textProtoEncoder
        is UrlCodec -> urlProtoEncoder
        is LocalDateCodec -> localDateProtoEncoder
        is LocalTimeCodec -> localTimeProtoEncoder
        is LocalDateTimeCodec -> localDateTimeProtoEncoder
        is OffsetTimeCodec -> offsetTimeProtoEncoder
        is ArrayEncoder<*> -> anyProtoEncoder
        is CdcOffsetDateTimeCodec -> offsetDateTimeStringProtoEncoder
        else -> anyProtoEncoder
    }
/**
 * Single-method encoder that writes a decoded value of type [T] into a protobuf value builder.
 * Declared as a `fun interface`, so an implementation can be supplied as a lambda.
 */
fun interface ProtoEncoder<T> {
/** Encodes [decoded] with the given [builder] and returns a builder for the encoded value. */
fun encode(
builder: AirbyteRecordMessage.AirbyteValueProtobuf.Builder,
decoded: T
): AirbyteRecordMessage.AirbyteValueProtobuf.Builder
}
/**
* Generates a ProtoEncoder for a specific type T.
*
* @param setValue A lambda function that sets the value in the builder for the given type T.
* @return A ProtoEncoder instance that encodes values of type T into AirbyteValueProtobuf.
* Transforms a field value into a protobuf-compatible representation. Handles special conversions
* for types that need preprocessing before protobuf encoding, such as ByteBuffer -> Base64 String,
* BigDecimal -> BigInteger, URL -> String, etc.
*/
private inline fun <T> generateProtoEncoder(
    crossinline setValue:
        (
            AirbyteRecordMessage.AirbyteValueProtobuf.Builder,
            T
        ) -> AirbyteRecordMessage.AirbyteValueProtobuf.Builder
): ProtoEncoder<T> =
    // ProtoEncoder is a `fun interface`, so the SAM constructor stands in for an explicit
    // anonymous object; the encoder simply forwards both arguments to setValue.
    ProtoEncoder<T> { target, payload -> setValue(target, payload) }
val offsetTimeProtoEncoder =
generateProtoEncoder<OffsetTime> { builder, value ->
builder.setTimeWithTimezone(value.format(OffsetTimeCodec.formatter))
}
val localDateTimeProtoEncoder =
generateProtoEncoder<LocalDateTime> { builder, value ->
builder.setTimestampWithoutTimezone(value.format(LocalDateTimeCodec.formatter))
}
val localTimeProtoEncoder =
generateProtoEncoder<LocalTime> { builder, time ->
builder.setTimeWithoutTimezone(time.format(LocalTimeCodec.formatter))
}
val localDateProtoEncoder =
generateProtoEncoder<LocalDate> { builder, date ->
builder.setDate(date.format(LocalDateCodec.formatter))
}
val urlProtoEncoder =
generateProtoEncoder<URL> { builder, url -> builder.setString(url.toExternalForm()) }
val doubleProtoEncoder = generateProtoEncoder<Double> { builder, value -> builder.setNumber(value) }
val byteProtoEncoder =
generateProtoEncoder<Byte> { builder, value -> builder.setInteger(value.toLong()) }
val binaryProtoEncoder =
generateProtoEncoder<ByteBuffer> { builder, decoded ->
builder.setString(java.util.Base64.getEncoder().encodeToString(decoded.array()))
}
val shortProtoEncoder =
generateProtoEncoder<Short> { builder, value -> builder.setInteger(value.toLong()) }
val bigDecimalProtoEncoder =
generateProtoEncoder<BigDecimal> { builder, decoded ->
when (decoded.scale()) {
0 -> builder.setBigInteger(decoded.toPlainString()) // no decimal places
else -> builder.setBigDecimal(decoded.toPlainString())
fun <R> valueForProtobufEncoding(fve: FieldValueEncoder<R>): Any? {
return fve.fieldValue?.let { value ->
when (fve.jsonEncoder) {
is BigDecimalIntegerCodec -> (value as BigDecimal).toBigInteger()
is ByteCodec -> (value as Byte).toLong()
is UrlCodec -> (value as URL).toExternalForm()
is CdcOffsetDateTimeCodec ->
(value as OffsetDateTime).format(OffsetDateTimeCodec.formatter)
is ArrayEncoder<*> -> fve.encode().toString()
else -> value
}
}
val longProtoEncoder = generateProtoEncoder<Long> { builder, value -> builder.setInteger(value) }
val textProtoEncoder = generateProtoEncoder<String> { builder, value -> builder.setString(value) }
val intProtoEncoder =
generateProtoEncoder<Int> { builder, value -> builder.setInteger(value.toLong()) }
val booleanProtoEncoder =
generateProtoEncoder<Boolean> { builder, value -> builder.setBoolean(value) }
val offsetDateTimeProtoEncoder =
generateProtoEncoder<OffsetDateTime> { builder, decoded ->
builder.setTimestampWithTimezone(decoded.format(OffsetDateTimeCodec.formatter))
}
// Supports the case where OffsetDateTime is encoded as a protobuf string (e.g. in CDC scenarios)
val offsetDateTimeStringProtoEncoder =
generateProtoEncoder<OffsetDateTime> { builder, decoded ->
builder.setString(decoded.format(OffsetDateTimeCodec.formatter))
}
val floatProtoEncoder =
generateProtoEncoder<Float> { builder, decoded -> builder.setBigDecimal(decoded.toString()) }
val nullProtoEncoder = generateProtoEncoder<Any?> { builder, _ -> builder.setIsNull(true) }
val anyProtoEncoder = textProtoEncoder
}
fun NativeRecordPayload.toProtobuf(
schema: Set<FieldOrMetaField>,
@@ -183,17 +74,15 @@ fun NativeRecordPayload.toProtobuf(
// Protobuf does not have field names, so we use a sorted order of fields
// So for destination to know which fields it is, we order the fields alphabetically
// to make sure that the order is consistent.
this@toProtobuf[field.id]?.let { value ->
@Suppress("UNCHECKED_CAST")
this@toProtobuf[field.id]?.let { fve ->
val decodedValueForProto = valueForProtobufEncoding(fve)
setData(
index,
value.fieldValue?.let {
(value.jsonEncoder.toProtobufEncoder() as ProtoEncoder<Any>).encode(
valueBuilder.clear(),
value.fieldValue
)
}
?: nullProtoEncoder.encode(valueBuilder.clear(), null)
encoder.encode(
decodedValueForProto,
field.type.airbyteSchemaType,
valueBuilder.clear()
)
)
}
}

View File

@@ -15,13 +15,12 @@ import io.airbyte.cdk.output.DataChannelMedium
import io.airbyte.cdk.output.OutputConsumer
import io.airbyte.cdk.output.StandardOutputConsumer
import io.airbyte.cdk.output.sockets.NativeRecordPayload
import io.airbyte.cdk.output.sockets.ProtoEncoder
import io.airbyte.cdk.output.sockets.SocketJsonOutputConsumer
import io.airbyte.cdk.output.sockets.SocketProtobufOutputConsumer
import io.airbyte.cdk.output.sockets.nullProtoEncoder
import io.airbyte.cdk.output.sockets.toJson
import io.airbyte.cdk.output.sockets.toProtobuf
import io.airbyte.cdk.output.sockets.toProtobufEncoder
import io.airbyte.cdk.output.sockets.valueForProtobufEncoding
import io.airbyte.cdk.protocol.AirbyteValueProtobufEncoder
import io.airbyte.cdk.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteMessage
import io.airbyte.protocol.models.v0.AirbyteRecordMessage
@@ -320,26 +319,23 @@ sealed class FeedBootstrap<T : Feed>(
// Unlike STDIO mode, in socket mode we always include all schema fields,
// including decorating fields even when they have a NULL value.
// This is necessary beacuse in PROTOBUF mode we don't have field names so
// This is necessary because in PROTOBUF mode we don't have field names so
// the sorted order of fields is used to determine the field position on the
// other side.
val encoder = AirbyteValueProtobufEncoder()
stream.schema
.sortedBy { it.id }
.forEach { field ->
builder.addData(
when {
decoratingFields.keys.contains(field.id) -> {
@Suppress("UNCHECKED_CAST")
(decoratingFields[field.id]!!
.jsonEncoder
.toProtobufEncoder() as ProtoEncoder<Any>)
.encode(
valueVBuilder.clear(),
decoratingFields[field.id]!!.fieldValue!!
)
}
else -> nullProtoEncoder.encode(valueVBuilder.clear(), true)
val decodedValueForProto =
decoratingFields[field.id]?.let { fve ->
valueForProtobufEncoding(fve)
}
builder.addData(
encoder.encode(
decodedValueForProto,
field.type.airbyteSchemaType,
valueVBuilder.clear()
)
)
}
}

View File

@@ -4,18 +4,19 @@
package io.airbyte.cdk.output.sockets
import com.fasterxml.jackson.core.io.BigDecimalParser
import io.airbyte.cdk.data.AirbyteSchemaType
import io.airbyte.cdk.data.BigDecimalCodec
import io.airbyte.cdk.data.BigDecimalIntegerCodec
import io.airbyte.cdk.data.BinaryCodec
import io.airbyte.cdk.data.BooleanCodec
import io.airbyte.cdk.data.ByteCodec
import io.airbyte.cdk.data.CdcOffsetDateTimeCodec
import io.airbyte.cdk.data.DoubleCodec
import io.airbyte.cdk.data.FloatCodec
import io.airbyte.cdk.data.IntCodec
import io.airbyte.cdk.data.JsonBytesCodec
import io.airbyte.cdk.data.JsonEncoder
import io.airbyte.cdk.data.JsonStringCodec
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.data.LocalDateCodec
import io.airbyte.cdk.data.LocalDateTimeCodec
import io.airbyte.cdk.data.LocalTimeCodec
@@ -26,56 +27,86 @@ import io.airbyte.cdk.data.ShortCodec
import io.airbyte.cdk.data.TextCodec
import io.airbyte.cdk.data.UrlCodec
import io.airbyte.cdk.discover.Field
import io.airbyte.cdk.discover.StringFieldType
import io.airbyte.cdk.discover.FieldType
import io.airbyte.cdk.protocol.AirbyteValueProtobufDecoder
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteRecordMessageProtobuf
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.math.BigDecimal
import java.net.URI
import java.net.URL
import java.nio.ByteBuffer
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.util.Base64
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.DynamicNode
import org.junit.jupiter.api.DynamicTest
import org.junit.jupiter.api.TestFactory
class NativeRecordProtobufEncoderTest {
private val protoDecoder = AirbyteValueProtobufDecoder()
data class TestCase<T>(
val value: T,
val jsonEncoder: JsonEncoder<*>,
val decoder: (AirbyteRecordMessageProtobuf) -> T
)
val airbyteSchemaType: AirbyteSchemaType,
) {
val asDecoderType: Any
get() =
when (value) {
is Short, -> value.toLong().toBigInteger()
is Int -> value.toBigInteger()
is Long -> value.toBigInteger()
is Float -> value.toDouble().toBigDecimal()
is BigDecimal -> if (value.scale() == 0) value.toBigInteger() else value
is URL -> value.toExternalForm()
is ByteBuffer ->
when (jsonEncoder) {
is JsonBytesCodec -> value.array().toString(Charsets.UTF_8)
else -> java.util.Base64.getEncoder().encodeToString(value.array())
}
is Byte -> value.toLong().toBigInteger()
is Double -> value.toBigDecimal()
else -> value!!
}
}
val valBuilder = AirbyteValueProtobuf.newBuilder()
val protoBuilder =
AirbyteRecordMessageProtobuf.newBuilder().also { it.addData(0, valBuilder.clear()) }
fun fieldOf(airbyteSchemaType: AirbyteSchemaType, jsonEncoder: JsonEncoder<*>): Field =
Field(
"id",
object : FieldType {
override val airbyteSchemaType = airbyteSchemaType
override val jsonEncoder = jsonEncoder
}
)
val testCases =
listOf(
TestCase(
value = 123L,
jsonEncoder = LongCodec,
decoder = { proto -> proto.getData(0).integer }
airbyteSchemaType = LeafAirbyteSchemaType.INTEGER,
),
TestCase(
value = 123,
jsonEncoder = IntCodec,
decoder = { proto -> proto.getData(0).integer.toInt() }
airbyteSchemaType = LeafAirbyteSchemaType.INTEGER
),
TestCase(
value = "text value",
jsonEncoder = TextCodec,
decoder = { proto -> proto.getData(0).string }
airbyteSchemaType = LeafAirbyteSchemaType.STRING
),
TestCase(
value = true,
jsonEncoder = BooleanCodec,
decoder = { proto -> proto.getData(0).boolean }
airbyteSchemaType = LeafAirbyteSchemaType.BOOLEAN
),
TestCase(
value =
@@ -83,112 +114,92 @@ class NativeRecordProtobufEncoderTest {
OffsetDateTime.now().format(OffsetDateTimeCodec.formatter)
),
jsonEncoder = OffsetDateTimeCodec,
decoder = { proto -> OffsetDateTime.parse(proto.getData(0).timestampWithTimezone) }
),
TestCase(
value =
OffsetDateTime.parse(
OffsetDateTime.now().format(OffsetDateTimeCodec.formatter)
),
jsonEncoder = CdcOffsetDateTimeCodec,
decoder = { proto -> OffsetDateTime.parse(proto.getData(0).string) }
airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
),
TestCase(
value = 123.456f,
jsonEncoder = FloatCodec,
decoder = { proto ->
BigDecimalParser.parseWithFastParser(proto.getData(0).bigDecimal).toFloat()
}
airbyteSchemaType = LeafAirbyteSchemaType.NUMBER
),
TestCase(
value = "hello".toByteArray().let { ByteBuffer.wrap(it) },
value = ByteBuffer.wrap("hello".toByteArray()),
jsonEncoder = BinaryCodec,
decoder = { proto ->
ByteBuffer.wrap(Base64.getDecoder().decode(proto.getData(0).string))
}
airbyteSchemaType = LeafAirbyteSchemaType.BINARY
),
TestCase(
value = BigDecimal.valueOf(1234.567),
jsonEncoder = BigDecimalCodec,
decoder = { proto ->
BigDecimalParser.parseWithFastParser(proto.getData(0).bigDecimal)
}
airbyteSchemaType = LeafAirbyteSchemaType.NUMBER
),
TestCase(
value = BigDecimal.valueOf(987),
jsonEncoder = BigDecimalCodec,
decoder = { proto ->
BigDecimalParser.parseWithFastParser(proto.getData(0).bigInteger)
}
jsonEncoder = BigDecimalIntegerCodec,
airbyteSchemaType = LeafAirbyteSchemaType.INTEGER
),
TestCase(
value = 12,
value = 12.toShort(),
jsonEncoder = ShortCodec,
decoder = { proto -> proto.getData(0).integer.toShort() }
airbyteSchemaType = LeafAirbyteSchemaType.INTEGER
),
TestCase(
value = 123.toByte(),
jsonEncoder = ByteCodec,
decoder = { proto -> proto.getData(0).integer.toByte() }
airbyteSchemaType = LeafAirbyteSchemaType.INTEGER
),
TestCase(
value = 12345.678,
jsonEncoder = DoubleCodec,
decoder = { proto -> proto.getData(0).number }
airbyteSchemaType = LeafAirbyteSchemaType.NUMBER
),
TestCase(
value = "{\"hello\":1234}".toByteArray().let { ByteBuffer.wrap(it) },
value = ByteBuffer.wrap("{\"hello\":1234}".toByteArray()),
jsonEncoder = JsonBytesCodec,
decoder = { proto ->
ByteBuffer.wrap(Base64.getDecoder().decode(proto.getData(0).string))
}
airbyteSchemaType = LeafAirbyteSchemaType.JSONB
),
TestCase(
value = "{\"hello\":1234}",
jsonEncoder = JsonStringCodec,
decoder = { proto -> proto.getData(0).string }
airbyteSchemaType = LeafAirbyteSchemaType.JSONB
),
TestCase(
value = URI("http://www.example.com").toURL(),
jsonEncoder = UrlCodec,
decoder = { proto -> URI(proto.getData(0).string).toURL() }
airbyteSchemaType = LeafAirbyteSchemaType.STRING
),
TestCase(
value = LocalDate.now(),
jsonEncoder = LocalDateCodec,
decoder = { proto -> LocalDate.parse(proto.getData(0).date) }
airbyteSchemaType = LeafAirbyteSchemaType.DATE
),
TestCase(
value = LocalTime.parse(LocalTime.now().format(LocalTimeCodec.formatter)),
jsonEncoder = LocalTimeCodec,
decoder = { proto -> LocalTime.parse(proto.getData(0).timeWithoutTimezone) }
airbyteSchemaType = LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
TestCase(
value =
LocalDateTime.parse(LocalDateTime.now().format(LocalDateTimeCodec.formatter)),
jsonEncoder = LocalDateTimeCodec,
decoder = { proto ->
LocalDateTime.parse(proto.getData(0).timestampWithoutTimezone)
}
airbyteSchemaType = LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
TestCase(
value = OffsetTime.parse(OffsetTime.now().format(OffsetTimeCodec.formatter)),
jsonEncoder = OffsetTimeCodec,
decoder = { proto -> OffsetTime.parse(proto.getData(0).timeWithTimezone) }
airbyteSchemaType = LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
)
@TestFactory
fun dynamicTestsForAddition(): Collection<DynamicNode> {
return testCases.map { case ->
@Suppress("UNCHECKED_CAST")
val fve = FieldValueEncoder(case.value, case.jsonEncoder as JsonEncoder<in Any>)
DynamicTest.dynamicTest("test-${case.value.javaClass.simpleName}") {
DynamicTest.dynamicTest("test-${case.value!!.javaClass.simpleName}") {
val n: NativeRecordPayload = mutableMapOf("id" to fve)
val actualProto =
n.toProtobuf(setOf(Field("id", StringFieldType)), protoBuilder, valBuilder)
.build()
assertEquals(case.value, case.decoder(actualProto))
val field = fieldOf(case.airbyteSchemaType, case.jsonEncoder)
val actualProto = n.toProtobuf(setOf(field), protoBuilder, valBuilder).build()
assertEquals(case.asDecoderType, protoDecoder.decode(actualProto.getData(0)))
}
}
}

View File

@@ -439,7 +439,7 @@ class MockBasicFunctionalityIntegrationTestStdioJsonl :
DataChannelFormat.JSONL,
)
// Speed mode
@Disabled
class MockBasicFunctionalityIntegrationTestSocketProtobuf :
BaseMockBasicFunctionalityIntegrationTest(
DataChannelMedium.SOCKET,

View File

@@ -4,67 +4,110 @@
package io.airbyte.cdk.load.data
import com.fasterxml.jackson.core.io.BigDecimalParser
import com.fasterxml.jackson.core.io.BigIntegerParser
import com.fasterxml.jackson.databind.JsonNode
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
import io.airbyte.cdk.load.util.Jsons
import io.airbyte.cdk.protocol.AirbyteValueProtobufDecoder
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.math.BigDecimal
import java.math.BigInteger
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.format.DateTimeFormatter
import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.ChronoField
/**
* Protobuf is sent as an ordered list of AirbyteValues. Coherent access depends on the source and
* destination agreeing on the schema. Currently, this is alphabetical order by field name, as
* constraints on the socket implementation guarantee that source and destination will always see
* the same schema. Eventually this order needs to be set by the source with a header message.
*
* @deprecated This is inefficient and should not be used. Use
* [io.airbyte.cdk.load.dataflow.transform.medium.ProtobufConverter] instead.
* @see io.airbyte.cdk.load.dataflow.transform.medium.ProtobufConverter
*/
@Deprecated("This is inefficient and should not be used. Use ProtobufConverter instead.")
class AirbyteValueProtobufProxy(private val data: List<AirbyteValueProtobuf>) : AirbyteValueProxy {
private val decoder = AirbyteValueProtobufDecoder()
companion object {
// Formatters used to render decoded temporal values as strings. Each one always emits
// hours, minutes and seconds as two digits, followed by a fraction of 0 to 9 digits.
// NOTE(review): the fraction is appended with minWidth 0, so trailing zeros of the
// fractional second are omitted and a zero fraction prints nothing at all. The previous
// wording ("preserve full precision including trailing zeros") presumably referred to the
// seconds field always being printed; confirm the intent.

// `HH:mm:ss` plus an optional `.fraction` (up to nanosecond precision).
private val LOCAL_TIME_FORMATTER =
DateTimeFormatterBuilder()
.appendValue(ChronoField.HOUR_OF_DAY, 2)
.appendLiteral(':')
.appendValue(ChronoField.MINUTE_OF_HOUR, 2)
.appendLiteral(':')
.appendValue(ChronoField.SECOND_OF_MINUTE, 2)
.appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true)
.toFormatter()
// Local time as above, immediately followed by the zone offset ID.
private val OFFSET_TIME_FORMATTER =
DateTimeFormatterBuilder().append(LOCAL_TIME_FORMATTER).appendOffsetId().toFormatter()
// ISO local date, a literal 'T', then the local time as above.
private val LOCAL_DATE_TIME_FORMATTER =
DateTimeFormatterBuilder()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.appendLiteral('T')
.append(LOCAL_TIME_FORMATTER)
.toFormatter()
// Same as the local date-time layout, with the zone offset ID appended.
private val OFFSET_DATE_TIME_FORMATTER =
DateTimeFormatterBuilder()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.appendLiteral('T')
.append(LOCAL_TIME_FORMATTER)
.appendOffsetId()
.toFormatter()
}
/**
 * Runs [getter] for [field] unless the record holds no usable value at that position.
 *
 * @param field accessor whose `index` selects the entry of [data] to inspect
 * @param getter extraction logic, invoked only when a non-NULL value is present
 * @return `null` when `field.index` is at or beyond the end of [data], or when the protobuf value
 * at that index has value case NULL; otherwise the result of [getter]
 */
private inline fun <T> getNullable(field: FieldAccessor, getter: (FieldAccessor) -> T): T? {
    // `index >= size` covers the empty list as well as index == size. The previous guard
    // (`size < index`) let index == size through and failed with IndexOutOfBoundsException on
    // the lookup below.
    val isAbsent =
        field.index >= data.size ||
            data[field.index].valueCase == AirbyteValueProtobuf.ValueCase.NULL
    return if (isAbsent) null else getter(field)
}
override fun getBoolean(field: FieldAccessor): Boolean? =
getNullable(field) { data[it.index].boolean }
getNullable(field) { decoder.decode(data[it.index]) as? Boolean }
override fun getString(field: FieldAccessor): String? =
getNullable(field) { data[it.index].string }
getNullable(field) { decoder.decode(data[it.index]) as? String }
override fun getInteger(field: FieldAccessor): BigInteger? =
getNullable(field) {
if (data[it.index].hasBigInteger()) {
BigIntegerParser.parseWithFastParser(data[it.index].bigInteger)
} else {
data[it.index].integer.toBigInteger()
}
}
getNullable(field) { decoder.decode(data[it.index]) as? BigInteger }
override fun getNumber(field: FieldAccessor): BigDecimal? =
getNullable(field) {
if (data[it.index].hasBigDecimal()) {
BigDecimalParser.parseWithFastParser(data[it.index].bigDecimal)
} else if (data[it.index].hasNumber()) {
data[it.index].number.toBigDecimal()
} else {
null
}
}
getNullable(field) { decoder.decode(data[it.index]) as? BigDecimal }
override fun getDate(field: FieldAccessor): String? =
getNullable(field) { data[field.index].date }
getNullable(field) { (decoder.decode(data[it.index]) as? LocalDate)?.toString() }
override fun getTimeWithTimezone(field: FieldAccessor): String? =
getNullable(field) { data[field.index].timeWithTimezone }
getNullable(field) {
(decoder.decode(data[it.index]) as? OffsetTime)?.format(OFFSET_TIME_FORMATTER)
}
override fun getTimeWithoutTimezone(field: FieldAccessor): String? =
getNullable(field) { data[field.index].timeWithoutTimezone }
getNullable(field) {
(decoder.decode(data[it.index]) as? java.time.LocalTime)?.format(LOCAL_TIME_FORMATTER)
}
override fun getTimestampWithTimezone(field: FieldAccessor): String? =
getNullable(field) { data[field.index].timestampWithTimezone }
getNullable(field) {
(decoder.decode(data[it.index]) as? OffsetDateTime)?.format(OFFSET_DATE_TIME_FORMATTER)
}
override fun getTimestampWithoutTimezone(field: FieldAccessor): String? =
getNullable(field) { data[field.index].timestampWithoutTimezone }
getNullable(field) {
(decoder.decode(data[it.index]) as? LocalDateTime)?.format(LOCAL_DATE_TIME_FORMATTER)
}
override fun getJsonBytes(field: FieldAccessor): ByteArray? =
getNullable(field) { data[field.index].json.toByteArray() }

View File

@@ -4,82 +4,165 @@
package io.airbyte.cdk.load.data
import com.google.protobuf.ByteString
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.load.data.json.toJson
import io.airbyte.cdk.load.util.serializeToJsonBytes
import io.airbyte.cdk.protocol.AirbyteValueProtobufEncoder
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
/** Convenience class for testing. */
class AirbyteValueToProtobuf {
private val encoder = AirbyteValueProtobufEncoder()
fun toProtobuf(value: AirbyteValue, type: AirbyteType): AirbyteValueProtobuf {
val b = AirbyteValueProtobuf.newBuilder()
// Handle null values
if (value is NullValue) {
return b.setIsNull(true).build()
}
fun setJson(value: AirbyteValue) =
b.setJson(ByteString.copyFrom(value.toJson().serializeToJsonBytes()))
when (type) {
is BooleanType ->
if (value is BooleanValue) b.setBoolean(value.value) else b.setIsNull(true)
is StringType ->
if (value is StringValue) b.setString(value.value) else b.setIsNull(true)
is NumberType ->
if (value is NumberValue) {
if (value.value.equals(value.value.toDouble())) {
b.setNumber(value.value.toDouble())
} else {
b.setBigDecimal(value.value.toString())
}
} else {
setJson(value)
}
is IntegerType ->
if (value is IntegerValue) {
if (value.value.equals(value.value.toLong())) {
b.setInteger(value.value.toLong())
} else {
b.setBigInteger(value.value.toString())
}
} else {
b.setIsNull(true)
}
is DateType -> if (value is StringValue) b.setDate(value.value) else b.setIsNull(true)
is TimeTypeWithTimezone ->
if (value is StringValue) b.setTimeWithTimezone(value.value) else b.setIsNull(true)
is TimeTypeWithoutTimezone ->
if (value is StringValue) b.setTimeWithoutTimezone(value.value)
else b.setIsNull(true)
is TimestampTypeWithTimezone ->
if (value is StringValue) {
b.setTimestampWithTimezone(value.value)
} else {
b.setIsNull(true)
}
is TimestampTypeWithoutTimezone ->
if (value is StringValue) {
b.setTimestampWithoutTimezone(value.value)
} else {
b.setIsNull(true)
}
is ArrayType,
ArrayTypeWithoutSchema ->
if (value is ArrayValue) {
b.setJson(ByteString.copyFrom(value.toJson().serializeToJsonBytes()))
} else {
b.setIsNull(true)
}
is ObjectType,
ObjectTypeWithEmptySchema,
ObjectTypeWithoutSchema ->
if (value is ObjectValue) {
b.setJson(ByteString.copyFrom(value.toJson().serializeToJsonBytes()))
} else {
b.setIsNull(true)
}
is UnionType,
is UnknownType -> b.setJson(ByteString.copyFrom(value.toJson().serializeToJsonBytes()))
return encoder.encode(null, LeafAirbyteSchemaType.STRING).build()
}
return b.build()
// For complex types (arrays, objects, unions), encode as JSON
return when (type) {
is ArrayType,
ArrayTypeWithoutSchema -> {
if (value is ArrayValue) {
encoder
.encode(value.toJson().serializeToJsonBytes(), LeafAirbyteSchemaType.JSONB)
.build()
} else {
encoder.encode(null, LeafAirbyteSchemaType.JSONB).build()
}
}
is ObjectType,
ObjectTypeWithEmptySchema,
ObjectTypeWithoutSchema -> {
if (value is ObjectValue) {
encoder
.encode(value.toJson().serializeToJsonBytes(), LeafAirbyteSchemaType.JSONB)
.build()
} else {
encoder.encode(null, LeafAirbyteSchemaType.JSONB).build()
}
}
is UnionType,
is UnknownType -> {
encoder
.encode(value.toJson().serializeToJsonBytes(), LeafAirbyteSchemaType.JSONB)
.build()
}
// For scalar and temporal types, extract the underlying value and use the encoder
is BooleanType -> {
if (value is BooleanValue) {
encoder.encode(value.value, LeafAirbyteSchemaType.BOOLEAN).build()
} else {
encoder.encode(null, LeafAirbyteSchemaType.BOOLEAN).build()
}
}
is StringType -> {
if (value is StringValue) {
encoder.encode(value.value, LeafAirbyteSchemaType.STRING).build()
} else {
encoder.encode(null, LeafAirbyteSchemaType.STRING).build()
}
}
is IntegerType -> {
if (value is IntegerValue) {
encoder.encode(value.value, LeafAirbyteSchemaType.INTEGER).build()
} else {
encoder.encode(null, LeafAirbyteSchemaType.INTEGER).build()
}
}
is NumberType -> {
val numberValue =
when (value) {
is NumberValue -> value.value
is StringValue ->
try {
value.value.toBigDecimal()
} catch (_: Exception) {
null
}
else -> null
}
encoder.encode(numberValue, LeafAirbyteSchemaType.NUMBER).build()
}
is DateType -> {
val dateValue =
when (value) {
is DateValue -> value.value
is StringValue ->
try {
LocalDate.parse(value.value)
} catch (_: Exception) {
null
}
else -> null
}
encoder.encode(dateValue, LeafAirbyteSchemaType.DATE).build()
}
is TimeTypeWithTimezone -> {
val timeValue =
when (value) {
is TimeWithTimezoneValue -> value.value
is StringValue ->
try {
OffsetTime.parse(value.value)
} catch (_: Exception) {
null
}
else -> null
}
encoder.encode(timeValue, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE).build()
}
is TimeTypeWithoutTimezone -> {
val timeValue =
when (value) {
is TimeWithoutTimezoneValue -> value.value
is StringValue ->
try {
LocalTime.parse(value.value)
} catch (_: Exception) {
null
}
else -> null
}
encoder.encode(timeValue, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE).build()
}
is TimestampTypeWithTimezone -> {
val timestampValue =
when (value) {
is TimestampWithTimezoneValue -> value.value
is StringValue ->
try {
OffsetDateTime.parse(value.value)
} catch (_: Exception) {
null
}
else -> null
}
encoder
.encode(timestampValue, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
.build()
}
is TimestampTypeWithoutTimezone -> {
val timestampValue =
when (value) {
is TimestampWithoutTimezoneValue -> value.value
is StringValue ->
try {
LocalDateTime.parse(value.value)
} catch (_: Exception) {
null
}
else -> null
}
encoder
.encode(timestampValue, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
.build()
}
}
}
}

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.data
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
/**
 * Signals that a protobuf value arrived with a value case that does not fit the column's declared
 * [AirbyteType].
 *
 * The exception message names the stream and column, shows the expected type next to the actual
 * [AirbyteValueProtobuf.ValueCase], and attributes the problem to the source connector having used
 * the wrong protobuf setter for the declared schema type.
 *
 * @property streamName name of the stream the offending value belongs to
 * @property columnName name of the column the offending value belongs to
 * @property expectedType the type declared for the column
 * @property actualValueCase the value case actually found on the protobuf value
 */
class ProtobufTypeMismatchException(
    val streamName: String,
    val columnName: String,
    val expectedType: AirbyteType,
    val actualValueCase: AirbyteValueProtobuf.ValueCase,
) : RuntimeException(buildMessage(streamName, columnName, expectedType, actualValueCase)) {
    companion object {
        /** Renders the multi-line diagnostic used as the exception message. */
        private fun buildMessage(
            streamName: String,
            columnName: String,
            expectedType: AirbyteType,
            actualValueCase: AirbyteValueProtobuf.ValueCase
        ): String =
            """
            |Protobuf type mismatch detected in stream '$streamName', column '$columnName':
            | Expected AirbyteType: $expectedType
            | Actual protobuf ValueCase: $actualValueCase
            |This error indicates that the source connector is using the wrong protobuf setter method.
            |The source must use the protobuf setter that corresponds to the declared schema type.
            |Please fix the source connector to use the correct protobuf setter method for this column.
            """.trimMargin()
    }
}

View File

@@ -1,114 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.data.protobuf
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.AirbyteValueProxy
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.BooleanValue
import io.airbyte.cdk.load.data.DateType
import io.airbyte.cdk.load.data.DateValue
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.NullValue
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.NumberValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimeWithTimezoneValue
import io.airbyte.cdk.load.data.TimeWithoutTimezoneValue
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.data.json.JsonToAirbyteValue
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
/**
 * Turns a protobuf-backed record into an [ObjectValue] keyed by field name.
 *
 * Each configured field is read through the record's [AirbyteValueProxy] using the getter that
 * matches the field's type, and the result is wrapped in the corresponding [AirbyteValue]. The
 * conversion is deliberately naive: nothing is validated against a declared schema, and anything
 * the proxy reports as absent becomes [NullValue].
 *
 * @param fields the accessors to read, in the order they should appear in the resulting object
 */
class ProtobufToAirbyteValue(
    private val fields: Array<AirbyteValueProxy.FieldAccessor>,
) {
    private val jsonToAirbyteValue = JsonToAirbyteValue()

    /** Converts [record] into an [ObjectValue] holding one entry per configured field. */
    fun convert(record: DestinationRecordProtobufSource): AirbyteValue {
        val proxy = record.asAirbyteValueProxy()
        val valuesByName =
            fields.associate { accessor -> accessor.name to convertField(proxy, accessor) }
        return ObjectValue(LinkedHashMap(valuesByName))
    }

    /**
     * Reads a single field from [proxy] and wraps it according to the field's type.
     *
     * Scalar and temporal types use the matching typed getter; array, union and object types are
     * read as a JSON node and converted with [JsonToAirbyteValue]; unknown types always yield
     * [NullValue]. A `null` from the proxy also yields [NullValue].
     */
    private fun convertField(
        proxy: AirbyteValueProxy,
        field: AirbyteValueProxy.FieldAccessor
    ): AirbyteValue =
        when (field.type) {
            is BooleanType -> proxy.getBoolean(field)?.let(::BooleanValue)
            is IntegerType -> proxy.getInteger(field)?.let(::IntegerValue)
            is NumberType -> proxy.getNumber(field)?.let(::NumberValue)
            is DateType -> proxy.getDate(field)?.let(::DateValue)
            is StringType -> proxy.getString(field)?.let(::StringValue)
            is TimeTypeWithTimezone ->
                proxy.getTimeWithTimezone(field)?.let(::TimeWithTimezoneValue)
            is TimeTypeWithoutTimezone ->
                proxy.getTimeWithoutTimezone(field)?.let(::TimeWithoutTimezoneValue)
            is TimestampTypeWithTimezone ->
                proxy.getTimestampWithTimezone(field)?.let(::TimestampWithTimezoneValue)
            is TimestampTypeWithoutTimezone ->
                proxy.getTimestampWithoutTimezone(field)?.let(::TimestampWithoutTimezoneValue)
            is ArrayType,
            is ArrayTypeWithoutSchema,
            is UnionType,
            is ObjectType,
            is ObjectTypeWithEmptySchema,
            is ObjectTypeWithoutSchema ->
                proxy.getJsonNode(field)?.let { node -> jsonToAirbyteValue.convert(node) }
            is UnknownType -> null
        }
        // Single fallback for every branch that produced no value.
        ?: NullValue
}
/**
 * Converts this protobuf record into an [AirbyteValue] by delegating to a fresh
 * [ProtobufToAirbyteValue] built for [fields].
 */
fun DestinationRecordProtobufSource.toAirbyteValue(
    fields: Array<AirbyteValueProxy.FieldAccessor>
): AirbyteValue = ProtobufToAirbyteValue(fields = fields).convert(this)

View File

@@ -4,12 +4,11 @@
package io.airbyte.cdk.load.dataflow.transform.medium
import com.fasterxml.jackson.core.io.BigDecimalParser
import com.fasterxml.jackson.core.io.BigIntegerParser
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.ArrayValue
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.BooleanValue
@@ -22,7 +21,10 @@ import io.airbyte.cdk.load.data.NullValue
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.NumberValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.ProtobufTypeMismatchException
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
@@ -43,12 +45,16 @@ import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.cdk.load.message.DestinationRecordRaw
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.util.Jsons
import io.airbyte.cdk.load.util.serializeToString
import io.airbyte.cdk.protocol.AirbyteValueProtobufDecoder
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import io.airbyte.protocol.protobuf.AirbyteRecordMessage.AirbyteValueProtobuf
import java.math.BigDecimal
import java.math.BigInteger
import java.time.Instant
import java.time.LocalDate
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.concurrent.ConcurrentHashMap
import javax.inject.Singleton
@@ -64,6 +70,7 @@ class ProtobufConverter(
) {
private val isNoOpMapper = columnNameMapper is NoOpColumnNameMapper
private val decoder = AirbyteValueProtobufDecoder()
private val perStreamMappedNames =
ConcurrentHashMap<DestinationStream.Descriptor, Array<String>>()
@@ -115,7 +122,13 @@ class ProtobufConverter(
airbyteMetaField = null
)
val airbyteValue = extractTypedValue(protobufValue, accessor, enrichedValue)
val airbyteValue =
extractTypedValue(
protobufValue = protobufValue,
accessor = accessor,
enrichedValue = enrichedValue,
streamName = stream.unmappedDescriptor.name
)
enrichedValue.abValue = airbyteValue
val mappedValue = coercer.map(enrichedValue)
@@ -186,15 +199,18 @@ class ProtobufConverter(
private fun extractTypedValue(
protobufValue: AirbyteValueProtobuf?,
accessor: FieldAccessor,
enrichedValue: EnrichedAirbyteValue
enrichedValue: EnrichedAirbyteValue,
streamName: String
): AirbyteValue {
if (protobufValue == null || protobufValue.isNull) {
if (
protobufValue == null || protobufValue.valueCase == AirbyteValueProtobuf.ValueCase.NULL
) {
return NullValue
}
return try {
// Step 1: Extract raw value from protobuf using the right method based on type
val rawValue = extractRawValue(protobufValue, accessor)
val rawValue = extractRawValue(protobufValue, accessor, streamName)
// Step 2: Decide final target type (check for destination override first)
val targetClass =
@@ -202,6 +218,8 @@ class ProtobufConverter(
// Step 3: Create AirbyteValue of the target type using the raw value
createAirbyteValue(rawValue, targetClass)
} catch (e: ProtobufTypeMismatchException) {
throw e
} catch (_: Exception) {
// Add parsing error to metadata
enrichedValue.changes.add(
@@ -217,43 +235,102 @@ class ProtobufConverter(
private fun extractRawValue(
protobufValue: AirbyteValueProtobuf,
accessor: FieldAccessor
accessor: FieldAccessor,
streamName: String
): Any? {
// Validate that the protobuf value type matches the expected AirbyteType
validateProtobufType(protobufValue, accessor.type, streamName, accessor.name)
// Use the centralized decoder for all scalar and temporal types
val decodedValue = decoder.decode(protobufValue)
// For complex types (arrays, objects, unions), handle separately
return when (accessor.type) {
is BooleanType -> protobufValue.boolean
is StringType -> protobufValue.string
is IntegerType -> {
if (protobufValue.hasBigInteger()) {
BigIntegerParser.parseWithFastParser(protobufValue.bigInteger)
} else {
protobufValue.integer.toBigInteger()
}
}
is NumberType -> {
if (protobufValue.hasBigDecimal()) {
BigDecimalParser.parseWithFastParser(protobufValue.bigDecimal)
} else if (protobufValue.hasNumber()) {
protobufValue.number.toBigDecimal()
} else {
null
}
}
is DateType -> protobufValue.date
is TimestampTypeWithTimezone -> protobufValue.timestampWithTimezone
is TimestampTypeWithoutTimezone -> protobufValue.timestampWithoutTimezone
is TimeTypeWithTimezone -> protobufValue.timeWithTimezone
is TimeTypeWithoutTimezone -> protobufValue.timeWithoutTimezone
is UnionType,
is ArrayType,
is ObjectType -> {
val jsonNode = Jsons.readTree(protobufValue.json.toByteArray())
jsonNode.toAirbyteValue()
if (decodedValue is String) {
// If decoder returned a JSON string, parse it
val jsonNode = Jsons.readTree(decodedValue.toByteArray())
jsonNode.toAirbyteValue()
} else {
decodedValue
}
}
is UnknownType -> null
else -> {
val jsonNode = Jsons.readTree(protobufValue.json.toByteArray())
jsonNode.serializeToString()
else -> decodedValue
}
}
/**
 * Validates that the protobuf value case matches the expected AirbyteType.
 *
 * Values whose case is `NULL`, `VALUE_NOT_SET` (or a `null` case) are always accepted, whatever
 * the expected type is.
 *
 * @param value the protobuf value whose `valueCase` is checked
 * @param expectedType the declared type of the column
 * @param streamName stream name, used only to populate the exception
 * @param columnName column name, used only to populate the exception
 * @throws ProtobufTypeMismatchException if there's a type mismatch
 */
private fun validateProtobufType(
    value: AirbyteValueProtobuf,
    expectedType: io.airbyte.cdk.load.data.AirbyteType,
    streamName: String,
    columnName: String
) {
    val valueCase = value.valueCase
    // Null values are always valid
    if (
        valueCase == null ||
            valueCase == AirbyteValueProtobuf.ValueCase.NULL ||
            valueCase == AirbyteValueProtobuf.ValueCase.VALUE_NOT_SET
    ) {
        return
    }
    if (!isCompatibleValueCase(valueCase, expectedType)) {
        throw ProtobufTypeMismatchException(
            streamName = streamName,
            columnName = columnName,
            expectedType = expectedType,
            actualValueCase = valueCase
        )
    }
}

/**
 * Pure predicate behind [validateProtobufType]: tells whether a non-null [valueCase] is an
 * acceptable encoding for [expectedType].
 *
 * Kept separate so that union options can be probed recursively without throwing and catching
 * an exception per non-matching option.
 */
private fun isCompatibleValueCase(
    valueCase: AirbyteValueProtobuf.ValueCase,
    expectedType: io.airbyte.cdk.load.data.AirbyteType
): Boolean =
    when (expectedType) {
        is StringType -> valueCase == AirbyteValueProtobuf.ValueCase.STRING
        is BooleanType -> valueCase == AirbyteValueProtobuf.ValueCase.BOOLEAN
        is IntegerType ->
            valueCase == AirbyteValueProtobuf.ValueCase.INTEGER ||
                valueCase == AirbyteValueProtobuf.ValueCase.BIG_INTEGER
        is NumberType ->
            valueCase == AirbyteValueProtobuf.ValueCase.NUMBER ||
                valueCase == AirbyteValueProtobuf.ValueCase.BIG_DECIMAL
        is DateType -> valueCase == AirbyteValueProtobuf.ValueCase.DATE
        is TimeTypeWithTimezone ->
            valueCase == AirbyteValueProtobuf.ValueCase.TIME_WITH_TIMEZONE
        is TimeTypeWithoutTimezone ->
            valueCase == AirbyteValueProtobuf.ValueCase.TIME_WITHOUT_TIMEZONE
        is TimestampTypeWithTimezone ->
            valueCase == AirbyteValueProtobuf.ValueCase.TIMESTAMP_WITH_TIMEZONE
        is TimestampTypeWithoutTimezone ->
            valueCase == AirbyteValueProtobuf.ValueCase.TIMESTAMP_WITHOUT_TIMEZONE
        is ArrayType,
        is ArrayTypeWithoutSchema,
        is ObjectType,
        is ObjectTypeWithEmptySchema,
        is ObjectTypeWithoutSchema -> valueCase == AirbyteValueProtobuf.ValueCase.JSON
        // For union types, the value must be JSON or match at least one of the options
        is UnionType ->
            valueCase == AirbyteValueProtobuf.ValueCase.JSON ||
                expectedType.options.any { option -> isCompatibleValueCase(valueCase, option) }
        is UnknownType -> true // Unknown types accept any value
    }
@@ -288,14 +365,38 @@ class ProtobufConverter(
return when (targetClass) {
BooleanValue::class.java -> BooleanValue(rawValue as Boolean)
StringValue::class.java -> StringValue(rawValue.toString())
IntegerValue::class.java -> IntegerValue(rawValue as java.math.BigInteger)
IntegerValue::class.java -> IntegerValue(rawValue as BigInteger)
NumberValue::class.java -> NumberValue(rawValue as BigDecimal)
DateValue::class.java -> DateValue(rawValue as String)
TimestampWithTimezoneValue::class.java -> TimestampWithTimezoneValue(rawValue as String)
DateValue::class.java ->
when (rawValue) {
is LocalDate -> DateValue(rawValue)
is String -> DateValue(rawValue)
else -> DateValue(rawValue.toString())
}
TimestampWithTimezoneValue::class.java ->
when (rawValue) {
is OffsetDateTime -> TimestampWithTimezoneValue(rawValue)
is String -> TimestampWithTimezoneValue(rawValue)
else -> TimestampWithTimezoneValue(rawValue.toString())
}
TimestampWithoutTimezoneValue::class.java ->
TimestampWithoutTimezoneValue(rawValue as String)
TimeWithTimezoneValue::class.java -> TimeWithTimezoneValue(rawValue as String)
TimeWithoutTimezoneValue::class.java -> TimeWithoutTimezoneValue(rawValue as String)
when (rawValue) {
is java.time.LocalDateTime -> TimestampWithoutTimezoneValue(rawValue)
is String -> TimestampWithoutTimezoneValue(rawValue)
else -> TimestampWithoutTimezoneValue(rawValue.toString())
}
TimeWithTimezoneValue::class.java ->
when (rawValue) {
is OffsetTime -> TimeWithTimezoneValue(rawValue)
is String -> TimeWithTimezoneValue(rawValue)
else -> TimeWithTimezoneValue(rawValue.toString())
}
TimeWithoutTimezoneValue::class.java ->
when (rawValue) {
is LocalTime -> TimeWithoutTimezoneValue(rawValue)
is String -> TimeWithoutTimezoneValue(rawValue)
else -> TimeWithoutTimezoneValue(rawValue.toString())
}
NullValue::class.java -> NullValue
AirbyteValue::class.java ->
rawValue as AirbyteValue // Already an AirbyteValue (JSON types)

View File

@@ -12,7 +12,6 @@ import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.NullValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.data.protobuf.toAirbyteValue
import io.airbyte.cdk.load.state.CheckpointId
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import java.util.*
@@ -47,7 +46,7 @@ data class DestinationRecordRaw(
stream = stream,
data =
if (rawData is DestinationRecordProtobufSource) {
rawData.toAirbyteValue(stream.airbyteValueProxyFieldAccessors)
throw RuntimeException("DestinationRecordProtobufSource not supported")
} else asJsonRecord().toAirbyteValue(),
emittedAtMs = rawData.emittedAtMs,
meta = rawData.sourceMeta,

View File

@@ -2,6 +2,8 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
@file:Suppress("DEPRECATION")
package io.airbyte.cdk.load.message
import com.fasterxml.jackson.databind.JsonNode
@@ -81,6 +83,7 @@ value class DestinationRecordProtobufSource(val source: AirbyteMessageProtobuf)
override fun asJsonRecord(orderedSchema: Array<FieldAccessor>): JsonNode =
asAirbyteValueProxy().asJson(orderedSchema)
@Suppress("DEPRECATION")
override fun asAirbyteValueProxy(): AirbyteValueProxy =
AirbyteValueProtobufProxy(source.record.dataList)
}

View File

@@ -26,9 +26,9 @@ class AirbyteValueProxyTest {
"integerField": 1234567890123456789,
"numberField": 1234567890123456789.123456789,
"dateField": "2023-10-01",
"timeWithTimezoneField": "12:34:56+00:00",
"timeWithTimezoneField": "12:34:56Z",
"timeWithoutTimezoneField": "12:34:56",
"timestampWithTimezoneField": "2023-10-01T12:34:56+00:00",
"timestampWithTimezoneField": "2023-10-01T12:34:56Z",
"timestampWithoutTimezoneField": "2023-10-01T12:34:56",
"objectField": {"key": "value"},
"arrayField": [1, 2, 3],
@@ -137,7 +137,7 @@ class AirbyteValueProxyTest {
data.add(protoField)
}
val proxy = AirbyteValueProtobufProxy(data)
@Suppress("DEPRECATION") val proxy = AirbyteValueProtobufProxy(data)
validate(objectTree, proxy)
}
}

View File

@@ -1,651 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.data.protobuf
import com.google.protobuf.kotlin.toByteStringUtf8
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.BooleanValue
import io.airbyte.cdk.load.data.DateType
import io.airbyte.cdk.load.data.DateValue
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.NullValue
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.NumberValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimeWithTimezoneValue
import io.airbyte.cdk.load.data.TimeWithoutTimezoneValue
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.protocol.protobuf.AirbyteMessage.AirbyteMessageProtobuf
import io.airbyte.protocol.protobuf.AirbyteRecordMessage
import io.mockk.mockk
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
/**
 * Tests for [ProtobufToAirbyteValue]: each case builds a protobuf record, converts it against a
 * stream schema, and checks the resulting [ObjectValue] entry for the field under test.
 *
 * Record, stream and conversion boilerplate lives in the private helpers at the bottom of the
 * class so that every test only states its field type, its protobuf payload and its expectation.
 */
internal class ProtobufToAirbyteValueTest {
    @Test
    fun testString() {
        val fieldName = "testString"
        val expectedValue = "testValue"
        val field = convertSingleField(fieldName, StringType, newValue().setString(expectedValue))
        assertTrue(field is StringValue)
        assertEquals(expectedValue, (field as StringValue).value)
    }

    @Test
    fun testBoolean() {
        val fieldName = "testBoolean"
        val expectedValue = true
        val field =
            convertSingleField(fieldName, BooleanType, newValue().setBoolean(expectedValue))
        assertTrue(field is BooleanValue)
        assertEquals(expectedValue, (field as BooleanValue).value)
    }

    @Test
    fun testInteger() {
        val fieldName = "testInteger"
        val expectedValue = 1L
        val field =
            convertSingleField(fieldName, IntegerType, newValue().setInteger(expectedValue))
        assertTrue(field is IntegerValue)
        assertEquals(expectedValue.toBigInteger(), (field as IntegerValue).value)
    }

    @Test
    fun testNumber() {
        val fieldName = "testNumber"
        val expectedValue = 1.0
        val field = convertSingleField(fieldName, NumberType, newValue().setNumber(expectedValue))
        assertTrue(field is NumberValue)
        assertEquals(expectedValue.toBigDecimal(), (field as NumberValue).value)
    }

    @Test
    fun testObject() {
        assertJsonObjectField(
            fieldName = "testObject",
            fieldType =
                ObjectType(properties = linkedMapOf("name" to FieldType(StringType, false))),
        )
    }

    @Test
    fun testArray() {
        assertJsonObjectField(
            fieldName = "testArray",
            fieldType = ArrayType(FieldType(StringType, false)),
        )
    }

    @Test
    fun testArrayWithoutSchema() {
        assertJsonObjectField(
            fieldName = "testArrayWithoutSchema",
            fieldType = ArrayTypeWithoutSchema,
        )
    }

    @Test
    fun testUnion() {
        assertJsonObjectField(
            fieldName = "testUnion",
            fieldType = UnionType(setOf(StringType), false),
        )
    }

    @Test
    fun testObjectWithEmptySchema() {
        assertJsonObjectField(
            fieldName = "testObjectWithEmptySchema",
            fieldType = ObjectTypeWithEmptySchema,
        )
    }

    @Test
    fun testObjectWithoutSchema() {
        assertJsonObjectField(
            fieldName = "testObjectWithoutSchema",
            fieldType = ObjectTypeWithoutSchema,
        )
    }

    @Test
    fun testDate() {
        val fieldName = "testDate"
        val expectedValue = "2025-01-01"
        val field = convertSingleField(fieldName, DateType, newValue().setDate(expectedValue))
        assertTrue(field is DateValue)
        assertEquals(LocalDate.parse(expectedValue), (field as DateValue).value)
    }

    @Test
    fun testTimeWithTimezone() {
        val fieldName = "testTimeWithTimezone"
        val expectedValue = "10:15:30+01:00"
        val field =
            convertSingleField(
                fieldName,
                TimeTypeWithTimezone,
                newValue().setTimeWithTimezone(expectedValue)
            )
        assertTrue(field is TimeWithTimezoneValue)
        assertEquals(OffsetTime.parse(expectedValue), (field as TimeWithTimezoneValue).value)
    }

    @Test
    fun testTimeWithoutTimezone() {
        val fieldName = "testTimeWithoutTimezone"
        val expectedValue = "10:15:30"
        val field =
            convertSingleField(
                fieldName,
                TimeTypeWithoutTimezone,
                newValue().setTimeWithoutTimezone(expectedValue)
            )
        assertTrue(field is TimeWithoutTimezoneValue)
        assertEquals(LocalTime.parse(expectedValue), (field as TimeWithoutTimezoneValue).value)
    }

    @Test
    fun testTimestampWithTimezone() {
        val fieldName = "testTimestampWithTimezone"
        val expectedValue = "2007-12-03T10:15:30+01:00"
        val field =
            convertSingleField(
                fieldName,
                TimestampTypeWithTimezone,
                newValue().setTimestampWithTimezone(expectedValue)
            )
        assertTrue(field is TimestampWithTimezoneValue)
        assertEquals(
            OffsetDateTime.parse(expectedValue),
            (field as TimestampWithTimezoneValue).value
        )
    }

    @Test
    fun testTimestampWithoutTimezone() {
        val fieldName = "testTimestampWithoutTimezone"
        val expectedValue = "2007-12-03T10:15:30"
        val field =
            convertSingleField(
                fieldName,
                TimestampTypeWithoutTimezone,
                newValue().setTimestampWithoutTimezone(expectedValue)
            )
        assertTrue(field is TimestampWithoutTimezoneValue)
        assertEquals(
            LocalDateTime.parse(expectedValue),
            (field as TimestampWithoutTimezoneValue).value
        )
    }

    @Test
    fun testNull() {
        val field = convertSingleField("testString", StringType, newValue().setIsNull(true))
        assertTrue(field is NullValue)
    }

    @Test
    fun testNullValueForMissingValues() {
        val stream =
            streamOf(
                ObjectType(
                    properties =
                        linkedMapOf(
                            "field1" to FieldType(BooleanType, false),
                            "field2" to FieldType(IntegerType, false),
                            "field3" to FieldType(NumberType, false),
                            "field4" to FieldType(DateType, false),
                            "field5" to FieldType(ObjectType(properties = linkedMapOf()), false),
                            "field6" to FieldType(StringType, false),
                            "field7" to FieldType(TimeTypeWithTimezone, false),
                            "field8" to FieldType(TimeTypeWithoutTimezone, false),
                            "field9" to FieldType(TimestampTypeWithTimezone, false),
                            "field10" to FieldType(TimestampTypeWithoutTimezone, false),
                        )
                )
            )
        // The record carries no data entries at all, so every schema field is missing.
        val value =
            ProtobufToAirbyteValue(fields = stream.airbyteValueProxyFieldAccessors)
                .convert(recordOf())
        assertTrue(value is ObjectValue)
        assertEquals(0, (value as ObjectValue).values.count { it.value !is NullValue })
    }

    @Test
    fun testExtensionFunction() {
        val fieldName = "testString"
        val expectedValue = "testValue"
        val stream = createStream(fieldName = fieldName, fieldType = StringType)
        val value =
            recordOf(newValue().setString(expectedValue))
                .toAirbyteValue(fields = stream.airbyteValueProxyFieldAccessors)
        assertTrue(value is ObjectValue)
        assertTrue((value as ObjectValue).values[fieldName] is StringValue)
        assertEquals(expectedValue, (value.values[fieldName] as StringValue).value)
    }

    /** Starts a fresh, empty protobuf value builder. */
    private fun newValue(): AirbyteRecordMessage.AirbyteValueProtobuf.Builder =
        AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()

    /**
     * Wraps [values] (in order) as the data entries of a record message for stream "test" and
     * returns it as a [DestinationRecordProtobufSource]. With no arguments the record has no data.
     */
    private fun recordOf(
        vararg values: AirbyteRecordMessage.AirbyteValueProtobuf.Builder
    ): DestinationRecordProtobufSource {
        val record =
            AirbyteRecordMessage.AirbyteRecordMessageProtobuf.newBuilder()
                .setStreamName("test")
                .setEmittedAtMs(1234)
        values.forEach { record.addData(it) }
        record.setPartitionId("checkpoint_id")
        return DestinationRecordProtobufSource(
            source = AirbyteMessageProtobuf.newBuilder().setRecord(record.build()).build()
        )
    }

    /**
     * Converts a record holding only [value] against a one-field schema ([fieldName] of
     * [fieldType]), asserts the result is an [ObjectValue], and returns its entry for [fieldName].
     */
    private fun convertSingleField(
        fieldName: String,
        fieldType: AirbyteType,
        value: AirbyteRecordMessage.AirbyteValueProtobuf.Builder,
    ): AirbyteValue? {
        val stream = createStream(fieldName = fieldName, fieldType = fieldType)
        val converted =
            ProtobufToAirbyteValue(fields = stream.airbyteValueProxyFieldAccessors)
                .convert(recordOf(value))
        assertTrue(converted is ObjectValue)
        return (converted as ObjectValue).values[fieldName]
    }

    /**
     * Shared body of the JSON-backed type tests: sends the JSON object `{"name":"testObject"}` for
     * a field declared as [fieldType] and expects it back as an [ObjectValue] whose "name" entry
     * is the string "testObject". The payload is a JSON object even when [fieldType] is an array
     * or union type.
     */
    private fun assertJsonObjectField(fieldName: String, fieldType: AirbyteType) {
        val expectedValue = "{\"name\":\"testObject\"}"
        val field =
            convertSingleField(
                fieldName,
                fieldType,
                newValue().setJson(expectedValue.toByteStringUtf8())
            )
        assertTrue(field is ObjectValue)
        assertEquals(
            "testObject",
            ((field as ObjectValue).values["name"] as StringValue).value
        )
    }

    /** Builds a stream named "test" whose schema has the single field [fieldName]. */
    private fun createStream(fieldName: String, fieldType: AirbyteType): DestinationStream =
        streamOf(ObjectType(properties = linkedMapOf(fieldName to FieldType(fieldType, false))))

    /** Builds the append-mode "test" stream used by every case, with the given [schema]. */
    private fun streamOf(schema: ObjectType): DestinationStream =
        DestinationStream(
            unmappedNamespace = null,
            unmappedName = "test",
            importType = Append,
            schema = schema,
            generationId = 1L,
            minimumGenerationId = 1L,
            syncId = 1L,
            namespaceMapper = mockk<NamespaceMapper>(relaxed = true),
            isFileBased = false,
            includeFiles = false,
            destinationObjectName = null,
            matchingKey = null,
        )
}

View File

@@ -4,7 +4,7 @@
package io.airbyte.cdk.load.dataflow.transform
import com.google.protobuf.kotlin.toByteString
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.computeUnknownColumnChanges
import io.airbyte.cdk.load.data.*
@@ -28,6 +28,7 @@ import io.airbyte.cdk.load.dataflow.transform.medium.ProtobufConverter
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.cdk.load.message.DestinationRecordRaw
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.protocol.AirbyteValueProtobufEncoder
import io.airbyte.protocol.models.Jsons
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import io.airbyte.protocol.protobuf.AirbyteMessage.AirbyteMessageProtobuf
@@ -55,6 +56,7 @@ import org.junit.jupiter.api.extension.ExtendWith
@ExtendWith(MockKExtension::class)
class ProtobufRecordMungerTest {
private val encoder = AirbyteValueProtobufEncoder()
private val uuid: UUID = UUID.fromString("11111111-1111-1111-1111-111111111111")
private val emittedAtMs = 1_724_438_400_000L
private val syncId = 42L
@@ -135,35 +137,31 @@ class ProtobufRecordMungerTest {
val protoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(true).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setInteger(123).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(12.34).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("hello").build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setDate("2025-06-17")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithTimezone("23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithoutTimezone("23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone("2025-06-17T23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone("2025-06-17T23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""["a","b"]""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"k":"v"}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"u":1}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build(),
encoder.encode(true, LeafAirbyteSchemaType.BOOLEAN),
encoder.encode(123L, LeafAirbyteSchemaType.INTEGER),
encoder.encode(12.34, LeafAirbyteSchemaType.NUMBER),
encoder.encode("hello", LeafAirbyteSchemaType.STRING),
encoder.encode(LocalDate.parse("2025-06-17"), LeafAirbyteSchemaType.DATE),
encoder.encode(
OffsetTime.parse("23:59:59+02:00"),
LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
encoder.encode(
LocalTime.parse("23:59:59"),
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
encoder.encode(
OffsetDateTime.parse("2025-06-17T23:59:59+02:00"),
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
),
encoder.encode(
LocalDateTime.parse("2025-06-17T23:59:59"),
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
encoder.encode("""["a","b"]""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"k":"v"}""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"u":1}""", LeafAirbyteSchemaType.JSONB),
encoder.encode(null, LeafAirbyteSchemaType.STRING),
)
val metaProto =
@@ -207,7 +205,7 @@ class ProtobufRecordMungerTest {
AirbyteRecordMessage.AirbyteRecordMessageProtobuf.newBuilder()
.setStreamName("dummy")
.setEmittedAtMs(emittedAtMs)
.addAllData(protoValues)
.addAllData(protoValues.map { it.build() })
.setMeta(metaProto)
.build()
@@ -254,6 +252,8 @@ class ProtobufRecordMungerTest {
every { this@mockk.generationId } returns this@ProtobufRecordMungerTest.generationId
every { this@mockk.schema } returns dummyType
every { this@mockk.mappedDescriptor } returns DestinationStream.Descriptor("", "dummy")
every { this@mockk.unmappedDescriptor } returns
DestinationStream.Descriptor("", "dummy")
every { this@mockk.unknownColumnChanges } returns
dummyType.computeUnknownColumnChanges()
}
@@ -417,43 +417,43 @@ class ProtobufRecordMungerTest {
val nullProtoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // bool_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // int_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // num_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // string_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // date_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // time_tz_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // time_no_tz_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // ts_tz_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // ts_no_tz_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // array_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // obj_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // union_col
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setIsNull(true)
.setNull(com.google.protobuf.NullValue.NULL_VALUE)
.build(), // unknown_col
)
@@ -484,40 +484,34 @@ class ProtobufRecordMungerTest {
val bigInteger = BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)
val oversizedProtoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(true).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setBigInteger(bigInteger.toString())
.build(), // Oversized int
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(12.34).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("hello").build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setDate("2025-06-17")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithTimezone("23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithoutTimezone("23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone("2025-06-17T23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone("2025-06-17T23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""["a","b"]""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"k":"v"}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"u":1}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build(),
encoder.encode(true, LeafAirbyteSchemaType.BOOLEAN),
encoder.encode(bigInteger, LeafAirbyteSchemaType.INTEGER), // Oversized int
encoder.encode(12.34, LeafAirbyteSchemaType.NUMBER),
encoder.encode("hello", LeafAirbyteSchemaType.STRING),
encoder.encode(LocalDate.parse("2025-06-17"), LeafAirbyteSchemaType.DATE),
encoder.encode(
OffsetTime.parse("23:59:59+02:00"),
LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
encoder.encode(
LocalTime.parse("23:59:59"),
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
encoder.encode(
OffsetDateTime.parse("2025-06-17T23:59:59+02:00"),
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
),
encoder.encode(
LocalDateTime.parse("2025-06-17T23:59:59"),
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
encoder.encode("""["a","b"]""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"k":"v"}""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"u":1}""", LeafAirbyteSchemaType.JSONB),
encoder.encode(null, LeafAirbyteSchemaType.STRING),
)
val oversizedRecord = buildModifiedRecord(oversizedProtoValues)
val oversizedRecord = buildModifiedRecord(oversizedProtoValues.map { it.build() })
every { record.rawData } returns oversizedRecord
val result = munger.transformForDest(record)
@@ -543,40 +537,45 @@ class ProtobufRecordMungerTest {
@Test
fun `handles invalid timestamp with proper error tracking`() {
// Create an invalid timestamp using a manually crafted protobuf value
// that will fail parsing in the decoder
val invalidTimestampValue =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone(
AirbyteRecordMessage.OffsetDateTime.newBuilder()
.setEpochSecond(Long.MAX_VALUE) // Invalid - will cause overflow
.setNano(999999999)
.setOffsetSeconds(7200)
.build()
)
val invalidTimestampProtoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(true).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setInteger(123).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(12.34).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("hello").build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setDate("2025-06-17")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithTimezone("23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithoutTimezone("23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone("invalid-timestamp")
.build(), // Invalid timestamp
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone("2025-06-17T23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""["a","b"]""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"k":"v"}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"u":1}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build(),
encoder.encode(true, LeafAirbyteSchemaType.BOOLEAN),
encoder.encode(123L, LeafAirbyteSchemaType.INTEGER),
encoder.encode(12.34, LeafAirbyteSchemaType.NUMBER),
encoder.encode("hello", LeafAirbyteSchemaType.STRING),
encoder.encode(LocalDate.parse("2025-06-17"), LeafAirbyteSchemaType.DATE),
encoder.encode(
OffsetTime.parse("23:59:59+02:00"),
LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
encoder.encode(
LocalTime.parse("23:59:59"),
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
invalidTimestampValue, // Invalid timestamp
encoder.encode(
LocalDateTime.parse("2025-06-17T23:59:59"),
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
encoder.encode("""["a","b"]""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"k":"v"}""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"u":1}""", LeafAirbyteSchemaType.JSONB),
encoder.encode(null, LeafAirbyteSchemaType.STRING),
)
val invalidRecord = buildModifiedRecord(invalidTimestampProtoValues)
val invalidRecord = buildModifiedRecord(invalidTimestampProtoValues.map { it.build() })
every { record.rawData } returns invalidRecord
val result = munger.transformForDest(record)
@@ -623,38 +622,35 @@ class ProtobufRecordMungerTest {
fun `handles empty arrays and objects`() {
val emptyComplexTypesProtoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(true).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setInteger(123).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(12.34).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("hello").build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setDate("2025-06-17")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithTimezone("23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithoutTimezone("23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone("2025-06-17T23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone("2025-06-17T23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""[]""".toByteArray().toByteString()) // Empty array
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{}""".toByteArray().toByteString()) // Empty object
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"u":1}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build(),
encoder.encode(true, LeafAirbyteSchemaType.BOOLEAN),
encoder.encode(123L, LeafAirbyteSchemaType.INTEGER),
encoder.encode(12.34, LeafAirbyteSchemaType.NUMBER),
encoder.encode("hello", LeafAirbyteSchemaType.STRING),
encoder.encode(LocalDate.parse("2025-06-17"), LeafAirbyteSchemaType.DATE),
encoder.encode(
OffsetTime.parse("23:59:59+02:00"),
LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
encoder.encode(
LocalTime.parse("23:59:59"),
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
encoder.encode(
OffsetDateTime.parse("2025-06-17T23:59:59+02:00"),
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
),
encoder.encode(
LocalDateTime.parse("2025-06-17T23:59:59"),
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
encoder.encode("""[]""", LeafAirbyteSchemaType.JSONB), // Empty array
encoder.encode("""{}""", LeafAirbyteSchemaType.JSONB), // Empty object
encoder.encode("""{"u":1}""", LeafAirbyteSchemaType.JSONB),
encoder.encode(null, LeafAirbyteSchemaType.STRING),
)
val emptyComplexRecord = buildModifiedRecord(emptyComplexTypesProtoValues)
val emptyComplexRecord =
buildModifiedRecord(emptyComplexTypesProtoValues.map { it.build() })
every { record.rawData } returns emptyComplexRecord
val result = munger.transformForDest(record)
@@ -668,63 +664,54 @@ class ProtobufRecordMungerTest {
}
@Test
fun `handles invalid date format with proper error tracking`() {
fun `throws ProtobufTypeMismatchException when date field uses wrong setter`() {
// Create an invalid date using setString() instead of the proper date setter
val invalidDateValue =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("invalid-date")
val invalidDateProtoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(true).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setInteger(123).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(12.34).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("hello").build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setDate("invalid-date") // Invalid date format
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithTimezone("23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithoutTimezone("23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone("2025-06-17T23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone("2025-06-17T23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""["a","b"]""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"k":"v"}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"u":1}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build(),
encoder.encode(true, LeafAirbyteSchemaType.BOOLEAN),
encoder.encode(123L, LeafAirbyteSchemaType.INTEGER),
encoder.encode(12.34, LeafAirbyteSchemaType.NUMBER),
encoder.encode("hello", LeafAirbyteSchemaType.STRING),
invalidDateValue, // Wrong setter used - should use date setter, not setString()
encoder.encode(
OffsetTime.parse("23:59:59+02:00"),
LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
encoder.encode(
LocalTime.parse("23:59:59"),
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
encoder.encode(
OffsetDateTime.parse("2025-06-17T23:59:59+02:00"),
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
),
encoder.encode(
LocalDateTime.parse("2025-06-17T23:59:59"),
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
encoder.encode("""["a","b"]""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"k":"v"}""", LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"u":1}""", LeafAirbyteSchemaType.JSONB),
encoder.encode(null, LeafAirbyteSchemaType.STRING),
)
val invalidDateRecord = buildModifiedRecord(invalidDateProtoValues)
val invalidDateRecord = buildModifiedRecord(invalidDateProtoValues.map { it.build() })
every { record.rawData } returns invalidDateRecord
val result = munger.transformForDest(record)
// Assert that ProtobufTypeMismatchException is thrown
val exception =
assertThrows(ProtobufTypeMismatchException::class.java) {
munger.transformForDest(record)
}
// Date field should be excluded due to parsing error
assertTrue(result.containsKey("mapped_date_col"))
assertTrue(result.get("mapped_date_col") is NullValue)
// Check that error was tracked in meta object
val metaValue = result[Meta.COLUMN_NAME_AB_META] as ObjectValue
val changesArray = metaValue.values["changes"] as ArrayValue
assertTrue(changesArray.values.isNotEmpty())
// Verify that parsing failure is present in the changes
val changes = changesArray.values.filterIsInstance<ObjectValue>()
val dateError = changes.find { (it.values["field"] as StringValue).value == "date_col" }
assertNotNull(dateError)
assertEquals("NULLED", (dateError!!.values["change"] as StringValue).value)
assertEquals(
"DESTINATION_SERIALIZATION_ERROR",
(dateError.values["reason"] as StringValue).value
)
// Verify the error message contains expected information
assertTrue(exception.message!!.contains("stream 'dummy'"))
assertTrue(exception.message!!.contains("column 'date_col'"))
assertTrue(exception.message!!.contains("Expected AirbyteType: DateType"))
assertTrue(exception.message!!.contains("Actual protobuf ValueCase: STRING"))
}
private fun buildModifiedRecord(

View File

@@ -4,7 +4,7 @@
package io.airbyte.cdk.load.dataflow.transform.medium
import com.google.protobuf.ByteString
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.*
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
@@ -13,6 +13,7 @@ import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.cdk.load.message.DestinationRecordRaw
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.protocol.AirbyteValueProtobufEncoder
import io.airbyte.protocol.models.Jsons
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import io.airbyte.protocol.protobuf.AirbyteMessage.AirbyteMessageProtobuf
@@ -24,12 +25,20 @@ import io.mockk.mockk
import io.mockk.verify
import java.math.BigDecimal
import java.math.BigInteger
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.UUID
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test
class ProtobufConverterTest {
private val encoder = AirbyteValueProtobufEncoder()
private fun createMockCoercerPassThrough(): ValueCoercer =
mockk<ValueCoercer> {
every { representAs(any()) } returns null
@@ -48,48 +57,37 @@ class ProtobufConverterTest {
every { this@mockk.index } returns idx
}
private fun vBoolean(b: Boolean) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(b).build()
private fun vBoolean(b: Boolean) = encoder.encode(b, LeafAirbyteSchemaType.BOOLEAN)
private fun vInteger(i: Long) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setInteger(i).build()
private fun vInteger(i: Long) = encoder.encode(i, LeafAirbyteSchemaType.INTEGER)
private fun vBigInteger(str: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBigInteger(str).build()
encoder.encode(BigInteger(str), LeafAirbyteSchemaType.INTEGER)
private fun vNumber(d: Double) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(d).build()
private fun vNumber(d: Double) = encoder.encode(d, LeafAirbyteSchemaType.NUMBER)
private fun vBigDecimal(str: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBigDecimal(str).build()
encoder.encode(BigDecimal(str), LeafAirbyteSchemaType.NUMBER)
private fun vString(s: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString(s).build()
private fun vString(s: String) = encoder.encode(s, LeafAirbyteSchemaType.STRING)
private fun vDate(iso: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setDate(iso).build()
private fun vDate(date: LocalDate) = encoder.encode(date, LeafAirbyteSchemaType.DATE)
private fun vTimeTz(s: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setTimeWithTimezone(s).build()
private fun vTimeTz(time: OffsetTime) =
encoder.encode(time, LeafAirbyteSchemaType.TIME_WITH_TIMEZONE)
private fun vTimeNoTz(s: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setTimeWithoutTimezone(s).build()
private fun vTimeNoTz(time: LocalTime) =
encoder.encode(time, LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE)
private fun vTsTz(s: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setTimestampWithTimezone(s).build()
private fun vTsTz(ts: OffsetDateTime) =
encoder.encode(ts, LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE)
private fun vTsNoTz(s: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone(s)
.build()
private fun vTsNoTz(ts: LocalDateTime) =
encoder.encode(ts, LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE)
private fun vJson(json: String) =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson(ByteString.copyFrom(json.toByteArray()))
.build()
private fun vJson(json: String) = encoder.encode(json, LeafAirbyteSchemaType.JSONB)
private fun vNull() =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build()
private fun vNull() = encoder.encode(null, LeafAirbyteSchemaType.STRING)
/** Build a real DestinationRecordProtobufSource without mocking the value class. */
private fun buildProtoSource(
@@ -141,6 +139,8 @@ class ProtobufConverterTest {
every { this@mockk.syncId } returns syncId
every { unknownColumnChanges } returns unknownChanges
every { mappedDescriptor } returns DestinationStream.Descriptor("namespace", "name")
every { unmappedDescriptor } returns
DestinationStream.Descriptor("namespace", "name")
}
return mockk<DestinationRecordRaw> {
every { stream } returns destinationStream
@@ -183,11 +183,16 @@ class ProtobufConverterTest {
vInteger(123),
vNumber(45.67),
vBigDecimal("999.12345"),
vDate("2025-06-17"),
vTimeTz("23:59:59+02:00"),
vTimeNoTz("23:59:59"),
vTsTz("2025-06-17T23:59:59+02:00"),
vTsNoTz("2025-06-17T23:59:59"),
vDate(LocalDate.parse("2025-06-17")),
vTimeTz(OffsetTime.of(LocalTime.parse("23:59:59"), ZoneOffset.ofHours(2))),
vTimeNoTz(LocalTime.parse("23:59:59")),
vTsTz(
OffsetDateTime.of(
LocalDateTime.parse("2025-06-17T23:59:59"),
ZoneOffset.ofHours(2)
)
),
vTsNoTz(LocalDateTime.parse("2025-06-17T23:59:59")),
vJson("""["a","b"]"""),
vJson("""{"k":"v"}"""),
vJson("""{"u":1}"""),
@@ -195,7 +200,7 @@ class ProtobufConverterTest {
)
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
val source = buildProtoSource(protoValues.map { it.build() })
val result = converter.convert(msg, source)
@@ -279,7 +284,7 @@ class ProtobufConverterTest {
)
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
val source = buildProtoSource(protoValues.map { it.build() })
val result = converter.convert(msg, source)
@@ -310,7 +315,7 @@ class ProtobufConverterTest {
val protoValues = listOf(vNull())
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
val source = buildProtoSource(protoValues.map { it.build() })
val result = converter.convert(msg, source)
assertTrue(result.containsKey("null_field"))
@@ -332,10 +337,10 @@ class ProtobufConverterTest {
val converter = ProtobufConverter(columnNameMapper, valueCoercer)
val accessors = arrayOf(fa("time_field", TimeTypeWithoutTimezone, 0))
val protoValues = listOf(vTimeNoTz("12:34:56"))
val protoValues = listOf(vTimeNoTz(LocalTime.parse("12:34:56")))
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
val source = buildProtoSource(protoValues.map { it.build() })
val result = converter.convert(msg, source)
@@ -385,7 +390,7 @@ class ProtobufConverterTest {
val protoValues = listOf(vString("hello"), vString("this_is_too_long"))
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
val source = buildProtoSource(protoValues.map { it.build() })
val result = converter.convert(msg, source)
@@ -412,7 +417,7 @@ class ProtobufConverterTest {
val protoValues = listOf(vString("test"))
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
val source = buildProtoSource(protoValues.map { it.build() })
val result = converter.convert(msg, source)
@@ -428,7 +433,10 @@ class ProtobufConverterTest {
val converter = ProtobufConverter(columnNameMapper, valueCoercer)
val accessors = arrayOf(fa("invalid_int", IntegerType, 0))
val protoValues = listOf(vBigInteger("not-a-number"))
val invalidBigInteger =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBigInteger("boom!").build()
val protoValues = listOf(invalidBigInteger)
val msg = mockMsgWithStream(accessors)
val source = buildProtoSource(protoValues)
@@ -467,7 +475,10 @@ class ProtobufConverterTest {
)
)
val protoValues = listOf(vString("hello"), vBigInteger("boom!"))
val invalidBigInteger =
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBigInteger("boom!")
val protoValues = listOf(vString("hello"), invalidBigInteger)
val unknownColumnChanges =
listOf(
@@ -479,7 +490,8 @@ class ProtobufConverterTest {
)
val msg = mockMsgWithStream(accessors, unknownChanges = unknownColumnChanges)
val source = buildProtoSource(protoValues, metaChanges = sourceSideChanges)
val source =
buildProtoSource(protoValues.map { it.build() }, metaChanges = sourceSideChanges)
val result = converter.convert(msg, source)

View File

@@ -588,7 +588,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CHECKPOINT_ID_NAME to "partition_1",
CheckpointMessage.COMMITTED_BYTES_COUNT to 57,
CheckpointMessage.COMMITTED_BYTES_COUNT to 56,
CHECKPOINT_INDEX_NAME to 1,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 1,
),
@@ -624,7 +624,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CHECKPOINT_ID_NAME to "partition_2",
CheckpointMessage.COMMITTED_BYTES_COUNT to 171,
CheckpointMessage.COMMITTED_BYTES_COUNT to 168,
CHECKPOINT_INDEX_NAME to 2,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 3,
),
@@ -645,7 +645,7 @@ abstract class BasicFunctionalityIntegrationTest(
}
@Test
fun testCDCStateTypes() {
open fun testCDCStateTypes() {
if (
dataChannelMedium != DataChannelMedium.SOCKET ||
dataChannelFormat != DataChannelFormat.PROTOBUF
@@ -945,7 +945,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 57,
CheckpointMessage.COMMITTED_BYTES_COUNT to 56,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 1,
),
it[firstStream]!!.additionalProperties,
@@ -958,7 +958,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 122,
CheckpointMessage.COMMITTED_BYTES_COUNT to 120,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 2,
),
it[secondStream]!!.additionalProperties,
@@ -971,7 +971,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 63,
CheckpointMessage.COMMITTED_BYTES_COUNT to 62,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 1,
),
it[thirdStream]!!.additionalProperties,
@@ -982,7 +982,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CHECKPOINT_ID_NAME to "outer_partition",
CheckpointMessage.COMMITTED_BYTES_COUNT to 242,
CheckpointMessage.COMMITTED_BYTES_COUNT to 238,
CHECKPOINT_INDEX_NAME to 1,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 4,
),
@@ -1021,7 +1021,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 114,
CheckpointMessage.COMMITTED_BYTES_COUNT to 112,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 2,
),
it[firstStream]!!.additionalProperties,
@@ -1034,7 +1034,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 181,
CheckpointMessage.COMMITTED_BYTES_COUNT to 178,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 3,
),
it[secondStream]!!.additionalProperties,
@@ -1047,7 +1047,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 122,
CheckpointMessage.COMMITTED_BYTES_COUNT to 120,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 2,
),
it[thirdStream]!!.additionalProperties,
@@ -1058,7 +1058,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CHECKPOINT_ID_NAME to "outer_partition_2",
CheckpointMessage.COMMITTED_BYTES_COUNT to 417,
CheckpointMessage.COMMITTED_BYTES_COUNT to 410,
CHECKPOINT_INDEX_NAME to 2,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 7,
),
@@ -1097,7 +1097,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 177,
CheckpointMessage.COMMITTED_BYTES_COUNT to 174,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 3,
),
it[firstStream]!!.additionalProperties,
@@ -1110,7 +1110,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 246,
CheckpointMessage.COMMITTED_BYTES_COUNT to 242,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 4,
),
it[secondStream]!!.additionalProperties,
@@ -1123,7 +1123,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CheckpointMessage.COMMITTED_BYTES_COUNT to 188,
CheckpointMessage.COMMITTED_BYTES_COUNT to 184,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 3,
),
it[thirdStream]!!.additionalProperties,
@@ -1134,7 +1134,7 @@ abstract class BasicFunctionalityIntegrationTest(
assertEquals(
mapOf(
CHECKPOINT_ID_NAME to "outer_partition_3",
CheckpointMessage.COMMITTED_BYTES_COUNT to 611,
CheckpointMessage.COMMITTED_BYTES_COUNT to 600,
CHECKPOINT_INDEX_NAME to 3,
CheckpointMessage.COMMITTED_RECORDS_COUNT to 10,
),
@@ -1202,7 +1202,7 @@ abstract class BasicFunctionalityIntegrationTest(
// Only used for speed mode (unnecessary to test if dest does not support speed)
val expectedBytes =
if (testSpeedModeStatsEmission) expectedBytesForMediumAndFormat(214L, 234L, 59L)
if (testSpeedModeStatsEmission) expectedBytesForMediumAndFormat(214L, 234L, 56L)
else null
assertAll(
@@ -3559,6 +3559,25 @@ abstract class BasicFunctionalityIntegrationTest(
.filter {
it != "number" || dataChannelFormat != DataChannelFormat.PROTOBUF
}
// With protobuf, temporal types are encoded as proper types (not strings),
// so it's impossible to send invalid values like "foo"
.filter { it != "date" || dataChannelFormat != DataChannelFormat.PROTOBUF }
.filter {
it != "time_with_timezone" ||
dataChannelFormat != DataChannelFormat.PROTOBUF
}
.filter {
it != "time_without_timezone" ||
dataChannelFormat != DataChannelFormat.PROTOBUF
}
.filter {
it != "timestamp_with_timezone" ||
dataChannelFormat != DataChannelFormat.PROTOBUF
}
.filter {
it != "timestamp_without_timezone" ||
dataChannelFormat != DataChannelFormat.PROTOBUF
}
.map { key ->
val change =
Change(
@@ -3592,14 +3611,25 @@ abstract class BasicFunctionalityIntegrationTest(
bigNumberChanges = emptyList()
badValuesData =
// note that the values have different types than what's declared in the schema
mapOf(
"id" to 5,
"timestamp_with_timezone" to "foo",
"timestamp_without_timezone" to "foo",
"time_with_timezone" to "foo",
"time_without_timezone" to "foo",
"date" to "foo",
) +
// With protobuf, temporal types can't be sent as strings, so send them as nulls instead
(mapOf("id" to 5) +
if (dataChannelFormat != DataChannelFormat.PROTOBUF) {
mapOf(
"timestamp_with_timezone" to "foo",
"timestamp_without_timezone" to "foo",
"time_with_timezone" to "foo",
"time_without_timezone" to "foo",
"date" to "foo",
)
} else {
mapOf(
"timestamp_with_timezone" to null,
"timestamp_without_timezone" to null,
"time_with_timezone" to null,
"time_without_timezone" to null,
"date" to null,
)
}) +
if (mismatchedTypesUnrepresentable) emptyMap()
else
mapOf(

View File

@@ -4,7 +4,7 @@
package io.airbyte.cdk.load.file.object_storage
import com.google.protobuf.kotlin.toByteString
import io.airbyte.cdk.data.LeafAirbyteSchemaType
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.computeUnknownColumnChanges
import io.airbyte.cdk.load.data.AirbyteType
@@ -25,6 +25,7 @@ import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.cdk.load.message.DestinationRecordRaw
import io.airbyte.cdk.protocol.AirbyteValueProtobufEncoder
import io.airbyte.protocol.models.Jsons
import io.airbyte.protocol.protobuf.AirbyteMessage
import io.airbyte.protocol.protobuf.AirbyteRecordMessage
@@ -32,6 +33,11 @@ import io.airbyte.protocol.protobuf.AirbyteRecordMessageMetaOuterClass
import io.mockk.every
import io.mockk.mockk
import io.mockk.unmockkAll
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.util.UUID
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.BeforeEach
@@ -87,41 +93,38 @@ abstract class ProtoFixtures(private val addUnknownTypeToSchema: Boolean) {
fieldAccessors = fields.toTypedArray()
val encoder = AirbyteValueProtobufEncoder()
val protoValues =
mutableListOf(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setBoolean(true).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setInteger(123).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setNumber(12.34).build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setString("hello").build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setDate("2025-06-17")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithTimezone("23:59:59+02:00")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimeWithoutTimezone("23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithTimezone("2025-06-17T23:59:59+02")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setTimestampWithoutTimezone("2025-06-17T23:59:59")
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""["a","b"]""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"k":"v"}""".toByteArray().toByteString())
.build(),
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder()
.setJson("""{"u":1}""".toByteArray().toByteString())
.build(),
encoder.encode(true, LeafAirbyteSchemaType.BOOLEAN),
encoder.encode(123L, LeafAirbyteSchemaType.INTEGER),
encoder.encode(12.34, LeafAirbyteSchemaType.NUMBER),
encoder.encode("hello", LeafAirbyteSchemaType.STRING),
encoder.encode(LocalDate.parse("2025-06-17"), LeafAirbyteSchemaType.DATE),
encoder.encode(
OffsetTime.parse("23:59:59+02:00"),
LeafAirbyteSchemaType.TIME_WITH_TIMEZONE
),
encoder.encode(
LocalTime.parse("23:59:59"),
LeafAirbyteSchemaType.TIME_WITHOUT_TIMEZONE
),
encoder.encode(
OffsetDateTime.parse("2025-06-17T23:59:59+02:00"),
LeafAirbyteSchemaType.TIMESTAMP_WITH_TIMEZONE
),
encoder.encode(
LocalDateTime.parse("2025-06-17T23:59:59"),
LeafAirbyteSchemaType.TIMESTAMP_WITHOUT_TIMEZONE
),
encoder.encode("""["a","b"]""".toByteArray(), LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"k":"v"}""".toByteArray(), LeafAirbyteSchemaType.JSONB),
encoder.encode("""{"u":1}""".toByteArray(), LeafAirbyteSchemaType.JSONB),
)
if (addUnknownTypeToSchema) {
protoValues.add(
AirbyteRecordMessage.AirbyteValueProtobuf.newBuilder().setIsNull(true).build(),
encoder.encode(null, LeafAirbyteSchemaType.STRING),
)
}
@@ -166,7 +169,7 @@ abstract class ProtoFixtures(private val addUnknownTypeToSchema: Boolean) {
AirbyteRecordMessage.AirbyteRecordMessageProtobuf.newBuilder()
.setStreamName("dummy")
.setEmittedAtMs(emittedAtMs)
.addAllData(protoValues)
.addAllData(protoValues.map { it.build() })
.setMeta(metaProto)
.build()

View File

@@ -75,7 +75,7 @@ class ProtoToCsvFormattingTest : ProtoFixtures(true) {
"2025-06-17",
"23:59:59+02:00",
"23:59:59",
"2025-06-17T23:59:59+02",
"2025-06-17T23:59:59+02:00",
"2025-06-17T23:59:59",
"[""a"",""b""]",
"{""k"":""v""}",
@@ -131,7 +131,7 @@ class ProtoToCsvFormattingTest : ProtoFixtures(true) {
1724438400000,
"{""sync_id"":42,""changes"":[{""field"":""x"",""change"":""NULLED"",""reason"":""DESTINATION_SERIALIZATION_ERROR""},{""field"":""y"",""change"":""NULLED"",""reason"":""SOURCE_SERIALIZATION_ERROR""},{""field"":""z"",""change"":""TRUNCATED"",""reason"":""SOURCE_RECORD_SIZE_LIMITATION""},{""field"":""unknown_col"",""change"":""NULLED"",""reason"":""DESTINATION_SERIALIZATION_ERROR""}]}",
314,
"{""bool_col"":true,""int_col"":123,""num_col"":12.34,""string_col"":""hello"",""date_col"":""2025-06-17"",""time_tz_col"":""23:59:59+02:00"",""time_no_tz_col"":""23:59:59"",""ts_tz_col"":""2025-06-17T23:59:59+02"",""ts_no_tz_col"":""2025-06-17T23:59:59"",""array_col"":[""a"",""b""],""obj_col"":{""k"":""v""},""union_col"":{""u"":1},""unknown_col"":null}"
"{""bool_col"":true,""int_col"":123,""num_col"":12.34,""string_col"":""hello"",""date_col"":""2025-06-17"",""time_tz_col"":""23:59:59+02:00"",""time_no_tz_col"":""23:59:59"",""ts_tz_col"":""2025-06-17T23:59:59+02:00"",""ts_no_tz_col"":""2025-06-17T23:59:59"",""array_col"":[""a"",""b""],""obj_col"":{""k"":""v""},""union_col"":{""u"":1},""unknown_col"":null}"
"""
.trimIndent()
.replace("\n", "")

View File

@@ -31,7 +31,7 @@ class ProtoToJsonFormattingTest : ProtoFixtures(true) {
val expectedJson =
"""
{"_airbyte_raw_id":"11111111-1111-1111-1111-111111111111","_airbyte_extracted_at":1724438400000,"_airbyte_meta":{"sync_id":42,"changes":[{"field":"x","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"},{"field":"y","change":"NULLED","reason":"SOURCE_SERIALIZATION_ERROR"},{"field":"z","change":"TRUNCATED","reason":"SOURCE_RECORD_SIZE_LIMITATION"},{"field":"unknown_col","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"}]},"_airbyte_generation_id":314,"bool_col":true,"int_col":123,"num_col":12.34,"string_col":"hello","date_col":"2025-06-17","time_tz_col":"23:59:59+02:00","time_no_tz_col":"23:59:59","ts_tz_col":"2025-06-17T23:59:59+02","ts_no_tz_col":"2025-06-17T23:59:59","array_col":["a","b"],"obj_col":{"k":"v"},"union_col":{"u":1},"unknown_col":null}
{"_airbyte_raw_id":"11111111-1111-1111-1111-111111111111","_airbyte_extracted_at":1724438400000,"_airbyte_meta":{"sync_id":42,"changes":[{"field":"x","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"},{"field":"y","change":"NULLED","reason":"SOURCE_SERIALIZATION_ERROR"},{"field":"z","change":"TRUNCATED","reason":"SOURCE_RECORD_SIZE_LIMITATION"},{"field":"unknown_col","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"}]},"_airbyte_generation_id":314,"bool_col":true,"int_col":123,"num_col":12.34,"string_col":"hello","date_col":"2025-06-17","time_tz_col":"23:59:59+02:00","time_no_tz_col":"23:59:59","ts_tz_col":"2025-06-17T23:59:59+02:00","ts_no_tz_col":"2025-06-17T23:59:59","array_col":["a","b"],"obj_col":{"k":"v"},"union_col":{"u":1},"unknown_col":null}
""".trimIndent()
assertEquals(
@@ -54,7 +54,7 @@ class ProtoToJsonFormattingTest : ProtoFixtures(true) {
val expectedJson =
"""
{"_airbyte_raw_id":"11111111-1111-1111-1111-111111111111","_airbyte_extracted_at":1724438400000,"_airbyte_meta":{"sync_id":42,"changes":[{"field":"x","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"},{"field":"y","change":"NULLED","reason":"SOURCE_SERIALIZATION_ERROR"},{"field":"z","change":"TRUNCATED","reason":"SOURCE_RECORD_SIZE_LIMITATION"},{"field":"unknown_col","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"}]},"_airbyte_generation_id":314,"_airbyte_data":{"bool_col":true,"int_col":123,"num_col":12.34,"string_col":"hello","date_col":"2025-06-17","time_tz_col":"23:59:59+02:00","time_no_tz_col":"23:59:59","ts_tz_col":"2025-06-17T23:59:59+02","ts_no_tz_col":"2025-06-17T23:59:59","array_col":["a","b"],"obj_col":{"k":"v"},"union_col":{"u":1},"unknown_col":null}}
{"_airbyte_raw_id":"11111111-1111-1111-1111-111111111111","_airbyte_extracted_at":1724438400000,"_airbyte_meta":{"sync_id":42,"changes":[{"field":"x","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"},{"field":"y","change":"NULLED","reason":"SOURCE_SERIALIZATION_ERROR"},{"field":"z","change":"TRUNCATED","reason":"SOURCE_RECORD_SIZE_LIMITATION"},{"field":"unknown_col","change":"NULLED","reason":"DESTINATION_SERIALIZATION_ERROR"}]},"_airbyte_generation_id":314,"_airbyte_data":{"bool_col":true,"int_col":123,"num_col":12.34,"string_col":"hello","date_col":"2025-06-17","time_tz_col":"23:59:59+02:00","time_no_tz_col":"23:59:59","ts_tz_col":"2025-06-17T23:59:59+02:00","ts_no_tz_col":"2025-06-17T23:59:59","array_col":["a","b"],"obj_col":{"k":"v"},"union_col":{"u":1},"unknown_col":null}}
""".trimIndent()
assertEquals(

View File

@@ -1 +1 @@
version=0.1.53
version=0.1.54