1
0
mirror of synced 2025-12-19 18:14:56 -05:00

Remove destination-bigquery-denormalized (#30369)

Co-authored-by: evantahler <evantahler@users.noreply.github.com>
This commit is contained in:
Evan Tahler
2023-09-25 10:53:38 -07:00
committed by GitHub
parent 7b8ce96752
commit de188beff6
72 changed files with 4 additions and 4643 deletions

View File

@@ -1,5 +0,0 @@
# BigQuery Denormalized Destination Connector Bootstrap
Instead of splitting the final data into multiple tables, this destination leverages BigQuery capabilities with [Structured and Repeated fields](https://cloud.google.com/bigquery/docs/nested-repeated) to produce a single "big" table per stream. This does not write the `_airbyte_raw_*` tables in the destination and normalization from this connector is not supported at this time.
See [this](https://docs.airbyte.io/integrations/destinations/bigquery) link for the nuances about the connector.

View File

@@ -1,28 +0,0 @@
### WARNING ###
# The Java connector Dockerfiles will soon be deprecated.
# This Dockerfile is not used to build the connector image we publish to DockerHub.
# The new logic to build the connector image is declared with Dagger here:
# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649
# If you need to add custom logic to build your connector image, you can do so by adding a finalize_build.sh or finalize_build.py script in the connector folder.
# Please reach out to the Connectors Operations team if you have any questions.

# Stage 1: unpack the distribution tarball copied from build/distributions.
FROM airbyte/integration-base-java:dev AS build
WORKDIR /airbyte
# key=value form; the space-separated "ENV key value" syntax is a legacy form.
ENV APPLICATION=destination-bigquery-denormalized
COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar
RUN tar xf ${APPLICATION}.tar --strip-components=1 && rm -rf ${APPLICATION}.tar

# Stage 2: final image containing only the unpacked /airbyte directory from stage 1.
FROM airbyte/integration-base-java:dev
WORKDIR /airbyte
ENV APPLICATION=destination-bigquery-denormalized
COPY --from=build /airbyte /airbyte
LABEL io.airbyte.version=2.0.0
LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized

View File

@@ -1,25 +0,0 @@
# BigQuery vs BigQuery Denormalized
This is the same destination as BigQuery but tables are created de-normalized (one big table) without the use of `_airbyte_raw_*` tables containing the JSON blob data.
# BigQuery Test Configuration
In order to test the BigQuery destination, you need a service account key file.
## Community Contributor
As a community contributor, you will need access to a GCP project and BigQuery to run tests.
1. Go to the `Service Accounts` page on the GCP console
1. Click on the `+ Create Service Account` button
1. Fill out a descriptive name/id/description
1. Click the edit icon next to the service account you created on the `IAM` page
1. Add the `BigQuery Data Editor` and `BigQuery User` roles
1. Go back to the `Service Accounts` page and use the actions modal to `Create Key`
1. Download this key as a JSON file
1. Move and rename this file to `secrets/credentials.json`
## Airbyte Employee
1. Access the `BigQuery Integration Test User` secret on Rippling under the `Engineering` folder
1. Create a file with the contents at `secrets/credentials.json`

View File

@@ -1,41 +1,3 @@
// Build script for the destination-bigquery-denormalized connector.
plugins {
id 'application'
id 'airbyte-docker'
id 'airbyte-integration-test-java'
}
// Entry point and default JVM flags used by the generated application distribution.
application {
mainClass = 'io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedDestination'
// Exit the JVM on OutOfMemoryError and cap the heap at 75% of available RAM.
applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0']
}
dependencies {
implementation 'com.google.cloud:google-cloud-bigquery:1.122.2'
implementation 'org.apache.commons:commons-lang3:3.11'
implementation project(':airbyte-config-oss:config-models-oss')
implementation project(':airbyte-integrations:bases:base-java')
// The denormalized destination extends the regular BigQuery destination.
implementation project(':airbyte-integrations:connectors:destination-bigquery')
implementation libs.airbyte.protocol
implementation project(':airbyte-integrations:bases:base-java-s3')
implementation project(':airbyte-integrations:connectors:destination-gcs')
implementation group: 'org.apache.parquet', name: 'parquet-avro', version: '1.12.0'
testImplementation 'org.hamcrest:hamcrest-all:1.3'
testImplementation 'org.mockito:mockito-inline:4.7.0'
integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-bigquery-denormalized')
integrationTestJavaImplementation project(':airbyte-db:db-lib')
}
// Pin google-api-client for every configuration to resolve the conflict described below.
configurations.all {
resolutionStrategy {
// at time of writing: deps.toml declares google-cloud-storage 2.17.2
// which pulls in google-api-client:2.2.0
// which conflicts with google-cloud-bigquery, which requires google-api-client:1.x
// google-cloud-storage is OK with downgrading to anything >=1.31.1.
force 'com.google.api-client:google-api-client:1.31.5'
}
}

View File

@@ -10,9 +10,9 @@ data:
name: BigQuery (denormalized typed struct)
registries:
cloud:
enabled: true
enabled: false
oss:
enabled: true
enabled: false
releases:
breakingChanges:
2.0.0:

View File

@@ -1,172 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static com.google.cloud.bigquery.Field.Mode.REPEATED;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Table;
import io.airbyte.integrations.base.Destination;
import io.airbyte.integrations.base.IntegrationRunner;
import io.airbyte.integrations.destination.bigquery.formatter.BigQueryRecordFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.DefaultBigQueryDenormalizedRecordFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.GcsBigQueryDenormalizedRecordFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.arrayformater.LegacyArrayFormatter;
import io.airbyte.integrations.destination.bigquery.uploader.AbstractBigQueryUploader;
import io.airbyte.integrations.destination.bigquery.uploader.BigQueryUploaderFactory;
import io.airbyte.integrations.destination.bigquery.uploader.UploaderType;
import io.airbyte.integrations.destination.bigquery.uploader.config.UploaderConfig;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import java.io.IOException;
import java.util.Map;
import javax.annotation.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BigQueryDenormalizedDestination extends BigQueryDestination {
private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedDestination.class);
/**
 * Builds the formatter registered for each uploader type. Both formatters are the denormalized
 * variants and are created from the same stream schema.
 *
 * @param jsonSchema JSON schema of the stream
 * @return formatter per uploader type
 */
@Override
protected Map<UploaderType, BigQueryRecordFormatter> getFormatterMap(final JsonNode jsonSchema) {
  final BigQueryRecordFormatter standardFormatter = new DefaultBigQueryDenormalizedRecordFormatter(jsonSchema, namingResolver);
  final BigQueryRecordFormatter avroFormatter = new GcsBigQueryDenormalizedRecordFormatter(jsonSchema, namingResolver);
  return Map.of(
      UploaderType.STANDARD, standardFormatter,
      UploaderType.AVRO, avroFormatter);
}
/**
 * Selects the layout of the temporary table. Returning {@code true} means the temporary table
 * only carries the three common Airbyte attributes; returning {@code false} means its structure
 * follows the JsonSchema of the Airbyte message.
 *
 * @return always {@code false}: the temporary table is built from the incoming JsonSchema
 */
@Override
protected boolean isDefaultAirbyteTmpTableSchema() {
  return false;
}
/**
 * Registers an uploader for the stream. When the target table already exists and its schema
 * differs from the one this destination would generate, the formatter is switched to the legacy
 * array formatting before the uploader is created.
 */
@Override
protected void putStreamIntoUploaderMap(final AirbyteStream stream,
                                        final UploaderConfig uploaderConfig,
                                        final Map<AirbyteStreamNameNamespacePair, AbstractBigQueryUploader<?>> uploaderMap)
    throws IOException {
  final String datasetId = BigQueryUtils.sanitizeDatasetId(uploaderConfig.getConfigStream().getStream().getNamespace());
  final Table targetTable = uploaderConfig.getBigQuery().getTable(datasetId, uploaderConfig.getTargetTableName());
  final BigQueryRecordFormatter recordFormatter = uploaderConfig.getFormatter();

  if (targetTable == null) {
    LOGGER.info("Target table is not created yet. The default destination processing will be used.");
  } else {
    LOGGER.info("Target table already exists. Checking could we use the default destination processing.");
    final boolean schemasMatch = compareSchemas((recordFormatter.getBigQuerySchema()), targetTable.getDefinition().getSchema());
    if (schemasMatch) {
      LOGGER.info("Existing target table {} has equal structure with the destination schema. Using the default array processing.",
          stream.getName());
    } else {
      // Fall back to the legacy array layout so the existing table stays writable.
      ((DefaultBigQueryDenormalizedRecordFormatter) recordFormatter).setArrayFormatter(new LegacyArrayFormatter());
      LOGGER.warn("Existing target table has different structure with the new destination processing. Trying legacy implementation.");
    }
  }

  // The uploader is created only after the formatter has (possibly) been switched above.
  final AbstractBigQueryUploader<?> streamUploader = BigQueryUploaderFactory.getUploader(uploaderConfig);
  uploaderMap.put(AirbyteStreamNameNamespacePair.fromAirbyteStream(stream), streamUploader);
}
/**
 * Compares the calculated BigQuery schema with the schema of the existing table. Only the fields
 * of the calculated schema are checked, so columns that were added to the table manually are
 * ignored.
 *
 * @param expectedSchema BigQuery schema calculated from the stream schema config
 * @param existingSchema BigQuery schema of the existing table (created by a previous run)
 * @return {@code true} when every calculated field has a compatible counterpart in the existing
 *         table (or both schemas are null), {@code false} otherwise
 */
private boolean compareSchemas(final com.google.cloud.bigquery.Schema expectedSchema,
                               @Nullable final com.google.cloud.bigquery.Schema existingSchema) {
  if (expectedSchema == null) {
    if (existingSchema == null) {
      LOGGER.info("Existing and expected schemas are null.");
      return true;
    }
    LOGGER.warn("Expected schema is null when we have existing schema {}", existingSchema);
    return false;
  }
  if (existingSchema == null) {
    LOGGER.warn("Existing schema is null when we expect {}", expectedSchema);
    return false;
  }

  final var existingFields = existingSchema.getFields();
  for (final Field expectedField : expectedSchema.getFields()) {
    // FieldList#get(String) throws IllegalArgumentException for a name that is not in the list,
    // so a missing column would abort the comparison instead of reporting a difference.
    // Search by name and fall back to null, which isDifferenceBetweenFields treats as "different".
    final Field existingField = existingFields.stream()
        .filter(candidate -> candidate.getName().equals(expectedField.getName()))
        .findFirst()
        .orElse(null);
    if (isDifferenceBetweenFields(expectedField, existingField)) {
      LOGGER.warn("Expected field {} is different from existing field {}", expectedField, existingField);
      return false;
    }
  }

  LOGGER.info("Existing and expected schemas are equal.");
  return true;
}
/**
 * Tells whether the existing field is incompatible with the expected one: it is missing, has a
 * different type, a conflicting REPEATED mode, or incompatible sub-fields.
 *
 * @param expectedField field calculated from the stream schema
 * @param existingField field found in the existing table, or null when absent
 * @return {@code true} when the fields differ
 */
private boolean isDifferenceBetweenFields(final Field expectedField, final Field existingField) {
  if (existingField == null) {
    return true;
  }
  final boolean compatible = expectedField.getType().equals(existingField.getType())
      && compareRepeatedMode(expectedField, existingField)
      && compareSubFields(expectedField, existingField);
  return !compatible;
}
/**
 * Compares field modes. A field can have one of four modes: NULLABLE, REQUIRED, REPEATED or
 * null. Only a difference in the REPEATED mode is critical: the modes are considered
 * incompatible when exactly one of the two fields is REPEATED.
 *
 * @param expectedField expected field structure
 * @param existingField existing field structure
 * @return {@code true} when the modes are compatible, {@code false} when only one side is
 *         REPEATED
 */
private boolean compareRepeatedMode(final Field expectedField, final Field existingField) {
  final boolean expectedIsRepeated = REPEATED.equals(expectedField.getMode());
  final boolean existingIsRepeated = REPEATED.equals(existingField.getMode());
  // Compatible when both are REPEATED or neither is.
  return expectedIsRepeated == existingIsRepeated;
}
/**
 * Recursively checks that every expected sub-field has a compatible counterpart among the
 * sub-fields of the existing field. Extra sub-fields on the existing side are ignored.
 *
 * @param expectedField expected field structure
 * @param existingField existing field structure
 * @return {@code true} when the expected field has no sub-fields or all of them match
 */
private boolean compareSubFields(final Field expectedField, final Field existingField) {
  final var expectedSubFields = expectedField.getSubFields();
  if (expectedSubFields == null || expectedSubFields.isEmpty()) {
    return true;
  }
  final var existingSubFields = existingField.getSubFields();
  if (existingSubFields == null || existingSubFields.isEmpty()) {
    return false;
  }
  for (final Field expectedSubField : expectedSubFields) {
    // FieldList#get(String) throws IllegalArgumentException for an unknown name; look the
    // sub-field up by scanning so that a missing one is reported as a difference instead.
    final Field existingSubField = existingSubFields.stream()
        .filter(candidate -> candidate.getName().equals(expectedSubField.getName()))
        .findFirst()
        .orElse(null);
    if (isDifferenceBetweenFields(expectedSubField, existingSubField)) {
      return false;
    }
  }
  return true;
}
/**
 * Connector entry point: runs the denormalized BigQuery destination through the integration
 * runner with the given command-line arguments.
 */
public static void main(final String[] args) throws Exception {
  final Destination denormalizedDestination = new BigQueryDenormalizedDestination();
  final IntegrationRunner runner = new IntegrationRunner(denormalizedDestination);
  runner.run(args);
}
}

View File

@@ -1,71 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import com.google.cloud.bigquery.StandardSQLTypeName;
import java.util.Arrays;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Maps a JsonSchema format, optionally refined by an Airbyte type, to the BigQuery Standard SQL
 * type used for it.
 */
public enum JsonSchemaFormat {

  DATE("date", null, StandardSQLTypeName.DATE),
  DATETIME("date-time", null, StandardSQLTypeName.DATETIME),
  DATETIME_WITH_TZ("date-time", "timestamp_with_timezone", StandardSQLTypeName.TIMESTAMP),
  TIME("time", null, StandardSQLTypeName.TIME),
  TIMESTAMP("timestamp-micros", null, StandardSQLTypeName.TIMESTAMP);

  private static final Logger LOGGER = LoggerFactory.getLogger(JsonSchemaFormat.class);

  private final String jsonSchemaFormat;
  private final String jsonSchemaAirbyteType;
  private final StandardSQLTypeName bigQueryType;

  JsonSchemaFormat(final String jsonSchemaFormat, final String jsonSchemaAirbyteType, final StandardSQLTypeName bigQueryType) {
    this.jsonSchemaAirbyteType = jsonSchemaAirbyteType;
    this.jsonSchemaFormat = jsonSchemaFormat;
    this.bigQueryType = bigQueryType;
  }

  /**
   * Resolves the enum constant for a format. When an Airbyte type is given, constants matching
   * both format and type are preferred; if there are none (or no type was given), constants that
   * match the format and declare no Airbyte type are used.
   *
   * @param jsonSchemaFormat JsonSchema format value, e.g. "date-time"
   * @param jsonSchemaAirbyteType Airbyte type refining the format, may be null
   * @return the single matching constant, or null when nothing matches
   * @throws RuntimeException when more than one constant matches
   */
  public static JsonSchemaFormat fromJsonSchemaFormat(final @Nonnull String jsonSchemaFormat, final @Nullable String jsonSchemaAirbyteType) {
    List<JsonSchemaFormat> matchFormats = List.of();
    // First try: format + Airbyte type.
    if (jsonSchemaAirbyteType != null) {
      matchFormats = candidatesFor(jsonSchemaFormat, jsonSchemaAirbyteType);
    }
    // Second try: format only, restricted to constants without an Airbyte type.
    if (matchFormats.isEmpty()) {
      matchFormats = candidatesFor(jsonSchemaFormat, null);
    }

    if (matchFormats.size() > 1) {
      throw new RuntimeException(
          "Match with more than one json format! Matched formats : " + matchFormats + ", Inputs jsonSchemaFormat : " + jsonSchemaFormat
              + ", jsonSchemaAirbyteType : " + jsonSchemaAirbyteType);
    }
    return matchFormats.isEmpty() ? null : matchFormats.get(0);
  }

  /**
   * Lists the constants whose format equals {@code format} and whose Airbyte type equals
   * {@code airbyteType}; a null {@code airbyteType} selects constants that declare none.
   */
  private static List<JsonSchemaFormat> candidatesFor(final String format, final String airbyteType) {
    return Arrays.stream(values())
        .filter(candidate -> format.equals(candidate.jsonSchemaFormat))
        .filter(candidate -> airbyteType == null
            ? candidate.jsonSchemaAirbyteType == null
            : airbyteType.equals(candidate.jsonSchemaAirbyteType))
        .toList();
  }

  public StandardSQLTypeName getBigQueryType() {
    return bigQueryType;
  }

  /** Returns the JsonSchema format string of this constant. */
  @Override
  public String toString() {
    return jsonSchemaFormat;
  }

}

View File

@@ -1,62 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import com.google.cloud.bigquery.StandardSQLTypeName;
/**
 * Mapping of JsonSchema types to BigQuery Standard SQL types.
 *
 * Each constant carries an {@code order} value that lets callers sort a union type (an array of
 * JsonSchemaType) so that the preferred type comes first. A lower order wins: for example, STRING
 * (0) takes precedence over NUMBER (1) when both appear in the same type array.
 */
public enum JsonSchemaType {

  STRING(0, "string", StandardSQLTypeName.STRING),
  NUMBER(1, "number", StandardSQLTypeName.FLOAT64),
  INTEGER(2, "integer", StandardSQLTypeName.INT64),
  BOOLEAN(3, "boolean", StandardSQLTypeName.BOOL),
  OBJECT(4, "object", StandardSQLTypeName.STRUCT),
  ARRAY(5, "array", StandardSQLTypeName.ARRAY),
  NULL(6, "null", null);

  private final int order;
  private final String jsonSchemaType;
  private final StandardSQLTypeName bigQueryType;

  JsonSchemaType(final int order, final String jsonSchemaType, final StandardSQLTypeName bigQueryType) {
    this.order = order;
    this.jsonSchemaType = jsonSchemaType;
    this.bigQueryType = bigQueryType;
  }

  /**
   * Resolves the constant whose JsonSchema name equals {@code value}.
   *
   * @param value JsonSchema type name, e.g. "string"
   * @return the matching constant
   * @throws IllegalArgumentException when no constant has that name
   */
  public static JsonSchemaType fromJsonSchemaType(final String value) {
    return java.util.Arrays.stream(values())
        .filter(candidate -> value.equals(candidate.jsonSchemaType))
        .findFirst()
        .orElseThrow(() -> new IllegalArgumentException("Unexpected json schema type: " + value));
  }

  public int getOrder() {
    return order;
  }

  public String getJsonSchemaType() {
    return jsonSchemaType;
  }

  /** BigQuery type for this JsonSchema type; null for {@link #NULL}. */
  public StandardSQLTypeName getBigQueryType() {
    return bigQueryType;
  }

  /** Returns the JsonSchema type name of this constant. */
  @Override
  public String toString() {
    return jsonSchemaType;
  }

}

View File

@@ -1,376 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.TYPE_FIELD;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Field.Builder;
import com.google.cloud.bigquery.Field.Mode;
import com.google.cloud.bigquery.FieldList;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.QueryParameterValue;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.util.MoreIterators;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.StandardNameTransformer;
import io.airbyte.integrations.destination.bigquery.BigQueryUtils;
import io.airbyte.integrations.destination.bigquery.JsonSchemaFormat;
import io.airbyte.integrations.destination.bigquery.JsonSchemaType;
import io.airbyte.integrations.destination.bigquery.formatter.arrayformater.ArrayFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.arrayformater.DefaultArrayFormatter;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DefaultBigQueryDenormalizedRecordFormatter extends DefaultBigQueryRecordFormatter {
private static final Logger LOGGER = LoggerFactory.getLogger(DefaultBigQueryDenormalizedRecordFormatter.class);
public static final String PROPERTIES_FIELD = "properties";
private static final String ALL_OF_FIELD = "allOf";
private static final String ANY_OF_FIELD = "anyOf";
private static final String FORMAT_FIELD = "format";
private static final String AIRBYTE_TYPE = "airbyte_type";
private static final String REF_DEFINITION_KEY = "$ref";
private static final ObjectMapper mapper = new ObjectMapper();
protected ArrayFormatter arrayFormatter;
/**
 * Creates a denormalized record formatter; all initialisation is delegated to the parent
 * formatter.
 *
 * @param jsonSchema JSON schema of the stream
 * @param namingResolver transformer used to derive BigQuery-compatible identifiers
 */
public DefaultBigQueryDenormalizedRecordFormatter(final JsonNode jsonSchema, final StandardNameTransformer namingResolver) {
super(jsonSchema, namingResolver);
}
/**
 * Returns the array formatter in use, creating a {@link DefaultArrayFormatter} on first access
 * when none has been set.
 */
private ArrayFormatter getArrayFormatter() {
  if (arrayFormatter != null) {
    return arrayFormatter;
  }
  arrayFormatter = new DefaultArrayFormatter();
  return arrayFormatter;
}
/**
 * Replaces the array formatter and recomputes the formatted JSON schema and the BigQuery schema
 * with it.
 *
 * @param arrayFormatter formatter to use from now on
 */
public void setArrayFormatter(final ArrayFormatter arrayFormatter) {
this.arrayFormatter = arrayFormatter;
// Re-derive both schemas from a copy of originalJsonSchema so they reflect the new formatter.
this.jsonSchema = formatJsonSchema(this.originalJsonSchema.deepCopy());
this.bigQuerySchema = getBigQuerySchema(jsonSchema);
}
/**
 * Produces the schema used by this formatter: a deep copy of the input on which the array
 * formatter has populated empty arrays and surrounded arrays by objects. The input node itself
 * is not modified.
 */
@Override
protected JsonNode formatJsonSchema(final JsonNode jsonSchema) {
  // Issue #5912 is reopened (PR #11166): formatAllOfAndAnyOfFields(namingResolver, jsonSchema)
  // is not applied here.
  final JsonNode schemaCopy = jsonSchema.deepCopy();
  final ArrayFormatter formatter = getArrayFormatter();
  formatter.populateEmptyArrays(schemaCopy);
  formatter.surroundArraysByObjects(schemaCopy);
  return schemaCopy;
}
/**
 * Converts a record message into the JSON row written to BigQuery: the payload is shaped to the
 * BigQuery schema, values of "$ref"-defined fields are stringified, and the Airbyte columns are
 * added.
 *
 * @param recordMessage record whose data must be a JSON object
 * @return the formatted row
 */
@Override
public JsonNode formatRecord(final AirbyteRecordMessage recordMessage) {
  final JsonNode payload = recordMessage.getData();
  Preconditions.checkArgument(payload.isObject());
  final ObjectNode data = (ObjectNode) formatData(getBigQuerySchema().getFields(), payload);

  // Fields declared through a $ref definition are stored as text. Only the first nesting level
  // is inspected; nested objects are not traversed.
  fieldsContainRefDefinitionValue.forEach(key -> {
    final JsonNode refValue = data.get(key);
    if (refValue != null && !refValue.isNull()) {
      data.put(key, refValue.toString());
    }
  });

  addAirbyteColumns(data, recordMessage);
  return data;
}
/**
 * Adds the Airbyte id and emitted-at columns to the row. BigQuery represents TIMESTAMP with
 * microsecond precision, so the emitted-at time (milliseconds in the Airbyte message) is
 * converted to microseconds and string-formatted with the BigQuery helper.
 */
protected void addAirbyteColumns(final ObjectNode data, final AirbyteRecordMessage recordMessage) {
  final long emittedAtMicros = TimeUnit.MILLISECONDS.toMicros(recordMessage.getEmittedAt());
  final String emittedAtText = QueryParameterValue.timestamp(emittedAtMicros).getValue();

  data.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString());
  data.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, emittedAtText);
}
/**
 * Shapes a JSON value according to the given BigQuery fields: objects and arrays are rebuilt
 * through their dedicated helpers, other values are kept, and date/time fields are then
 * normalised.
 *
 * @param fields schema fields describing {@code root}; null for empty objects and arrays
 * @param root value to format
 * @return the formatted value ({@code root} itself when {@code fields} is null)
 */
private JsonNode formatData(final FieldList fields, final JsonNode root) {
  if (fields == null) {
    // Nothing to shape against: empty objects and arrays are passed through.
    return root;
  }
  final JsonNode shaped = root.isObject()
      ? getObjectNode(fields, root)
      : (root.isArray() ? getArrayNode(fields, root) : root);
  formatDateTimeFields(fields, shaped);
  return shaped;
}
/**
 * Rewrites, in place, the date/time values of {@code root} for the date/time fields found in
 * the schema. For an array node every element is processed.
 */
protected void formatDateTimeFields(final FieldList fields, final JsonNode root) {
  final List<String> dateTimeFields = BigQueryUtils.getDateTimeFieldsFromSchema(fields);
  if (dateTimeFields.isEmpty() || root.isNull()) {
    return;
  }
  if (!root.isArray()) {
    BigQueryUtils.transformJsonDateTimeToBigDataFormat(dateTimeFields, root);
    return;
  }
  for (final JsonNode element : root) {
    BigQueryUtils.transformJsonDateTimeToBigDataFormat(dateTimeFields, element);
  }
}
/**
 * Formats every element of an array node and hands the result to the array formatter.
 *
 * @param fields schema fields of the array; only the first one is consulted
 * @param root array node to format
 * @return the array representation produced by the active array formatter
 */
private JsonNode getArrayNode(final FieldList fields, final JsonNode root) {
  // An array is described by a single field.
  final FieldList nestedFields = fields.get(0).getSubFields();
  // For an array of records the element schema is the field's sub-fields; otherwise reuse fields.
  final boolean hasNestedFields = nestedFields != null && !nestedFields.isEmpty();
  final FieldList elementFields = hasNestedFields ? nestedFields : fields;

  final List<JsonNode> formattedElements = MoreIterators.toList(root.elements()).stream()
      .map(element -> formatData(elementFields, element))
      .toList();
  return getArrayFormatter().formatArrayItems(formattedElements);
}
/**
 * Rebuilds an object node so that it only contains keys known to the schema, keyed by their
 * resolved identifiers, with each value formatted against the sub-fields of its schema field.
 * Note that {@code root} is modified in place for STRING fields that hold JSON objects.
 *
 * @param fields schema fields describing the object
 * @param root object node to format
 * @return a new JSON node holding the formatted entries
 */
private JsonNode getObjectNode(final FieldList fields, final JsonNode root) {
final List<String> fieldNames = fields.stream().map(Field::getName).collect(Collectors.toList());
// A STRING column that receives a JSON object stores the object's serialized text instead.
fields.stream()
.filter(f -> f.getType().equals(LegacySQLTypeName.STRING))
.filter(field -> root.get(field.getName()) != null)
.filter(f -> root.get(f.getName()).isObject())
.forEach(f -> {
final String value = root.get(f.getName()).toString();
((ObjectNode) root).remove(f.getName());
((ObjectNode) root).put(f.getName(), new TextNode(value));
});
return Jsons.jsonNode(Jsons.keys(root).stream()
.filter(key -> {
// Keys are matched against the schema by their resolved identifier.
final boolean validKey = fieldNames.contains(namingResolver.getIdentifier(key));
// Log each unknown key once; invalidKeys remembers the ones already reported.
if (!validKey && !invalidKeys.contains(key)) {
logFieldFail("Ignoring field as it is not defined in catalog", key);
invalidKeys.add(key);
}
return validKey;
})
// Output keys use the resolved identifier; values are formatted recursively.
.collect(Collectors.toMap(namingResolver::getIdentifier,
key -> formatData(fields.get(namingResolver.getIdentifier(key)).getSubFields(), root.get(key)))));
}
/**
 * Builds the BigQuery schema for a JSON schema, appending the Airbyte id and emitted-at columns
 * when the stream does not already define fields with those names.
 */
@Override
public Schema getBigQuerySchema(final JsonNode jsonSchema) {
  final List<Field> schemaFields = getSchemaFields(namingResolver, jsonSchema);

  final boolean hasAirbyteId = schemaFields.stream()
      .anyMatch(f -> f.getName().equals(JavaBaseConstants.COLUMN_NAME_AB_ID));
  if (!hasAirbyteId) {
    schemaFields.add(Field.of(JavaBaseConstants.COLUMN_NAME_AB_ID, StandardSQLTypeName.STRING));
  }

  final boolean hasEmittedAt = schemaFields.stream()
      .anyMatch(f -> f.getName().equals(JavaBaseConstants.COLUMN_NAME_EMITTED_AT));
  if (!hasEmittedAt) {
    schemaFields.add(Field.of(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, StandardSQLTypeName.TIMESTAMP));
  }

  LOGGER.info("Airbyte Schema is transformed from {} to {}.", jsonSchema, schemaFields);
  return Schema.of(schemaFields);
}
/**
 * Converts the top-level properties of a JSON schema into BigQuery fields and records which
 * properties are declared through a "$ref" definition.
 *
 * @param namingResolver transformer used to derive field identifiers
 * @param jsonSchema JSON schema; must be an object with a "properties" member
 * @return a mutable list with one field per property
 * @throws IllegalArgumentException when the schema is not an object or has no properties
 */
private List<Field> getSchemaFields(final StandardNameTransformer namingResolver, final JsonNode jsonSchema) {
  // Parameterized logging, consistent with the other log statements in this class.
  LOGGER.info("getSchemaFields : {} namingResolver {}", jsonSchema, namingResolver);
  Preconditions.checkArgument(jsonSchema.isObject() && jsonSchema.has(PROPERTIES_FIELD));
  final ObjectNode properties = (ObjectNode) jsonSchema.get(PROPERTIES_FIELD);
  final var propertyNames = Jsons.keys(properties);

  // Record the $ref-defined properties explicitly instead of as a Stream.peek side effect.
  propertyNames.forEach(addToRefList(properties));

  // Collectors.toList() is kept on purpose: the caller appends the Airbyte columns to this list.
  final List<Field> tmpFields = propertyNames.stream()
      .map(key -> getField(namingResolver, key, properties.get(key)).build())
      .collect(Collectors.toList());

  if (!fieldsContainRefDefinitionValue.isEmpty()) {
    LOGGER.warn("Next fields contain \"$ref\" as Definition: {}. They are going to be saved as String Type column",
        fieldsContainRefDefinitionValue);
  }
  return tmpFields;
}
/**
 * Creates a consumer that adds a property name to {@code fieldsContainRefDefinitionValue} when
 * the property's definition contains a "$ref" key.
 * <p>
 * Currently, Airbyte doesn't support parsing a value by its $ref key definition. The issue to
 * track this is <a href="https://github.com/airbytehq/airbyte/issues/7725">7725</a>.
 *
 * @param properties the "properties" node of a JSON schema
 * @return consumer to be applied to each property name
 */
private Consumer<String> addToRefList(final ObjectNode properties) {
  return propertyName -> {
    final JsonNode definition = properties.get(propertyName);
    if (definition.has(REF_DEFINITION_KEY)) {
      fieldsContainRefDefinitionValue.add(propertyName);
    }
  };
}
/**
 * Returns the definition to derive a field from. A definition with an explicit type is used as
 * is; otherwise an array-valued "anyOf" (checked first) or "allOf" is converted into an object
 * definition. Anything else is returned unchanged.
 */
private static JsonNode getFileDefinition(final JsonNode fieldDefinition) {
  if (fieldDefinition.has(TYPE_FIELD)) {
    return fieldDefinition;
  }
  for (final String combinator : List.of(ANY_OF_FIELD, ALL_OF_FIELD)) {
    if (fieldDefinition.has(combinator) && fieldDefinition.get(combinator).isArray()) {
      return allOfAndAnyOfFieldProcessing(combinator, fieldDefinition);
    }
  }
  return fieldDefinition;
}
/**
 * Converts an "allOf"/"anyOf" definition into an object definition with one property per
 * alternative, named {@code big_query_<type>} after the alternative's "type" value.
 *
 * @param fieldName key of the array to expand ("allOf" or "anyOf")
 * @param fieldDefinition definition containing that array
 * @return an object schema with the alternatives as properties and additionalProperties false
 * @throws IllegalStateException when the array cannot be read
 */
private static JsonNode allOfAndAnyOfFieldProcessing(final String fieldName, final JsonNode fieldDefinition) {
  final ObjectReader reader = mapper.readerFor(new TypeReference<List<JsonNode>>() {});
  final List<JsonNode> alternatives;
  try {
    alternatives = reader.readValue(fieldDefinition.get(fieldName));
  } catch (final IOException e) {
    // Chain the original exception so the underlying parse failure is not lost.
    throw new IllegalStateException(
        String.format("Failed to read and process the following field - %s", fieldDefinition), e);
  }
  final ObjectNode objectNode = mapper.createObjectNode();
  for (final JsonNode alternative : alternatives) {
    objectNode.set("big_query_" + alternative.get("type").asText(), alternative);
  }
  return Jsons.jsonNode(ImmutableMap.builder()
      .put("type", "object")
      .put(PROPERTIES_FIELD, objectNode)
      .put("additionalProperties", false)
      .build());
}
/**
 * Builds the BigQuery field for one schema property. The field starts as STRING and is refined
 * from the property's JSON types, recursing into array items and object properties; a
 * recognised "format" finally overrides the type.
 *
 * @param namingResolver transformer used to derive the field identifier
 * @param key property name
 * @param fieldDefinition JSON schema definition of the property
 * @return builder for the resulting field
 */
private static Builder getField(final StandardNameTransformer namingResolver, final String key, final JsonNode fieldDefinition) {
final String fieldName = namingResolver.getIdentifier(key);
// STRING is the fallback when nothing below assigns a more specific type.
final Builder builder = Field.newBuilder(fieldName, StandardSQLTypeName.STRING);
// allOf/anyOf definitions without an explicit type are first turned into object definitions.
final JsonNode updatedFileDefinition = getFileDefinition(fieldDefinition);
final JsonNode type = updatedFileDefinition.get(TYPE_FIELD);
final JsonNode airbyteType = updatedFileDefinition.get(AIRBYTE_TYPE);
final List<JsonSchemaType> fieldTypes = getTypes(fieldName, type);
for (int i = 0; i < fieldTypes.size(); i++) {
final JsonSchemaType fieldType = fieldTypes.get(i);
// Any "null" among the types makes the field NULLABLE.
if (fieldType == JsonSchemaType.NULL) {
builder.setMode(Mode.NULLABLE);
}
if (i == 0) {
// Treat the first type in the list with the widest scope as the primary type
final JsonSchemaType primaryType = fieldTypes.get(i);
switch (primaryType) {
case NULL -> {
builder.setType(StandardSQLTypeName.STRING);
}
case STRING, INTEGER, BOOLEAN -> {
builder.setType(primaryType.getBigQueryType());
}
case NUMBER -> {
// A number annotated with an integer Airbyte type is stored as INT64 rather than FLOAT64.
if (airbyteType != null
&& StringUtils.equalsAnyIgnoreCase(airbyteType.asText(),
"big_integer", "integer")) {
builder.setType(StandardSQLTypeName.INT64);
} else {
builder.setType(primaryType.getBigQueryType());
}
}
case ARRAY -> {
final JsonNode items;
if (updatedFileDefinition.has("items")) {
items = updatedFileDefinition.get("items");
} else {
LOGGER.warn("Source connector provided schema for ARRAY with missed \"items\", will assume that it's a String type");
// this is handler for case when we get "array" without "items"
// (https://github.com/airbytehq/airbyte/issues/5486)
items = getTypeStringSchema();
}
// Early return: the field is built from the items definition and marked REPEATED, so the
// remaining types and the format handling below are not applied to the array itself.
return getField(namingResolver, fieldName, items).setMode(Mode.REPEATED);
}
case OBJECT -> {
final JsonNode properties;
if (updatedFileDefinition.has(PROPERTIES_FIELD)) {
properties = updatedFileDefinition.get(PROPERTIES_FIELD);
} else {
// Without a "properties" member the keys of the definition itself are used.
properties = updatedFileDefinition;
}
final FieldList fieldList = FieldList.of(Jsons.keys(properties)
.stream()
.map(f -> getField(namingResolver, f, properties.get(f)).build())
.collect(Collectors.toList()));
// An object without any sub-field is stored as STRING instead of an empty STRUCT.
if (!fieldList.isEmpty()) {
builder.setType(StandardSQLTypeName.STRUCT, fieldList);
} else {
builder.setType(StandardSQLTypeName.STRING);
}
}
default -> {
throw new IllegalStateException(
String.format("Unexpected type for field %s: %s", fieldName, primaryType));
}
}
}
}
// If a specific format is defined, use their specific type instead of the JSON's one
final JsonNode fieldFormat = updatedFileDefinition.get(FORMAT_FIELD);
if (fieldFormat != null) {
final JsonSchemaFormat schemaFormat = JsonSchemaFormat.fromJsonSchemaFormat(fieldFormat.asText(),
(airbyteType != null ? airbyteType.asText() : null));
// Unknown formats (null result) leave the type chosen above untouched.
if (schemaFormat != null) {
builder.setType(schemaFormat.getBigQueryType());
}
}
return builder;
}
/**
 * Returns a minimal schema whose only type is "string"; used as the items definition for arrays
 * declared without "items".
 */
private static JsonNode getTypeStringSchema() {
  final String stringTypeSchema = "{\n"
      + " \"type\": [\n"
      + " \"string\"\n"
      + " ]\n"
      + " }";
  return Jsons.deserialize(stringTypeSchema);
}
/**
 * Resolves the JSON schema types of a field.
 *
 * @param fieldName field name, used for logging only
 * @param type the "type" node: null, a single type name, or an array of type names
 * @return the types sorted by ascending {@link JsonSchemaType#getOrder()}; STRING when no type
 *         is defined
 * @throws IllegalStateException when {@code type} is neither textual nor an array
 */
private static List<JsonSchemaType> getTypes(final String fieldName, final JsonNode type) {
  if (type == null) {
    LOGGER.warn("Field {} has no type defined, defaulting to STRING", fieldName);
    return List.of(JsonSchemaType.STRING);
  }
  if (type.isTextual()) {
    return Collections.singletonList(JsonSchemaType.fromJsonSchemaType(type.asText()));
  }
  if (!type.isArray()) {
    throw new IllegalStateException("Unexpected type: " + type);
  }
  // Union type: sort so that the preferred (lowest order) type comes first.
  return MoreIterators.toList(type.elements()).stream()
      .map(JsonNode::asText)
      .map(JsonSchemaType::fromJsonSchemaType)
      .sorted(Comparator.comparingInt(JsonSchemaType::getOrder))
      .collect(Collectors.toList());
}
}

View File

@@ -1,51 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.cloud.bigquery.Schema;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.StandardNameTransformer;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
/**
 * Variant of the denormalized record formatter that adjusts the JSON schema, the BigQuery schema
 * and the Airbyte columns before delegating to the parent implementation.
 */
public class GcsBigQueryDenormalizedRecordFormatter extends DefaultBigQueryDenormalizedRecordFormatter {

  public GcsBigQueryDenormalizedRecordFormatter(
                                                final JsonNode jsonSchema,
                                                final StandardNameTransformer namingResolver) {
    super(jsonSchema, namingResolver);
  }

  /**
   * Adds an explicit string type to every "$ref" definition (by textual replacement on the
   * serialized schema) and then applies the parent formatting.
   */
  @Override
  protected JsonNode formatJsonSchema(final JsonNode jsonSchema) {
    final String typedRefSchema = Jsons.serialize(jsonSchema)
        .replace("{\"$ref\":\"", "{\"type\":[\"string\"], \"$ref\":\"");
    return super.formatJsonSchema(Jsons.deserialize(typedRefSchema));
  }

  /**
   * Builds the BigQuery schema after rewriting every "date-time" format to "timestamp-micros".
   */
  @Override
  public Schema getBigQuerySchema(final JsonNode jsonSchema) {
    // BigQuery avro file loader doesn't support date-time
    // https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types
    // So we use timestamp for date-time
    final String serializedSchema = Jsons.serialize(jsonSchema);
    final String timestampSchema = serializedSchema.replace("\"format\":\"date-time\"", "\"format\":\"timestamp-micros\"");
    return super.getBigQuerySchema(Jsons.deserialize(timestampSchema));
  }

  /**
   * Adds the Airbyte id and emitted-at columns. Unlike the parent, the emitted-at value is
   * written as the numeric value received from the message, without unit conversion or string
   * formatting.
   */
  @Override
  protected void addAirbyteColumns(final ObjectNode data, final AirbyteRecordMessage recordMessage) {
    // The message value is passed through unchanged (the original millis-to-millis conversion
    // was a no-op).
    final long emittedAt = recordMessage.getEmittedAt();
    data.put(JavaBaseConstants.COLUMN_NAME_AB_ID, UUID.randomUUID().toString());
    data.put(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, emittedAt);
  }

}

View File

@@ -1,18 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.arrayformater;
import com.fasterxml.jackson.databind.JsonNode;
import java.util.List;
/**
 * Strategy for adapting array definitions in a JSON schema, and array values in a record, to the
 * layout used by the denormalized BigQuery destination.
 */
public interface ArrayFormatter {

  /**
   * Fills in array definitions that are missing information about their items.
   *
   * @param node schema node to update in place
   */
  void populateEmptyArrays(JsonNode node);

  /**
   * Wraps array definitions inside object definitions.
   *
   * @param node schema node to update in place
   */
  void surroundArraysByObjects(JsonNode node);

  /**
   * Converts the items of one array value into the node that is written to the destination.
   *
   * @param arrayItems items of the array value
   * @return node representing the formatted items
   */
  JsonNode formatArrayItems(List<JsonNode> arrayItems);

}

View File

@@ -1,67 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.arrayformater;
import static io.airbyte.integrations.destination.bigquery.formatter.DefaultBigQueryDenormalizedRecordFormatter.PROPERTIES_FIELD;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.ARRAY_ITEMS_FIELD;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.NESTED_ARRAY_FIELD;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.TYPE_FIELD;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Default {@link ArrayFormatter}: gives untyped arrays a string item type and wraps arrays that
 * are nested directly inside another array into an object holding a single
 * {@code NESTED_ARRAY_FIELD} property.
 */
public class DefaultArrayFormatter implements ArrayFormatter {

  /**
   * Adds {@code "items": {"type": ["string"]}} to every array definition found under {@code node}
   * that has no {@code items} entry yet.
   */
  @Override
  public void populateEmptyArrays(final JsonNode node) {
    for (final JsonNode arraySchema : findArrays(node)) {
      if (arraySchema.has(ARRAY_ITEMS_FIELD)) {
        continue;
      }
      final ObjectNode defaultItems = ((ObjectNode) arraySchema).putObject(ARRAY_ITEMS_FIELD);
      defaultItems.putArray(TYPE_FIELD).add("string");
    }
  }

  /**
   * For every array definition whose {@code items} is itself an array definition, replaces
   * {@code items} with an object definition whose only property, {@code NESTED_ARRAY_FIELD},
   * carries a copy of the inner array definition. The new {@code items} node is then processed
   * recursively.
   */
  @Override
  public void surroundArraysByObjects(final JsonNode node) {
    for (final JsonNode arraySchema : findArrays(node)) {
      final JsonNode currentItems = arraySchema.get(ARRAY_ITEMS_FIELD);
      if (!FormatterUtil.isAirbyteArray(currentItems)) {
        continue;
      }
      // Copy the inner definition before it is detached from its parent.
      final ObjectNode innerArraySchema = currentItems.deepCopy();
      final ObjectNode outerArraySchema = (ObjectNode) arraySchema;
      outerArraySchema.remove(ARRAY_ITEMS_FIELD);

      final ObjectNode wrapperItems = outerArraySchema.putObject(ARRAY_ITEMS_FIELD);
      wrapperItems.putArray(TYPE_FIELD).add("object");
      wrapperItems.putObject(PROPERTIES_FIELD).putObject(NESTED_ARRAY_FIELD).setAll(innerArraySchema);

      surroundArraysByObjects(wrapperItems);
    }
  }

  /**
   * Returns the items as one JSON node; every item that is itself an array is wrapped into an
   * object with the single key {@code NESTED_ARRAY_FIELD}, all other items are kept as they are.
   */
  @Override
  public JsonNode formatArrayItems(List<JsonNode> arrayItems) {
    final List<JsonNode> formattedItems = arrayItems.stream()
        .map(DefaultArrayFormatter::wrapIfArray)
        .toList();
    return Jsons.jsonNode(formattedItems);
  }

  /**
   * Collects the nodes under {@code node} that contain a {@code TYPE_FIELD} entry and are array
   * definitions according to {@link FormatterUtil#isAirbyteArray}.
   *
   * @param node root of the search, may be {@code null}
   * @return the matching nodes, or an empty list when {@code node} is {@code null}
   */
  protected List<JsonNode> findArrays(final JsonNode node) {
    if (node == null) {
      return Collections.emptyList();
    }
    return node.findParents(TYPE_FIELD).stream()
        .filter(FormatterUtil::isAirbyteArray)
        .collect(Collectors.toList());
  }

  /** Wraps an array item into {@code {NESTED_ARRAY_FIELD: item}}; returns any other item unchanged. */
  private static JsonNode wrapIfArray(final JsonNode item) {
    if (item.isArray()) {
      return Jsons.jsonNode(ImmutableMap.of(NESTED_ARRAY_FIELD, item));
    }
    return item;
  }

}

View File

@@ -1,54 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.arrayformater;
import static io.airbyte.integrations.destination.bigquery.formatter.DefaultBigQueryDenormalizedRecordFormatter.PROPERTIES_FIELD;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.ARRAY_ITEMS_FIELD;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.NESTED_ARRAY_FIELD;
import static io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil.TYPE_FIELD;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.destination.bigquery.formatter.util.FormatterUtil;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * {@link DefaultArrayFormatter} variant that wraps every array definition, not only nested ones,
 * into an object with a single {@code NESTED_ARRAY_FIELD} property.
 */
public class LegacyArrayFormatter extends DefaultArrayFormatter {

  /**
   * Turns every array definition found under {@code node} into an object definition whose only
   * property, {@code NESTED_ARRAY_FIELD}, holds a copy of the original array definition. The
   * {@code items} of that copy are then processed recursively.
   */
  @Override
  public void surroundArraysByObjects(final JsonNode node) {
    for (final JsonNode arraySchema : findArrays(node)) {
      // Keep a copy of the original definition; the node itself is emptied and reused as wrapper.
      final JsonNode originalArraySchema = arraySchema.deepCopy();
      final ObjectNode wrapper = (ObjectNode) arraySchema;
      wrapper.removeAll();
      wrapper.putArray(TYPE_FIELD).add("object");
      final ObjectNode wrapperProperties = wrapper.putObject(PROPERTIES_FIELD);
      wrapperProperties.set(NESTED_ARRAY_FIELD, originalArraySchema);

      surroundArraysByObjects(originalArraySchema.get(ARRAY_ITEMS_FIELD));
    }
  }

  /**
   * Collects the nodes under {@code node} that contain a {@code TYPE_FIELD} entry and are array
   * definitions according to {@link FormatterUtil#isAirbyteArray}.
   *
   * @param node root of the search, may be {@code null}
   * @return the matching nodes, or an empty list when {@code node} is {@code null}
   */
  @Override
  protected List<JsonNode> findArrays(final JsonNode node) {
    if (node == null) {
      return Collections.emptyList();
    }
    return node.findParents(TYPE_FIELD).stream()
        .filter(FormatterUtil::isAirbyteArray)
        .collect(Collectors.toList());
  }

  /** Wraps the whole item list into one object with the single key {@code NESTED_ARRAY_FIELD}. */
  @Override
  public JsonNode formatArrayItems(List<JsonNode> arrayItems) {
    final ImmutableMap<String, List<JsonNode>> wrappedItems = ImmutableMap.of(NESTED_ARRAY_FIELD, arrayItems);
    return Jsons.jsonNode(wrappedItems);
  }

}

View File

@@ -1,34 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.util;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
/**
 * Field names and helpers shared by the array formatters of the denormalized BigQuery
 * destination.
 */
public class FormatterUtil {

  public static final String NESTED_ARRAY_FIELD = "big_query_array";
  public static final String ARRAY_ITEMS_FIELD = "items";
  public static final String TYPE_FIELD = "type";

  private static final String ARRAY_TYPE = "array";

  /**
   * Tells whether a JSON schema node declares the type {@code "array"}, either as a plain string
   * ({@code "type": "array"}) or as one entry of a type list ({@code "type": ["null", "array"]}).
   *
   * @param jsonSchemaNode schema node to inspect, may be {@code null}
   * @return {@code true} when the node has a {@code type} entry that is or contains
   *         {@code "array"}; {@code false} for {@code null}, for a missing {@code type} and for
   *         any other type declaration
   */
  public static boolean isAirbyteArray(final JsonNode jsonSchemaNode) {
    if (jsonSchemaNode == null) {
      return false;
    }
    final JsonNode type = jsonSchemaNode.get(TYPE_FIELD);
    if (type == null) {
      return false;
    }
    if (type.isTextual()) {
      // Compare the type value itself. The previous code called asText() on the enclosing schema
      // node, which is not a value node, so a plain "type": "array" was never recognised.
      return ARRAY_TYPE.equals(type.textValue());
    }
    if (type.isArray()) {
      final ArrayNode typeNode = (ArrayNode) type;
      for (final JsonNode arrayTypeNode : typeNode) {
        if (arrayTypeNode.isTextual() && ARRAY_TYPE.equals(arrayTypeNode.textValue())) {
          return true;
        }
      }
    }
    return false;
  }

}

View File

@@ -1,207 +0,0 @@
{
"documentationUrl": "https://docs.airbyte.com/integrations/destinations/bigquery",
"supportsIncremental": true,
"supportsNormalization": false,
"supportsDBT": false,
"supported_destination_sync_modes": ["overwrite", "append"],
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "BigQuery Denormalized Typed Struct Destination Spec",
"type": "object",
"required": ["project_id", "dataset_id"],
"additionalProperties": true,
"properties": {
"project_id": {
"type": "string",
"description": "The GCP project ID for the project containing the target BigQuery dataset. Read more <a href=\"https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects\">here</a>.",
"title": "Project ID",
"order": 0
},
"dataset_id": {
"type": "string",
"description": "The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more <a href=\"https://cloud.google.com/bigquery/docs/datasets#create-dataset\">here</a>.",
"title": "Default Dataset ID",
"order": 1
},
"loading_method": {
"type": "object",
"title": "Loading Method",
"description": "Loading method used to select the way data will be uploaded to BigQuery. <br/><b>Standard Inserts</b> - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging. <br/><b>GCS Staging</b> - Writes large batches of records to a file, uploads the file to GCS, then uses <b>COPY INTO table</b> to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#gcs-staging\">here</a>.",
"order": 2,
"oneOf": [
{
"title": "Standard Inserts",
"required": ["method"],
"properties": {
"method": {
"type": "string",
"const": "Standard"
}
}
},
{
"title": "GCS Staging",
"type": "object",
"required": [
"method",
"gcs_bucket_name",
"gcs_bucket_path",
"credential"
],
"properties": {
"method": {
"type": "string",
"const": "GCS Staging",
"order": 0
},
"credential": {
"title": "Credential",
"description": "An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys\">here</a>.",
"type": "object",
"order": 1,
"oneOf": [
{
"title": "HMAC key",
"order": 0,
"required": [
"credential_type",
"hmac_key_access_id",
"hmac_key_secret"
],
"properties": {
"credential_type": {
"type": "string",
"const": "HMAC_KEY",
"order": 0
},
"hmac_key_access_id": {
"type": "string",
"description": "HMAC key access ID. When linked to a service account, this ID is 61 characters long; when linked to a user account, it is 24 characters long.",
"title": "HMAC Key Access ID",
"airbyte_secret": true,
"examples": ["1234567890abcdefghij1234"],
"order": 1
},
"hmac_key_secret": {
"type": "string",
"description": "The corresponding secret for the access ID. It is a 40-character base-64 encoded string.",
"title": "HMAC Key Secret",
"airbyte_secret": true,
"examples": [
"1234567890abcdefghij1234567890ABCDEFGHIJ"
],
"order": 2
}
}
}
]
},
"gcs_bucket_name": {
"title": "GCS Bucket Name",
"type": "string",
"description": "The name of the GCS bucket. Read more <a href=\"https://cloud.google.com/storage/docs/naming-buckets\">here</a>.",
"examples": ["airbyte_sync"],
"order": 2
},
"gcs_bucket_path": {
"title": "GCS Bucket Path",
"description": "Directory under the GCS bucket where data will be written. Read more <a href=\"https://cloud.google.com/storage/docs/locations\">here</a>.",
"type": "string",
"examples": ["data_sync/test"],
"order": 3
},
"keep_files_in_gcs-bucket": {
"type": "string",
"description": "This upload method temporarily stores records in a GCS bucket. Use this option to choose whether these records should be removed from GCS when the migration has finished. The default \"Delete all tmp files from GCS\" value is used if not set explicitly.",
"title": "GCS Tmp Files Afterward Processing",
"default": "Delete all tmp files from GCS",
"enum": [
"Delete all tmp files from GCS",
"Keep all tmp files in GCS"
],
"order": 4
},
"file_buffer_count": {
"title": "File Buffer Count",
"type": "integer",
"minimum": 10,
"maximum": 50,
"default": 10,
"description": "Number of file buffers allocated for writing data. Increasing this number is beneficial for connections using Change Data Capture (CDC) and up to the number of streams within a connection. Increasing the number of file buffers past the maximum number of streams has deteriorating effects",
"examples": ["10"],
"order": 5
}
}
}
]
},
"credentials_json": {
"type": "string",
"description": "The contents of the JSON service account key. Check out the <a href=\"https://docs.airbyte.com/integrations/destinations/bigquery#service-account-key\">docs</a> if you need help generating this key. Default credentials will be used if this field is left empty.",
"title": "Service Account Key JSON (Required for cloud, optional for open-source)",
"airbyte_secret": true,
"order": 3,
"always_show": true
},
"dataset_location": {
"type": "string",
"description": "The location of the dataset. Warning: Changes made after creation will not be applied. The default \"US\" value is used if not set explicitly. Read more <a href=\"https://cloud.google.com/bigquery/docs/locations\">here</a>.",
"title": "Dataset Location",
"default": "US",
"order": 4,
"enum": [
"US",
"EU",
"asia-east1",
"asia-east2",
"asia-northeast1",
"asia-northeast2",
"asia-northeast3",
"asia-south1",
"asia-south2",
"asia-southeast1",
"asia-southeast2",
"australia-southeast1",
"australia-southeast2",
"europe-central1",
"europe-central2",
"europe-north1",
"europe-southwest1",
"europe-west1",
"europe-west2",
"europe-west3",
"europe-west4",
"europe-west6",
"europe-west7",
"europe-west8",
"europe-west9",
"me-west1",
"northamerica-northeast1",
"northamerica-northeast2",
"southamerica-east1",
"southamerica-west1",
"us-central1",
"us-east1",
"us-east2",
"us-east3",
"us-east4",
"us-east5",
"us-west1",
"us-west2",
"us-west3",
"us-west4"
]
},
"big_query_client_buffer_size_mb": {
"title": "Google BigQuery Client Chunk Size",
"description": "Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. Read more <a href=\"https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html\">here</a>.",
"type": "integer",
"minimum": 1,
"maximum": 15,
"default": 15,
"examples": ["15"],
"order": 5
}
}
}
}

View File

@@ -1,271 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.AIRBYTE_COLUMNS;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.CONFIG_PROJECT_ID;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.NAME_TRANSFORMER;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.configureBigQuery;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.createCommonConfig;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getBigQueryDataSet;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.tearDownBigQuery;
import static org.junit.jupiter.api.Assertions.assertEquals;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.ConnectionProperty;
import com.google.cloud.bigquery.Dataset;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FieldList;
import com.google.cloud.bigquery.FieldValue;
import com.google.cloud.bigquery.FieldValueList;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobId;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.TableResult;
import com.google.common.collect.Streams;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.resources.MoreResources;
import io.airbyte.db.bigquery.BigQueryResultSet;
import io.airbyte.db.bigquery.BigQuerySourceOperations;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.NamingConventionTransformer;
import io.airbyte.integrations.destination.StandardNameTransformer;
import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest;
import io.airbyte.integrations.standardtest.destination.argproviders.DataArgumentsProvider;
import io.airbyte.integrations.standardtest.destination.comparator.TestDataComparator;
import io.airbyte.protocol.models.v0.AirbyteCatalog;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import io.airbyte.protocol.models.v0.CatalogHelpers;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.TimeZone;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BigQueryDenormalizedDestinationAcceptanceTest extends DestinationAcceptanceTest {
private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedDestinationAcceptanceTest.class);
private BigQuery bigquery;
private Dataset dataset;
protected JsonNode config;
private final StandardNameTransformer namingResolver = new StandardNameTransformer();
@Override
protected String getImageName() {
return "airbyte/destination-bigquery-denormalized:dev";
}
@Override
protected JsonNode getConfig() {
return config;
}
@Override
protected JsonNode getFailCheckConfig() {
((ObjectNode) config).put(CONFIG_PROJECT_ID, "fake");
return config;
}
@Override
protected boolean implementsNamespaces() {
return true;
}
@Override
protected boolean supportNamespaceTest() {
return true;
}
@Override
protected Optional<NamingConventionTransformer> getNameTransformer() {
return Optional.of(NAME_TRANSFORMER);
}
@Override
protected TestDataComparator getTestDataComparator() {
return new BigQueryDenormalizedTestDataComparator();
}
@Override
protected boolean supportBasicDataTypeTest() {
return true;
}
// #13154 Normalization issue
@Override
protected boolean supportArrayDataTypeTest() {
return true;
}
@Override
protected boolean supportObjectDataTypeTest() {
return true;
}
@Override
protected void assertNamespaceNormalization(final String testCaseId,
final String expectedNormalizedNamespace,
final String actualNormalizedNamespace) {
final String message = String.format("Test case %s failed; if this is expected, please override assertNamespaceNormalization", testCaseId);
if (testCaseId.equals("S3A-1")) {
// bigquery allows namespace starting with a number, and prepending underscore
// will hide the dataset, so we don't do it as we do for other destinations
final int underscoreIndex = expectedNormalizedNamespace.indexOf("_", 1);
final String randomSuffix = expectedNormalizedNamespace.substring(underscoreIndex);
assertEquals("99namespace" + randomSuffix, actualNormalizedNamespace, message);
} else {
assertEquals(expectedNormalizedNamespace, actualNormalizedNamespace, message);
}
}
@Override
protected String getDefaultSchema(final JsonNode config) {
return BigQueryUtils.getDatasetId(config);
}
@Override
protected List<JsonNode> retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace)
throws Exception {
final String tableName = namingResolver.getIdentifier(streamName);
final String schema = namingResolver.getIdentifier(namespace);
return retrieveRecordsFromTable(tableName, schema);
}
@Override
protected List<JsonNode> retrieveRecords(final TestDestinationEnv env,
final String streamName,
final String namespace,
final JsonNode streamSchema)
throws Exception {
final String tableName = namingResolver.getIdentifier(streamName);
final String schema = namingResolver.getIdentifier(namespace);
return retrieveRecordsFromTable(tableName, schema);
}
private List<JsonNode> retrieveRecordsFromTable(final String tableName, final String schema) throws InterruptedException {
TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
final QueryJobConfiguration queryConfig =
QueryJobConfiguration
.newBuilder(
String.format("SELECT * FROM `%s`.`%s` order by %s asc;", schema, tableName,
JavaBaseConstants.COLUMN_NAME_EMITTED_AT))
// .setUseLegacySql(false)
.setConnectionProperties(Collections.singletonList(ConnectionProperty.of("time_zone", "UTC")))
.build();
final TableResult queryResults = executeQuery(bigquery, queryConfig).getLeft().getQueryResults();
final FieldList fields = queryResults.getSchema().getFields();
final BigQuerySourceOperations sourceOperations = new BigQuerySourceOperations();
return Streams.stream(queryResults.iterateAll())
.map(fieldValues -> sourceOperations.rowToJson(new BigQueryResultSet(fieldValues, fields))).collect(Collectors.toList());
}
private boolean isAirbyteColumn(final String name) {
if (AIRBYTE_COLUMNS.contains(name)) {
return true;
}
return name.startsWith("_airbyte") && name.endsWith("_hashid");
}
private Object getTypedFieldValue(final FieldValueList row, final Field field) {
final FieldValue fieldValue = row.get(field.getName());
if (fieldValue.getValue() != null) {
return switch (field.getType().getStandardType()) {
case FLOAT64, NUMERIC -> fieldValue.getDoubleValue();
case INT64 -> fieldValue.getNumericValue().intValue();
case STRING -> fieldValue.getStringValue();
case BOOL -> fieldValue.getBooleanValue();
case STRUCT -> fieldValue.getRecordValue().toString();
default -> fieldValue.getValue();
};
} else {
return null;
}
}
protected JsonNode createConfig() throws IOException {
return createCommonConfig();
}
@Override
protected void setup(final TestDestinationEnv testEnv, final HashSet<String> TEST_SCHEMAS) throws Exception {
config = createConfig();
bigquery = configureBigQuery(config);
dataset = getBigQueryDataSet(config, bigquery);
}
@Override
protected void tearDown(final TestDestinationEnv testEnv) {
tearDownBigQuery(dataset, bigquery);
}
// todo (cgardens) - figure out how to share these helpers. they are currently copied from
// BigQueryDestination.
private static ImmutablePair<Job, String> executeQuery(final BigQuery bigquery, final QueryJobConfiguration queryConfig) {
final JobId jobId = JobId.of(UUID.randomUUID().toString());
final Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build());
return executeQuery(queryJob);
}
private static ImmutablePair<Job, String> executeQuery(final Job queryJob) {
final Job completedJob = waitForQuery(queryJob);
if (completedJob == null) {
throw new RuntimeException("Job no longer exists");
} else if (completedJob.getStatus().getError() != null) {
// You can also look at queryJob.getStatus().getExecutionErrors() for all
// errors, not just the latest one.
return ImmutablePair.of(null, (completedJob.getStatus().getError().toString()));
}
return ImmutablePair.of(completedJob, null);
}
private static Job waitForQuery(final Job queryJob) {
try {
return queryJob.waitFor();
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
/**
* Verify that the integration successfully writes normalized records successfully (without actually
* running the normalization module) Tests a wide variety of messages an schemas (aspirationally,
* anyway).
*/
@ParameterizedTest
@ArgumentsSource(DataArgumentsProvider.class)
public void testSyncNormalizedWithoutNormalization(final String messagesFilename, final String catalogFilename) throws Exception {
final AirbyteCatalog catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename), AirbyteCatalog.class);
final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
final List<AirbyteMessage> messages = MoreResources.readResource(messagesFilename).lines()
.map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList());
final JsonNode config = getConfig();
// don't run normalization though
runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false);
final String defaultSchema = getDefaultSchema(config);
final List<AirbyteRecordMessage> actualMessages = retrieveNormalizedRecords(catalog, defaultSchema);
assertSameMessages(messages, actualMessages, true);
}
}

View File

@@ -1,378 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.AIRBYTE_COLUMNS;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.BIGQUERY_DATETIME_FORMAT;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.USERS_STREAM_NAME;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.configureBigQuery;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.createCommonConfig;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getAnyOfFormats;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getAnyOfFormatsWithEmptyList;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getAnyOfFormatsWithNull;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getAnyOfSchema;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getBigQueryDataSet;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getCommonCatalog;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getData;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataArrays;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataMaxNestedDepth;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataTooDeepNestedDepth;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithEmptyObjectAndArray;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithFormats;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithJSONDateTimeFormats;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithJSONWithReference;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithNestedDatetimeInsideNullObject;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getExpectedDataArrays;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchema;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaArrays;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaMaxNestedDepth;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaTooDeepNestedDepth;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithDateTime;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithFormats;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithInvalidArrayType;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithNestedDatetimeInsideNullObject;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithReferenceDefinition;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.runDestinationWrite;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.tearDownBigQuery;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.params.provider.Arguments.arguments;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.Dataset;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import java.io.IOException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.assertj.core.util.Sets;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
class BigQueryDenormalizedDestinationTest {
// Logger bound to this test class.
private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedDestinationTest.class);
// Timestamp captured once, when the class is initialized.
protected static final Instant NOW = Instant.now();
// Shared record fixtures for the users stream, one per test data set. They are static and
// mutable: setup() overwrites the namespace of every one of them with the current dataset id.
protected static final AirbyteMessage MESSAGE_USERS1 = createRecordMessage(USERS_STREAM_NAME, getData());
protected static final AirbyteMessage MESSAGE_USERS2 = createRecordMessage(USERS_STREAM_NAME, getDataWithEmptyObjectAndArray());
protected static final AirbyteMessage MESSAGE_USERS3 = createRecordMessage(USERS_STREAM_NAME, getDataWithFormats());
protected static final AirbyteMessage MESSAGE_USERS4 = createRecordMessage(USERS_STREAM_NAME, getDataWithJSONDateTimeFormats());
protected static final AirbyteMessage MESSAGE_USERS5 = createRecordMessage(USERS_STREAM_NAME, getDataWithJSONWithReference());
// Record whose only field, "users", is null.
protected static final AirbyteMessage MESSAGE_USERS6 = createRecordMessage(USERS_STREAM_NAME, Jsons.deserialize("{\"users\":null}"));
protected static final AirbyteMessage MESSAGE_USERS7 = createRecordMessage(USERS_STREAM_NAME, getDataWithNestedDatetimeInsideNullObject());
protected static final AirbyteMessage MESSAGE_USERS8 = createRecordMessage(USERS_STREAM_NAME, getAnyOfFormats());
protected static final AirbyteMessage MESSAGE_USERS9 = createRecordMessage(USERS_STREAM_NAME, getAnyOfFormatsWithNull());
protected static final AirbyteMessage MESSAGE_USERS10 = createRecordMessage(USERS_STREAM_NAME, getAnyOfFormatsWithEmptyList());
protected static final AirbyteMessage MESSAGE_USERS11 = createRecordMessage(USERS_STREAM_NAME, getDataArrays());
protected static final AirbyteMessage MESSAGE_USERS12 = createRecordMessage(USERS_STREAM_NAME, getDataTooDeepNestedDepth());
protected static final AirbyteMessage MESSAGE_USERS13 = createRecordMessage(USERS_STREAM_NAME, getDataMaxNestedDepth());
// Record with an empty JSON object as data.
protected static final AirbyteMessage EMPTY_MESSAGE = createRecordMessage(USERS_STREAM_NAME, Jsons.deserialize("{}"));
// Per-test state, assigned in setup().
protected JsonNode config;
protected BigQuery bigquery;
protected Dataset dataset;
protected String datasetId;
/**
 * Builds the destination configuration used by the tests. Subclasses may override it to supply
 * a different configuration.
 *
 * @return the configuration produced by {@code createCommonConfig()}
 * @throws IOException if the common configuration cannot be created
 */
protected JsonNode createConfig() throws IOException {
  final JsonNode commonConfig = createCommonConfig();
  return commonConfig;
}
/**
 * Creates the config, the BigQuery client and the dataset for a test, then points every shared
 * record fixture at that dataset. Does nothing for the test displayed as {@code testSpec()}.
 *
 * @param info metadata of the test about to run
 * @throws IOException if the configuration cannot be created
 */
@BeforeEach
void setup(final TestInfo info) throws IOException {
  final boolean isSpecTest = info.getDisplayName().equals("testSpec()");
  if (isSpecTest) {
    return;
  }

  config = createConfig();
  bigquery = configureBigQuery(config);
  dataset = getBigQueryDataSet(config, bigquery);
  datasetId = dataset.getDatasetId().getDataset();

  // The fixtures are static, so their namespace has to be refreshed for every new dataset.
  Stream.of(
      MESSAGE_USERS1,
      MESSAGE_USERS2,
      MESSAGE_USERS3,
      MESSAGE_USERS4,
      MESSAGE_USERS5,
      MESSAGE_USERS6,
      MESSAGE_USERS7,
      MESSAGE_USERS8,
      MESSAGE_USERS9,
      MESSAGE_USERS10,
      MESSAGE_USERS11,
      MESSAGE_USERS12,
      MESSAGE_USERS13,
      EMPTY_MESSAGE)
      .forEachOrdered(fixture -> fixture.getRecord().setNamespace(datasetId));
}
/**
 * Hands the dataset created in setup() to tearDownBigQuery after each test, except for the test
 * whose display name is "testSpec()" (setup() creates nothing for it).
 */
@AfterEach
void tearDown(final TestInfo info) {
  if (!info.getDisplayName().equals("testSpec()")) {
    tearDownBigQuery(dataset, bigquery);
  }
}
/**
 * Writes one record using the supplied schema and checks that exactly one row comes back whose
 * "name", "grants" and "domain" values match the record that was sent.
 */
@ParameterizedTest
@MethodSource("schemaAndDataProvider")
void testNestedWrite(final JsonNode schema, final AirbyteMessage message) throws Exception {
  runDestinationWrite(getCommonCatalog(schema, datasetId), config, message);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  final JsonNode expectedUsersJson = message.getRecord().getData();
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  assertEquals(extractJsonValues(expectedUsersJson, "name"), extractJsonValues(resultJson, "name"));
  assertEquals(extractJsonValues(expectedUsersJson, "grants"), extractJsonValues(resultJson, "grants"));
  assertEquals(extractJsonValues(expectedUsersJson, "domain"), extractJsonValues(resultJson, "domain"));
}
/**
 * Writes MESSAGE_USERS7 with the "nested datetime inside null object" schema and checks that the
 * single stored row has the same "name" and "appointment" values as the record that was sent.
 */
@Test
void testNestedDataTimeInsideNullObject() throws Exception {
  runDestinationWrite(getCommonCatalog(getSchemaWithNestedDatetimeInsideNullObject(), datasetId), config, MESSAGE_USERS7);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  final JsonNode expectedUsersJson = MESSAGE_USERS7.getRecord().getData();
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  assertEquals(extractJsonValues(expectedUsersJson, "name"), extractJsonValues(resultJson, "name"));
  assertEquals(extractJsonValues(expectedUsersJson, "appointment"), extractJsonValues(resultJson, "appointment"));
}
/**
 * Table schema that testWriteWithFormat expects to find after the write. Subclasses override this
 * when their configuration produces different column types.
 */
protected Schema getExpectedSchemaForWriteWithFormatTest() {
  final Field name = Field.of("name", StandardSQLTypeName.STRING);
  final Field dateOfBirth = Field.of("date_of_birth", StandardSQLTypeName.DATE);
  final Field updatedAt = Field.of("updated_at", StandardSQLTypeName.DATETIME);
  final Field airbyteId = Field.of(JavaBaseConstants.COLUMN_NAME_AB_ID, StandardSQLTypeName.STRING);
  final Field emittedAt = Field.of(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, StandardSQLTypeName.TIMESTAMP);
  return Schema.of(name, dateOfBirth, updatedAt, airbyteId, emittedAt);
}
/**
 * Writes MESSAGE_USERS3 with the "formats" schema and checks the stored "name", "date_of_birth"
 * and "updated_at" values as well as the resulting table schema.
 */
@Test
void testWriteWithFormat() throws Exception {
  runDestinationWrite(getCommonCatalog(getSchemaWithFormats(), datasetId), config, MESSAGE_USERS3);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  final JsonNode expectedUsersJson = MESSAGE_USERS3.getRecord().getData();
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  assertEquals(extractJsonValues(expectedUsersJson, "name"), extractJsonValues(resultJson, "name"));
  assertEquals(extractJsonValues(expectedUsersJson, "date_of_birth"), extractJsonValues(resultJson, "date_of_birth"));
  // Bigquery's datetime type accepts multiple input format but always outputs the same, so we can't
  // expect to receive the value we sent. Both sides are therefore parsed before comparing.
  // The values are parsed as date-times (not dates) so the time of day is part of the comparison;
  // parsing them as LocalDate silently discarded it.
  final DateTimeFormatter datetimeFormatter = DateTimeFormatter.ofPattern(BIGQUERY_DATETIME_FORMAT);
  final var expectedValue = java.time.LocalDateTime.parse(
      extractJsonValues(expectedUsersJson, "updated_at").stream().findFirst().get(), datetimeFormatter);
  final var actualValue = java.time.LocalDateTime.parse(
      extractJsonValues(resultJson, "updated_at").stream().findFirst().get(), datetimeFormatter);
  assertEquals(expectedValue, actualValue);
  assertEquals(getExpectedSchemaForWriteWithFormatTest(), BigQueryUtils.getTableDefinition(bigquery, datasetId, USERS_STREAM_NAME).getSchema());
}
/**
 * Writes MESSAGE_USERS8 with the anyOf/allOf schema and checks that every listed field of the
 * single stored row matches the record that was sent.
 */
@Test
@Disabled // Issue #5912 is reopened
void testAnyOf() throws Exception {
  runDestinationWrite(getCommonCatalog(getAnyOfSchema(), datasetId), config, MESSAGE_USERS8);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  final JsonNode expectedUsersJson = MESSAGE_USERS8.getRecord().getData();
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  final List<String> comparedFields = List.of(
      "id", "name", "type", "email", "avatar", "team_ids", "admin_ids", "all_of_field",
      "job_title", "has_inbox_seat", "away_mode_enabled", "away_mode_reassign");
  for (final String field : comparedFields) {
    assertEquals(extractJsonValues(expectedUsersJson, field), extractJsonValues(resultJson, field),
        "Unexpected values for field '" + field + "'");
  }
}
/**
 * Writes MESSAGE_USERS9 (anyOf fields set to null) and checks that the listed fields of the single
 * stored row match the record that was sent.
 */
@Test
@Disabled // Issue #5912 is reopened
void testAnyOfWithNull() throws Exception {
  runDestinationWrite(getCommonCatalog(getAnyOfSchema(), datasetId), config, MESSAGE_USERS9);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  final JsonNode expectedUsersJson = MESSAGE_USERS9.getRecord().getData();
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  for (final String field : List.of("name", "team_ids", "all_of_field", "avatar")) {
    assertEquals(extractJsonValues(expectedUsersJson, field), extractJsonValues(resultJson, field),
        "Unexpected values for field '" + field + "'");
  }
}
/**
 * Writes MESSAGE_USERS10 (anyOf field holding an empty list) and checks that the listed fields of
 * the single stored row match the record that was sent.
 */
@Test
@Disabled // Issue #5912 is reopened
void testAnyOfWithEmptyList() throws Exception {
  runDestinationWrite(getCommonCatalog(getAnyOfSchema(), datasetId), config, MESSAGE_USERS10);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  final JsonNode expectedUsersJson = MESSAGE_USERS10.getRecord().getData();
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  for (final String field : List.of("name", "team_ids", "all_of_field")) {
    assertEquals(extractJsonValues(expectedUsersJson, field), extractJsonValues(resultJson, field),
        "Unexpected values for field '" + field + "'");
  }
}
/**
 * Writes MESSAGE_USERS4 and checks that both the top-level "updated_at" value and the
 * "nested_datetime" value under "items" come back rendered with BIGQUERY_DATETIME_FORMAT in UTC.
 */
@Test
void testIfJSONDateTimeWasConvertedToBigQueryFormat() throws Exception {
  runDestinationWrite(getCommonCatalog(getSchemaWithDateTime(), datasetId), config, MESSAGE_USERS4);
  final List<JsonNode> usersActual = retrieveRecordsAsJson(USERS_STREAM_NAME);
  // assertEquals takes (expected, actual); this order keeps failure messages truthful.
  assertEquals(1, usersActual.size());
  final JsonNode resultJson = usersActual.get(0);
  // BigQuery Accepts "YYYY-MM-DD HH:MM:SS[.SSSSSS]" format
  Set<String> actualValues = extractJsonValues(resultJson, "updated_at");
  assertEquals(Set.of(new DateTime("2021-10-11T06:36:53+00:00").withZone(DateTimeZone.UTC).toString(BIGQUERY_DATETIME_FORMAT)),
      actualValues);
  // check nested datetime
  actualValues = extractJsonValues(resultJson.get("items"), "nested_datetime");
  assertEquals(Set.of(new DateTime("2021-11-11T06:36:53+00:00").withZone(DateTimeZone.UTC).toString(BIGQUERY_DATETIME_FORMAT)),
      actualValues);
}
/**
 * Writes two records plus an empty record using the schema with a reference definition, then
 * checks the distinct "users" values found across all stored rows.
 */
@Test
void testJsonReferenceDefinition() throws Exception {
  runDestinationWrite(getCommonCatalog(getSchemaWithReferenceDefinition(), datasetId), config, MESSAGE_USERS5, MESSAGE_USERS6, EMPTY_MESSAGE);

  // Gather the "users" values of every stored row into one set.
  final Set<String> storedUsers = new HashSet<>();
  for (final JsonNode row : retrieveRecordsAsJson(USERS_STREAM_NAME)) {
    storedUsers.addAll(extractJsonValues(row, "users"));
  }

  final Set<String> expectedUsers = Sets.set(
      "\"{\\\"name\\\":\\\"John\\\",\\\"surname\\\":\\\"Adams\\\"}\"",
      null // we expect one record to have not had the users field set
  );
  assertEquals(2, storedUsers.size());
  assertEquals(expectedUsers, storedUsers);
}
/**
 * Writes MESSAGE_USERS11 with the arrays schema and compares the first stored row with the
 * expected-arrays fixture.
 */
@Test
void testArrays() throws Exception {
  runDestinationWrite(getCommonCatalog(getSchemaArrays(), datasetId), config, MESSAGE_USERS11);
  final JsonNode expectedRow = getExpectedDataArrays();
  final JsonNode firstStoredRow = retrieveRecordsAsJson(USERS_STREAM_NAME).get(0);
  assertEquals(expectedRow, firstStoredRow);
}
// Issue #14668
/**
 * Writes a record whose schema nests deeper than the max-depth fixture. If the write fails, the
 * failure's cause must mention "nested too deeply".
 *
 * NOTE(review): a write that completes without throwing still passes, as before. If an exception
 * is mandatory here, this should become an assertThrows - confirm the intended contract.
 */
@Test
void testTooDeepNestedDepth() {
  try {
    runDestinationWrite(getCommonCatalog(getSchemaTooDeepNestedDepth(), datasetId), config, MESSAGE_USERS12);
  } catch (final Exception e) {
    // Use a JUnit assertion instead of the `assert` keyword, which is skipped entirely when the
    // JVM runs without -ea. Also tolerate a missing cause or message instead of hitting an NPE.
    final Throwable cause = e.getCause();
    final String causeMessage = cause == null ? null : cause.getMessage();
    org.junit.jupiter.api.Assertions.assertTrue(
        causeMessage != null && causeMessage.contains("nested too deeply"),
        "Expected the failure cause to mention 'nested too deeply' but got: " + e);
  }
}
// Issue #14668
/**
 * Writes the max-nested-depth record and checks that the innermost "str_value" of the first
 * stored row equals the one in the fixture.
 */
@Test
void testMaxNestedDepth() throws Exception {
  runDestinationWrite(getCommonCatalog(getSchemaMaxNestedDepth(), datasetId), config, MESSAGE_USERS13);
  final String expectedLeaf = getDataMaxNestedDepth().findValue("str_value").asText();
  final String storedLeaf = retrieveRecordsAsJson(USERS_STREAM_NAME).get(0).findValue("str_value").asText();
  assertEquals(expectedLeaf, storedLeaf);
}
/**
 * Collects the text values of every occurrence of {@code attributeName} found anywhere under
 * {@code node}.
 *
 * Array occurrences contribute the text value of each element, object occurrences contribute the
 * values found under their "big_query_array" attribute, and any other occurrence contributes its
 * own text value. Non-textual nodes contribute {@code null}, because textValue() is used.
 *
 * @param node JSON tree to search
 * @param attributeName attribute to look for
 * @return the distinct collected values (may contain null)
 */
private Set<String> extractJsonValues(final JsonNode node, final String attributeName) {
  final Set<String> collected = new HashSet<>();
  for (final JsonNode occurrence : node.findValues(attributeName)) {
    if (occurrence.isArray()) {
      for (final JsonNode element : occurrence) {
        collected.add(element.textValue());
      }
    } else if (occurrence.isObject()) {
      collected.addAll(extractJsonValues(occurrence, "big_query_array"));
    } else {
      collected.add(occurrence.textValue());
    }
  }
  return collected;
}
/**
 * Removes every column listed in AIRBYTE_COLUMNS from the given record, in place.
 *
 * @param record row to clean; it is cast to ObjectNode
 * @return the same instance that was passed in
 */
private JsonNode removeAirbyteMetadataFields(final JsonNode record) {
  AIRBYTE_COLUMNS.forEach(metadataColumn -> ((ObjectNode) record).remove(metadataColumn));
  return record;
}
/**
 * Reads every row of the given table from the test dataset as JSON.
 *
 * Each row is serialised by BigQuery with TO_JSON_STRING, passed through formatDateValues,
 * deserialised, and stripped of the Airbyte metadata columns.
 *
 * @param tableName table to read; it is lower-cased before being placed in the query
 * @return one JsonNode per stored row
 */
private List<JsonNode> retrieveRecordsAsJson(final String tableName) throws Exception {
  final QueryJobConfiguration queryConfig =
      QueryJobConfiguration
          .newBuilder(
              String.format("select TO_JSON_STRING(t) as jsonValue from %s.%s t;", datasetId, tableName.toLowerCase()))
          .setUseLegacySql(false).build();
  // The query is executed exactly once. It used to be executed twice, with the first result
  // thrown away, which only added a redundant round trip per call.
  var valuesStream = StreamSupport
      .stream(BigQueryUtils.executeQuery(bigquery, queryConfig).getLeft().getQueryResults().iterateAll().spliterator(), false)
      .map(v -> v.get("jsonValue").getStringValue());
  return formatDateValues(valuesStream)
      .map(Jsons::deserialize)
      .map(this::removeAirbyteMetadataFields)
      .collect(Collectors.toList());
}
/**
 * BigQuery returns date values in a different format based on the column type. Datetime :
 * YYYY-MM-DD'T'HH:MM:SS Timestamp : YYYY-MM-DD'T'HH:MM:SS'Z'
 *
 * This method rewrites all values to the Airbyte format (no trailing 'Z') to simplify test result
 * validation: a quoted value of the form "YYYY-MM-DDTHH:MM:SSZ" loses its 'Z'.
 */
private Stream<String> formatDateValues(Stream<String> values) {
  // group 1: opening quote plus the date-time, group 2: the 'Z' to drop, group 3: closing quote
  final String quotedTimestampWithZ = "(\"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2})(Z)(\")";
  final String withoutZ = "$1$3";
  return values.map(json -> json.replaceAll(quotedTimestampWithZ, withoutZ));
}
/**
 * Supplies the (schema, message) pairs that testNestedWrite runs against.
 */
private static Stream<Arguments> schemaAndDataProvider() {
  final Arguments plainSchemaFirstUser = arguments(getSchema(), MESSAGE_USERS1);
  final Arguments invalidArrayTypeSchemaFirstUser = arguments(getSchemaWithInvalidArrayType(), MESSAGE_USERS1);
  final Arguments plainSchemaSecondUser = arguments(getSchema(), MESSAGE_USERS2);
  return Stream.of(plainSchemaFirstUser, invalidArrayTypeSchemaFirstUser, plainSchemaSecondUser);
}
/**
 * Wraps the given data in a RECORD message for the given stream, emitted at NOW.
 *
 * @param stream stream name to put on the record
 * @param data record payload
 * @return the new RECORD message
 */
private static AirbyteMessage createRecordMessage(final String stream, final JsonNode data) {
  final AirbyteRecordMessage record = new AirbyteRecordMessage()
      .withStream(stream)
      .withData(data)
      .withEmittedAt(NOW.toEpochMilli());
  return new AirbyteMessage()
      .withType(AirbyteMessage.Type.RECORD)
      .withRecord(record);
}
}

View File

@@ -1,52 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.createGcsConfig;
import static org.junit.jupiter.api.Assertions.assertEquals;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.airbyte.commons.json.Jsons;
import io.airbyte.integrations.destination.record_buffer.FileBuffer;
import java.io.IOException;
import org.junit.jupiter.api.Test;
/**
 * Runs the denormalized BigQuery acceptance tests with the GCS configuration and adds checks for
 * how the destination derives its number of file buffers from that configuration.
 */
public class BigQueryDenormalizedGcsDestinationAcceptanceTest extends BigQueryDenormalizedDestinationAcceptanceTest {

  @Override
  protected JsonNode createConfig() throws IOException {
    return createGcsConfig();
  }

  /*
   * FileBuffer Default Tests
   */
  /** With the unmodified config, the default file buffer count is expected. */
  @Test
  public void testGetFileBufferDefault() {
    final BigQueryDenormalizedDestination destination = new BigQueryDenormalizedDestination();
    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(FileBuffer.DEFAULT_MAX_CONCURRENT_STREAM_IN_BUFFER,
        destination.getNumberOfFileBuffers(config));
  }

  /** A requested count of 100 is expected to be capped at the maximum. */
  @Test
  public void testGetFileBufferMaxLimited() {
    final JsonNode configWithLargeBufferCount = Jsons.clone(config);
    ((ObjectNode) configWithLargeBufferCount.get(BigQueryConsts.LOADING_METHOD)).put(FileBuffer.FILE_BUFFER_COUNT_KEY, 100);
    final BigQueryDenormalizedDestination destination = new BigQueryDenormalizedDestination();
    assertEquals(FileBuffer.MAX_CONCURRENT_STREAM_IN_BUFFER, destination.getNumberOfFileBuffers(configWithLargeBufferCount));
  }

  /** A requested count of 1 is expected to be raised to the default. */
  @Test
  public void testGetMinimumFileBufferCount() {
    final JsonNode configWithSmallBufferCount = Jsons.clone(config);
    ((ObjectNode) configWithSmallBufferCount.get(BigQueryConsts.LOADING_METHOD)).put(FileBuffer.FILE_BUFFER_COUNT_KEY, 1);
    final BigQueryDenormalizedDestination destination = new BigQueryDenormalizedDestination();
    // User cannot set number of file counts below the default file buffer count, which is existing
    // behavior
    assertEquals(FileBuffer.DEFAULT_MAX_CONCURRENT_STREAM_IN_BUFFER, destination.getNumberOfFileBuffers(configWithSmallBufferCount));
  }

}

View File

@@ -1,33 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.createGcsConfig;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import io.airbyte.integrations.base.JavaBaseConstants;
import java.io.IOException;
/**
 * Re-runs the tests inherited from BigQueryDenormalizedDestinationTest with the GCS configuration.
 * The only expectation that differs is the schema checked by the write-with-format test, where
 * "updated_at" is a TIMESTAMP column.
 */
class BigQueryDenormalizedGcsDestinationTest extends BigQueryDenormalizedDestinationTest {

  @Override
  protected JsonNode createConfig() throws IOException {
    final JsonNode gcsConfig = createGcsConfig();
    return gcsConfig;
  }

  @Override
  protected Schema getExpectedSchemaForWriteWithFormatTest() {
    final Field name = Field.of("name", StandardSQLTypeName.STRING);
    final Field dateOfBirth = Field.of("date_of_birth", StandardSQLTypeName.DATE);
    final Field updatedAt = Field.of("updated_at", StandardSQLTypeName.TIMESTAMP);
    final Field airbyteId = Field.of(JavaBaseConstants.COLUMN_NAME_AB_ID, StandardSQLTypeName.STRING);
    final Field emittedAt = Field.of(JavaBaseConstants.COLUMN_NAME_EMITTED_AT, StandardSQLTypeName.TIMESTAMP);
    return Schema.of(name, dateOfBirth, updatedAt, airbyteId, emittedAt);
  }

}

View File

@@ -1,24 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import io.airbyte.integrations.base.JavaBaseConstants;
import java.nio.file.Path;
import java.util.List;
/**
 * Constants shared by the denormalized BigQuery tests.
 */
public class BigQueryDenormalizedTestConstants {

  // Keys of the connector configuration JSON.
  public static final String CONFIG_PROJECT_ID = "project_id";
  public static final String CONFIG_DATASET_ID = "dataset_id";
  public static final String CONFIG_DATASET_LOCATION = "dataset_location";
  public static final String CONFIG_CREDS = "credentials_json";

  // Location of the credentials file, relative to the working directory.
  public static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json");

  // Stream name used by the test records.
  public static final String USERS_STREAM_NAME = "users";

  // Airbyte metadata columns (record id and emitted-at).
  public static final List<String> AIRBYTE_COLUMNS = List.of(JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_EMITTED_AT);

  // Date-time pattern without a zone designator.
  public static final String BIGQUERY_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss";

  public static final BigQuerySQLNameTransformer NAME_TRANSFORMER = new BigQuerySQLNameTransformer();

}

View File

@@ -1,94 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import io.airbyte.integrations.destination.StandardNameTransformer;
import io.airbyte.integrations.standardtest.destination.comparator.AdvancedTestDataComparator;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Test data comparator for the denormalized BigQuery destination. It accepts destination values
 * either in the Airbyte formats or in the 'Z'-suffixed BIGQUERY_DATETIME_FORMAT, and skips
 * validation of date-times before the year 1583 (#13123).
 */
public class BigQueryDenormalizedTestDataComparator extends AdvancedTestDataComparator {

  private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedTestDataComparator.class);
  private static final String BIGQUERY_DATETIME_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'";

  private final StandardNameTransformer namingResolver = new StandardNameTransformer();

  /** Returns the identifier as given, followed by its name-transformed form. */
  @Override
  protected List<String> resolveIdentifier(final String identifier) {
    final List<String> candidates = new ArrayList<>();
    candidates.add(identifier);
    candidates.add(namingResolver.getIdentifier(identifier));
    return candidates;
  }

  /** Parses a destination date; values ending in 'Z' use the BigQuery date-time pattern. */
  private LocalDate parseDate(String dateValue) {
    if (dateValue == null) {
      return null;
    }
    final String pattern;
    if (dateValue.matches(".+Z")) {
      pattern = BIGQUERY_DATETIME_FORMAT;
    } else {
      pattern = AIRBYTE_DATE_FORMAT;
    }
    return LocalDate.parse(dateValue, DateTimeFormatter.ofPattern(pattern));
  }

  /** Parses a destination date-time; values ending in 'Z' use the BigQuery date-time pattern. */
  private LocalDateTime parseDateTime(String dateTimeValue) {
    if (dateTimeValue == null) {
      return null;
    }
    final String pattern;
    if (dateTimeValue.matches(".+Z")) {
      pattern = BIGQUERY_DATETIME_FORMAT;
    } else {
      pattern = AIRBYTE_DATETIME_FORMAT;
    }
    return LocalDateTime.parse(dateTimeValue, DateTimeFormatter.ofPattern(pattern));
  }

  @Override
  protected boolean compareDateTimeValues(String expectedValue, String actualValue) {
    final LocalDateTime actual = parseDateTime(actualValue);
    final LocalDateTime expected = LocalDateTime.parse(expectedValue, DateTimeFormatter.ofPattern(AIRBYTE_DATETIME_FORMAT));
    if (!expected.isBefore(getBrokenDate().toLocalDateTime())) {
      return expected.equals(actual);
    }
    LOGGER
        .warn("Validation is skipped due to known Normalization issue. Values older then 1583 year and with time zone stored wrongly(lose days).");
    return true;
  }

  @Override
  protected boolean compareDateValues(String expectedValue, String actualValue) {
    final LocalDate actual = parseDate(actualValue);
    final LocalDate expected = LocalDate.parse(expectedValue, DateTimeFormatter.ofPattern(AIRBYTE_DATE_FORMAT));
    return expected.equals(actual);
  }

  @Override
  protected ZonedDateTime parseDestinationDateWithTz(String destinationValue) {
    final DateTimeFormatter formatter = DateTimeFormatter.ofPattern(BIGQUERY_DATETIME_FORMAT);
    final LocalDateTime localValue = LocalDateTime.parse(destinationValue, formatter);
    return ZonedDateTime.of(localValue, ZoneOffset.UTC);
  }

  @Override
  protected boolean compareDateTimeWithTzValues(String airbyteMessageValue, String destinationValue) {
    // #13123 Normalization issue
    final boolean predatesBrokenDate = parseDestinationDateWithTz(destinationValue).isBefore(getBrokenDate());
    if (!predatesBrokenDate) {
      return super.compareDateTimeWithTzValues(airbyteMessageValue, destinationValue);
    }
    LOGGER
        .warn("Validation is skipped due to known Normalization issue. Values older then 1583 year and with time zone stored wrongly(lose days).");
    return true;
  }

  // #13123 Normalization issue
  /** Start of the year 1583 in UTC; earlier values are not validated. */
  private ZonedDateTime getBrokenDate() {
    return ZonedDateTime.of(1583, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC);
  }

}

View File

@@ -1,280 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.util;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.CONFIG_CREDS;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.CONFIG_DATASET_ID;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.CONFIG_DATASET_LOCATION;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.CONFIG_PROJECT_ID;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.CREDENTIALS_PATH;
import static io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants.USERS_STREAM_NAME;
import com.fasterxml.jackson.databind.JsonNode;
import com.google.auth.oauth2.ServiceAccountCredentials;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Dataset;
import com.google.cloud.bigquery.DatasetInfo;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.string.Strings;
import io.airbyte.integrations.base.AirbyteMessageConsumer;
import io.airbyte.integrations.base.Destination;
import io.airbyte.integrations.destination.bigquery.BigQueryConsts;
import io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedDestination;
import io.airbyte.integrations.destination.bigquery.BigQueryDenormalizedTestConstants;
import io.airbyte.integrations.destination.bigquery.BigQueryDestination;
import io.airbyte.integrations.destination.bigquery.BigQueryUtils;
import io.airbyte.protocol.models.v0.AirbyteMessage;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import io.airbyte.protocol.models.v0.DestinationSyncMode;
import io.airbyte.protocol.models.v0.SyncMode;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Helpers shared by the denormalized BigQuery tests: JSON fixture loading, catalog and config
 * construction, running a write through the destination, and creating and cleaning up the
 * BigQuery dataset a test writes to.
 */
public class BigQueryDenormalizedTestDataUtils {

  private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedTestDataUtils.class);

  private static final String JSON_FILES_BASE_LOCATION = "testdata/";

  // Datasets created through getBigQueryDataSet that have not been deleted yet. Guarded by the
  // class monitor (trackTestDataSet and tearDownBigQuery are both static synchronized).
  private static final Set<Dataset> dataSetsForDrop = new HashSet<>();

  // ---- JSON fixtures: each getter loads one file from the "testdata/" classpath folder ----

  public static JsonNode getSchema() {
    return getTestDataFromResourceJson("schema.json");
  }

  public static JsonNode getAnyOfSchema() {
    return getTestDataFromResourceJson("schemaAnyOfAllOf.json");
  }

  public static JsonNode getSchemaWithFormats() {
    return getTestDataFromResourceJson("schemaWithFormats.json");
  }

  public static JsonNode getSchemaWithDateTime() {
    return getTestDataFromResourceJson("schemaWithDateTime.json");
  }

  public static JsonNode getSchemaWithInvalidArrayType() {
    return getTestDataFromResourceJson("schemaWithInvalidArrayType.json");
  }

  public static JsonNode getSchemaArrays() {
    return getTestDataFromResourceJson("schemaArrays.json");
  }

  public static JsonNode getDataArrays() {
    return getTestDataFromResourceJson("dataArrays.json");
  }

  public static JsonNode getSchemaTooDeepNestedDepth() {
    return getTestDataFromResourceJson("schemaTooDeepNestedDepth.json");
  }

  public static JsonNode getDataTooDeepNestedDepth() {
    return getTestDataFromResourceJson("dataTooDeepNestedDepth.json");
  }

  public static JsonNode getSchemaMaxNestedDepth() {
    return getTestDataFromResourceJson("schemaMaxNestedDepth.json");
  }

  public static JsonNode getDataMaxNestedDepth() {
    return getTestDataFromResourceJson("dataMaxNestedDepth.json");
  }

  public static JsonNode getExpectedDataArrays() {
    return getTestDataFromResourceJson("expectedDataArrays.json");
  }

  public static JsonNode getData() {
    return getTestDataFromResourceJson("data.json");
  }

  public static JsonNode getDataWithFormats() {
    return getTestDataFromResourceJson("dataWithFormats.json");
  }

  public static JsonNode getAnyOfFormats() {
    return getTestDataFromResourceJson("dataAnyOfFormats.json");
  }

  public static JsonNode getAnyOfFormatsWithNull() {
    return getTestDataFromResourceJson("dataAnyOfFormatsWithNull.json");
  }

  public static JsonNode getAnyOfFormatsWithEmptyList() {
    return getTestDataFromResourceJson("dataAnyOfFormatsWithEmptyList.json");
  }

  public static JsonNode getDataWithJSONDateTimeFormats() {
    return getTestDataFromResourceJson("dataWithJSONDateTimeFormats.json");
  }

  public static JsonNode getDataWithJSONWithReference() {
    return getTestDataFromResourceJson("dataWithJSONWithReference.json");
  }

  public static JsonNode getSchemaWithReferenceDefinition() {
    return getTestDataFromResourceJson("schemaWithReferenceDefinition.json");
  }

  public static JsonNode getSchemaWithNestedDatetimeInsideNullObject() {
    return getTestDataFromResourceJson("schemaWithNestedDatetimeInsideNullObject.json");
  }

  public static JsonNode getDataWithEmptyObjectAndArray() {
    return getTestDataFromResourceJson("dataWithEmptyObjectAndArray.json");
  }

  public static JsonNode getDataWithNestedDatetimeInsideNullObject() {
    return getTestDataFromResourceJson("dataWithNestedDatetimeInsideNullObject.json");
  }

  /**
   * Loads and parses a JSON fixture from the "testdata/" classpath folder.
   *
   * The resource is read as a stream rather than through URL.getPath(): that path is
   * percent-encoded (so it breaks on directories containing spaces) and is not a valid file-system
   * path on every platform.
   *
   * @param fileName file name relative to the fixture folder
   * @return the parsed JSON
   * @throws IllegalArgumentException if the resource does not exist
   * @throws RuntimeException if the resource cannot be read
   */
  private static JsonNode getTestDataFromResourceJson(final String fileName) {
    final String resourceName = JSON_FILES_BASE_LOCATION + fileName;
    try (final java.io.InputStream resource =
        BigQueryDenormalizedTestDataUtils.class.getClassLoader().getResourceAsStream(resourceName)) {
      if (resource == null) {
        throw new IllegalArgumentException("Test data resource not found on the classpath: " + resourceName);
      }
      return Jsons.deserialize(new String(resource.readAllBytes(), StandardCharsets.UTF_8));
    } catch (final IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Builds a catalog with a single full-refresh / overwrite stream named USERS_STREAM_NAME in the
   * given namespace and with the given JSON schema.
   */
  public static ConfiguredAirbyteCatalog getCommonCatalog(final JsonNode schema, final String datasetId) {
    return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(new ConfiguredAirbyteStream()
        .withStream(new AirbyteStream().withName(USERS_STREAM_NAME).withNamespace(datasetId).withJsonSchema(schema)
            .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH)))
        .withSyncMode(SyncMode.FULL_REFRESH).withDestinationSyncMode(DestinationSyncMode.OVERWRITE)));
  }

  /**
   * Runs a complete write through a BigQueryDenormalizedDestination consumer: start, accept every
   * message in order, close.
   */
  public static void runDestinationWrite(ConfiguredAirbyteCatalog catalog, JsonNode config, AirbyteMessage... messages) throws Exception {
    final BigQueryDestination destination = new BigQueryDenormalizedDestination();
    final AirbyteMessageConsumer consumer = destination.getConsumer(config, catalog, Destination::defaultOutputRecordCollector);

    consumer.start();
    for (AirbyteMessage message : messages) {
      consumer.accept(message);
    }
    consumer.close();
  }

  /** Fails fast with an explanatory message when the credentials file is missing. */
  private static void checkCredentialPath() {
    if (!Files.exists(CREDENTIALS_PATH)) {
      throw new IllegalStateException(
          "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH
              + ". Override by setting the path with the CREDENTIALS_PATH constant.");
    }
  }

  /**
   * Builds a connector config from the credentials file, targeting a randomly suffixed
   * "airbyte_tests" dataset in the "US" location.
   */
  public static JsonNode createCommonConfig() throws IOException {
    checkCredentialPath();

    final String credentialsJsonString = Files.readString(CREDENTIALS_PATH);
    final JsonNode credentialsJson = Jsons.deserialize(credentialsJsonString).get(BigQueryConsts.BIGQUERY_BASIC_CONFIG);
    final String projectId = credentialsJson.get(CONFIG_PROJECT_ID).asText();
    final String datasetLocation = "US";
    final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8);

    return Jsons.jsonNode(ImmutableMap.builder()
        .put(CONFIG_PROJECT_ID, projectId)
        .put(BigQueryDenormalizedTestConstants.CONFIG_CREDS, credentialsJson.toString())
        .put(CONFIG_DATASET_ID, datasetId)
        .put(CONFIG_DATASET_LOCATION, datasetLocation)
        .build());
  }

  /**
   * Same as createCommonConfig, plus a GCS-staging loading method built from the GCS section of the
   * credentials file. The bucket path gets the current time in milliseconds appended.
   */
  public static JsonNode createGcsConfig() throws IOException {
    checkCredentialPath();

    final String credentialsJsonString = Files.readString(CREDENTIALS_PATH);

    final JsonNode fullConfigFromSecretFileJson = Jsons.deserialize(credentialsJsonString);
    final JsonNode bigqueryConfigFromSecretFile = fullConfigFromSecretFileJson.get(BigQueryConsts.BIGQUERY_BASIC_CONFIG);
    final JsonNode gcsConfigFromSecretFile = fullConfigFromSecretFileJson.get(BigQueryConsts.GCS_CONFIG);

    final String projectId = bigqueryConfigFromSecretFile.get(CONFIG_PROJECT_ID).asText();
    final String datasetLocation = "US";
    final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8);

    final JsonNode gcsCredentialFromSecretFile = gcsConfigFromSecretFile.get(BigQueryConsts.CREDENTIAL);
    final JsonNode credential = Jsons.jsonNode(ImmutableMap.builder()
        .put(BigQueryConsts.CREDENTIAL_TYPE, gcsCredentialFromSecretFile.get(BigQueryConsts.CREDENTIAL_TYPE))
        .put(BigQueryConsts.HMAC_KEY_ACCESS_ID, gcsCredentialFromSecretFile.get(BigQueryConsts.HMAC_KEY_ACCESS_ID))
        .put(BigQueryConsts.HMAC_KEY_ACCESS_SECRET, gcsCredentialFromSecretFile.get(BigQueryConsts.HMAC_KEY_ACCESS_SECRET))
        .build());

    final JsonNode loadingMethod = Jsons.jsonNode(ImmutableMap.builder()
        .put(BigQueryConsts.METHOD, BigQueryConsts.GCS_STAGING)
        .put(BigQueryConsts.GCS_BUCKET_NAME, gcsConfigFromSecretFile.get(BigQueryConsts.GCS_BUCKET_NAME))
        .put(BigQueryConsts.GCS_BUCKET_PATH, gcsConfigFromSecretFile.get(BigQueryConsts.GCS_BUCKET_PATH).asText() + System.currentTimeMillis())
        .put(BigQueryConsts.CREDENTIAL, credential)
        .build());

    return Jsons.jsonNode(ImmutableMap.builder()
        .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId)
        .put(BigQueryConsts.CONFIG_CREDS, bigqueryConfigFromSecretFile.toString())
        .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId)
        .put(BigQueryConsts.CONFIG_DATASET_LOCATION, datasetLocation)
        .put(BigQueryConsts.LOADING_METHOD, loadingMethod)
        .build());
  }

  /** Creates a BigQuery client from the project id and service-account credentials in the config. */
  public static BigQuery configureBigQuery(final JsonNode config) throws IOException {
    final ServiceAccountCredentials credentials = ServiceAccountCredentials
        .fromStream(new ByteArrayInputStream(config.get(CONFIG_CREDS).asText().getBytes(StandardCharsets.UTF_8)));

    return BigQueryOptions.newBuilder()
        .setProjectId(config.get(CONFIG_PROJECT_ID).asText())
        .setCredentials(credentials)
        .build()
        .getService();
  }

  /** Creates the dataset named in the config and registers it for cleanup. */
  public static Dataset getBigQueryDataSet(final JsonNode config, final BigQuery bigQuery) {
    final DatasetInfo datasetInfo =
        DatasetInfo.newBuilder(BigQueryUtils.getDatasetId(config)).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build();
    Dataset dataset = bigQuery.create(datasetInfo);
    trackTestDataSet(dataset, bigQuery);
    return dataset;
  }

  /**
   * Registers a dataset for cleanup: it is recorded as pending deletion and a JVM shutdown hook is
   * added as a safety net in case tearDownBigQuery is never called for it.
   *
   * The dataset was previously never recorded, so tearDownBigQuery's membership check always
   * failed and no test dataset was ever deleted.
   */
  public static synchronized void trackTestDataSet(final Dataset dataset, final BigQuery bigQuery) {
    dataSetsForDrop.add(dataset);
    Runtime.getRuntime()
        .addShutdownHook(
            new Thread(
                () -> tearDownBigQuery(dataset, bigQuery)));
  }

  /**
   * Deletes a tracked dataset together with its contents. Datasets that are not tracked (never
   * registered, or already torn down) are ignored, so calling this from both a test's tear-down and
   * the shutdown hook is safe.
   */
  public static synchronized void tearDownBigQuery(final Dataset dataset, final BigQuery bigQuery) {
    if (dataSetsForDrop.contains(dataset)) {
      // allows deletion of a dataset that has contents
      final BigQuery.DatasetDeleteOption option = BigQuery.DatasetDeleteOption.deleteContents();

      final boolean success = bigQuery.delete(dataset.getDatasetId(), option);
      if (success) {
        LOGGER.info("BQ Dataset " + dataset + " deleted...");
      } else {
        LOGGER.info("BQ Dataset cleanup for " + dataset + " failed!");
      }

      dataSetsForDrop.remove(dataset);
    }
  }

}

View File

@@ -1,14 +0,0 @@
{
"name": "Andrii",
"accepts_marketing_updated_at": "2021-10-11T06:36:53-07:00",
"permission-list": [
{
"domain": "abs",
"grants": ["admin"]
},
{
"domain": "tools",
"grants": ["read", "write"]
}
]
}

View File

@@ -1,26 +0,0 @@
{
"id": "ID",
"name": "Andrii",
"type": "some_type",
"email": "email@email.com",
"avatar": {
"image_url": "url_to_avatar.jpg"
},
"team_ids": {
"big_query_array": [1, 2, 3],
"big_query_null": null
},
"admin_ids": {
"big_query_array": [],
"big_query_null": null
},
"all_of_field": {
"big_query_array": [4, 5, 6],
"big_query_string": "Some text",
"big_query_integer": 42
},
"job_title": "title",
"has_inbox_seat": true,
"away_mode_enabled": false,
"away_mode_reassign": false
}

View File

@@ -1,9 +0,0 @@
{
"name": "Sergii",
"team_ids": [],
"all_of_field": {
"big_query_array": [4, 5, 6],
"big_query_string": "Some text",
"big_query_integer": 42
}
}

View File

@@ -1,6 +0,0 @@
{
"name": "Mukola",
"team_ids": null,
"all_of_field": null,
"avatar": null
}

View File

@@ -1,14 +0,0 @@
{
"object_with_arrays": {
"array_3": [1, 2, 3]
},
"simple_string": "simple string",
"array_1": [
[1, 2],
[2, 3]
],
"array_4": [[[4]]],
"array_5": [[[[5]]]],
"array_6": [["2021-10-11T06:36:53+00:00", "2020-10-10T01:00:00+00:00"]],
"array_7": [[["2021-10-11T06:36:53+00:00", "2020-10-10T01:00:00+00:00"]]]
}

View File

@@ -1,31 +0,0 @@
{
"rec_lvl_1": {
"rec_lvl_2": {
"rec_lvl_3": {
"rec_lvl_4": {
"rec_lvl_5": {
"rec_lvl_6": {
"rec_lvl_7": {
"rec_lvl_8": {
"rec_lvl_9": {
"rec_lvl_10": {
"rec_lvl_11": {
"rec_lvl_12": {
"rec_lvl_13": {
"rec_lvl_14": {
"str_value": "test_value"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,33 +0,0 @@
{
"rec_lvl_1": {
"rec_lvl_2": {
"rec_lvl_3": {
"rec_lvl_4": {
"rec_lvl_5": {
"rec_lvl_6": {
"rec_lvl_7": {
"rec_lvl_8": {
"rec_lvl_9": {
"rec_lvl_10": {
"rec_lvl_11": {
"rec_lvl_12": {
"rec_lvl_13": {
"rec_lvl_14": {
"rec_lvl_15": {
"str_value": "test_value"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,18 +0,0 @@
{
"name": "Andrii",
"permission-list": [
{
"domain": "abs",
"items": {},
"grants": ["admin"]
},
{
"domain": "tools",
"grants": [],
"items": {
"object": {},
"array": []
}
}
]
}

View File

@@ -1,5 +0,0 @@
{
"name": "Andrii",
"date_of_birth": "1996-01-25",
"updated_at": "2021-10-11T06:36:53"
}

View File

@@ -1,6 +0,0 @@
{
"updated_at": "2021-10-11T06:36:53+00:00",
"items": {
"nested_datetime": "2021-11-11T06:36:53+00:00"
}
}

View File

@@ -1,6 +0,0 @@
{
"users": {
"name": "John",
"surname": "Adams"
}
}

View File

@@ -1,50 +0,0 @@
{
"object_with_arrays": {
"array_3": [1, 2, 3]
},
"simple_string": "simple string",
"array_1": [
{
"big_query_array": [1, 2]
},
{
"big_query_array": [2, 3]
}
],
"array_4": [
{
"big_query_array": [
{
"big_query_array": [4]
}
]
}
],
"array_5": [
{
"big_query_array": [
{
"big_query_array": [
{
"big_query_array": [5]
}
]
}
]
}
],
"array_6": [
{
"big_query_array": ["2021-10-11T06:36:53", "2020-10-10T01:00:00"]
}
],
"array_7": [
{
"big_query_array": [
{
"big_query_array": ["2021-10-11T06:36:53", "2020-10-10T01:00:00"]
}
]
}
]
}

View File

@@ -1,29 +0,0 @@
{
"type": ["object"],
"properties": {
"accepts_marketing_updated_at": {
"type": ["null", "string"],
"format": "date-time"
},
"name": {
"type": ["string"]
},
"permission-list": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"domain": {
"type": ["string"]
},
"grants": {
"type": ["array"],
"items": {
"type": ["string"]
}
}
}
}
}
}
}

View File

@@ -1,79 +0,0 @@
{
"type": "object",
"properties": {
"id": {
"type": ["null", "string"]
},
"name": {
"type": ["null", "string"]
},
"type": {
"type": ["null", "string"]
},
"email": {
"type": ["null", "string"]
},
"avatar": {
"type": ["null", "object"],
"properties": {
"image_url": {
"type": ["null", "string"]
}
}
},
"team_ids": {
"anyOf": [
{
"type": "array",
"items": {
"type": "integer"
}
},
{
"type": "null"
}
]
},
"admin_ids": {
"anyOf": [
{
"type": "array",
"items": {
"type": "integer"
}
},
{
"type": "null"
}
]
},
"all_of_field": {
"allOf": [
{
"type": "array",
"items": {
"type": "integer"
}
},
{
"type": "string"
},
{
"type": "integer"
}
]
},
"job_title": {
"type": ["null", "string"]
},
"has_inbox_seat": {
"type": ["null", "boolean"]
},
"away_mode_enabled": {
"type": ["null", "boolean"]
},
"away_mode_reassign": {
"type": ["null", "boolean"]
}
}
}

View File

@@ -1,78 +0,0 @@
{
"type": ["object"],
"properties": {
"object_with_arrays": {
"type": ["object"],
"properties": {
"array_3": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
},
"simple_string": {
"type": ["string"]
},
"array_1": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": "integer"
}
}
},
"array_4": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
},
"array_5": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
},
"array_6": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["string"],
"format": "date-time"
}
}
},
"array_7": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["string"],
"format": "date-time"
}
}
}
}
}
}

View File

@@ -1,78 +0,0 @@
{
"type": ["object"],
"properties": {
"rec_lvl_1": {
"type": ["object"],
"properties": {
"rec_lvl_2": {
"type": ["object"],
"properties": {
"rec_lvl_3": {
"type": ["object"],
"properties": {
"rec_lvl_4": {
"type": ["object"],
"properties": {
"rec_lvl_5": {
"type": ["object"],
"properties": {
"rec_lvl_6": {
"type": ["object"],
"properties": {
"rec_lvl_7": {
"type": ["object"],
"properties": {
"rec_lvl_8": {
"type": ["object"],
"properties": {
"rec_lvl_9": {
"type": ["object"],
"properties": {
"rec_lvl_10": {
"type": ["object"],
"properties": {
"rec_lvl_11": {
"type": ["object"],
"properties": {
"rec_lvl_12": {
"type": ["object"],
"properties": {
"rec_lvl_13": {
"type": ["object"],
"properties": {
"rec_lvl_14": {
"type": ["object"],
"properties": {
"str_value": {
"type": ["string"]
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,87 +0,0 @@
{
"type": ["object"],
"properties": {
"rec_lvl_1": {
"type": ["object"],
"properties": {
"rec_lvl_2": {
"type": ["object"],
"properties": {
"rec_lvl_3": {
"type": ["object"],
"properties": {
"rec_lvl_4": {
"type": ["object"],
"properties": {
"rec_lvl_5": {
"type": ["object"],
"properties": {
"rec_lvl_6": {
"type": ["object"],
"properties": {
"rec_lvl_7": {
"type": ["object"],
"properties": {
"rec_lvl_8": {
"type": ["object"],
"properties": {
"rec_lvl_9": {
"type": ["object"],
"properties": {
"rec_lvl_10": {
"type": ["object"],
"properties": {
"rec_lvl_11": {
"type": ["object"],
"properties": {
"rec_lvl_12": {
"type": ["object"],
"properties": {
"rec_lvl_13": {
"type": ["object"],
"properties": {
"rec_lvl_14": {
"type": ["object"],
"properties": {
"rec_lvl_15": {
"type": [
"object"
],
"properties": {
"str_value": {
"type": [
"string"
]
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,18 +0,0 @@
{
"type": ["object"],
"properties": {
"updated_at": {
"type": ["string"],
"format": "date-time"
},
"items": {
"type": ["object"],
"properties": {
"nested_datetime": {
"type": ["string"],
"format": "date-time"
}
}
}
}
}

View File

@@ -1,16 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["string"]
},
"date_of_birth": {
"type": ["string"],
"format": "date"
},
"updated_at": {
"type": ["string"],
"format": "date-time"
}
}
}

View File

@@ -1,22 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["string"]
},
"permission-list": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"domain": {
"type": ["string"]
},
"grants": {
"type": ["array"]
}
}
}
}
}
}

View File

@@ -1,20 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["null", "string"]
},
"appointment": {
"type": ["null", "object"],
"properties": {
"street": {
"type": ["null", "string"]
},
"expTime": {
"type": ["null", "string"],
"format": "date-time"
}
}
}
}
}

View File

@@ -1,8 +0,0 @@
{
"type": ["null", "object"],
"properties": {
"users": {
"$ref": "#/definitions/users_"
}
}
}

View File

@@ -1,380 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static com.google.cloud.bigquery.Field.Mode.REPEATED;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchema;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.IsInstanceOf.instanceOf;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.FieldList;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.TableDefinition;
import io.airbyte.integrations.destination.bigquery.formatter.BigQueryRecordFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.DefaultBigQueryDenormalizedRecordFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.GcsBigQueryDenormalizedRecordFormatter;
import io.airbyte.integrations.destination.bigquery.formatter.arrayformater.LegacyArrayFormatter;
import io.airbyte.integrations.destination.bigquery.uploader.AbstractBigQueryUploader;
import io.airbyte.integrations.destination.bigquery.uploader.BigQueryDirectUploader;
import io.airbyte.integrations.destination.bigquery.uploader.BigQueryUploaderFactory;
import io.airbyte.integrations.destination.bigquery.uploader.UploaderType;
import io.airbyte.integrations.destination.bigquery.uploader.config.UploaderConfig;
import io.airbyte.protocol.models.v0.AirbyteStream;
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.MockedStatic;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Unit tests for {@link BigQueryDenormalizedDestination}.
 *
 * <p>
 * Most tests call {@code putStreamIntoUploaderMap} against mocked BigQuery objects and then verify
 * how many times the mocked record formatter received a {@link LegacyArrayFormatter}: once when the
 * expected schema and the schema of the existing table differ, never when they match or when no
 * table exists.
 */
@ExtendWith(MockitoExtension.class)
class BigQueryDenormalizedDestinationTest {

  private static final String STREAM_NAME = "stream_name";
  private static final String NAMESPACE = "name_space";
  private static final String TARGET_TABLE = "target_table";

  @Mock
  UploaderConfig uploaderConfigMock;
  @Mock
  ConfiguredAirbyteStream configuredStreamMock;
  @Mock
  AirbyteStream airbyteStreamMock;
  @Mock
  DefaultBigQueryDenormalizedRecordFormatter bigQueryRecordFormatterMock;
  @Mock
  BigQuery bigQueryMock;
  // Static mock of the uploader factory; opened in init() and closed in teardown().
  MockedStatic<BigQueryUploaderFactory> uploaderFactoryMock;
  @InjectMocks
  BigQueryDenormalizedDestination bqdd;
  // Not referenced by any test in this class.
  final ObjectMapper mapper = new ObjectMapper();

  /**
   * Replaces {@code BigQueryUploaderFactory.getUploader} with a stub returning a mocked direct
   * uploader; every other static method of the factory keeps its real implementation.
   */
  @BeforeEach
  void init() {
    uploaderFactoryMock = Mockito.mockStatic(BigQueryUploaderFactory.class, Mockito.CALLS_REAL_METHODS);
    uploaderFactoryMock.when(() -> BigQueryUploaderFactory.getUploader(any(UploaderConfig.class))).thenReturn(mock(BigQueryDirectUploader.class));
  }

  /** Releases the static mock so it does not leak into other tests. */
  @AfterEach
  public void teardown() {
    uploaderFactoryMock.close();
  }

  @Test
  void getFormatterMap() {
    final JsonNode jsonNodeSchema = getSchema();

    final Map<UploaderType, BigQueryRecordFormatter> formatterMap = bqdd.getFormatterMap(jsonNodeSchema);

    assertEquals(2, formatterMap.size());
    assertTrue(formatterMap.containsKey(UploaderType.AVRO));
    assertTrue(formatterMap.containsKey(UploaderType.STANDARD));
    assertThat(formatterMap.get(UploaderType.AVRO), instanceOf(GcsBigQueryDenormalizedRecordFormatter.class));
    assertThat(formatterMap.get(UploaderType.STANDARD), instanceOf(DefaultBigQueryDenormalizedRecordFormatter.class));
  }

  @Test
  void isDefaultAirbyteTmpTableSchema() {
    assertFalse(bqdd.isDefaultAirbyteTmpTableSchema());
  }

  @Test
  void putStreamIntoUploaderMap_compareSchemas_expectedIsNotNullExistingIsNull() throws IOException {
    final Schema expectedSchema = mock(Schema.class);
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    // expected schema is not null
    stubExpectedSchema(expectedSchema);
    // existing schema is null
    stubExistingTable(null);

    runPutStreamIntoUploaderMap();

    // should use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(1);
  }

  @Test
  void putStreamIntoUploaderMap_compareSchemas_existingAndExpectedAreNull() throws IOException {
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    // expected schema is null
    stubExpectedSchema(null);
    // existing schema is null
    stubExistingTable(null);

    runPutStreamIntoUploaderMap();

    // should not use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(0);
  }

  @Test
  void putStreamIntoUploaderMap_compareSchemas_expectedSchemaIsNull() throws IOException {
    final Schema existingSchema = mock(Schema.class);
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    // expected schema is null
    stubExpectedSchema(null);
    // existing schema is not null
    stubExistingTable(existingSchema);

    runPutStreamIntoUploaderMap();

    // should use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(1);
  }

  @Test
  void putStreamIntoUploaderMap_isDifferenceBetweenFields_equalType() throws IOException {
    final Schema existingSchema = schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.STRING).build()));
    final Schema expectedSchema = schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.STRING).build()));
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    stubExistingTable(existingSchema);
    stubExpectedSchema(expectedSchema);

    runPutStreamIntoUploaderMap();

    // equal type should not use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(0);
  }

  @Test
  void putStreamIntoUploaderMap_isDifferenceBetweenFields_notEqualType() throws IOException {
    final Schema existingSchema = schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.DATE).build()));
    final Schema expectedSchema = schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.STRING).build()));
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    stubExistingTable(existingSchema);
    stubExpectedSchema(expectedSchema);

    runPutStreamIntoUploaderMap();

    // different type (DATE vs STRING) should use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(1);
  }

  @Test
  void putStreamIntoUploaderMap_isDifferenceBetweenFields_existingFieldIsNull() throws IOException {
    // the existing field list reports no field for any name
    final FieldList existingFields = mock(FieldList.class);
    when(existingFields.get(anyString())).thenReturn(null);
    final Schema existingSchema = schemaWithFields(existingFields);
    final Schema expectedSchema = schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.STRING).build()));
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    stubExistingTable(existingSchema);
    stubExpectedSchema(expectedSchema);

    runPutStreamIntoUploaderMap();

    // a field missing from the existing schema should use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(1);
  }

  @Test
  void putStreamIntoUploaderMap_compareRepeatedMode_isEqual() throws IOException {
    final Schema existingSchema =
        schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.STRING).setMode(REPEATED).build()));
    final Schema expectedSchema =
        schemaWithFields(FieldList.of(Field.newBuilder("name", StandardSQLTypeName.STRING).setMode(REPEATED).build()));
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    stubExistingTable(existingSchema);
    stubExpectedSchema(expectedSchema);

    runPutStreamIntoUploaderMap();

    // equal mode should not use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(0);
  }

  @Test
  void putStreamIntoUploaderMap_compareSubFields_equalType() throws IOException {
    final Schema existingSchema = schemaWithFields(FieldList.of(recordField(StandardSQLTypeName.STRING)));
    final Schema expectedSchema = schemaWithFields(FieldList.of(recordField(StandardSQLTypeName.STRING)));
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    stubExistingTable(existingSchema);
    stubExpectedSchema(expectedSchema);

    runPutStreamIntoUploaderMap();

    // equal subfield type should not use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(0);
  }

  @Test
  void putStreamIntoUploaderMap_compareSubFields_notEqualType() throws IOException {
    final Schema existingSchema = schemaWithFields(FieldList.of(recordField(StandardSQLTypeName.STRING)));
    final Schema expectedSchema = schemaWithFields(FieldList.of(recordField(StandardSQLTypeName.DATE)));
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    stubExistingTable(existingSchema);
    stubExpectedSchema(expectedSchema);

    runPutStreamIntoUploaderMap();

    // not equal subfield type should use LegacyArrayFormatter
    verifyLegacyArrayFormatterSet(1);
  }

  @Test
  void putStreamIntoUploaderMap_existingTableIsNull() throws IOException {
    final AirbyteStreamNameNamespacePair expectedKey = new AirbyteStreamNameNamespacePair(STREAM_NAME, NAMESPACE);
    mockBigqueryStream();
    stubTargetTableAndNamespace();
    when(airbyteStreamMock.getName()).thenReturn(STREAM_NAME);
    // existing table is null
    when(bigQueryMock.getTable(anyString(), anyString())).thenReturn(null);

    final Map<AirbyteStreamNameNamespacePair, AbstractBigQueryUploader<?>> uploaderMap = runPutStreamIntoUploaderMap();

    verifyLegacyArrayFormatterSet(0);
    assertTrue(uploaderMap.containsKey(expectedKey));
  }

  /** Wires the uploader config mock to the mocked stream, BigQuery client and record formatter. */
  private void mockBigqueryStream() {
    when(uploaderConfigMock.getConfigStream()).thenReturn(configuredStreamMock);
    when(uploaderConfigMock.getBigQuery()).thenReturn(bigQueryMock);
    when(uploaderConfigMock.getFormatter()).thenReturn(bigQueryRecordFormatterMock);
    when(configuredStreamMock.getStream()).thenReturn(airbyteStreamMock);
  }

  /** Stubs the target table name on the config and the namespace on the stream. */
  private void stubTargetTableAndNamespace() {
    when(uploaderConfigMock.getTargetTableName()).thenReturn(TARGET_TABLE);
    when(airbyteStreamMock.getNamespace()).thenReturn(NAMESPACE);
  }

  /**
   * Makes the mocked BigQuery client return a table whose definition reports the given schema.
   *
   * @param existingSchema schema of the already existing table; may be {@code null}
   */
  private void stubExistingTable(final Schema existingSchema) {
    final Table tableMock = mock(Table.class);
    final TableDefinition tableDefinitionMock = mock(TableDefinition.class);
    when(tableMock.getDefinition()).thenReturn(tableDefinitionMock);
    when(tableDefinitionMock.getSchema()).thenReturn(existingSchema);
    when(bigQueryMock.getTable(anyString(), anyString())).thenReturn(tableMock);
  }

  /**
   * Makes the mocked formatter report the given schema as the expected one.
   *
   * @param expectedSchema schema returned by {@code getBigQuerySchema()}; may be {@code null}
   */
  private void stubExpectedSchema(final Schema expectedSchema) {
    when(bigQueryRecordFormatterMock.getBigQuerySchema()).thenReturn(expectedSchema);
  }

  /**
   * Creates a mocked schema exposing the given fields. Must be called before, not inside, another
   * {@code when(...).thenReturn(...)} expression, because it performs stubbing itself.
   */
  private static Schema schemaWithFields(final FieldList fields) {
    final Schema schema = mock(Schema.class);
    when(schema.getFields()).thenReturn(fields);
    return schema;
  }

  /** Builds a RECORD field "field_name" with a single sub field "sub_field_name" of the given type. */
  private static Field recordField(final StandardSQLTypeName subFieldType) {
    final FieldList subFields = FieldList.of(Field.newBuilder("sub_field_name", subFieldType).build());
    return Field.newBuilder("field_name", LegacySQLTypeName.RECORD, subFields).build();
  }

  /**
   * Runs the method under test against the mocked stream and config.
   *
   * @return the uploader map that was passed to {@code putStreamIntoUploaderMap}
   */
  private Map<AirbyteStreamNameNamespacePair, AbstractBigQueryUploader<?>> runPutStreamIntoUploaderMap() throws IOException {
    final Map<AirbyteStreamNameNamespacePair, AbstractBigQueryUploader<?>> uploaderMap = new HashMap<>();
    bqdd.putStreamIntoUploaderMap(airbyteStreamMock, uploaderConfigMock, uploaderMap);
    return uploaderMap;
  }

  /** Verifies how many times the formatter was switched to a {@link LegacyArrayFormatter}. */
  private void verifyLegacyArrayFormatterSet(final int expectedCalls) {
    verify(bigQueryRecordFormatterMock, times(expectedCalls)).setArrayFormatter(any(LegacyArrayFormatter.class));
  }

}

View File

@@ -1,31 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test;
/** Unit tests for {@code JsonSchemaFormat.fromJsonSchemaFormat}. */
class JsonSchemaFormatTest {

  /** A format together with an airbyte type resolves to the constant expected for that pair. */
  @Test
  void fromJsonSchemaFormat_matchByFormatAndType() {
    assertEquals(
        JsonSchemaFormat.DATETIME_WITH_TZ,
        JsonSchemaFormat.fromJsonSchemaFormat("date-time", "timestamp_with_timezone"));
  }

  /** A format without an airbyte type still resolves by format alone. */
  @Test
  void fromJsonSchemaFormat_matchByFormat() {
    assertEquals(
        JsonSchemaFormat.DATE,
        JsonSchemaFormat.fromJsonSchemaFormat("date", null));
  }

  /** An unknown format yields {@code null}. */
  @Test
  void fromJsonSchemaFormat_notExistingFormat() {
    assertNull(JsonSchemaFormat.fromJsonSchemaFormat("not_existing_format", null));
  }

}

View File

@@ -1,25 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import org.junit.jupiter.api.Test;
/** Unit tests for {@code JsonSchemaType.fromJsonSchemaType}. */
class JsonSchemaTypeTest {

  /** An unknown type name is rejected with an {@link IllegalArgumentException}. */
  @Test
  void fromJsonSchemaType_notPresent() {
    assertThrows(
        IllegalArgumentException.class,
        () -> JsonSchemaType.fromJsonSchemaType("not_existing_value"));
  }

  /** The name "string" resolves to {@code JsonSchemaType.STRING}. */
  @Test
  void fromJsonSchemaType_getType() {
    assertEquals(JsonSchemaType.STRING, JsonSchemaType.fromJsonSchemaType("string"));
  }

}

View File

@@ -1,288 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchema;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaArrays;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaWithDateTime;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaWithFormats;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaWithInvalidArrayType;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaWithNestedDatetimeInsideNullObject;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchema;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaArrays;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithBigInteger;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithDateTime;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithFormats;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithInvalidArrayType;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithNestedDatetimeInsideNullObject;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithReferenceDefinition;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.params.provider.Arguments.arguments;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.JsonNodeType;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Field.Mode;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.bigquery.BigQuerySQLNameTransformer;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.Mockito;
class DefaultBigQueryDenormalizedRecordFormatterTest {
private final ObjectMapper mapper = new ObjectMapper();
/**
 * Supplies (schema to process, expected formatted schema) pairs for {@code testDefaultSchema}.
 */
private static Stream<Arguments> actualAndExpectedSchemasProvider() {
  final Arguments plain = Arguments.of(getSchema(), getExpectedSchema());
  final Arguments withFormats = Arguments.of(getSchemaWithFormats(), getExpectedSchemaWithFormats());
  final Arguments withDateTime = Arguments.of(getSchemaWithDateTime(), getExpectedSchemaWithDateTime());
  final Arguments withInvalidArrayType =
      Arguments.of(getSchemaWithInvalidArrayType(), getExpectedSchemaWithInvalidArrayType());
  final Arguments withNestedDatetime = Arguments.of(
      getSchemaWithNestedDatetimeInsideNullObject(),
      getExpectedSchemaWithNestedDatetimeInsideNullObject());
  final Arguments withArrays = Arguments.of(getSchemaArrays(), getExpectedSchemaArrays());

  return Stream.of(plain, withFormats, withDateTime, withInvalidArrayType, withNestedDatetime, withArrays);
}
/**
 * Checks that formatting {@code schemaToProcess} produces {@code expectedSchema} for every pair
 * supplied by {@code actualAndExpectedSchemasProvider}.
 */
@ParameterizedTest
@MethodSource("actualAndExpectedSchemasProvider")
void testDefaultSchema(final JsonNode schemaToProcess, final JsonNode expectedSchema) {
  final BigQuerySQLNameTransformer nameTransformer = new BigQuerySQLNameTransformer();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(schemaToProcess, nameTransformer);

  assertEquals(expectedSchema, formatter.formatJsonSchema(schemaToProcess));
}
/**
 * Verifies the BigQuery schema built from the {@code getSchema()} fixture, including the repeated
 * {@code big_query_array} wrappers expected under {@code permission_list} and {@code grants}.
 */
@Test
void testSchema() {
  final JsonNode inputSchema = getSchema();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  // innermost level: repeated strings wrapped by the "grants" record
  final Field grantItems = Field.newBuilder("big_query_array", StandardSQLTypeName.STRING)
      .setMode(Mode.REPEATED)
      .build();
  // repeated record wrapped by the "permission_list" record
  final Field permissionItems = Field.newBuilder("big_query_array", LegacySQLTypeName.RECORD,
      Field.of("domain", LegacySQLTypeName.STRING),
      Field.of("grants", LegacySQLTypeName.RECORD, grantItems))
      .setMode(Mode.REPEATED)
      .build();
  final Field acceptsMarketingUpdatedAt = Field.newBuilder("accepts_marketing_updated_at", LegacySQLTypeName.DATETIME)
      .setMode(Mode.NULLABLE)
      .build();

  final Schema expectedSchema = Schema.of(
      acceptsMarketingUpdatedAt,
      Field.of("name", LegacySQLTypeName.STRING),
      Field.of("permission_list", LegacySQLTypeName.RECORD, permissionItems),
      Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING),
      Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP));

  assertEquals(expectedSchema, formatter.getBigQuerySchema(inputSchema));
}
/**
 * Verifies the BigQuery schema built from the {@code getSchemaWithFormats()} fixture:
 * {@code date_of_birth} is expected as DATE and {@code updated_at} as DATETIME.
 */
@Test
void testSchemaWithFormats() {
  final JsonNode inputSchema = getSchemaWithFormats();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  final Field name = Field.of("name", LegacySQLTypeName.STRING);
  final Field dateOfBirth = Field.of("date_of_birth", LegacySQLTypeName.DATE);
  final Field updatedAt = Field.of("updated_at", LegacySQLTypeName.DATETIME);
  final Field airbyteId = Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING);
  final Field emittedAt = Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP);
  final Schema expectedSchema = Schema.of(name, dateOfBirth, updatedAt, airbyteId, emittedAt);

  assertEquals(expectedSchema, formatter.getBigQuerySchema(inputSchema));
}
@Test
void testSchemaWithBigInteger() {
  // The "salary" column of the big-integer fixture is expected to come out as INTEGER.
  final JsonNode inputSchema = getSchemaWithBigInteger();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  final Field salary = Field.of("salary", LegacySQLTypeName.INTEGER);
  final Field updatedAt = Field.of("updated_at", LegacySQLTypeName.DATETIME);
  final Field airbyteId = Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING);
  final Field airbyteEmittedAt = Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP);
  final Schema expected = Schema.of(salary, updatedAt, airbyteId, airbyteEmittedAt);

  assertEquals(expected, formatter.getBigQuerySchema(inputSchema));
}
@Test
void testSchemaWithDateTime() {
  // Both the top-level and the nested date-time columns are expected as DATETIME.
  final JsonNode inputSchema = getSchemaWithDateTime();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  final Field updatedAt = Field.of("updated_at", LegacySQLTypeName.DATETIME);
  final Field nestedDatetime = Field.of("nested_datetime", LegacySQLTypeName.DATETIME);
  final Field items = Field.of("items", LegacySQLTypeName.RECORD, nestedDatetime);
  final Field airbyteId = Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING);
  final Field airbyteEmittedAt = Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP);
  final Schema expected = Schema.of(updatedAt, items, airbyteId, airbyteEmittedAt);

  assertEquals(expected, formatter.getBigQuerySchema(inputSchema));
}
@Test
void testSchemaWithInvalidArrayType() {
  // "grants" is expected to come out as a repeated STRING inside the repeated "permission_list" record.
  final JsonNode inputSchema = getSchemaWithInvalidArrayType();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  final Field domain = Field.of("domain", LegacySQLTypeName.STRING);
  final Field grants = Field.newBuilder("grants", LegacySQLTypeName.STRING)
      .setMode(Mode.REPEATED)
      .build();
  final Field permissionList = Field.newBuilder("permission_list", LegacySQLTypeName.RECORD, domain, grants)
      .setMode(Mode.REPEATED)
      .build();
  final Schema expected = Schema.of(
      Field.of("name", LegacySQLTypeName.STRING),
      permissionList,
      Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING),
      Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP));

  assertEquals(expected, formatter.getBigQuerySchema(inputSchema));
}
@Test
void testSchemaWithReferenceDefinition() {
  // The "users" column of the reference-definition fixture is expected as a plain STRING.
  final JsonNode inputSchema = getSchemaWithReferenceDefinition();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  final Field users = Field.of("users", LegacySQLTypeName.STRING);
  final Field airbyteId = Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING);
  final Field airbyteEmittedAt = Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP);
  final Schema expected = Schema.of(users, airbyteId, airbyteEmittedAt);

  assertEquals(expected, formatter.getBigQuerySchema(inputSchema));
}
@Test
void testSchemaWithNestedDatetimeInsideNullObject() {
  // Every user column, including the nested record and its children, is expected to be NULLABLE.
  final JsonNode inputSchema = getSchemaWithNestedDatetimeInsideNullObject();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

  final Field name = Field.newBuilder("name", LegacySQLTypeName.STRING)
      .setMode(Mode.NULLABLE)
      .build();
  final Field street = Field.newBuilder("street", LegacySQLTypeName.STRING)
      .setMode(Mode.NULLABLE)
      .build();
  final Field expTime = Field.newBuilder("expTime", LegacySQLTypeName.DATETIME)
      .setMode(Mode.NULLABLE)
      .build();
  final Field appointment = Field.newBuilder("appointment", LegacySQLTypeName.RECORD, street, expTime)
      .setMode(Mode.NULLABLE)
      .build();
  final Schema expected = Schema.of(
      name,
      appointment,
      Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING),
      Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP));

  assertEquals(expected, formatter.getBigQuerySchema(inputSchema));
}
@Test
public void testEmittedAtTimeConversion() {
  // A partial mock that calls real methods lets addAirbyteColumns run without invoking the constructor.
  final DefaultBigQueryDenormalizedRecordFormatter formatter = Mockito.mock(
      DefaultBigQueryDenormalizedRecordFormatter.class, Mockito.CALLS_REAL_METHODS);
  final ObjectNode target = mapper.createObjectNode();
  final AirbyteRecordMessage message = new AirbyteRecordMessage();
  message.setEmittedAt(1602637589000L);

  formatter.addAirbyteColumns(target, message);

  // The epoch-millis value is expected to be rendered as a formatted UTC timestamp string.
  final String emittedAt = target.get(JavaBaseConstants.COLUMN_NAME_EMITTED_AT).textValue();
  assertEquals("2020-10-14 01:06:29.000000+00:00", emittedAt);
}
@Test
void formatRecord_objectType() throws JsonProcessingException {
  // A plain string property must pass through formatRecord unchanged.
  final JsonNode schema = getSchema();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(schema, new BigQuerySQLNameTransformer());
  final JsonNode payload = mapper.readTree("""
      {"name":"data"}
      """);
  final AirbyteRecordMessage message = new AirbyteRecordMessage();
  message.setEmittedAt(1602637589000L);
  message.setData(payload);

  final JsonNode formatted = formatter.formatRecord(message);

  assertNotNull(formatted);
  assertTrue(formatted.has("name"));
  final JsonNode nameNode = formatted.get("name");
  assertEquals("data", nameNode.textValue());
  assertEquals(JsonNodeType.STRING, nameNode.getNodeType());
}
@Test
void formatRecord_containsRefDefinition() throws JsonProcessingException {
  final JsonNode schema = getSchema();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(schema, new BigQuerySQLNameTransformer());
  // Register "name" as a field that contains a $ref definition value.
  formatter.fieldsContainRefDefinitionValue.add("name");
  final JsonNode payload = mapper.readTree("""
      {"name":"data"}
      """);
  final AirbyteRecordMessage message = new AirbyteRecordMessage();
  message.setEmittedAt(1602637589000L);
  message.setData(payload);

  final JsonNode formatted = formatter.formatRecord(message);

  assertNotNull(formatted);
  assertTrue(formatted.has("name"));
  final JsonNode nameNode = formatted.get("name");
  // The expected text carries the surrounding JSON quotes, i.e. the value was serialized to a string.
  assertEquals("\"data\"", nameNode.textValue());
  assertEquals(JsonNodeType.STRING, nameNode.getNodeType());
}
@Test
void formatRecord_objectWithArray() throws JsonProcessingException {
  // Formats a record whose "object_with_arrays" value is a JSON array and checks that the
  // array and its string element keep their node types.
  final JsonNode jsonNodeSchema = getSchemaArrays();
  final DefaultBigQueryDenormalizedRecordFormatter rf = new DefaultBigQueryDenormalizedRecordFormatter(
      jsonNodeSchema, new BigQuerySQLNameTransformer());
  final JsonNode objectNode = mapper.readTree("""
      {"object_with_arrays":["array_3"]}
      """);
  final AirbyteRecordMessage airbyteRecordMessage = new AirbyteRecordMessage();
  airbyteRecordMessage.setEmittedAt(1602637589000L);
  airbyteRecordMessage.setData(objectNode);

  final JsonNode result = rf.formatRecord(airbyteRecordMessage);

  assertNotNull(result);
  // The presence check is asserted once; a stray, unasserted duplicate call was removed.
  assertTrue(result.has("object_with_arrays"));
  assertEquals(JsonNodeType.ARRAY, result.get("object_with_arrays").getNodeType());
  assertNotNull(result.get("object_with_arrays").get(0));
  assertEquals(JsonNodeType.STRING, result.get("object_with_arrays").get(0).getNodeType());
}
@Test
void formatRecordNotObject_thenThrowsError() throws JsonProcessingException {
  // Record data that is a JSON array instead of an object must be rejected.
  final JsonNode schema = getSchema();
  final DefaultBigQueryDenormalizedRecordFormatter formatter =
      new DefaultBigQueryDenormalizedRecordFormatter(schema, new BigQuerySQLNameTransformer());
  final JsonNode arrayPayload = mapper.readTree("""
      ["one"]""");
  final AirbyteRecordMessage message = new AirbyteRecordMessage();
  message.setEmittedAt(1602637589000L);
  message.setData(arrayPayload);

  assertThrows(IllegalArgumentException.class, () -> formatter.formatRecord(message));
}
}

View File

@@ -1,70 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaWithReferenceDefinition;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithDateTime;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaWithReferenceDefinition;
import static org.junit.jupiter.api.Assertions.assertEquals;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.LegacySQLTypeName;
import com.google.cloud.bigquery.Schema;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.bigquery.BigQuerySQLNameTransformer;
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
/**
 * Unit tests for {@link GcsBigQueryDenormalizedRecordFormatter}: JSON schema formatting,
 * BigQuery schema generation and the emitted-at column value.
 */
class GcsBigQueryDenormalizedRecordFormatterTest {

  @Test
  void refReplacement() {
    // The formatted JSON schema must match the expected reference-definition fixture.
    final JsonNode inputSchema = getSchemaWithReferenceDefinition();
    final JsonNode expectedSchema = getExpectedSchemaWithReferenceDefinition();
    final GcsBigQueryDenormalizedRecordFormatter formatter =
        new GcsBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

    assertEquals(expectedSchema, formatter.formatJsonSchema(inputSchema));
  }

  @Test
  void dataTimeReplacement() {
    // For this formatter both date-time columns are expected as TIMESTAMP.
    final JsonNode inputSchema = getSchemaWithDateTime();
    final GcsBigQueryDenormalizedRecordFormatter formatter =
        new GcsBigQueryDenormalizedRecordFormatter(inputSchema, new BigQuerySQLNameTransformer());

    final Field updatedAt = Field.of("updated_at", LegacySQLTypeName.TIMESTAMP);
    final Field nestedDatetime = Field.of("nested_datetime", LegacySQLTypeName.TIMESTAMP);
    final Field items = Field.of("items", LegacySQLTypeName.RECORD, nestedDatetime);
    final Field airbyteId = Field.of("_airbyte_ab_id", LegacySQLTypeName.STRING);
    final Field airbyteEmittedAt = Field.of("_airbyte_emitted_at", LegacySQLTypeName.TIMESTAMP);
    final Schema expected = Schema.of(updatedAt, items, airbyteId, airbyteEmittedAt);

    assertEquals(expected, formatter.getBigQuerySchema(inputSchema));
  }

  @Test
  public void testEmittedAtTimeConversion() {
    // A partial mock that calls real methods lets addAirbyteColumns run without invoking the constructor.
    final GcsBigQueryDenormalizedRecordFormatter formatter = Mockito.mock(
        GcsBigQueryDenormalizedRecordFormatter.class, Mockito.CALLS_REAL_METHODS);
    final ObjectNode target = new ObjectMapper().createObjectNode();
    final AirbyteRecordMessage message = new AirbyteRecordMessage();
    message.setEmittedAt(1602637589000L);

    formatter.addAirbyteColumns(target, message);

    // The emitted-at column is expected to read back as the raw epoch-millis text.
    final String emittedAt = target.get(JavaBaseConstants.COLUMN_NAME_EMITTED_AT).asText();
    assertEquals("1602637589000", emittedAt);
  }

}

View File

@@ -1,99 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.arrayformater;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaArrays;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaArrays;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link DefaultArrayFormatter}.
 */
class DefaultArrayFormatterTest {

  private final DefaultArrayFormatter formatter = new DefaultArrayFormatter();
  private final ObjectMapper mapper = new ObjectMapper();

  @Test
  void surroundArraysByObjects() {
    // surroundArraysByObjects rewrites the schema in place, so the input node is compared afterwards.
    final JsonNode schema = getSchemaArrays();
    final JsonNode expected = getExpectedSchemaArrays();

    formatter.surroundArraysByObjects(schema);

    assertEquals(expected, schema);
  }

  @Test
  void formatArrayItems() throws JsonProcessingException {
    // Each inner array is expected to be wrapped into its own {"big_query_array": [...]} object.
    final JsonNode expected = mapper.readTree(
        """
        [
        {"big_query_array": ["one", "two"]},
        {"big_query_array": ["one", "two"]}
        ]
        """);
    final JsonNode firstItem = mapper.readTree("""
        ["one", "two"]""");
    final JsonNode secondItem = mapper.readTree("""
        ["one", "two"]""");

    final JsonNode actual = formatter.formatArrayItems(List.of(firstItem, secondItem));

    assertEquals(expected, actual);
  }

  @Test
  void formatArrayItems_notArray() throws JsonProcessingException {
    // A non-array item is expected to be returned as-is inside the resulting array.
    final JsonNode item = mapper.readTree("""
        {"type":"object","items":{"type":"integer"}}""");
    final JsonNode expected = mapper.readTree("""
        [{"type":"object","items":{"type":"integer"}}]""");

    final JsonNode actual = formatter.formatArrayItems(List.of(item));

    assertEquals(expected, actual);
  }

  @Test
  void findArrays() throws JsonProcessingException {
    // Array definitions by nesting depth (1 = array of integers, 4 = four nested array levels).
    final JsonNode depth1 = mapper.readTree("""
        {"type":["array"],"items":{"type":"integer"}}""");
    final JsonNode depth2 = mapper.readTree("""
        {"type":["array"],"items":{"type":["array"],"items":{"type":"integer"}}}""");
    final JsonNode depth3 = mapper.readTree("""
        {"type":["array"],"items":{"type":["array"],"items":{"type":["array"],"items":{"type":"integer"}}}}""");
    final JsonNode depth4 = mapper.readTree(
        """
        {"type":["array"],"items":{"type":["array"],"items":{"type":["array"],"items":{"type":["array"],"items":{"type":"integer"}}}}}""");
    // The order of the expected list is significant: it is compared element by element.
    final List<JsonNode> expected = List.of(
        depth1,
        depth2, depth1,
        depth3, depth2, depth1,
        depth4, depth3, depth2, depth1);

    final JsonNode schema = getSchemaArrays();
    final List<JsonNode> actual = formatter.findArrays(schema);

    assertEquals(expected, actual);
  }

  @Test
  void findArrays_null() {
    // A null schema must yield an empty list.
    assertTrue(formatter.findArrays(null).isEmpty());
  }

}

View File

@@ -1,86 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.arrayformater;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getExpectedSchemaArraysLegacy;
import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestSchemaUtils.getSchemaArrays;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link LegacyArrayFormatter}.
 */
public class LegacyArrayFormatterTest {

  private final LegacyArrayFormatter formatter = new LegacyArrayFormatter();
  private final ObjectMapper mapper = new ObjectMapper();

  @Test
  void surroundArraysByObjects() {
    // surroundArraysByObjects rewrites the schema in place, so the input node is compared afterwards.
    final JsonNode schema = getSchemaArrays();
    final JsonNode expected = getExpectedSchemaArraysLegacy();

    formatter.surroundArraysByObjects(schema);

    assertEquals(expected, schema);
  }

  @Test
  void findArrays() throws JsonProcessingException {
    // Array definitions by nesting depth (1 = array of integers, 4 = four nested array levels).
    final JsonNode depth1 = mapper.readTree("""
        {"type":["array"],"items":{"type":"integer"}}""");
    final JsonNode depth2 = mapper.readTree("""
        {"type":["array"],"items":{"type":["array"],"items":{"type":"integer"}}}""");
    final JsonNode depth3 = mapper.readTree("""
        {"type":["array"],"items":{"type":["array"],"items":{"type":["array"],"items":{"type":"integer"}}}}""");
    final JsonNode depth4 = mapper.readTree(
        """
        {"type":["array"],"items":{"type":["array"],"items":{"type":["array"],"items":{"type":["array"],"items":{"type":"integer"}}}}}""");
    // The order of the expected list is significant: it is compared element by element.
    final List<JsonNode> expected = List.of(
        depth1,
        depth2, depth1,
        depth3, depth2, depth1,
        depth4, depth3, depth2, depth1);

    final JsonNode schema = getSchemaArrays();
    final List<JsonNode> actual = formatter.findArrays(schema);

    assertEquals(expected, actual);
  }

  @Test
  void findArrays_null() {
    // A null schema must yield an empty list.
    assertTrue(formatter.findArrays(null).isEmpty());
  }

  @Test
  void formatArrayItems() throws JsonProcessingException {
    // All items are expected to end up under a single "big_query_array" key.
    final JsonNode expected = mapper.readTree(
        """
        {"big_query_array": [["one", "two"], ["one", "two"]]}
        """);
    final JsonNode firstItem = mapper.readTree("""
        ["one", "two"]""");
    final JsonNode secondItem = mapper.readTree("""
        ["one", "two"]""");

    final JsonNode actual = formatter.formatArrayItems(List.of(firstItem, secondItem));

    assertEquals(expected, actual);
  }

}

View File

@@ -1,54 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.formatter.util;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link FormatterUtil#isAirbyteArray}.
 */
class FormatterUtilTest {

  private final ObjectMapper mapper = new ObjectMapper();

  @Test
  void isAirbyteArray_typeIsNull() throws JsonProcessingException {
    // A bare JSON array has no "type" field at all.
    final JsonNode node = mapper.readTree(
        """
        ["one", "two"]""");

    assertFalse(FormatterUtil.isAirbyteArray(node));
  }

  @Test
  void isAirbyteArray_typeFieldIsArray() throws JsonProcessingException {
    // "type" is a list that contains "array".
    final JsonNode node = mapper.readTree("""
        {"type":["array"],"items":{"type":"integer"}}""");

    assertTrue(FormatterUtil.isAirbyteArray(node));
  }

  @Test
  void isAirbyteArray_typeFieldIsNotArray() throws JsonProcessingException {
    // "type" is the plain text "object".
    final JsonNode node = mapper.readTree("""
        {"type":"object"}""");

    assertFalse(FormatterUtil.isAirbyteArray(node));
  }

  @Test
  void isAirbyteArray_textIsNotArray() throws JsonProcessingException {
    // "type" is a list, but its only entry is not "array".
    final JsonNode node = mapper.readTree("""
        {"type":["notArrayText"]}""");

    assertFalse(FormatterUtil.isAirbyteArray(node));
  }

}

View File

@@ -1,96 +0,0 @@
/*
* Copyright (c) 2023 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.bigquery.util;
import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.commons.json.Jsons;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Test helper that loads JSON schema fixtures from the {@code schemas/} folder on the classpath.
 * Each public accessor reads and parses its fixture again on every call.
 */
public class BigQueryDenormalizedTestSchemaUtils {

  private static final String JSON_FILES_BASE_LOCATION = "schemas/";

  public static JsonNode getSchema() {
    return getTestDataFromResourceJson("schema.json");
  }

  public static JsonNode getSchemaWithFormats() {
    return getTestDataFromResourceJson("schemaWithFormats.json");
  }

  public static JsonNode getSchemaWithDateTime() {
    return getTestDataFromResourceJson("schemaWithDateTime.json");
  }

  public static JsonNode getSchemaWithBigInteger() {
    return getTestDataFromResourceJson("schemaWithBigInteger.json");
  }

  public static JsonNode getSchemaWithInvalidArrayType() {
    return getTestDataFromResourceJson("schemaWithInvalidArrayType.json");
  }

  public static JsonNode getSchemaWithReferenceDefinition() {
    return getTestDataFromResourceJson("schemaWithReferenceDefinition.json");
  }

  public static JsonNode getSchemaWithNestedDatetimeInsideNullObject() {
    return getTestDataFromResourceJson("schemaWithNestedDatetimeInsideNullObject.json");
  }

  public static JsonNode getSchemaArrays() {
    return getTestDataFromResourceJson("schemaArrays.json");
  }

  public static JsonNode getExpectedSchema() {
    return getTestDataFromResourceJson("expectedSchema.json");
  }

  public static JsonNode getExpectedSchemaWithFormats() {
    return getTestDataFromResourceJson("expectedSchemaWithFormats.json");
  }

  public static JsonNode getExpectedSchemaWithDateTime() {
    return getTestDataFromResourceJson("expectedSchemaWithDateTime.json");
  }

  public static JsonNode getExpectedSchemaWithInvalidArrayType() {
    return getTestDataFromResourceJson("expectedSchemaWithInvalidArrayType.json");
  }

  public static JsonNode getExpectedSchemaWithReferenceDefinition() {
    return getTestDataFromResourceJson("expectedSchemaWithReferenceDefinition.json");
  }

  public static JsonNode getExpectedSchemaWithNestedDatetimeInsideNullObject() {
    return getTestDataFromResourceJson("expectedSchemaWithNestedDatetimeInsideNullObject.json");
  }

  public static JsonNode getExpectedSchemaArrays() {
    return getTestDataFromResourceJson("expectedSchemaArrays.json");
  }

  public static JsonNode getExpectedSchemaArraysLegacy() {
    return getTestDataFromResourceJson("expectedSchemaArraysLegacy.json");
  }

  public static JsonNode getSchemaWithAllOf() {
    return getTestDataFromResourceJson("schemaAllOf.json");
  }

  /**
   * Reads a fixture file from the classpath and deserializes it.
   *
   * @param fileName file name relative to {@code schemas/}
   * @return the parsed JSON content of the fixture
   * @throws IllegalArgumentException if the fixture is not present on the classpath
   * @throws RuntimeException if the fixture cannot be located on disk or read
   */
  private static JsonNode getTestDataFromResourceJson(final String fileName) {
    final String resourceName = JSON_FILES_BASE_LOCATION + fileName;
    final URL resource = BigQueryDenormalizedTestSchemaUtils.class.getClassLoader().getResource(resourceName);
    if (resource == null) {
      // Fail with the missing name instead of an anonymous NullPointerException.
      throw new IllegalArgumentException("Test resource not found on classpath: " + resourceName);
    }
    final String fileContent;
    try {
      // URL#getPath() keeps percent-encoding (e.g. "%20" for a space), which is not a valid
      // file-system path; converting through a URI decodes it correctly.
      fileContent = Files.readString(Path.of(resource.toURI()));
    } catch (final IOException | URISyntaxException e) {
      throw new RuntimeException(e);
    }
    return Jsons.deserialize(fileContent);
  }

}

View File

@@ -1,33 +0,0 @@
{
"type": ["object"],
"properties": {
"accepts_marketing_updated_at": {
"type": ["null", "string"],
"format": "date-time"
},
"name": { "type": ["string"] },
"permission-list": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"domain": { "type": ["string"] },
"grants": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": { "type": ["string"] }
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,85 +0,0 @@
{
"type": ["object"],
"properties": {
"object_with_arrays": {
"type": ["object"],
"properties": {
"array_3": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
},
"simple_string": {
"type": ["string"]
},
"array_1": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
},
"array_4": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
}
}
},
"array_5": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,105 +0,0 @@
{
"type": ["object"],
"properties": {
"object_with_arrays": {
"type": ["object"],
"properties": {
"array_3": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
},
"simple_string": {
"type": ["string"]
},
"array_1": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
}
},
"array_4": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
}
}
}
}
},
"array_5": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,18 +0,0 @@
{
"type": ["object"],
"properties": {
"updated_at": {
"type": ["string"],
"format": "date-time"
},
"items": {
"type": ["object"],
"properties": {
"nested_datetime": {
"type": ["string"],
"format": "date-time"
}
}
}
}
}

View File

@@ -1,16 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["string"]
},
"date_of_birth": {
"type": ["string"],
"format": "date"
},
"updated_at": {
"type": ["string"],
"format": "date-time"
}
}
}

View File

@@ -1,25 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["string"]
},
"permission-list": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"domain": {
"type": ["string"]
},
"grants": {
"type": ["array"],
"items": {
"type": ["string"]
}
}
}
}
}
}
}

View File

@@ -1,20 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["null", "string"]
},
"appointment": {
"type": ["null", "object"],
"properties": {
"street": {
"type": ["null", "string"]
},
"expTime": {
"type": ["null", "string"],
"format": "date-time"
}
}
}
}
}

View File

@@ -1,9 +0,0 @@
{
"type": ["null", "object"],
"properties": {
"users": {
"type": ["string"],
"$ref": "#/definitions/users_"
}
}
}

View File

@@ -1,33 +0,0 @@
{
"type": ["object"],
"properties": {
"accepts_marketing_updated_at": {
"type": ["null", "string"],
"format": "date-time"
},
"name": { "type": ["string"] },
"permission-list": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"domain": { "type": ["string"] },
"grants": {
"type": ["object"],
"properties": {
"big_query_array": {
"type": ["array"],
"items": { "type": ["string"] }
}
}
}
}
}
}
}
}
}
}

View File

@@ -1,16 +0,0 @@
{
"definitions": {
"schemaArray": {
"type": "array",
"items": { "$ref": "#" }
}
},
"type": ["object", "boolean"],
"properties": {
"title": {
"type": "string"
},
"allOf": { "$ref": "#/definitions/schemaArray" }
}
}

View File

@@ -1,55 +0,0 @@
{
"type": ["object"],
"properties": {
"object_with_arrays": {
"type": ["object"],
"properties": {
"array_3": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
},
"simple_string": {
"type": ["string"]
},
"array_1": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": "integer"
}
}
},
"array_4": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
},
"array_5": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": ["array"],
"items": {
"type": "integer"
}
}
}
}
}
}
}

View File

@@ -1,13 +0,0 @@
{
"type": ["object"],
"properties": {
"salary": {
"type": ["number"],
"airbyte_type": "big_integer"
},
"updated_at": {
"type": ["string"],
"format": "date-time"
}
}
}

View File

@@ -1,18 +0,0 @@
{
"type": ["object"],
"properties": {
"updated_at": {
"type": ["string"],
"format": "date-time"
},
"items": {
"type": ["object"],
"properties": {
"nested_datetime": {
"type": ["string"],
"format": "date-time"
}
}
}
}
}

View File

@@ -1,16 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["string"]
},
"date_of_birth": {
"type": ["string"],
"format": "date"
},
"updated_at": {
"type": ["string"],
"format": "date-time"
}
}
}

View File

@@ -1,22 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["string"]
},
"permission-list": {
"type": ["array"],
"items": {
"type": ["object"],
"properties": {
"domain": {
"type": ["string"]
},
"grants": {
"type": ["array"]
}
}
}
}
}
}

View File

@@ -1,20 +0,0 @@
{
"type": ["object"],
"properties": {
"name": {
"type": ["null", "string"]
},
"appointment": {
"type": ["null", "object"],
"properties": {
"street": {
"type": ["null", "string"]
},
"expTime": {
"type": ["null", "string"],
"format": "date-time"
}
}
}
}
}

View File

@@ -1,8 +0,0 @@
{
"type": ["null", "object"],
"properties": {
"users": {
"$ref": "#/definitions/users_"
}
}
}

View File

@@ -43,8 +43,7 @@ class NoRequiredObj(Object):
if self._properties:
schema["properties"] = self._properties_to_schema(self._properties)
if self._pattern_properties:
schema["patternProperties"] = self._properties_to_schema(
self._pattern_properties)
schema["patternProperties"] = self._properties_to_schema(self._pattern_properties)
schema["additionalProperties"] = True
return schema
@@ -54,7 +53,7 @@ class NoRequiredSchemaBuilder(SchemaBuilder):
EXTRA_STRATEGIES = (NoRequiredObj,)
def __init__(self):
super().__init__(schema_uri='http://json-schema.org/draft-07/schema#')
super().__init__(schema_uri="http://json-schema.org/draft-07/schema#")
def infer_schemas():