.github/labeler.yml (vendored, +4)
@@ -14,3 +14,7 @@ area/documentation:
 CDK:
 - airbyte-cdk/*
 - airbyte-cdk/**/*
+
+normalization:
+- airbyte-integrations/bases/base-normalization/*
+- airbyte-integrations/bases/base-normalization/**/*
@@ -6,6 +6,8 @@ exclude: |
   ^.*?/node_modules/.*$|
   ^.*?/charts/.*$|
+  ^airbyte-integrations/bases/base-normalization/.*$|
+  ^.*?/normalization_test_output/.*$|
   ^.*?/pnpm-lock\.yaml$|
   ^.*?/source-amplitude/unit_tests/api_data/zipped\.json$|
airbyte-integrations/bases/base-java/.dockerignore (new file, +5)
@@ -0,0 +1,5 @@
*
!Dockerfile
!build
!javabase.sh
!run_with_normalization.sh
airbyte-integrations/bases/base-java/Dockerfile (new file, +34)
@@ -0,0 +1,34 @@
### WARNING ###
# The Java connector Dockerfiles will soon be deprecated.
# This Dockerfile is not used to build the connector image we publish to DockerHub.
# The new logic to build the connector image is declared with Dagger here:
# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649

# If you need to add custom logic to build your connector image, you can do so by adding a finalize_build.sh or finalize_build.py script in the connector folder.
# Please reach out to the Connectors Operations team if you have any questions.
ARG JDK_VERSION=17.0.8
FROM amazoncorretto:${JDK_VERSION}
COPY --from=airbyte/integration-base:dev /airbyte /airbyte

RUN yum update -y && yum install -y tar openssl && yum clean all

WORKDIR /airbyte

# Add the Datadog Java APM agent
ADD https://dtdg.co/latest-java-tracer dd-java-agent.jar

COPY javabase.sh .
COPY run_with_normalization.sh .

# airbyte base commands
ENV AIRBYTE_SPEC_CMD "/airbyte/javabase.sh --spec"
ENV AIRBYTE_CHECK_CMD "/airbyte/javabase.sh --check"
ENV AIRBYTE_DISCOVER_CMD "/airbyte/javabase.sh --discover"
ENV AIRBYTE_READ_CMD "/airbyte/javabase.sh --read"
ENV AIRBYTE_WRITE_CMD "/airbyte/javabase.sh --write"

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
ENTRYPOINT ["/airbyte/base.sh"]

LABEL io.airbyte.version=0.1.2
LABEL io.airbyte.name=airbyte/integration-base-java
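For orientation, here is a hedged sketch of how a connector sits on top of this base image; the connector name destination-foo and the :dev tags are illustrative, not part of this commit, and the base.sh dispatch behavior is assumed from the AIRBYTE_*_CMD variables above.

# build the base image locally (tag is illustrative)
docker build -t airbyte/integration-base-java:dev airbyte-integrations/bases/base-java
# a child connector Dockerfile would FROM this image, copy its distribution
# into /airbyte/bin, and set ENV APPLICATION destination-foo so javabase.sh
# can resolve /airbyte/bin/$APPLICATION lazily at run time
docker run --rm airbyte/destination-foo:dev spec  # base.sh presumably maps 'spec' to AIRBYTE_SPEC_CMD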
airbyte-integrations/bases/base-java/build.gradle (new file, +3)
@@ -0,0 +1,3 @@
plugins {
    id 'airbyte-docker-legacy'
}
airbyte-integrations/bases/base-java/javabase.sh (new executable file, +33)
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

set -e

# if IS_CAPTURE_HEAP_DUMP_ON_ERROR is set to true, capture a heap dump on OutOfMemory errors
if [[ $IS_CAPTURE_HEAP_DUMP_ON_ERROR = true ]]; then

  arrayOfSupportedConnectors=("source-postgres" "source-mssql" "source-mysql")

  # The heap dump is captured only when a java-based connector fails with an OutOfMemory error
  if [[ " ${arrayOfSupportedConnectors[*]} " =~ " $APPLICATION " ]]; then
    JAVA_OPTS=$JAVA_OPTS" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/dump.hprof"
    export JAVA_OPTS
    echo "Added JAVA_OPTS=$JAVA_OPTS"
    echo "APPLICATION=$APPLICATION"
  fi
fi
# 30781 - Allocate 32KB for the log4j appender buffer to ensure that each line is logged in a single println
JAVA_OPTS=$JAVA_OPTS" -Dlog4j.encoder.byteBufferSize=32768 -Dlog4j2.configurationFile=log4j2.xml"
# needed because we make ThreadLocal.get(Thread) accessible in IntegrationRunner.stopOrphanedThreads
JAVA_OPTS=$JAVA_OPTS" --add-opens=java.base/java.lang=ALL-UNNAMED"
# tell jooq to be quiet (https://stackoverflow.com/questions/28272284/how-to-disable-jooqs-self-ad-message-in-3-4)
JAVA_OPTS=$JAVA_OPTS" -Dorg.jooq.no-logo=true -Dorg.jooq.no-tips=true"
export JAVA_OPTS

# Wrap the run script in a script so that we can lazily evaluate the value of APPLICATION. APPLICATION is
# set by the dockerfile that inherits base-java, so it cannot be evaluated when base-java is built.
# We also need to make sure that stdin of the script is piped to the stdin of the java application.
if [[ $1 = --write ]]; then
  cat <&0 | /airbyte/bin/"$APPLICATION" "$@"
else
  /airbyte/bin/"$APPLICATION" "$@"
fi
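A hedged invocation sketch to make the lazy APPLICATION lookup concrete; the connector name and file names are illustrative.

export APPLICATION=destination-postgres      # normally baked in by the child connector image
/airbyte/javabase.sh --spec                  # runs /airbyte/bin/destination-postgres --spec
cat messages.jsonl | /airbyte/javabase.sh --write --config config.json --catalog catalog.json
# only for --write does the script explicitly pipe its own stdin into the java process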
airbyte-integrations/bases/base-java/run_with_normalization.sh (new executable file, +61)
@@ -0,0 +1,61 @@
#!/bin/bash
# Intentionally no set -e, because we want to run normalization even if the destination fails
set -o pipefail

/airbyte/base.sh $@
destination_exit_code=$?
echo '{"type": "LOG","log":{"level":"INFO","message":"Destination process done (exit code '"$destination_exit_code"')"}}'

# store original args
args=$@

while [ $# -ne 0 ]; do
  case "$1" in
  --config)
    CONFIG_FILE="$2"
    shift 2
    ;;
  *)
    # move on
    shift
    ;;
  esac
done

# restore original args after shifts
set -- $args

USE_1S1T_FORMAT="false"
if [[ -s "$CONFIG_FILE" ]]; then
  USE_1S1T_FORMAT=$(jq -r '.use_1s1t_format' "$CONFIG_FILE")
fi

if test "$1" != 'write'
then
  normalization_exit_code=0
elif test "$NORMALIZATION_TECHNIQUE" = 'LEGACY' && test "$USE_1S1T_FORMAT" != "true"
then
  echo '{"type": "LOG","log":{"level":"INFO","message":"Starting in-connector normalization"}}'
  # Normalization tries to create this file from the connector config and crashes if it already exists
  # so just nuke it and let normalization recreate it.
  # Use -f to avoid error if it doesn't exist, since it's only created for certain SSL modes.
  rm -f ca.crt
  # the args in a write command are `write --catalog foo.json --config bar.json`
  # so if we remove the `write`, we can just pass the rest directly into normalization
  /airbyte/entrypoint.sh run ${@:2} --integration-type $AIRBYTE_NORMALIZATION_INTEGRATION | java -cp "/airbyte/lib/*" io.airbyte.cdk.integrations.destination.normalization.NormalizationLogParser
  normalization_exit_code=$?
  echo '{"type": "LOG","log":{"level":"INFO","message":"In-connector normalization done (exit code '"$normalization_exit_code"')"}}'
else
  echo '{"type": "LOG","log":{"level":"INFO","message":"Skipping in-connector normalization"}}'
  normalization_exit_code=0
fi

if test $destination_exit_code -ne 0
then
  exit $destination_exit_code
elif test $normalization_exit_code -ne 0
then
  exit $normalization_exit_code
else
  exit 0
fi
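A hedged end-to-end sketch of what the wrapper above does for a write invocation; the env var value and file names are illustrative.

export NORMALIZATION_TECHNIQUE=LEGACY
cat messages.jsonl | /airbyte/run_with_normalization.sh write --catalog catalog.json --config config.json
# 1. /airbyte/base.sh runs the destination; its exit code is captured rather than being fatal
# 2. the arg loop finds --config so jq can read .use_1s1t_format from config.json
# 3. with NORMALIZATION_TECHNIQUE=LEGACY and use_1s1t_format not "true", dbt runs
#    in-container: /airbyte/entrypoint.sh run --catalog catalog.json --config config.json ...
# 4. a non-zero destination exit code takes precedence over the normalization exit code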
airbyte-integrations/bases/base-normalization/.dockerignore (new file, +13)
@@ -0,0 +1,13 @@
*
!Dockerfile
!entrypoint.sh
!build/sshtunneling.sh
!setup.py
!normalization
!dbt-project-template
!dbt-project-template-mssql
!dbt-project-template-mysql
!dbt-project-template-oracle
!dbt-project-template-clickhouse
!dbt-project-template-snowflake
!dbt-project-template-redshift
airbyte-integrations/bases/base-normalization/.gitignore (vendored, new file, +51)
@@ -0,0 +1,51 @@
build/
logs/
dbt-project-template/models/generated/
dbt-project-template/test_output.log
dbt_modules/
secrets/
dist/

integration_tests/normalization_test_output/*/*/macros
integration_tests/normalization_test_output/*/*/tests
integration_tests/normalization_test_output/**/*.json
integration_tests/normalization_test_output/**/*.log
integration_tests/normalization_test_output/**/*.md
integration_tests/normalization_test_output/**/*.sql
integration_tests/normalization_test_output/**/*.yml
!integration_tests/normalization_test_output/**/*dbt_project.yml
!integration_tests/normalization_test_output/**/generated/sources.yml

# We keep a minimal/restricted subset of sql files for all destinations to avoid noise in diff
# Simple Streams
!integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql
!integration_tests/normalization_test_output/**/DEDUP_EXCHANGE_RATE*.sql
!integration_tests/normalization_test_output/**/exchange_rate.sql
!integration_tests/normalization_test_output/**/EXCHANGE_RATE.sql
!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql
# Nested Streams
# Parent table
!integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_names_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_names.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_AB*.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES.sql
# Nested table
!integration_tests/normalization_test_output/**/nested_stream_with_*_partition_ab1.sql
!integration_tests/normalization_test_output/**/nested_stream_with_*_data_ab1.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_partition_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_data_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_partition.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_data.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_PARTITION_AB1.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_DATA_AB1.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA.sql

# but we keep all sql files for Postgres
!integration_tests/normalization_test_output/postgres/**/*.sql
integration_tests/normalization_test_output/postgres/**/dbt_data_tests
integration_tests/normalization_test_output/postgres/**/dbt_schema_tests
airbyte-integrations/bases/base-normalization/Dockerfile (new file, +37)
@@ -0,0 +1,37 @@
FROM fishtownanalytics/dbt:1.0.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass

WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.4.3
LABEL io.airbyte.name=airbyte/normalization
airbyte-integrations/bases/base-normalization/build.gradle (new file, +57)
@@ -0,0 +1,57 @@
plugins {
    id 'airbyte-docker-legacy'
    id 'airbyte-python'
}

dependencies {
    testFixtures(project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies'))
}

// we need to access the sshtunneling script from airbyte-workers for ssh support
def copySshScript = tasks.register('copySshScript', Copy) {
    from "${project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').buildDir}/resources/testFixtures"
    into "${buildDir}"
    include "sshtunneling.sh"
}
copySshScript.configure {
    dependsOn project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').tasks.named('processTestFixturesResources')
}

// make sure the copy task above worked (if it fails, it fails silently, annoyingly)
def checkSshScriptCopy = tasks.register('checkSshScriptCopy') {
    doFirst {
        assert file("${buildDir}/sshtunneling.sh").exists() : "Copy of sshtunneling.sh failed."
    }
}
checkSshScriptCopy.configure {
    dependsOn copySshScript
}

def generate = tasks.register('generate')
generate.configure {
    dependsOn checkSshScriptCopy
}

tasks.named('check').configure {
    dependsOn generate
}

tasks.named("jar").configure {
    dependsOn copySshScript
}

[
    'bigquery',
    'mysql',
    'postgres',
    'redshift',
    'snowflake',
    'oracle',
    'mssql',
    'clickhouse',
    'tidb',
].each { destinationName ->
    tasks.matching { it.name == 'integrationTestPython' }.configureEach {
        dependsOn project(":airbyte-integrations:connectors:destination-$destinationName").tasks.named('assemble')
    }
}
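As a usage sketch, the task wiring above can be exercised from the repo root; the Gradle project path is assumed from the directory layout, not stated in this commit.

# copySshScript copies sshtunneling.sh out of the CDK test fixtures into build/,
# and checkSshScriptCopy asserts the file actually landed (the copy fails silently)
./gradlew :airbyte-integrations:bases:base-normalization:generate
# 'check' depends on 'generate', so a plain check run verifies the copy too
./gradlew :airbyte-integrations:bases:base-normalization:check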
@@ -0,0 +1,36 @@
FROM ghcr.io/dbt-labs/dbt-core:1.3.1
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass
WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
RUN pip install "dbt-clickhouse>=1.4.0"
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-clickhouse
@@ -0,0 +1,65 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: true
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        # ephemeral materialization isn't supported in ClickHouse yet
        +materialized: view
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        # schema change test isn't supported in ClickHouse yet
        +on_schema_change: "ignore"
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        # incremental is not enabled for MySql yet
        #+materialized: incremental
        +materialized: table
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
modules-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: false
  schema: false
  identifier: false

# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        # incremental is not enabled for Oracle yet
        #+materialized: incremental
        +materialized: table
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.6.4
@@ -0,0 +1,66 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  +transient: false
  # https://docs.aws.amazon.com/redshift/latest/dg/super-configurations.html
  +pre-hook: "SET enable_case_sensitive_identifier to TRUE"
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,64 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  +transient: false
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,19 @@
## Installing dbt

1. Activate your venv and run `pip3 install dbt`
1. Copy `airbyte-normalization/sample_files/profiles.yml` over to `~/.dbt/profiles.yml`
1. Edit it to configure your profiles accordingly

## Running dbt

1. `cd airbyte-normalization`
1. You can now run dbt commands; to check that the setup is fine: `dbt debug`
1. To build the dbt tables in your warehouse: `dbt run`

## Running dbt from Airbyte generated config

1. You can also change directory (`cd /tmp/dev_root/workspace/1/0/normalize` for example) into the `normalize` folder of one of the workspaces generated by Airbyte.
1. You should find `profiles.yml` and a bunch of other dbt files/folders created there.
1. To check everything is set up properly: `dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)`
1. You can modify the `.sql` files and run `dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)` too
1. You can inspect the compiled dbt `.sql` files before they are run in the destination engine in the `normalize/build/compiled` or `normalize/build/run` folders
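Putting the last section together as one worked example (using the illustrative workspace path from above):

cd /tmp/dev_root/workspace/1/0/normalize
dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)   # sanity-check profile and connection
dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)     # build the normalized tables
ls build/compiled build/run                            # compiled SQL before/after execution, per the note above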
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
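The tags configured above are selectable at run time; a hedged sketch using dbt 1.x node selection:

# run only the final normalized tables, skipping internal CTEs and views
dbt run --profiles-dir=$(pwd) --project-dir=$(pwd) --select tag:normalized_tables
# list which models the incremental tag would match before running them
dbt ls --profiles-dir=$(pwd) --project-dir=$(pwd) --select tag:incremental_tables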
@@ -0,0 +1,19 @@
{% macro clean_tmp_tables(schemas) -%}
    {{ adapter.dispatch('clean_tmp_tables')(schemas) }}
{%- endmacro %}

-- default
{% macro default__clean_tmp_tables(schemas) -%}
    {% do exceptions.warn("\tINFO: CLEANING TEST LEFTOVERS IS NOT IMPLEMENTED FOR THIS DESTINATION. CONSIDER REMOVING TEST TABLES MANUALLY.\n") %}
{%- endmacro %}

-- for redshift
{% macro redshift__clean_tmp_tables(schemas) %}
    {%- for tmp_schema in schemas -%}
        {% do log("\tDROP SCHEMA IF EXISTS " ~ tmp_schema, info=True) %}
        {%- set drop_query -%}
            drop schema if exists {{ tmp_schema }} cascade;
        {%- endset -%}
        {%- do run_query(drop_query) -%}
    {%- endfor -%}
{% endmacro %}
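A hedged usage sketch: a macro like this is typically invoked as a dbt operation; the schema names are illustrative.

# on Redshift this drops each listed schema with CASCADE; every other adapter
# falls through to default__clean_tmp_tables, which only prints the warning
dbt run-operation clean_tmp_tables --args '{schemas: [test_normalization_abc, test_normalization_xyz]}'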
@@ -0,0 +1,173 @@
{#
    Adapter Macros for the following functions:
    - Bigquery: unnest() -> https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays#flattening-arrays-and-repeated-fields
    - Snowflake: flatten() -> https://docs.snowflake.com/en/sql-reference/functions/flatten.html
    - Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
    - postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
    - MSSQL: openjson() -> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
    - ClickHouse: ARRAY JOIN -> https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
#}

{# cross_join_unnest ------------------------------------------------- #}

{% macro cross_join_unnest(stream_name, array_col) -%}
    {{ adapter.dispatch('cross_join_unnest')(stream_name, array_col) }}
{%- endmacro %}

{% macro default__cross_join_unnest(stream_name, array_col) -%}
    {% do exceptions.warn("Undefined macro cross_join_unnest for this destination engine") %}
{%- endmacro %}

{% macro bigquery__cross_join_unnest(stream_name, array_col) -%}
    cross join unnest({{ array_col }}) as {{ array_col }}
{%- endmacro %}

{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
    ARRAY JOIN {{ array_col }}
{%- endmacro %}

{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
    {% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
{%- endmacro %}

{% macro postgres__cross_join_unnest(stream_name, array_col) -%}
    cross join jsonb_array_elements(
        case jsonb_typeof({{ array_col }})
        when 'array' then {{ array_col }}
        else '[]' end
    ) as _airbyte_nested_data
{%- endmacro %}

{% macro mysql__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro tidb__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro duckdb__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro snowflake__cross_join_unnest(stream_name, array_col) -%}
    cross join table(flatten({{ array_col }})) as {{ array_col }}
{%- endmacro %}

{% macro sqlserver__cross_join_unnest(stream_name, array_col) -%}
    {# https://docs.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15#option-1---openjson-with-the-default-output #}
    CROSS APPLY (
        SELECT [value] = CASE
            WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value]))
            WHEN [type] = 5 THEN [value]
            END
        FROM OPENJSON({{ array_col }})
    ) AS {{ array_col }}
{%- endmacro %}

{# unnested_column_value -- this macro is related to unnest_cte #}

{% macro unnested_column_value(column_col) -%}
    {{ adapter.dispatch('unnested_column_value')(column_col) }}
{%- endmacro %}

{% macro default__unnested_column_value(column_col) -%}
    {{ column_col }}
{%- endmacro %}

{% macro postgres__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro snowflake__unnested_column_value(column_col) -%}
    {{ column_col }}.value
{%- endmacro %}

{% macro redshift__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro mysql__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro tidb__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro duckdb__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro oracle__unnested_column_value(column_col) -%}
    {{ column_col }}
{%- endmacro %}

{% macro sqlserver__unnested_column_value(column_col) -%}
    {# the unnested array/sub_array will be located in the `value` column afterwards, so we need to refer to it #}
    {{ column_col }}.value
{%- endmacro %}

{# unnest_cte ------------------------------------------------- #}

{% macro unnest_cte(from_table, stream_name, column_col) -%}
    {{ adapter.dispatch('unnest_cte')(from_table, stream_name, column_col) }}
{%- endmacro %}

{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %}

{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%}
    {# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #}
    with joined as (
        select
            table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
            _airbyte_nested_data
        from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data
    )
{%- endmacro %}

{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%}
    {%- if not execute -%}
        {{ return('') }}
    {% endif %}

    {%- call statement('max_json_array_length', fetch_result=True) -%}
        with max_value as (
            select max(json_length({{ column_col }})) as max_number_of_items
            from {{ from_table }}
        )
        select
            case when max_number_of_items is not null and max_number_of_items > 1
            then max_number_of_items
            else 1 end as max_number_of_items
        from max_value
    {%- endcall -%}

    {%- set max_length = load_result('max_json_array_length') -%}
    with numbers as (
        {{ dbt_utils.generate_series(max_length["data"][0][0]) }}
    ),
    joined as (
        select
            _airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
            {# -- json_extract(column_col, '$[i][0]') as _airbyte_nested_data #}
            json_extract({{ column_col }}, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data
        from {{ from_table }}
        cross join numbers
        -- only generate the number of records in the cross join that corresponds
        -- to the number of items in {{ from_table }}.{{ column_col }}
        where numbers.generated_number <= json_length({{ column_col }})
    )
{%- endmacro %}

{% macro tidb__unnest_cte(from_table, stream_name, column_col) -%}
    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}

{% macro duckdb__unnest_cte(from_table, stream_name, column_col) -%}
    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}
@@ -0,0 +1,36 @@
{#
    concat in dbt 0.6.4 used to work fine for bigquery, but the new implementation in 0.7.3 is less scalable (it cannot handle too many columns).
    Therefore, we revert the implementation here and add versions for missing destinations.
#}

{% macro concat(fields) -%}
    {{ adapter.dispatch('concat')(fields) }}
{%- endmacro %}

{% macro bigquery__concat(fields) -%}
    {#-- concat() in BigQuery SQL scales better with the number of columns than using the '||' operator --#}
    concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro mysql__concat(fields) -%}
    {#-- MySQL doesn't support the '||' operator as concatenation by default --#}
    concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro sqlserver__concat(fields) -%}
    {#-- CONCAT() in SQL SERVER accepts from 2 to 254 arguments; we batch the main concat to overcome the limit. --#}
    {% set concat_chunks = [] %}
    {% for chunk in fields|batch(253) -%}
        {% set _ = concat_chunks.append( "concat(" ~ chunk|join(', ') ~ ",'')" ) %}
    {% endfor %}

    concat({{ concat_chunks|join(', ') }}, '')
{%- endmacro %}

{% macro tidb__concat(fields) -%}
    concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro duckdb__concat(fields) -%}
    concat({{ fields|join(', ') }})
{%- endmacro %}
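To make the SQL Server batching arithmetic concrete, a small shell sketch; the field count is illustrative.

fields=600                            # columns to concatenate
chunks=$(( (fields + 252) / 253 ))    # batch(253) -> ceil(600/253) = 3 inner concat() calls
# each inner concat gets at most 253 fields plus the trailing '' (254 args, at the limit);
# the outer concat gets the chunk results plus '' (here, 4 args)
echo "$chunks inner concat calls; outer concat takes $((chunks + 1)) arguments"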
@@ -0,0 +1,7 @@
{% macro mysql__current_timestamp() %}
    CURRENT_TIMESTAMP
{% endmacro %}

{% macro oracle__current_timestamp() %}
    CURRENT_TIMESTAMP
{% endmacro %}
@@ -0,0 +1,394 @@
{# json ------------------------------------------------- #}

{%- macro type_json() -%}
    {{ adapter.dispatch('type_json')() }}
{%- endmacro -%}

{% macro default__type_json() %}
    string
{% endmacro %}

{%- macro redshift__type_json() -%}
    super
{%- endmacro -%}

{% macro postgres__type_json() %}
    jsonb
{% endmacro %}

{%- macro oracle__type_json() -%}
    varchar2(4000)
{%- endmacro -%}

{% macro snowflake__type_json() %}
    variant
{% endmacro %}

{%- macro mysql__type_json() -%}
    json
{%- endmacro -%}

{%- macro sqlserver__type_json() -%}
    NVARCHAR(max)
{%- endmacro -%}

{% macro clickhouse__type_json() %}
    String
{% endmacro %}

{%- macro tidb__type_json() -%}
    json
{%- endmacro -%}

{%- macro duckdb__type_json() -%}
    json
{%- endmacro -%}

{# string ------------------------------------------------- #}

{%- macro mysql__type_string() -%}
    char
{%- endmacro -%}

{%- macro oracle__type_string() -%}
    varchar2(4000)
{%- endmacro -%}

{% macro sqlserver__type_string() %}
    NVARCHAR(max)
{%- endmacro -%}

{%- macro clickhouse__type_string() -%}
    String
{%- endmacro -%}

{#-- TODO: Remove this macro when dbt issue regarding unlimited varchars on postgres is resolved (https://github.com/dbt-labs/dbt-core/issues/5238) and we've upgraded to the latest version of dbt --#}
{%- macro postgres__type_string() -%}
    text
{%- endmacro -%}

{%- macro tidb__type_string() -%}
    char(1000)
{%- endmacro -%}

{%- macro duckdb__type_string() -%}
    VARCHAR
{%- endmacro -%}

{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
    float
{% endmacro %}

{% macro oracle__type_float() %}
    float
{% endmacro %}

{% macro clickhouse__type_float() %}
    Float64
{% endmacro %}

{% macro tidb__type_float() %}
    float
{% endmacro %}

{% macro duckdb__type_float() %}
    DOUBLE
{% endmacro %}

{# int ------------------------------------------------- #}
{% macro default__type_int() %}
    int
{% endmacro %}

{% macro mysql__type_int() %}
    signed
{% endmacro %}

{% macro oracle__type_int() %}
    int
{% endmacro %}

{% macro clickhouse__type_int() %}
    INT
{% endmacro %}

{% macro tidb__type_int() %}
    signed
{% endmacro %}

{% macro duckdb__type_int() %}
    INTEGER
{% endmacro %}

{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
    signed
{% endmacro %}

{% macro oracle__type_bigint() %}
    numeric
{% endmacro %}

{% macro clickhouse__type_bigint() %}
    BIGINT
{% endmacro %}

{% macro tidb__type_bigint() %}
    signed
{% endmacro %}

{% macro duckdb__type_bigint() %}
    BIGINT
{% endmacro %}

{# numeric ------------------------------------------------- --#}
{% macro mysql__type_numeric() %}
    float
{% endmacro %}

{% macro clickhouse__type_numeric() %}
    Float64
{% endmacro %}

{% macro tidb__type_numeric() %}
    float
{% endmacro %}

{% macro duckdb__type_numeric() %}
    DOUBLE
{% endmacro %}

{# very_large_integer --------------------------------------- --#}
{#
Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field.
Our type terminology unfortunately collides with DB terminology (i.e. "big_integer" means different things in different contexts)
so this macro needs to be called very_large_integer.
#}
{%- macro type_very_large_integer() -%}
    {{ adapter.dispatch('type_very_large_integer')() }}
{%- endmacro -%}

{% macro default__type_very_large_integer() %}
    numeric
{% endmacro %}

{% macro snowflake__type_very_large_integer() %}
    numeric
{% endmacro %}

{% macro mysql__type_very_large_integer() %}
    decimal(38, 0)
{% endmacro %}

{% macro clickhouse__type_very_large_integer() %}
    decimal128(0)
{% endmacro %}

{% macro tidb__type_very_large_integer() %}
    decimal(38, 0)
{% endmacro %}

{% macro duckdb__type_very_large_integer() %}
    DECIMAL(38, 0)
{% endmacro %}

{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
    time
{% endmacro %}

{%- macro sqlserver__type_timestamp() -%}
    {#-- in TSQL timestamp is really datetime --#}
    {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
    datetime
{%- endmacro -%}

{% macro clickhouse__type_timestamp() %}
    DateTime64
{% endmacro %}

{% macro tidb__type_timestamp() %}
    time
{% endmacro %}

{% macro duckdb__type_timestamp() %}
    TIMESTAMP
{% endmacro %}

{# timestamp with time zone ------------------------------------------------- #}

{%- macro type_timestamp_with_timezone() -%}
    {{ adapter.dispatch('type_timestamp_with_timezone')() }}
{%- endmacro -%}

{% macro default__type_timestamp_with_timezone() %}
    timestamp with time zone
{% endmacro %}

{% macro bigquery__type_timestamp_with_timezone() %}
    timestamp
{% endmacro %}

{#-- MySQL doesn't allow the cast operation with nullif to work with DATETIME and doesn't support storing the timezone, so we have to use char --#}
{#-- https://bugs.mysql.com/bug.php?id=77805 --#}
{%- macro mysql__type_timestamp_with_timezone() -%}
    char(1024)
{%- endmacro -%}

{% macro oracle__type_timestamp_with_timezone() %}
    varchar2(4000)
{% endmacro %}

{%- macro sqlserver__type_timestamp_with_timezone() -%}
    datetimeoffset
{%- endmacro -%}

{% macro redshift__type_timestamp_with_timezone() %}
    TIMESTAMPTZ
{% endmacro %}

{% macro clickhouse__type_timestamp_with_timezone() %}
    DateTime64
{% endmacro %}

{%- macro tidb__type_timestamp_with_timezone() -%}
    char(1000)
{%- endmacro -%}

{%- macro duckdb__type_timestamp_with_timezone() -%}
    TIMESTAMPTZ
{%- endmacro -%}

{# timestamp without time zone ------------------------------------------------- #}

{%- macro type_timestamp_without_timezone() -%}
    {{ adapter.dispatch('type_timestamp_without_timezone')() }}
{%- endmacro -%}

{% macro default__type_timestamp_without_timezone() %}
    timestamp
{% endmacro %}

{%- macro sqlserver__type_timestamp_without_timezone() -%}
    {#-- in TSQL timestamp is really datetime or datetime2 --#}
    {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
    datetime2
{%- endmacro -%}

{% macro bigquery__type_timestamp_without_timezone() %}
    datetime
{% endmacro %}

{% macro oracle__type_timestamp_without_timezone() %}
    varchar2(4000)
{% endmacro %}

{% macro redshift__type_timestamp_without_timezone() %}
    TIMESTAMP
{% endmacro %}

{% macro tidb__type_timestamp_without_timezone() %}
    datetime
{% endmacro %}

{% macro duckdb__type_timestamp_without_timezone() %}
    TIMESTAMP
{% endmacro %}

{# time without time zone ------------------------------------------------- #}

{%- macro type_time_without_timezone() -%}
    {{ adapter.dispatch('type_time_without_timezone')() }}
{%- endmacro -%}

{% macro default__type_time_without_timezone() %}
    time
{% endmacro %}

{% macro oracle__type_time_without_timezone() %}
    varchar2(4000)
{% endmacro %}

{% macro redshift__type_time_without_timezone() %}
    TIME
{% endmacro %}

{% macro clickhouse__type_time_without_timezone() %}
    String
{% endmacro %}

{% macro tidb__type_time_without_timezone() %}
    time
{% endmacro %}

{% macro duckdb__type_time_without_timezone() %}
    TIMESTAMP
{% endmacro %}

{# time with time zone ------------------------------------------------- #}

{%- macro type_time_with_timezone() -%}
    {{ adapter.dispatch('type_time_with_timezone')() }}
{%- endmacro -%}

{% macro default__type_time_with_timezone() %}
    time with time zone
{% endmacro %}

{%- macro mysql__type_time_with_timezone() -%}
    char(1024)
{%- endmacro -%}

{%- macro sqlserver__type_time_with_timezone() -%}
    NVARCHAR(max)
{%- endmacro -%}

{% macro bigquery__type_time_with_timezone() %}
    STRING
{% endmacro %}

{% macro oracle__type_time_with_timezone() %}
    varchar2(4000)
{% endmacro %}

{% macro snowflake__type_time_with_timezone() %}
    varchar
{% endmacro %}

{% macro redshift__type_time_with_timezone() %}
    TIMETZ
{% endmacro %}

{% macro clickhouse__type_time_with_timezone() %}
    String
{% endmacro %}

{%- macro tidb__type_time_with_timezone() -%}
    char(1000)
{%- endmacro -%}

{%- macro duckdb__type_time_with_timezone() -%}
    TIMESTAMPTZ
{%- endmacro -%}
{# date ------------------------------------------------- #}

{%- macro type_date() -%}
    {{ adapter.dispatch('type_date')() }}
{%- endmacro -%}

{% macro default__type_date() %}
    date
{% endmacro %}

{% macro oracle__type_date() %}
    varchar2(4000)
{% endmacro %}

{%- macro sqlserver__type_date() -%}
    date
{%- endmacro -%}

{% macro clickhouse__type_date() %}
    Date32
{% endmacro %}
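To make the dispatch pattern above concrete, a hypothetical column cast in a generated model (column name invented for illustration) resolves per adapter:

cast(_airbyte_data as {{ type_json() }})
-- renders to "cast(_airbyte_data as jsonb)" on Postgres,
-- "cast(_airbyte_data as variant)" on Snowflake,
-- and "cast(_airbyte_data as json)" on MySQL,
-- because adapter.dispatch('type_json') picks the <adapter>__type_json macro.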
@@ -0,0 +1,7 @@
{% macro mysql__except() %}
    {% do exceptions.warn("MySQL does not support EXCEPT operator") %}
{% endmacro %}

{% macro oracle__except() %}
    minus
{% endmacro %}
@@ -0,0 +1,5 @@
{# macro converting a hash to varchar #}

{% macro sqlserver__hash(field) -%}
    convert(varchar(32), HashBytes('md5', coalesce(cast({{field}} as {{dbt_utils.type_string()}}), '')), 2)
{%- endmacro %}
@@ -0,0 +1,317 @@
{#
Adapter Macros for the following functions:
- Bigquery: JSON_EXTRACT(json_string_expr, json_path_format) -> https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions
- Snowflake: JSON_EXTRACT_PATH_TEXT( <column_identifier> , '<path_name>' ) -> https://docs.snowflake.com/en/sql-reference/functions/json_extract_path_text.html
- Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
- Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
- ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
- TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions
- DuckDB: json_extract(json, 'path') note: If path is a LIST, the result will be a LIST of JSON -> https://duckdb.org/docs/extensions/json
#}

{# format_json_path -------------------------------------------------- #}
{% macro format_json_path(json_path_list) -%}
    {{ adapter.dispatch('format_json_path')(json_path_list) }}
{%- endmacro %}

{% macro default__format_json_path(json_path_list) -%}
    {{ '.' ~ json_path_list|join('.') }}
{%- endmacro %}

{% macro oracle__format_json_path(json_path_list) -%}
    {{ '\'$."' ~ json_path_list|join('."') ~ '"\'' }}
{%- endmacro %}

{#
BigQuery has different JSONPath syntax depending on which function you call.
Most of our macros use the "legacy" JSON functions, so this function uses
the legacy syntax.

These paths look like: "$['foo']['bar']"
#}
{% macro bigquery__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace('"', '\\"')) -%} {%- endif -%}
    {%- endfor -%}
    {{ '"$[\'' ~ str_list|join('\'][\'') ~ '\']"' }}
{%- endmacro %}

{#
For macros which use the newer JSON functions, define a new_format_json_path
macro which generates the correct path syntax.

These paths look like: '$."foo"."bar"'
#}
{% macro bigquery_new_format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace('\'', '\\\'')) -%} {%- endif -%}
    {%- endfor -%}
    {{ '\'$."' ~ str_list|join('"."') ~ '"\'' }}
{%- endmacro %}

{% macro postgres__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'" ~ str_list|join("','") ~ "'" }}
{%- endmacro %}

{% macro mysql__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro redshift__format_json_path(json_path_list) -%}
    {%- set quote = '"' -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%}
    {%- endfor -%}
    {{ quote ~ str_list|join(quote + "," + quote) ~ quote }}
{%- endmacro %}

{% macro snowflake__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''").replace('"', '""')) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'\"" ~ str_list|join('"."') ~ "\"'" }}
{%- endmacro %}

{% macro sqlserver__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'$.\"" ~ str_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro clickhouse__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'" ~ str_list|join("','") ~ "'" }}
{%- endmacro %}

{% macro tidb__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro duckdb__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{# json_extract ------------------------------------------------- #}

{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract')(from_table, json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro oracle__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro bigquery__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{% macro postgres__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
    {% else %}
        jsonb_extract_path({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{% macro mysql__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() != '' -%}
        {%- set json_column = from_table|string() + "." + json_column|string() -%}
    {%- endif -%}
    case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- endmacro %}

{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})
    {% else %}
        get_path(parse_json({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{% macro sqlserver__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
    {% else %}
        JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{% macro tidb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{% macro duckdb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}

{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract_scalar')(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_extract_scalar({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro oracle__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro bigquery__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_extract_scalar({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro postgres__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    jsonb_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }} RETURNING CHAR)
{%- endmacro %}

{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- endmacro %}

{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    to_varchar(get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }}))
{%- endmacro %}

{% macro sqlserver__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro tidb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    IF(
        JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) = 'null',
        NULL,
        JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }}))
    )
{%- endmacro %}

{% macro duckdb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_extract_string({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{# json_extract_array ------------------------------------------------- #}

{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract_array')(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract_array({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro oracle__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro bigquery__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract_array({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro postgres__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    {{ json_column }}.{{ format_json_path(json_path_list) }}
{%- endmacro %}

{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro tidb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro duckdb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{# json_extract_string_array ------------------------------------------------- #}

{% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract_string_array')(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
    {{ json_extract_array(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{#
See https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_extract_string_array

BigQuery does not allow NULL entries in REPEATED fields, so we replace those with literal "NULL" strings.
#}
{% macro bigquery__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
    array(
        select ifnull(x, "NULL")
        from unnest(json_value_array({{ json_column }}, {{ bigquery_new_format_json_path(normalized_json_path) }})) as x
    )
{%- endmacro %}
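To make the two-step scheme above concrete (table, column, and path names invented for illustration), extracting a nested field first formats the path list for the adapter, then wraps it in that adapter's extraction function:

{{ json_extract('t', '_airbyte_data', ['address', 'city'], ['address', 'city']) }}
-- renders on Postgres to:
jsonb_extract_path(t._airbyte_data, 'address','city')
-- and on Snowflake to:
get_path(parse_json(t._airbyte_data), '"address"."city"')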
@@ -0,0 +1,16 @@
{# quote ---------------------------------- #}
{% macro quote(column_name) -%}
    {{ adapter.dispatch('quote')(column_name) }}
{%- endmacro %}

{% macro default__quote(column_name) -%}
    {{ adapter.quote(column_name) }}
{%- endmacro %}

{% macro oracle__quote(column_name) -%}
    {{ '\"' ~ column_name ~ '\"'}}
{%- endmacro %}

{% macro clickhouse__quote(column_name) -%}
    {{ '\"' ~ column_name ~ '\"'}}
{%- endmacro %}
@@ -0,0 +1,25 @@
{# surrogate_key ---------------------------------- #}

{% macro oracle__surrogate_key(field_list) -%}
    ora_hash(
        {%- for field in field_list %}
            {% if not loop.last %}
                {{ field }} || '~' ||
            {% else %}
                {{ field }}
            {% endif %}
        {%- endfor %}
    )
{%- endmacro %}

{% macro clickhouse__surrogate_key(field_list) -%}
    assumeNotNull(hex(MD5(
        {%- for field in field_list %}
            {% if not loop.last %}
                toString({{ field }}) || '~' ||
            {% else %}
                toString({{ field }})
            {% endif %}
        {%- endfor %}
    )))
{%- endmacro %}
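As a worked example (column names invented for illustration), hashing a two-column key on Oracle renders, modulo whitespace, to:

{{ oracle__surrogate_key(['id', 'updated_at']) }}
-- renders to:
ora_hash(id || '~' || updated_at)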
@@ -0,0 +1,105 @@

{# boolean_to_string ------------------------------------------------- #}
{% macro boolean_to_string(boolean_column) -%}
    {{ adapter.dispatch('boolean_to_string')(boolean_column) }}
{%- endmacro %}

{% macro default__boolean_to_string(boolean_column) -%}
    {{ boolean_column }}
{%- endmacro %}

{% macro redshift__boolean_to_string(boolean_column) -%}
    case when {{ boolean_column }} then 'true' else 'false' end
{%- endmacro %}

{# array_to_string ------------------------------------------------- #}
{% macro array_to_string(array_column) -%}
    {{ adapter.dispatch('array_to_string')(array_column) }}
{%- endmacro %}

{% macro default__array_to_string(array_column) -%}
    {{ array_column }}
{%- endmacro %}

{% macro bigquery__array_to_string(array_column) -%}
    array_to_string({{ array_column }}, "|", "")
{%- endmacro %}

{% macro oracle__array_to_string(array_column) -%}
    cast({{ array_column }} as varchar2(4000))
{%- endmacro %}

{% macro sqlserver__array_to_string(array_column) -%}
    cast({{ array_column }} as {{dbt_utils.type_string()}})
{%- endmacro %}

{% macro redshift__array_to_string(array_column) -%}
    json_serialize({{array_column}})
{%- endmacro %}

{# object_to_string ------------------------------------------------- #}
{% macro object_to_string(object_column) -%}
    {{ adapter.dispatch('object_to_string')(object_column) }}
{%- endmacro %}

{% macro default__object_to_string(object_column) -%}
    {{ object_column }}
{%- endmacro %}

{% macro redshift__object_to_string(object_column) -%}
    json_serialize({{object_column}})
{%- endmacro %}

{# cast_to_boolean ------------------------------------------------- #}
{% macro cast_to_boolean(field) -%}
    {{ adapter.dispatch('cast_to_boolean')(field) }}
{%- endmacro %}

{% macro default__cast_to_boolean(field) -%}
    cast({{ field }} as boolean)
{%- endmacro %}

{# -- MySQL does not support the cast function converting a string directly to boolean (boolean being an alias of tinyint(1)), see https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast #}
{% macro mysql__cast_to_boolean(field) -%}
    IF(lower({{ field }}) = 'true', true, false)
{%- endmacro %}

{# TiDB does not support casting a string to boolean #}
{% macro tidb__cast_to_boolean(field) -%}
    IF(lower({{ field }}) = 'true', true, false)
{%- endmacro %}

{% macro duckdb__cast_to_boolean(field) -%}
    cast({{ field }} as boolean)
{%- endmacro %}

{% macro redshift__cast_to_boolean(field) -%}
    cast({{ field }} as boolean)
{%- endmacro %}

{# -- MS SQL Server does not support converting a string directly to boolean, it must be cast as bit #}
{% macro sqlserver__cast_to_boolean(field) -%}
    cast({{ field }} as bit)
{%- endmacro %}

{# -- ClickHouse does not support converting string directly to Int8, it must go through int first #}
{% macro clickhouse__cast_to_boolean(field) -%}
    IF(lower({{ field }}) = 'true', 1, 0)
{%- endmacro %}

{# empty_string_to_null ------------------------------------------------- #}
{% macro empty_string_to_null(field) -%}
    {{ return(adapter.dispatch('empty_string_to_null')(field)) }}
{%- endmacro %}

{%- macro default__empty_string_to_null(field) -%}
    nullif({{ field }}, '')
{%- endmacro -%}

{%- macro duckdb__empty_string_to_null(field) -%}
    nullif(nullif({{ field }}, 'null'), '')
{%- endmacro -%}

{%- macro redshift__empty_string_to_null(field) -%}
    nullif({{ field }}::varchar, '')
{%- endmacro -%}
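For reference, dispatching the boolean cast above (column name invented for illustration) yields per adapter:

{{ cast_to_boolean('is_active') }}
-- default:        cast(is_active as boolean)
-- MySQL / TiDB:   IF(lower(is_active) = 'true', true, false)
-- SQL Server:     cast(is_active as bit)
-- ClickHouse:     IF(lower(is_active) = 'true', 1, 0)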
@@ -0,0 +1,4 @@
-- see https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas/#an-alternative-pattern-for-generating-schema-names
{% macro generate_schema_name(custom_schema_name, node) -%}
    {{ generate_schema_name_for_env(custom_schema_name, node) }}
{%- endmacro %}
@@ -0,0 +1,61 @@
{#
    These macros control how incremental models are updated in Airbyte's normalization step
    - get_max_normalized_cursor retrieves the maximum cursor value already present in the normalized table
    - incremental_clause controls the predicate used to filter new data for incremental processing
#}

{% macro incremental_clause(col_emitted_at, tablename) -%}
    {{ adapter.dispatch('incremental_clause')(col_emitted_at, tablename) }}
{%- endmacro %}

{%- macro default__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
and coalesce(
    cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) > (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}),
    {# -- if {{ col_emitted_at }} is NULL in either table, the previous comparison would evaluate to NULL, #}
    {# -- so we coalesce and make sure the row is always returned for incremental processing instead #}
    true)
{% endif %}
{%- endmacro -%}

{# -- see https://on-systems.tech/113-beware-dbt-incremental-updates-against-snowflake-external-tables/ #}
{%- macro snowflake__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
    {% if get_max_normalized_cursor(col_emitted_at, tablename) %}
and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
    cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }})
    {% endif %}
{% endif %}
{%- endmacro -%}

{# -- see https://cloud.google.com/bigquery/docs/querying-partitioned-tables#best_practices_for_partition_pruning #}
{%- macro bigquery__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
    {% if get_max_normalized_cursor(col_emitted_at, tablename) %}
and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
    cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }})
    {% endif %}
{% endif %}
{%- endmacro -%}

{%- macro sqlserver__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}) is null
    or cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
        (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}))
{% endif %}
{%- endmacro -%}

{% macro get_max_normalized_cursor(col_emitted_at, tablename) %}
{% if execute and is_incremental() %}
    {% if env_var('INCREMENTAL_CURSOR', 'UNSET') == 'UNSET' %}
        {% set query %}
            select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}
        {% endset %}
        {% set max_cursor = run_query(query).columns[0][0] %}
        {% do return(max_cursor) %}
    {% else %}
        {% do return(env_var('INCREMENTAL_CURSOR')) %}
    {% endif %}
{% endif %}
{% endmacro %}
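To see what the default clause contributes at run time (column and table names invented for illustration), a where-clause in a generated incremental model such as:

where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }}

-- is extended, during an incremental run, to roughly:
where 1 = 1 and coalesce(
    cast(_airbyte_emitted_at as timestamp with time zone) > (select max(cast(_airbyte_emitted_at as timestamp with time zone)) from my_schema.my_table),
    true)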
@@ -0,0 +1,34 @@
{% macro oracle__test_equal_rowcount(model, compare_model) %}

{#-- Needs to be set at parse time, before we return '' below --#}
{{ config(fail_calc = 'coalesce(diff_count, 0)') }}

{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
{%- if not execute -%}
    {{ return('') }}
{% endif %}

with a as (

    select count(*) as count_a from {{ model }}

),
b as (

    select count(*) as count_b from {{ compare_model }}

),
final as (

    select
        count_a,
        count_b,
        abs(count_a - count_b) as diff_count
    from a
    cross join b

)

select diff_count from final

{% endmacro %}
@@ -0,0 +1,107 @@
{#
-- Adapted from https://github.com/dbt-labs/dbt-utils/blob/0-19-0-updates/macros/schema_tests/equality.sql
-- dbt-utils version: 0.6.4
-- This macro needs to be updated accordingly when dbt-utils is upgraded.
-- This is needed because MySQL does not support the EXCEPT operator!
#}

{% macro mysql__test_equality(model, compare_model, compare_columns=None) %}

{%- if not execute -%}
    {{ return('') }}
{% endif %}

{%- do dbt_utils._is_relation(model, 'test_equality') -%}

{%- if not compare_columns -%}
    {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
    {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
{%- endif -%}

{% set compare_cols_csv = compare_columns | join(', ') %}

with a as (
    select * from {{ model }}
),

b as (
    select * from {{ compare_model }}
),

a_minus_b as (
    select {{ compare_cols_csv }} from a
    where ({{ compare_cols_csv }}) not in
        (select {{ compare_cols_csv }} from b)
),

b_minus_a as (
    select {{ compare_cols_csv }} from b
    where ({{ compare_cols_csv }}) not in
        (select {{ compare_cols_csv }} from a)
),

unioned as (
    select * from a_minus_b
    union all
    select * from b_minus_a
),

final as (
    select (select count(*) from unioned) +
        (select abs(
            (select count(*) from a_minus_b) -
            (select count(*) from b_minus_a)
        ))
        as count
)

select count from final

{% endmacro %}

{% macro oracle__test_equality(model) %}
{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
{%- if not execute -%}
    {{ return('') }}
{% endif %}

-- setup
{%- do dbt_utils._is_relation(model, 'test_equality') -%}

{#-
If the compare_cols arg is provided, we can run this test without querying the
information schema — this allows the model to be an ephemeral model
-#}
{%- set compare_columns = kwargs.get('compare_columns', None) -%}

{%- if not compare_columns -%}
    {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
    {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
{%- endif -%}

{% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %}
{% set compare_cols_csv = compare_columns | join(', ') %}

with a as (
    select * from {{ model }}
),
b as (
    select * from {{ compare_model }}
),
a_minus_b as (
    select {{compare_cols_csv}} from a
    {{ dbt_utils.except() }}
    select {{compare_cols_csv}} from b
),
b_minus_a as (
    select {{compare_cols_csv}} from b
    {{ dbt_utils.except() }}
    select {{compare_cols_csv}} from a
),
unioned as (
    select * from a_minus_b
    union all
    select * from b_minus_a
)
select count(*) from unioned
{% endmacro %}
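The NOT IN construction in mysql__test_equality stands in for the set difference that EXCEPT would normally compute. For a single-column relation (names invented for illustration), the emulation is roughly equivalent, ignoring NULL semantics:

select id from a where (id) not in (select id from b)
-- plays the role of:
select id from a except select id from b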
@@ -0,0 +1,51 @@
{#
    This overrides the behavior of the macro `should_full_refresh` so full refreshes are triggered if:
    - the dbt cli is run with the --full-refresh flag or the model is explicitly configured to full_refresh
    - the column _airbyte_ab_id does not exist in the normalized tables, to make sure it is well populated.
#}

{%- macro need_full_refresh(col_ab_id, target_table=this) -%}
    {%- if not execute -%}
        {{ return(false) }}
    {%- endif -%}
    {%- set found_column = [] %}
    {%- set cols = adapter.get_columns_in_relation(target_table) -%}
    {%- for col in cols -%}
        {%- if col.column == col_ab_id -%}
            {% do found_column.append(col.column) %}
        {%- endif -%}
    {%- endfor -%}
    {%- if found_column -%}
        {{ return(false) }}
    {%- else -%}
        {{ dbt_utils.log_info(target_table ~ "." ~ col_ab_id ~ " does not exist yet. The table will be created or rebuilt with dbt.full_refresh") }}
        {{ return(true) }}
    {%- endif -%}
{%- endmacro -%}

{%- macro should_full_refresh() -%}
    {% set config_full_refresh = config.get('full_refresh') %}
    {%- if config_full_refresh is none -%}
        {% set config_full_refresh = flags.FULL_REFRESH %}
    {%- endif -%}
    {%- if not config_full_refresh -%}
        {% set config_full_refresh = need_full_refresh(get_col_ab_id(), this) %}
    {%- endif -%}
    {% do return(config_full_refresh) %}
{%- endmacro -%}

{%- macro get_col_ab_id() -%}
    {{ adapter.dispatch('get_col_ab_id')() }}
{%- endmacro -%}

{%- macro default__get_col_ab_id() -%}
    _airbyte_ab_id
{%- endmacro -%}

{%- macro oracle__get_col_ab_id() -%}
    "_AIRBYTE_AB_ID"
{%- endmacro -%}

{%- macro snowflake__get_col_ab_id() -%}
    _AIRBYTE_AB_ID
{%- endmacro -%}
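Because dbt consults a project-level should_full_refresh() macro when it materializes incremental models, overriding it here changes the behavior of every incremental model at once; a minimal sketch of the decision it feeds (assuming dbt 1.0's built-in incremental materialization flow):

{% if should_full_refresh() %}
    {# drop and rebuild the table from scratch #}
{% else %}
    {# insert only the new rows selected by incremental_clause() #}
{% endif %}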
@@ -0,0 +1,46 @@
{#
    Similar to the star macro here: https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/star.sql

    This star_intersect macro takes an additional 'intersect' relation as argument.
    Its behavior is to select columns from both the 'intersect' and 'from' relations with the following rules:
    - if a column exists in both the 'from' and 'intersect' relations, then the column from 'intersect' is used
    - if a column is not in both relations, then only the column from the 'from' relation is used
#}
{% macro star_intersect(from, intersect, from_alias=False, intersect_alias=False, except=[]) -%}
    {%- do dbt_utils._is_relation(from, 'star_intersect') -%}
    {%- do dbt_utils._is_ephemeral(from, 'star_intersect') -%}
    {%- do dbt_utils._is_relation(intersect, 'star_intersect') -%}
    {%- do dbt_utils._is_ephemeral(intersect, 'star_intersect') -%}

    {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
    {%- if not execute -%}
        {{ return('') }}
    {% endif %}

    {%- set include_cols = [] %}
    {%- set cols = adapter.get_columns_in_relation(from) -%}
    {%- set except = except | map("lower") | list %}
    {%- for col in cols -%}
        {%- if col.column|lower not in except -%}
            {% do include_cols.append(col.column) %}
        {%- endif %}
    {%- endfor %}

    {%- set include_intersect_cols = [] %}
    {%- set intersect_cols = adapter.get_columns_in_relation(intersect) -%}
    {%- for col in intersect_cols -%}
        {%- if col.column|lower not in except -%}
            {% do include_intersect_cols.append(col.column) %}
        {%- endif %}
    {%- endfor %}

    {%- for col in include_cols %}
        {%- if col in include_intersect_cols -%}
            {%- if intersect_alias %}{{ intersect_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }}
            {%- if not loop.last %},{{ '\n  ' }}{% endif %}
        {%- else %}
            {%- if from_alias %}{{ from_alias }}.{% else %}{{ from }}.{%- endif -%}{{ adapter.quote(col)|trim }} as {{ adapter.quote(col)|trim }}
            {%- if not loop.last %},{{ '\n  ' }}{% endif %}
        {%- endif %}
    {%- endfor -%}
{%- endmacro %}
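A hypothetical call (relation and column names invented for illustration) shows the preference for the 'intersect' side:

select
    {{ star_intersect(ref('users_base'), this, from_alias='base', intersect_alias='new') }}
from ...

-- might render a column list like:
new."id",
new."updated_at",
base."legacy_col" as "legacy_col"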
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,3 @@
# This dockerfile only exists to pull and re-export this image converted to the local arch of this machine
# It is then consumed by the Dockerfile in this directory as "fishtownanalytics/dbt:1.0.0-dev"
FROM fishtownanalytics/dbt:1.0.0
@@ -0,0 +1,66 @@
version: "3.7"

services:
  normalization:
    image: airbyte/normalization:${VERSION}
    build:
      dockerfile: Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-mssql:
    image: airbyte/normalization-mssql:${VERSION}
    build:
      dockerfile: mssql.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-mysql:
    image: airbyte/normalization-mysql:${VERSION}
    build:
      dockerfile: mysql.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-oracle:
    image: airbyte/normalization-oracle:${VERSION}
    build:
      dockerfile: oracle.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-clickhouse:
    image: airbyte/normalization-clickhouse:${VERSION}
    build:
      dockerfile: clickhouse.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-snowflake:
    image: airbyte/normalization-snowflake:${VERSION}
    build:
      dockerfile: snowflake.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-redshift:
    image: airbyte/normalization-redshift:${VERSION}
    build:
      dockerfile: redshift.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-tidb:
    image: airbyte/normalization-tidb:${VERSION}
    build:
      dockerfile: tidb.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-duckdb:
    image: airbyte/normalization-duckdb:${VERSION}
    build:
      dockerfile: duckdb.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
@@ -0,0 +1,22 @@
version: "3.7"

# this file only exists so that we can easily check that all of these images exist in docker hub in check_images_exist.sh
services:
  normalization:
    image: airbyte/normalization:${VERSION}
  normalization-mssql:
    image: airbyte/normalization-mssql:${VERSION}
  normalization-mysql:
    image: airbyte/normalization-mysql:${VERSION}
  normalization-oracle:
    image: airbyte/normalization-oracle:${VERSION}
  normalization-clickhouse:
    image: airbyte/normalization-clickhouse:${VERSION}
  normalization-snowflake:
    image: airbyte/normalization-snowflake:${VERSION}
  normalization-redshift:
    image: airbyte/normalization-redshift:${VERSION}
  normalization-tidb:
    image: airbyte/normalization-tidb:${VERSION}
  normalization-duckdb:
    image: airbyte/normalization-duckdb:${VERSION}
@@ -0,0 +1,40 @@
FROM fishtownanalytics/dbt:1.0.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass

WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .
RUN pip install dbt-duckdb==1.0.1

# add duckdb manually (installing it via setup.py caused lots of errors)
RUN pip install duckdb

WORKDIR /airbyte/normalization_code/dbt-template/
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-duckdb
160
airbyte-integrations/bases/base-normalization/entrypoint.sh
Executable file
@@ -0,0 +1,160 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e # tells bash, in a script, to exit whenever anything returns a non-zero return value.
|
||||||
|
|
||||||
|
function echo2() {
|
||||||
|
echo >&2 "$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
function error() {
|
||||||
|
echo2 "$@"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
function config_cleanup() {
|
||||||
|
# Remove config file as it might still contain sensitive credentials (for example,
|
||||||
|
# injected OAuth Parameters should not be visible to custom docker images running custom transformation operations)
|
||||||
|
rm -f "${CONFIG_FILE}"
|
||||||
|
}
|
||||||
|
|
||||||
|
function check_dbt_event_buffer_size() {
|
||||||
|
ret=0
|
||||||
|
dbt --help | grep -E -- '--event-buffer-size' && return
|
||||||
|
ret=1
|
||||||
|
}

PROJECT_DIR=$(pwd)

# How many commits should be downloaded from git to view the history of a branch
GIT_HISTORY_DEPTH=5

# This function produces a working dbt project folder at the $PROJECT_DIR path so that dbt commands can be run
# from it successfully with the proper credentials. This can be accomplished by providing different custom variables
# to tweak the final project structure. For example, we can either use a user-provided base folder (git repo) or
# use the standard/base template folder to generate normalization models from.
function configuredbt() {
  # We first need to generate a workspace folder for a dbt project to run from:
  if [[ -z "${GIT_REPO}" ]]; then
    # No git repository provided, use the dbt-template folder (shipped inside the normalization docker image)
    # as the base folder for the dbt workspace
    cp -r /airbyte/normalization_code/dbt-template/* "${PROJECT_DIR}"
    echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}"
    set +e # allow the script to continue running even if the next commands fail
    # Generate a profiles.yml file for the selected destination/integration type
    transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}"
    if [[ -n "${CATALOG_FILE}" ]]; then
      # If a catalog file is provided, generate normalization models; otherwise skip this step
      echo "Running: transform-catalog --integration-type ${INTEGRATION_TYPE} --profile-config-dir ${PROJECT_DIR} --catalog ${CATALOG_FILE} --out ${PROJECT_DIR}/models/generated/ --json-column _airbyte_data"
      transform-catalog --integration-type "${INTEGRATION_TYPE}" --profile-config-dir "${PROJECT_DIR}" --catalog "${CATALOG_FILE}" --out "${PROJECT_DIR}/models/generated/" --json-column "_airbyte_data"
      TRANSFORM_EXIT_CODE=$?
      if [ ${TRANSFORM_EXIT_CODE} -ne 0 ]; then
        echo -e "\nShowing destination_catalog.json to diagnose/debug errors (${TRANSFORM_EXIT_CODE}):\n"
        cat "${CATALOG_FILE}" | jq
        exit ${TRANSFORM_EXIT_CODE}
      fi
    fi
    set -e # exit again whenever any command returns a non-zero status
  else
    trap config_cleanup EXIT
    # Use the git repository as the base workspace folder for dbt projects
    if [[ -d git_repo ]]; then
      rm -rf git_repo
    fi
    # Make a shallow clone of the latest git repository in the workspace folder
    if [[ -z "${GIT_BRANCH}" ]]; then
      # No git branch specified, use the default branch of the git repository
      echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} --single-branch \$GIT_REPO git_repo"
      git clone --depth ${GIT_HISTORY_DEPTH} --single-branch "${GIT_REPO}" git_repo
    else
      # Checkout a particular branch from the git repository
      echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} -b ${GIT_BRANCH} --single-branch \$GIT_REPO git_repo"
      git clone --depth ${GIT_HISTORY_DEPTH} -b "${GIT_BRANCH}" --single-branch "${GIT_REPO}" git_repo
    fi
    # Print a few history logs to make it easier for users to verify the right code version has been checked out from git
    echo "Last 5 commits in git_repo:"
    (cd git_repo; git log --oneline -${GIT_HISTORY_DEPTH}; cd -)
    # Generate a profiles.yml file for the selected destination/integration type
    echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}"
    transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}"
    config_cleanup
  fi
}
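
# Example (assumed values, for illustration): configuredbt reads its inputs
# from the CONFIG_FILE, INTEGRATION_TYPE, GIT_REPO and GIT_BRANCH variables
# set by main() below, e.g.:
#   CONFIG_FILE=/config/destination_config.json INTEGRATION_TYPE=postgres configuredbt
#   GIT_REPO=https://github.com/acme/custom-dbt.git GIT_BRANCH=main configuredbt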

## todo: make it easy to select source or destination and validate based on selection by adding an integration type env variable.
function main() {
  CMD="$1"
  shift 1 || error "command not specified."

  while [ $# -ne 0 ]; do
    case "$1" in
    --config)
      CONFIG_FILE="$2"
      shift 2
      ;;
    --catalog)
      CATALOG_FILE="$2"
      shift 2
      ;;
    --integration-type)
      INTEGRATION_TYPE="$2"
      shift 2
      ;;
    --git-repo)
      GIT_REPO="$2"
      shift 2
      ;;
    --git-branch)
      GIT_BRANCH="$2"
      shift 2
      ;;
    *)
      error "Unknown option: $1"
      ;;
    esac
  done

  case "$CMD" in
  run)
    configuredbt
    . /airbyte/sshtunneling.sh
    openssh "${PROJECT_DIR}/ssh.json"
    trap 'closessh' EXIT

    set +e # allow the script to continue running even if the next commands fail
    # We don't run dbt 1.0.x on all destinations (because their plugins don't support it yet),
    # so we only pass `--event-buffer-size` if it's supported by dbt.
    # The same goes for JSON-formatted logging.
    check_dbt_event_buffer_size
    if [ "$ret" -eq 0 ]; then
      echo -e "\nDBT >=1.0.0 detected; using 10K event buffer size\n"
      dbt_additional_args="--event-buffer-size=10000 --log-format json"
    else
      dbt_additional_args=""
    fi

    # Run dbt to compile and execute the generated normalization models
    dbt ${dbt_additional_args} run --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
    DBT_EXIT_CODE=$?
    if [ ${DBT_EXIT_CODE} -ne 0 ]; then
      echo -e "\nRunning dbt debug to check if the destination is available for dbt and well configured (${DBT_EXIT_CODE}):\n"
      dbt debug --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
      DBT_DEBUG_EXIT_CODE=$?
      if [ ${DBT_DEBUG_EXIT_CODE} -eq 0 ]; then
        # dbt debug is successful, so the error must be somewhere else...
        echo -e "\nForwarding dbt output logs to diagnose/debug errors (${DBT_DEBUG_EXIT_CODE}):\n"
        cat "${PROJECT_DIR}/../logs/dbt.log"
      fi
    fi
    closessh
    exit ${DBT_EXIT_CODE}
    ;;
  configure-dbt)
    configuredbt
    ;;
  *)
    error "Unknown command: $CMD"
    ;;
  esac
}

main "$@"
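
# Invocation sketch (paths, repo URL and integration type are illustrative):
#   /airbyte/entrypoint.sh run --config destination_config.json --catalog catalog.json --integration-type postgres
#   /airbyte/entrypoint.sh configure-dbt --git-repo https://github.com/acme/custom-dbt.git --git-branch main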
@@ -0,0 +1,740 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import json
import os
import pathlib
import random
import re
import socket
import string
import subprocess
import sys
import threading
import time
from copy import copy
from typing import Any, Callable, Dict, List, Union

import yaml
from normalization.destination_type import DestinationType
from normalization.transform_catalog.transform import read_yaml_config, write_yaml_config
from normalization.transform_config.transform import TransformConfig

NORMALIZATION_TEST_TARGET = "NORMALIZATION_TEST_TARGET"
NORMALIZATION_TEST_MSSQL_DB_PORT = "NORMALIZATION_TEST_MSSQL_DB_PORT"
NORMALIZATION_TEST_MYSQL_DB_PORT = "NORMALIZATION_TEST_MYSQL_DB_PORT"
NORMALIZATION_TEST_POSTGRES_DB_PORT = "NORMALIZATION_TEST_POSTGRES_DB_PORT"
NORMALIZATION_TEST_CLICKHOUSE_DB_PORT = "NORMALIZATION_TEST_CLICKHOUSE_DB_PORT"
NORMALIZATION_TEST_TIDB_DB_PORT = "NORMALIZATION_TEST_TIDB_DB_PORT"
NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH = "NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH"


class DbtIntegrationTest(object):
    def __init__(self):
        self.target_schema = "test_normalization"
        self.container_prefix = f"test_normalization_db_{self.random_string(3)}"
        self.db_names = []

    @staticmethod
    def generate_random_string(prefix: str) -> str:
        return prefix + DbtIntegrationTest.random_string(5)

    @staticmethod
    def random_string(length: int) -> str:
        return "".join(random.choice(string.ascii_lowercase) for i in range(length))

    def set_target_schema(self, target_schema: str):
        self.target_schema = target_schema

    def setup_db(self, destinations_to_test: List[str]):
        if DestinationType.POSTGRES.value in destinations_to_test:
            self.setup_postgres_db()
        if DestinationType.MYSQL.value in destinations_to_test:
            self.setup_mysql_db()
        if DestinationType.MSSQL.value in destinations_to_test:
            self.setup_mssql_db()
        if DestinationType.CLICKHOUSE.value in destinations_to_test:
            self.setup_clickhouse_db()
        if DestinationType.TIDB.value in destinations_to_test:
            self.setup_tidb_db()
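
    # Usage sketch (destination names illustrative): spin up only the
    # containers needed for the selected destinations, then tear them down:
    #   dbt_test = DbtIntegrationTest()
    #   dbt_test.setup_db(["postgres", "mysql"])
    #   ...run tests...
    #   dbt_test.tear_down_db()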

    def setup_postgres_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "username": "integration-tests",
            "password": "integration-tests",
            "port": port,
            "database": "postgres",
            "schema": self.target_schema,
        }
        if start_db:
            self.db_names.append("postgres")
            print("Starting localhost postgres container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_postgres",
                "-e",
                f"POSTGRES_USER={config['username']}",
                "-e",
                f"POSTGRES_PASSWORD={config['password']}",
                "-p",
                f"{config['port']}:5432",
                "-d",
                "marcosmarxm/postgres-ssl:dev",
                "-c",
                "ssl=on",
                "-c",
                "ssl_cert_file=/var/lib/postgresql/server.crt",
                "-c",
                "ssl_key_file=/var/lib/postgresql/server.key",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for Postgres DB to start...15 sec")
            time.sleep(15)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/postgres.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_mysql_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "port": port,
            "database": self.target_schema,
            "username": "root",
            "password": "",
        }
        if start_db:
            self.db_names.append("mysql")
            print("Starting localhost mysql container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_mysql",
                "-e",
                "MYSQL_ALLOW_EMPTY_PASSWORD=yes",
                "-e",
                "MYSQL_INITDB_SKIP_TZINFO=yes",
                "-e",
                f"MYSQL_DATABASE={config['database']}",
                "-e",
                "MYSQL_ROOT_HOST=%",
                "-p",
                f"{config['port']}:3306",
                "-d",
                "mysql/mysql-server",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for MySQL DB to start...15 sec")
            time.sleep(15)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/mysql.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_mssql_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "username": "SA",
            "password": "MyStr0ngP@ssw0rd",
            "port": port,
            "database": self.target_schema,
            "schema": self.target_schema,
        }
        if start_db:
            self.db_names.append("mssql")
            print("Starting localhost MS SQL Server container for tests")
            command_start_container = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_mssql",
                "-h",
                f"{self.container_prefix}_mssql",
                "-e",
                "ACCEPT_EULA='Y'",
                "-e",
                f"SA_PASSWORD='{config['password']}'",
                "-e",
                "MSSQL_PID='Standard'",
                "-p",
                f"{config['port']}:1433",
                "-d",
                "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04",
            ]
            # cmds & parameters
            cmd_start_container = " ".join(command_start_container)
            wait_sec = 30
            # run the docker container
            print("Executing: ", cmd_start_container)
            subprocess.check_call(cmd_start_container, shell=True)
            # wait for the service to become available
            print(f"....Waiting for MS SQL Server to start...{wait_sec} sec")
            time.sleep(wait_sec)
            # Run additional commands to prepare the table
            command_create_db = [
                "docker",
                "exec",
                f"{self.container_prefix}_mssql",
                "/opt/mssql-tools/bin/sqlcmd",
                "-S",
                config["host"],
                "-U",
                config["username"],
                "-P",
                config["password"],
                "-Q",
                f"CREATE DATABASE [{config['database']}]",
            ]
            # create test db
            print("Executing: ", " ".join(command_create_db))
            subprocess.call(command_create_db)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/mssql.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_clickhouse_db(self):
        """
        ClickHouse official JDBC driver uses HTTP port 8123.

        Ref: https://altinity.com/blog/2019/3/15/clickhouse-networking-part-1
        """
        start_db = True
        port = 8123
        if os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT))
            start_db = False
        if start_db:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "port": port,
            "database": self.target_schema,
            "username": "default",
            "password": "",
            "ssl": False,
        }
        if start_db:
            self.db_names.append("clickhouse")
            print("Starting localhost clickhouse container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_clickhouse",
                "--ulimit",
                "nofile=262144:262144",
                "-p",
                f"{config['port']}:8123",  # the clickhouse JDBC driver uses the HTTP port
                "-d",
                # so far, only the latest ClickHouse server image has window
                # functions turned on
                "clickhouse/clickhouse-server:latest",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for ClickHouse DB to start...15 sec")
            time.sleep(15)
            # Run additional commands to prepare the table
            command_create_db = [
                "docker",
                "run",
                "--rm",
                "--link",
                f"{self.container_prefix}_clickhouse:clickhouse-server",
                "clickhouse/clickhouse-client:21.8.10.19",
                "--host",
                "clickhouse-server",
                "--query",
                f"CREATE DATABASE IF NOT EXISTS {config['database']}",
            ]
            # create test db
            print("Executing: ", " ".join(command_create_db))
            subprocess.call(command_create_db)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/clickhouse.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_tidb_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "127.0.0.1",
            "port": port,
            "database": self.target_schema,
            "schema": self.target_schema,
            "username": "root",
            "password": "",
            "ssl": False,
        }
        if start_db:
            self.db_names.append("tidb")
            print("Starting tidb container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_tidb",
                "-p",
                f"{config['port']}:4000",
                "-d",
                "pingcap/tidb:v5.4.0",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for TiDB to start...15 sec")
            time.sleep(15)
            command_create_db = [
                "docker",
                "run",
                "--rm",
                "--link",
                f"{self.container_prefix}_tidb:tidb",
                "arey/mysql-client",
                "--host=tidb",
                "--user=root",
                "--port=4000",
                f"--execute=CREATE DATABASE IF NOT EXISTS {self.target_schema}",
            ]
            print("Executing: ", " ".join(command_create_db))
            subprocess.call(command_create_db)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/tidb.json", "w") as fh:
            fh.write(json.dumps(config))

    @staticmethod
    def find_free_port():
        """
        Find an unused port to create a database listening on localhost to run destination-postgres
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        addr = s.getsockname()
        s.close()
        return addr[1]
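
    # Note: binding to port 0 asks the OS for an ephemeral port, which is then
    # released before any container binds it; there is an inherent race where
    # another process could claim the port in between.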

    def tear_down_db(self):
        for db_name in self.db_names:
            print(f"Stopping localhost {db_name} container for tests")
            try:
                subprocess.call(["docker", "kill", f"{self.container_prefix}_{db_name}"])
            except Exception as e:
                print(f"WARN: Exception while shutting down {db_name}: {e}")

    @staticmethod
    def change_current_test_dir(request):
        # This makes the test run whether it is executed from the tests folder (with pytest/gradle)
        # or from the base-normalization folder (through pycharm)
        integration_tests_dir = os.path.join(request.fspath.dirname, "integration_tests")
        if os.path.exists(integration_tests_dir):
            os.chdir(integration_tests_dir)
        else:
            os.chdir(request.fspath.dirname)

    def generate_profile_yaml_file(
        self, destination_type: DestinationType, test_root_dir: str, random_schema: bool = False
    ) -> Dict[str, Any]:
        """
        Each destination requires different settings to connect to. This step generates the adequate profiles.yml
        as described here: https://docs.getdbt.com/reference/profiles.yml
        """
        config_generator = TransformConfig()
        profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
        # Adapt the credential file to look like a destination config.json
        if destination_type.value == DestinationType.BIGQUERY.value:
            credentials = profiles_config["basic_bigquery_config"]
            profiles_config = {
                "credentials_json": json.dumps(credentials),
                "dataset_id": self.target_schema,
                "project_id": credentials["project_id"],
                "dataset_location": "US",
            }
        elif destination_type.value == DestinationType.MYSQL.value:
            profiles_config["database"] = self.target_schema
        elif destination_type.value == DestinationType.REDSHIFT.value:
            profiles_config["schema"] = self.target_schema
            if random_schema:
                profiles_config["schema"] = self.target_schema + "_" + "".join(random.choices(string.ascii_lowercase, k=5))
        else:
            profiles_config["schema"] = self.target_schema
        if destination_type.value == DestinationType.CLICKHOUSE.value:
            clickhouse_config = copy(profiles_config)
            profiles_yaml = config_generator.transform(destination_type, clickhouse_config)
        else:
            profiles_yaml = config_generator.transform(destination_type, profiles_config)
        config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml")
        return profiles_config

    @staticmethod
    def run_destination_process(message_file: str, test_root_dir: str, commands: List[str]):
        print("Executing: ", " ".join(commands))
        with open(os.path.join(test_root_dir, "destination_output.log"), "ab") as f:
            process = subprocess.Popen(commands, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

            def writer():
                if os.path.exists(message_file):
                    with open(message_file, "rb") as input_data:
                        while True:
                            line = input_data.readline()
                            if not line:
                                break
                            if not line.startswith(b"//"):
                                process.stdin.write(line)
                process.stdin.close()

            thread = threading.Thread(target=writer)
            thread.start()
            for line in iter(process.stdout.readline, b""):
                f.write(line)
                sys.stdout.write(line.decode("utf-8"))
            thread.join()
            process.wait()
        return process.returncode == 0
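
    # Note: the writer thread feeds the message file into the destination's
    # stdin while the main thread drains stdout; using separate threads avoids
    # deadlocking on full OS pipe buffers. Lines starting with "//" are treated
    # as comments and skipped.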

    @staticmethod
    def get_normalization_image(destination_type: DestinationType) -> str:
        if DestinationType.MSSQL.value == destination_type.value:
            return "airbyte/normalization-mssql:dev"
        elif DestinationType.MYSQL.value == destination_type.value:
            return "airbyte/normalization-mysql:dev"
        elif DestinationType.ORACLE.value == destination_type.value:
            return "airbyte/normalization-oracle:dev"
        elif DestinationType.CLICKHOUSE.value == destination_type.value:
            return "airbyte/normalization-clickhouse:dev"
        elif DestinationType.SNOWFLAKE.value == destination_type.value:
            return "airbyte/normalization-snowflake:dev"
        elif DestinationType.REDSHIFT.value == destination_type.value:
            return "airbyte/normalization-redshift:dev"
        elif DestinationType.TIDB.value == destination_type.value:
            return "airbyte/normalization-tidb:dev"
        else:
            return "airbyte/normalization:dev"

    def dbt_check(self, destination_type: DestinationType, test_root_dir: str):
        """
        Run the dbt CLI to perform sanity checks on the dbt project settings against the destination
        """
        normalization_image: str = self.get_normalization_image(destination_type)
        # Perform sanity check on dbt project settings
        assert self.run_check_dbt_command(normalization_image, "debug", test_root_dir)
        assert self.run_check_dbt_command(normalization_image, "deps", test_root_dir)

    def dbt_run(self, destination_type: DestinationType, test_root_dir: str, force_full_refresh: bool = False):
        """
        Run the dbt CLI to perform transformations on the test raw data in the destination
        """
        normalization_image: str = self.get_normalization_image(destination_type)
        # Compile dbt models files into the destination sql dialect, then run the transformation queries
        assert self.run_check_dbt_command(normalization_image, "run", test_root_dir, force_full_refresh)

    def dbt_run_macro(self, destination_type: DestinationType, test_root_dir: str, macro: str, macro_args: str = None):
        """
        Run the dbt CLI to perform transformations on the test raw data in the destination, using an independent macro.
        """
        normalization_image: str = self.get_normalization_image(destination_type)
        # Compile dbt models files into the destination sql dialect, then run the transformation queries
        assert self.run_dbt_run_operation(normalization_image, test_root_dir, macro, macro_args)

    def run_check_dbt_command(self, normalization_image: str, command: str, cwd: str, force_full_refresh: bool = False) -> bool:
        """
        Run a dbt subprocess while checking and counting "ERROR", "FAIL" or "WARNING" mentions printed in its outputs
        """
        if any([normalization_image.startswith(x) for x in ["airbyte/normalization-oracle", "airbyte/normalization-clickhouse"]]):
            dbtAdditionalArgs = []
        else:
            dbtAdditionalArgs = ["--event-buffer-size=10000"]

        commands = (
            [
                "docker",
                "run",
                "--rm",
                "--init",
                "-v",
                f"{cwd}:/workspace",
                "-v",
                f"{cwd}/build:/build",
                "-v",
                f"{cwd}/logs:/logs",
                "-v",
                f"{cwd}/build/dbt_packages:/dbt",
                "--network",
                "host",
                "--entrypoint",
                "/usr/local/bin/dbt",
                "-i",
                normalization_image,
            ]
            + dbtAdditionalArgs
            + [
                command,
                "--profiles-dir=/workspace",
                "--project-dir=/workspace",
            ]
        )
        if force_full_refresh:
            commands.append("--full-refresh")
            command = f"{command} --full-refresh"
        print("Executing: ", " ".join(commands))
        print(f"Equivalent to: dbt {command} --profiles-dir={cwd} --project-dir={cwd}")
        return self.run_check_dbt_subprocess(commands, cwd)

    def run_dbt_run_operation(self, normalization_image: str, cwd: str, macro: str, macro_args: str = None) -> bool:
        """
        Run a dbt run-operation subprocess while checking and counting "ERROR", "FAIL" or "WARNING" mentions printed in its outputs
        """
        args = ["--args", macro_args] if macro_args else []
        commands = (
            [
                "docker",
                "run",
                "--rm",
                "--init",
                "-v",
                f"{cwd}:/workspace",
                "-v",
                f"{cwd}/build:/build",
                "-v",
                f"{cwd}/logs:/logs",
                "-v",
                f"{cwd}/build/dbt_packages:/dbt",
                "--network",
                "host",
                "--entrypoint",
                "/usr/local/bin/dbt",
                "-i",
                normalization_image,
            ]
            + ["run-operation", macro]
            + args
            + ["--profiles-dir=/workspace", "--project-dir=/workspace"]
        )

        print("Executing: ", " ".join(commands))
        print(f"Equivalent to: dbt run-operation {macro} --args {macro_args} --profiles-dir={cwd} --project-dir={cwd}")
        return self.run_check_dbt_subprocess(commands, cwd)

    def run_check_dbt_subprocess(self, commands: list, cwd: str):
        error_count = 0
        with open(os.path.join(cwd, "dbt_output.log"), "ab") as f:
            process = subprocess.Popen(commands, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=os.environ)
            for line in iter(lambda: process.stdout.readline(), b""):
                f.write(line)
                str_line = line.decode("utf-8")
                sys.stdout.write(str_line)
                # keywords to match lines as signaling errors
                if "ERROR" in str_line or "FAIL" in str_line or "WARNING" in str_line:
                    # exception keywords in lines to ignore as errors (such as summary or expected warnings)
                    is_exception = False
                    for except_clause in [
                        "Done.",  # DBT Summary
                        "PASS=",  # DBT Summary
                        "Nothing to do.",  # When no schema/data tests are setup
                        "Configuration paths exist in your dbt_project.yml",  # When no cte / view are generated
                        "Error loading config file: .dockercfg: $HOME is not defined",  # ignore warning
                        "depends on a node named 'disabled_test' which was not found",  # Tests throwing warning because it is disabled
                        "The requested image's platform (linux/amd64) does not match the detected host platform "
                        + "(linux/arm64/v8) and no specific platform was requested",  # temporary patch until we publish images for arm64
                    ]:
                        if except_clause in str_line:
                            is_exception = True
                            break
                    if not is_exception:
                        # count lines signaling an error/failure/warning
                        error_count += 1
            process.wait()
        message = (
            f"{' '.join(commands)}\n\tterminated with return code {process.returncode} "
            f"with {error_count} 'Error/Warning/Fail' mention(s)."
        )
        print(message)
        assert error_count == 0, message
        assert process.returncode == 0, message
        if error_count > 0:
            return False
        return process.returncode == 0

    @staticmethod
    def copy_replace(src, dst, pattern=None, replace_value=None):
        """
        Copies a file from src to dst replacing pattern by replace_value

        Parameters
        ----------
        src : string
            Path to the source filename to copy from
        dst : string
            Path to the output filename to copy to
        pattern
            List of patterns to replace inside the src file
        replace_value
            List of values to replace by in the dst file
        """
        file1 = open(src, "r") if isinstance(src, str) else src
        file2 = open(dst, "w") if isinstance(dst, str) else dst
        pattern = [pattern] if isinstance(pattern, str) else pattern
        replace_value = [replace_value] if isinstance(replace_value, str) else replace_value
        if replace_value and pattern:
            if len(replace_value) != len(pattern):
                raise Exception("Invalid parameters: pattern and replace_value have different sizes.")
            rules = [(re.compile(regex, re.IGNORECASE), value) for regex, value in zip(pattern, replace_value)]
        else:
            rules = []
        for line in file1:
            if rules:
                for rule in rules:
                    line = re.sub(rule[0], rule[1], line)
            file2.write(line)
        if isinstance(src, str):
            file1.close()
        if isinstance(dst, str):
            file2.close()
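
    # Usage sketch (file names and patterns illustrative): rewrite the target
    # schema while copying a packaged dbt_project.yml into a test workspace:
    #   DbtIntegrationTest.copy_replace(
    #       "dbt_project.yml", "/tmp/workspace/dbt_project.yml",
    #       pattern="schema: test_normalization", replace_value="schema: test_xyz",
    #   )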

    @staticmethod
    def get_test_targets() -> List[str]:
        """
        Returns a list of destinations to run tests on.

        If the environment variable NORMALIZATION_TEST_TARGET is set with a comma separated list of destination names,
        then the tests are run only on that subset of destinations.
        Otherwise tests are run against all destinations.
        """
        if os.getenv(NORMALIZATION_TEST_TARGET):
            target_str = os.getenv(NORMALIZATION_TEST_TARGET)
            return [d.value for d in {DestinationType.from_string(s.strip()) for s in target_str.split(",")}]
        else:
            return [d.value for d in DestinationType]
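
    # Usage sketch (shell, destination names illustrative): restrict a test run
    # to two destinations by exporting the variable before invoking pytest:
    #   NORMALIZATION_TEST_TARGET=postgres,bigquery pytest integration_tests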

    @staticmethod
    def update_yaml_file(filename: str, callback: Callable):
        config = read_yaml_config(filename)
        updated, config = callback(config)
        if updated:
            write_yaml_config(config, filename)
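
    # Usage sketch (key name illustrative): the callback receives the parsed
    # YAML and returns an (updated, config) pair; the file is only rewritten
    # when updated is True:
    #   def enable_flag(config):
    #       config["vars"]["my_flag"] = True
    #       return True, config
    #   DbtIntegrationTest.update_yaml_file("dbt_project.yml", enable_flag)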

    def clean_tmp_tables(
        self,
        destination_type: Union[DestinationType, List[DestinationType]],
        test_type: str,
        tmp_folders: list = None,
        git_versioned_tests: list = None,
    ):
        """
        Cleans up all temporary schemas created during the test session.
        It parses the provided tmp_folders: List[str] or uses `git_versioned_tests` to find sources.yml files generated for the tests.
        It gets the target schemas created by the tests and removes them using the custom scenario specified in
        the `dbt-project-template/macros/clean_tmp_tables.sql` macro.

        REQUIREMENTS:
        1) Ideally, the schemas should have unique names like: test_normalization_<some_random_string> to avoid conflicts.
        2) The `clean_tmp_tables.sql` macro should have a specific macro for the target destination to proceed.

        INPUT ARGUMENTS:
        :: destination_type : either a single destination or a list of destinations
        :: test_type: either "ephemeral" or "normalization" should be supplied.
        :: tmp_folders: should be supplied if test_type = "ephemeral", to get schemas from /build/normalization_test_output folders
        :: git_versioned_tests: should be supplied if test_type = "normalization", to get schemas from integration_tests/normalization_test_output folders

        EXAMPLE:
        clean_up_args = {
            "destination_type": [ DestinationType.REDSHIFT, DestinationType.POSTGRES, ... ]
            "test_type": "normalization",
            "git_versioned_tests": git_versioned_tests,
        }
        """

        path_to_sources: str = "/models/generated/sources.yml"
        test_folders: dict = {}
        source_files: dict = {}
        schemas_to_remove: dict = {}

        # collect information about the tmp tables created for the test for each destination
        for destination in destination_type:
            test_folders[destination.value] = []
            source_files[destination.value] = []
            schemas_to_remove[destination.value] = []

            # based on test_type, select the path to the source files
            if test_type == "ephemeral" or test_type == "test_reset_scd_overwrite":
                if not tmp_folders:
                    raise TypeError("`tmp_folders` arg is not provided.")
                for folder in tmp_folders:
                    if destination.value in folder:
                        test_folders[destination.value].append(folder)
                        source_files[destination.value].append(f"{folder}{path_to_sources}")
            elif test_type == "normalization":
                if not git_versioned_tests:
                    raise TypeError("`git_versioned_tests` arg is not provided.")
                base_path = f"{pathlib.Path().absolute()}/integration_tests/normalization_test_output"
                for test in git_versioned_tests:
                    test_root_dir: str = f"{base_path}/{destination.value}/{test}"
                    test_folders[destination.value].append(test_root_dir)
                    source_files[destination.value].append(f"{test_root_dir}{path_to_sources}")
            else:
                raise TypeError(f"\n`test_type`: {test_type} is not registered; use `ephemeral` or `normalization` instead.\n")

            # parse the sources.yml files from the test folders to get the schemas and table names created for the tests
            for file in source_files[destination.value]:
                source_yml = {}
                try:
                    with open(file, "r") as source_file:
                        source_yml = yaml.safe_load(source_file)
                except FileNotFoundError:
                    print(f"\n{destination.value}: {file} doesn't exist, consider removing any temp tables and schemas manually!\n")
                    pass
                test_sources: list = source_yml.get("sources", []) if source_yml else []

                for source in test_sources:
                    target_schema: str = source.get("name")
                    if target_schema not in schemas_to_remove[destination.value]:
                        schemas_to_remove[destination.value].append(target_schema)
                        # also mark the _airbyte_* tmp schemas for removal
                        schemas_to_remove[destination.value].append(f"_airbyte_{target_schema}")

        # clean up the tmp tables generated by the tests
        for destination in destination_type:
            if not schemas_to_remove[destination.value]:
                print(f"\n\t{destination.value.upper()} DESTINATION: SKIP CLEANING, NOTHING TO REMOVE.\n")
            else:
                print(f"\n\t{destination.value.upper()} DESTINATION: CLEANING LEFTOVERS...\n")
                print(f"\t{schemas_to_remove[destination.value]}\n")
                test_root_folder = test_folders[destination.value][0]
                args = json.dumps({"schemas": schemas_to_remove[destination.value]})
                self.dbt_check(destination, test_root_folder)
                self.dbt_run_macro(destination, test_root_folder, "clean_tmp_tables", args)
@@ -0,0 +1,125 @@
name: airbyte_utils
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
quoting:
  database: true
  schema: false
  identifier: true
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view
dispatch:
- macro_namespace: dbt_utils
  search_order:
  - airbyte_utils
  - dbt_utils
vars:
  json_column: _airbyte_data
  models_to_source:
    nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty
    simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array
    conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array
    conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array
    conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array
    unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias: test_normalization._airbyte_raw_unnest_alias
    arrays_ab1: test_normalization._airbyte_raw_arrays
    arrays_ab2: test_normalization._airbyte_raw_arrays
    arrays_ab3: test_normalization._airbyte_raw_arrays
    arrays: test_normalization._airbyte_raw_arrays
    nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name
    unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children: test_normalization._airbyte_raw_unnest_alias
    arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays
    arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays
    arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays
    arrays_nested_array_parent: test_normalization._airbyte_raw_arrays
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name
    unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes: test_normalization._airbyte_raw_unnest_alias
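
# Note on the dispatch block above: dbt searches airbyte_utils before dbt_utils
# when resolving dbt_utils macros, so this project can shadow one by defining a
# macro with the matching dispatch name, e.g. (macro name and body illustrative,
# not from this commit):
#   {% macro default__type_string() %}
#       string
#   {% endmacro %}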
@@ -0,0 +1,90 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
partition by range_bucket(
    _airbyte_active_row,
    generate_array(0, 1, 1)
)
cluster by _airbyte_unique_key_scd, _airbyte_emitted_at
OPTIONS()
as (

-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')
with

input_data as (
    select *
    from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg`
    -- nested_stream_with_complex_columns_resulting_into_long_names from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
),

scd_data as (
    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
    select
        to_hex(md5(cast(concat(coalesce(cast(id as string), '')) as string))) as _airbyte_unique_key,
        id,
        date,
        `partition`,
        date as _airbyte_start_at,
        lag(date) over (
            partition by id
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by id
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
    from input_data
),
dedup_data as (
    select
        -- we need to ensure de-duplicated rows for merge/update queries
        -- additionally, we generate a unique key for the scd table
        row_number() over (
            partition by
                _airbyte_unique_key,
                _airbyte_start_at,
                _airbyte_emitted_at
            order by _airbyte_active_row desc, _airbyte_ab_id
        ) as _airbyte_row_num,
        to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as string), ''), '-', coalesce(cast(_airbyte_start_at as string), ''), '-', coalesce(cast(_airbyte_emitted_at as string), '')) as string))) as _airbyte_unique_key_scd,
        scd_data.*
    from scd_data
)
select
    _airbyte_unique_key,
    _airbyte_unique_key_scd,
    id,
    date,
    `partition`,
    _airbyte_start_at,
    _airbyte_end_at,
    _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from dedup_data where _airbyte_row_num = 1
);
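
-- Note on the SCD pattern above: _airbyte_end_at takes the cursor value of the
-- next version of the same record via lag(), and _airbyte_active_row marks the
-- latest version (row_number() = 1 per primary key); the final select keeps one
-- row per (_airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at) so that
-- downstream merge/update queries stay deterministic.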
@@ -0,0 +1,26 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_unique_key, _airbyte_emitted_at
OPTIONS()
as (

-- Final base SQL model
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
select
    _airbyte_unique_key,
    id,
    date,
    `partition`,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
-- nested_stream_with_complex_columns_resulting_into_long_names from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
where 1 = 1
and _airbyte_active_row = 1

);
@@ -0,0 +1,74 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
select
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
    json_extract_array(`partition`, "$['double_array_data']") as double_array_data,
    json_extract_array(`partition`, "$['DATA']") as DATA,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as table_alias
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1
and `partition` is not null

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1
select
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
    double_array_data,
    DATA,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2
select
    to_hex(md5(cast(concat(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as string), ''), '-', coalesce(cast(array_to_string(double_array_data, "|", "") as string), ''), '-', coalesce(cast(array_to_string(DATA, "|", "") as string), '')) as string))) as _airbyte_partition_hashid,
    tmp.*
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1

)-- Final base SQL model
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3
select
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
    double_array_data,
    DATA,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_partition_hashid
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
where 1 = 1

);
@@ -0,0 +1,73 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`

select
    _airbyte_partition_hashid,
    json_extract_scalar(DATA, "$['currency']") as currency,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
cross join unnest(DATA) as DATA
where 1 = 1
and DATA is not null

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1
select
    _airbyte_partition_hashid,
    cast(currency as string) as currency,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
where 1 = 1

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2
select
    to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as string), ''), '-', coalesce(cast(currency as string), '')) as string))) as _airbyte_DATA_hashid,
    tmp.*
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 tmp
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
where 1 = 1

)-- Final base SQL model
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3
select
    _airbyte_partition_hashid,
    currency,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_DATA_hashid
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`
where 1 = 1

);
@@ -0,0 +1,73 @@
create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`

select
_airbyte_partition_hashid,
json_extract_scalar(double_array_data, "$['id']") as id,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
cross join unnest(double_array_data) as double_array_data
where 1 = 1
and double_array_data is not null

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1
select
_airbyte_partition_hashid,
cast(id as
string
) as id,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
where 1 = 1

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2
select
to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as
string
), ''), '-', coalesce(cast(id as
string
), '')) as
string
))) as _airbyte_double_array_data_hashid,
tmp.*
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
where 1 = 1

)-- Final base SQL model
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3
select
_airbyte_partition_hashid,
id,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_double_array_data_hashid
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`
where 1 = 1

);
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
unique_key = '_airbyte_ab_id',
schema = "_airbyte_test_normalization",
tags = [ "top-level-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
select
{{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
{{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date,
{{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }},
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias
-- nested_stream_with_complex_columns_resulting_into_long_names
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
unique_key = '_airbyte_ab_id',
schema = "_airbyte_test_normalization",
tags = [ "top-level-intermediate" ]
) }}
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }}
select
cast(id as {{ dbt_utils.type_string() }}) as id,
cast(date as {{ dbt_utils.type_string() }}) as date,
cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }},
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }}
-- nested_stream_with_complex_columns_resulting_into_long_names
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,22 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "_airbyte_test_normalization",
tags = [ "nested-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'DATA') }}
select
_airbyte_partition_hashid,
{{ json_extract_scalar(unnested_column_value('DATA'), ['currency'], ['currency']) }} as currency,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
{{ cross_join_unnest('partition', 'DATA') }}
where 1 = 1
and DATA is not null
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "_airbyte_test_normalization",
tags = [ "nested-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
select
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
{{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data,
{{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as DATA,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} as table_alias
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1
and {{ adapter.quote('partition') }} is not null
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,22 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "_airbyte_test_normalization",
tags = [ "nested-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'double_array_data') }}
select
_airbyte_partition_hashid,
{{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
{{ cross_join_unnest('partition', 'double_array_data') }}
where 1 = 1
and double_array_data is not null
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,164 @@
{{ config(
cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"],
partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}},
unique_key = "_airbyte_unique_key_scd",
schema = "test_normalization",
post_hook = ["
{%
set final_table_relation = adapter.get_relation(
database=this.database,
schema=this.schema,
identifier='nested_stream_with_complex_columns_resulting_into_long_names'
)
%}
{#
If the final table doesn't exist, then obviously we can't delete anything from it.
Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync)
So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway)
#}
{%
if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
%}
-- Delete records which are no longer active:
-- This query is equivalent, but the left join version is more performant:
-- delete from final_table where unique_key in (
--     select unique_key from scd_table where 1 = 1 <incremental_clause(normalized_at, final_table)>
-- ) and unique_key not in (
--     select unique_key from scd_table where active_row = 1 <incremental_clause(normalized_at, final_table)>
-- )
-- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
-- entries that were _updated_ recently. This is because a deleted record will have an SCD record
-- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in (
select recent_records.unique_key
from (
select distinct _airbyte_unique_key as unique_key
from {{ this }}
where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }}
) recent_records
left join (
select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
from {{ this }}
where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }}
group by _airbyte_unique_key
) active_counts
on recent_records.unique_key = active_counts.unique_key
where active_count is null or active_count = 0
)
{% else %}
-- We have to have a non-empty query, so just do a noop delete
delete from {{ this }} where 1=0
{% endif %}
","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"],
tags = [ "top-level" ]
) }}
-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')
with
{% if is_incremental() %}
new_data as (
-- retrieve incremental "new" data
select
*
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }}
-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
),
new_data_ids as (
-- build a subset of _airbyte_unique_key from rows that are new
select distinct
{{ dbt_utils.surrogate_key([
'id',
]) }} as _airbyte_unique_key
from new_data
),
empty_new_data as (
-- build an empty table to only keep the table's column types
select * from new_data where 1 = 0
),
previous_active_scd_data as (
-- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
select
{{ star_intersect(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
from {{ this }} as this_data
-- make a join with new_data using primary key to filter active data that need to be updated only
join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
-- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes)
left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
where _airbyte_active_row = 1
),
input_data as (
select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data
union all
select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data
),
{% else %}
input_data as (
select *
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }}
-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
),
{% endif %}
scd_data as (
-- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
select
{{ dbt_utils.surrogate_key([
'id',
]) }} as _airbyte_unique_key,
id,
date,
{{ adapter.quote('partition') }},
date as _airbyte_start_at,
lag(date) over (
partition by id
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) as _airbyte_end_at,
case when row_number() over (
partition by id
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) = 1 then 1 else 0 end as _airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from input_data
),
dedup_data as (
select
-- we need to ensure de-duplicated rows for merge/update queries
-- additionally, we generate a unique key for the scd table
row_number() over (
partition by
_airbyte_unique_key,
_airbyte_start_at,
_airbyte_emitted_at
order by _airbyte_active_row desc, _airbyte_ab_id
) as _airbyte_row_num,
{{ dbt_utils.surrogate_key([
'_airbyte_unique_key',
'_airbyte_start_at',
'_airbyte_emitted_at'
]) }} as _airbyte_unique_key_scd,
scd_data.*
from scd_data
)
select
_airbyte_unique_key,
_airbyte_unique_key_scd,
id,
date,
{{ adapter.quote('partition') }},
_airbyte_start_at,
_airbyte_end_at,
_airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from dedup_data where _airbyte_row_num = 1
@@ -0,0 +1,24 @@
{{ config(
cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"],
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
unique_key = "_airbyte_unique_key",
schema = "test_normalization",
tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
select
_airbyte_unique_key,
id,
date,
{{ adapter.quote('partition') }},
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "test_normalization",
tags = [ "nested" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }}
select
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
double_array_data,
DATA,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_partition_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }}
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,20 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "test_normalization",
tags = [ "nested" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }}
select
_airbyte_partition_hashid,
currency,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_DATA_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }}
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,20 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "test_normalization",
tags = [ "nested" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }}
select
_airbyte_partition_hashid,
id,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_double_array_data_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }}
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,23 @@
version: 2
sources:
- name: test_normalization
  quoting:
    database: true
    schema: false
    identifier: false
  tables:
  - name: _airbyte_raw_arrays
  - name: _airbyte_raw_conflict_stream_array
  - name: _airbyte_raw_conflict_stream_name
  - name: _airbyte_raw_conflict_stream_scalar
  - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
  - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
  - name: _airbyte_raw_some_stream_that_was_empty
  - name: _airbyte_raw_unnest_alias
- name: test_normalization_namespace
  quoting:
    database: true
    schema: false
    identifier: false
  tables:
  - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
@@ -0,0 +1,27 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd

when matched then update set
`_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`

when not matched then insert
(`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
values
(`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
@@ -0,0 +1,27 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key

when matched then update set
`_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`

when not matched then insert
(`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
values
(`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
@@ -0,0 +1,21 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on FALSE

when not matched then insert
(`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`)
values
(`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`)
@@ -0,0 +1,21 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on FALSE

when not matched then insert
(`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`)
values
(`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`)
@@ -0,0 +1,21 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on FALSE

when not matched then insert
(`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`)
values
(`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`)
@@ -0,0 +1,70 @@
name: airbyte_utils
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- modified_models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
quoting:
  database: true
  schema: false
  identifier: true
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view
dispatch:
- macro_namespace: dbt_utils
  search_order:
  - airbyte_utils
  - dbt_utils
vars:
  json_column: _airbyte_data
  models_to_source:
    exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate
    exchange_rate: test_normalization._airbyte_raw_exchange_rate
    dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate
    renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded
@@ -0,0 +1,90 @@
name: airbyte_utils
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
quoting:
  database: true
  schema: false
  identifier: true
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view
dispatch:
- macro_namespace: dbt_utils
  search_order:
  - airbyte_utils
  - dbt_utils
vars:
  json_column: _airbyte_data
  models_to_source:
    exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate
    exchange_rate: test_normalization._airbyte_raw_exchange_rate
    dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate
    renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded
    pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx
    1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number
    multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts
    types_testing_ab1: test_normalization._airbyte_raw_types_testing
    types_testing_ab2: test_normalization._airbyte_raw_types_testing
    types_testing_stg: test_normalization._airbyte_raw_types_testing
    types_testing_scd: test_normalization._airbyte_raw_types_testing
    types_testing: test_normalization._airbyte_raw_types_testing
@@ -0,0 +1,108 @@
create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd`
partition by range_bucket(
_airbyte_active_row,
generate_array(0, 1, 1)
)
cluster by _airbyte_unique_key_scd, _airbyte_emitted_at
OPTIONS()
as (

-- depends_on: ref('dedup_exchange_rate_stg')
with

input_data as (
select *
from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg`
-- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
),

scd_data as (
-- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
select
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(currency as
string
), ''), '-', coalesce(cast(NZD as
string
), '')) as
string
))) as _airbyte_unique_key,
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
date as _airbyte_start_at,
lag(date) over (
partition by id, currency, cast(NZD as
string
)
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) as _airbyte_end_at,
case when row_number() over (
partition by id, currency, cast(NZD as
string
)
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) = 1 then 1 else 0 end as _airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
_airbyte_dedup_exchange_rate_hashid
from input_data
),
dedup_data as (
select
-- we need to ensure de-duplicated rows for merge/update queries
-- additionally, we generate a unique key for the scd table
row_number() over (
partition by
_airbyte_unique_key,
_airbyte_start_at,
_airbyte_emitted_at
order by _airbyte_active_row desc, _airbyte_ab_id
) as _airbyte_row_num,
to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as
string
), ''), '-', coalesce(cast(_airbyte_start_at as
string
), ''), '-', coalesce(cast(_airbyte_emitted_at as
string
), '')) as
string
))) as _airbyte_unique_key_scd,
scd_data.*
from scd_data
)
select
_airbyte_unique_key,
_airbyte_unique_key_scd,
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
_airbyte_start_at,
_airbyte_end_at,
_airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_dedup_exchange_rate_hashid
from dedup_data where _airbyte_row_num = 1
);
@@ -0,0 +1,31 @@
create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_unique_key, _airbyte_emitted_at
OPTIONS()
as (

-- Final base SQL model
-- depends_on: `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd`
select
_airbyte_unique_key,
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_dedup_exchange_rate_hashid
from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd`
-- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
where 1 = 1
and _airbyte_active_row = 1

);
@@ -0,0 +1,145 @@
create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__exchange_rate_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract_scalar(_airbyte_data, "$['currency']") as currency,
json_extract_scalar(_airbyte_data, "$['date']") as date,
json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
json_extract_scalar(_airbyte_data, "$['USD']") as USD,
json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes,
json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz,
json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz,
json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz,
json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__exchange_rate_ab1
select
cast(id as
int64
) as id,
cast(currency as
string
) as currency,
cast(nullif(date, '') as
date
) as date,
cast(nullif(timestamp_col, '') as
timestamp
) as timestamp_col,
cast(HKD_special___characters as
float64
) as HKD_special___characters,
cast(HKD_special___characters_1 as
string
) as HKD_special___characters_1,
cast(NZD as
float64
) as NZD,
cast(USD as
float64
) as USD,
cast(column___with__quotes as
string
) as column___with__quotes,
cast(nullif(datetime_tz, '') as
timestamp
) as datetime_tz,
cast(nullif(datetime_no_tz, '') as
datetime
) as datetime_no_tz,
cast(nullif(time_tz, '') as
STRING
) as time_tz,
cast(nullif(time_no_tz, '') as
time
) as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__exchange_rate_ab1
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__exchange_rate_ab2
select
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(currency as
string
), ''), '-', coalesce(cast(date as
string
), ''), '-', coalesce(cast(timestamp_col as
string
), ''), '-', coalesce(cast(HKD_special___characters as
string
), ''), '-', coalesce(cast(HKD_special___characters_1 as
string
), ''), '-', coalesce(cast(NZD as
string
), ''), '-', coalesce(cast(USD as
string
), ''), '-', coalesce(cast(column___with__quotes as
string
), ''), '-', coalesce(cast(datetime_tz as
string
), ''), '-', coalesce(cast(datetime_no_tz as
string
), ''), '-', coalesce(cast(time_tz as
string
), ''), '-', coalesce(cast(time_no_tz as
string
), '')) as
string
))) as _airbyte_exchange_rate_hashid,
tmp.*
from __dbt__cte__exchange_rate_ab2 tmp
-- exchange_rate
where 1 = 1
)-- Final base SQL model
-- depends_on: __dbt__cte__exchange_rate_ab3
select
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
column___with__quotes,
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_exchange_rate_hashid
from __dbt__cte__exchange_rate_ab3
-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
where 1 = 1
);
@@ -0,0 +1,89 @@
create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg`
OPTIONS()
as
with __dbt__cte__dedup_exchange_rate_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract_scalar(_airbyte_data, "$['currency']") as currency,
json_extract_scalar(_airbyte_data, "$['date']") as date,
json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
json_extract_scalar(_airbyte_data, "$['USD']") as USD,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias
-- dedup_exchange_rate
where 1 = 1

), __dbt__cte__dedup_exchange_rate_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__dedup_exchange_rate_ab1
select
cast(id as
int64
) as id,
cast(currency as
string
) as currency,
cast(nullif(date, '') as
date
) as date,
cast(nullif(timestamp_col, '') as
timestamp
) as timestamp_col,
cast(HKD_special___characters as
float64
) as HKD_special___characters,
cast(HKD_special___characters_1 as
string
) as HKD_special___characters_1,
cast(NZD as
float64
) as NZD,
cast(USD as
float64
) as USD,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__dedup_exchange_rate_ab1
-- dedup_exchange_rate
where 1 = 1

)-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__dedup_exchange_rate_ab2
select
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(currency as
string
), ''), '-', coalesce(cast(date as
string
), ''), '-', coalesce(cast(timestamp_col as
string
), ''), '-', coalesce(cast(HKD_special___characters as
string
), ''), '-', coalesce(cast(HKD_special___characters_1 as
string
), ''), '-', coalesce(cast(NZD as
string
), ''), '-', coalesce(cast(USD as
string
), '')) as
string
))) as _airbyte_dedup_exchange_rate_hashid,
tmp.*
from __dbt__cte__dedup_exchange_rate_ab2 tmp
-- dedup_exchange_rate
where 1 = 1
;
@@ -0,0 +1,83 @@

create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg`
OPTIONS()
as
with __dbt__cte__multiple_column_names_conflicts_ab1 as (
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts
select
  json_extract_scalar(_airbyte_data, "$['id']") as id,
  json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id,
  json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1,
  json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2,
  json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3,
  json_extract_scalar(_airbyte_data, "$['User@Id']") as User_Id_4,
  json_extract_scalar(_airbyte_data, "$['UserId']") as UserId,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias
-- multiple_column_names_conflicts
where 1 = 1
), __dbt__cte__multiple_column_names_conflicts_ab2 as (
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1
select
  cast(id as int64) as id,
  cast(User_Id as string) as User_Id,
  cast(user_id_1 as float64) as user_id_1,
  cast(User_id_2 as float64) as User_id_2,
  cast(user_id_3 as float64) as user_id_3,
  cast(User_Id_4 as string) as User_Id_4,
  cast(UserId as float64) as UserId,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__multiple_column_names_conflicts_ab1
-- multiple_column_names_conflicts
where 1 = 1
)
-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2
select
  to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(User_Id as string), ''), '-',
    coalesce(cast(user_id_1 as string), ''), '-',
    coalesce(cast(User_id_2 as string), ''), '-',
    coalesce(cast(user_id_3 as string), ''), '-',
    coalesce(cast(User_Id_4 as string), ''), '-',
    coalesce(cast(UserId as string), '')
  ) as string))) as _airbyte_multiple_column_names_conflicts_hashid,
  tmp.*
from __dbt__cte__multiple_column_names_conflicts_ab2 tmp
-- multiple_column_names_conflicts
where 1 = 1;
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
select
    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
    {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency,
    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date,
    {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col,
    {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters,
    {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters_1,
    {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD,
    {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: {{ ref('dedup_exchange_rate_ab1') }}
select
    cast(id as {{ dbt_utils.type_bigint() }}) as id,
    cast(currency as {{ dbt_utils.type_string() }}) as currency,
    cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date,
    cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col,
    cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters,
    cast(HKD_special___characters_1 as {{ dbt_utils.type_string() }}) as HKD_special___characters_1,
    cast(NZD as {{ dbt_utils.type_float() }}) as NZD,
    cast(USD as {{ dbt_utils.type_float() }}) as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('dedup_exchange_rate_ab1') }}
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
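Reading this template against the compiled `dedup_exchange_rate_stg` view earlier in the diff shows how the cross-database macros resolve on BigQuery. A quick correspondence table, taken directly from the compiled output above:

-- Macro in the Jinja model               -- Rendered BigQuery SQL
-- {{ dbt_utils.type_bigint() }}          -> int64
-- {{ dbt_utils.type_string() }}          -> string
-- {{ dbt_utils.type_float() }}           -> float64
-- {{ type_date() }}                      -> date
-- {{ type_timestamp_with_timezone() }}   -> timestamp
-- {{ empty_string_to_null('date') }}     -> nullif(date, '')
-- {{ current_timestamp() }}              -> CURRENT_TIMESTAMP()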
@@ -0,0 +1,178 @@
{{ config(
    cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}},
    unique_key = "_airbyte_unique_key_scd",
    schema = "test_normalization",
    post_hook = ["
    {%
    set final_table_relation = adapter.get_relation(
        database=this.database,
        schema=this.schema,
        identifier='dedup_exchange_rate'
    )
    %}
    {#
    If the final table doesn't exist, then obviously we can't delete anything from it.
    Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync).
    So skip this deletion if the column doesn't exist (in that case, the table is guaranteed to be empty anyway).
    #}
    {%
    if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
    %}
    -- Delete records which are no longer active.
    -- This query is equivalent, but the left join version is more performant:
    -- delete from final_table where unique_key in (
    --     select unique_key from scd_table where 1 = 1 <incremental_clause(normalized_at, final_table)>
    -- ) and unique_key not in (
    --     select unique_key from scd_table where active_row = 1 <incremental_clause(normalized_at, final_table)>
    -- )
    -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
    -- entries that were _updated_ recently. This is because a deleted record will have an SCD record
    -- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
    delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in (
        select recent_records.unique_key
        from (
            select distinct _airbyte_unique_key as unique_key
            from {{ this }}
            where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
        ) recent_records
        left join (
            select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
            from {{ this }}
            where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
            group by _airbyte_unique_key
        ) active_counts
        on recent_records.unique_key = active_counts.unique_key
        where active_count is null or active_count = 0
    )
    {% else %}
    -- We have to have a non-empty query, so just do a no-op delete.
    delete from {{ this }} where 1=0
    {% endif %}
    ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"],
    tags = [ "top-level" ]
) }}
-- depends_on: ref('dedup_exchange_rate_stg')
with
{% if is_incremental() %}
new_data as (
    -- retrieve incremental "new" data
    select
        *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
    where 1 = 1
    {{ incremental_clause('_airbyte_emitted_at', this) }}
),
new_data_ids as (
    -- build a subset of _airbyte_unique_key from rows that are new
    select distinct
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key
    from new_data
),
empty_new_data as (
    -- build an empty table to only keep the table's column types
    select * from new_data where 1 = 0
),
previous_active_scd_data as (
    -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
    select
        {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
    from {{ this }} as this_data
    -- join with new_data on the primary key to pick out only the active data that needs updating
    join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
    -- force the left join to NULL values (we just need to transfer column types for the star_intersect macro on schema changes)
    left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
    where _airbyte_active_row = 1
),
input_data as (
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data
    union all
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data
),
{% else %}
input_data as (
    select *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
),
{% endif %}
scd_data as (
    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by its primary key
    select
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key,
        id,
        currency,
        date,
        timestamp_col,
        HKD_special___characters,
        HKD_special___characters_1,
        NZD,
        USD,
        date as _airbyte_start_at,
        lag(date) over (
            partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        _airbyte_dedup_exchange_rate_hashid
    from input_data
),
dedup_data as (
    select
        -- we need to ensure de-duplicated rows for merge/update queries
        -- additionally, we generate a unique key for the scd table
        row_number() over (
            partition by
                _airbyte_unique_key,
                _airbyte_start_at,
                _airbyte_emitted_at
            order by _airbyte_active_row desc, _airbyte_ab_id
        ) as _airbyte_row_num,
        {{ dbt_utils.surrogate_key([
            '_airbyte_unique_key',
            '_airbyte_start_at',
            '_airbyte_emitted_at'
        ]) }} as _airbyte_unique_key_scd,
        scd_data.*
    from scd_data
)
select
    _airbyte_unique_key,
    _airbyte_unique_key_scd,
    id,
    currency,
    date,
    timestamp_col,
    HKD_special___characters,
    HKD_special___characters_1,
    NZD,
    USD,
    _airbyte_start_at,
    _airbyte_end_at,
    _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from dedup_data where _airbyte_row_num = 1
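The `incremental_clause` macro invoked throughout these models is defined in the normalization macro package rather than in this diff. As a rough sketch of the filter it is expected to emit (an assumption based on the behavior of the compiled models, not the macro's verbatim source): it guards on `is_incremental()` and keeps only rows newer than the destination's high-water mark, something like:

{% macro incremental_clause(col_emitted_at, tablename) -%}
{%- if is_incremental() %}
and coalesce(
    cast({{ col_emitted_at }} as timestamp) >=
        (select max(cast({{ col_emitted_at }} as timestamp)) from {{ tablename }}),
    -- if the destination table is empty, max() is NULL: keep all rows
    true)
{%- endif %}
{%- endmacro %}

Under that reading, `where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }}` compiles to a plain `where 1 = 1` on a full refresh and to an emitted-at cutoff on subsequent incremental runs.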
@@ -0,0 +1,29 @@
{{ config(
    cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = "_airbyte_unique_key",
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('dedup_exchange_rate_scd') }}
select
    _airbyte_unique_key,
    id,
    currency,
    date,
    timestamp_col,
    HKD_special___characters,
    HKD_special___characters_1,
    NZD,
    USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from {{ ref('dedup_exchange_rate_scd') }}
-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,31 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('exchange_rate_ab3') }}
select
    id,
    currency,
    date,
    timestamp_col,
    HKD_special___characters,
    HKD_special___characters_1,
    NZD,
    USD,
    column___with__quotes,
    datetime_tz,
    datetime_no_tz,
    time_tz,
    time_no_tz,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_exchange_rate_hashid
from {{ ref('exchange_rate_ab3') }}
-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
where 1 = 1
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to build a hash column based on the values of this record
-- depends_on: {{ ref('dedup_exchange_rate_ab2') }}
select
    {{ dbt_utils.surrogate_key([
        'id',
        'currency',
        'date',
        'timestamp_col',
        'HKD_special___characters',
        'HKD_special___characters_1',
        'NZD',
        'USD',
    ]) }} as _airbyte_dedup_exchange_rate_hashid,
    tmp.*
from {{ ref('dedup_exchange_rate_ab2') }} tmp
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
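On BigQuery, `dbt_utils.surrogate_key` resolves to the md5-over-concatenated-strings expression visible in the compiled staging view earlier in this diff: each key field is cast to string, null-coalesced to '', and joined with '-'. Roughly, matching the compiled output above:

to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(currency as string), ''), '-',
    -- ... one coalesce(cast(<field> as string), '') per key field ...
    coalesce(cast(USD as string), '')
) as string))) as _airbyte_dedup_exchange_rate_hashid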
@@ -0,0 +1,16 @@
version: 2
sources:
  - name: test_normalization
    quoting:
      database: true
      schema: false
      identifier: false
    tables:
      - name: _airbyte_raw_1_prefix_startwith_number
      - name: _airbyte_raw_dedup_cdc_excluded
      - name: _airbyte_raw_dedup_exchange_rate
      - name: _airbyte_raw_exchange_rate
      - name: _airbyte_raw_multiple_column_names_conflicts
      - name: _airbyte_raw_pos_dedup_cdcx
      - name: _airbyte_raw_renamed_dedup_cdc_excluded
      - name: _airbyte_raw_types_testing
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
select
    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
    {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency,
    {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column,
    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date,
    {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col,
    {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters,
    {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD,
    {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: {{ ref('dedup_exchange_rate_ab1') }}
select
    cast(id as {{ dbt_utils.type_float() }}) as id,
    cast(currency as {{ dbt_utils.type_string() }}) as currency,
    cast(new_column as {{ dbt_utils.type_float() }}) as new_column,
    cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date,
    cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col,
    cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters,
    cast(NZD as {{ dbt_utils.type_float() }}) as NZD,
    cast(USD as {{ dbt_utils.type_bigint() }}) as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('dedup_exchange_rate_ab1') }}
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,178 @@
{{ config(
    cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}},
    unique_key = "_airbyte_unique_key_scd",
    schema = "test_normalization",
    post_hook = ["
    {%
    set final_table_relation = adapter.get_relation(
        database=this.database,
        schema=this.schema,
        identifier='dedup_exchange_rate'
    )
    %}
    {#
    If the final table doesn't exist, then obviously we can't delete anything from it.
    Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync).
    So skip this deletion if the column doesn't exist (in that case, the table is guaranteed to be empty anyway).
    #}
    {%
    if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
    %}
    -- Delete records which are no longer active.
    -- This query is equivalent, but the left join version is more performant:
    -- delete from final_table where unique_key in (
    --     select unique_key from scd_table where 1 = 1 <incremental_clause(normalized_at, final_table)>
    -- ) and unique_key not in (
    --     select unique_key from scd_table where active_row = 1 <incremental_clause(normalized_at, final_table)>
    -- )
    -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
    -- entries that were _updated_ recently. This is because a deleted record will have an SCD record
    -- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
    delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in (
        select recent_records.unique_key
        from (
            select distinct _airbyte_unique_key as unique_key
            from {{ this }}
            where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
        ) recent_records
        left join (
            select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
            from {{ this }}
            where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
            group by _airbyte_unique_key
        ) active_counts
        on recent_records.unique_key = active_counts.unique_key
        where active_count is null or active_count = 0
    )
    {% else %}
    -- We have to have a non-empty query, so just do a no-op delete.
    delete from {{ this }} where 1=0
    {% endif %}
    ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"],
    tags = [ "top-level" ]
) }}
-- depends_on: ref('dedup_exchange_rate_stg')
with
{% if is_incremental() %}
new_data as (
    -- retrieve incremental "new" data
    select
        *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
    where 1 = 1
    {{ incremental_clause('_airbyte_emitted_at', this) }}
),
new_data_ids as (
    -- build a subset of _airbyte_unique_key from rows that are new
    select distinct
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key
    from new_data
),
empty_new_data as (
    -- build an empty table to only keep the table's column types
    select * from new_data where 1 = 0
),
previous_active_scd_data as (
    -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
    select
        {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
    from {{ this }} as this_data
    -- join with new_data on the primary key to pick out only the active data that needs updating
    join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
    -- force the left join to NULL values (we just need to transfer column types for the star_intersect macro on schema changes)
    left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
    where _airbyte_active_row = 1
),
input_data as (
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data
    union all
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data
),
{% else %}
input_data as (
    select *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
),
{% endif %}
scd_data as (
    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by its primary key
    select
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key,
        id,
        currency,
        new_column,
        date,
        timestamp_col,
        HKD_special___characters,
        NZD,
        USD,
        date as _airbyte_start_at,
        lag(date) over (
            partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        _airbyte_dedup_exchange_rate_hashid
    from input_data
),
dedup_data as (
    select
        -- we need to ensure de-duplicated rows for merge/update queries
        -- additionally, we generate a unique key for the scd table
        row_number() over (
            partition by
                _airbyte_unique_key,
                _airbyte_start_at,
                _airbyte_emitted_at
            order by _airbyte_active_row desc, _airbyte_ab_id
        ) as _airbyte_row_num,
        {{ dbt_utils.surrogate_key([
            '_airbyte_unique_key',
            '_airbyte_start_at',
            '_airbyte_emitted_at'
        ]) }} as _airbyte_unique_key_scd,
        scd_data.*
    from scd_data
)
select
    _airbyte_unique_key,
    _airbyte_unique_key_scd,
    id,
    currency,
    new_column,
    date,
    timestamp_col,
    HKD_special___characters,
    NZD,
    USD,
    _airbyte_start_at,
    _airbyte_end_at,
    _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from dedup_data where _airbyte_row_num = 1
@@ -0,0 +1,29 @@
{{ config(
    cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = "_airbyte_unique_key",
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('dedup_exchange_rate_scd') }}
select
    _airbyte_unique_key,
    id,
    currency,
    new_column,
    date,
    timestamp_col,
    HKD_special___characters,
    NZD,
    USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from {{ ref('dedup_exchange_rate_scd') }}
-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,27 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('exchange_rate_ab3') }}
select
    id,
    currency,
    new_column,
    date,
    timestamp_col,
    HKD_special___characters,
    NZD,
    USD,
    column___with__quotes,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_exchange_rate_hashid
from {{ ref('exchange_rate_ab3') }}
-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
where 1 = 1
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to build a hash column based on the values of this record
-- depends_on: {{ ref('dedup_exchange_rate_ab2') }}
select
    {{ dbt_utils.surrogate_key([
        'id',
        'currency',
        'new_column',
        'date',
        'timestamp_col',
        'HKD_special___characters',
        'NZD',
        'USD',
    ]) }} as _airbyte_dedup_exchange_rate_hashid,
    tmp.*
from {{ ref('dedup_exchange_rate_ab2') }} tmp
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,12 @@
version: 2
sources:
  - name: test_normalization
    quoting:
      database: true
      schema: false
      identifier: false
    tables:
      - name: _airbyte_raw_dedup_cdc_excluded
      - name: _airbyte_raw_dedup_exchange_rate
      - name: _airbyte_raw_exchange_rate
      - name: _airbyte_raw_renamed_dedup_cdc_excluded
@@ -0,0 +1,27 @@

merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` as DBT_INTERNAL_DEST
using (
    select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
    DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd

when matched then update set
    `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,
    `_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,
    `id` = DBT_INTERNAL_SOURCE.`id`,
    `currency` = DBT_INTERNAL_SOURCE.`currency`,
    `date` = DBT_INTERNAL_SOURCE.`date`,
    `timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,
    `HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,
    `HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,
    `NZD` = DBT_INTERNAL_SOURCE.`NZD`,
    `USD` = DBT_INTERNAL_SOURCE.`USD`,
    `_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,
    `_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,
    `_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,
    `_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,
    `_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,
    `_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,
    `_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid`

when not matched then insert
    (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
values
    (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
@@ -0,0 +1,27 @@

merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` as DBT_INTERNAL_DEST
using (
    select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
    DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key

when matched then update set
    `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,
    `id` = DBT_INTERNAL_SOURCE.`id`,
    `currency` = DBT_INTERNAL_SOURCE.`currency`,
    `date` = DBT_INTERNAL_SOURCE.`date`,
    `timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,
    `HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,
    `HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,
    `NZD` = DBT_INTERNAL_SOURCE.`NZD`,
    `USD` = DBT_INTERNAL_SOURCE.`USD`,
    `_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,
    `_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,
    `_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,
    `_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid`

when not matched then insert
    (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
values
    (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
@@ -0,0 +1,145 @@

create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (
with __dbt__cte__exchange_rate_ab1 as (
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
select
  json_extract_scalar(_airbyte_data, "$['id']") as id,
  json_extract_scalar(_airbyte_data, "$['currency']") as currency,
  json_extract_scalar(_airbyte_data, "$['date']") as date,
  json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
  json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
  json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
  json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
  json_extract_scalar(_airbyte_data, "$['USD']") as USD,
  json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes,
  json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz,
  json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz,
  json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz,
  json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab2 as (
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__exchange_rate_ab1
select
  cast(id as int64) as id,
  cast(currency as string) as currency,
  cast(nullif(date, '') as date) as date,
  cast(nullif(timestamp_col, '') as timestamp) as timestamp_col,
  cast(HKD_special___characters as float64) as HKD_special___characters,
  cast(HKD_special___characters_1 as string) as HKD_special___characters_1,
  cast(NZD as float64) as NZD,
  cast(USD as float64) as USD,
  cast(column___with__quotes as string) as column___with__quotes,
  cast(nullif(datetime_tz, '') as timestamp) as datetime_tz,
  cast(nullif(datetime_no_tz, '') as datetime) as datetime_no_tz,
  cast(nullif(time_tz, '') as STRING) as time_tz,
  cast(nullif(time_no_tz, '') as time) as time_no_tz,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__exchange_rate_ab1
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab3 as (
-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__exchange_rate_ab2
select
  to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(currency as string), ''), '-',
    coalesce(cast(date as string), ''), '-',
    coalesce(cast(timestamp_col as string), ''), '-',
    coalesce(cast(HKD_special___characters as string), ''), '-',
    coalesce(cast(HKD_special___characters_1 as string), ''), '-',
    coalesce(cast(NZD as string), ''), '-',
    coalesce(cast(USD as string), ''), '-',
    coalesce(cast(column___with__quotes as string), ''), '-',
    coalesce(cast(datetime_tz as string), ''), '-',
    coalesce(cast(datetime_no_tz as string), ''), '-',
    coalesce(cast(time_tz as string), ''), '-',
    coalesce(cast(time_no_tz as string), '')
  ) as string))) as _airbyte_exchange_rate_hashid,
  tmp.*
from __dbt__cte__exchange_rate_ab2 tmp
-- exchange_rate
where 1 = 1
)
-- Final base SQL model
-- depends_on: __dbt__cte__exchange_rate_ab3
select
  id,
  currency,
  date,
  timestamp_col,
  HKD_special___characters,
  HKD_special___characters_1,
  NZD,
  USD,
  column___with__quotes,
  datetime_tz,
  datetime_no_tz,
  time_tz,
  time_no_tz,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at,
  _airbyte_exchange_rate_hashid
from __dbt__cte__exchange_rate_ab3
-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
where 1 = 1
);
@@ -0,0 +1,89 @@

create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg`
OPTIONS()
as
with __dbt__cte__dedup_exchange_rate_ab1 as (
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
select
  json_extract_scalar(_airbyte_data, "$['id']") as id,
  json_extract_scalar(_airbyte_data, "$['currency']") as currency,
  json_extract_scalar(_airbyte_data, "$['date']") as date,
  json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
  json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
  json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
  json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
  json_extract_scalar(_airbyte_data, "$['USD']") as USD,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias
-- dedup_exchange_rate
where 1 = 1
), __dbt__cte__dedup_exchange_rate_ab2 as (
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__dedup_exchange_rate_ab1
select
  cast(id as int64) as id,
  cast(currency as string) as currency,
  cast(nullif(date, '') as date) as date,
  cast(nullif(timestamp_col, '') as timestamp) as timestamp_col,
  cast(HKD_special___characters as float64) as HKD_special___characters,
  cast(HKD_special___characters_1 as string) as HKD_special___characters_1,
  cast(NZD as float64) as NZD,
  cast(USD as float64) as USD,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__dedup_exchange_rate_ab1
-- dedup_exchange_rate
where 1 = 1
)
-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__dedup_exchange_rate_ab2
select
  to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(currency as string), ''), '-',
    coalesce(cast(date as string), ''), '-',
    coalesce(cast(timestamp_col as string), ''), '-',
    coalesce(cast(HKD_special___characters as string), ''), '-',
    coalesce(cast(HKD_special___characters_1 as string), ''), '-',
    coalesce(cast(NZD as string), ''), '-',
    coalesce(cast(USD as string), '')
  ) as string))) as _airbyte_dedup_exchange_rate_hashid,
  tmp.*
from __dbt__cte__dedup_exchange_rate_ab2 tmp
-- dedup_exchange_rate
where 1 = 1;