.github/labeler.yml (vendored, +4)
@@ -14,3 +14,7 @@ area/documentation:
 CDK:
 - airbyte-cdk/*
 - airbyte-cdk/**/*
+
+normalization:
+- airbyte-integrations/bases/base-normalization/*
+- airbyte-integrations/bases/base-normalization/**/*
@@ -6,6 +6,8 @@ exclude: |
   ^.*?/node_modules/.*$|
   ^.*?/charts/.*$|
+  ^airbyte-integrations/bases/base-normalization/.*$|
+  ^.*?/normalization_test_output/.*$|
   ^.*?/pnpm-lock\.yaml$|
   ^.*?/source-amplitude/unit_tests/api_data/zipped\.json$|
airbyte-integrations/bases/base-java/.dockerignore (new file, +5)
@@ -0,0 +1,5 @@
*
!Dockerfile
!build
!javabase.sh
!run_with_normalization.sh
airbyte-integrations/bases/base-java/Dockerfile (new file, +34)
@@ -0,0 +1,34 @@
### WARNING ###
# The Java connector Dockerfiles will soon be deprecated.
# This Dockerfile is not used to build the connector image we publish to DockerHub.
# The new logic to build the connector image is declared with Dagger here:
# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649

# If you need to add custom logic to build your connector image, you can do so by adding a finalize_build.sh or finalize_build.py script in the connector folder.
# Please reach out to the Connectors Operations team if you have any questions.
ARG JDK_VERSION=17.0.8
FROM amazoncorretto:${JDK_VERSION}
COPY --from=airbyte/integration-base:dev /airbyte /airbyte

RUN yum update -y && yum install -y tar openssl && yum clean all

WORKDIR /airbyte

# Add the Datadog Java APM agent
ADD https://dtdg.co/latest-java-tracer dd-java-agent.jar

COPY javabase.sh .
COPY run_with_normalization.sh .

# airbyte base commands
ENV AIRBYTE_SPEC_CMD "/airbyte/javabase.sh --spec"
ENV AIRBYTE_CHECK_CMD "/airbyte/javabase.sh --check"
ENV AIRBYTE_DISCOVER_CMD "/airbyte/javabase.sh --discover"
ENV AIRBYTE_READ_CMD "/airbyte/javabase.sh --read"
ENV AIRBYTE_WRITE_CMD "/airbyte/javabase.sh --write"

ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
ENTRYPOINT ["/airbyte/base.sh"]

LABEL io.airbyte.version=0.1.2
LABEL io.airbyte.name=airbyte/integration-base-java
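For orientation, here is a hedged sketch of how a connector sits on top of this base image; the connector name destination-foo and the :dev tags are illustrative, not part of this commit, and the base.sh dispatch behavior is assumed from the AIRBYTE_*_CMD variables above.

# build the base image locally (tag is illustrative)
docker build -t airbyte/integration-base-java:dev airbyte-integrations/bases/base-java
# a child connector Dockerfile would FROM this image, copy its distribution
# into /airbyte/bin, and set ENV APPLICATION destination-foo so javabase.sh
# can resolve /airbyte/bin/$APPLICATION lazily at run time
docker run --rm airbyte/destination-foo:dev spec  # base.sh presumably maps 'spec' to AIRBYTE_SPEC_CMD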
airbyte-integrations/bases/base-java/build.gradle (new file, +3)
@@ -0,0 +1,3 @@
plugins {
    id 'airbyte-docker-legacy'
}
airbyte-integrations/bases/base-java/javabase.sh (new executable file, +33)
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

set -e

# if IS_CAPTURE_HEAP_DUMP_ON_ERROR is set to true, capture a heap dump on OutOfMemory errors
if [[ $IS_CAPTURE_HEAP_DUMP_ON_ERROR = true ]]; then

  arrayOfSupportedConnectors=("source-postgres" "source-mssql" "source-mysql")

  # The heap dump is captured only when a java-based connector fails with an OutOfMemory error
  if [[ " ${arrayOfSupportedConnectors[*]} " =~ " $APPLICATION " ]]; then
    JAVA_OPTS=$JAVA_OPTS" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/dump.hprof"
    export JAVA_OPTS
    echo "Added JAVA_OPTS=$JAVA_OPTS"
    echo "APPLICATION=$APPLICATION"
  fi
fi
# 30781 - Allocate 32KB for the log4j appender buffer to ensure that each line is logged in a single println
JAVA_OPTS=$JAVA_OPTS" -Dlog4j.encoder.byteBufferSize=32768 -Dlog4j2.configurationFile=log4j2.xml"
# needed because we make ThreadLocal.get(Thread) accessible in IntegrationRunner.stopOrphanedThreads
JAVA_OPTS=$JAVA_OPTS" --add-opens=java.base/java.lang=ALL-UNNAMED"
# tell jooq to be quiet (https://stackoverflow.com/questions/28272284/how-to-disable-jooqs-self-ad-message-in-3-4)
JAVA_OPTS=$JAVA_OPTS" -Dorg.jooq.no-logo=true -Dorg.jooq.no-tips=true"
export JAVA_OPTS

# Wrap the run script in a script so that we can lazily evaluate the value of APPLICATION. APPLICATION is
# set by the dockerfile that inherits base-java, so it cannot be evaluated when base-java is built.
# We also need to make sure that stdin of the script is piped to the stdin of the java application.
if [[ $1 = --write ]]; then
  cat <&0 | /airbyte/bin/"$APPLICATION" "$@"
else
  /airbyte/bin/"$APPLICATION" "$@"
fi
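A hedged invocation sketch to make the lazy APPLICATION lookup concrete; the connector name and file names are illustrative.

export APPLICATION=destination-postgres      # normally baked in by the child connector image
/airbyte/javabase.sh --spec                  # runs /airbyte/bin/destination-postgres --spec
cat messages.jsonl | /airbyte/javabase.sh --write --config config.json --catalog catalog.json
# only for --write does the script explicitly pipe its own stdin into the java process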
airbyte-integrations/bases/base-java/run_with_normalization.sh (new executable file, +61)
@@ -0,0 +1,61 @@
#!/bin/bash
# Intentionally no set -e, because we want to run normalization even if the destination fails
set -o pipefail

/airbyte/base.sh $@
destination_exit_code=$?
echo '{"type": "LOG","log":{"level":"INFO","message":"Destination process done (exit code '"$destination_exit_code"')"}}'

# store original args
args=$@

while [ $# -ne 0 ]; do
  case "$1" in
  --config)
    CONFIG_FILE="$2"
    shift 2
    ;;
  *)
    # move on
    shift
    ;;
  esac
done

# restore original args after shifts
set -- $args

USE_1S1T_FORMAT="false"
if [[ -s "$CONFIG_FILE" ]]; then
  USE_1S1T_FORMAT=$(jq -r '.use_1s1t_format' "$CONFIG_FILE")
fi

if test "$1" != 'write'
then
  normalization_exit_code=0
elif test "$NORMALIZATION_TECHNIQUE" = 'LEGACY' && test "$USE_1S1T_FORMAT" != "true"
then
  echo '{"type": "LOG","log":{"level":"INFO","message":"Starting in-connector normalization"}}'
  # Normalization tries to create this file from the connector config and crashes if it already exists
  # so just nuke it and let normalization recreate it.
  # Use -f to avoid error if it doesn't exist, since it's only created for certain SSL modes.
  rm -f ca.crt
  # the args in a write command are `write --catalog foo.json --config bar.json`
  # so if we remove the `write`, we can just pass the rest directly into normalization
  /airbyte/entrypoint.sh run ${@:2} --integration-type $AIRBYTE_NORMALIZATION_INTEGRATION | java -cp "/airbyte/lib/*" io.airbyte.cdk.integrations.destination.normalization.NormalizationLogParser
  normalization_exit_code=$?
  echo '{"type": "LOG","log":{"level":"INFO","message":"In-connector normalization done (exit code '"$normalization_exit_code"')"}}'
else
  echo '{"type": "LOG","log":{"level":"INFO","message":"Skipping in-connector normalization"}}'
  normalization_exit_code=0
fi

if test $destination_exit_code -ne 0
then
  exit $destination_exit_code
elif test $normalization_exit_code -ne 0
then
  exit $normalization_exit_code
else
  exit 0
fi
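A hedged end-to-end sketch of what the wrapper above does for a write invocation; the env var value and file names are illustrative.

export NORMALIZATION_TECHNIQUE=LEGACY
cat messages.jsonl | /airbyte/run_with_normalization.sh write --catalog catalog.json --config config.json
# 1. /airbyte/base.sh runs the destination; its exit code is captured rather than being fatal
# 2. the arg loop finds --config so jq can read .use_1s1t_format from config.json
# 3. with NORMALIZATION_TECHNIQUE=LEGACY and use_1s1t_format not "true", dbt runs
#    in-container: /airbyte/entrypoint.sh run --catalog catalog.json --config config.json ...
# 4. a non-zero destination exit code takes precedence over the normalization exit code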
airbyte-integrations/bases/base-normalization/.dockerignore (new file, +13)
@@ -0,0 +1,13 @@
*
!Dockerfile
!entrypoint.sh
!build/sshtunneling.sh
!setup.py
!normalization
!dbt-project-template
!dbt-project-template-mssql
!dbt-project-template-mysql
!dbt-project-template-oracle
!dbt-project-template-clickhouse
!dbt-project-template-snowflake
!dbt-project-template-redshift
airbyte-integrations/bases/base-normalization/.gitignore (vendored, new file, +51)
@@ -0,0 +1,51 @@
build/
logs/
dbt-project-template/models/generated/
dbt-project-template/test_output.log
dbt_modules/
secrets/
dist/

integration_tests/normalization_test_output/*/*/macros
integration_tests/normalization_test_output/*/*/tests
integration_tests/normalization_test_output/**/*.json
integration_tests/normalization_test_output/**/*.log
integration_tests/normalization_test_output/**/*.md
integration_tests/normalization_test_output/**/*.sql
integration_tests/normalization_test_output/**/*.yml
!integration_tests/normalization_test_output/**/*dbt_project.yml
!integration_tests/normalization_test_output/**/generated/sources.yml

# We keep a minimal/restricted subset of sql files for all destinations to avoid noise in diff
# Simple Streams
!integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql
!integration_tests/normalization_test_output/**/DEDUP_EXCHANGE_RATE*.sql
!integration_tests/normalization_test_output/**/exchange_rate.sql
!integration_tests/normalization_test_output/**/EXCHANGE_RATE.sql
!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql
# Nested Streams
# Parent table
!integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_names_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_names.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_AB*.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES.sql
# Nested table
!integration_tests/normalization_test_output/**/nested_stream_with_*_partition_ab1.sql
!integration_tests/normalization_test_output/**/nested_stream_with_*_data_ab1.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_partition_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_data_scd.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_partition.sql
!integration_tests/normalization_test_output/**/nested_stream_with*_data.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_PARTITION_AB1.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_DATA_AB1.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA_SCD.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION.sql
!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA.sql

# but we keep all sql files for Postgres
!integration_tests/normalization_test_output/postgres/**/*.sql
integration_tests/normalization_test_output/postgres/**/dbt_data_tests
integration_tests/normalization_test_output/postgres/**/dbt_schema_tests
airbyte-integrations/bases/base-normalization/Dockerfile (new file, +37)
@@ -0,0 +1,37 @@
FROM fishtownanalytics/dbt:1.0.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass

WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.version=0.4.3
LABEL io.airbyte.name=airbyte/normalization
airbyte-integrations/bases/base-normalization/build.gradle (new file, +57)
@@ -0,0 +1,57 @@
plugins {
    id 'airbyte-docker-legacy'
    id 'airbyte-python'
}

dependencies {
    testFixtures(project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies'))
}

// we need to access the sshtunneling script from airbyte-workers for ssh support
def copySshScript = tasks.register('copySshScript', Copy) {
    from "${project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').buildDir}/resources/testFixtures"
    into "${buildDir}"
    include "sshtunneling.sh"
}
copySshScript.configure {
    dependsOn project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').tasks.named('processTestFixturesResources')
}

// make sure the copy task above worked (if it fails, it fails silently, annoyingly)
def checkSshScriptCopy = tasks.register('checkSshScriptCopy') {
    doFirst {
        assert file("${buildDir}/sshtunneling.sh").exists() : "Copy of sshtunneling.sh failed."
    }
}
checkSshScriptCopy.configure {
    dependsOn copySshScript
}

def generate = tasks.register('generate')
generate.configure {
    dependsOn checkSshScriptCopy
}

tasks.named('check').configure {
    dependsOn generate
}

tasks.named("jar").configure {
    dependsOn copySshScript
}

[
    'bigquery',
    'mysql',
    'postgres',
    'redshift',
    'snowflake',
    'oracle',
    'mssql',
    'clickhouse',
    'tidb',
].each { destinationName ->
    tasks.matching { it.name == 'integrationTestPython' }.configureEach {
        dependsOn project(":airbyte-integrations:connectors:destination-$destinationName").tasks.named('assemble')
    }
}
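As a usage sketch, the task wiring above can be exercised from the repo root; the Gradle project path is assumed from the directory layout, not stated in this commit.

# copySshScript copies sshtunneling.sh out of the CDK test fixtures into build/,
# and checkSshScriptCopy asserts the file actually landed (the copy fails silently)
./gradlew :airbyte-integrations:bases:base-normalization:generate
# 'check' depends on 'generate', so a plain check run verifies the copy too
./gradlew :airbyte-integrations:bases:base-normalization:check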
@@ -0,0 +1,36 @@
FROM ghcr.io/dbt-labs/dbt-core:1.3.1
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass
WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
RUN pip install "dbt-clickhouse>=1.4.0"
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-clickhouse
@@ -0,0 +1,65 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: true
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        # ephemeral materialization isn't supported in ClickHouse yet
        +materialized: view
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        # schema change test isn't supported in ClickHouse yet
        +on_schema_change: "ignore"
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        # incremental is not enabled for MySql yet
        #+materialized: incremental
        +materialized: table
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
modules-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: false
  schema: false
  identifier: false

# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        # incremental is not enabled for Oracle yet
        #+materialized: incremental
        +materialized: table
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.6.4
@@ -0,0 +1,66 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  +transient: false
  # https://docs.aws.amazon.com/redshift/latest/dg/super-configurations.html
  +pre-hook: "SET enable_case_sensitive_identifier to TRUE"
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,64 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  +transient: false
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,19 @@
## Installing dbt

1. Activate your venv and run `pip3 install dbt`
1. Copy `airbyte-normalization/sample_files/profiles.yml` over to `~/.dbt/profiles.yml`
1. Edit it to configure your profiles accordingly

## Running dbt

1. `cd airbyte-normalization`
1. You can now run dbt commands; to check that the setup is fine: `dbt debug`
1. To build the dbt tables in your warehouse: `dbt run`

## Running dbt from Airbyte generated config

1. You can also change directory (`cd /tmp/dev_root/workspace/1/0/normalize` for example) into the `normalize` folder of one of the workspaces generated by Airbyte.
1. You should find `profiles.yml` and a bunch of other dbt files/folders created there.
1. To check everything is set up properly: `dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)`
1. You can modify the `.sql` files and run `dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)` too
1. You can inspect the compiled dbt `.sql` files before they are run in the destination engine in the `normalize/build/compiled` or `normalize/build/run` folders
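Putting the last section together as one worked example (using the illustrative workspace path from above):

cd /tmp/dev_root/workspace/1/0/normalize
dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)   # sanity-check profile and connection
dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)     # build the normalized tables
ls build/compiled build/run                            # compiled SQL before/after execution, per the note above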
@@ -0,0 +1,63 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
packages-install-path: "/dbt"  # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order: ["airbyte_utils", "dbt_utils"]
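The tags configured above are selectable at run time; a hedged sketch using dbt 1.x node selection:

# run only the final normalized tables, skipping internal CTEs and views
dbt run --profiles-dir=$(pwd) --project-dir=$(pwd) --select tag:normalized_tables
# list which models the incremental tag would match before running them
dbt ls --profiles-dir=$(pwd) --project-dir=$(pwd) --select tag:incremental_tables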
@@ -0,0 +1,19 @@
{% macro clean_tmp_tables(schemas) -%}
    {{ adapter.dispatch('clean_tmp_tables')(schemas) }}
{%- endmacro %}

-- default
{% macro default__clean_tmp_tables(schemas) -%}
    {% do exceptions.warn("\tINFO: CLEANING TEST LEFTOVERS IS NOT IMPLEMENTED FOR THIS DESTINATION. CONSIDER REMOVING TEST TABLES MANUALLY.\n") %}
{%- endmacro %}

-- for redshift
{% macro redshift__clean_tmp_tables(schemas) %}
    {%- for tmp_schema in schemas -%}
        {% do log("\tDROP SCHEMA IF EXISTS " ~ tmp_schema, info=True) %}
        {%- set drop_query -%}
            drop schema if exists {{ tmp_schema }} cascade;
        {%- endset -%}
        {%- do run_query(drop_query) -%}
    {%- endfor -%}
{% endmacro %}
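A hedged usage sketch: a macro like this is typically invoked as a dbt operation; the schema names are illustrative.

# on Redshift this drops each listed schema with CASCADE; every other adapter
# falls through to default__clean_tmp_tables, which only prints the warning
dbt run-operation clean_tmp_tables --args '{schemas: [test_normalization_abc, test_normalization_xyz]}'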
@@ -0,0 +1,173 @@
{#
    Adapter Macros for the following functions:
    - Bigquery: unnest() -> https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays#flattening-arrays-and-repeated-fields
    - Snowflake: flatten() -> https://docs.snowflake.com/en/sql-reference/functions/flatten.html
    - Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
    - postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
    - MSSQL: openjson() -> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
    - ClickHouse: ARRAY JOIN -> https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
#}

{# cross_join_unnest ------------------------------------------------- #}

{% macro cross_join_unnest(stream_name, array_col) -%}
    {{ adapter.dispatch('cross_join_unnest')(stream_name, array_col) }}
{%- endmacro %}

{% macro default__cross_join_unnest(stream_name, array_col) -%}
    {% do exceptions.warn("Undefined macro cross_join_unnest for this destination engine") %}
{%- endmacro %}

{% macro bigquery__cross_join_unnest(stream_name, array_col) -%}
    cross join unnest({{ array_col }}) as {{ array_col }}
{%- endmacro %}

{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
    ARRAY JOIN {{ array_col }}
{%- endmacro %}

{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
    {% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
{%- endmacro %}

{% macro postgres__cross_join_unnest(stream_name, array_col) -%}
    cross join jsonb_array_elements(
        case jsonb_typeof({{ array_col }})
        when 'array' then {{ array_col }}
        else '[]' end
    ) as _airbyte_nested_data
{%- endmacro %}

{% macro mysql__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro tidb__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro duckdb__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro snowflake__cross_join_unnest(stream_name, array_col) -%}
    cross join table(flatten({{ array_col }})) as {{ array_col }}
{%- endmacro %}

{% macro sqlserver__cross_join_unnest(stream_name, array_col) -%}
    {# https://docs.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15#option-1---openjson-with-the-default-output #}
    CROSS APPLY (
        SELECT [value] = CASE
            WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value]))
            WHEN [type] = 5 THEN [value]
            END
        FROM OPENJSON({{ array_col }})
    ) AS {{ array_col }}
{%- endmacro %}

{# unnested_column_value -- this macro is related to unnest_cte #}

{% macro unnested_column_value(column_col) -%}
    {{ adapter.dispatch('unnested_column_value')(column_col) }}
{%- endmacro %}

{% macro default__unnested_column_value(column_col) -%}
    {{ column_col }}
{%- endmacro %}

{% macro postgres__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro snowflake__unnested_column_value(column_col) -%}
    {{ column_col }}.value
{%- endmacro %}

{% macro redshift__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro mysql__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro tidb__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro duckdb__unnested_column_value(column_col) -%}
    _airbyte_nested_data
{%- endmacro %}

{% macro oracle__unnested_column_value(column_col) -%}
    {{ column_col }}
{%- endmacro %}

{% macro sqlserver__unnested_column_value(column_col) -%}
    {# the unnested array/sub_array will be located in the `value` column afterwards, so we need to refer to it #}
    {{ column_col }}.value
{%- endmacro %}

{# unnest_cte ------------------------------------------------- #}

{% macro unnest_cte(from_table, stream_name, column_col) -%}
    {{ adapter.dispatch('unnest_cte')(from_table, stream_name, column_col) }}
{%- endmacro %}

{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %}

{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%}
    {# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #}
    with joined as (
        select
            table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
            _airbyte_nested_data
        from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data
    )
{%- endmacro %}

{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%}
    {%- if not execute -%}
        {{ return('') }}
    {% endif %}

    {%- call statement('max_json_array_length', fetch_result=True) -%}
        with max_value as (
            select max(json_length({{ column_col }})) as max_number_of_items
            from {{ from_table }}
        )
        select
            case when max_number_of_items is not null and max_number_of_items > 1
            then max_number_of_items
            else 1 end as max_number_of_items
        from max_value
    {%- endcall -%}

    {%- set max_length = load_result('max_json_array_length') -%}
    with numbers as (
        {{ dbt_utils.generate_series(max_length["data"][0][0]) }}
    ),
    joined as (
        select
            _airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
            {# -- json_extract(column_col, '$[i][0]') as _airbyte_nested_data #}
            json_extract({{ column_col }}, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data
        from {{ from_table }}
        cross join numbers
        -- only generate the number of records in the cross join that corresponds
        -- to the number of items in {{ from_table }}.{{ column_col }}
        where numbers.generated_number <= json_length({{ column_col }})
    )
{%- endmacro %}

{% macro tidb__unnest_cte(from_table, stream_name, column_col) -%}
    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}

{% macro duckdb__unnest_cte(from_table, stream_name, column_col) -%}
    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}
@@ -0,0 +1,36 @@
{#
    concat in dbt 0.6.4 used to work fine for bigquery, but the new implementation in 0.7.3 is less scalable (it cannot handle too many columns).
    Therefore, we revert the implementation here and add versions for missing destinations.
#}

{% macro concat(fields) -%}
    {{ adapter.dispatch('concat')(fields) }}
{%- endmacro %}

{% macro bigquery__concat(fields) -%}
    {#-- concat() in BigQuery SQL scales better with the number of columns than using the '||' operator --#}
    concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro mysql__concat(fields) -%}
    {#-- MySQL doesn't support the '||' operator as concatenation by default --#}
    concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro sqlserver__concat(fields) -%}
    {#-- CONCAT() in SQL SERVER accepts from 2 to 254 arguments; we batch the main concat to overcome the limit. --#}
    {% set concat_chunks = [] %}
    {% for chunk in fields|batch(253) -%}
        {% set _ = concat_chunks.append( "concat(" ~ chunk|join(', ') ~ ",'')" ) %}
    {% endfor %}

    concat({{ concat_chunks|join(', ') }}, '')
{%- endmacro %}

{% macro tidb__concat(fields) -%}
    concat({{ fields|join(', ') }})
{%- endmacro %}

{% macro duckdb__concat(fields) -%}
    concat({{ fields|join(', ') }})
{%- endmacro %}
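To make the SQL Server batching arithmetic concrete, a small shell sketch; the field count is illustrative.

fields=600                            # columns to concatenate
chunks=$(( (fields + 252) / 253 ))    # batch(253) -> ceil(600/253) = 3 inner concat() calls
# each inner concat gets at most 253 fields plus the trailing '' (254 args, at the limit);
# the outer concat gets the chunk results plus '' (here, 4 args)
echo "$chunks inner concat calls; outer concat takes $((chunks + 1)) arguments"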
@@ -0,0 +1,7 @@
{% macro mysql__current_timestamp() %}
    CURRENT_TIMESTAMP
{% endmacro %}

{% macro oracle__current_timestamp() %}
    CURRENT_TIMESTAMP
{% endmacro %}
@@ -0,0 +1,394 @@
{# json ------------------------------------------------- #}

{%- macro type_json() -%}
    {{ adapter.dispatch('type_json')() }}
{%- endmacro -%}

{% macro default__type_json() %}
    string
{% endmacro %}

{%- macro redshift__type_json() -%}
    super
{%- endmacro -%}

{% macro postgres__type_json() %}
    jsonb
{% endmacro %}

{%- macro oracle__type_json() -%}
    varchar2(4000)
{%- endmacro -%}

{% macro snowflake__type_json() %}
    variant
{% endmacro %}

{%- macro mysql__type_json() -%}
    json
{%- endmacro -%}

{%- macro sqlserver__type_json() -%}
    NVARCHAR(max)
{%- endmacro -%}

{% macro clickhouse__type_json() %}
    String
{% endmacro %}

{%- macro tidb__type_json() -%}
    json
{%- endmacro -%}

{%- macro duckdb__type_json() -%}
    json
{%- endmacro -%}

{# string ------------------------------------------------- #}

{%- macro mysql__type_string() -%}
    char
{%- endmacro -%}

{%- macro oracle__type_string() -%}
    varchar2(4000)
{%- endmacro -%}

{% macro sqlserver__type_string() %}
    NVARCHAR(max)
{%- endmacro -%}

{%- macro clickhouse__type_string() -%}
    String
{%- endmacro -%}

{#-- TODO: Remove this macro when dbt issue regarding unlimited varchars on postgres is resolved (https://github.com/dbt-labs/dbt-core/issues/5238) and we've upgraded to the latest version of dbt --#}
{%- macro postgres__type_string() -%}
    text
{%- endmacro -%}

{%- macro tidb__type_string() -%}
    char(1000)
{%- endmacro -%}

{%- macro duckdb__type_string() -%}
    VARCHAR
{%- endmacro -%}

{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
    float
{% endmacro %}

{% macro oracle__type_float() %}
    float
{% endmacro %}

{% macro clickhouse__type_float() %}
    Float64
{% endmacro %}

{% macro tidb__type_float() %}
    float
{% endmacro %}

{% macro duckdb__type_float() %}
    DOUBLE
{% endmacro %}

{# int ------------------------------------------------- #}
{% macro default__type_int() %}
    int
{% endmacro %}

{% macro mysql__type_int() %}
    signed
{% endmacro %}

{% macro oracle__type_int() %}
    int
{% endmacro %}

{% macro clickhouse__type_int() %}
    INT
{% endmacro %}

{% macro tidb__type_int() %}
    signed
{% endmacro %}

{% macro duckdb__type_int() %}
    INTEGER
{% endmacro %}

{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
    signed
{% endmacro %}

{% macro oracle__type_bigint() %}
    numeric
{% endmacro %}

{% macro clickhouse__type_bigint() %}
    BIGINT
{% endmacro %}

{% macro tidb__type_bigint() %}
    signed
{% endmacro %}

{% macro duckdb__type_bigint() %}
    BIGINT
{% endmacro %}

{# numeric ------------------------------------------------- --#}
{% macro mysql__type_numeric() %}
    float
{% endmacro %}

{% macro clickhouse__type_numeric() %}
    Float64
{% endmacro %}

{% macro tidb__type_numeric() %}
    float
{% endmacro %}

{% macro duckdb__type_numeric() %}
    DOUBLE
{% endmacro %}

{# very_large_integer --------------------------------------- --#}
{#
Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field.
Our type terminology unfortunately collides with DB terminology (i.e. "big_integer" means different things in different contexts)
so this macro needs to be called very_large_integer.
#}
{%- macro type_very_large_integer() -%}
    {{ adapter.dispatch('type_very_large_integer')() }}
{%- endmacro -%}

{% macro default__type_very_large_integer() %}
    numeric
{% endmacro %}

{% macro snowflake__type_very_large_integer() %}
    numeric
{% endmacro %}

{% macro mysql__type_very_large_integer() %}
    decimal(38, 0)
{% endmacro %}

{% macro clickhouse__type_very_large_integer() %}
    decimal128(0)
{% endmacro %}

{% macro tidb__type_very_large_integer() %}
    decimal(38, 0)
{% endmacro %}

{% macro duckdb__type_very_large_integer() %}
    DECIMAL(38, 0)
{% endmacro %}

{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
    time
{% endmacro %}

{%- macro sqlserver__type_timestamp() -%}
    {#-- in TSQL timestamp is really datetime --#}
    {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
    datetime
{%- endmacro -%}

{% macro clickhouse__type_timestamp() %}
    DateTime64
{% endmacro %}

{% macro tidb__type_timestamp() %}
    time
{% endmacro %}

{% macro duckdb__type_timestamp() %}
    TIMESTAMP
{% endmacro %}

{# timestamp with time zone ------------------------------------------------- #}

{%- macro type_timestamp_with_timezone() -%}
    {{ adapter.dispatch('type_timestamp_with_timezone')() }}
{%- endmacro -%}

{% macro default__type_timestamp_with_timezone() %}
    timestamp with time zone
{% endmacro %}

{% macro bigquery__type_timestamp_with_timezone() %}
    timestamp
{% endmacro %}

{#-- MySQL doesn't allow the cast operation with nullif to work with DATETIME and doesn't support storing the timezone, so we have to use char --#}
{#-- https://bugs.mysql.com/bug.php?id=77805 --#}
{%- macro mysql__type_timestamp_with_timezone() -%}
    char(1024)
{%- endmacro -%}

{% macro oracle__type_timestamp_with_timezone() %}
    varchar2(4000)
{% endmacro %}

{%- macro sqlserver__type_timestamp_with_timezone() -%}
    datetimeoffset
{%- endmacro -%}

{% macro redshift__type_timestamp_with_timezone() %}
    TIMESTAMPTZ
{% endmacro %}

{% macro clickhouse__type_timestamp_with_timezone() %}
    DateTime64
{% endmacro %}

{%- macro tidb__type_timestamp_with_timezone() -%}
    char(1000)
{%- endmacro -%}

{%- macro duckdb__type_timestamp_with_timezone() -%}
    TIMESTAMPTZ
{%- endmacro -%}

{# timestamp without time zone ------------------------------------------------- #}

{%- macro type_timestamp_without_timezone() -%}
    {{ adapter.dispatch('type_timestamp_without_timezone')() }}
{%- endmacro -%}

{% macro default__type_timestamp_without_timezone() %}
    timestamp
{% endmacro %}

{%- macro sqlserver__type_timestamp_without_timezone() -%}
    {#-- in TSQL timestamp is really datetime or datetime2 --#}
    {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
    datetime2
{%- endmacro -%}

{% macro bigquery__type_timestamp_without_timezone() %}
    datetime
{% endmacro %}

{% macro oracle__type_timestamp_without_timezone() %}
    varchar2(4000)
{% endmacro %}

{% macro redshift__type_timestamp_without_timezone() %}
    TIMESTAMP
{% endmacro %}

{% macro tidb__type_timestamp_without_timezone() %}
    datetime
{% endmacro %}

{% macro duckdb__type_timestamp_without_timezone() %}
    TIMESTAMP
{% endmacro %}

{# time without time zone ------------------------------------------------- #}

{%- macro type_time_without_timezone() -%}
    {{ adapter.dispatch('type_time_without_timezone')() }}
{%- endmacro -%}

{% macro default__type_time_without_timezone() %}
    time
{% endmacro %}

{% macro oracle__type_time_without_timezone() %}
    varchar2(4000)
{% endmacro %}

{% macro redshift__type_time_without_timezone() %}
    TIME
{% endmacro %}

{% macro clickhouse__type_time_without_timezone() %}
    String
{% endmacro %}

{% macro tidb__type_time_without_timezone() %}
    time
{% endmacro %}

{% macro duckdb__type_time_without_timezone() %}
    TIMESTAMP
{% endmacro %}

{# time with time zone ------------------------------------------------- #}

{%- macro type_time_with_timezone() -%}
    {{ adapter.dispatch('type_time_with_timezone')() }}
{%- endmacro -%}

{% macro default__type_time_with_timezone() %}
    time with time zone
{% endmacro %}

{%- macro mysql__type_time_with_timezone() -%}
    char(1024)
{%- endmacro -%}

{%- macro sqlserver__type_time_with_timezone() -%}
    NVARCHAR(max)
{%- endmacro -%}

{% macro bigquery__type_time_with_timezone() %}
    STRING
{% endmacro %}

{% macro oracle__type_time_with_timezone() %}
    varchar2(4000)
{% endmacro %}

{% macro snowflake__type_time_with_timezone() %}
    varchar
{% endmacro %}

{% macro redshift__type_time_with_timezone() %}
    TIMETZ
{% endmacro %}

{% macro clickhouse__type_time_with_timezone() %}
    String
{% endmacro %}

{%- macro tidb__type_time_with_timezone() -%}
    char(1000)
{%- endmacro -%}

{%- macro duckdb__type_time_with_timezone() -%}
    TIMESTAMPTZ
{%- endmacro -%}
{# date ------------------------------------------------- #}

{%- macro type_date() -%}
    {{ adapter.dispatch('type_date')() }}
{%- endmacro -%}

{% macro default__type_date() %}
    date
{% endmacro %}

{% macro oracle__type_date() %}
    varchar2(4000)
{% endmacro %}

{%- macro sqlserver__type_date() -%}
    date
{%- endmacro -%}

{% macro clickhouse__type_date() %}
    Date32
{% endmacro %}
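To make the dispatch pattern above concrete, a hypothetical column cast in a generated model (column name invented for illustration) resolves per adapter:

cast(_airbyte_data as {{ type_json() }})
-- renders to "cast(_airbyte_data as jsonb)" on Postgres,
-- "cast(_airbyte_data as variant)" on Snowflake,
-- and "cast(_airbyte_data as json)" on MySQL,
-- because adapter.dispatch('type_json') picks the <adapter>__type_json macro.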
@@ -0,0 +1,7 @@
{% macro mysql__except() %}
    {% do exceptions.warn("MySQL does not support EXCEPT operator") %}
{% endmacro %}

{% macro oracle__except() %}
    minus
{% endmacro %}
@@ -0,0 +1,5 @@
{# macro converting a hash to varchar #}

{% macro sqlserver__hash(field) -%}
    convert(varchar(32), HashBytes('md5', coalesce(cast({{field}} as {{dbt_utils.type_string()}}), '')), 2)
{%- endmacro %}
@@ -0,0 +1,317 @@
{#
Adapter Macros for the following functions:
- Bigquery: JSON_EXTRACT(json_string_expr, json_path_format) -> https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions
- Snowflake: JSON_EXTRACT_PATH_TEXT( <column_identifier> , '<path_name>' ) -> https://docs.snowflake.com/en/sql-reference/functions/json_extract_path_text.html
- Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
- Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
- ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
- TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions
- DuckDB: json_extract(json, 'path') note: If path is a LIST, the result will be a LIST of JSON -> https://duckdb.org/docs/extensions/json
#}

{# format_json_path -------------------------------------------------- #}
{% macro format_json_path(json_path_list) -%}
    {{ adapter.dispatch('format_json_path')(json_path_list) }}
{%- endmacro %}

{% macro default__format_json_path(json_path_list) -%}
    {{ '.' ~ json_path_list|join('.') }}
{%- endmacro %}

{% macro oracle__format_json_path(json_path_list) -%}
    {{ '\'$."' ~ json_path_list|join('."') ~ '"\'' }}
{%- endmacro %}

{#
BigQuery has different JSONPath syntax depending on which function you call.
Most of our macros use the "legacy" JSON functions, so this function uses
the legacy syntax.

These paths look like: "$['foo']['bar']"
#}
{% macro bigquery__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace('"', '\\"')) -%} {%- endif -%}
    {%- endfor -%}
    {{ '"$[\'' ~ str_list|join('\'][\'') ~ '\']"' }}
{%- endmacro %}

{#
For macros which use the newer JSON functions, define a new_format_json_path
macro which generates the correct path syntax.

These paths look like: '$."foo"."bar"'
#}
{% macro bigquery_new_format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace('\'', '\\\'')) -%} {%- endif -%}
    {%- endfor -%}
    {{ '\'$."' ~ str_list|join('"."') ~ '"\'' }}
{%- endmacro %}

{% macro postgres__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'" ~ str_list|join("','") ~ "'" }}
{%- endmacro %}

{% macro mysql__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro redshift__format_json_path(json_path_list) -%}
    {%- set quote = '"' -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%}
    {%- endfor -%}
    {{ quote ~ str_list|join(quote + "," + quote) ~ quote }}
{%- endmacro %}

{% macro snowflake__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''").replace('"', '""')) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'\"" ~ str_list|join('"."') ~ "\"'" }}
{%- endmacro %}

{% macro sqlserver__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'$.\"" ~ str_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro clickhouse__format_json_path(json_path_list) -%}
    {%- set str_list = [] -%}
    {%- for json_path in json_path_list -%}
        {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%}
    {%- endfor -%}
    {{ "'" ~ str_list|join("','") ~ "'" }}
{%- endmacro %}

{% macro tidb__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{% macro duckdb__format_json_path(json_path_list) -%}
    {# -- '$."x"."y"."z"' #}
    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}

{# json_extract ------------------------------------------------- #}

{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract')(from_table, json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro oracle__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro bigquery__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{% macro postgres__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
    {% else %}
        jsonb_extract_path({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{% macro mysql__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() != '' -%}
        {%- set json_column = from_table|string() + "." + json_column|string() -%}
    {%- endif -%}
    case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- endmacro %}

{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})
    {% else %}
        get_path(parse_json({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{% macro sqlserver__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
    {% else %}
        JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{% macro tidb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{% macro duckdb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() == '' %}
        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% else %}
        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
    {% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}

{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract_scalar')(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_extract_scalar({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro oracle__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro bigquery__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_extract_scalar({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro postgres__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    jsonb_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }} RETURNING CHAR)
{%- endmacro %}

{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- endmacro %}

{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    to_varchar(get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }}))
{%- endmacro %}

{% macro sqlserver__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro tidb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    IF(
        JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) = 'null',
        NULL,
        JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }}))
    )
{%- endmacro %}

{% macro duckdb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    json_extract_string({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{# json_extract_array ------------------------------------------------- #}

{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract_array')(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract_array({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro oracle__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro bigquery__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract_array({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro postgres__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    {{ json_column }}.{{ format_json_path(json_path_list) }}
{%- endmacro %}

{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro tidb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{% macro duckdb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{# json_extract_string_array ------------------------------------------------- #}

{% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
    {{ adapter.dispatch('json_extract_string_array')(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{% macro default__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
    {{ json_extract_array(json_column, json_path_list, normalized_json_path) }}
{%- endmacro %}

{#
See https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_extract_string_array

BigQuery does not allow NULL entries in REPEATED fields, so we replace those with literal "NULL" strings.
#}
{% macro bigquery__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
    array(
        select ifnull(x, "NULL")
        from unnest(json_value_array({{ json_column }}, {{ bigquery_new_format_json_path(normalized_json_path) }})) as x
    )
{%- endmacro %}
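To make the two-step scheme above concrete (table, column, and path names invented for illustration), extracting a nested field first formats the path list for the adapter, then wraps it in that adapter's extraction function:

{{ json_extract('t', '_airbyte_data', ['address', 'city'], ['address', 'city']) }}
-- renders on Postgres to:
jsonb_extract_path(t._airbyte_data, 'address','city')
-- and on Snowflake to:
get_path(parse_json(t._airbyte_data), '"address"."city"')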
@@ -0,0 +1,16 @@
{# quote ---------------------------------- #}
{% macro quote(column_name) -%}
    {{ adapter.dispatch('quote')(column_name) }}
{%- endmacro %}

{% macro default__quote(column_name) -%}
    {{ adapter.quote(column_name) }}
{%- endmacro %}

{% macro oracle__quote(column_name) -%}
    {{ '\"' ~ column_name ~ '\"'}}
{%- endmacro %}

{% macro clickhouse__quote(column_name) -%}
    {{ '\"' ~ column_name ~ '\"'}}
{%- endmacro %}
@@ -0,0 +1,25 @@
{# surrogate_key ---------------------------------- #}

{% macro oracle__surrogate_key(field_list) -%}
    ora_hash(
        {%- for field in field_list %}
            {% if not loop.last %}
                {{ field }} || '~' ||
            {% else %}
                {{ field }}
            {% endif %}
        {%- endfor %}
    )
{%- endmacro %}

{% macro clickhouse__surrogate_key(field_list) -%}
    assumeNotNull(hex(MD5(
        {%- for field in field_list %}
            {% if not loop.last %}
                toString({{ field }}) || '~' ||
            {% else %}
                toString({{ field }})
            {% endif %}
        {%- endfor %}
    )))
{%- endmacro %}
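As a worked example (column names invented for illustration), hashing a two-column key on Oracle renders, modulo whitespace, to:

{{ oracle__surrogate_key(['id', 'updated_at']) }}
-- renders to:
ora_hash(id || '~' || updated_at)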
@@ -0,0 +1,105 @@

{# boolean_to_string ------------------------------------------------- #}
{% macro boolean_to_string(boolean_column) -%}
    {{ adapter.dispatch('boolean_to_string')(boolean_column) }}
{%- endmacro %}

{% macro default__boolean_to_string(boolean_column) -%}
    {{ boolean_column }}
{%- endmacro %}

{% macro redshift__boolean_to_string(boolean_column) -%}
    case when {{ boolean_column }} then 'true' else 'false' end
{%- endmacro %}

{# array_to_string ------------------------------------------------- #}
{% macro array_to_string(array_column) -%}
    {{ adapter.dispatch('array_to_string')(array_column) }}
{%- endmacro %}

{% macro default__array_to_string(array_column) -%}
    {{ array_column }}
{%- endmacro %}

{% macro bigquery__array_to_string(array_column) -%}
    array_to_string({{ array_column }}, "|", "")
{%- endmacro %}

{% macro oracle__array_to_string(array_column) -%}
    cast({{ array_column }} as varchar2(4000))
{%- endmacro %}

{% macro sqlserver__array_to_string(array_column) -%}
    cast({{ array_column }} as {{dbt_utils.type_string()}})
{%- endmacro %}

{% macro redshift__array_to_string(array_column) -%}
    json_serialize({{array_column}})
{%- endmacro %}

{# object_to_string ------------------------------------------------- #}
{% macro object_to_string(object_column) -%}
    {{ adapter.dispatch('object_to_string')(object_column) }}
{%- endmacro %}

{% macro default__object_to_string(object_column) -%}
    {{ object_column }}
{%- endmacro %}

{% macro redshift__object_to_string(object_column) -%}
    json_serialize({{object_column}})
{%- endmacro %}

{# cast_to_boolean ------------------------------------------------- #}
{% macro cast_to_boolean(field) -%}
    {{ adapter.dispatch('cast_to_boolean')(field) }}
{%- endmacro %}

{% macro default__cast_to_boolean(field) -%}
    cast({{ field }} as boolean)
{%- endmacro %}

{# -- MySQL does not support the cast function converting a string directly to boolean (boolean being an alias of tinyint(1)), see https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast #}
{% macro mysql__cast_to_boolean(field) -%}
    IF(lower({{ field }}) = 'true', true, false)
{%- endmacro %}

{# TiDB does not support casting a string to boolean #}
{% macro tidb__cast_to_boolean(field) -%}
    IF(lower({{ field }}) = 'true', true, false)
{%- endmacro %}

{% macro duckdb__cast_to_boolean(field) -%}
    cast({{ field }} as boolean)
{%- endmacro %}

{% macro redshift__cast_to_boolean(field) -%}
    cast({{ field }} as boolean)
{%- endmacro %}

{# -- MS SQL Server does not support converting a string directly to boolean, it must be cast as bit #}
{% macro sqlserver__cast_to_boolean(field) -%}
    cast({{ field }} as bit)
{%- endmacro %}

{# -- ClickHouse does not support converting string directly to Int8, it must go through int first #}
{% macro clickhouse__cast_to_boolean(field) -%}
    IF(lower({{ field }}) = 'true', 1, 0)
{%- endmacro %}

{# empty_string_to_null ------------------------------------------------- #}
{% macro empty_string_to_null(field) -%}
    {{ return(adapter.dispatch('empty_string_to_null')(field)) }}
{%- endmacro %}

{%- macro default__empty_string_to_null(field) -%}
    nullif({{ field }}, '')
{%- endmacro -%}

{%- macro duckdb__empty_string_to_null(field) -%}
    nullif(nullif({{ field }}, 'null'), '')
{%- endmacro -%}

{%- macro redshift__empty_string_to_null(field) -%}
    nullif({{ field }}::varchar, '')
{%- endmacro -%}
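For reference, dispatching the boolean cast above (column name invented for illustration) yields per adapter:

{{ cast_to_boolean('is_active') }}
-- default:        cast(is_active as boolean)
-- MySQL / TiDB:   IF(lower(is_active) = 'true', true, false)
-- SQL Server:     cast(is_active as bit)
-- ClickHouse:     IF(lower(is_active) = 'true', 1, 0)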
@@ -0,0 +1,4 @@
-- see https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas/#an-alternative-pattern-for-generating-schema-names
{% macro generate_schema_name(custom_schema_name, node) -%}
    {{ generate_schema_name_for_env(custom_schema_name, node) }}
{%- endmacro %}
@@ -0,0 +1,61 @@
{#
    These macros control how incremental models are updated in Airbyte's normalization step
    - get_max_normalized_cursor retrieves the maximum cursor value already present in the normalized table
    - incremental_clause controls the predicate used to filter new data for incremental processing
#}

{% macro incremental_clause(col_emitted_at, tablename) -%}
    {{ adapter.dispatch('incremental_clause')(col_emitted_at, tablename) }}
{%- endmacro %}

{%- macro default__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
and coalesce(
    cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) > (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}),
    {# -- if {{ col_emitted_at }} is NULL in either table, the previous comparison would evaluate to NULL, #}
    {# -- so we coalesce and make sure the row is always returned for incremental processing instead #}
    true)
{% endif %}
{%- endmacro -%}

{# -- see https://on-systems.tech/113-beware-dbt-incremental-updates-against-snowflake-external-tables/ #}
{%- macro snowflake__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
    {% if get_max_normalized_cursor(col_emitted_at, tablename) %}
and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
    cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }})
    {% endif %}
{% endif %}
{%- endmacro -%}

{# -- see https://cloud.google.com/bigquery/docs/querying-partitioned-tables#best_practices_for_partition_pruning #}
{%- macro bigquery__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
    {% if get_max_normalized_cursor(col_emitted_at, tablename) %}
and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
    cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }})
    {% endif %}
{% endif %}
{%- endmacro -%}

{%- macro sqlserver__incremental_clause(col_emitted_at, tablename) -%}
{% if is_incremental() %}
and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}) is null
    or cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
        (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}))
{% endif %}
{%- endmacro -%}

{% macro get_max_normalized_cursor(col_emitted_at, tablename) %}
{% if execute and is_incremental() %}
    {% if env_var('INCREMENTAL_CURSOR', 'UNSET') == 'UNSET' %}
        {% set query %}
            select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}
        {% endset %}
        {% set max_cursor = run_query(query).columns[0][0] %}
        {% do return(max_cursor) %}
    {% else %}
        {% do return(env_var('INCREMENTAL_CURSOR')) %}
    {% endif %}
{% endif %}
{% endmacro %}
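To see what the default clause contributes at run time (column and table names invented for illustration), a where-clause in a generated incremental model such as:

where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }}

-- is extended, during an incremental run, to roughly:
where 1 = 1 and coalesce(
    cast(_airbyte_emitted_at as timestamp with time zone) > (select max(cast(_airbyte_emitted_at as timestamp with time zone)) from my_schema.my_table),
    true)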
@@ -0,0 +1,34 @@
{% macro oracle__test_equal_rowcount(model, compare_model) %}

{#-- Needs to be set at parse time, before we return '' below --#}
{{ config(fail_calc = 'coalesce(diff_count, 0)') }}

{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
{%- if not execute -%}
    {{ return('') }}
{% endif %}

with a as (

    select count(*) as count_a from {{ model }}

),
b as (

    select count(*) as count_b from {{ compare_model }}

),
final as (

    select
        count_a,
        count_b,
        abs(count_a - count_b) as diff_count
    from a
    cross join b

)

select diff_count from final

{% endmacro %}
@@ -0,0 +1,107 @@
{#
-- Adapted from https://github.com/dbt-labs/dbt-utils/blob/0-19-0-updates/macros/schema_tests/equality.sql
-- dbt-utils version: 0.6.4
-- This macro needs to be updated accordingly when dbt-utils is upgraded.
-- This is needed because MySQL does not support the EXCEPT operator!
#}

{% macro mysql__test_equality(model, compare_model, compare_columns=None) %}

{%- if not execute -%}
    {{ return('') }}
{% endif %}

{%- do dbt_utils._is_relation(model, 'test_equality') -%}

{%- if not compare_columns -%}
    {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
    {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
{%- endif -%}

{% set compare_cols_csv = compare_columns | join(', ') %}

with a as (
    select * from {{ model }}
),

b as (
    select * from {{ compare_model }}
),

a_minus_b as (
    select {{ compare_cols_csv }} from a
    where ({{ compare_cols_csv }}) not in
        (select {{ compare_cols_csv }} from b)
),

b_minus_a as (
    select {{ compare_cols_csv }} from b
    where ({{ compare_cols_csv }}) not in
        (select {{ compare_cols_csv }} from a)
),

unioned as (
    select * from a_minus_b
    union all
    select * from b_minus_a
),

final as (
    select (select count(*) from unioned) +
        (select abs(
            (select count(*) from a_minus_b) -
            (select count(*) from b_minus_a)
        ))
        as count
)

select count from final

{% endmacro %}

{% macro oracle__test_equality(model) %}
{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
{%- if not execute -%}
    {{ return('') }}
{% endif %}

-- setup
{%- do dbt_utils._is_relation(model, 'test_equality') -%}

{#-
If the compare_cols arg is provided, we can run this test without querying the
information schema — this allows the model to be an ephemeral model
-#}
{%- set compare_columns = kwargs.get('compare_columns', None) -%}

{%- if not compare_columns -%}
    {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
    {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
{%- endif -%}

{% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %}
{% set compare_cols_csv = compare_columns | join(', ') %}

with a as (
    select * from {{ model }}
),
b as (
    select * from {{ compare_model }}
),
a_minus_b as (
    select {{compare_cols_csv}} from a
    {{ dbt_utils.except() }}
    select {{compare_cols_csv}} from b
),
b_minus_a as (
    select {{compare_cols_csv}} from b
    {{ dbt_utils.except() }}
    select {{compare_cols_csv}} from a
),
unioned as (
    select * from a_minus_b
    union all
    select * from b_minus_a
)
select count(*) from unioned
{% endmacro %}
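The NOT IN construction in mysql__test_equality stands in for the set difference that EXCEPT would normally compute. For a single-column relation (names invented for illustration), the emulation is roughly equivalent, ignoring NULL semantics:

select id from a where (id) not in (select id from b)
-- plays the role of:
select id from a except select id from b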
@@ -0,0 +1,51 @@
{#
    This overrides the behavior of the macro `should_full_refresh` so full refreshes are triggered if:
    - the dbt cli is run with the --full-refresh flag or the model is explicitly configured to full_refresh
    - the column _airbyte_ab_id does not exist in the normalized tables, to make sure it is well populated.
#}

{%- macro need_full_refresh(col_ab_id, target_table=this) -%}
    {%- if not execute -%}
        {{ return(false) }}
    {%- endif -%}
    {%- set found_column = [] %}
    {%- set cols = adapter.get_columns_in_relation(target_table) -%}
    {%- for col in cols -%}
        {%- if col.column == col_ab_id -%}
            {% do found_column.append(col.column) %}
        {%- endif -%}
    {%- endfor -%}
    {%- if found_column -%}
        {{ return(false) }}
    {%- else -%}
        {{ dbt_utils.log_info(target_table ~ "." ~ col_ab_id ~ " does not exist yet. The table will be created or rebuilt with dbt.full_refresh") }}
        {{ return(true) }}
    {%- endif -%}
{%- endmacro -%}

{%- macro should_full_refresh() -%}
    {% set config_full_refresh = config.get('full_refresh') %}
    {%- if config_full_refresh is none -%}
        {% set config_full_refresh = flags.FULL_REFRESH %}
    {%- endif -%}
    {%- if not config_full_refresh -%}
        {% set config_full_refresh = need_full_refresh(get_col_ab_id(), this) %}
    {%- endif -%}
    {% do return(config_full_refresh) %}
{%- endmacro -%}

{%- macro get_col_ab_id() -%}
    {{ adapter.dispatch('get_col_ab_id')() }}
{%- endmacro -%}

{%- macro default__get_col_ab_id() -%}
    _airbyte_ab_id
{%- endmacro -%}

{%- macro oracle__get_col_ab_id() -%}
    "_AIRBYTE_AB_ID"
{%- endmacro -%}

{%- macro snowflake__get_col_ab_id() -%}
    _AIRBYTE_AB_ID
{%- endmacro -%}
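Because dbt consults a project-level should_full_refresh() macro when it materializes incremental models, overriding it here changes the behavior of every incremental model at once; a minimal sketch of the decision it feeds (assuming dbt 1.0's built-in incremental materialization flow):

{% if should_full_refresh() %}
    {# drop and rebuild the table from scratch #}
{% else %}
    {# insert only the new rows selected by incremental_clause() #}
{% endif %}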
@@ -0,0 +1,46 @@
{#
    Similar to the star macro here: https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/star.sql

    This star_intersect macro takes an additional 'intersect' relation as argument.
    Its behavior is to select columns from both the 'intersect' and 'from' relations with the following rules:
    - if a column exists in both the 'from' and 'intersect' relations, then the column from 'intersect' is used
    - if a column is not in both relations, then only the column from the 'from' relation is used
#}
{% macro star_intersect(from, intersect, from_alias=False, intersect_alias=False, except=[]) -%}
    {%- do dbt_utils._is_relation(from, 'star_intersect') -%}
    {%- do dbt_utils._is_ephemeral(from, 'star_intersect') -%}
    {%- do dbt_utils._is_relation(intersect, 'star_intersect') -%}
    {%- do dbt_utils._is_ephemeral(intersect, 'star_intersect') -%}

    {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
    {%- if not execute -%}
        {{ return('') }}
    {% endif %}

    {%- set include_cols = [] %}
    {%- set cols = adapter.get_columns_in_relation(from) -%}
    {%- set except = except | map("lower") | list %}
    {%- for col in cols -%}
        {%- if col.column|lower not in except -%}
            {% do include_cols.append(col.column) %}
        {%- endif %}
    {%- endfor %}

    {%- set include_intersect_cols = [] %}
    {%- set intersect_cols = adapter.get_columns_in_relation(intersect) -%}
    {%- for col in intersect_cols -%}
        {%- if col.column|lower not in except -%}
            {% do include_intersect_cols.append(col.column) %}
        {%- endif %}
    {%- endfor %}

    {%- for col in include_cols %}
        {%- if col in include_intersect_cols -%}
            {%- if intersect_alias %}{{ intersect_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }}
            {%- if not loop.last %},{{ '\n  ' }}{% endif %}
        {%- else %}
            {%- if from_alias %}{{ from_alias }}.{% else %}{{ from }}.{%- endif -%}{{ adapter.quote(col)|trim }} as {{ adapter.quote(col)|trim }}
            {%- if not loop.last %},{{ '\n  ' }}{% endif %}
        {%- endif %}
    {%- endfor -%}
{%- endmacro %}
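A hypothetical call (relation and column names invented for illustration) shows the preference for the 'intersect' side:

select
    {{ star_intersect(ref('users_base'), this, from_alias='base', intersect_alias='new') }}
from ...

-- might render a column list like:
new."id",
new."updated_at",
base."legacy_col" as "legacy_col"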
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
@@ -0,0 +1,3 @@
# This dockerfile only exists to pull and re-export this image converted to the local arch of this machine
# It is then consumed by the Dockerfile in this directory as "fishtownanalytics/dbt:1.0.0-dev"
FROM fishtownanalytics/dbt:1.0.0
@@ -0,0 +1,66 @@
version: "3.7"

services:
  normalization:
    image: airbyte/normalization:${VERSION}
    build:
      dockerfile: Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-mssql:
    image: airbyte/normalization-mssql:${VERSION}
    build:
      dockerfile: mssql.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-mysql:
    image: airbyte/normalization-mysql:${VERSION}
    build:
      dockerfile: mysql.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-oracle:
    image: airbyte/normalization-oracle:${VERSION}
    build:
      dockerfile: oracle.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-clickhouse:
    image: airbyte/normalization-clickhouse:${VERSION}
    build:
      dockerfile: clickhouse.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-snowflake:
    image: airbyte/normalization-snowflake:${VERSION}
    build:
      dockerfile: snowflake.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-redshift:
    image: airbyte/normalization-redshift:${VERSION}
    build:
      dockerfile: redshift.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-tidb:
    image: airbyte/normalization-tidb:${VERSION}
    build:
      dockerfile: tidb.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
  normalization-duckdb:
    image: airbyte/normalization-duckdb:${VERSION}
    build:
      dockerfile: duckdb.Dockerfile
      context: .
      labels:
        io.airbyte.git-revision: ${GIT_REVISION}
@@ -0,0 +1,22 @@
version: "3.7"

# this file only exists so that we can easily check that all of these images exist in docker hub in check_images_exist.sh
services:
  normalization:
    image: airbyte/normalization:${VERSION}
  normalization-mssql:
    image: airbyte/normalization-mssql:${VERSION}
  normalization-mysql:
    image: airbyte/normalization-mysql:${VERSION}
  normalization-oracle:
    image: airbyte/normalization-oracle:${VERSION}
  normalization-clickhouse:
    image: airbyte/normalization-clickhouse:${VERSION}
  normalization-snowflake:
    image: airbyte/normalization-snowflake:${VERSION}
  normalization-redshift:
    image: airbyte/normalization-redshift:${VERSION}
  normalization-tidb:
    image: airbyte/normalization-tidb:${VERSION}
  normalization-duckdb:
    image: airbyte/normalization-duckdb:${VERSION}
@@ -0,0 +1,40 @@
FROM fishtownanalytics/dbt:1.0.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
RUN apt-get update && apt-get install -y jq sshpass

WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs

# workaround for https://github.com/yaml/pyyaml/issues/601
# this should be fixed in the airbyte/base-airbyte-protocol-python image
RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation

RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .
RUN pip install dbt-duckdb==1.0.1

# add duckdb manually (installing it via setup.py caused lots of errors)
RUN pip install duckdb

WORKDIR /airbyte/normalization_code/dbt-template/
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-duckdb
160
airbyte-integrations/bases/base-normalization/entrypoint.sh
Executable file
@@ -0,0 +1,160 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e # tells bash, in a script, to exit whenever anything returns a non-zero return value.
|
||||||
|
|
||||||
|
function echo2() {
|
||||||
|
echo >&2 "$@"
|
||||||
|
}
|
||||||
|
|
||||||
|
function error() {
|
||||||
|
echo2 "$@"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
function config_cleanup() {
|
||||||
|
# Remove config file as it might still contain sensitive credentials (for example,
|
||||||
|
# injected OAuth Parameters should not be visible to custom docker images running custom transformation operations)
|
||||||
|
rm -f "${CONFIG_FILE}"
|
||||||
|
}
|
||||||
|
|
||||||
|
function check_dbt_event_buffer_size() {
|
||||||
|
ret=0
|
||||||
|
dbt --help | grep -E -- '--event-buffer-size' && return
|
||||||
|
ret=1
|
||||||
|
}

PROJECT_DIR=$(pwd)

# How many commits should be downloaded from git to view the history of a branch
GIT_HISTORY_DEPTH=5

# This function produces a working dbt project folder at the $PROJECT_DIR path so that dbt commands can be run
# from it successfully with the proper credentials. This can be accomplished by providing different custom variables
# to tweak the final project structure. For example, we can either use a user-provided base folder (git repo) or
# use the standard/base template folder to generate normalization models from.
function configuredbt() {
  # We first need to generate a workspace folder for a dbt project to run from:
  if [[ -z "${GIT_REPO}" ]]; then
    # No git repository provided, use the dbt-template folder (shipped inside the normalization docker image)
    # as the base folder for the dbt workspace
    cp -r /airbyte/normalization_code/dbt-template/* "${PROJECT_DIR}"
    echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}"
    set +e # allow the script to continue running even if the next commands fail
    # Generate a profiles.yml file for the selected destination/integration type
    transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}"
    if [[ -n "${CATALOG_FILE}" ]]; then
      # If a catalog file is provided, generate normalization models; otherwise skip this step
      echo "Running: transform-catalog --integration-type ${INTEGRATION_TYPE} --profile-config-dir ${PROJECT_DIR} --catalog ${CATALOG_FILE} --out ${PROJECT_DIR}/models/generated/ --json-column _airbyte_data"
      transform-catalog --integration-type "${INTEGRATION_TYPE}" --profile-config-dir "${PROJECT_DIR}" --catalog "${CATALOG_FILE}" --out "${PROJECT_DIR}/models/generated/" --json-column "_airbyte_data"
      TRANSFORM_EXIT_CODE=$?
      if [ ${TRANSFORM_EXIT_CODE} -ne 0 ]; then
        echo -e "\nShowing destination_catalog.json to diagnose/debug errors (${TRANSFORM_EXIT_CODE}):\n"
        cat "${CATALOG_FILE}" | jq
        exit ${TRANSFORM_EXIT_CODE}
      fi
    fi
    set -e # exit again whenever any command returns a non-zero status
  else
    trap config_cleanup EXIT
    # Use the git repository as the base workspace folder for dbt projects
    if [[ -d git_repo ]]; then
      rm -rf git_repo
    fi
    # Make a shallow clone of the latest git repository in the workspace folder
    if [[ -z "${GIT_BRANCH}" ]]; then
      # No git branch specified, use the default branch of the git repository
      echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} --single-branch \$GIT_REPO git_repo"
      git clone --depth ${GIT_HISTORY_DEPTH} --single-branch "${GIT_REPO}" git_repo
    else
      # Checkout a particular branch from the git repository
      echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} -b ${GIT_BRANCH} --single-branch \$GIT_REPO git_repo"
      git clone --depth ${GIT_HISTORY_DEPTH} -b "${GIT_BRANCH}" --single-branch "${GIT_REPO}" git_repo
    fi
    # Print a few history logs to make it easier for users to verify the right code version has been checked out from git
    echo "Last 5 commits in git_repo:"
    (cd git_repo; git log --oneline -${GIT_HISTORY_DEPTH}; cd -)
    # Generate a profiles.yml file for the selected destination/integration type
    echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}"
    transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}"
    config_cleanup
  fi
}
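
# Example (assumed values, for illustration): configuredbt reads its inputs
# from the CONFIG_FILE, INTEGRATION_TYPE, GIT_REPO and GIT_BRANCH variables
# set by main() below, e.g.:
#   CONFIG_FILE=/config/destination_config.json INTEGRATION_TYPE=postgres configuredbt
#   GIT_REPO=https://github.com/acme/custom-dbt.git GIT_BRANCH=main configuredbt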

## todo: make it easy to select source or destination and validate based on selection by adding an integration type env variable.
function main() {
  CMD="$1"
  shift 1 || error "command not specified."

  while [ $# -ne 0 ]; do
    case "$1" in
    --config)
      CONFIG_FILE="$2"
      shift 2
      ;;
    --catalog)
      CATALOG_FILE="$2"
      shift 2
      ;;
    --integration-type)
      INTEGRATION_TYPE="$2"
      shift 2
      ;;
    --git-repo)
      GIT_REPO="$2"
      shift 2
      ;;
    --git-branch)
      GIT_BRANCH="$2"
      shift 2
      ;;
    *)
      error "Unknown option: $1"
      ;;
    esac
  done

  case "$CMD" in
  run)
    configuredbt
    . /airbyte/sshtunneling.sh
    openssh "${PROJECT_DIR}/ssh.json"
    trap 'closessh' EXIT

    set +e # allow the script to continue running even if the next commands fail
    # We don't run dbt 1.0.x on all destinations (because their plugins don't support it yet),
    # so we only pass `--event-buffer-size` if it's supported by dbt.
    # The same goes for JSON-formatted logging.
    check_dbt_event_buffer_size
    if [ "$ret" -eq 0 ]; then
      echo -e "\nDBT >=1.0.0 detected; using 10K event buffer size\n"
      dbt_additional_args="--event-buffer-size=10000 --log-format json"
    else
      dbt_additional_args=""
    fi

    # Run dbt to compile and execute the generated normalization models
    dbt ${dbt_additional_args} run --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
    DBT_EXIT_CODE=$?
    if [ ${DBT_EXIT_CODE} -ne 0 ]; then
      echo -e "\nRunning dbt debug to check if the destination is available for dbt and well configured (${DBT_EXIT_CODE}):\n"
      dbt debug --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
      DBT_DEBUG_EXIT_CODE=$?
      if [ ${DBT_DEBUG_EXIT_CODE} -eq 0 ]; then
        # dbt debug is successful, so the error must be somewhere else...
        echo -e "\nForwarding dbt output logs to diagnose/debug errors (${DBT_DEBUG_EXIT_CODE}):\n"
        cat "${PROJECT_DIR}/../logs/dbt.log"
      fi
    fi
    closessh
    exit ${DBT_EXIT_CODE}
    ;;
  configure-dbt)
    configuredbt
    ;;
  *)
    error "Unknown command: $CMD"
    ;;
  esac
}

main "$@"
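
# Invocation sketch (paths, repo URL and integration type are illustrative):
#   /airbyte/entrypoint.sh run --config destination_config.json --catalog catalog.json --integration-type postgres
#   /airbyte/entrypoint.sh configure-dbt --git-repo https://github.com/acme/custom-dbt.git --git-branch main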
@@ -0,0 +1,740 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import json
import os
import pathlib
import random
import re
import socket
import string
import subprocess
import sys
import threading
import time
from copy import copy
from typing import Any, Callable, Dict, List, Union

import yaml
from normalization.destination_type import DestinationType
from normalization.transform_catalog.transform import read_yaml_config, write_yaml_config
from normalization.transform_config.transform import TransformConfig

NORMALIZATION_TEST_TARGET = "NORMALIZATION_TEST_TARGET"
NORMALIZATION_TEST_MSSQL_DB_PORT = "NORMALIZATION_TEST_MSSQL_DB_PORT"
NORMALIZATION_TEST_MYSQL_DB_PORT = "NORMALIZATION_TEST_MYSQL_DB_PORT"
NORMALIZATION_TEST_POSTGRES_DB_PORT = "NORMALIZATION_TEST_POSTGRES_DB_PORT"
NORMALIZATION_TEST_CLICKHOUSE_DB_PORT = "NORMALIZATION_TEST_CLICKHOUSE_DB_PORT"
NORMALIZATION_TEST_TIDB_DB_PORT = "NORMALIZATION_TEST_TIDB_DB_PORT"
NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH = "NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH"


class DbtIntegrationTest(object):
    def __init__(self):
        self.target_schema = "test_normalization"
        self.container_prefix = f"test_normalization_db_{self.random_string(3)}"
        self.db_names = []

    @staticmethod
    def generate_random_string(prefix: str) -> str:
        return prefix + DbtIntegrationTest.random_string(5)

    @staticmethod
    def random_string(length: int) -> str:
        return "".join(random.choice(string.ascii_lowercase) for i in range(length))

    def set_target_schema(self, target_schema: str):
        self.target_schema = target_schema

    def setup_db(self, destinations_to_test: List[str]):
        if DestinationType.POSTGRES.value in destinations_to_test:
            self.setup_postgres_db()
        if DestinationType.MYSQL.value in destinations_to_test:
            self.setup_mysql_db()
        if DestinationType.MSSQL.value in destinations_to_test:
            self.setup_mssql_db()
        if DestinationType.CLICKHOUSE.value in destinations_to_test:
            self.setup_clickhouse_db()
        if DestinationType.TIDB.value in destinations_to_test:
            self.setup_tidb_db()
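
    # Usage sketch (destination names illustrative): spin up only the
    # containers needed for the selected destinations, then tear them down:
    #   dbt_test = DbtIntegrationTest()
    #   dbt_test.setup_db(["postgres", "mysql"])
    #   ...run tests...
    #   dbt_test.tear_down_db()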

    def setup_postgres_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "username": "integration-tests",
            "password": "integration-tests",
            "port": port,
            "database": "postgres",
            "schema": self.target_schema,
        }
        if start_db:
            self.db_names.append("postgres")
            print("Starting localhost postgres container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_postgres",
                "-e",
                f"POSTGRES_USER={config['username']}",
                "-e",
                f"POSTGRES_PASSWORD={config['password']}",
                "-p",
                f"{config['port']}:5432",
                "-d",
                "marcosmarxm/postgres-ssl:dev",
                "-c",
                "ssl=on",
                "-c",
                "ssl_cert_file=/var/lib/postgresql/server.crt",
                "-c",
                "ssl_key_file=/var/lib/postgresql/server.key",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for Postgres DB to start...15 sec")
            time.sleep(15)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/postgres.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_mysql_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "port": port,
            "database": self.target_schema,
            "username": "root",
            "password": "",
        }
        if start_db:
            self.db_names.append("mysql")
            print("Starting localhost mysql container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_mysql",
                "-e",
                "MYSQL_ALLOW_EMPTY_PASSWORD=yes",
                "-e",
                "MYSQL_INITDB_SKIP_TZINFO=yes",
                "-e",
                f"MYSQL_DATABASE={config['database']}",
                "-e",
                "MYSQL_ROOT_HOST=%",
                "-p",
                f"{config['port']}:3306",
                "-d",
                "mysql/mysql-server",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for MySQL DB to start...15 sec")
            time.sleep(15)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/mysql.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_mssql_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "username": "SA",
            "password": "MyStr0ngP@ssw0rd",
            "port": port,
            "database": self.target_schema,
            "schema": self.target_schema,
        }
        if start_db:
            self.db_names.append("mssql")
            print("Starting localhost MS SQL Server container for tests")
            command_start_container = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_mssql",
                "-h",
                f"{self.container_prefix}_mssql",
                "-e",
                "ACCEPT_EULA='Y'",
                "-e",
                f"SA_PASSWORD='{config['password']}'",
                "-e",
                "MSSQL_PID='Standard'",
                "-p",
                f"{config['port']}:1433",
                "-d",
                "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04",
            ]
            # cmds & parameters
            cmd_start_container = " ".join(command_start_container)
            wait_sec = 30
            # run the docker container
            print("Executing: ", cmd_start_container)
            subprocess.check_call(cmd_start_container, shell=True)
            # wait for the service to become available
            print(f"....Waiting for MS SQL Server to start...{wait_sec} sec")
            time.sleep(wait_sec)
            # Run additional commands to prepare the table
            command_create_db = [
                "docker",
                "exec",
                f"{self.container_prefix}_mssql",
                "/opt/mssql-tools/bin/sqlcmd",
                "-S",
                config["host"],
                "-U",
                config["username"],
                "-P",
                config["password"],
                "-Q",
                f"CREATE DATABASE [{config['database']}]",
            ]
            # create test db
            print("Executing: ", " ".join(command_create_db))
            subprocess.call(command_create_db)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/mssql.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_clickhouse_db(self):
        """
        ClickHouse official JDBC driver uses HTTP port 8123.

        Ref: https://altinity.com/blog/2019/3/15/clickhouse-networking-part-1
        """
        start_db = True
        port = 8123
        if os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT))
            start_db = False
        if start_db:
            port = self.find_free_port()
        config = {
            "host": "localhost",
            "port": port,
            "database": self.target_schema,
            "username": "default",
            "password": "",
            "ssl": False,
        }
        if start_db:
            self.db_names.append("clickhouse")
            print("Starting localhost clickhouse container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_clickhouse",
                "--ulimit",
                "nofile=262144:262144",
                "-p",
                f"{config['port']}:8123",  # the clickhouse JDBC driver uses the HTTP port
                "-d",
                # so far, only the latest ClickHouse server image has window
                # functions turned on
                "clickhouse/clickhouse-server:latest",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for ClickHouse DB to start...15 sec")
            time.sleep(15)
            # Run additional commands to prepare the table
            command_create_db = [
                "docker",
                "run",
                "--rm",
                "--link",
                f"{self.container_prefix}_clickhouse:clickhouse-server",
                "clickhouse/clickhouse-client:21.8.10.19",
                "--host",
                "clickhouse-server",
                "--query",
                f"CREATE DATABASE IF NOT EXISTS {config['database']}",
            ]
            # create test db
            print("Executing: ", " ".join(command_create_db))
            subprocess.call(command_create_db)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/clickhouse.json", "w") as fh:
            fh.write(json.dumps(config))

    def setup_tidb_db(self):
        start_db = True
        if os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT):
            port = int(os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT))
            start_db = False
        else:
            port = self.find_free_port()
        config = {
            "host": "127.0.0.1",
            "port": port,
            "database": self.target_schema,
            "schema": self.target_schema,
            "username": "root",
            "password": "",
            "ssl": False,
        }
        if start_db:
            self.db_names.append("tidb")
            print("Starting tidb container for tests")
            commands = [
                "docker",
                "run",
                "--rm",
                "--name",
                f"{self.container_prefix}_tidb",
                "-p",
                f"{config['port']}:4000",
                "-d",
                "pingcap/tidb:v5.4.0",
            ]
            print("Executing: ", " ".join(commands))
            subprocess.call(commands)
            print("....Waiting for TiDB to start...15 sec")
            time.sleep(15)
            command_create_db = [
                "docker",
                "run",
                "--rm",
                "--link",
                f"{self.container_prefix}_tidb:tidb",
                "arey/mysql-client",
                "--host=tidb",
                "--user=root",
                "--port=4000",
                f"--execute=CREATE DATABASE IF NOT EXISTS {self.target_schema}",
            ]
            print("Executing: ", " ".join(command_create_db))
            subprocess.call(command_create_db)
        if not os.path.exists("../secrets"):
            os.makedirs("../secrets")
        with open("../secrets/tidb.json", "w") as fh:
            fh.write(json.dumps(config))

    @staticmethod
    def find_free_port():
        """
        Find an unused port to create a database listening on localhost to run destination-postgres
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        addr = s.getsockname()
        s.close()
        return addr[1]
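
    # Note: binding to port 0 asks the OS for an ephemeral port, which is then
    # released before any container binds it; there is an inherent race where
    # another process could claim the port in between.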

    def tear_down_db(self):
        for db_name in self.db_names:
            print(f"Stopping localhost {db_name} container for tests")
            try:
                subprocess.call(["docker", "kill", f"{self.container_prefix}_{db_name}"])
            except Exception as e:
                print(f"WARN: Exception while shutting down {db_name}: {e}")

    @staticmethod
    def change_current_test_dir(request):
        # This makes the test run whether it is executed from the tests folder (with pytest/gradle)
        # or from the base-normalization folder (through pycharm)
        integration_tests_dir = os.path.join(request.fspath.dirname, "integration_tests")
        if os.path.exists(integration_tests_dir):
            os.chdir(integration_tests_dir)
        else:
            os.chdir(request.fspath.dirname)

    def generate_profile_yaml_file(
        self, destination_type: DestinationType, test_root_dir: str, random_schema: bool = False
    ) -> Dict[str, Any]:
        """
        Each destination requires different settings to connect to. This step generates the adequate profiles.yml
        as described here: https://docs.getdbt.com/reference/profiles.yml
        """
        config_generator = TransformConfig()
        profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json")
        # Adapt the credential file to look like a destination config.json
        if destination_type.value == DestinationType.BIGQUERY.value:
            credentials = profiles_config["basic_bigquery_config"]
            profiles_config = {
                "credentials_json": json.dumps(credentials),
                "dataset_id": self.target_schema,
                "project_id": credentials["project_id"],
                "dataset_location": "US",
            }
        elif destination_type.value == DestinationType.MYSQL.value:
            profiles_config["database"] = self.target_schema
        elif destination_type.value == DestinationType.REDSHIFT.value:
            profiles_config["schema"] = self.target_schema
            if random_schema:
                profiles_config["schema"] = self.target_schema + "_" + "".join(random.choices(string.ascii_lowercase, k=5))
        else:
            profiles_config["schema"] = self.target_schema
        if destination_type.value == DestinationType.CLICKHOUSE.value:
            clickhouse_config = copy(profiles_config)
            profiles_yaml = config_generator.transform(destination_type, clickhouse_config)
        else:
            profiles_yaml = config_generator.transform(destination_type, profiles_config)
        config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml")
        return profiles_config

    @staticmethod
    def run_destination_process(message_file: str, test_root_dir: str, commands: List[str]):
        print("Executing: ", " ".join(commands))
        with open(os.path.join(test_root_dir, "destination_output.log"), "ab") as f:
            process = subprocess.Popen(commands, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

            def writer():
                if os.path.exists(message_file):
                    with open(message_file, "rb") as input_data:
                        while True:
                            line = input_data.readline()
                            if not line:
                                break
                            if not line.startswith(b"//"):
                                process.stdin.write(line)
                process.stdin.close()

            thread = threading.Thread(target=writer)
            thread.start()
            for line in iter(process.stdout.readline, b""):
                f.write(line)
                sys.stdout.write(line.decode("utf-8"))
            thread.join()
            process.wait()
        return process.returncode == 0
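
    # Note: the writer thread feeds the message file into the destination's
    # stdin while the main thread drains stdout; using separate threads avoids
    # deadlocking on full OS pipe buffers. Lines starting with "//" are treated
    # as comments and skipped.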

    @staticmethod
    def get_normalization_image(destination_type: DestinationType) -> str:
        if DestinationType.MSSQL.value == destination_type.value:
            return "airbyte/normalization-mssql:dev"
        elif DestinationType.MYSQL.value == destination_type.value:
            return "airbyte/normalization-mysql:dev"
        elif DestinationType.ORACLE.value == destination_type.value:
            return "airbyte/normalization-oracle:dev"
        elif DestinationType.CLICKHOUSE.value == destination_type.value:
            return "airbyte/normalization-clickhouse:dev"
        elif DestinationType.SNOWFLAKE.value == destination_type.value:
            return "airbyte/normalization-snowflake:dev"
        elif DestinationType.REDSHIFT.value == destination_type.value:
            return "airbyte/normalization-redshift:dev"
        elif DestinationType.TIDB.value == destination_type.value:
            return "airbyte/normalization-tidb:dev"
        else:
            return "airbyte/normalization:dev"

    def dbt_check(self, destination_type: DestinationType, test_root_dir: str):
        """
        Run the dbt CLI to perform sanity checks on the dbt project settings against the destination
        """
        normalization_image: str = self.get_normalization_image(destination_type)
        # Perform sanity check on dbt project settings
        assert self.run_check_dbt_command(normalization_image, "debug", test_root_dir)
        assert self.run_check_dbt_command(normalization_image, "deps", test_root_dir)

    def dbt_run(self, destination_type: DestinationType, test_root_dir: str, force_full_refresh: bool = False):
        """
        Run the dbt CLI to perform transformations on the test raw data in the destination
        """
        normalization_image: str = self.get_normalization_image(destination_type)
        # Compile dbt models files into the destination sql dialect, then run the transformation queries
        assert self.run_check_dbt_command(normalization_image, "run", test_root_dir, force_full_refresh)

    def dbt_run_macro(self, destination_type: DestinationType, test_root_dir: str, macro: str, macro_args: str = None):
        """
        Run the dbt CLI to perform transformations on the test raw data in the destination, using an independent macro.
        """
        normalization_image: str = self.get_normalization_image(destination_type)
        # Compile dbt models files into the destination sql dialect, then run the transformation queries
        assert self.run_dbt_run_operation(normalization_image, test_root_dir, macro, macro_args)

    def run_check_dbt_command(self, normalization_image: str, command: str, cwd: str, force_full_refresh: bool = False) -> bool:
        """
        Run a dbt subprocess while checking and counting "ERROR", "FAIL" or "WARNING" mentions printed in its outputs
        """
        if any([normalization_image.startswith(x) for x in ["airbyte/normalization-oracle", "airbyte/normalization-clickhouse"]]):
            dbtAdditionalArgs = []
        else:
            dbtAdditionalArgs = ["--event-buffer-size=10000"]

        commands = (
            [
                "docker",
                "run",
                "--rm",
                "--init",
                "-v",
                f"{cwd}:/workspace",
                "-v",
                f"{cwd}/build:/build",
                "-v",
                f"{cwd}/logs:/logs",
                "-v",
                f"{cwd}/build/dbt_packages:/dbt",
                "--network",
                "host",
                "--entrypoint",
                "/usr/local/bin/dbt",
                "-i",
                normalization_image,
            ]
            + dbtAdditionalArgs
            + [
                command,
                "--profiles-dir=/workspace",
                "--project-dir=/workspace",
            ]
        )
        if force_full_refresh:
            commands.append("--full-refresh")
            command = f"{command} --full-refresh"
        print("Executing: ", " ".join(commands))
        print(f"Equivalent to: dbt {command} --profiles-dir={cwd} --project-dir={cwd}")
        return self.run_check_dbt_subprocess(commands, cwd)

    def run_dbt_run_operation(self, normalization_image: str, cwd: str, macro: str, macro_args: str = None) -> bool:
        """
        Run a dbt run-operation subprocess while checking and counting "ERROR", "FAIL" or "WARNING" mentions printed in its outputs
        """
        args = ["--args", macro_args] if macro_args else []
        commands = (
            [
                "docker",
                "run",
                "--rm",
                "--init",
                "-v",
                f"{cwd}:/workspace",
                "-v",
                f"{cwd}/build:/build",
                "-v",
                f"{cwd}/logs:/logs",
                "-v",
                f"{cwd}/build/dbt_packages:/dbt",
                "--network",
                "host",
                "--entrypoint",
                "/usr/local/bin/dbt",
                "-i",
                normalization_image,
            ]
            + ["run-operation", macro]
            + args
            + ["--profiles-dir=/workspace", "--project-dir=/workspace"]
        )

        print("Executing: ", " ".join(commands))
        print(f"Equivalent to: dbt run-operation {macro} --args {macro_args} --profiles-dir={cwd} --project-dir={cwd}")
        return self.run_check_dbt_subprocess(commands, cwd)

    def run_check_dbt_subprocess(self, commands: list, cwd: str):
        error_count = 0
        with open(os.path.join(cwd, "dbt_output.log"), "ab") as f:
            process = subprocess.Popen(commands, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=os.environ)
            for line in iter(lambda: process.stdout.readline(), b""):
                f.write(line)
                str_line = line.decode("utf-8")
                sys.stdout.write(str_line)
                # keywords to match lines as signaling errors
                if "ERROR" in str_line or "FAIL" in str_line or "WARNING" in str_line:
                    # exception keywords in lines to ignore as errors (such as summary or expected warnings)
                    is_exception = False
                    for except_clause in [
                        "Done.",  # DBT Summary
                        "PASS=",  # DBT Summary
                        "Nothing to do.",  # When no schema/data tests are setup
                        "Configuration paths exist in your dbt_project.yml",  # When no cte / view are generated
                        "Error loading config file: .dockercfg: $HOME is not defined",  # ignore warning
                        "depends on a node named 'disabled_test' which was not found",  # Tests throwing warning because it is disabled
                        "The requested image's platform (linux/amd64) does not match the detected host platform "
                        + "(linux/arm64/v8) and no specific platform was requested",  # temporary patch until we publish images for arm64
                    ]:
                        if except_clause in str_line:
                            is_exception = True
                            break
                    if not is_exception:
                        # count lines signaling an error/failure/warning
                        error_count += 1
            process.wait()
        message = (
            f"{' '.join(commands)}\n\tterminated with return code {process.returncode} "
            f"with {error_count} 'Error/Warning/Fail' mention(s)."
        )
        print(message)
        assert error_count == 0, message
        assert process.returncode == 0, message
        if error_count > 0:
            return False
        return process.returncode == 0

    @staticmethod
    def copy_replace(src, dst, pattern=None, replace_value=None):
        """
        Copies a file from src to dst replacing pattern by replace_value

        Parameters
        ----------
        src : string
            Path to the source filename to copy from
        dst : string
            Path to the output filename to copy to
        pattern
            List of patterns to replace inside the src file
        replace_value
            List of values to replace by in the dst file
        """
        file1 = open(src, "r") if isinstance(src, str) else src
        file2 = open(dst, "w") if isinstance(dst, str) else dst
        pattern = [pattern] if isinstance(pattern, str) else pattern
        replace_value = [replace_value] if isinstance(replace_value, str) else replace_value
        if replace_value and pattern:
            if len(replace_value) != len(pattern):
                raise Exception("Invalid parameters: pattern and replace_value have different sizes.")
            rules = [(re.compile(regex, re.IGNORECASE), value) for regex, value in zip(pattern, replace_value)]
        else:
            rules = []
        for line in file1:
            if rules:
                for rule in rules:
                    line = re.sub(rule[0], rule[1], line)
            file2.write(line)
        if isinstance(src, str):
            file1.close()
        if isinstance(dst, str):
            file2.close()
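
    # Usage sketch (file names and patterns illustrative): rewrite the target
    # schema while copying a packaged dbt_project.yml into a test workspace:
    #   DbtIntegrationTest.copy_replace(
    #       "dbt_project.yml", "/tmp/workspace/dbt_project.yml",
    #       pattern="schema: test_normalization", replace_value="schema: test_xyz",
    #   )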

    @staticmethod
    def get_test_targets() -> List[str]:
        """
        Returns a list of destinations to run tests on.

        If the environment variable NORMALIZATION_TEST_TARGET is set with a comma separated list of destination names,
        then the tests are run only on that subset of destinations.
        Otherwise tests are run against all destinations.
        """
        if os.getenv(NORMALIZATION_TEST_TARGET):
            target_str = os.getenv(NORMALIZATION_TEST_TARGET)
            return [d.value for d in {DestinationType.from_string(s.strip()) for s in target_str.split(",")}]
        else:
            return [d.value for d in DestinationType]
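
    # Usage sketch (shell, destination names illustrative): restrict a test run
    # to two destinations by exporting the variable before invoking pytest:
    #   NORMALIZATION_TEST_TARGET=postgres,bigquery pytest integration_tests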

    @staticmethod
    def update_yaml_file(filename: str, callback: Callable):
        config = read_yaml_config(filename)
        updated, config = callback(config)
        if updated:
            write_yaml_config(config, filename)
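
    # Usage sketch (key name illustrative): the callback receives the parsed
    # YAML and returns an (updated, config) pair; the file is only rewritten
    # when updated is True:
    #   def enable_flag(config):
    #       config["vars"]["my_flag"] = True
    #       return True, config
    #   DbtIntegrationTest.update_yaml_file("dbt_project.yml", enable_flag)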

    def clean_tmp_tables(
        self,
        destination_type: Union[DestinationType, List[DestinationType]],
        test_type: str,
        tmp_folders: list = None,
        git_versioned_tests: list = None,
    ):
        """
        Cleans up all temporary schemas created during the test session.
        It parses the provided tmp_folders: List[str] or uses `git_versioned_tests` to find sources.yml files generated for the tests.
        It gets the target schemas created by the tests and removes them using the custom scenario specified in
        the `dbt-project-template/macros/clean_tmp_tables.sql` macro.

        REQUIREMENTS:
        1) Ideally, the schemas should have unique names like: test_normalization_<some_random_string> to avoid conflicts.
        2) The `clean_tmp_tables.sql` macro should have a specific macro for the target destination to proceed.

        INPUT ARGUMENTS:
        :: destination_type : either a single destination or a list of destinations
        :: test_type: either "ephemeral" or "normalization" should be supplied.
        :: tmp_folders: should be supplied if test_type = "ephemeral", to get schemas from /build/normalization_test_output folders
        :: git_versioned_tests: should be supplied if test_type = "normalization", to get schemas from integration_tests/normalization_test_output folders

        EXAMPLE:
        clean_up_args = {
            "destination_type": [ DestinationType.REDSHIFT, DestinationType.POSTGRES, ... ]
            "test_type": "normalization",
            "git_versioned_tests": git_versioned_tests,
        }
        """

        path_to_sources: str = "/models/generated/sources.yml"
        test_folders: dict = {}
        source_files: dict = {}
        schemas_to_remove: dict = {}

        # collect information about the tmp tables created for the test for each destination
        for destination in destination_type:
            test_folders[destination.value] = []
            source_files[destination.value] = []
            schemas_to_remove[destination.value] = []

            # based on test_type, select the path to the source files
            if test_type == "ephemeral" or test_type == "test_reset_scd_overwrite":
                if not tmp_folders:
                    raise TypeError("`tmp_folders` arg is not provided.")
                for folder in tmp_folders:
                    if destination.value in folder:
                        test_folders[destination.value].append(folder)
                        source_files[destination.value].append(f"{folder}{path_to_sources}")
            elif test_type == "normalization":
                if not git_versioned_tests:
                    raise TypeError("`git_versioned_tests` arg is not provided.")
                base_path = f"{pathlib.Path().absolute()}/integration_tests/normalization_test_output"
                for test in git_versioned_tests:
                    test_root_dir: str = f"{base_path}/{destination.value}/{test}"
                    test_folders[destination.value].append(test_root_dir)
                    source_files[destination.value].append(f"{test_root_dir}{path_to_sources}")
            else:
                raise TypeError(f"\n`test_type`: {test_type} is not registered; use `ephemeral` or `normalization` instead.\n")

            # parse the sources.yml files from the test folders to get the schemas and table names created for the tests
            for file in source_files[destination.value]:
                source_yml = {}
                try:
                    with open(file, "r") as source_file:
                        source_yml = yaml.safe_load(source_file)
                except FileNotFoundError:
                    print(f"\n{destination.value}: {file} doesn't exist, consider removing any temp tables and schemas manually!\n")
                    pass
                test_sources: list = source_yml.get("sources", []) if source_yml else []

                for source in test_sources:
                    target_schema: str = source.get("name")
                    if target_schema not in schemas_to_remove[destination.value]:
                        schemas_to_remove[destination.value].append(target_schema)
                        # also mark the _airbyte_* tmp schemas for removal
                        schemas_to_remove[destination.value].append(f"_airbyte_{target_schema}")

        # clean up the tmp tables generated by the tests
        for destination in destination_type:
            if not schemas_to_remove[destination.value]:
                print(f"\n\t{destination.value.upper()} DESTINATION: SKIP CLEANING, NOTHING TO REMOVE.\n")
            else:
                print(f"\n\t{destination.value.upper()} DESTINATION: CLEANING LEFTOVERS...\n")
                print(f"\t{schemas_to_remove[destination.value]}\n")
                test_root_folder = test_folders[destination.value][0]
                args = json.dumps({"schemas": schemas_to_remove[destination.value]})
                self.dbt_check(destination, test_root_folder)
                self.dbt_run_macro(destination, test_root_folder, "clean_tmp_tables", args)
@@ -0,0 +1,125 @@
name: airbyte_utils
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
quoting:
  database: true
  schema: false
  identifier: true
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view
dispatch:
- macro_namespace: dbt_utils
  search_order:
  - airbyte_utils
  - dbt_utils
vars:
  json_column: _airbyte_data
  models_to_source:
    nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    non_nested_stream_without_namespace_resulting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
    some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty
    some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty
    simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
    conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar
    conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array
    conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array
    conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array
    conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array
    unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias: test_normalization._airbyte_raw_unnest_alias
    arrays_ab1: test_normalization._airbyte_raw_arrays
    arrays_ab2: test_normalization._airbyte_raw_arrays
    arrays_ab3: test_normalization._airbyte_raw_arrays
    arrays: test_normalization._airbyte_raw_arrays
    nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name
    unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children: test_normalization._airbyte_raw_unnest_alias
    arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays
    arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays
    arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays
    arrays_nested_array_parent: test_normalization._airbyte_raw_arrays
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
    conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name
    conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name
    unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias
    unnest_alias_children_owner_column___with__quotes: test_normalization._airbyte_raw_unnest_alias
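
# Note on the dispatch block above: dbt searches airbyte_utils before dbt_utils
# when resolving dbt_utils macros, so this project can shadow one by defining a
# macro with the matching dispatch name, e.g. (macro name and body illustrative,
# not from this commit):
#   {% macro default__type_string() %}
#       string
#   {% endmacro %}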
@@ -0,0 +1,90 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
partition by range_bucket(
    _airbyte_active_row,
    generate_array(0, 1, 1)
)
cluster by _airbyte_unique_key_scd, _airbyte_emitted_at
OPTIONS()
as (

-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')
with

input_data as (
    select *
    from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg`
    -- nested_stream_with_complex_columns_resulting_into_long_names from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
),

scd_data as (
    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
    select
        to_hex(md5(cast(concat(coalesce(cast(id as string), '')) as string))) as _airbyte_unique_key,
        id,
        date,
        `partition`,
        date as _airbyte_start_at,
        lag(date) over (
            partition by id
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by id
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
    from input_data
),
dedup_data as (
    select
        -- we need to ensure de-duplicated rows for merge/update queries
        -- additionally, we generate a unique key for the scd table
        row_number() over (
            partition by
                _airbyte_unique_key,
                _airbyte_start_at,
                _airbyte_emitted_at
            order by _airbyte_active_row desc, _airbyte_ab_id
        ) as _airbyte_row_num,
        to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as string), ''), '-', coalesce(cast(_airbyte_start_at as string), ''), '-', coalesce(cast(_airbyte_emitted_at as string), '')) as string))) as _airbyte_unique_key_scd,
        scd_data.*
    from scd_data
)
select
    _airbyte_unique_key,
    _airbyte_unique_key_scd,
    id,
    date,
    `partition`,
    _airbyte_start_at,
    _airbyte_end_at,
    _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from dedup_data where _airbyte_row_num = 1
);
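
-- Note on the SCD pattern above: _airbyte_end_at takes the cursor value of the
-- next version of the same record via lag(), and _airbyte_active_row marks the
-- latest version (row_number() = 1 per primary key); the final select keeps one
-- row per (_airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at) so that
-- downstream merge/update queries stay deterministic.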
@@ -0,0 +1,26 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_unique_key, _airbyte_emitted_at
OPTIONS()
as (

-- Final base SQL model
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
select
    _airbyte_unique_key,
    id,
    date,
    `partition`,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
-- nested_stream_with_complex_columns_resulting_into_long_names from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
where 1 = 1
and _airbyte_active_row = 1

);
@@ -0,0 +1,74 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
select
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
    json_extract_array(`partition`, "$['double_array_data']") as double_array_data,
    json_extract_array(`partition`, "$['DATA']") as DATA,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as table_alias
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1
and `partition` is not null

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1
select
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
    double_array_data,
    DATA,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2
select
    to_hex(md5(cast(concat(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as string), ''), '-', coalesce(cast(array_to_string(double_array_data, "|", "") as string), ''), '-', coalesce(cast(array_to_string(DATA, "|", "") as string), '')) as string))) as _airbyte_partition_hashid,
    tmp.*
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1

)-- Final base SQL model
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3
select
    _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
    double_array_data,
    DATA,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_partition_hashid
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd`
where 1 = 1

);
@@ -0,0 +1,73 @@


create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`

select
    _airbyte_partition_hashid,
    json_extract_scalar(DATA, "$['currency']") as currency,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
cross join unnest(DATA) as DATA
where 1 = 1
and DATA is not null

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1
select
    _airbyte_partition_hashid,
    cast(currency as string) as currency,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
where 1 = 1

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2
select
    to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as string), ''), '-', coalesce(cast(currency as string), '')) as string))) as _airbyte_DATA_hashid,
    tmp.*
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 tmp
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
where 1 = 1

)-- Final base SQL model
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3
select
    _airbyte_partition_hashid,
    currency,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    CURRENT_TIMESTAMP() as _airbyte_normalized_at,
    _airbyte_DATA_hashid
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`
where 1 = 1

);
@@ -0,0 +1,73 @@
create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`

select
_airbyte_partition_hashid,
json_extract_scalar(double_array_data, "$['id']") as id,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
cross join unnest(double_array_data) as double_array_data
where 1 = 1
and double_array_data is not null

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1
select
_airbyte_partition_hashid,
cast(id as
string
) as id,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
where 1 = 1

), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2
select
to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as
string
), ''), '-', coalesce(cast(id as
string
), '')) as
string
))) as _airbyte_double_array_data_hashid,
tmp.*
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
where 1 = 1

)-- Final base SQL model
-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3
select
_airbyte_partition_hashid,
id,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_double_array_data_hashid
from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition`
where 1 = 1

);
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
unique_key = '_airbyte_ab_id',
schema = "_airbyte_test_normalization",
tags = [ "top-level-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
select
{{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
{{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date,
{{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }},
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias
-- nested_stream_with_complex_columns_resulting_into_long_names
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
unique_key = '_airbyte_ab_id',
schema = "_airbyte_test_normalization",
tags = [ "top-level-intermediate" ]
) }}
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }}
select
cast(id as {{ dbt_utils.type_string() }}) as id,
cast(date as {{ dbt_utils.type_string() }}) as date,
cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }},
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }}
-- nested_stream_with_complex_columns_resulting_into_long_names
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,22 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "_airbyte_test_normalization",
tags = [ "nested-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'DATA') }}
select
_airbyte_partition_hashid,
{{ json_extract_scalar(unnested_column_value('DATA'), ['currency'], ['currency']) }} as currency,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
{{ cross_join_unnest('partition', 'DATA') }}
where 1 = 1
and DATA is not null
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "_airbyte_test_normalization",
tags = [ "nested-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
select
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
{{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data,
{{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as DATA,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} as table_alias
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
where 1 = 1
and {{ adapter.quote('partition') }} is not null
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,22 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "_airbyte_test_normalization",
tags = [ "nested-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'double_array_data') }}
select
_airbyte_partition_hashid,
{{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
{{ cross_join_unnest('partition', 'double_array_data') }}
where 1 = 1
and double_array_data is not null
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,164 @@
{{ config(
cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"],
partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}},
unique_key = "_airbyte_unique_key_scd",
schema = "test_normalization",
post_hook = ["
{%
set final_table_relation = adapter.get_relation(
database=this.database,
schema=this.schema,
identifier='nested_stream_with_complex_columns_resulting_into_long_names'
)
%}
{#
If the final table doesn't exist, then obviously we can't delete anything from it.
Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync)
So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway)
#}
{%
if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
%}
-- Delete records which are no longer active:
-- This query is equivalent, but the left join version is more performant:
-- delete from final_table where unique_key in (
--     select unique_key from scd_table where 1 = 1 <incremental_clause(normalized_at, final_table)>
-- ) and unique_key not in (
--     select unique_key from scd_table where active_row = 1 <incremental_clause(normalized_at, final_table)>
-- )
-- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
-- entries that were _updated_ recently. This is because a deleted record will have an SCD record
-- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in (
select recent_records.unique_key
from (
select distinct _airbyte_unique_key as unique_key
from {{ this }}
where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }}
) recent_records
left join (
select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
from {{ this }}
where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }}
group by _airbyte_unique_key
) active_counts
on recent_records.unique_key = active_counts.unique_key
where active_count is null or active_count = 0
)
{% else %}
-- We have to have a non-empty query, so just do a noop delete
delete from {{ this }} where 1=0
{% endif %}
","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"],
tags = [ "top-level" ]
) }}
-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')
with
{% if is_incremental() %}
new_data as (
-- retrieve incremental "new" data
select
*
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }}
-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
),
new_data_ids as (
-- build a subset of _airbyte_unique_key from rows that are new
select distinct
{{ dbt_utils.surrogate_key([
'id',
]) }} as _airbyte_unique_key
from new_data
),
empty_new_data as (
-- build an empty table to only keep the table's column types
select * from new_data where 1 = 0
),
previous_active_scd_data as (
-- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
select
{{ star_intersect(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
from {{ this }} as this_data
-- make a join with new_data using primary key to filter active data that need to be updated only
join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
-- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes)
left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
where _airbyte_active_row = 1
),
input_data as (
select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data
union all
select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data
),
{% else %}
input_data as (
select *
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }}
-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
),
{% endif %}
scd_data as (
-- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
select
{{ dbt_utils.surrogate_key([
'id',
]) }} as _airbyte_unique_key,
id,
date,
{{ adapter.quote('partition') }},
date as _airbyte_start_at,
lag(date) over (
partition by id
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) as _airbyte_end_at,
case when row_number() over (
partition by id
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) = 1 then 1 else 0 end as _airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from input_data
),
dedup_data as (
select
-- we need to ensure de-duplicated rows for merge/update queries
-- additionally, we generate a unique key for the scd table
row_number() over (
partition by
_airbyte_unique_key,
_airbyte_start_at,
_airbyte_emitted_at
order by _airbyte_active_row desc, _airbyte_ab_id
) as _airbyte_row_num,
{{ dbt_utils.surrogate_key([
'_airbyte_unique_key',
'_airbyte_start_at',
'_airbyte_emitted_at'
]) }} as _airbyte_unique_key_scd,
scd_data.*
from scd_data
)
select
_airbyte_unique_key,
_airbyte_unique_key_scd,
id,
date,
{{ adapter.quote('partition') }},
_airbyte_start_at,
_airbyte_end_at,
_airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from dedup_data where _airbyte_row_num = 1
@@ -0,0 +1,24 @@
{{ config(
cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"],
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
unique_key = "_airbyte_unique_key",
schema = "test_normalization",
tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
select
_airbyte_unique_key,
id,
date,
{{ adapter.quote('partition') }},
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,21 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "test_normalization",
tags = [ "nested" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }}
select
_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid,
double_array_data,
DATA,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_partition_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }}
-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,20 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "test_normalization",
tags = [ "nested" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }}
select
_airbyte_partition_hashid,
currency,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_DATA_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }}
-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,20 @@
{{ config(
cluster_by = "_airbyte_emitted_at",
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
schema = "test_normalization",
tags = [ "nested" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }}
select
_airbyte_partition_hashid,
id,
_airbyte_ab_id,
_airbyte_emitted_at,
{{ current_timestamp() }} as _airbyte_normalized_at,
_airbyte_double_array_data_hashid
from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }}
-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }}
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,23 @@
version: 2
sources:
- name: test_normalization
  quoting:
    database: true
    schema: false
    identifier: false
  tables:
  - name: _airbyte_raw_arrays
  - name: _airbyte_raw_conflict_stream_array
  - name: _airbyte_raw_conflict_stream_name
  - name: _airbyte_raw_conflict_stream_scalar
  - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names
  - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names
  - name: _airbyte_raw_some_stream_that_was_empty
  - name: _airbyte_raw_unnest_alias
- name: test_normalization_namespace
  quoting:
    database: true
    schema: false
    identifier: false
  tables:
  - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names
@@ -0,0 +1,27 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd

when matched then update set
`_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`

when not matched then insert
(`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
values
(`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
@@ -0,0 +1,27 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key

when matched then update set
`_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`

when not matched then insert
(`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
values
(`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`)
@@ -0,0 +1,21 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on FALSE

when not matched then insert
(`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`)
values
(`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`)
@@ -0,0 +1,21 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on FALSE

when not matched then insert
(`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`)
values
(`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`)
@@ -0,0 +1,21 @@
merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data` as DBT_INTERNAL_DEST
using (
select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on FALSE

when not matched then insert
(`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`)
values
(`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`)
@@ -0,0 +1,70 @@
name: airbyte_utils
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- modified_models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
quoting:
  database: true
  schema: false
  identifier: true
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view
dispatch:
- macro_namespace: dbt_utils
  search_order:
  - airbyte_utils
  - dbt_utils
vars:
  json_column: _airbyte_data
  models_to_source:
    exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate
    exchange_rate: test_normalization._airbyte_raw_exchange_rate
    dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate
    renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded
@@ -0,0 +1,90 @@
name: airbyte_utils
version: '1.0'
config-version: 2
profile: normalize
model-paths:
- models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: /dbt
clean-targets:
- build
- dbt_modules
quoting:
  database: true
  schema: false
  identifier: true
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        +on_schema_change: sync_all_columns
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view
dispatch:
- macro_namespace: dbt_utils
  search_order:
  - airbyte_utils
  - dbt_utils
vars:
  json_column: _airbyte_data
  models_to_source:
    exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate
    exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate
    exchange_rate: test_normalization._airbyte_raw_exchange_rate
    dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate
    dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate
    renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
    dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded
    dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded
    pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx
    pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx
    1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number
    1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number
    multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts
    multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts
    types_testing_ab1: test_normalization._airbyte_raw_types_testing
    types_testing_ab2: test_normalization._airbyte_raw_types_testing
    types_testing_stg: test_normalization._airbyte_raw_types_testing
    types_testing_scd: test_normalization._airbyte_raw_types_testing
    types_testing: test_normalization._airbyte_raw_types_testing
@@ -0,0 +1,108 @@
create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd`
partition by range_bucket(
_airbyte_active_row,
generate_array(0, 1, 1)
)
cluster by _airbyte_unique_key_scd, _airbyte_emitted_at
OPTIONS()
as (

-- depends_on: ref('dedup_exchange_rate_stg')
with

input_data as (
select *
from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg`
-- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
),

scd_data as (
-- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
select
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(currency as
string
), ''), '-', coalesce(cast(NZD as
string
), '')) as
string
))) as _airbyte_unique_key,
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
date as _airbyte_start_at,
lag(date) over (
partition by id, currency, cast(NZD as
string
)
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) as _airbyte_end_at,
case when row_number() over (
partition by id, currency, cast(NZD as
string
)
order by
date is null asc,
date desc,
_airbyte_emitted_at desc
) = 1 then 1 else 0 end as _airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
_airbyte_dedup_exchange_rate_hashid
from input_data
),
dedup_data as (
select
-- we need to ensure de-duplicated rows for merge/update queries
-- additionally, we generate a unique key for the scd table
row_number() over (
partition by
_airbyte_unique_key,
_airbyte_start_at,
_airbyte_emitted_at
order by _airbyte_active_row desc, _airbyte_ab_id
) as _airbyte_row_num,
to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as
string
), ''), '-', coalesce(cast(_airbyte_start_at as
string
), ''), '-', coalesce(cast(_airbyte_emitted_at as
string
), '')) as
string
))) as _airbyte_unique_key_scd,
scd_data.*
from scd_data
)
select
_airbyte_unique_key,
_airbyte_unique_key_scd,
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
_airbyte_start_at,
_airbyte_end_at,
_airbyte_active_row,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_dedup_exchange_rate_hashid
from dedup_data where _airbyte_row_num = 1
);
@@ -0,0 +1,31 @@
create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_unique_key, _airbyte_emitted_at
OPTIONS()
as (

-- Final base SQL model
-- depends_on: `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd`
select
_airbyte_unique_key,
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_dedup_exchange_rate_hashid
from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd`
-- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
where 1 = 1
and _airbyte_active_row = 1

);
@@ -0,0 +1,145 @@
create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (

with __dbt__cte__exchange_rate_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract_scalar(_airbyte_data, "$['currency']") as currency,
json_extract_scalar(_airbyte_data, "$['date']") as date,
json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
json_extract_scalar(_airbyte_data, "$['USD']") as USD,
json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes,
json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz,
json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz,
json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz,
json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__exchange_rate_ab1
select
cast(id as
int64
) as id,
cast(currency as
string
) as currency,
cast(nullif(date, '') as
date
) as date,
cast(nullif(timestamp_col, '') as
timestamp
) as timestamp_col,
cast(HKD_special___characters as
float64
) as HKD_special___characters,
cast(HKD_special___characters_1 as
string
) as HKD_special___characters_1,
cast(NZD as
float64
) as NZD,
cast(USD as
float64
) as USD,
cast(column___with__quotes as
string
) as column___with__quotes,
cast(nullif(datetime_tz, '') as
timestamp
) as datetime_tz,
cast(nullif(datetime_no_tz, '') as
datetime
) as datetime_no_tz,
cast(nullif(time_tz, '') as
STRING
) as time_tz,
cast(nullif(time_no_tz, '') as
time
) as time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__exchange_rate_ab1
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab3 as (

-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__exchange_rate_ab2
select
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(currency as
string
), ''), '-', coalesce(cast(date as
string
), ''), '-', coalesce(cast(timestamp_col as
string
), ''), '-', coalesce(cast(HKD_special___characters as
string
), ''), '-', coalesce(cast(HKD_special___characters_1 as
string
), ''), '-', coalesce(cast(NZD as
string
), ''), '-', coalesce(cast(USD as
string
), ''), '-', coalesce(cast(column___with__quotes as
string
), ''), '-', coalesce(cast(datetime_tz as
string
), ''), '-', coalesce(cast(datetime_no_tz as
string
), ''), '-', coalesce(cast(time_tz as
string
), ''), '-', coalesce(cast(time_no_tz as
string
), '')) as
string
))) as _airbyte_exchange_rate_hashid,
tmp.*
from __dbt__cte__exchange_rate_ab2 tmp
-- exchange_rate
where 1 = 1
)-- Final base SQL model
-- depends_on: __dbt__cte__exchange_rate_ab3
select
id,
currency,
date,
timestamp_col,
HKD_special___characters,
HKD_special___characters_1,
NZD,
USD,
column___with__quotes,
datetime_tz,
datetime_no_tz,
time_tz,
time_no_tz,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at,
_airbyte_exchange_rate_hashid
from __dbt__cte__exchange_rate_ab3
-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
where 1 = 1
);
@@ -0,0 +1,89 @@
create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg`
OPTIONS()
as
with __dbt__cte__dedup_exchange_rate_ab1 as (

-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
select
json_extract_scalar(_airbyte_data, "$['id']") as id,
json_extract_scalar(_airbyte_data, "$['currency']") as currency,
json_extract_scalar(_airbyte_data, "$['date']") as date,
json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
json_extract_scalar(_airbyte_data, "$['USD']") as USD,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias
-- dedup_exchange_rate
where 1 = 1

), __dbt__cte__dedup_exchange_rate_ab2 as (

-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__dedup_exchange_rate_ab1
select
cast(id as
int64
) as id,
cast(currency as
string
) as currency,
cast(nullif(date, '') as
date
) as date,
cast(nullif(timestamp_col, '') as
timestamp
) as timestamp_col,
cast(HKD_special___characters as
float64
) as HKD_special___characters,
cast(HKD_special___characters_1 as
string
) as HKD_special___characters_1,
cast(NZD as
float64
) as NZD,
cast(USD as
float64
) as USD,
_airbyte_ab_id,
_airbyte_emitted_at,
CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__dedup_exchange_rate_ab1
-- dedup_exchange_rate
where 1 = 1

)-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__dedup_exchange_rate_ab2
select
to_hex(md5(cast(concat(coalesce(cast(id as
string
), ''), '-', coalesce(cast(currency as
string
), ''), '-', coalesce(cast(date as
string
), ''), '-', coalesce(cast(timestamp_col as
string
), ''), '-', coalesce(cast(HKD_special___characters as
string
), ''), '-', coalesce(cast(HKD_special___characters_1 as
string
), ''), '-', coalesce(cast(NZD as
string
), ''), '-', coalesce(cast(USD as
string
), '')) as
string
))) as _airbyte_dedup_exchange_rate_hashid,
tmp.*
from __dbt__cte__dedup_exchange_rate_ab2 tmp
-- dedup_exchange_rate
where 1 = 1
;
@@ -0,0 +1,83 @@

create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg`
OPTIONS()
as
with __dbt__cte__multiple_column_names_conflicts_ab1 as (
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts
select
  json_extract_scalar(_airbyte_data, "$['id']") as id,
  json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id,
  json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1,
  json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2,
  json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3,
  json_extract_scalar(_airbyte_data, "$['User@Id']") as User_Id_4,
  json_extract_scalar(_airbyte_data, "$['UserId']") as UserId,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias
-- multiple_column_names_conflicts
where 1 = 1
), __dbt__cte__multiple_column_names_conflicts_ab2 as (
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1
select
  cast(id as int64) as id,
  cast(User_Id as string) as User_Id,
  cast(user_id_1 as float64) as user_id_1,
  cast(User_id_2 as float64) as User_id_2,
  cast(user_id_3 as float64) as user_id_3,
  cast(User_Id_4 as string) as User_Id_4,
  cast(UserId as float64) as UserId,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__multiple_column_names_conflicts_ab1
-- multiple_column_names_conflicts
where 1 = 1
)
-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2
select
  to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(User_Id as string), ''), '-',
    coalesce(cast(user_id_1 as string), ''), '-',
    coalesce(cast(User_id_2 as string), ''), '-',
    coalesce(cast(user_id_3 as string), ''), '-',
    coalesce(cast(User_Id_4 as string), ''), '-',
    coalesce(cast(UserId as string), '')
  ) as string))) as _airbyte_multiple_column_names_conflicts_hashid,
  tmp.*
from __dbt__cte__multiple_column_names_conflicts_ab2 tmp
-- multiple_column_names_conflicts
where 1 = 1;
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
select
    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
    {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency,
    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date,
    {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col,
    {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters,
    {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters_1,
    {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD,
    {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: {{ ref('dedup_exchange_rate_ab1') }}
select
    cast(id as {{ dbt_utils.type_bigint() }}) as id,
    cast(currency as {{ dbt_utils.type_string() }}) as currency,
    cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date,
    cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col,
    cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters,
    cast(HKD_special___characters_1 as {{ dbt_utils.type_string() }}) as HKD_special___characters_1,
    cast(NZD as {{ dbt_utils.type_float() }}) as NZD,
    cast(USD as {{ dbt_utils.type_float() }}) as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('dedup_exchange_rate_ab1') }}
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
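Reading this template against the compiled `dedup_exchange_rate_stg` view earlier in the diff shows how the cross-database macros resolve on BigQuery. A quick correspondence table, taken directly from the compiled output above:

-- Macro in the Jinja model               -- Rendered BigQuery SQL
-- {{ dbt_utils.type_bigint() }}          -> int64
-- {{ dbt_utils.type_string() }}          -> string
-- {{ dbt_utils.type_float() }}           -> float64
-- {{ type_date() }}                      -> date
-- {{ type_timestamp_with_timezone() }}   -> timestamp
-- {{ empty_string_to_null('date') }}     -> nullif(date, '')
-- {{ current_timestamp() }}              -> CURRENT_TIMESTAMP()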
@@ -0,0 +1,178 @@
{{ config(
    cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}},
    unique_key = "_airbyte_unique_key_scd",
    schema = "test_normalization",
    post_hook = ["
    {%
    set final_table_relation = adapter.get_relation(
        database=this.database,
        schema=this.schema,
        identifier='dedup_exchange_rate'
    )
    %}
    {#
    If the final table doesn't exist, then obviously we can't delete anything from it.
    Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync).
    So skip this deletion if the column doesn't exist (in that case, the table is guaranteed to be empty anyway).
    #}
    {%
    if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
    %}
    -- Delete records which are no longer active.
    -- This query is equivalent, but the left join version is more performant:
    -- delete from final_table where unique_key in (
    --     select unique_key from scd_table where 1 = 1 <incremental_clause(normalized_at, final_table)>
    -- ) and unique_key not in (
    --     select unique_key from scd_table where active_row = 1 <incremental_clause(normalized_at, final_table)>
    -- )
    -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
    -- entries that were _updated_ recently. This is because a deleted record will have an SCD record
    -- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
    delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in (
        select recent_records.unique_key
        from (
            select distinct _airbyte_unique_key as unique_key
            from {{ this }}
            where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
        ) recent_records
        left join (
            select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
            from {{ this }}
            where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
            group by _airbyte_unique_key
        ) active_counts
        on recent_records.unique_key = active_counts.unique_key
        where active_count is null or active_count = 0
    )
    {% else %}
    -- We have to have a non-empty query, so just do a no-op delete.
    delete from {{ this }} where 1=0
    {% endif %}
    ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"],
    tags = [ "top-level" ]
) }}
-- depends_on: ref('dedup_exchange_rate_stg')
with
{% if is_incremental() %}
new_data as (
    -- retrieve incremental "new" data
    select
        *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
    where 1 = 1
    {{ incremental_clause('_airbyte_emitted_at', this) }}
),
new_data_ids as (
    -- build a subset of _airbyte_unique_key from rows that are new
    select distinct
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key
    from new_data
),
empty_new_data as (
    -- build an empty table to only keep the table's column types
    select * from new_data where 1 = 0
),
previous_active_scd_data as (
    -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
    select
        {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
    from {{ this }} as this_data
    -- join with new_data on the primary key to pick out only the active data that needs updating
    join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
    -- force the left join to NULL values (we just need to transfer column types for the star_intersect macro on schema changes)
    left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
    where _airbyte_active_row = 1
),
input_data as (
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data
    union all
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data
),
{% else %}
input_data as (
    select *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
),
{% endif %}
scd_data as (
    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by its primary key
    select
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key,
        id,
        currency,
        date,
        timestamp_col,
        HKD_special___characters,
        HKD_special___characters_1,
        NZD,
        USD,
        date as _airbyte_start_at,
        lag(date) over (
            partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        _airbyte_dedup_exchange_rate_hashid
    from input_data
),
dedup_data as (
    select
        -- we need to ensure de-duplicated rows for merge/update queries
        -- additionally, we generate a unique key for the scd table
        row_number() over (
            partition by
                _airbyte_unique_key,
                _airbyte_start_at,
                _airbyte_emitted_at
            order by _airbyte_active_row desc, _airbyte_ab_id
        ) as _airbyte_row_num,
        {{ dbt_utils.surrogate_key([
            '_airbyte_unique_key',
            '_airbyte_start_at',
            '_airbyte_emitted_at'
        ]) }} as _airbyte_unique_key_scd,
        scd_data.*
    from scd_data
)
select
    _airbyte_unique_key,
    _airbyte_unique_key_scd,
    id,
    currency,
    date,
    timestamp_col,
    HKD_special___characters,
    HKD_special___characters_1,
    NZD,
    USD,
    _airbyte_start_at,
    _airbyte_end_at,
    _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from dedup_data where _airbyte_row_num = 1
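The `incremental_clause` macro invoked throughout these models is defined in the normalization macro package rather than in this diff. As a rough sketch of the filter it is expected to emit (an assumption based on the behavior of the compiled models, not the macro's verbatim source): it guards on `is_incremental()` and keeps only rows newer than the destination's high-water mark, something like:

{% macro incremental_clause(col_emitted_at, tablename) -%}
{%- if is_incremental() %}
and coalesce(
    cast({{ col_emitted_at }} as timestamp) >=
        (select max(cast({{ col_emitted_at }} as timestamp)) from {{ tablename }}),
    -- if the destination table is empty, max() is NULL: keep all rows
    true)
{%- endif %}
{%- endmacro %}

Under that reading, `where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }}` compiles to a plain `where 1 = 1` on a full refresh and to an emitted-at cutoff on subsequent incremental runs.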
@@ -0,0 +1,29 @@
{{ config(
    cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = "_airbyte_unique_key",
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('dedup_exchange_rate_scd') }}
select
    _airbyte_unique_key,
    id,
    currency,
    date,
    timestamp_col,
    HKD_special___characters,
    HKD_special___characters_1,
    NZD,
    USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from {{ ref('dedup_exchange_rate_scd') }}
-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,31 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('exchange_rate_ab3') }}
select
    id,
    currency,
    date,
    timestamp_col,
    HKD_special___characters,
    HKD_special___characters_1,
    NZD,
    USD,
    column___with__quotes,
    datetime_tz,
    datetime_no_tz,
    time_tz,
    time_no_tz,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_exchange_rate_hashid
from {{ ref('exchange_rate_ab3') }}
-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
where 1 = 1
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to build a hash column based on the values of this record
-- depends_on: {{ ref('dedup_exchange_rate_ab2') }}
select
    {{ dbt_utils.surrogate_key([
        'id',
        'currency',
        'date',
        'timestamp_col',
        'HKD_special___characters',
        'HKD_special___characters_1',
        'NZD',
        'USD',
    ]) }} as _airbyte_dedup_exchange_rate_hashid,
    tmp.*
from {{ ref('dedup_exchange_rate_ab2') }} tmp
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
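On BigQuery, `dbt_utils.surrogate_key` resolves to the md5-over-concatenated-strings expression visible in the compiled staging view earlier in this diff: each key field is cast to string, null-coalesced to '', and joined with '-'. Roughly, matching the compiled output above:

to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(currency as string), ''), '-',
    -- ... one coalesce(cast(<field> as string), '') per key field ...
    coalesce(cast(USD as string), '')
) as string))) as _airbyte_dedup_exchange_rate_hashid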
@@ -0,0 +1,16 @@
version: 2
sources:
  - name: test_normalization
    quoting:
      database: true
      schema: false
      identifier: false
    tables:
      - name: _airbyte_raw_1_prefix_startwith_number
      - name: _airbyte_raw_dedup_cdc_excluded
      - name: _airbyte_raw_dedup_exchange_rate
      - name: _airbyte_raw_exchange_rate
      - name: _airbyte_raw_multiple_column_names_conflicts
      - name: _airbyte_raw_pos_dedup_cdcx
      - name: _airbyte_raw_renamed_dedup_cdc_excluded
      - name: _airbyte_raw_types_testing
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
select
    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id,
    {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency,
    {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column,
    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date,
    {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col,
    {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters,
    {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD,
    {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: {{ ref('dedup_exchange_rate_ab1') }}
select
    cast(id as {{ dbt_utils.type_float() }}) as id,
    cast(currency as {{ dbt_utils.type_string() }}) as currency,
    cast(new_column as {{ dbt_utils.type_float() }}) as new_column,
    cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date,
    cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col,
    cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters,
    cast(NZD as {{ dbt_utils.type_float() }}) as NZD,
    cast(USD as {{ dbt_utils.type_bigint() }}) as USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at
from {{ ref('dedup_exchange_rate_ab1') }}
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,178 @@
{{ config(
    cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}},
    unique_key = "_airbyte_unique_key_scd",
    schema = "test_normalization",
    post_hook = ["
    {%
    set final_table_relation = adapter.get_relation(
        database=this.database,
        schema=this.schema,
        identifier='dedup_exchange_rate'
    )
    %}
    {#
    If the final table doesn't exist, then obviously we can't delete anything from it.
    Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync).
    So skip this deletion if the column doesn't exist (in that case, the table is guaranteed to be empty anyway).
    #}
    {%
    if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name')
    %}
    -- Delete records which are no longer active.
    -- This query is equivalent, but the left join version is more performant:
    -- delete from final_table where unique_key in (
    --     select unique_key from scd_table where 1 = 1 <incremental_clause(normalized_at, final_table)>
    -- ) and unique_key not in (
    --     select unique_key from scd_table where active_row = 1 <incremental_clause(normalized_at, final_table)>
    -- )
    -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD
    -- entries that were _updated_ recently. This is because a deleted record will have an SCD record
    -- which was emitted a long time ago, but recently re-normalized to have active_row = 0.
    delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in (
        select recent_records.unique_key
        from (
            select distinct _airbyte_unique_key as unique_key
            from {{ this }}
            where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
        ) recent_records
        left join (
            select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count
            from {{ this }}
            where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }}
            group by _airbyte_unique_key
        ) active_counts
        on recent_records.unique_key = active_counts.unique_key
        where active_count is null or active_count = 0
    )
    {% else %}
    -- We have to have a non-empty query, so just do a no-op delete.
    delete from {{ this }} where 1=0
    {% endif %}
    ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"],
    tags = [ "top-level" ]
) }}
-- depends_on: ref('dedup_exchange_rate_stg')
with
{% if is_incremental() %}
new_data as (
    -- retrieve incremental "new" data
    select
        *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
    where 1 = 1
    {{ incremental_clause('_airbyte_emitted_at', this) }}
),
new_data_ids as (
    -- build a subset of _airbyte_unique_key from rows that are new
    select distinct
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key
    from new_data
),
empty_new_data as (
    -- build an empty table to only keep the table's column types
    select * from new_data where 1 = 0
),
previous_active_scd_data as (
    -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes
    select
        {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }}
    from {{ this }} as this_data
    -- join with new_data on the primary key to pick out only the active data that needs updating
    join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key
    -- force the left join to NULL values (we just need to transfer column types for the star_intersect macro on schema changes)
    left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id
    where _airbyte_active_row = 1
),
input_data as (
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data
    union all
    select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data
),
{% else %}
input_data as (
    select *
    from {{ ref('dedup_exchange_rate_stg') }}
    -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
),
{% endif %}
scd_data as (
    -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by its primary key
    select
        {{ dbt_utils.surrogate_key([
            'id',
            'currency',
            'NZD',
        ]) }} as _airbyte_unique_key,
        id,
        currency,
        new_column,
        date,
        timestamp_col,
        HKD_special___characters,
        NZD,
        USD,
        date as _airbyte_start_at,
        lag(date) over (
            partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) as _airbyte_end_at,
        case when row_number() over (
            partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }})
            order by
                date is null asc,
                date desc,
                _airbyte_emitted_at desc
        ) = 1 then 1 else 0 end as _airbyte_active_row,
        _airbyte_ab_id,
        _airbyte_emitted_at,
        _airbyte_dedup_exchange_rate_hashid
    from input_data
),
dedup_data as (
    select
        -- we need to ensure de-duplicated rows for merge/update queries
        -- additionally, we generate a unique key for the scd table
        row_number() over (
            partition by
                _airbyte_unique_key,
                _airbyte_start_at,
                _airbyte_emitted_at
            order by _airbyte_active_row desc, _airbyte_ab_id
        ) as _airbyte_row_num,
        {{ dbt_utils.surrogate_key([
            '_airbyte_unique_key',
            '_airbyte_start_at',
            '_airbyte_emitted_at'
        ]) }} as _airbyte_unique_key_scd,
        scd_data.*
    from scd_data
)
select
    _airbyte_unique_key,
    _airbyte_unique_key_scd,
    id,
    currency,
    new_column,
    date,
    timestamp_col,
    HKD_special___characters,
    NZD,
    USD,
    _airbyte_start_at,
    _airbyte_end_at,
    _airbyte_active_row,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from dedup_data where _airbyte_row_num = 1
@@ -0,0 +1,29 @@
{{ config(
    cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"],
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = "_airbyte_unique_key",
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('dedup_exchange_rate_scd') }}
select
    _airbyte_unique_key,
    id,
    currency,
    new_column,
    date,
    timestamp_col,
    HKD_special___characters,
    NZD,
    USD,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_dedup_exchange_rate_hashid
from {{ ref('dedup_exchange_rate_scd') }}
-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
where 1 = 1
and _airbyte_active_row = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,27 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "test_normalization",
    tags = [ "top-level" ]
) }}
-- Final base SQL model
-- depends_on: {{ ref('exchange_rate_ab3') }}
select
    id,
    currency,
    new_column,
    date,
    timestamp_col,
    HKD_special___characters,
    NZD,
    USD,
    column___with__quotes,
    _airbyte_ab_id,
    _airbyte_emitted_at,
    {{ current_timestamp() }} as _airbyte_normalized_at,
    _airbyte_exchange_rate_hashid
from {{ ref('exchange_rate_ab3') }}
-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
where 1 = 1
@@ -0,0 +1,26 @@
{{ config(
    cluster_by = "_airbyte_emitted_at",
    partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"},
    unique_key = '_airbyte_ab_id',
    schema = "_airbyte_test_normalization",
    tags = [ "top-level-intermediate" ]
) }}
-- SQL model to build a hash column based on the values of this record
-- depends_on: {{ ref('dedup_exchange_rate_ab2') }}
select
    {{ dbt_utils.surrogate_key([
        'id',
        'currency',
        'new_column',
        'date',
        'timestamp_col',
        'HKD_special___characters',
        'NZD',
        'USD',
    ]) }} as _airbyte_dedup_exchange_rate_hashid,
    tmp.*
from {{ ref('dedup_exchange_rate_ab2') }} tmp
-- dedup_exchange_rate
where 1 = 1
{{ incremental_clause('_airbyte_emitted_at', this) }}
@@ -0,0 +1,12 @@
version: 2
sources:
  - name: test_normalization
    quoting:
      database: true
      schema: false
      identifier: false
    tables:
      - name: _airbyte_raw_dedup_cdc_excluded
      - name: _airbyte_raw_dedup_exchange_rate
      - name: _airbyte_raw_exchange_rate
      - name: _airbyte_raw_renamed_dedup_cdc_excluded
@@ -0,0 +1,27 @@

merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` as DBT_INTERNAL_DEST
using (
    select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
    DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd

when matched then update set
    `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,
    `_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,
    `id` = DBT_INTERNAL_SOURCE.`id`,
    `currency` = DBT_INTERNAL_SOURCE.`currency`,
    `date` = DBT_INTERNAL_SOURCE.`date`,
    `timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,
    `HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,
    `HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,
    `NZD` = DBT_INTERNAL_SOURCE.`NZD`,
    `USD` = DBT_INTERNAL_SOURCE.`USD`,
    `_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,
    `_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,
    `_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,
    `_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,
    `_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,
    `_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,
    `_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid`

when not matched then insert
    (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
values
    (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
@@ -0,0 +1,27 @@

merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` as DBT_INTERNAL_DEST
using (
    select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate__dbt_tmp`
) as DBT_INTERNAL_SOURCE
on
    DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key

when matched then update set
    `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,
    `id` = DBT_INTERNAL_SOURCE.`id`,
    `currency` = DBT_INTERNAL_SOURCE.`currency`,
    `date` = DBT_INTERNAL_SOURCE.`date`,
    `timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,
    `HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,
    `HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,
    `NZD` = DBT_INTERNAL_SOURCE.`NZD`,
    `USD` = DBT_INTERNAL_SOURCE.`USD`,
    `_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,
    `_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,
    `_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,
    `_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid`

when not matched then insert
    (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
values
    (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`)
@@ -0,0 +1,145 @@

create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate`
partition by timestamp_trunc(_airbyte_emitted_at, day)
cluster by _airbyte_emitted_at
OPTIONS()
as (
with __dbt__cte__exchange_rate_ab1 as (
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
select
  json_extract_scalar(_airbyte_data, "$['id']") as id,
  json_extract_scalar(_airbyte_data, "$['currency']") as currency,
  json_extract_scalar(_airbyte_data, "$['date']") as date,
  json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
  json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
  json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
  json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
  json_extract_scalar(_airbyte_data, "$['USD']") as USD,
  json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes,
  json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz,
  json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz,
  json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz,
  json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab2 as (
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__exchange_rate_ab1
select
  cast(id as int64) as id,
  cast(currency as string) as currency,
  cast(nullif(date, '') as date) as date,
  cast(nullif(timestamp_col, '') as timestamp) as timestamp_col,
  cast(HKD_special___characters as float64) as HKD_special___characters,
  cast(HKD_special___characters_1 as string) as HKD_special___characters_1,
  cast(NZD as float64) as NZD,
  cast(USD as float64) as USD,
  cast(column___with__quotes as string) as column___with__quotes,
  cast(nullif(datetime_tz, '') as timestamp) as datetime_tz,
  cast(nullif(datetime_no_tz, '') as datetime) as datetime_no_tz,
  cast(nullif(time_tz, '') as STRING) as time_tz,
  cast(nullif(time_no_tz, '') as time) as time_no_tz,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__exchange_rate_ab1
-- exchange_rate
where 1 = 1
), __dbt__cte__exchange_rate_ab3 as (
-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__exchange_rate_ab2
select
  to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(currency as string), ''), '-',
    coalesce(cast(date as string), ''), '-',
    coalesce(cast(timestamp_col as string), ''), '-',
    coalesce(cast(HKD_special___characters as string), ''), '-',
    coalesce(cast(HKD_special___characters_1 as string), ''), '-',
    coalesce(cast(NZD as string), ''), '-',
    coalesce(cast(USD as string), ''), '-',
    coalesce(cast(column___with__quotes as string), ''), '-',
    coalesce(cast(datetime_tz as string), ''), '-',
    coalesce(cast(datetime_no_tz as string), ''), '-',
    coalesce(cast(time_tz as string), ''), '-',
    coalesce(cast(time_no_tz as string), '')
  ) as string))) as _airbyte_exchange_rate_hashid,
  tmp.*
from __dbt__cte__exchange_rate_ab2 tmp
-- exchange_rate
where 1 = 1
)
-- Final base SQL model
-- depends_on: __dbt__cte__exchange_rate_ab3
select
  id,
  currency,
  date,
  timestamp_col,
  HKD_special___characters,
  HKD_special___characters_1,
  NZD,
  USD,
  column___with__quotes,
  datetime_tz,
  datetime_no_tz,
  time_tz,
  time_no_tz,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at,
  _airbyte_exchange_rate_hashid
from __dbt__cte__exchange_rate_ab3
-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate
where 1 = 1
);
@@ -0,0 +1,89 @@

create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg`
OPTIONS()
as
with __dbt__cte__dedup_exchange_rate_ab1 as (
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate
select
  json_extract_scalar(_airbyte_data, "$['id']") as id,
  json_extract_scalar(_airbyte_data, "$['currency']") as currency,
  json_extract_scalar(_airbyte_data, "$['date']") as date,
  json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col,
  json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters,
  json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1,
  json_extract_scalar(_airbyte_data, "$['NZD']") as NZD,
  json_extract_scalar(_airbyte_data, "$['USD']") as USD,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias
-- dedup_exchange_rate
where 1 = 1
), __dbt__cte__dedup_exchange_rate_ab2 as (
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
-- depends_on: __dbt__cte__dedup_exchange_rate_ab1
select
  cast(id as int64) as id,
  cast(currency as string) as currency,
  cast(nullif(date, '') as date) as date,
  cast(nullif(timestamp_col, '') as timestamp) as timestamp_col,
  cast(HKD_special___characters as float64) as HKD_special___characters,
  cast(HKD_special___characters_1 as string) as HKD_special___characters_1,
  cast(NZD as float64) as NZD,
  cast(USD as float64) as USD,
  _airbyte_ab_id,
  _airbyte_emitted_at,
  CURRENT_TIMESTAMP() as _airbyte_normalized_at
from __dbt__cte__dedup_exchange_rate_ab1
-- dedup_exchange_rate
where 1 = 1
)
-- SQL model to build a hash column based on the values of this record
-- depends_on: __dbt__cte__dedup_exchange_rate_ab2
select
  to_hex(md5(cast(concat(
    coalesce(cast(id as string), ''), '-',
    coalesce(cast(currency as string), ''), '-',
    coalesce(cast(date as string), ''), '-',
    coalesce(cast(timestamp_col as string), ''), '-',
    coalesce(cast(HKD_special___characters as string), ''), '-',
    coalesce(cast(HKD_special___characters_1 as string), ''), '-',
    coalesce(cast(NZD as string), ''), '-',
    coalesce(cast(USD as string), '')
  ) as string))) as _airbyte_dedup_exchange_rate_hashid,
  tmp.*
from __dbt__cte__dedup_exchange_rate_ab2 tmp
-- dedup_exchange_rate
where 1 = 1;