
feat(source-google-analytics): migrate to low-code (#60342)

Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
Author: Serhii Lazebnyi
Date: 2025-06-06 12:42:43 +02:00
Committed by: GitHub
Parent: 108b82dff4
Commit: 952c8f3bbc
33 changed files with 4194 additions and 6662 deletions


@@ -1,3 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


@@ -1,100 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import datetime
import json
from copy import deepcopy
import pytest
# JSON credentials with a fake private key
json_credentials = """
{
"type": "service_account",
"project_id": "unittest-project-id",
"private_key_id": "9qf98e52oda52g5ne23al6evnf13649c2u077162c",
"private_key": "-----BEGIN PRIVATE KEY-----\\nMIIBVQIBADANBgkqhkiG9w0BAQEFAASCAT8wggE7AgEAAkEA3slcXL+dA36ESmOi\\n1xBhZmp5Hn0WkaHDtW4naba3plva0ibloBNWhFhjQOh7Ff01PVjhT4D5jgqXBIgc\\nz9Gv3QIDAQABAkEArlhYPoD5SB2/O1PjwHgiMPrL1C9B9S/pr1cH4vPJnpY3VKE3\\n5hvdil14YwRrcbmIxMkK2iRLi9lM4mJmdWPy4QIhAPsRFXZSGx0TZsDxD9V0ZJmZ\\n0AuDCj/NF1xB5KPLmp7pAiEA4yoFox6w7ql/a1pUVaLt0NJkDfE+22pxYGNQaiXU\\nuNUCIQCsFLaIJZiN4jlgbxlyLVeya9lLuqIwvqqPQl6q4ad12QIgS9gG48xmdHig\\n8z3IdIMedZ8ZCtKmEun6Cp1+BsK0wDUCIF0nHfSuU+eTQ2qAON2SHIrJf8UeFO7N\\nzdTN1IwwQqjI\\n-----END PRIVATE KEY-----\\n",
"client_email": "google-analytics-access@unittest-project-id.iam.gserviceaccount.com",
"client_id": "213243192021686092537",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/google-analytics-access%40unittest-project-id.iam.gserviceaccount.com"
}
"""
@pytest.fixture
def one_year_ago():
    # NOTE: despite the name, this returns yesterday's date
    return datetime.datetime.strftime((datetime.datetime.now() - datetime.timedelta(days=1)), "%Y-%m-%d")
@pytest.fixture
def config(one_year_ago):
return {
"property_id": "108176369",
"property_ids": ["108176369"],
"credentials": {"auth_type": "Service", "credentials_json": json_credentials},
"date_ranges_start_date": one_year_ago,
"dimensions": ["date", "deviceCategory", "operatingSystem", "browser"],
"metrics": [
"totalUsers",
"newUsers",
"sessions",
"sessionsPerUser",
"averageSessionDuration",
"screenPageViews",
"screenPageViewsPerSession",
"bounceRate",
],
"keep_empty_rows": True,
"custom_reports": json.dumps(
[
{
"name": "report1",
"dimensions": ["date", "browser"],
"metrics": ["totalUsers", "sessions", "screenPageViews"],
}
]
),
}
@pytest.fixture
def config_without_date_range():
return {
"property_id": "108176369",
"property_ids": ["108176369"],
"credentials": {"auth_type": "Service", "credentials_json": json_credentials},
"dimensions": ["deviceCategory", "operatingSystem", "browser"],
"metrics": [
"totalUsers",
"newUsers",
"sessions",
"sessionsPerUser",
"averageSessionDuration",
"screenPageViews",
"screenPageViewsPerSession",
"bounceRate",
],
"custom_reports": [],
}
@pytest.fixture
def patch_base_class(one_year_ago, config_without_date_range):
return {"config": config_without_date_range}
@pytest.fixture
def config_gen(config):
def inner(**kwargs):
new_config = deepcopy(config)
        # WARNING: dict.update is shallow; nested dictionaries are replaced, not merged
new_config.update(kwargs)
return {k: v for k, v in new_config.items() if v is not ...}
return inner
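
# A minimal sketch of the shallow-update caveat flagged in config_gen above:
# dict.update replaces top-level keys wholesale, so overriding "credentials"
# drops sibling keys instead of merging them (base/override/merged are
# illustrative names, not part of this file):
base = {"credentials": {"auth_type": "Service", "credentials_json": "{}"}, "window_in_days": 30}
override = {"credentials": {"credentials_json": "other"}}
merged = deepcopy(base)
merged.update(override)
assert merged["credentials"] == {"credentials_json": "other"}  # "auth_type" is gone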


@@ -1,158 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import pytest
import requests
from source_google_analytics_data_api.api_quota import GoogleAnalyticsApiQuota
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
TEST_QUOTA_INSTANCE: GoogleAnalyticsApiQuota = GoogleAnalyticsApiQuota()
@pytest.fixture(name="expected_quota_list")
def expected_quota_list():
"""The Quota were currently handle"""
return ["concurrentRequests", "tokensPerProjectPerHour", "potentiallyThresholdedRequestsPerHour"]
def test_check_initial_quota_is_empty():
"""
    Check that the initial quota property is empty (== None) but ready to be filled.
"""
assert not TEST_QUOTA_INSTANCE.initial_quota
@pytest.mark.parametrize(
("response_quota", "partial_quota", "response_action_exp", "backoff_time_exp", "stop_iter_exp"),
[
# Full Quota
(
{
"propertyQuota": {
"concurrentRequests": {"consumed": 0, "remaining": 10},
"tokensPerProjectPerHour": {"consumed": 1, "remaining": 1735},
"potentiallyThresholdedRequestsPerHour": {"consumed": 1, "remaining": 26},
}
},
False, # partial_quota
ResponseAction.RETRY,
None, # backoff_time_exp
False, # stop_iter_exp
),
# Partial Quota
(
{
"propertyQuota": {
"concurrentRequests": {"consumed": 0, "remaining": 10},
"tokensPerProjectPerHour": {"consumed": 5, "remaining": 955},
"potentiallyThresholdedRequestsPerHour": {"consumed": 3, "remaining": 26},
}
},
True, # partial_quota
ResponseAction.RETRY,
None, # backoff_time_exp
False, # stop_iter_exp
),
# Running out `tokensPerProjectPerHour`
(
{
"propertyQuota": {
"concurrentRequests": {"consumed": 2, "remaining": 8},
"tokensPerProjectPerHour": {
"consumed": 5,
                        # ~9% of the original quota is left
"remaining": 172,
},
"potentiallyThresholdedRequestsPerHour": {"consumed": 3, "remaining": 26},
}
},
True, # partial_quota
ResponseAction.RETRY,
1800, # backoff_time_exp
False, # stop_iter_exp
),
# Running out `concurrentRequests`
(
{
"propertyQuota": {
"concurrentRequests": {
"consumed": 9,
                        # 10% of the original quota is left
"remaining": 1,
},
"tokensPerProjectPerHour": {"consumed": 5, "remaining": 935},
"potentiallyThresholdedRequestsPerHour": {"consumed": 1, "remaining": 26},
}
},
True, # partial_quota
ResponseAction.RETRY,
30, # backoff_time_exp
False, # stop_iter_exp
),
# Running out `potentiallyThresholdedRequestsPerHour`
(
{
"propertyQuota": {
"concurrentRequests": {"consumed": 1, "remaining": 9},
"tokensPerProjectPerHour": {"consumed": 5, "remaining": 935},
"potentiallyThresholdedRequestsPerHour": {
                        # 7% of the original quota is left
"consumed": 26,
"remaining": 2,
},
}
},
True, # partial_quota
ResponseAction.RETRY,
1800, # backoff_time_exp
False, # stop_iter_exp
),
],
ids=[
"Full",
"Partial",
"Running out tokensPerProjectPerHour",
"Running out concurrentRequests",
"Running out potentiallyThresholdedRequestsPerHour",
],
)
def test_check_full_quota(
requests_mock,
expected_quota_list,
response_quota,
partial_quota,
response_action_exp,
backoff_time_exp,
stop_iter_exp,
):
"""
    Check the quota and store the initial values for comparison against subsequent responses.
The default values for the scenario are expected when the quota is full.
"""
# Prepare instance
url = "https://analyticsdata.googleapis.com/v1beta/"
payload = response_quota
requests_mock.post(url, json=payload)
response = requests.post(url)
# process and prepare the scenario
TEST_QUOTA_INSTANCE._check_quota(response)
# TEST BLOCK
# Check the INITIAL QUOTA is saved properly
    assert all(quota in expected_quota_list for quota in TEST_QUOTA_INSTANCE.initial_quota.keys())
# Check the CURRENT QUOTA is different from Initial
if partial_quota:
current_quota = TEST_QUOTA_INSTANCE._get_known_quota_from_response(response.json().get("propertyQuota"))
        assert current_quota != TEST_QUOTA_INSTANCE.initial_quota
# Check the scenario is applied based on Quota Values
assert TEST_QUOTA_INSTANCE.response_action is response_action_exp
# backoff_time
assert TEST_QUOTA_INSTANCE.backoff_time == backoff_time_exp
# stop_iter
assert TEST_QUOTA_INSTANCE.stop_iter is stop_iter_exp
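
# Hedged sketch of the threshold behavior the parametrized cases above pin down.
# The real thresholds and backoffs live in the GoogleAnalyticsApiQuota mapping;
# QUOTA_BACKOFF/pick_backoff are illustrative names, and the ~10% cutoff is
# inferred from the test data, not taken from the implementation:
QUOTA_BACKOFF = {
    "concurrentRequests": 30,
    "tokensPerProjectPerHour": 1800,
    "potentiallyThresholdedRequestsPerHour": 1800,
}

def pick_backoff(name: str, remaining: int, initial_remaining: int, cutoff: float = 0.10):
    # back off once the remaining share of the initial quota drops to ~10% or less
    if initial_remaining and remaining / initial_remaining <= cutoff:
        return QUOTA_BACKOFF[name]
    return None

assert pick_backoff("concurrentRequests", 1, 10) == 30
assert pick_backoff("tokensPerProjectPerHour", 172, 1735) == 1800
assert pick_backoff("tokensPerProjectPerHour", 955, 1735) is None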


@@ -1,30 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import requests
from freezegun import freeze_time
from source_google_analytics_data_api.authenticator import GoogleServiceKeyAuthenticator
@freeze_time("2023-01-01 00:00:00")
def test_token_rotation(requests_mock):
credentials = {
"client_email": "client_email",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIBVQIBADANBgkqhkiG9w0BAQEFAASCAT8wggE7AgEAAkEA3slcXL+dA36ESmOi\n1xBhZmp5Hn0WkaHDtW4naba3plva0ibloBNWhFhjQOh7Ff01PVjhT4D5jgqXBIgc\nz9Gv3QIDAQABAkEArlhYPoD5SB2/O1PjwHgiMPrL1C9B9S/pr1cH4vPJnpY3VKE3\n5hvdil14YwRrcbmIxMkK2iRLi9lM4mJmdWPy4QIhAPsRFXZSGx0TZsDxD9V0ZJmZ\n0AuDCj/NF1xB5KPLmp7pAiEA4yoFox6w7ql/a1pUVaLt0NJkDfE+22pxYGNQaiXU\nuNUCIQCsFLaIJZiN4jlgbxlyLVeya9lLuqIwvqqPQl6q4ad12QIgS9gG48xmdHig\n8z3IdIMedZ8ZCtKmEun6Cp1+BsK0wDUCIF0nHfSuU+eTQ2qAON2SHIrJf8UeFO7N\nzdTN1IwwQqjI\n-----END PRIVATE KEY-----\n",
"client_id": "client_id",
}
authenticator = GoogleServiceKeyAuthenticator(credentials)
auth_request = requests_mock.register_uri(
"POST", authenticator._google_oauth2_token_endpoint, json={"access_token": "bearer_token", "expires_in": 3600}
)
authenticated_request = authenticator(requests.Request())
assert auth_request.call_count == 1
assert auth_request.last_request.qs.get("assertion") == [
"eyjhbgcioijsuzi1niisimtpzci6imnsawvudf9pzcisinr5cci6ikpxvcj9.eyjpc3mioijjbgllbnrfzw1hawwilcjzy29wzsi6imh0dhbzoi8vd3d3lmdvb2dszwfwaxmuy29tl2f1dggvyw5hbhl0awnzlnjlywrvbmx5iiwiyxvkijoiahr0chm6ly9vyxv0adiuz29vz2xlyxbpcy5jb20vdg9rzw4ilcjlehaioje2nzi1mzq4mdasimlhdci6mty3mjuzmtiwmh0.oy_do4cxytjclgajcutbolxftlba89bt2ipuegmis7crh9no_q9h4ispv7iquz5d5h58tpftjhdayb5jfuvheq"
]
assert auth_request.last_request.qs.get("grant_type") == ["urn:ietf:params:oauth:grant-type:jwt-bearer"]
assert authenticator._token.get("expires_at") == 1672534800
assert authenticated_request.headers.get("Authorization") == "Bearer bearer_token"
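
# The assertion string checked above is a signed JWT; below is a hedged sketch
# of how such an assertion could be built (assumes the PyJWT package; the claim
# names mirror what the decoded token above contains, but build_assertion is an
# illustrative helper, not the authenticator's actual implementation):
import time

import jwt  # PyJWT

def build_assertion(credentials: dict, scope: str, token_endpoint: str) -> str:
    now = int(time.time())
    claims = {
        "iss": credentials["client_email"],  # service account email
        "scope": scope,                      # e.g. the analytics.readonly scope
        "aud": token_endpoint,               # https://oauth2.googleapis.com/token
        "iat": now,
        "exp": now + 3600,                   # one-hour token lifetime
    }
    return jwt.encode(claims, credentials["private_key"], algorithm="RS256")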


@@ -1,37 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from unittest.mock import patch
from source_google_analytics_data_api import SourceGoogleAnalyticsDataApi
from source_google_analytics_data_api.config_migrations import MigratePropertyID
from airbyte_cdk.entrypoint import AirbyteEntrypoint
@patch.object(SourceGoogleAnalyticsDataApi, "read_config")
@patch.object(SourceGoogleAnalyticsDataApi, "write_config")
@patch.object(AirbyteEntrypoint, "extract_config")
def test_migration(ab_entrypoint_extract_config_mock, source_write_config_mock, source_read_config_mock):
source = SourceGoogleAnalyticsDataApi()
source_read_config_mock.return_value = {
"credentials": {"auth_type": "Service", "credentials_json": "<credentials string ...>"},
"custom_reports": "<custom reports out of current test>",
"date_ranges_start_date": "2023-09-01",
"window_in_days": 30,
"property_id": "111111111",
}
ab_entrypoint_extract_config_mock.return_value = "/path/to/config.json"
def check_migrated_value(new_config, path):
assert path == "/path/to/config.json"
assert "property_id" not in new_config
assert "property_ids" in new_config
assert "111111111" in new_config["property_ids"]
assert len(new_config["property_ids"]) == 1
source_write_config_mock.side_effect = check_migrated_value
MigratePropertyID.migrate(["--config", "/path/to/config.json"], source)
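
# Hedged sketch of the transform this test pins down: the single "property_id"
# becomes a one-element "property_ids" list (illustrative helper, not the
# actual MigratePropertyID implementation):
def migrate_property_id_sketch(config: dict) -> dict:
    migrated = {k: v for k, v in config.items() if k != "property_id"}
    migrated["property_ids"] = [config["property_id"]]
    return migrated

assert migrate_property_id_sketch({"property_id": "111111111"}) == {"property_ids": ["111111111"]}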


@@ -1,59 +0,0 @@
{
"credentials": {
"auth_type": "Service",
"credentials_json": ""
},
"date_ranges_start_date": "2023-09-01",
"window_in_days": 30,
"property_ids": "314186564",
"custom_reports_array": [
{
"name": "cohort_report",
"dimensions": ["cohort", "cohortNthDay"],
"metrics": ["cohortActiveUsers"],
"cohortSpec": {
"cohorts": [
{
"dimension": "firstSessionDate",
"dateRange": {
"startDate": "2023-04-24",
"endDate": "2023-04-24"
}
}
],
"cohortsRange": {
"endOffset": 100,
"granularity": "DAILY"
},
"cohortReportSettings": {
"accumulate": false
}
}
},
{
"name": "pivot_report",
"dateRanges": [
{
"startDate": "2020-09-01",
"endDate": "2020-09-15"
}
],
"dimensions": ["browser", "country", "language"],
"metrics": ["sessions"],
"pivots": [
{
"fieldNames": ["browser"],
"limit": 5
},
{
"fieldNames": ["country"],
"limit": 250
},
{
"fieldNames": ["language"],
"limit": 15
}
]
}
]
}


@@ -1,48 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import json
import os
from typing import Any, Mapping
import dpath.util
from source_google_analytics_data_api.config_migrations import MigrateCustomReportsCohortSpec
from source_google_analytics_data_api.source import SourceGoogleAnalyticsDataApi
from airbyte_cdk.models import OrchestratorType, Type
from airbyte_cdk.sources import Source
# BASE ARGS
CMD = "check"
TEST_CONFIG_PATH = f"{os.path.dirname(__file__)}/test_config.json"
NEW_TEST_CONFIG_PATH = f"{os.path.dirname(__file__)}/test_new_config.json"
SOURCE_INPUT_ARGS = [CMD, "--config", TEST_CONFIG_PATH]
SOURCE: Source = SourceGoogleAnalyticsDataApi()
# HELPERS
def load_config(config_path: str = TEST_CONFIG_PATH) -> Mapping[str, Any]:
with open(config_path, "r") as config:
return json.load(config)
def test_migrate_config(capsys):
migration_instance = MigrateCustomReportsCohortSpec()
# migrate the test_config
migration_instance.migrate(SOURCE_INPUT_ARGS, SOURCE)
what = capsys.readouterr().out
control_msg = json.loads(what)
assert control_msg["type"] == Type.CONTROL.value
assert control_msg["control"]["type"] == OrchestratorType.CONNECTOR_CONFIG.value
assert control_msg["control"]["connectorConfig"]["config"]["custom_reports_array"][0]["cohortSpec"]["enabled"] == "true"
assert control_msg["control"]["connectorConfig"]["config"]["custom_reports_array"][1]["cohortSpec"]["enabled"] == "false"
def test_should_not_migrate_new_config():
new_config = load_config(NEW_TEST_CONFIG_PATH)
assert not MigrateCustomReportsCohortSpec._should_migrate(new_config)
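
# Hedged sketch of the migration behavior asserted above: every custom report
# ends up with cohortSpec.enabled set, "true" when a cohort spec was already
# configured and "false" otherwise (illustrative helper, not the actual
# MigrateCustomReportsCohortSpec implementation):
def migrate_cohort_spec_sketch(config: dict) -> dict:
    for report in config.get("custom_reports_array", []):
        spec = report.setdefault("cohortSpec", {})
        spec["enabled"] = "true" if spec.get("cohorts") else "false"
    return config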


@@ -1,63 +0,0 @@
{
"credentials": {
"auth_type": "Service",
"credentials_json": ""
},
"date_ranges_start_date": "2023-09-01",
"window_in_days": 30,
"property_ids": "314186564",
"custom_reports_array": [
{
"name": "cohort_report",
"dimensions": ["cohort", "cohortNthDay"],
"metrics": ["cohortActiveUsers"],
"cohortSpec": {
"cohorts": [
{
"dimension": "firstSessionDate",
"dateRange": {
"startDate": "2023-04-24",
"endDate": "2023-04-24"
}
}
],
"cohortsRange": {
"endOffset": 100,
"granularity": "DAILY"
},
"cohortReportSettings": {
"accumulate": false
},
"enable": "true"
}
},
{
"name": "pivot_report",
"dateRanges": [
{
"startDate": "2020-09-01",
"endDate": "2020-09-15"
}
],
"dimensions": ["browser", "country", "language"],
"metrics": ["sessions"],
"pivots": [
{
"fieldNames": ["browser"],
"limit": 5
},
{
"fieldNames": ["country"],
"limit": 250
},
{
"fieldNames": ["language"],
"limit": 15
}
],
"cohortSpec": {
"enabled": "false"
}
}
]
}


@@ -1,7 +0,0 @@
{
"credentials": { "auth_type": "Service", "credentials_json": "" },
"custom_reports": "[{\"name\": \"custom_dimensions\", \"dimensions\": [\"date\", \"country\", \"device\"]}]",
"date_ranges_start_date": "2023-09-01",
"window_in_days": 30,
"property_ids": "314186564"
}


@@ -1,77 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import json
from typing import Any, Mapping
from source_google_analytics_data_api.config_migrations import MigrateCustomReports
from source_google_analytics_data_api.source import SourceGoogleAnalyticsDataApi
from airbyte_cdk.models import OrchestratorType, Type
from airbyte_cdk.sources import Source
# BASE ARGS
CMD = "check"
TEST_CONFIG_PATH = "unit_tests/test_migrations/test_config.json"
SOURCE_INPUT_ARGS = [CMD, "--config", TEST_CONFIG_PATH]
SOURCE: Source = SourceGoogleAnalyticsDataApi()
# HELPERS
def load_config(config_path: str = TEST_CONFIG_PATH) -> Mapping[str, Any]:
with open(config_path, "r") as config:
return json.load(config)
def revert_migration(config_path: str = TEST_CONFIG_PATH) -> None:
with open(config_path, "r") as test_config:
config = json.load(test_config)
config.pop("custom_reports_array")
with open(config_path, "w") as updated_config:
config = json.dumps(config)
updated_config.write(config)
def test_migrate_config(capsys):
migration_instance = MigrateCustomReports()
original_config = load_config()
# migrate the test_config
migration_instance.migrate(SOURCE_INPUT_ARGS, SOURCE)
# load the updated config
test_migrated_config = load_config()
# check migrated property
assert "custom_reports_array" in test_migrated_config
assert isinstance(test_migrated_config["custom_reports_array"], list)
# check the old property is in place
assert "custom_reports" in test_migrated_config
assert isinstance(test_migrated_config["custom_reports"], str)
# check the migration should be skipped, once already done
assert not migration_instance._should_migrate(test_migrated_config)
# load the old custom reports VS migrated
assert json.loads(original_config["custom_reports"]) == test_migrated_config["custom_reports_array"]
# test CONTROL MESSAGE was emitted
control_msg = json.loads(capsys.readouterr().out)
assert control_msg["type"] == Type.CONTROL.value
assert control_msg["control"]["type"] == OrchestratorType.CONNECTOR_CONFIG.value
    # the old custom_reports value is still a str
    assert isinstance(control_msg["control"]["connectorConfig"]["config"]["custom_reports"], str)
    # the new custom_reports_array value is a list
    assert isinstance(control_msg["control"]["connectorConfig"]["config"]["custom_reports_array"], list)
# check the migrated values
assert control_msg["control"]["connectorConfig"]["config"]["custom_reports_array"][0]["name"] == "custom_dimensions"
assert control_msg["control"]["connectorConfig"]["config"]["custom_reports_array"][0]["dimensions"] == ["date", "country", "device"]
# revert the test_config to the starting point
revert_migration()
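
# Hedged sketch of the shape change test_migrate_config verifies above: the JSON
# string under "custom_reports" is parsed into a new "custom_reports_array" list
# while the original key stays in place (illustrative helper, not the actual
# MigrateCustomReports implementation):
def migrate_custom_reports_sketch(config):
    migrated = dict(config)
    migrated["custom_reports_array"] = json.loads(config["custom_reports"])
    return migrated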
def test_config_is_reverted():
# check the test_config state, it has to be the same as before tests
test_config = load_config()
    # check the config no longer has the migrated property
assert "custom_reports_array" not in test_config
# check the old property is still there
assert "custom_reports" in test_config
assert isinstance(test_config["custom_reports"], str)


@@ -1,259 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from unittest.mock import MagicMock, patch
import pytest
from source_google_analytics_data_api import SourceGoogleAnalyticsDataApi
from source_google_analytics_data_api.api_quota import GoogleAnalyticsApiQuotaBase
from source_google_analytics_data_api.source import GoogleAnalyticsDatApiErrorHandler, MetadataDescriptor
from source_google_analytics_data_api.utils import NO_DIMENSIONS, NO_METRICS, NO_NAME, WRONG_CUSTOM_REPORT_CONFIG, WRONG_JSON_SYNTAX
from airbyte_cdk.models import AirbyteConnectionStatus, FailureType, Status
from airbyte_cdk.sources.streams.http.http import HttpStatusErrorHandler
from airbyte_cdk.utils import AirbyteTracedException
@pytest.mark.parametrize(
"config_values, is_successful, message",
[
({}, Status.SUCCEEDED, None),
({"custom_reports_array": ...}, Status.SUCCEEDED, None),
({"custom_reports_array": "[]"}, Status.SUCCEEDED, None),
({"custom_reports_array": "invalid"}, Status.FAILED, f"'{WRONG_JSON_SYNTAX}'"),
({"custom_reports_array": "{}"}, Status.FAILED, f"'{WRONG_JSON_SYNTAX}'"),
({"custom_reports_array": "[{}]"}, Status.FAILED, f"'{NO_NAME}'"),
({"custom_reports_array": '[{"name": "name"}]'}, Status.FAILED, f"'{NO_DIMENSIONS}'"),
({"custom_reports_array": '[{"name": "daily_active_users", "dimensions": ["date"]}]'}, Status.FAILED, f"'{NO_METRICS}'"),
(
{"custom_reports_array": '[{"name": "daily_active_users", "metrics": ["totalUsers"], "dimensions": [{"name": "city"}]}]'},
Status.FAILED,
"\"The custom report daily_active_users entered contains invalid dimensions: {'name': 'city'} is not of type 'string'. Validate your custom query with the GA 4 Query Explorer (https://ga-dev-tools.google/ga4/query-explorer/).\"",
),
({"date_ranges_start_date": "2022-20-20"}, Status.FAILED, "\"time data '2022-20-20' does not match format '%Y-%m-%d'\""),
({"date_ranges_end_date": "2022-20-20"}, Status.FAILED, "\"time data '2022-20-20' does not match format '%Y-%m-%d'\""),
(
{"date_ranges_start_date": "2022-12-20", "date_ranges_end_date": "2022-12-10"},
Status.FAILED,
"\"End date '2022-12-10' can not be before start date '2022-12-20'\"",
),
(
{"credentials": {"auth_type": "Service", "credentials_json": "invalid"}},
Status.FAILED,
"'credentials.credentials_json is not valid JSON'",
),
(
{"custom_reports_array": '[{"name": "name", "dimensions": [], "metrics": []}]'},
Status.FAILED,
"'The custom report name entered contains invalid dimensions: [] is too short. Validate your custom query with the GA 4 Query Explorer (https://ga-dev-tools.google/ga4/query-explorer/).'",
),
(
{"custom_reports_array": '[{"name": "daily_active_users", "dimensions": ["date"], "metrics": ["totalUsers"]}]'},
Status.FAILED,
"'Custom reports: daily_active_users already exist as a default report(s).'",
),
(
{"custom_reports_array": '[{"name": "name", "dimensions": ["unknown"], "metrics": ["totalUsers"]}]'},
Status.FAILED,
"'The custom report name entered contains invalid dimensions: unknown. Validate your custom query with the GA 4 Query Explorer (https://ga-dev-tools.google/ga4/query-explorer/).'",
),
(
{"custom_reports_array": '[{"name": "name", "dimensions": ["date"], "metrics": ["unknown"]}]'},
Status.FAILED,
"'The custom report name entered contains invalid metrics: unknown. Validate your custom query with the GA 4 Query Explorer (https://ga-dev-tools.google/ga4/query-explorer/).'",
),
(
{
"custom_reports_array": '[{"name": "pivot_report", "dateRanges": [{ "startDate": "2020-09-01", "endDate": "2020-09-15" }], "dimensions": ["browser", "country", "language"], "metrics": ["sessions"], "pivots": {}}]'
},
Status.FAILED,
"\"The custom report pivot_report entered contains invalid pivots: {} is not of type 'null', 'array'. Ensure the pivot follow the syntax described in the docs (https://developers.google.com/analytics/devguides/reporting/data/v1/rest/v1beta/Pivot).\"",
),
],
)
def test_check(requests_mock, config_gen, config_values, is_successful, message):
requests_mock.register_uri(
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
)
requests_mock.register_uri(
"GET",
"https://analyticsdata.googleapis.com/v1beta/properties/108176369/metadata",
json={
"dimensions": [{"apiName": "date"}, {"apiName": "country"}, {"apiName": "language"}, {"apiName": "browser"}],
"metrics": [{"apiName": "totalUsers"}, {"apiName": "screenPageViews"}, {"apiName": "sessions"}],
},
)
requests_mock.register_uri(
"POST",
"https://analyticsdata.googleapis.com/v1beta/properties/108176369:runReport",
json={
"dimensionHeaders": [{"name": "date"}, {"name": "country"}],
"metricHeaders": [{"name": "totalUsers", "type": "s"}, {"name": "screenPageViews", "type": "m"}],
"rows": [],
},
)
source = SourceGoogleAnalyticsDataApi()
logger = MagicMock()
assert source.check(logger, config_gen(**config_values)) == AirbyteConnectionStatus(status=is_successful, message=message)
@pytest.mark.parametrize("error_code", (400, 403))
def test_check_failure_throws_exception(requests_mock, config_gen, error_code):
requests_mock.register_uri(
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
)
requests_mock.register_uri(
"GET", "https://analyticsdata.googleapis.com/v1beta/properties/UA-11111111/metadata", json={}, status_code=error_code
)
source = SourceGoogleAnalyticsDataApi()
logger = MagicMock()
with pytest.raises(AirbyteTracedException) as e:
source.check(logger, config_gen(property_ids=["UA-11111111"]))
assert e.value.failure_type == FailureType.config_error
assert "Access was denied to the property ID entered." in e.value.message
def test_exhausted_quota_recovers_after_two_retries(requests_mock, config_gen):
"""
    If the account runs out of quota, the API returns a message asking us to back off for one hour.
    The backoff time for this scenario is set to 30 minutes: after the first backoff we check whether
    the quota has recovered; if not, we back off for another 30 minutes, by which point the quota
    should be reestablished. To avoid waiting a full hour in the test, both the quota recovery time
    and the backoff time are scaled down by a factor of 600.
"""
requests_mock.register_uri(
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
)
error_response = {
"error": {
"message": "Exhausted potentially thresholded requests quota. This quota will refresh in under an hour. To learn more, see"
}
}
requests_mock.register_uri(
"GET",
"https://analyticsdata.googleapis.com/v1beta/properties/UA-11111111/metadata",
# first try we get 429 t=~0
[
{"json": error_response, "status_code": 429},
# first retry we get 429 t=~1800
{"json": error_response, "status_code": 429},
# second retry quota is recovered, t=~3600
{
"json": {
"dimensions": [{"apiName": "date"}, {"apiName": "country"}, {"apiName": "language"}, {"apiName": "browser"}],
"metrics": [{"apiName": "totalUsers"}, {"apiName": "screenPageViews"}, {"apiName": "sessions"}],
},
"status_code": 200,
},
],
)
def fix_time(time):
return int(time / 600)
source = SourceGoogleAnalyticsDataApi()
logger = MagicMock()
max_time_fixed = fix_time(GoogleAnalyticsDatApiErrorHandler.QUOTA_RECOVERY_TIME)
potentially_thresholded_requests_per_hour_mapping = GoogleAnalyticsApiQuotaBase.quota_mapping["potentiallyThresholdedRequestsPerHour"]
threshold_backoff_time = potentially_thresholded_requests_per_hour_mapping["backoff"]
fixed_threshold_backoff_time = fix_time(threshold_backoff_time)
potentially_thresholded_requests_per_hour_mapping_fixed = {
**potentially_thresholded_requests_per_hour_mapping,
"backoff": fixed_threshold_backoff_time,
}
with (
patch.object(GoogleAnalyticsDatApiErrorHandler, "QUOTA_RECOVERY_TIME", new=max_time_fixed),
patch.object(
GoogleAnalyticsApiQuotaBase,
"quota_mapping",
new={
**GoogleAnalyticsApiQuotaBase.quota_mapping,
"potentiallyThresholdedRequestsPerHour": potentially_thresholded_requests_per_hour_mapping_fixed,
},
),
):
output = source.check(logger, config_gen(property_ids=["UA-11111111"]))
assert output == AirbyteConnectionStatus(status=Status.SUCCEEDED, message=None)
@pytest.mark.parametrize("error_code", (402, 404, 405))
def test_check_failure(requests_mock, config_gen, error_code):
requests_mock.register_uri(
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
)
requests_mock.register_uri(
"GET", "https://analyticsdata.googleapis.com/v1beta/properties/UA-11111111/metadata", json={}, status_code=error_code
)
source = SourceGoogleAnalyticsDataApi()
logger = MagicMock()
with patch.object(HttpStatusErrorHandler, "max_retries", new=0):
airbyte_status = source.check(logger, config_gen(property_ids=["UA-11111111"]))
assert airbyte_status.status == Status.FAILED
assert airbyte_status.message == repr("Failed to get metadata, over quota, try later")
@pytest.mark.parametrize(
("status_code", "response_error_message"),
(
(403, "Forbidden for some reason"),
(400, "Granularity in the cohortsRange is required."),
),
)
def test_check_incorrect_custom_reports_config(requests_mock, config_gen, status_code, response_error_message):
requests_mock.register_uri(
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
)
requests_mock.register_uri(
"GET",
"https://analyticsdata.googleapis.com/v1beta/properties/108176369/metadata",
json={
"dimensions": [{"apiName": "date"}, {"apiName": "country"}, {"apiName": "language"}, {"apiName": "browser"}],
"metrics": [{"apiName": "totalUsers"}, {"apiName": "screenPageViews"}, {"apiName": "sessions"}],
},
)
requests_mock.register_uri(
"POST",
"https://analyticsdata.googleapis.com/v1beta/properties/108176369:runReport",
status_code=status_code,
json={"error": {"message": response_error_message}},
)
report_name = "cohort_report"
config = {"custom_reports_array": f'[{{"name": "{report_name}", "dimensions": ["date"], "metrics": ["totalUsers"]}}]'}
friendly_message = WRONG_CUSTOM_REPORT_CONFIG.format(report=report_name)
source = SourceGoogleAnalyticsDataApi()
logger = MagicMock()
status, message = source.check_connection(logger, config_gen(**config))
assert status is False
assert message == f"{friendly_message} {response_error_message}"
@pytest.mark.parametrize("status_code", (403, 401))
def test_missing_metadata(requests_mock, status_code):
    # required as the instance argument when invoking the MetadataDescriptor descriptor
class TestConfig:
config = {
"authenticator": None,
"property_id": 123,
}
# mocking the url for metadata
requests_mock.register_uri(
"GET", "https://analyticsdata.googleapis.com/v1beta/properties/123/metadata", json={}, status_code=status_code
)
metadata_descriptor = MetadataDescriptor()
with pytest.raises(AirbyteTracedException) as e:
metadata_descriptor.__get__(TestConfig(), None)
assert e.value.failure_type == FailureType.config_error
def test_streams(patch_base_class, config_gen):
config = config_gen(property_ids=["Prop1", "PropN"])
source = SourceGoogleAnalyticsDataApi()
streams = source.streams(config)
expected_streams_number = 57 * 2
assert len([stream for stream in streams if "_property_" in stream.name]) == 57
assert len(set(streams)) == expected_streams_number


@@ -1,477 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import datetime
import json
import random
from http import HTTPStatus
from typing import Any, Mapping
from unittest.mock import MagicMock
import pytest
from freezegun import freeze_time
from requests.models import Response
from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream, SourceGoogleAnalyticsDataApi
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ErrorResolution, FailureType, ResponseAction
from .utils import read_incremental
@pytest.fixture
def patch_base_class(mocker, config, config_without_date_range):
# Mock abstract methods to enable instantiating abstract class
mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "path", f"{random.randint(100000000, 999999999)}:runReport")
mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "primary_key", "test_primary_key")
mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "__abstractmethods__", set())
return {"config": config, "config_without_date_range": config_without_date_range}
def test_json_schema(requests_mock, patch_base_class):
requests_mock.register_uri(
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
)
requests_mock.register_uri(
"GET",
"https://analyticsdata.googleapis.com/v1beta/properties/108176369/metadata",
json={
"dimensions": [{"apiName": "date"}, {"apiName": "country"}, {"apiName": "language"}, {"apiName": "browser"}],
"metrics": [{"apiName": "totalUsers"}, {"apiName": "screenPageViews"}, {"apiName": "sessions"}],
},
)
schema = GoogleAnalyticsDataApiBaseStream(
authenticator=MagicMock(), config={"authenticator": MagicMock(), **patch_base_class["config_without_date_range"]}
).get_json_schema()
for d in patch_base_class["config_without_date_range"]["dimensions"]:
assert d in schema["properties"]
for p in patch_base_class["config_without_date_range"]["metrics"]:
assert p in schema["properties"]
assert "startDate" in schema["properties"]
assert "endDate" in schema["properties"]
def test_request_params(patch_base_class):
assert (
GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_params(
stream_state=MagicMock(), stream_slice=MagicMock(), next_page_token=MagicMock()
)
== {}
)
def test_request_body_json(patch_base_class):
stream_slice = {"startDate": "2024-01-01", "endDate": "2024-01-31"}
request_body_params = {"stream_state": MagicMock(), "stream_slice": stream_slice, "next_page_token": None}
expected_body_json = {
"metrics": [
{"name": "totalUsers"},
{"name": "newUsers"},
{"name": "sessions"},
{"name": "sessionsPerUser"},
{"name": "averageSessionDuration"},
{"name": "screenPageViews"},
{"name": "screenPageViewsPerSession"},
{"name": "bounceRate"},
],
"dimensions": [
{"name": "date"},
{"name": "deviceCategory"},
{"name": "operatingSystem"},
{"name": "browser"},
],
"keepEmptyRows": True,
"dateRanges": [
{"startDate": request_body_params["stream_slice"]["startDate"], "endDate": request_body_params["stream_slice"]["endDate"]}
],
"returnPropertyQuota": True,
"offset": str(0),
"limit": "100000",
}
request_body_json = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_body_json(
**request_body_params
)
assert request_body_json == expected_body_json
def test_changed_page_size(patch_base_class):
request_body_params = {"stream_state": MagicMock(), "stream_slice": MagicMock(), "next_page_token": None}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
stream.page_size = 100
request_body_json = stream.request_body_json(**request_body_params)
assert request_body_json["limit"] == "100"
def test_next_page_token_equal_chunk(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
response = MagicMock()
response.json.side_effect = [
{"rowCount": 300000},
{"rowCount": 300000},
{"rowCount": 300000},
]
inputs = {"response": response}
expected_tokens = [
{"offset": 100000},
{"offset": 200000},
None,
]
for expected_token in expected_tokens:
assert stream.next_page_token(**inputs) == expected_token
def test_next_page_token(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
response = MagicMock()
response.json.side_effect = [
{"rowCount": 450000},
{"rowCount": 450000},
{"rowCount": 450000},
{"rowCount": 450000},
{"rowCount": 450000},
]
inputs = {"response": response}
expected_tokens = [
{"offset": 100000},
{"offset": 200000},
{"offset": 300000},
{"offset": 400000},
None,
]
for expected_token in expected_tokens:
assert stream.next_page_token(**inputs) == expected_token
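
# Hedged sketch of the offset pagination the two tests above pin down: advance
# by the page size until the next offset reaches rowCount (next_offset_sketch
# and the default page size of 100_000 are inferred from the expected tokens,
# not taken from the stream implementation):
def next_offset_sketch(row_count: int, offset: int, page_size: int = 100_000):
    nxt = offset + page_size
    return {"offset": nxt} if nxt < row_count else None

assert next_offset_sketch(300_000, 0) == {"offset": 100_000}
assert next_offset_sketch(300_000, 200_000) is None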
def test_parse_response(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
response_data = {
"dimensionHeaders": [{"name": "date"}, {"name": "deviceCategory"}, {"name": "operatingSystem"}, {"name": "browser"}],
"metricHeaders": [
{"name": "totalUsers", "type": "TYPE_INTEGER"},
{"name": "newUsers", "type": "TYPE_INTEGER"},
{"name": "sessions", "type": "TYPE_INTEGER"},
{"name": "sessionsPerUser:parameter", "type": "TYPE_FLOAT"},
{"name": "averageSessionDuration", "type": "TYPE_SECONDS"},
{"name": "screenPageViews", "type": "TYPE_INTEGER"},
{"name": "screenPageViewsPerSession", "type": "TYPE_FLOAT"},
{"name": "bounceRate", "type": "TYPE_FLOAT"},
],
"rows": [
{
"dimensionValues": [{"value": "20220731"}, {"value": "desktop"}, {"value": "Macintosh"}, {"value": "Chrome"}],
"metricValues": [
{"value": "344.234"}, # This is a float will be converted to int
{"value": "169.345345"}, # This is a float will be converted to int
{"value": "420"},
{"value": "1.2209302325581395"},
{"value": "194.76313766428572"},
{"value": "614"},
{"value": "1.4619047619047618"},
{"value": "0.47857142857142859"},
],
},
{
"dimensionValues": [{"value": "20220731"}, {"value": "desktop"}, {"value": "Windows"}, {"value": "Chrome"}],
"metricValues": [
{"value": "322"},
{"value": "211"},
{"value": "387"},
{"value": "1.2018633540372672"},
{"value": "249.21595714211884"},
{"value": "669"},
{"value": "1.7286821705426356"},
{"value": "0.42377260981912146"},
],
},
],
"rowCount": 54,
"metadata": {"currencyCode": "USD", "timeZone": "America/Los_Angeles"},
"kind": "analyticsData#runReport",
}
expected_data = [
{
"property_id": "108176369",
"date": "20220731",
"deviceCategory": "desktop",
"operatingSystem": "Macintosh",
"browser": "Chrome",
"totalUsers": 344,
"newUsers": 169,
"sessions": 420,
"sessionsPerUser:parameter": 1.2209302325581395,
"averageSessionDuration": 194.76313766428572,
"screenPageViews": 614,
"screenPageViewsPerSession": 1.4619047619047618,
"bounceRate": 0.47857142857142859,
},
{
"property_id": "108176369",
"date": "20220731",
"deviceCategory": "desktop",
"operatingSystem": "Windows",
"browser": "Chrome",
"totalUsers": 322,
"newUsers": 211,
"sessions": 387,
"sessionsPerUser:parameter": 1.2018633540372672,
"averageSessionDuration": 249.21595714211884,
"screenPageViews": 669,
"screenPageViewsPerSession": 1.7286821705426356,
"bounceRate": 0.42377260981912146,
},
]
response = MagicMock()
response.json.return_value = response_data
inputs = {"response": response, "stream_state": {}}
actual_records: Mapping[str, Any] = list(stream.parse_response(**inputs))
assert actual_records == expected_data
def test_request_headers(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None}
expected_headers = {}
assert stream.request_headers(**inputs) == expected_headers
def test_http_method(patch_base_class):
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
expected_method = "POST"
assert stream.http_method == expected_method
@pytest.mark.parametrize(
("http_status", "response_action_expected", "response_body"),
[
(HTTPStatus.OK, ResponseAction.SUCCESS, {}),
(HTTPStatus.BAD_REQUEST, ResponseAction.FAIL, {}),
(HTTPStatus.TOO_MANY_REQUESTS, ResponseAction.RETRY, {}),
(HTTPStatus.TOO_MANY_REQUESTS, ResponseAction.RETRY, {"error": {"message": "Exhausted concurrent requests quota."}}),
(HTTPStatus.INTERNAL_SERVER_ERROR, ResponseAction.RETRY, {}),
],
)
def test_should_retry(patch_base_class, http_status, response_action_expected, response_body):
response_mock = Response()
response_mock.status_code = http_status
if response_body:
json_data = response_body
response_mock._content = str.encode(json.dumps(json_data))
response_mock.headers = {"Content-Type": "application/json"}
response_mock.encoding = "utf-8"
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
assert stream.get_error_handler().interpret_response(response_mock).response_action == response_action_expected
def test_backoff_time(patch_base_class):
response_mock = Response()
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
expected_backoff_time = None
assert stream.get_backoff_strategy().backoff_time(response_mock) == expected_backoff_time
@freeze_time("2023-01-01 00:00:00")
def test_stream_slices():
config = {"date_ranges_start_date": datetime.date(2022, 12, 29), "window_in_days": 1, "dimensions": ["date"]}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
slices = list(stream.stream_slices(sync_mode=None))
assert slices == [
{"startDate": "2022-12-29", "endDate": "2022-12-29"},
{"startDate": "2022-12-30", "endDate": "2022-12-30"},
{"startDate": "2022-12-31", "endDate": "2022-12-31"},
{"startDate": "2023-01-01", "endDate": "2023-01-01"},
]
config = {"date_ranges_start_date": datetime.date(2022, 12, 28), "window_in_days": 2, "dimensions": ["date"]}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
slices = list(stream.stream_slices(sync_mode=None))
assert slices == [
{"startDate": "2022-12-28", "endDate": "2022-12-29"},
{"startDate": "2022-12-30", "endDate": "2022-12-31"},
{"startDate": "2023-01-01", "endDate": "2023-01-01"},
]
config = {"date_ranges_start_date": datetime.date(2022, 12, 20), "window_in_days": 5, "dimensions": ["date"]}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
slices = list(stream.stream_slices(sync_mode=None))
assert slices == [
{"startDate": "2022-12-20", "endDate": "2022-12-24"},
{"startDate": "2022-12-25", "endDate": "2022-12-29"},
{"startDate": "2022-12-30", "endDate": "2023-01-01"},
]
config = {
"date_ranges_start_date": datetime.date(2022, 12, 20),
"date_ranges_end_date": datetime.date(2022, 12, 26),
"window_in_days": 5,
"dimensions": ["date"],
}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
slices = list(stream.stream_slices(sync_mode=None))
assert slices == [
{"startDate": "2022-12-20", "endDate": "2022-12-24"},
{"startDate": "2022-12-25", "endDate": "2022-12-26"},
]
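
# Hedged sketch of the windowing rule the four cases above encode: fixed-size
# date windows from the start date, clamped to the end date (or "today" under
# freeze_time). date_slices_sketch is an illustrative helper, not the stream's
# actual stream_slices implementation:
def date_slices_sketch(start: datetime.date, end: datetime.date, window: int):
    slices, cursor = [], start
    while cursor <= end:
        stop = min(cursor + datetime.timedelta(days=window - 1), end)
        slices.append({"startDate": cursor.isoformat(), "endDate": stop.isoformat()})
        cursor = stop + datetime.timedelta(days=1)
    return slices

assert date_slices_sketch(datetime.date(2022, 12, 28), datetime.date(2023, 1, 1), 2) == [
    {"startDate": "2022-12-28", "endDate": "2022-12-29"},
    {"startDate": "2022-12-30", "endDate": "2022-12-31"},
    {"startDate": "2023-01-01", "endDate": "2023-01-01"},
]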
@freeze_time("2023-01-01 00:00:00")
def test_full_refresh():
"""
Test case when full refresh state is used
"""
config = {"date_ranges_start_date": datetime.date(2022, 12, 29), "window_in_days": 1, "dimensions": ["browser", "country", "language"]}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
full_refresh_state = {"__ab_full_refresh_state_message": True}
slices = list(stream.stream_slices(sync_mode=None, stream_state=full_refresh_state))
assert slices == [
{"startDate": "2022-12-29", "endDate": "2022-12-29"},
{"startDate": "2022-12-30", "endDate": "2022-12-30"},
{"startDate": "2022-12-31", "endDate": "2022-12-31"},
{"startDate": "2023-01-01", "endDate": "2023-01-01"},
]
def test_read_incremental(requests_mock):
config = {
"property_ids": [123],
"property_id": 123,
"date_ranges_start_date": datetime.date(2022, 1, 6),
"window_in_days": 1,
"dimensions": ["yearWeek"],
"metrics": ["totalUsers"],
}
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
stream_state = {}
responses = [
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202201"}], "metricValues": [{"value": "100"}]}],
"rowCount": 1,
},
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202201"}], "metricValues": [{"value": "110"}]}],
"rowCount": 1,
},
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202201"}], "metricValues": [{"value": "120"}]}],
"rowCount": 1,
},
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "130"}]}],
"rowCount": 1,
},
        # second incremental read
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "112"}]}],
"rowCount": 1,
},
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "125"}]}],
"rowCount": 1,
},
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "140"}]}],
"rowCount": 1,
},
{
"dimensionHeaders": [{"name": "yearWeek"}],
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "150"}]}],
"rowCount": 1,
},
]
requests_mock.register_uri(
"POST",
"https://analyticsdata.googleapis.com/v1beta/properties/123:runReport",
json=lambda request, context: responses.pop(0),
)
with freeze_time("2022-01-09 12:00:00"):
records = list(read_incremental(stream, stream_state))
assert records == [
{"property_id": 123, "yearWeek": "202201", "totalUsers": 100, "startDate": "2022-01-06", "endDate": "2022-01-06"},
{"property_id": 123, "yearWeek": "202201", "totalUsers": 110, "startDate": "2022-01-07", "endDate": "2022-01-07"},
{"property_id": 123, "yearWeek": "202201", "totalUsers": 120, "startDate": "2022-01-08", "endDate": "2022-01-08"},
{"property_id": 123, "yearWeek": "202202", "totalUsers": 130, "startDate": "2022-01-09", "endDate": "2022-01-09"},
]
assert stream_state == {"yearWeek": "202202"}
with freeze_time("2022-01-10 12:00:00"):
records = list(read_incremental(stream, stream_state))
assert records == [
{"property_id": 123, "yearWeek": "202202", "totalUsers": 112, "startDate": "2022-01-08", "endDate": "2022-01-08"},
{"property_id": 123, "yearWeek": "202202", "totalUsers": 125, "startDate": "2022-01-09", "endDate": "2022-01-09"},
{"property_id": 123, "yearWeek": "202202", "totalUsers": 140, "startDate": "2022-01-10", "endDate": "2022-01-10"},
]
@pytest.mark.parametrize(
"config_dimensions, expected_state",
[
pytest.param(["browser", "country", "language", "date"], {"date": "20240320"}, id="test_date_no_cursor_field_dimension"),
pytest.param(["browser", "country", "language"], {}, id="test_date_cursor_field_dimension"),
],
)
def test_get_updated_state(config_dimensions, expected_state):
config = {
"credentials": {
"auth_type": "Service",
"credentials_json": '{ "client_email": "a@gmail.com", "client_id": "1234", "client_secret": "5678", "private_key": "5678"}',
},
"date_ranges_start_date": "2023-04-01",
"window_in_days": 30,
"property_ids": ["123"],
"custom_reports_array": [
{
"name": "pivot_report",
"dateRanges": [{"startDate": "2020-09-01", "endDate": "2020-09-15"}],
"dimensions": config_dimensions,
"metrics": ["sessions"],
"pivots": [
{"fieldNames": ["browser"], "limit": 5},
{"fieldNames": ["country"], "limit": 250},
{"fieldNames": ["language"], "limit": 15},
],
"cohortSpec": {"enabled": "false"},
}
],
}
source = SourceGoogleAnalyticsDataApi()
config = source._validate_and_transform(config, report_names=set())
config["authenticator"] = source.get_authenticator(config)
report_stream = source.instantiate_report_class(config["custom_reports_array"][0], False, config, page_size=100)
actual_state = report_stream.get_updated_state(current_stream_state={}, latest_record={"date": "20240320"})
assert actual_state == expected_state
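
# Hedged sketch of the cursor rule this parametrization pins down: the state
# only advances when "date" is among the report's dimensions (illustrative
# helper, not the stream's actual get_updated_state):
def updated_state_sketch(dimensions: list, latest_record: dict) -> dict:
    return {"date": latest_record["date"]} if "date" in dimensions else {}

assert updated_state_sketch(["browser", "date"], {"date": "20240320"}) == {"date": "20240320"}
assert updated_state_sketch(["browser"], {"date": "20240320"}) == {}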


@@ -1,164 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import json
import sys
from unittest.mock import Mock, mock_open, patch
import pytest
from source_google_analytics_data_api.utils import (
get_source_defined_primary_key,
serialize_to_date_string,
transform_between_filter,
transform_expression,
transform_in_list_filter,
transform_json,
transform_numeric_filter,
transform_string_filter,
)
class TestSerializeToDateString:
@pytest.mark.parametrize(
"input_date, date_format, date_type, expected",
[
("202105", "%Y-%m-%d", "yearWeek", "2021-02-01"),
("202105", "%Y-%m-%d", "yearMonth", "2021-05-01"),
("202245", "%Y-%m-%d", "yearWeek", "2022-11-07"),
("202210", "%Y-%m-%d", "yearMonth", "2022-10-01"),
("2022", "%Y-%m-%d", "year", "2022-01-01"),
],
)
def test_valid_cases(self, input_date, date_format, date_type, expected):
result = serialize_to_date_string(input_date, date_format, date_type)
assert result == expected
def test_invalid_type(self):
with pytest.raises(ValueError):
serialize_to_date_string("202105", "%Y-%m-%d", "invalidType")
class TestTransformFilters:
def test_transform_string_filter(self):
filter_data = {"value": "test", "matchType": ["partial"], "caseSensitive": True}
expected = {"stringFilter": {"value": "test", "matchType": "partial", "caseSensitive": True}}
result = transform_string_filter(filter_data)
assert result == expected
def test_transform_in_list_filter(self):
filter_data = {"values": ["test1", "test2"], "caseSensitive": False}
expected = {"inListFilter": {"values": ["test1", "test2"], "caseSensitive": False}}
result = transform_in_list_filter(filter_data)
assert result == expected
def test_transform_numeric_filter(self):
filter_data = {"value": {"value_type": "doubleValue", "value": 5.5}, "operation": ["equals"]}
expected = {"numericFilter": {"value": {"doubleValue": 5.5}, "operation": "equals"}}
result = transform_numeric_filter(filter_data)
assert result == expected
@pytest.mark.parametrize(
"filter_data, expected",
[
(
{"fromValue": {"value_type": "doubleValue", "value": "10.5"}, "toValue": {"value_type": "doubleValue", "value": "20.5"}},
{"betweenFilter": {"fromValue": {"doubleValue": 10.5}, "toValue": {"doubleValue": 20.5}}},
),
(
{"fromValue": {"value_type": "stringValue", "value": "hello"}, "toValue": {"value_type": "stringValue", "value": "world"}},
{"betweenFilter": {"fromValue": {"stringValue": "hello"}, "toValue": {"stringValue": "world"}}},
),
(
{"fromValue": {"value_type": "doubleValue", "value": 10.5}, "toValue": {"value_type": "doubleValue", "value": 20.5}},
{"betweenFilter": {"fromValue": {"doubleValue": 10.5}, "toValue": {"doubleValue": 20.5}}},
),
],
)
def test_transform_between_filter(self, filter_data, expected):
result = transform_between_filter(filter_data)
assert result == expected
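
# Hedged sketch of the reshaping test_transform_numeric_filter checks above:
# the {value_type, value} pair collapses into {value_type: value} and the
# single-element operation list is unwrapped (illustrative helper, not the
# utils implementation):
def numeric_filter_sketch(data: dict) -> dict:
    value = {data["value"]["value_type"]: data["value"]["value"]}
    return {"numericFilter": {"value": value, "operation": data["operation"][0]}}

assert numeric_filter_sketch(
    {"value": {"value_type": "doubleValue", "value": 5.5}, "operation": ["equals"]}
) == {"numericFilter": {"value": {"doubleValue": 5.5}, "operation": "equals"}}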
class TestTransformExpression:
@patch("source_google_analytics_data_api.utils.transform_string_filter", Mock(return_value={"stringFilter": "mocked_string_filter"}))
@patch("source_google_analytics_data_api.utils.transform_in_list_filter", Mock(return_value={"inListFilter": "mocked_in_list_filter"}))
@patch("source_google_analytics_data_api.utils.transform_numeric_filter", Mock(return_value={"numericFilter": "mocked_numeric_filter"}))
def test_between_filter(self):
expression = {
"field_name": "some_field",
"filter": {
"filter_name": "betweenFilter",
"fromValue": {"value_type": "doubleValue", "value": "10.5"},
"toValue": {"value_type": "doubleValue", "value": "20.5"},
},
}
expected = {
"filter": {"fieldName": "some_field", "betweenFilter": {"fromValue": {"doubleValue": 10.5}, "toValue": {"doubleValue": 20.5}}}
}
result = transform_expression(expression)
assert result == expected
class TestGetSourceDefinedPrimaryKey:
@pytest.mark.parametrize(
"stream_name, mocked_content, expected",
[
("sample_stream", {"streams": [{"stream": {"name": "sample_stream", "source_defined_primary_key": ["id"]}}]}, ["id"]),
("sample_stream", {"streams": [{"stream": {"name": "different_stream", "source_defined_primary_key": ["id"]}}]}, None),
],
)
def test_primary_key(self, stream_name, mocked_content, expected):
sys.argv = ["script_name", "read", "--catalog", "mocked_catalog_path"]
m = mock_open(read_data=json.dumps(mocked_content))
with patch("builtins.open", m):
with patch("json.loads", return_value=mocked_content):
result = get_source_defined_primary_key(stream_name)
assert result == expected
class TestTransformJson:
@staticmethod
def mock_transform_expression(expression):
return {"transformed": expression}
    # Apply monkeypatch so transform_json uses the mock_transform_expression stub
@pytest.fixture(autouse=True)
def mock_transform_functions(self, monkeypatch):
monkeypatch.setattr("source_google_analytics_data_api.utils.transform_expression", self.mock_transform_expression)
@pytest.mark.parametrize(
"original, expected",
[
(
{
"filter_type": "andGroup",
"expressions": [{"field": "field1", "condition": "cond1"}, {"field": "field2", "condition": "cond2"}],
},
{
"andGroup": {
"expressions": [
{"transformed": {"field": "field1", "condition": "cond1"}},
{"transformed": {"field": "field2", "condition": "cond2"}},
]
}
},
),
(
{"filter_type": "orGroup", "expressions": [{"field": "field1", "condition": "cond1"}]},
{"orGroup": {"expressions": [{"transformed": {"field": "field1", "condition": "cond1"}}]}},
),
(
{"filter_type": "notExpression", "expression": {"field": "field1", "condition": "cond1"}},
{"notExpression": {"transformed": {"field": "field1", "condition": "cond1"}}},
),
(
{"filter_type": "filter", "field": "field1", "condition": "cond1"},
{"transformed": {"condition": "cond1", "field": "field1", "filter_type": "filter"}},
),
({"filter_type": "andGroup"}, {}),
],
)
def test_cases(self, original, expected):
result = transform_json(original)
assert result == expected


@@ -1,17 +0,0 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from typing import Any, MutableMapping
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams import Stream
def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]):
slices = stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state)
for _slice in slices:
records = stream_instance.read_records(sync_mode=SyncMode.incremental, stream_slice=_slice, stream_state=stream_state)
for record in records:
stream_state = stream_instance.get_updated_state(stream_state, record)
yield record