476 lines
19 KiB
Python
476 lines
19 KiB
Python
#
|
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
import datetime
|
|
import json
|
|
import random
|
|
from http import HTTPStatus
|
|
from typing import Any, Mapping
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import ErrorResolution, FailureType, ResponseAction
|
|
from freezegun import freeze_time
|
|
from requests.models import Response
|
|
from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream, SourceGoogleAnalyticsDataApi
|
|
|
|
from .utils import read_incremental
|
|
|
|
|
|
@pytest.fixture
|
|
def patch_base_class(mocker, config, config_without_date_range):
|
|
# Mock abstract methods to enable instantiating abstract class
|
|
mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "path", f"{random.randint(100000000, 999999999)}:runReport")
|
|
mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "primary_key", "test_primary_key")
|
|
mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "__abstractmethods__", set())
|
|
|
|
return {"config": config, "config_without_date_range": config_without_date_range}
|
|
|
|
|
|
def test_json_schema(requests_mock, patch_base_class):
|
|
requests_mock.register_uri(
|
|
"POST", "https://oauth2.googleapis.com/token", json={"access_token": "access_token", "expires_in": 3600, "token_type": "Bearer"}
|
|
)
|
|
requests_mock.register_uri(
|
|
"GET",
|
|
"https://analyticsdata.googleapis.com/v1beta/properties/108176369/metadata",
|
|
json={
|
|
"dimensions": [{"apiName": "date"}, {"apiName": "country"}, {"apiName": "language"}, {"apiName": "browser"}],
|
|
"metrics": [{"apiName": "totalUsers"}, {"apiName": "screenPageViews"}, {"apiName": "sessions"}],
|
|
},
|
|
)
|
|
schema = GoogleAnalyticsDataApiBaseStream(
|
|
authenticator=MagicMock(), config={"authenticator": MagicMock(), **patch_base_class["config_without_date_range"]}
|
|
).get_json_schema()
|
|
|
|
for d in patch_base_class["config_without_date_range"]["dimensions"]:
|
|
assert d in schema["properties"]
|
|
|
|
for p in patch_base_class["config_without_date_range"]["metrics"]:
|
|
assert p in schema["properties"]
|
|
|
|
assert "startDate" in schema["properties"]
|
|
assert "endDate" in schema["properties"]
|
|
|
|
|
|
def test_request_params(patch_base_class):
|
|
assert (
|
|
GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_params(
|
|
stream_state=MagicMock(), stream_slice=MagicMock(), next_page_token=MagicMock()
|
|
)
|
|
== {}
|
|
)
|
|
|
|
|
|
def test_request_body_json(patch_base_class):
|
|
stream_slice = {"startDate": "2024-01-01", "endDate": "2024-01-31"}
|
|
request_body_params = {"stream_state": MagicMock(), "stream_slice": stream_slice, "next_page_token": None}
|
|
|
|
expected_body_json = {
|
|
"metrics": [
|
|
{"name": "totalUsers"},
|
|
{"name": "newUsers"},
|
|
{"name": "sessions"},
|
|
{"name": "sessionsPerUser"},
|
|
{"name": "averageSessionDuration"},
|
|
{"name": "screenPageViews"},
|
|
{"name": "screenPageViewsPerSession"},
|
|
{"name": "bounceRate"},
|
|
],
|
|
"dimensions": [
|
|
{"name": "date"},
|
|
{"name": "deviceCategory"},
|
|
{"name": "operatingSystem"},
|
|
{"name": "browser"},
|
|
],
|
|
"keepEmptyRows": True,
|
|
"dateRanges": [{
|
|
"startDate": request_body_params["stream_slice"]["startDate"],
|
|
"endDate": request_body_params["stream_slice"]["endDate"]
|
|
}],
|
|
"returnPropertyQuota": True,
|
|
"offset": str(0),
|
|
"limit": "100000",
|
|
}
|
|
|
|
request_body_json = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_body_json(
|
|
**request_body_params
|
|
)
|
|
assert request_body_json == expected_body_json
|
|
|
|
|
|
def test_changed_page_size(patch_base_class):
|
|
request_body_params = {"stream_state": MagicMock(), "stream_slice": MagicMock(), "next_page_token": None}
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
stream.page_size = 100
|
|
request_body_json = stream.request_body_json(**request_body_params)
|
|
assert request_body_json["limit"] == "100"
|
|
|
|
|
|
def test_next_page_token_equal_chunk(patch_base_class):
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
response = MagicMock()
|
|
response.json.side_effect = [
|
|
{"rowCount": 300000},
|
|
{"rowCount": 300000},
|
|
{"rowCount": 300000},
|
|
]
|
|
inputs = {"response": response}
|
|
|
|
expected_tokens = [
|
|
{"offset": 100000},
|
|
{"offset": 200000},
|
|
None,
|
|
]
|
|
|
|
for expected_token in expected_tokens:
|
|
assert stream.next_page_token(**inputs) == expected_token
|
|
|
|
|
|
def test_next_page_token(patch_base_class):
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
response = MagicMock()
|
|
response.json.side_effect = [
|
|
{"rowCount": 450000},
|
|
{"rowCount": 450000},
|
|
{"rowCount": 450000},
|
|
{"rowCount": 450000},
|
|
{"rowCount": 450000},
|
|
]
|
|
inputs = {"response": response}
|
|
|
|
expected_tokens = [
|
|
{"offset": 100000},
|
|
{"offset": 200000},
|
|
{"offset": 300000},
|
|
{"offset": 400000},
|
|
None,
|
|
]
|
|
|
|
for expected_token in expected_tokens:
|
|
assert stream.next_page_token(**inputs) == expected_token
|
|
|
|
|
|
def test_parse_response(patch_base_class):
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
|
|
response_data = {
|
|
"dimensionHeaders": [{"name": "date"}, {"name": "deviceCategory"}, {"name": "operatingSystem"}, {"name": "browser"}],
|
|
"metricHeaders": [
|
|
{"name": "totalUsers", "type": "TYPE_INTEGER"},
|
|
{"name": "newUsers", "type": "TYPE_INTEGER"},
|
|
{"name": "sessions", "type": "TYPE_INTEGER"},
|
|
{"name": "sessionsPerUser:parameter", "type": "TYPE_FLOAT"},
|
|
{"name": "averageSessionDuration", "type": "TYPE_SECONDS"},
|
|
{"name": "screenPageViews", "type": "TYPE_INTEGER"},
|
|
{"name": "screenPageViewsPerSession", "type": "TYPE_FLOAT"},
|
|
{"name": "bounceRate", "type": "TYPE_FLOAT"},
|
|
],
|
|
"rows": [
|
|
{
|
|
"dimensionValues": [{"value": "20220731"}, {"value": "desktop"}, {"value": "Macintosh"}, {"value": "Chrome"}],
|
|
"metricValues": [
|
|
{"value": "344.234"}, # This is a float will be converted to int
|
|
{"value": "169.345345"}, # This is a float will be converted to int
|
|
{"value": "420"},
|
|
{"value": "1.2209302325581395"},
|
|
{"value": "194.76313766428572"},
|
|
{"value": "614"},
|
|
{"value": "1.4619047619047618"},
|
|
{"value": "0.47857142857142859"},
|
|
],
|
|
},
|
|
{
|
|
"dimensionValues": [{"value": "20220731"}, {"value": "desktop"}, {"value": "Windows"}, {"value": "Chrome"}],
|
|
"metricValues": [
|
|
{"value": "322"},
|
|
{"value": "211"},
|
|
{"value": "387"},
|
|
{"value": "1.2018633540372672"},
|
|
{"value": "249.21595714211884"},
|
|
{"value": "669"},
|
|
{"value": "1.7286821705426356"},
|
|
{"value": "0.42377260981912146"},
|
|
],
|
|
},
|
|
],
|
|
"rowCount": 54,
|
|
"metadata": {"currencyCode": "USD", "timeZone": "America/Los_Angeles"},
|
|
"kind": "analyticsData#runReport",
|
|
}
|
|
|
|
expected_data = [
|
|
{
|
|
"property_id": "108176369",
|
|
"date": "20220731",
|
|
"deviceCategory": "desktop",
|
|
"operatingSystem": "Macintosh",
|
|
"browser": "Chrome",
|
|
"totalUsers": 344,
|
|
"newUsers": 169,
|
|
"sessions": 420,
|
|
"sessionsPerUser:parameter": 1.2209302325581395,
|
|
"averageSessionDuration": 194.76313766428572,
|
|
"screenPageViews": 614,
|
|
"screenPageViewsPerSession": 1.4619047619047618,
|
|
"bounceRate": 0.47857142857142859,
|
|
},
|
|
{
|
|
"property_id": "108176369",
|
|
"date": "20220731",
|
|
"deviceCategory": "desktop",
|
|
"operatingSystem": "Windows",
|
|
"browser": "Chrome",
|
|
"totalUsers": 322,
|
|
"newUsers": 211,
|
|
"sessions": 387,
|
|
"sessionsPerUser:parameter": 1.2018633540372672,
|
|
"averageSessionDuration": 249.21595714211884,
|
|
"screenPageViews": 669,
|
|
"screenPageViewsPerSession": 1.7286821705426356,
|
|
"bounceRate": 0.42377260981912146,
|
|
},
|
|
]
|
|
|
|
response = MagicMock()
|
|
response.json.return_value = response_data
|
|
inputs = {"response": response, "stream_state": {}}
|
|
actual_records: Mapping[str, Any] = list(stream.parse_response(**inputs))
|
|
assert actual_records == expected_data
|
|
|
|
|
|
def test_request_headers(patch_base_class):
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None}
|
|
expected_headers = {}
|
|
assert stream.request_headers(**inputs) == expected_headers
|
|
|
|
|
|
def test_http_method(patch_base_class):
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
expected_method = "POST"
|
|
assert stream.http_method == expected_method
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
("http_status", "response_action_expected", "response_body"),
|
|
[
|
|
(HTTPStatus.OK, ResponseAction.SUCCESS, {}),
|
|
(HTTPStatus.BAD_REQUEST, ResponseAction.FAIL, {}),
|
|
(HTTPStatus.TOO_MANY_REQUESTS, ResponseAction.RETRY, {}),
|
|
(HTTPStatus.TOO_MANY_REQUESTS, ResponseAction.RETRY, {"error": {"message": "Exhausted concurrent requests quota."}}),
|
|
(HTTPStatus.INTERNAL_SERVER_ERROR, ResponseAction.RETRY, {}),
|
|
],
|
|
)
|
|
def test_should_retry(patch_base_class, http_status, response_action_expected, response_body):
|
|
response_mock = Response()
|
|
response_mock.status_code = http_status
|
|
if response_body:
|
|
json_data = response_body
|
|
response_mock._content = str.encode(json.dumps(json_data))
|
|
response_mock.headers = {'Content-Type': 'application/json'}
|
|
response_mock.encoding = 'utf-8'
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
assert stream.get_error_handler().interpret_response(response_mock).response_action == response_action_expected
|
|
|
|
|
|
def test_backoff_time(patch_base_class):
|
|
response_mock = Response()
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
|
|
expected_backoff_time = None
|
|
assert stream.get_backoff_strategy().backoff_time(response_mock) == expected_backoff_time
|
|
|
|
|
|
@freeze_time("2023-01-01 00:00:00")
|
|
def test_stream_slices():
|
|
config = {"date_ranges_start_date": datetime.date(2022, 12, 29), "window_in_days": 1, "dimensions": ["date"]}
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
|
|
slices = list(stream.stream_slices(sync_mode=None))
|
|
assert slices == [
|
|
{"startDate": "2022-12-29", "endDate": "2022-12-29"},
|
|
{"startDate": "2022-12-30", "endDate": "2022-12-30"},
|
|
{"startDate": "2022-12-31", "endDate": "2022-12-31"},
|
|
{"startDate": "2023-01-01", "endDate": "2023-01-01"},
|
|
]
|
|
|
|
config = {"date_ranges_start_date": datetime.date(2022, 12, 28), "window_in_days": 2, "dimensions": ["date"]}
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
|
|
slices = list(stream.stream_slices(sync_mode=None))
|
|
assert slices == [
|
|
{"startDate": "2022-12-28", "endDate": "2022-12-29"},
|
|
{"startDate": "2022-12-30", "endDate": "2022-12-31"},
|
|
{"startDate": "2023-01-01", "endDate": "2023-01-01"},
|
|
]
|
|
|
|
config = {"date_ranges_start_date": datetime.date(2022, 12, 20), "window_in_days": 5, "dimensions": ["date"]}
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
|
|
slices = list(stream.stream_slices(sync_mode=None))
|
|
assert slices == [
|
|
{"startDate": "2022-12-20", "endDate": "2022-12-24"},
|
|
{"startDate": "2022-12-25", "endDate": "2022-12-29"},
|
|
{"startDate": "2022-12-30", "endDate": "2023-01-01"},
|
|
]
|
|
|
|
@freeze_time("2023-01-01 00:00:00")
|
|
def test_full_refresh():
|
|
"""
|
|
Test case when full refresh state is used
|
|
"""
|
|
config = {"date_ranges_start_date": datetime.date(2022, 12, 29), "window_in_days": 1, "dimensions": ["browser", "country", "language"]}
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
|
|
full_refresh_state = {
|
|
"__ab_full_refresh_state_message": True
|
|
}
|
|
slices = list(stream.stream_slices(sync_mode=None, stream_state=full_refresh_state))
|
|
assert slices == [
|
|
{"startDate": "2022-12-29", "endDate": "2022-12-29"},
|
|
{"startDate": "2022-12-30", "endDate": "2022-12-30"},
|
|
{"startDate": "2022-12-31", "endDate": "2022-12-31"},
|
|
{"startDate": "2023-01-01", "endDate": "2023-01-01"},
|
|
]
|
|
|
|
|
|
def test_read_incremental(requests_mock):
|
|
config = {
|
|
"property_ids": [123],
|
|
"property_id": 123,
|
|
"date_ranges_start_date": datetime.date(2022, 1, 6),
|
|
"window_in_days": 1,
|
|
"dimensions": ["yearWeek"],
|
|
"metrics": ["totalUsers"],
|
|
}
|
|
|
|
stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
|
|
stream_state = {}
|
|
|
|
responses = [
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202201"}], "metricValues": [{"value": "100"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202201"}], "metricValues": [{"value": "110"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202201"}], "metricValues": [{"value": "120"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "130"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
# 2-nd incremental read
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "112"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "125"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "140"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
{
|
|
"dimensionHeaders": [{"name": "yearWeek"}],
|
|
"metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
|
|
"rows": [{"dimensionValues": [{"value": "202202"}], "metricValues": [{"value": "150"}]}],
|
|
"rowCount": 1,
|
|
},
|
|
]
|
|
|
|
requests_mock.register_uri(
|
|
"POST",
|
|
"https://analyticsdata.googleapis.com/v1beta/properties/123:runReport",
|
|
json=lambda request, context: responses.pop(0),
|
|
)
|
|
|
|
with freeze_time("2022-01-09 12:00:00"):
|
|
records = list(read_incremental(stream, stream_state))
|
|
print(records)
|
|
assert records == [
|
|
{"property_id": 123, "yearWeek": "202201", "totalUsers": 100, "startDate": "2022-01-06", "endDate": "2022-01-06"},
|
|
{"property_id": 123, "yearWeek": "202201", "totalUsers": 110, "startDate": "2022-01-07", "endDate": "2022-01-07"},
|
|
{"property_id": 123, "yearWeek": "202201", "totalUsers": 120, "startDate": "2022-01-08", "endDate": "2022-01-08"},
|
|
{"property_id": 123, "yearWeek": "202202", "totalUsers": 130, "startDate": "2022-01-09", "endDate": "2022-01-09"},
|
|
]
|
|
|
|
assert stream_state == {"yearWeek": "202202"}
|
|
|
|
with freeze_time("2022-01-10 12:00:00"):
|
|
records = list(read_incremental(stream, stream_state))
|
|
|
|
assert records == [
|
|
{"property_id": 123, "yearWeek": "202202", "totalUsers": 112, "startDate": "2022-01-08", "endDate": "2022-01-08"},
|
|
{"property_id": 123, "yearWeek": "202202", "totalUsers": 125, "startDate": "2022-01-09", "endDate": "2022-01-09"},
|
|
{"property_id": 123, "yearWeek": "202202", "totalUsers": 140, "startDate": "2022-01-10", "endDate": "2022-01-10"},
|
|
]
|
|
|
|
@pytest.mark.parametrize(
|
|
"config_dimensions, expected_state",
|
|
[
|
|
pytest.param(["browser", "country", "language", "date"], {"date": "20240320"}, id="test_date_no_cursor_field_dimension"),
|
|
pytest.param(["browser", "country", "language"], {}, id="test_date_cursor_field_dimension"),
|
|
]
|
|
)
|
|
def test_get_updated_state(config_dimensions, expected_state):
|
|
config = {
|
|
"credentials": {
|
|
"auth_type": "Service",
|
|
"credentials_json": "{ \"client_email\": \"a@gmail.com\", \"client_id\": \"1234\", \"client_secret\": \"5678\", \"private_key\": \"5678\"}"
|
|
},
|
|
"date_ranges_start_date": "2023-04-01",
|
|
"window_in_days": 30,
|
|
"property_ids": ["123"],
|
|
"custom_reports_array": [
|
|
{
|
|
"name": "pivot_report",
|
|
"dateRanges": [{"startDate": "2020-09-01", "endDate": "2020-09-15"}],
|
|
"dimensions": config_dimensions,
|
|
"metrics": ["sessions"],
|
|
"pivots": [
|
|
{
|
|
"fieldNames": ["browser"],
|
|
"limit": 5
|
|
},
|
|
{
|
|
"fieldNames": ["country"],
|
|
"limit": 250
|
|
},
|
|
{
|
|
"fieldNames": ["language"],
|
|
"limit": 15
|
|
}
|
|
],
|
|
"cohortSpec": {
|
|
"enabled": "false"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
source = SourceGoogleAnalyticsDataApi()
|
|
config = source._validate_and_transform(config, report_names=set())
|
|
config["authenticator"] = source.get_authenticator(config)
|
|
report_stream = source.instantiate_report_class(config["custom_reports_array"][0], False, config, page_size=100)
|
|
|
|
actual_state = report_stream.get_updated_state(current_stream_state={}, latest_record={"date": "20240320"})
|
|
|
|
assert actual_state == expected_state
|