* Connector health: source hubspot, gitlab, snapchat-marketing: fix builds * #2277 source GA: limit page size when calling check * source GA: upd changelog * code formatting * #2277 review fixes
399 lines
15 KiB
Python
399 lines
15 KiB
Python
#
|
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
import datetime
|
|
import random
|
|
from http import HTTPStatus
|
|
from typing import Any, Mapping
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from freezegun import freeze_time
|
|
from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream
|
|
|
|
from .utils import read_incremental
|
|
|
|
json_credentials = """
|
|
{
|
|
"type": "service_account",
|
|
"project_id": "unittest-project-id",
|
|
"private_key_id": "9qf98e52oda52g5ne23al6evnf13649c2u077162c",
|
|
"private_key": "",
|
|
"client_email": "google-analytics-access@unittest-project-id.iam.gserviceaccount.com",
|
|
"client_id": "213243192021686092537",
|
|
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
|
"token_uri": "https://oauth2.googleapis.com/token",
|
|
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
|
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/google-analytics-access%40unittest-project-id.iam.gserviceaccount.com"
|
|
}
|
|
"""
|
|
|
|
|
|
@pytest.fixture
def patch_base_class(mocker):
    """Make the abstract base stream instantiable and provide a sample config.

    Patches ``path``, ``primary_key`` and ``__abstractmethods__`` on
    ``GoogleAnalyticsDataApiBaseStream`` and returns a dict whose ``"config"``
    entry is the connector configuration shared by the tests in this module.
    """
    # Mock abstract methods to enable instantiating abstract class
    report_path = f"{random.randint(100000000, 999999999)}:runReport"
    patched_attributes = (
        ("path", report_path),
        ("primary_key", "test_primary_key"),
        ("__abstractmethods__", set()),
    )
    for attribute, replacement in patched_attributes:
        mocker.patch.object(GoogleAnalyticsDataApiBaseStream, attribute, replacement)

    # The report start date is always "yesterday" relative to the current clock.
    yesterday = datetime.datetime.now() - datetime.timedelta(days=1)

    config = {
        "property_id": "496180525",
        "credentials": {"auth_type": "Service", "credentials_json": json_credentials},
        "dimensions": ["date", "deviceCategory", "operatingSystem", "browser"],
        "metrics": [
            "totalUsers",
            "newUsers",
            "sessions",
            "sessionsPerUser",
            "averageSessionDuration",
            "screenPageViews",
            "screenPageViewsPerSession",
            "bounceRate",
        ],
        "date_ranges_start_date": yesterday.strftime("%Y-%m-%d"),
    }
    return {"config": config}
|
|
|
|
|
|
def test_request_params(patch_base_class):
    """request_params() must return an empty dict whatever state, slice or page token it gets."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    params = stream.request_params(
        stream_state=MagicMock(),
        stream_slice=MagicMock(),
        next_page_token=MagicMock(),
    )
    assert params == {}
|
|
|
|
|
|
def test_request_body_json(patch_base_class):
    """Check the runReport request body built for the first page of a slice."""
    date_range = MagicMock()

    metric_names = (
        "totalUsers",
        "newUsers",
        "sessions",
        "sessionsPerUser",
        "averageSessionDuration",
        "screenPageViews",
        "screenPageViewsPerSession",
        "bounceRate",
    )
    dimension_names = ("date", "deviceCategory", "operatingSystem", "browser")

    expected_body_json = {
        "metrics": [{"name": metric} for metric in metric_names],
        "dimensions": [{"name": dimension} for dimension in dimension_names],
        # The stream slice is expected to be forwarded untouched as the only date range.
        "dateRanges": [date_range],
        "returnPropertyQuota": True,
        "offset": "0",
        "limit": "100000",
    }

    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    actual_body_json = stream.request_body_json(
        stream_state=MagicMock(),
        stream_slice=date_range,
        next_page_token=None,
    )

    assert actual_body_json == expected_body_json
|
|
|
|
|
|
def test_changed_page_size(patch_base_class):
    """Overriding ``page_size`` on the stream must be reflected in the request body ``limit``."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    stream.page_size = 100

    body = stream.request_body_json(
        stream_state=MagicMock(),
        stream_slice=MagicMock(),
        next_page_token=None,
    )

    assert body["limit"] == "100"
|
|
|
|
|
|
def test_next_page_token_equal_chunk(patch_base_class):
    """Pagination when the total row count (300000) is an exact multiple of 100000.

    Three consecutive calls are expected to yield offsets 100000 and 200000,
    then ``None``.
    """
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])

    response = MagicMock()
    # One fresh payload per expected next_page_token() call.
    response.json.side_effect = [{"rowCount": 300000} for _ in range(3)]

    for expected_token in ({"offset": 100000}, {"offset": 200000}, None):
        assert stream.next_page_token(response=response) == expected_token
|
|
|
|
|
|
def test_next_page_token(patch_base_class):
    """Pagination when the total row count (450000) is not a multiple of 100000.

    Five consecutive calls are expected to yield offsets 100000 through 400000,
    then ``None``.
    """
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])

    response = MagicMock()
    # One fresh payload per expected next_page_token() call.
    response.json.side_effect = [{"rowCount": 450000} for _ in range(5)]

    expected_tokens = [{"offset": offset} for offset in (100000, 200000, 300000, 400000)]
    expected_tokens.append(None)

    for expected_token in expected_tokens:
        assert stream.next_page_token(response=response) == expected_token
|
|
|
|
|
|
def test_parse_response(patch_base_class):
    """Check that a runReport payload is turned into flat, typed records.

    The test expects one record per entry in ``rows``: dimension values keyed by
    their header names, metric values converted according to the header type
    (integers for ``TYPE_INTEGER``, floats for ``TYPE_FLOAT``/``TYPE_SECONDS``),
    plus the configured ``property_id``.
    """
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])

    response_data = {
        "dimensionHeaders": [{"name": "date"}, {"name": "deviceCategory"}, {"name": "operatingSystem"}, {"name": "browser"}],
        "metricHeaders": [
            {"name": "totalUsers", "type": "TYPE_INTEGER"},
            {"name": "newUsers", "type": "TYPE_INTEGER"},
            {"name": "sessions", "type": "TYPE_INTEGER"},
            {"name": "sessionsPerUser", "type": "TYPE_FLOAT"},
            {"name": "averageSessionDuration", "type": "TYPE_SECONDS"},
            {"name": "screenPageViews", "type": "TYPE_INTEGER"},
            {"name": "screenPageViewsPerSession", "type": "TYPE_FLOAT"},
            {"name": "bounceRate", "type": "TYPE_FLOAT"},
        ],
        "rows": [
            {
                "dimensionValues": [{"value": "20220731"}, {"value": "desktop"}, {"value": "Macintosh"}, {"value": "Chrome"}],
                "metricValues": [
                    {"value": "344"},
                    {"value": "169"},
                    {"value": "420"},
                    {"value": "1.2209302325581395"},
                    {"value": "194.76313766428572"},
                    {"value": "614"},
                    {"value": "1.4619047619047618"},
                    {"value": "0.47857142857142859"},
                ],
            },
            {
                "dimensionValues": [{"value": "20220731"}, {"value": "desktop"}, {"value": "Windows"}, {"value": "Chrome"}],
                "metricValues": [
                    {"value": "322"},
                    {"value": "211"},
                    {"value": "387"},
                    {"value": "1.2018633540372672"},
                    {"value": "249.21595714211884"},
                    {"value": "669"},
                    {"value": "1.7286821705426356"},
                    {"value": "0.42377260981912146"},
                ],
            },
        ],
        "rowCount": 54,
        "metadata": {"currencyCode": "USD", "timeZone": "America/Los_Angeles"},
        "kind": "analyticsData#runReport",
    }

    expected_data = [
        {
            "property_id": "496180525",
            "date": "20220731",
            "deviceCategory": "desktop",
            "operatingSystem": "Macintosh",
            "browser": "Chrome",
            "totalUsers": 344,
            "newUsers": 169,
            "sessions": 420,
            "sessionsPerUser": 1.2209302325581395,
            "averageSessionDuration": 194.76313766428572,
            "screenPageViews": 614,
            "screenPageViewsPerSession": 1.4619047619047618,
            "bounceRate": 0.47857142857142859,
        },
        {
            "property_id": "496180525",
            "date": "20220731",
            "deviceCategory": "desktop",
            "operatingSystem": "Windows",
            "browser": "Chrome",
            "totalUsers": 322,
            "newUsers": 211,
            "sessions": 387,
            "sessionsPerUser": 1.2018633540372672,
            "averageSessionDuration": 249.21595714211884,
            "screenPageViews": 669,
            "screenPageViewsPerSession": 1.7286821705426356,
            "bounceRate": 0.42377260981912146,
        },
    ]

    response = MagicMock()
    response.json.return_value = response_data
    inputs = {"response": response, "stream_state": {}}

    # parse_response() is consumed into a list, so the value is a list of record
    # mappings rather than a single mapping (the previous annotation was wrong).
    actual_records: list[Mapping[str, Any]] = list(stream.parse_response(**inputs))
    assert actual_records == expected_data
|
|
|
|
|
|
def test_request_headers(patch_base_class):
    """The stream is expected to add no custom request headers."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    headers = stream.request_headers(stream_slice=None, stream_state=None, next_page_token=None)
    assert headers == {}
|
|
|
|
|
|
def test_http_method(patch_base_class):
    """Reports are expected to be requested with HTTP POST."""
    config = patch_base_class["config"]
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=config)
    assert stream.http_method == "POST"
|
|
|
|
|
|
@pytest.mark.parametrize(
    "http_status, should_retry",
    [
        (HTTPStatus.OK, False),
        (HTTPStatus.BAD_REQUEST, False),
        (HTTPStatus.TOO_MANY_REQUESTS, True),
        (HTTPStatus.INTERNAL_SERVER_ERROR, True),
    ],
)
def test_should_retry(patch_base_class, http_status, should_retry):
    """should_retry() is expected to be true for 429 and 500, false for 200 and 400."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    # Only the status code of the fake response is set explicitly.
    fake_response = MagicMock(status_code=http_status)
    assert stream.should_retry(fake_response) == should_retry
|
|
|
|
|
|
def test_backoff_time(patch_base_class):
    """backoff_time() is expected to return None for a mocked response."""
    no_custom_backoff = None
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    fake_response = MagicMock()
    assert stream.backoff_time(fake_response) == no_custom_backoff
|
|
|
|
|
|
@freeze_time("2023-01-01 00:00:00")
def test_stream_slices():
    """Check date-range slicing for several window sizes with the clock frozen at 2023-01-01.

    Each case is (start date, window in days, expected slices); the expected
    slices run up to and including the frozen date, and the last one may be
    shorter than the window.
    """
    cases = [
        (
            datetime.date(2022, 12, 29),
            1,
            [
                {"startDate": "2022-12-29", "endDate": "2022-12-29"},
                {"startDate": "2022-12-30", "endDate": "2022-12-30"},
                {"startDate": "2022-12-31", "endDate": "2022-12-31"},
                {"startDate": "2023-01-01", "endDate": "2023-01-01"},
            ],
        ),
        (
            datetime.date(2022, 12, 28),
            2,
            [
                {"startDate": "2022-12-28", "endDate": "2022-12-29"},
                {"startDate": "2022-12-30", "endDate": "2022-12-31"},
                {"startDate": "2023-01-01", "endDate": "2023-01-01"},
            ],
        ),
        (
            datetime.date(2022, 12, 20),
            5,
            [
                {"startDate": "2022-12-20", "endDate": "2022-12-24"},
                {"startDate": "2022-12-25", "endDate": "2022-12-29"},
                {"startDate": "2022-12-30", "endDate": "2023-01-01"},
            ],
        ),
    ]

    for start_date, window_in_days, expected_slices in cases:
        config = {"date_ranges_start_date": start_date, "window_in_days": window_in_days, "dimensions": ["date"]}
        stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
        slices = list(stream.stream_slices(sync_mode=None))
        assert slices == expected_slices
|
|
|
|
|
|
def test_read_incremental(requests_mock):
    """Run two incremental reads against a mocked runReport endpoint.

    The first read (clock frozen at 2023-01-01) is expected to return the four
    daily records from 20221229 to 20230101 and leave the state at
    ``{"date": "20230101"}``. The second read (clock frozen at 2023-01-02) is
    expected to return refreshed records from 20221230 to 20230102.
    """
    config = {
        "property_id": 123,
        "date_ranges_start_date": datetime.date(2022, 12, 29),
        "window_in_days": 1,
        "dimensions": ["date"],
        "metrics": ["totalUsers"],
    }

    stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
    stream_state = {}

    def single_row_report(date, total_users):
        # Minimal runReport payload holding exactly one row for the given day.
        return {
            "dimensionHeaders": [{"name": "date"}],
            "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
            "rows": [{"dimensionValues": [{"value": date}], "metricValues": [{"value": total_users}]}],
            "rowCount": 1,
        }

    # Payloads are served strictly in this order, one per POST request.
    responses = [
        single_row_report("20221229", "100"),
        single_row_report("20221230", "110"),
        single_row_report("20221231", "120"),
        single_row_report("20230101", "130"),
        # 2-nd incremental read
        single_row_report("20221230", "112"),
        single_row_report("20221231", "125"),
        single_row_report("20230101", "140"),
        single_row_report("20230102", "150"),
    ]

    def next_report(request, context):
        # Hand out the queued payloads first-in, first-out.
        return responses.pop(0)

    requests_mock.register_uri(
        "POST",
        "https://analyticsdata.googleapis.com/v1beta/properties/123:runReport",
        json=next_report,
    )

    with freeze_time("2023-01-01 12:00:00"):
        first_read = list(read_incremental(stream, stream_state))

    assert first_read == [
        {"date": "20221229", "totalUsers": 100, "property_id": 123},
        {"date": "20221230", "totalUsers": 110, "property_id": 123},
        {"date": "20221231", "totalUsers": 120, "property_id": 123},
        {"date": "20230101", "totalUsers": 130, "property_id": 123},
    ]

    assert stream_state == {"date": "20230101"}

    with freeze_time("2023-01-02 12:00:00"):
        second_read = list(read_incremental(stream, stream_state))

    assert second_read == [
        {"date": "20221230", "totalUsers": 112, "property_id": 123},
        {"date": "20221231", "totalUsers": 125, "property_id": 123},
        {"date": "20230101", "totalUsers": 140, "property_id": 123},
        {"date": "20230102", "totalUsers": 150, "property_id": 123},
    ]
|