1
0
mirror of synced 2025-12-31 06:05:12 -05:00
Files
airbyte/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py
Denys Davydov 3d14fc721f 🐛 Source GA Data API: change page size when calling check() (#27718)
* Connector health: source hubspot, gitlab, snapchat-marketing: fix builds

* #2277 source GA: limit page size when calling check

* source GA: upd changelog

* code formatting

* #2277 review fixes
2023-06-29 10:52:04 +03:00

399 lines
15 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import datetime
import random
from http import HTTPStatus
from typing import Any, Mapping
from unittest.mock import MagicMock
import pytest
from freezegun import freeze_time
from source_google_analytics_data_api.source import GoogleAnalyticsDataApiBaseStream
from .utils import read_incremental
json_credentials = """
{
"type": "service_account",
"project_id": "unittest-project-id",
"private_key_id": "9qf98e52oda52g5ne23al6evnf13649c2u077162c",
"private_key": "",
"client_email": "google-analytics-access@unittest-project-id.iam.gserviceaccount.com",
"client_id": "213243192021686092537",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/google-analytics-access%40unittest-project-id.iam.gserviceaccount.com"
}
"""
@pytest.fixture
def patch_base_class(mocker):
    """Make the abstract base stream instantiable and return a ready-made config.

    Patches ``path``/``primary_key`` with stand-in values and clears
    ``__abstractmethods__`` so ``GoogleAnalyticsDataApiBaseStream`` can be
    constructed directly in tests.
    """
    fake_path = f"{random.randint(100000000, 999999999)}:runReport"
    mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "path", fake_path)
    mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "primary_key", "test_primary_key")
    mocker.patch.object(GoogleAnalyticsDataApiBaseStream, "__abstractmethods__", set())

    yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
    config = {
        "property_id": "496180525",
        "credentials": {"auth_type": "Service", "credentials_json": json_credentials},
        "dimensions": ["date", "deviceCategory", "operatingSystem", "browser"],
        "metrics": [
            "totalUsers",
            "newUsers",
            "sessions",
            "sessionsPerUser",
            "averageSessionDuration",
            "screenPageViews",
            "screenPageViewsPerSession",
            "bounceRate",
        ],
        "date_ranges_start_date": yesterday.strftime("%Y-%m-%d"),
    }
    return {"config": config}
def test_request_params(patch_base_class):
    """URL query parameters are always empty: the request body carries everything."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    params = stream.request_params(stream_state=MagicMock(), stream_slice=MagicMock(), next_page_token=MagicMock())
    assert params == {}
def test_request_body_json(patch_base_class):
    """The JSON body wraps configured metrics/dimensions and the default paging values."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    stream_slice = MagicMock()
    body = stream.request_body_json(stream_state=MagicMock(), stream_slice=stream_slice, next_page_token=None)

    metric_names = [
        "totalUsers",
        "newUsers",
        "sessions",
        "sessionsPerUser",
        "averageSessionDuration",
        "screenPageViews",
        "screenPageViewsPerSession",
        "bounceRate",
    ]
    dimension_names = ["date", "deviceCategory", "operatingSystem", "browser"]
    assert body == {
        "metrics": [{"name": name} for name in metric_names],
        "dimensions": [{"name": name} for name in dimension_names],
        "dateRanges": [stream_slice],
        "returnPropertyQuota": True,
        "offset": "0",
        "limit": "100000",
    }
def test_changed_page_size(patch_base_class):
    """Overriding ``page_size`` must be reflected in the request body's ``limit``."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    stream.page_size = 100
    body = stream.request_body_json(stream_state=MagicMock(), stream_slice=MagicMock(), next_page_token=None)
    assert body["limit"] == "100"
def test_next_page_token_equal_chunk(patch_base_class):
    """With rowCount an exact multiple of the 100k page size, paging stops after the last full page."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    response = MagicMock()
    response.json.side_effect = [{"rowCount": 300000}] * 3
    for expected in ({"offset": 100000}, {"offset": 200000}, None):
        assert stream.next_page_token(response=response) == expected
def test_next_page_token(patch_base_class):
    """Offsets advance in 100k steps until rowCount (450k) is exhausted, then None."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    response = MagicMock()
    response.json.side_effect = [{"rowCount": 450000}] * 5
    expected_tokens = [{"offset": offset} for offset in range(100000, 500000, 100000)] + [None]
    for expected in expected_tokens:
        assert stream.next_page_token(response=response) == expected
def test_parse_response(patch_base_class):
    """Rows are zipped with their headers into flat records, with metric values
    coerced to int/float according to the declared metric type and the
    configured property_id attached to every record."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])

    dimension_names = ["date", "deviceCategory", "operatingSystem", "browser"]
    metric_specs = [
        ("totalUsers", "TYPE_INTEGER"),
        ("newUsers", "TYPE_INTEGER"),
        ("sessions", "TYPE_INTEGER"),
        ("sessionsPerUser", "TYPE_FLOAT"),
        ("averageSessionDuration", "TYPE_SECONDS"),
        ("screenPageViews", "TYPE_INTEGER"),
        ("screenPageViewsPerSession", "TYPE_FLOAT"),
        ("bounceRate", "TYPE_FLOAT"),
    ]
    # (dimension values, metric values) per API row, as the API returns them (strings).
    raw_rows = [
        (
            ["20220731", "desktop", "Macintosh", "Chrome"],
            ["344", "169", "420", "1.2209302325581395", "194.76313766428572", "614", "1.4619047619047618", "0.47857142857142859"],
        ),
        (
            ["20220731", "desktop", "Windows", "Chrome"],
            ["322", "211", "387", "1.2018633540372672", "249.21595714211884", "669", "1.7286821705426356", "0.42377260981912146"],
        ),
    ]

    response_data = {
        "dimensionHeaders": [{"name": name} for name in dimension_names],
        "metricHeaders": [{"name": name, "type": type_} for name, type_ in metric_specs],
        "rows": [
            {
                "dimensionValues": [{"value": value} for value in dims],
                "metricValues": [{"value": value} for value in metrics],
            }
            for dims, metrics in raw_rows
        ],
        "rowCount": 54,
        "metadata": {"currencyCode": "USD", "timeZone": "America/Los_Angeles"},
        "kind": "analyticsData#runReport",
    }

    def typed(value, type_):
        # Integer metrics come back as ints, float/seconds metrics as floats.
        return int(value) if type_ == "TYPE_INTEGER" else float(value)

    expected_records = [
        {
            "property_id": "496180525",
            **dict(zip(dimension_names, dims)),
            **{name: typed(value, type_) for (name, type_), value in zip(metric_specs, metrics)},
        }
        for dims, metrics in raw_rows
    ]

    response = MagicMock()
    response.json.return_value = response_data
    actual_records = list(stream.parse_response(response=response, stream_state={}))
    assert actual_records == expected_records
def test_request_headers(patch_base_class):
    """No extra HTTP headers are added beyond what the authenticator supplies."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    headers = stream.request_headers(stream_slice=None, stream_state=None, next_page_token=None)
    assert headers == {}
def test_http_method(patch_base_class):
    """runReport requests are always sent as POST."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    assert stream.http_method == "POST"
@pytest.mark.parametrize(
    ("http_status", "should_retry"),
    [
        (HTTPStatus.OK, False),
        (HTTPStatus.BAD_REQUEST, False),
        (HTTPStatus.TOO_MANY_REQUESTS, True),
        (HTTPStatus.INTERNAL_SERVER_ERROR, True),
    ],
)
def test_should_retry(patch_base_class, http_status, should_retry):
    """429 and 5xx responses are retried; 200 and 400 are not."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    response = MagicMock()
    response.status_code = http_status
    assert stream.should_retry(response) == should_retry
def test_backoff_time(patch_base_class):
    """No custom backoff is defined, so the CDK default (None) applies."""
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
    assert stream.backoff_time(MagicMock()) is None
@freeze_time("2023-01-01 00:00:00")
def test_stream_slices():
    """Date windows of ``window_in_days`` are emitted from the start date up to
    "today" (frozen at 2023-01-01), with the final window clamped to today."""
    cases = [
        (
            datetime.date(2022, 12, 29),
            1,
            [
                {"startDate": "2022-12-29", "endDate": "2022-12-29"},
                {"startDate": "2022-12-30", "endDate": "2022-12-30"},
                {"startDate": "2022-12-31", "endDate": "2022-12-31"},
                {"startDate": "2023-01-01", "endDate": "2023-01-01"},
            ],
        ),
        (
            datetime.date(2022, 12, 28),
            2,
            [
                {"startDate": "2022-12-28", "endDate": "2022-12-29"},
                {"startDate": "2022-12-30", "endDate": "2022-12-31"},
                {"startDate": "2023-01-01", "endDate": "2023-01-01"},
            ],
        ),
        (
            datetime.date(2022, 12, 20),
            5,
            [
                {"startDate": "2022-12-20", "endDate": "2022-12-24"},
                {"startDate": "2022-12-25", "endDate": "2022-12-29"},
                {"startDate": "2022-12-30", "endDate": "2023-01-01"},
            ],
        ),
    ]
    for start_date, window, expected_slices in cases:
        config = {"date_ranges_start_date": start_date, "window_in_days": window, "dimensions": ["date"]}
        stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
        assert list(stream.stream_slices(sync_mode=None)) == expected_slices
def test_read_incremental(requests_mock):
    """Two incremental reads: the first syncs 2022-12-29..2023-01-01 and sets the
    cursor; the second (a day later) re-reads from two days before the cursor
    (lookback) through 2023-01-02, picking up revised totals."""
    config = {
        "property_id": 123,
        "date_ranges_start_date": datetime.date(2022, 12, 29),
        "window_in_days": 1,
        "dimensions": ["date"],
        "metrics": ["totalUsers"],
    }
    stream = GoogleAnalyticsDataApiBaseStream(authenticator=None, config=config)
    stream_state = {}

    def daily_report(date_value, total_users):
        # One-row runReport payload for a single day.
        return {
            "dimensionHeaders": [{"name": "date"}],
            "metricHeaders": [{"name": "totalUsers", "type": "TYPE_INTEGER"}],
            "rows": [{"dimensionValues": [{"value": date_value}], "metricValues": [{"value": str(total_users)}]}],
            "rowCount": 1,
        }

    responses = [
        # 1st incremental read
        daily_report("20221229", 100),
        daily_report("20221230", 110),
        daily_report("20221231", 120),
        daily_report("20230101", 130),
        # 2-nd incremental read
        daily_report("20221230", 112),
        daily_report("20221231", 125),
        daily_report("20230101", 140),
        daily_report("20230102", 150),
    ]
    requests_mock.register_uri(
        "POST",
        "https://analyticsdata.googleapis.com/v1beta/properties/123:runReport",
        json=lambda request, context: responses.pop(0),
    )

    with freeze_time("2023-01-01 12:00:00"):
        first_read = list(read_incremental(stream, stream_state))
    assert first_read == [
        {"date": "20221229", "totalUsers": 100, "property_id": 123},
        {"date": "20221230", "totalUsers": 110, "property_id": 123},
        {"date": "20221231", "totalUsers": 120, "property_id": 123},
        {"date": "20230101", "totalUsers": 130, "property_id": 123},
    ]
    assert stream_state == {"date": "20230101"}

    with freeze_time("2023-01-02 12:00:00"):
        second_read = list(read_incremental(stream, stream_state))
    assert second_read == [
        {"date": "20221230", "totalUsers": 112, "property_id": 123},
        {"date": "20221231", "totalUsers": 125, "property_id": 123},
        {"date": "20230101", "totalUsers": 140, "property_id": 123},
        {"date": "20230102", "totalUsers": 150, "property_id": 123},
    ]