1
0
mirror of synced 2026-01-09 06:03:17 -05:00
Files
airbyte/airbyte-integrations/connectors/source-google-ads/unit_tests/test_source.py
2023-07-24 17:21:02 +04:00

535 lines
20 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import re
from collections import namedtuple
from unittest.mock import Mock
import pytest
from airbyte_cdk import AirbyteLogger
from freezegun import freeze_time
from google.ads.googleads.errors import GoogleAdsException
from google.ads.googleads.v11.errors.types.authorization_error import AuthorizationErrorEnum
from pendulum import today
from source_google_ads.custom_query_stream import IncrementalCustomQuery
from source_google_ads.google_ads import GoogleAds
from source_google_ads.source import SourceGoogleAds
from source_google_ads.streams import AdGroupAdReport, AdGroupLabels, ServiceAccounts, chunk_date_range
from source_google_ads.utils import GAQL
from .common import MockErroringGoogleAdsClient, MockGoogleAdsClient, make_google_ads_exception
@pytest.fixture
def mock_account_info(mocker):
    """Patch ``SourceGoogleAds.get_account_info`` so it returns one canned, non-manager account row."""
    account_row = {"customer.manager": False, "customer.time_zone": "Europe/Berlin", "customer.id": "8765"}
    # The stub returns a list holding a single list with the one account row.
    account_info_stub = Mock(return_value=[[account_row]])
    mocker.patch("source_google_ads.source.SourceGoogleAds.get_account_info", account_info_stub)
@pytest.fixture()
def stream_mock(mocker, config, customers):
    """Provide a factory that builds an ``AdGroupAdReport`` stream with ``GoogleAdsStream.read_records`` stubbed.

    The returned callable takes ``latest_record``; the patched ``read_records``
    then returns a one-element list containing exactly that record.
    """

    def mock(latest_record):
        stubbed_reader = Mock(return_value=[latest_record])
        mocker.patch("source_google_ads.streams.GoogleAdsStream.read_records", stubbed_reader)
        api_client = GoogleAds(credentials=config["credentials"])
        return AdGroupAdReport(
            start_date=config["start_date"],
            api=api_client,
            conversion_window_days=config["conversion_window_days"],
            customers=customers,
        )

    return mock
@pytest.fixture
def mocked_gads_api(mocker):
    """Provide a helper that patches ``GoogleAds.send_request`` with a scripted list of outcomes.

    The helper queues ``response`` first (when truthy) and then, if any of the
    failure arguments is truthy, a generator that raises the exception built by
    ``make_google_ads_exception``.
    """

    def mock(response=None, failure_code=1, failure_msg="", error_type=""):
        def failing_response():
            # The unreachable ``yield`` makes this a generator function, so the
            # exception is raised when the generator is first advanced rather
            # than when it is created.
            raise make_google_ads_exception(failure_code=failure_code, failure_msg=failure_msg, error_type=error_type)
            yield

        outcomes = [response] if response else []
        # ``failure_code`` defaults to 1, so a failing outcome is queued unless
        # all three failure arguments are falsy.
        if any((failure_msg, failure_code, error_type)):
            outcomes.append(failing_response())
        mocker.patch("source_google_ads.google_ads.GoogleAds.send_request", side_effect=outcomes)

    return mock
@pytest.fixture()
def mock_fields_meta_data():
    """Build a stand-in API object whose ``get_fields_metadata`` returns canned field descriptors.

    Each descriptor is a ``Node`` namedtuple (data type, name, enum values,
    repeated flag); the mocked method returns them in a dict keyed by field name.
    """
    DataType = namedtuple("DataType", ["name"])
    Node = namedtuple("Node", ["data_type", "name", "enum_values", "is_repeated"])

    def field(type_name, field_name, enum_values=None):
        # Every canned field is non-repeated; a missing enum list becomes a fresh empty list.
        return Node(DataType(type_name), field_name, [] if enum_values is None else enum_values, False)

    ad_destination_types = [
        "APP_DEEP_LINK",
        "APP_STORE",
        "LEAD_FORM",
        "LOCATION_LISTING",
        "MAP_DIRECTIONS",
        "MESSAGE",
        "NOT_APPLICABLE",
        "PHONE_CALL",
        "UNKNOWN",
        "UNMODELED_FOR_CONVERSIONS",
        "UNSPECIFIED",
        "WEBSITE",
        "YOUTUBE",
    ]
    impression_share_locations = ["ABSOLUTE_TOP_OF_PAGE", "ANYWHERE_ON_PAGE", "TOP_OF_PAGE", "UNKNOWN", "UNSPECIFIED"]

    nodes = (
        field("RESOURCE_NAME", "campaign.accessible_bidding_strategy"),
        field("ENUM", "segments.ad_destination_type", ad_destination_types),
        field("DATE", "campaign.start_date"),
        field("DATE", "campaign.end_date"),
        field("DATE", "segments.date"),
        field("ENUM", "accessible_bidding_strategy.target_impression_share.location", impression_share_locations),
        field("STRING", "campaign.name"),
        field("DOUBLE", "campaign.optimization_score"),
        field("RESOURCE_NAME", "campaign.resource_name"),
        field("INT32", "campaign.shopping_setting.campaign_priority"),
        field("INT64", "campaign.shopping_setting.merchant_id"),
        field("BOOLEAN", "campaign_budget.explicitly_shared"),
        field("MESSAGE", "bidding_strategy.enhanced_cpc"),
    )

    fields_by_name = {}
    for node in nodes:
        fields_by_name[node.name] = node
    return Mock(get_fields_metadata=Mock(return_value=fields_by_name))
@freeze_time("2022-01-30")
def test_chunk_date_range_without_end_date():
    """Without an end date, one-day slices are expected from the start date through the frozen current date."""
    produced = list(
        chunk_date_range(
            start_date="2022-01-24",
            conversion_window=0,
            end_date=None,
            days_of_data_storage=None,
            range_days=1,
            time_zone="UTC",
        )
    )

    # One single-day slice for each day from 2022-01-24 to 2022-01-30 inclusive.
    days = [f"2022-01-{day}" for day in range(24, 31)]
    expected_response = [{"start_date": day, "end_date": day} for day in days]
    assert expected_response == produced
def test_chunk_date_range():
    """Ten-day slices are expected to start 14 days (the conversion window) before the start date.

    The final slice is shorter because it is cut off at the end date.
    """
    expected_bounds = (
        ("2021-02-18", "2021-02-27"),
        ("2021-02-28", "2021-03-09"),
        ("2021-03-10", "2021-03-19"),
        ("2021-03-20", "2021-03-29"),
        ("2021-03-30", "2021-04-08"),
        ("2021-04-09", "2021-04-18"),
        ("2021-04-19", "2021-04-28"),
        ("2021-04-29", "2021-05-04"),
    )
    expected = [{"start_date": first_day, "end_date": last_day} for first_day, last_day in expected_bounds]

    produced = list(chunk_date_range("2021-03-04", 14, "2021-05-04", range_days=10, time_zone="UTC"))
    assert expected == produced
def test_streams_count(config, mock_account_info):
    """The source is expected to expose exactly 29 streams for the shared test config."""
    expected_streams_number = 29
    available_streams = SourceGoogleAds().streams(config)
    assert len(available_streams) == expected_streams_number
@pytest.mark.parametrize(
    (
        "query",
        "is_metrics_in_query",
    ),
    (
        ("SELECT customer.id, metrics.conversions, campaign.start_date FROM campaign", True),
        ("SELECT segments.ad_destination_type, campaign.start_date, campaign.end_date FROM campaign", False),
    ),
)
def test_metrics_in_custom_query(query, is_metrics_in_query):
    """``is_metrics_in_custom_query`` should return exactly ``True``/``False`` as parametrized for each query."""
    parsed_query = GAQL.parse(query)
    detected = SourceGoogleAds().is_metrics_in_custom_query(parsed_query)
    # Identity comparison: the method must return a real bool, not merely a truthy/falsy value.
    assert detected is is_metrics_in_query
@pytest.mark.parametrize(
    ("latest_record", "current_state", "expected_state"),
    (
        ({"segments.date": "2020-01-01"}, {}, {"segments.date": "2020-01-01"}),
        ({"segments.date": "2020-02-01"}, {"segments.date": "2020-01-01"}, {"segments.date": "2020-02-01"}),
        ({"segments.date": "2021-03-03"}, {"1234567890": {"segments.date": "2020-02-01"}}, {"segments.date": "2021-03-03"}),
    ),
)
def test_updated_state(stream_mock, latest_record, current_state, expected_state):
    """After reading the stubbed record, the stream state for the sliced customer should equal ``expected_state``."""
    customer_id = "1234567890"
    mocked_stream = stream_mock(latest_record=latest_record)
    mocked_stream.state = current_state

    # Exhaust the reader; only the resulting stream state is inspected below.
    records = mocked_stream.read_records(sync_mode=Mock(), stream_slice={"customer_id": customer_id})
    for _record in records:
        pass

    assert mocked_stream.state[customer_id] == expected_state
def stream_instance(query, api_mock, **kwargs):
    """Create an ``IncrementalCustomQuery`` for ``query`` with fixed test defaults.

    :param query: GAQL text; it is parsed with ``GAQL.parse`` before being placed in the stream config.
    :param api_mock: object passed to the stream as its ``api``.
    :param kwargs: extra keyword arguments forwarded unchanged to ``IncrementalCustomQuery``.
    :return: the constructed stream instance.
    """
    stream_config = {"query": GAQL.parse(query), "table_name": "whatever_table"}
    return IncrementalCustomQuery(
        api=api_mock,
        conversion_window_days=14,
        start_date="2021-03-04",
        config=stream_config,
        **kwargs,
    )
@pytest.mark.parametrize(
    "original_query, expected_query",
    [
        (
            """
SELECT
campaign.id,
campaign.name,
campaign.status,
metrics.impressions
FROM campaign
WHERE campaign.status = 'PAUSED'
AND metrics.impressions > 100
ORDER BY campaign.status
""",
            """
SELECT
campaign.id,
campaign.name,
campaign.status,
metrics.impressions,
segments.date
FROM campaign
WHERE campaign.status = 'PAUSED'
AND metrics.impressions > 100
AND segments.date BETWEEN '1980-01-01' AND '2000-01-01'
ORDER BY campaign.status
""",
        ),
        (
            """
SELECT
campaign.id,
campaign.name,
campaign.status,
metrics.impressions
FROM campaign
ORDER BY campaign.status
""",
            """
SELECT
campaign.id,
campaign.name,
campaign.status,
metrics.impressions,
segments.date
FROM campaign
WHERE segments.date BETWEEN '1980-01-01' AND '2000-01-01'
ORDER BY campaign.status
""",
        ),
        (
            """
SELECT
campaign.id,
campaign.name,
campaign.status,
metrics.impressions
FROM campaign
WHERE campaign.status = 'PAUSED'
AND metrics.impressions > 100
""",
            """
SELECT
campaign.id,
campaign.name,
campaign.status,
metrics.impressions,
segments.date
FROM campaign
WHERE campaign.status = 'PAUSED'
AND metrics.impressions > 100
AND segments.date BETWEEN '1980-01-01' AND '2000-01-01'
""",
        ),
        (
            """
SELECT
campaign.accessible_bidding_strategy,
segments.ad_destination_type,
campaign.start_date,
campaign.end_date
FROM campaign
""",
            """
SELECT
campaign.accessible_bidding_strategy,
segments.ad_destination_type,
campaign.start_date,
campaign.end_date,
segments.date
FROM campaign
WHERE segments.date BETWEEN '1980-01-01' AND '2000-01-01'
""",
        ),
    ],
)
def test_insert_date(original_query, expected_query):
    """``insert_segments_date_expr`` should add ``segments.date`` to the SELECT list and a BETWEEN filter on it.

    The cases cover queries with and without existing WHERE and ORDER BY clauses.
    """
    parsed_query = GAQL.parse(original_query)
    rewritten_query = IncrementalCustomQuery.insert_segments_date_expr(parsed_query, "1980-01-01", "2000-01-01")

    # Collapse whitespace runs so the line layout of the multi-line expectation is irrelevant.
    normalized_expected = re.sub(r"\s+", " ", expected_query.strip())
    assert str(rewritten_query) == normalized_expected
def test_get_json_schema_parse_query(mock_fields_meta_data, customers):
    """The JSON schema should list every selected field plus ``segments.date``, which the query does not select."""
    query = """
SELECT
campaign.accessible_bidding_strategy,
segments.ad_destination_type,
campaign.start_date,
campaign.end_date
FROM campaign
"""
    expected_fields = {
        "campaign.accessible_bidding_strategy",
        "segments.ad_destination_type",
        "campaign.start_date",
        "campaign.end_date",
        "segments.date",
    }

    stream = stream_instance(query=query, api_mock=mock_fields_meta_data, customers=customers)
    schema_properties = stream.get_json_schema()["properties"]
    assert set(schema_properties) == expected_fields
def test_get_json_schema_parse_query_with_end_date(mock_fields_meta_data, customers):
    """Same schema expectation as the no-end-date case, but the stream is built with an explicit ``end_date``."""
    query = """
SELECT
campaign.accessible_bidding_strategy,
segments.ad_destination_type,
campaign.start_date,
campaign.end_date
FROM campaign
"""
    expected_fields = {
        "campaign.accessible_bidding_strategy",
        "segments.ad_destination_type",
        "campaign.start_date",
        "campaign.end_date",
        "segments.date",
    }

    stream = stream_instance(query=query, api_mock=mock_fields_meta_data, end_date="2021-04-04", customers=customers)
    schema_properties = stream.get_json_schema()["properties"]
    assert set(schema_properties) == expected_fields
def test_google_type_conversion(mock_fields_meta_data, customers):
    """
    Check the JSON-schema type produced for a field of each mocked Google Ads data type.

    The query may be invalid (field compatibility is not checked), but only the
    type mapping is tested here; the query is never submitted.
    """
    desired_mapping = {
        "accessible_bidding_strategy.target_impression_share.location": "string",  # "ENUM"
        "campaign.name": ["string", "null"],  # STRING
        "campaign.end_date": ["string", "null"],  # DATE
        "campaign.optimization_score": ["number", "null"],  # DOUBLE
        "campaign.resource_name": ["string", "null"],  # RESOURCE_NAME
        "campaign.shopping_setting.campaign_priority": ["integer", "null"],  # INT32
        "campaign.shopping_setting.merchant_id": ["integer", "null"],  # INT64
        "campaign_budget.explicitly_shared": ["boolean", "null"],  # BOOLEAN
        "bidding_strategy.enhanced_cpc": ["string", "null"],  # MESSAGE
        "segments.date": ["string", "null"],  # autoadded, should be DATE
    }

    # The query selects one field of each type.
    query = """
SELECT
accessible_bidding_strategy.target_impression_share.location,
campaign.name,
campaign.end_date,
campaign.optimization_score,
campaign.resource_name,
campaign.shopping_setting.campaign_priority,
campaign.shopping_setting.merchant_id,
campaign_budget.explicitly_shared,
bidding_strategy.enhanced_cpc
FROM campaign
"""
    instance = stream_instance(query=query, api_mock=mock_fields_meta_data, customers=customers)
    final_schema = instance.get_json_schema()
    schema_properties = final_schema.get("properties")
    for prop, value in schema_properties.items():
        expected_type = desired_mapping[prop]
        actual_type = value.get("type")
        # Report the expected type next to the actual one; the previous message
        # printed the actual schema entry as though it were the expectation.
        assert expected_type == actual_type, f"{prop} should be {expected_type}, got {actual_type}"
def test_check_connection_should_pass_when_config_valid(mocker):
    """With ``GoogleAds`` replaced by ``MockGoogleAdsClient``, ``check_connection`` should succeed with no message."""
    mocker.patch("source_google_ads.source.GoogleAds", MockGoogleAdsClient)
    source = SourceGoogleAds()

    credentials = {
        "developer_token": "fake_developer_token",
        "client_id": "fake_client_id",
        "client_secret": "fake_client_secret",
        "refresh_token": "fake_refresh_token",
    }
    custom_queries = [
        {
            "query": "SELECT campaign.accessible_bidding_strategy, segments.ad_destination_type, campaign.start_date, campaign.end_date FROM campaign",
            "primary_key": None,
            "cursor_field": "campaign.start_date",
            "table_name": "happytable",
        },
        {
            "query": "SELECT segments.ad_destination_type, segments.ad_network_type, segments.day_of_week, customer.auto_tagging_enabled, customer.id, metrics.conversions, campaign.start_date FROM campaign",
            "primary_key": "customer.id",
            "cursor_field": None,
            "table_name": "unhappytable",
        },
        {
            "query": "SELECT ad_group.targeting_setting.target_restrictions FROM ad_group",
            "primary_key": "customer.id",
            "cursor_field": None,
            "table_name": "ad_group_custom",
        },
    ]
    connection_config = {
        "credentials": credentials,
        "customer_id": "fake_customer_id",
        "start_date": "2022-01-01",
        "conversion_window_days": 14,
        "custom_queries": custom_queries,
    }

    check_successful, message = source.check_connection(AirbyteLogger(), connection_config)
    assert check_successful
    assert message is None
def test_check_connection_should_fail_when_api_call_fails(mocker):
    """With ``GoogleAds`` replaced by ``MockErroringGoogleAdsClient``, ``check_connection`` should report failure."""
    # We patch the object inside source.py because that's the calling context
    # https://docs.python.org/3/library/unittest.mock.html#where-to-patch
    mocker.patch("source_google_ads.source.GoogleAds", MockErroringGoogleAdsClient)
    source = SourceGoogleAds()

    credentials = {
        "developer_token": "fake_developer_token",
        "client_id": "fake_client_id",
        "client_secret": "fake_client_secret",
        "refresh_token": "fake_refresh_token",
    }
    custom_queries = [
        {
            "query": "SELECT campaign.accessible_bidding_strategy, segments.ad_destination_type, campaign.start_date, campaign.end_date FROM campaign",
            "primary_key": None,
            "cursor_field": "campaign.start_date",
            "table_name": "happytable",
        },
        {
            "query": "SELECT segments.ad_destination_type, segments.ad_network_type, segments.day_of_week, customer.auto_tagging_enabled, customer.id, metrics.conversions, campaign.start_date FROM campaign",
            "primary_key": "customer.id",
            "cursor_field": None,
            "table_name": "unhappytable",
        },
        {
            "query": "SELECT ad_group.targeting_setting.target_restrictions FROM ad_group",
            "primary_key": "customer.id",
            "cursor_field": None,
            "table_name": "ad_group_custom",
        },
    ]
    connection_config = {
        "credentials": credentials,
        "customer_id": "fake_customer_id",
        "start_date": "2022-01-01",
        "conversion_window_days": 14,
        "custom_queries": custom_queries,
    }

    check_successful, message = source.check_connection(AirbyteLogger(), connection_config)
    assert not check_successful
    assert message.startswith("Unable to connect to Google Ads API with the provided configuration")
@freeze_time("2022-01-30")
def test_end_date_is_not_in_the_future(customers):
    """An ``end_date`` one day in the future should be replaced by the current date in the stream config.

    Time is frozen so that the "tomorrow" input, the current date used by the
    code under test, and the expected value all refer to the same day; without
    it the test could fail if the date rolled over while it was running.
    """
    source = SourceGoogleAds()
    tomorrow = today().add(days=1).to_date_string()
    config = source.get_incremental_stream_config(
        None, {"end_date": tomorrow, "conversion_window_days": 14, "start_date": "2020-01-23"}, customers
    )
    assert config.get("end_date") == today().to_date_string()
def test_invalid_custom_query_handled(mocked_gads_api, config):
    """A Google Ads error raised for a custom query should make ``check_connection`` fail with that message."""
    # limit to one custom query, otherwise need to mock more side effects
    first_custom_query = next(iter(config["custom_queries"]))
    config["custom_queries"] = [first_custom_query]

    # First scripted outcome is an account row; the second raises the request error.
    mocked_gads_api(
        response=[{"customer.id": "8765"}],
        failure_msg="Unrecognized field in the query: 'ad_group_ad.ad.video_ad.media_file'",
        error_type="request_error",
    )

    expected_error = (
        "Unable to connect to Google Ads API with the provided configuration - Unrecognized field in the query: "
        "'ad_group_ad.ad.video_ad.media_file'"
    )
    status_ok, error = SourceGoogleAds().check_connection(AirbyteLogger(), config)
    assert not status_ok
    assert error == expected_error
@pytest.mark.parametrize(
    ("cls", "error", "failure_code", "raise_expected"),
    (
        (AdGroupLabels, "authorization_error", AuthorizationErrorEnum.AuthorizationError.CUSTOMER_NOT_ENABLED, False),
        (AdGroupLabels, "internal_error", 1, True),
        (ServiceAccounts, "authentication_error", 1, True),
        (ServiceAccounts, "internal_error", 1, True),
    ),
)
def test_read_record_error_handling(config, customers, mocked_gads_api, cls, error, failure_code, raise_expected):
    """Reading records should either propagate ``GoogleAdsException`` or complete, per ``raise_expected``."""
    mocked_gads_api(failure_code=failure_code, failure_msg="Some unexpected error", error_type=error)
    stream = cls(api=GoogleAds(credentials=config["credentials"]), customers=customers)

    def consume_records():
        # Exhaust the reader; any exception surfaces while iterating.
        for _record in stream.read_records(sync_mode=Mock(), stream_slice={"customer_id": "1234567890"}):
            pass

    if not raise_expected:
        consume_records()
        return

    with pytest.raises(GoogleAdsException):
        consume_records()
def test_stream_slices(config, customers):
    """``stream_slices`` should yield the expected date windows, each tagged with customer id ``"123"``."""
    stream = AdGroupAdReport(
        start_date=config["start_date"],
        api=GoogleAds(credentials=config["credentials"]),
        conversion_window_days=config["conversion_window_days"],
        customers=customers,
        end_date="2021-02-10",
    )

    expected_windows = (
        ("2020-12-18", "2021-01-01"),
        ("2021-01-02", "2021-01-16"),
        ("2021-01-17", "2021-01-31"),
        ("2021-02-01", "2021-02-10"),
    )
    expected_slices = [
        {"start_date": first_day, "end_date": last_day, "customer_id": "123"} for first_day, last_day in expected_windows
    ]

    slices = list(stream.stream_slices())
    assert slices == expected_slices