1
0
mirror of synced 2026-01-08 21:05:13 -05:00
Files
airbyte/airbyte-integrations/connectors/source-google-ads/source_google_ads/google_ads.py
Denys Davydov c816d14fc1 Source Google Ads: add new streams (#28246)
* Connector health: source hubspot, gitlab, snapchat-marketing: fix builds

* add new streams

* upd changelog

* update CAT config

* update expected records
2023-07-13 16:53:08 +03:00

211 lines
8.7 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
from enum import Enum
from typing import Any, Iterator, List, Mapping, MutableMapping
import backoff
from airbyte_cdk.models import FailureType
from airbyte_cdk.utils import AirbyteTracedException
from google.ads.googleads.client import GoogleAdsClient
from google.ads.googleads.v13.services.types.google_ads_service import GoogleAdsRow, SearchGoogleAdsResponse
from google.api_core.exceptions import ServerError, TooManyRequests
from google.auth import exceptions
from proto.marshal.collections import Repeated, RepeatedComposite
# Maps every stream / report name to the Google Ads resource that is placed
# in the FROM clause of the query generated for it. Several streams can share
# one resource (e.g. "accounts" and "account_performance_report").
REPORT_MAPPING = {
    # customer level
    "accounts": "customer",
    "account_labels": "customer_label",
    "account_performance_report": "customer",
    # ad group level
    "ad_group_ads": "ad_group_ad",
    "ad_group_ad_labels": "ad_group_ad_label",
    "ad_group_ad_report": "ad_group_ad",
    "ad_groups": "ad_group",
    "ad_group_bidding_strategies": "ad_group",
    "ad_group_criterions": "ad_group_criterion",
    "ad_group_criterion_labels": "ad_group_criterion_label",
    "ad_group_labels": "ad_group_label",
    "ad_listing_group_criterions": "ad_group_criterion",
    "audience": "audience",
    # campaign level
    "campaigns": "campaign",
    "campaign_real_time_bidding_settings": "campaign",
    "campaign_bidding_strategies": "campaign",
    "campaign_budget": "campaign_budget",
    "campaign_labels": "campaign_label",
    # views and remaining resources
    "click_view": "click_view",
    "display_keyword_performance_report": "display_keyword_view",
    "display_topics_performance_report": "topic_view",
    "geographic_report": "geographic_view",
    "keyword_report": "keyword_view",
    "labels": "label",
    "service_accounts": "customer",
    "shopping_performance_report": "shopping_performance_view",
    "user_interest": "user_interest",
    "user_location_report": "user_location_view",
}

# Google Ads API version handed to the client factory.
API_VERSION = "v13"

# Shared logger used for the retry notifications emitted by this module.
logger = logging.getLogger("airbyte")
class GoogleAds:
    """
    Wrapper around the Google Ads API client.

    It builds the client from a credentials mapping, sends search queries,
    looks up field metadata, turns a JSON schema into a query string and
    flattens `GoogleAdsRow` objects into plain dictionaries.
    """

    # Page size set on every `SearchGoogleAdsRequest` built by `send_request`.
    DEFAULT_PAGE_SIZE = 1000

    def __init__(self, credentials: MutableMapping[str, Any]):
        """
        Create the API client and keep a handle to the `GoogleAdsService`.

        :param credentials: settings forwarded to `GoogleAdsClient.load_from_dict`.
            The mapping is copied first, so the caller's object is not modified.
        """
        # `google-ads` library version `14.0.0` and higher requires an additional required parameter `use_proto_plus`.
        # More details can be found here: https://developers.google.com/google-ads/api/docs/client-libs/python/protobuf-messages
        client_config = dict(credentials)
        client_config["use_proto_plus"] = True
        self.client = self.get_google_ads_client(client_config)
        self.ga_service = self.client.get_service("GoogleAdsService")

    @staticmethod
    def get_google_ads_client(credentials) -> GoogleAdsClient:
        """
        Build a `GoogleAdsClient` for `API_VERSION` from a configuration dict.

        :param credentials: configuration passed to `GoogleAdsClient.load_from_dict`.
        :return: the initialised client.
        :raises AirbyteTracedException: with `FailureType.config_error` when the
            client raises `google.auth.exceptions.RefreshError`.
        """
        try:
            return GoogleAdsClient.load_from_dict(credentials, version=API_VERSION)
        except exceptions.RefreshError as e:
            message = "The authentication to Google Ads has expired. Re-authenticate to restore access to Google Ads."
            raise AirbyteTracedException(message=message, failure_type=FailureType.config_error) from e

    @backoff.on_exception(
        backoff.expo,
        (ServerError, TooManyRequests),
        on_backoff=lambda details: logger.info(
            f"Caught retryable error after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
        ),
        max_tries=5,
    )
    def send_request(self, query: str, customer_id: str) -> List[SearchGoogleAdsResponse]:
        """
        Run a search query for one customer.

        The call is retried with exponential backoff (at most 5 tries) when a
        `ServerError` or `TooManyRequests` is raised; each retry is logged.

        :param query: query text assigned to the search request.
        :param customer_id: customer the request is issued for.
        :return: a one-element list holding whatever `GoogleAdsService.search` returned.
        """
        search_request = self.client.get_type("SearchGoogleAdsRequest")
        search_request.query = query
        search_request.page_size = self.DEFAULT_PAGE_SIZE
        search_request.customer_id = customer_id
        return [self.ga_service.search(search_request)]

    def get_fields_metadata(self, fields: List[str]) -> Mapping[str, Any]:
        """
        Issue Google API request to get detailed information on data type for custom query columns.

        :param fields: list of columns for user defined query.
        :return: dict of fields type info keyed by field name; an empty dict when
            `fields` is empty (no request is sent in that case).
        """
        if not fields:
            # Nothing to look up: an empty `in ()` list with a zero page size is not a meaningful request.
            return {}

        ga_field_service = self.client.get_service("GoogleAdsFieldService")
        request = self.client.get_type("SearchGoogleAdsFieldsRequest")
        # One result is expected per requested field, so a single page is enough.
        request.page_size = len(fields)
        fields_sql = ",".join(f"'{field}'" for field in fields)
        request.query = (
            "\n"
            "SELECT\n"
            "name,\n"
            "data_type,\n"
            "enum_values,\n"
            "is_repeated\n"
            f"WHERE name in ({fields_sql})\n"
        )
        response = ga_field_service.search_google_ads_fields(request=request)
        return {r.name: r for r in response}

    @staticmethod
    def get_fields_from_schema(schema: Mapping[str, Any]) -> List[str]:
        """
        Return the names listed under the schema's "properties" key.

        :param schema: JSON schema of a stream; must contain "properties".
        :return: property names in the iteration order of the "properties" mapping.
        """
        properties = schema.get("properties")
        return list(properties.keys())

    @staticmethod
    def convert_schema_into_query(
        schema: Mapping[str, Any], report_name: str, from_date: str = None, to_date: str = None, cursor_field: str = None
    ) -> str:
        """
        Build the query string for a report from its JSON schema.

        :param schema: JSON schema whose property names become the selected fields.
        :param report_name: key of `REPORT_MAPPING`; the mapped resource is used in FROM.
        :param from_date: inclusive lower bound, used only when `cursor_field` is set.
        :param to_date: inclusive upper bound, used only when `cursor_field` is set.
        :param cursor_field: field to filter and sort by; no WHERE / ORDER BY is added when it is empty.
        :return: the query text.
        :raises KeyError: when `report_name` is not a key of `REPORT_MAPPING`.
        """
        from_category = REPORT_MAPPING[report_name]
        selected_fields = ", ".join(GoogleAds.get_fields_from_schema(schema))

        query_template = f"SELECT {selected_fields} FROM {from_category}"
        if cursor_field:
            query_template += f" WHERE {cursor_field} >= '{from_date}' AND {cursor_field} <= '{to_date}' ORDER BY {cursor_field} ASC"
        return query_template

    @staticmethod
    def get_field_value(field_value: GoogleAdsRow, field: str, schema_type: Mapping[str, Any]) -> Any:
        """
        Extract the value at a dotted path from a row and make it JSON friendly.

        :param field_value: the row (or nested object) to read from.
        :param field: dotted attribute path, e.g. 'ad_group_ad.ad.type'.
        :param schema_type: schema entry of the field; its "protobuf_message" and
            "type" keys control the final string conversion.
        :return: an enum member's name, a list of strings for repeated containers,
            the value itself for list/int/float/str/bool/dict/None, or `str(value)`
            for anything else. A path that cannot be resolved yields None.
        """
        # We have an object of the GoogleAdsRow class, and in order to get all the attributes we requested,
        # we should alternately go through the nestings according to the path that we have in `field`.
        # For example 'field_value' looks like:
        #   customer {
        #     resource_name: "customers/4186739445"
        #     ...
        #   }
        #   campaign {
        #     resource_name: "customers/4186739445/campaigns/8765465473658"
        #     ....
        #   }
        #   ad_group {
        #     resource_name: "customers/4186739445/adGroups/2345266867978"
        #     ....
        #   }
        #   metrics {
        #     clicks: 0
        #     ...
        #   }
        #   ad_group_ad {
        #     resource_name: "customers/4186739445/adGroupAds/2345266867978~46437453679869"
        #     status: ENABLED
        #     ad {
        #       type_: RESPONSIVE_SEARCH_AD
        #       id: 46437453679869
        #       ....
        #     }
        #     policy_summary {
        #       approval_status: APPROVED
        #     }
        #   }
        #   segments {
        #     ad_network_type: SEARCH_PARTNERS
        #     ...
        #   }
        for level_attr in field.split("."):
            try:
                field_value = getattr(field_value, level_attr)
            except AttributeError:
                # In GoogleAdsRow there are attributes that add an underscore at the end in their name.
                # For example, 'ad_group_ad.ad.type' is replaced by 'ad_group_ad.ad.type_'.
                field_value = getattr(field_value, level_attr + "_", None)

            if isinstance(field_value, Enum):
                field_value = field_value.name
            elif isinstance(field_value, (Repeated, RepeatedComposite)):
                field_value = [str(value) for value in field_value]

        # Google Ads has a lot of entities inside itself and we cannot process them all separately, because:
        # 1. It will take a long time
        # 2. We have no way to get data on absolutely all entities to test.
        #
        # To prevent JSON from throwing an error during deserialization, we made such a hack.
        # For example:
        # 1. ad_group_ad.ad.responsive_display_ad.long_headline - type AdTextAsset (https://developers.google.com/google-ads/api/reference/rpc/v6/AdTextAsset?hl=en).
        # 2. ad_group_ad.ad.legacy_app_install_ad - type LegacyAppInstallAdInfo (https://developers.google.com/google-ads/api/reference/rpc/v7/LegacyAppInstallAdInfo?hl=en).
        #
        if not (isinstance(field_value, (list, int, float, str, bool, dict)) or field_value is None):
            field_value = str(field_value)

        # In case of custom query field has MESSAGE type it represents protobuf
        # message and could be anything, convert it to a string or array of
        # string if it has "repeated" flag on metadata.
        # A missing value stays None instead of becoming the string "None"
        # (or failing while iterating None in the array case).
        if schema_type.get("protobuf_message") and field_value is not None:
            if "array" in schema_type.get("type"):
                field_value = [str(item) for item in field_value]
            else:
                field_value = str(field_value)
        return field_value

    @staticmethod
    def parse_single_result(schema: Mapping[str, Any], result: GoogleAdsRow) -> MutableMapping[str, Any]:
        """
        Flatten one row into a dict keyed by the schema's property names.

        :param schema: JSON schema of the stream; its "properties" define the fields to read.
        :param result: the row returned by the API.
        :return: mapping of each property name to the value produced by `get_field_value`.
        """
        props = schema.get("properties")
        return {
            field: GoogleAds.get_field_value(result, field, field_schema)
            for field, field_schema in props.items()
        }