1
0
mirror of synced 2026-01-15 15:06:14 -05:00
Files
airbyte/airbyte-integrations/connectors/source-iterable/source_iterable/api.py
Marcos Marx dca2256a7c Bump 2022 license version (#13233)
* Bump year in license short to 2022

* remove protocol from cdk
2022-05-26 15:00:42 -03:00

270 lines
8.1 KiB
Python
Executable File

#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
import csv
import json
import urllib.parse as urlparse
from io import StringIO
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional
import requests
from airbyte_cdk.models import SyncMode
from source_iterable.iterable_streams import IterableExportStreamAdjustableRange, IterableExportStreamRanged, IterableStream
# NOTE(review): not referenced in the part of the file visible here; presumably a
# per-request row limit for event exports — confirm usage before changing or removing.
EVENT_ROWS_LIMIT = 200
# Number of campaign ids that CampaignsMetrics.stream_slices batches into a single slice.
CAMPAIGNS_PER_REQUEST = 20
class Lists(IterableStream):
    """
    Stream served from the `lists` path.

    `data_field` is set to "lists"; how it is consumed is defined by `IterableStream`
    (presumably the response key that holds the records).
    """

    data_field = "lists"

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "lists"
class ListUsers(IterableStream):
    """
    Stream of the e-mail addresses that belong to each list.

    One slice is produced per record of the `Lists` stream. The response of a slice is
    read line by line and every non-empty line becomes one record holding the e-mail
    and the id of the list it was requested for.
    """

    primary_key = "listId"
    data_field = "getUsers"
    name = "list_users"

    def path(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> str:
        """
        :param stream_slice: slice produced by `stream_slices`; must contain `list_id`
        :return: request path, including the `listId` query parameter
        """
        return f"lists/{self.data_field}?listId={stream_slice['list_id']}"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Yield one `{"list_id": <id>}` slice for every record read from the `Lists` stream.

        :param kwargs: `sync_mode` is forwarded to `Lists.read_records`
                       (defaults to `SyncMode.full_refresh`)
        """
        lists = Lists(api_key=self._api_key)
        for list_record in lists.read_records(sync_mode=kwargs.get("sync_mode", SyncMode.full_refresh)):
            yield {"list_id": list_record["id"]}

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """
        Turn each line of the response body into an `{"email", "listId"}` record.

        The list id is recovered from the request URL because the body itself only
        contains one e-mail per line.
        """
        list_id = self._get_list_id(response.url)
        for user in response.iter_lines():
            # A blank line carries no e-mail; emitting it would create a bogus record.
            if not user:
                continue
            yield {"email": user.decode(), "listId": list_id}

    @staticmethod
    def _get_list_id(url: str) -> Optional[int]:
        """
        Extract the `listId` query parameter from a URL.

        :param url: URL whose query string is inspected
        :return: the first `listId` value as an int, or None when the parameter is absent
        :raises ValueError: when the `listId` value is not an integer
        """
        query = urlparse.urlparse(url).query
        # parse_qsl copes with parameters that have no "=" or whose value contains "=",
        # both of which broke the previous manual `split("=")` unpacking.
        for key, value in urlparse.parse_qsl(query):
            if key == "listId":
                return int(value)
        return None
class Campaigns(IterableStream):
    """
    Stream served from the `campaigns` path.

    `data_field` is set to "campaigns"; how it is consumed is defined by `IterableStream`
    (presumably the response key that holds the records).
    """

    data_field = "campaigns"

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "campaigns"
class CampaignsMetrics(IterableStream):
    """
    Stream of campaign metrics, requested for batches of campaign ids.

    https://api.iterable.com/api/docs#campaigns_metrics
    """

    name = "campaigns_metrics"
    primary_key = None
    data_field = None

    def __init__(self, api_key: str, start_date: str):
        """
        :param api_key: API key handed to the parent stream
        :param start_date: value sent as the `startDateTime` request parameter
        """
        super().__init__(api_key)
        self.start_date = start_date

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "campaigns/metrics"

    def request_params(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> MutableMapping[str, Any]:
        """
        Extend the parent's request parameters with the slice's campaign ids and the start date.

        :param stream_slice: slice produced by `stream_slices`; its `campaign_ids` are sent as `campaignId`
        :return: request parameters for one metrics request
        """
        params = super().request_params(**kwargs)
        params["campaignId"] = stream_slice.get("campaign_ids")
        params["startDateTime"] = self.start_date
        return params

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Yield `{"campaign_ids": [...]}` slices of at most CAMPAIGNS_PER_REQUEST ids each.

        The ids come from the records of the `Campaigns` stream.

        :param kwargs: `sync_mode` is forwarded to `Campaigns.read_records`
                       (defaults to `SyncMode.full_refresh`)
        """
        campaigns = Campaigns(api_key=self._api_key)
        campaign_ids = []
        for campaign in campaigns.read_records(sync_mode=kwargs.get("sync_mode", SyncMode.full_refresh)):
            campaign_ids.append(campaign["id"])
            if len(campaign_ids) == CAMPAIGNS_PER_REQUEST:
                yield {"campaign_ids": campaign_ids}
                campaign_ids = []
        # Flush the last, partially filled batch.
        if campaign_ids:
            yield {"campaign_ids": campaign_ids}

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """
        Parse the CSV response body and wrap every parsed row as `{"data": row}`.
        """
        content = response.content.decode()
        for record in self._parse_csv_string_to_dict(content):
            yield {"data": record}

    @staticmethod
    def _cast_value(value: str) -> Any:
        """
        Convert a CSV cell to int, then float; return the original string when neither applies.
        """
        for cast in (int, float):
            try:
                return cast(value)
            except ValueError:
                continue
        return value

    @staticmethod
    def _parse_csv_string_to_dict(csv_string: str) -> List[Dict[str, Any]]:
        """
        Parse a response with a csv type to dict object
        Example:
            csv_string = "a,b,c,d
                          1,2,,3
                          6,,1,2"

            output = [{"a": 1, "b": 2, "d": 3},
                      {"a": 6, "c": 1, "d": 2}]

        Empty cells, and cells missing because a row is shorter than the header, are
        omitted from the row. Cells that are not numeric are kept as strings instead
        of aborting the whole parse.

        :param csv_string: API endpoint response with csv format
        :return: parsed API response
        """
        reader = csv.DictReader(StringIO(csv_string), delimiter=",")
        result = []
        for row in reader:
            # Build a fresh dict rather than rewriting `row` while iterating over it.
            parsed_row = {}
            for key, value in row.items():
                # DictReader fills cells missing from a short row with None.
                if value is None or value == "":
                    continue
                parsed_row[key] = CampaignsMetrics._cast_value(value)
            result.append(parsed_row)
        return result
class Channels(IterableStream):
    """
    Stream served from the `channels` path.

    `data_field` is set to "channels"; how it is consumed is defined by `IterableStream`
    (presumably the response key that holds the records).
    """

    data_field = "channels"

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "channels"
class EmailBounce(IterableExportStreamAdjustableRange):
    """
    Stream `email_bounce` with data field `emailBounce`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailBounce"
    name = "email_bounce"
class EmailClick(IterableExportStreamAdjustableRange):
    """
    Stream `email_click` with data field `emailClick`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailClick"
    name = "email_click"
class EmailComplaint(IterableExportStreamAdjustableRange):
    """
    Stream `email_complaint` with data field `emailComplaint`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailComplaint"
    name = "email_complaint"
class EmailOpen(IterableExportStreamAdjustableRange):
    """
    Stream `email_open` with data field `emailOpen`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailOpen"
    name = "email_open"
class EmailSend(IterableExportStreamAdjustableRange):
    """
    Stream `email_send` with data field `emailSend`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailSend"
    name = "email_send"
class EmailSendSkip(IterableExportStreamAdjustableRange):
    """
    Stream `email_send_skip` with data field `emailSendSkip`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailSendSkip"
    name = "email_send_skip"
class EmailSubscribe(IterableExportStreamAdjustableRange):
    """
    Stream `email_subscribe` with data field `emailSubscribe`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailSubscribe"
    name = "email_subscribe"
class EmailUnsubscribe(IterableExportStreamAdjustableRange):
    """
    Stream `email_unsubscribe` with data field `emailUnsubscribe`.

    All request and parsing behaviour is inherited from `IterableExportStreamAdjustableRange`.
    """

    data_field = "emailUnsubscribe"
    name = "email_unsubscribe"
class Events(IterableStream):
    """
    Stream of user events, requested once per e-mail obtained from `ListUsers`.

    https://api.iterable.com/api/docs#export_exportUserEvents
    """

    primary_key = None
    data_field = "events"
    # Fields lifted to the top level of every emitted record; everything else goes under "data".
    common_fields = ("itblInternal", "_type", "createdAt", "email")

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "export/userEvents"

    def request_params(self, stream_slice: Optional[Mapping[str, Any]], **kwargs) -> MutableMapping[str, Any]:
        """
        Extend the parent's request parameters with the slice's e-mail and `includeCustomEvents`.

        :param stream_slice: slice produced by `stream_slices`; must contain `email`
        :return: request parameters for one events request
        """
        params = super().request_params(**kwargs)
        params.update({"email": stream_slice["email"], "includeCustomEvents": "true"})
        return params

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Yield one `{"email": ...}` slice for every record of every `ListUsers` slice.

        :param kwargs: `sync_mode` is forwarded to `ListUsers.read_records`
                       (defaults to `SyncMode.full_refresh`)
        """
        lists = ListUsers(api_key=self._api_key)
        stream_slices = lists.stream_slices()
        for stream_slice in stream_slices:
            for list_record in lists.read_records(sync_mode=kwargs.get("sync_mode", SyncMode.full_refresh), stream_slice=stream_slice):
                yield {"email": list_record["email"]}

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """
        Parse jsonl response body.
        Put common event fields at the top level.
        Put the rest of the fields in the `data` subobject.
        Blank lines in the body are skipped.
        """
        for line in StringIO(response.text):
            # json.loads raises on an empty document, so ignore blank lines.
            if not line.strip():
                continue
            record_dict = json.loads(line)
            # pop() removes the common fields so they are not repeated under "data".
            record_dict_common_fields = {field: record_dict.pop(field, None) for field in self.common_fields}
            yield {**record_dict_common_fields, "data": record_dict}
class MessageTypes(IterableStream):
    """
    Stream `message_types`, served from the `messageTypes` path.

    `data_field` is set to "messageTypes"; how it is consumed is defined by `IterableStream`
    (presumably the response key that holds the records).
    """

    name = "message_types"
    data_field = "messageTypes"

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "messageTypes"
class Metadata(IterableStream):
    """
    Stream served from the `metadata` path, declared without a primary key.

    `data_field` is set to "results"; how it is consumed is defined by `IterableStream`
    (presumably the response key that holds the records).
    """

    data_field = "results"
    primary_key = None

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "metadata"
class Templates(IterableExportStreamRanged):
    """
    Stream served from the `templates` path.

    Records are read once for every combination of `template_types` and `message_types`.
    """

    data_field = "templates"
    template_types = ["Base", "Blast", "Triggered", "Workflow"]
    message_types = ["Email", "Push", "InApp", "SMS"]

    def path(self, **kwargs) -> str:
        """
        :return: request path of this stream
        """
        return "templates"

    def read_records(self, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Mapping[str, Any]]:
        """
        Run the parent's `read_records` for each (template type, message medium) pair.

        Before each run the pair is stored in `self.stream_params` as
        `templateType` / `messageMedium`.
        """
        combinations = [
            (template_type, medium)
            for template_type in self.template_types
            for medium in self.message_types
        ]
        for template_type, medium in combinations:
            self.stream_params = {"templateType": template_type, "messageMedium": medium}
            yield from super().read_records(stream_slice=stream_slice, **kwargs)

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """
        Yield the records found under `data_field` in the JSON body.

        Each record's cursor field is replaced by the result of `_field_to_datetime`.
        A body without `data_field` yields nothing.
        """
        payload = response.json()
        for template in payload.get(self.data_field, []):
            raw_cursor_value = template[self.cursor_field]
            template[self.cursor_field] = self._field_to_datetime(raw_cursor_value)
            yield template
class Users(IterableExportStreamRanged):
    """
    Stream with data field `user` and cursor field `profileUpdatedAt`.

    All request and parsing behaviour is inherited from `IterableExportStreamRanged`.
    """

    cursor_field = "profileUpdatedAt"
    data_field = "user"