455 lines
17 KiB
Python
Executable File
455 lines
17 KiB
Python
Executable File
#
|
|
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
from abc import ABC
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
|
|
from urllib.parse import parse_qsl, urlparse
|
|
|
|
import requests
|
|
from airbyte_cdk.logger import AirbyteLogger
|
|
from airbyte_cdk.sources import AbstractSource
|
|
from airbyte_cdk.sources.streams import Stream
|
|
from airbyte_cdk.sources.streams.http import HttpStream
|
|
from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
|
|
from requests.auth import AuthBase
|
|
|
|
|
|
class IntercomStream(HttpStream, ABC):
    """Base class shared by every Intercom stream in this module.

    It defines the API root, follows the ``pages.next`` URL of a response for
    pagination, asks for JSON responses, logs HTTP error bodies raised while
    reading and extracts the records from a response body by walking
    ``data_fields``.
    """

    url_base = "https://api.intercom.io/"

    primary_key = "id"
    # Keys followed, in order, to reach the records inside a response body.
    data_fields = ["data"]

    def __init__(
        self,
        authenticator: AuthBase,
        start_date: Optional[float] = None,
        **kwargs,
    ):
        """Store ``start_date`` and initialise the underlying HTTP stream.

        :param authenticator: auth object handed to ``HttpStream``.
        :param start_date: lower bound used as the initial cursor value;
            ``SourceIntercom.streams`` passes a Unix timestamp.
        :param kwargs: accepted so the whole connector config can be splatted
            into the constructor; the extra entries are not used.
        """
        self.start_date = start_date

        super().__init__(authenticator=authenticator)

    @property
    def authenticator(self):
        """Return the authenticator attached to the session when there is one.

        Works around the default ``HttpStream`` logic returning an incorrect
        authenticator when the one supplied is an ``AuthBase`` instance.
        """
        if self._session.auth:
            return self._session.auth
        return super().authenticator

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Build the query parameters of the next page from ``pages.next``.

        :param response: the response of the page that was just read.
        :return: the query string of the ``pages.next`` URL as a dict, or
            ``None`` when there are no more pages to read.
        """
        # "pages" may be present but null, so fall back to an empty mapping
        # instead of calling .get() on None.
        pages = response.json().get("pages") or {}
        next_page = pages.get("next")
        if not next_page:
            return None
        return dict(parse_qsl(urlparse(next_page).query))

    def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
        """Return the pagination token as query parameters (empty on the first page)."""
        return dict(next_page_token) if next_page_token else {}

    def request_headers(self, **kwargs) -> Mapping[str, Any]:
        """Ask the API for JSON responses."""
        return {"Accept": "application/json"}

    def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
        """Yield the records of the parent implementation, logging HTTP error bodies.

        :raises requests.exceptions.HTTPError: re-raised unchanged after its
            response body, when not empty, has been written to the log.
        """
        try:
            yield from super().read_records(*args, **kwargs)
        except requests.exceptions.HTTPError as e:
            # An HTTPError is not guaranteed to carry a response object.
            failed_response = e.response
            error_message = failed_response.text if failed_response is not None else None
            if error_message:
                self.logger.error(f"Stream {self.name}: {failed_response.status_code} " f"{failed_response.reason} - {error_message}")
            # Bare raise keeps the original traceback intact.
            raise

    def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Extract the records of a response by descending through ``data_fields``.

        Each entry of ``data_fields`` that is present in the current mapping is
        followed; descending stops as soon as the current value is no longer a
        mapping. A mapping result is yielded as a single record, a list result
        is yielded item by item, and any other value (for example ``null``)
        produces no records.
        """
        data = response.json()

        for data_field in self.data_fields:
            if not isinstance(data, dict):
                # Reached a list (or a null/scalar value): nothing to descend into.
                break
            if data_field in data:
                data = data[data_field]

        if isinstance(data, dict):
            yield data
        elif isinstance(data, list):
            yield from data
|
|
|
|
|
|
class IncrementalIntercomStream(IntercomStream, ABC):
    """Intercom stream that syncs incrementally on the ``updated_at`` cursor.

    Filtering happens client side: every parsed record is compared with the
    saved state and dropped when it is not newer.
    """

    cursor_field = "updated_at"

    def __init__(self, authenticator: AuthBase, start_date: str = None, **kwargs):
        """Initialise the stream and reset the "old records seen" flag."""
        super().__init__(authenticator, start_date, **kwargs)
        # Set to True once a record that is not newer than the state was seen.
        self.has_old_records = False

    def filter_by_state(self, stream_state: Mapping[str, Any] = None, record: Mapping[str, Any] = None) -> Iterable:
        """Yield ``record`` when it is newer than the saved state.

        Endpoint does not provide query filtering params, but they provide us
        updated_at field in most cases, so we used that as incremental filtering
        during the slicing.

        A record is also yielded when there is nothing to compare against: no
        state, a state without a cursor value, or a record without a cursor
        value. Otherwise the record is dropped and ``has_old_records`` is set.
        """
        state_value = (stream_state or {}).get(self.cursor_field)
        record_value = record.get(self.cursor_field)

        # Comparing with None would raise TypeError, so records that cannot be
        # compared are emitted rather than dropped.
        if state_value is None or record_value is None or record_value > state_value:
            yield record
        else:
            self.has_old_records = True

    def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Parse the response and keep only the records that pass ``filter_by_state``."""
        records = super().parse_response(response, stream_state, **kwargs)

        for record in records:
            yield from self.filter_by_state(stream_state=stream_state, record=record)

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the state advanced to the newest cursor value seen so far.

        This method is called once for each record returned from the API to
        compare the cursor field value in that record with the current state
        we then return an updated state object. If this is the first time we
        run a sync or no state was passed, current_stream_state will be None.

        ``start_date`` is the fallback for a missing cursor value on either
        side. When no cursor value is known at all the current state is
        returned unchanged.
        """
        current_stream_state = current_stream_state or {}

        current_stream_state_date = current_stream_state.get(self.cursor_field, self.start_date)
        latest_record_date = latest_record.get(self.cursor_field, self.start_date)

        # max() cannot order None against a number, so ignore unknown values.
        known_dates = [value for value in (current_stream_state_date, latest_record_date) if value is not None]
        if not known_dates:
            return current_stream_state

        return {self.cursor_field: max(known_dates)}
|
|
|
|
|
|
class ChildStreamMixin:
    """Mixin for streams that are read once per record of a parent stream.

    The slices of the child stream are ``{"id": <parent record id>}`` mappings
    built by reading every record of ``parent_stream_class``.
    """

    # Stream class (expected to be an IntercomStream subclass) that supplies the ids.
    parent_stream_class: Optional[type] = None

    def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one ``{"id": ...}`` slice per record of the parent stream.

        A single parent instance is used for both slicing and reading, so state
        the parent keeps between slices stays visible to its own
        ``stream_slices`` generator. Each parent slice is handed to
        ``read_records`` rather than to the constructor, which ignores it.

        :param sync_mode: sync mode forwarded to the parent stream.
        """
        parent_stream = self.parent_stream_class(authenticator=self.authenticator, start_date=self.start_date)
        for parent_slice in parent_stream.stream_slices(sync_mode=sync_mode):
            for item in parent_stream.read_records(sync_mode=sync_mode, stream_slice=parent_slice):
                yield {"id": item["id"]}
|
|
|
|
|
|
class Admins(IntercomStream):
    """Stream of all admins; records are read from the ``admins`` key.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-admins
    Endpoint: https://api.intercom.io/admins
    """

    data_fields = ["admins"]

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        resource = "admins"
        return resource
|
|
|
|
|
|
class Companies(IncrementalIntercomStream):
    """Return list of all companies.

    The Intercom API provides 2 similar endpoints for loading companies:
      1) "standard" - https://developers.intercom.com/intercom-api-reference/reference#list-companies.
         But this endpoint does not work well for huge datasets and can have performance problems.
      2) "scroll" - https://developers.intercom.com/intercom-api-reference/reference#iterating-over-all-companies
         It has good performance but at the same time only one script/client can use it across the
         client's entire account.

    Because of the above, neither endpoint can be used permanently. That's why this stream tries to
    apply both endpoints according to the following logic:
      1) By default the stream tries to load data by the "scroll" endpoint.
      2) Try to wait for a "scroll" request within a minute (3 attempts with delay 20,5 seconds)
         if a "scroll" is busy by another script.
      3) Switch to using the "standard" endpoint.
    """

    class EndpointType(Enum):
        """Request paths of the two company listing endpoints."""

        scroll = "companies/scroll"
        standard = "companies"

    def __init__(self, *args, **kwargs):
        """Start on the scroll endpoint with no failed scroll attempts recorded."""
        super().__init__(*args, **kwargs)
        # Number of responses recognised by check_exists_scroll in should_retry.
        self._backoff_count = 0
        # Forced to True by backoff_time when a 404 is received.
        self._use_standard = False
        self._endpoint_type = self.EndpointType.scroll
        self._total_count = None  # uses for saving of a total_count value once

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return the token of the next page for the endpoint currently in use.

        For reset scroll needs to iterate pages until the last.
        Another way need wait 1 min for the scroll to expire to get a new list for companies segments.

        While scrolling is still allowed the ``scroll_param`` of the response is
        returned as long as the page contained records. Otherwise, unless the
        body reports errors, the page-based token of the parent class is used.
        """
        data = response.json()
        if self._total_count is None and data.get("total_count"):
            self._total_count = data["total_count"]
            self.logger.info(f"found {self._total_count} companies")
        if self.can_use_scroll():

            scroll_param = data.get("scroll_param")

            # this stream always has only one data field
            data_field = self.data_fields[0]
            if scroll_param and data.get(data_field):
                return {"scroll_param": scroll_param}
        elif not data.get("errors"):
            return super().next_page_token(response)
        return None

    def need_use_standard(self):
        """Tell whether the stream has to fall back to the standard endpoint."""
        return not self.can_use_scroll() or self._use_standard

    def can_use_scroll(self):
        """Check backoff count"""
        return self._backoff_count <= 3

    def path(self, **kwargs) -> str:
        """Return the path of the endpoint currently selected."""
        return self._endpoint_type.value

    @classmethod
    def check_exists_scroll(cls, response: requests.Response) -> bool:
        """Tell whether ``response`` reports a busy or unknown scroll.

        Only 400 and 404 responses are inspected, and only the first entry of
        their ``errors`` list. A body that is not JSON, or that does not have
        the expected structure, is treated as "not a scroll error" instead of
        raising.
        """
        if response.status_code not in [400, 404]:
            return False

        # example response:
        # {..., "errors": [{'code': 'scroll_exists', 'message': 'scroll already exists for this workspace'}]}
        # {..., "errors": [{'code': 'not_found', 'message':'scroll parameter not found'}]}
        try:
            body = response.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses.
            return False

        errors = body.get("errors") if isinstance(body, dict) else None
        if not isinstance(errors, list) or not errors:
            return False

        err_body = errors[0]
        if not isinstance(err_body, dict):
            return False
        return err_body.get("code") in ["scroll_exists", "not_found"]

    @property
    def raise_on_http_errors(self) -> bool:
        """Suppress HTTP errors only while abandoning the scroll endpoint."""
        if self.need_use_standard() and self._endpoint_type == self.EndpointType.scroll:
            return False
        return True

    def stream_slices(self, sync_mode, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one slice for the scroll endpoint and, if needed, one for the standard one.

        The check runs after the first slice has been consumed, so the second
        slice is only produced when reading through the scroll endpoint ended
        with ``need_use_standard()`` being true.
        """
        yield None
        if self.need_use_standard():
            self._endpoint_type = self.EndpointType.standard
            yield None

    def should_retry(self, response: requests.Response) -> bool:
        """Retry scroll errors until the stream has to switch to the standard endpoint."""
        if self.check_exists_scroll(response):
            self._backoff_count += 1
            if self.need_use_standard():
                self.logger.error(
                    "Can't create a new scroll request within an minute or scroll param was expired. "
                    "Let's try to use a standard non-scroll endpoint."
                )
                return False

            return True
        return super().should_retry(response)

    def backoff_time(self, response: requests.Response) -> Optional[float]:
        """Return the delay before the next attempt.

        A 404 switches the stream to the standard endpoint; a recognised scroll
        error waits 20.5 seconds; anything else uses the parent's delay.
        """
        if response.status_code == 404:
            self._use_standard = True
            # Need return value greater than zero to use UserDefinedBackoffException class
            return 0.01
        if self.check_exists_scroll(response):
            self.logger.warning("A previous scroll request is exists. " "It must be deleted within an minute automatically")
            # try to check 3 times
            return 20.5
        return super().backoff_time(response)

    def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Parse records, skipping error bodies received while HTTP errors are suppressed."""
        if not self.raise_on_http_errors:
            data = response.json()
            if data.get("errors"):
                return
        yield from super().parse_response(response, stream_state=stream_state, **kwargs)
|
|
|
|
|
|
class CompanySegments(ChildStreamMixin, IncrementalIntercomStream):
    """Return list of all company segments.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-attached-segments-1
    Endpoint: https://api.intercom.io/companies/<id>/segments
    """

    parent_stream_class = Companies

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        """Return the segments path of the company identified by the slice.

        The path is relative, like the paths of the other streams, because
        ``url_base`` already ends with a slash.

        :param stream_slice: mapping holding the company ``id``.
        """
        return f"companies/{stream_slice['id']}/segments"
|
|
|
|
|
|
class Conversations(IncrementalIntercomStream):
    """Stream of all conversations; records are read from the ``conversations`` key.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-conversations
    Endpoint: https://api.intercom.io/conversations
    """

    data_fields = ["conversations"]

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        return "conversations"

    def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
        """Add descending ordering on the cursor field to the pagination parameters."""
        query = super().request_params(next_page_token, **kwargs)
        query["order"] = "desc"
        query["sort"] = self.cursor_field
        return query

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Stop paginating once an out-of-date record was seen.

        Results are requested in descending order, so the first page holding an
        old record means that nothing newer can follow.
        """
        return None if self.has_old_records else super().next_page_token(response)
|
|
|
|
|
|
class ConversationParts(ChildStreamMixin, IncrementalIntercomStream):
    """Return list of all conversation parts.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#retrieve-a-conversation
    Endpoint: https://api.intercom.io/conversations/<id>
    """

    # The key is listed twice because it is followed twice when extracting records.
    data_fields = ["conversation_parts", "conversation_parts"]
    parent_stream_class = Conversations

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        """Return the path of the conversation identified by the slice.

        The path is relative, like the paths of the other streams, because
        ``url_base`` already ends with a slash.

        :param stream_slice: mapping holding the conversation ``id``.
        """
        return f"conversations/{stream_slice['id']}"
|
|
|
|
|
|
class Segments(IncrementalIntercomStream):
    """Stream of all segments; records are read from the ``segments`` key.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-segments
    Endpoint: https://api.intercom.io/segments
    """

    data_fields = ["segments"]

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        resource = "segments"
        return resource
|
|
|
|
|
|
class Contacts(IncrementalIntercomStream):
    """Stream of all contacts.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-contacts
    Endpoint: https://api.intercom.io/contacts
    """

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        return "contacts"

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return the token of the next page, or ``None`` when there is none.

        ``pages.next`` is handled in two shapes: a string is delegated to the
        parent implementation, a mapping contributes its ``starting_after``
        value. Any other value ends the pagination.
        """
        pages = response.json().get("pages", {})
        upcoming = pages.get("next")

        if isinstance(upcoming, str):
            return super().next_page_token(response)

        if isinstance(upcoming, dict):
            return {"starting_after": upcoming["starting_after"]}

        return None
|
|
|
|
|
|
class DataAttributes(IntercomStream):
    """Common base of the data attribute streams.

    Records are read from the ``data_attributes`` path and are identified by
    their ``name``.
    """

    primary_key = "name"

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        resource = "data_attributes"
        return resource
|
|
|
|
|
|
class CompanyAttributes(DataAttributes):
    """Return list of all data attributes belonging to a workspace for companies.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-data-attributes
    Endpoint: https://api.intercom.io/data_attributes?model=company
    """

    def request_params(self, **kwargs) -> MutableMapping[str, Any]:
        """Return the parent's parameters with the ``model`` filter added.

        Starting from the parent's parameters keeps a pagination token, when
        one is passed, in the request instead of discarding it.
        """
        params = super().request_params(**kwargs)
        params["model"] = "company"
        return params
|
|
|
|
|
|
class ContactAttributes(DataAttributes):
    """Return list of all data attributes belonging to a workspace for contacts.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-data-attributes
    Endpoint: https://api.intercom.io/data_attributes?model=contact
    """

    def request_params(self, **kwargs) -> MutableMapping[str, Any]:
        """Return the parent's parameters with the ``model`` filter added.

        Starting from the parent's parameters keeps a pagination token, when
        one is passed, in the request instead of discarding it.
        """
        params = super().request_params(**kwargs)
        params["model"] = "contact"
        return params
|
|
|
|
|
|
class Tags(IntercomStream):
    """Stream of all tags; records are identified by their ``name``.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-tags-for-an-app
    Endpoint: https://api.intercom.io/tags
    """

    primary_key = "name"

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        resource = "tags"
        return resource
|
|
|
|
|
|
class Teams(IntercomStream):
    """Stream of all teams; records are read from the ``teams`` key and identified by ``name``.

    API Docs: https://developers.intercom.com/intercom-api-reference/reference#list-teams
    Endpoint: https://api.intercom.io/teams
    """

    data_fields = ["teams"]
    primary_key = "name"

    def path(self, **kwargs) -> str:
        """Return the resource path, relative to ``url_base``."""
        resource = "teams"
        return resource
|
|
|
|
|
|
class VersionApiAuthenticator(TokenAuthenticator):
    """Token authenticator that also pins the Intercom API version.

    Intercom API support its dynamic versions' switching.
    But this connector should support only one for any resource account and
    it is realised by the additional request header 'Intercom-Version'
    Docs: https://developers.intercom.com/building-apps/docs/update-your-api-version#section-selecting-the-version-via-the-developer-hub
    """

    relevant_supported_version = "2.2"

    def get_auth_header(self) -> Mapping[str, Any]:
        """Return the parent's auth header extended with ``Intercom-Version``."""
        return {
            **super().get_auth_header(),
            "Intercom-Version": self.relevant_supported_version,
        }
|
|
|
|
|
|
class SourceIntercom(AbstractSource):
    """
    Source Intercom fetch data from messaging platform.
    """

    def check_connection(self, logger, config) -> Tuple[bool, Any]:
        """Check the access token by requesting the ``tags`` resource.

        :param logger: not used by this check.
        :param config: connector configuration; ``access_token`` is read from it.
        :return: ``(True, None)`` when the request succeeds, otherwise
            ``(False, <the request exception>)``.
        """
        authenticator = VersionApiAuthenticator(token=config["access_token"])
        try:
            # url_base already ends with "/", so only the resource name is appended.
            url = f"{IntercomStream.url_base}tags"
            auth_headers = {"Accept": "application/json", **authenticator.get_auth_header()}
            response = requests.get(url, headers=auth_headers)
            response.raise_for_status()
            return True, None
        except requests.exceptions.RequestException as e:
            return False, e

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """Build every stream of the connector.

        ``config["start_date"]`` is converted to a Unix timestamp on a copy of
        the configuration, so the caller's mapping is left untouched and the
        method can be called again with the same object. The ``Z`` suffix is
        parsed as a UTC offset (``%z`` accepts it on Python 3.7+), which makes
        the timestamp independent of the time zone of the host.

        :param config: connector configuration with ``access_token`` and
            ``start_date`` (``YYYY-MM-DDTHH:MM:SSZ``).
        :return: one instance of each supported stream.
        """
        stream_kwargs = dict(config)
        stream_kwargs["start_date"] = datetime.strptime(config["start_date"], "%Y-%m-%dT%H:%M:%S%z").timestamp()
        AirbyteLogger().log("INFO", f"Using start_date: {stream_kwargs['start_date']}")

        auth = VersionApiAuthenticator(token=config["access_token"])
        return [
            Admins(authenticator=auth, **stream_kwargs),
            Companies(authenticator=auth, **stream_kwargs),
            CompanySegments(authenticator=auth, **stream_kwargs),
            Conversations(authenticator=auth, **stream_kwargs),
            ConversationParts(authenticator=auth, **stream_kwargs),
            Contacts(authenticator=auth, **stream_kwargs),
            CompanyAttributes(authenticator=auth, **stream_kwargs),
            ContactAttributes(authenticator=auth, **stream_kwargs),
            Segments(authenticator=auth, **stream_kwargs),
            Tags(authenticator=auth, **stream_kwargs),
            Teams(authenticator=auth, **stream_kwargs),
        ]
|